<?php
// $Id: class.rss_parser.php,v 1.1.1.1 2005/02/16 17:32:31 ohwada Exp $

// 2005-01-20 K.OHWADA
// add get_raw_content()
// add mode_arrange

// 2004-12-23 K.OHWADA
// undefined time zone
// correspond to 'channel contains items'
// change $rss_parent from string to array

// 2004-11-28 K.OHWADA
// use rss_atom_parser_base

// 2004-10-24 K.OHWADA
// add function find_encoding(), convert_encode_to_utf8()
// add element encoded

// 2004-09-01 K.OHWADA
// use parse_w3cdtf()
// add convert_array_from_utf8(), convert_from_utf8()

//--------------------------------------------------------
// undefined time zone
//
// http://www.xoopstotal.com.br/backend.php
// <lastBuildDate>Tue, 21 Dec 2004 21:21:09 BRT</lastBuildDate> 
//--------------------------------------------------------

//--------------------------------------------------------
// channel contains items
// item is in the outside of channel
//
// http://pear.php.net/feeds/latest.rss
// <channel rdf:about="http://pear.php.net/">
//   <link>http://pear.php.net/</link> 
//   <items>
//     <rdf:Seq>
//       <rdf:li rdf:resource="xxx" /> 
//     </rdf:Seq>
//   </items>
//   <title>PEAR: Latest releases</title> 
// </channel>
// <item rdf:about="xxx">
//   <title>Log 1.8.7</title> 
//   <link>http://pear.php.net/package/Log/download/1.8.7/</link> 
// </item>

//--------------------------------------------------------

//--------------------------------------------------------
// encoding isn't UTF-8
//
// http://mori-umi.net/index.rdf
// ?xml version="1.0" encoding="EUC-JP" ? 
//--------------------------------------------------------

//--------------------------------------------------------
// element encoded
//
// http://shibuya.pm.org/blosxom/index.rss10
// <content:encoded>
// <![CDATA[ 
//   Shibuya Perl Mongers Presents
// ]]> 
// </content:encoded>
//---------------------------------------------------------

//=========================================================
// class for RSS Parser 
// for PHP generally
// 2004-07-20 K.OHWADA
//=========================================================

// RSS 2.0 Specification
// http://blogs.law.harvard.edu/tech/rss

// Dublin Core Metadata Element Set
// http://dublincore.org/documents/dces/

// RDF Site Summary 1.0 Modules: Content
// http://web.resource.org/rss/1.0/modules/content/

//=========================================================
// global variables (shared by the handler functions below)
//=========================================================
//  $rss_channel
//  $rss_items
//  $rss_item_num
//  $rss_uris;
//  $rss_parent
//  $rss_current

//---------------------------------------------------------
// start element handler
//
// Maintains the stack of open element names kept in the
// globals $rss_parent (names) and $rss_parent_num (index of
// the top entry).
//
// $parser : XML parser that fired the event (not used)
// $name   : element name; everything after the last ':' is
//           taken as the element name, everything before it
//           as the namespace URI
// $attrs  : element attributes (not used)
//---------------------------------------------------------
function rss_start_element($parser, $name, $attrs)
{
	global $rss_parent, $rss_parent_num, $rss_uris;

	// explode() replaces split(), which no longer exists in PHP 7+.
	$name_ns = explode(':', $name);
	$name_wk = array_pop($name_ns);
	$uri1    = implode(':', $name_ns);

// CHANNEL
	// A channel always becomes the root of the stack, whatever was open before.
	if ( $name_wk === 'CHANNEL' )
	{
		$rss_parent_num = 0;
		$rss_parent[0]  = $name_wk;
		return;
	}

	// Only elements without a namespace, or with a namespace that was
	// announced through rss_ns_start(), are pushed on the stack.
	$known_uri = is_array($rss_uris) && in_array($uri1, $rss_uris, true);

// increment parent
	if ( $known_uri || $uri1 === '' )
	{
		$rss_parent_num ++;
		$rss_parent[$rss_parent_num] = $name_wk;
	}

}

//---------------------------------------------------------
// end element handler
//
// Pops the stack of open element names ($rss_parent /
// $rss_parent_num) when the closing element is the one on
// top of the stack, and advances $rss_item_num once an ITEM
// has been closed.
//
// $parser : XML parser that fired the event (not used)
// $name   : element name; everything after the last ':' is
//           taken as the element name, everything before it
//           as the namespace URI
//---------------------------------------------------------
function rss_end_element($parser, $name)
{
	global $rss_parent, $rss_parent_num, $rss_item_num, $rss_uris;

	$parent = isset($rss_parent[$rss_parent_num]) ? $rss_parent[$rss_parent_num] : '';

	// explode() replaces split(), which no longer exists in PHP 7+.
	$name_ns = explode(':', $name);
	$name_wk = array_pop($name_ns);
	$uri1    = implode(':', $name_ns);

	// Same acceptance rule as the start handler: no namespace, or a
	// namespace that was announced through rss_ns_start().
	$known_uri = is_array($rss_uris) && in_array($uri1, $rss_uris, true);

	if ( !$known_uri && $uri1 !== '' )
	{
		return;
	}

	// Ignore closing tags that do not match the top of the stack.
	if ( $parent !== $name_wk )
	{
		return;
	}

// decrement parent
	$rss_parent_num --;
	if ($rss_parent_num < 0)  $rss_parent_num = 0;

	// The next item's data goes into the next slot of $rss_items.
	if ($name_wk === 'ITEM')
	{
		$rss_item_num ++;
	}

}

//---------------------------------------------------------
// character data handler
//
// Stores element text in $rss_items (while $rss_parent[1] is
// ITEM), $rss_image (while it is IMAGE) or $rss_channel
// (everything else), keyed by the lower-cased element name.
// Only the element names listed below are stored.
//
// The parser may deliver the text of one element in several
// calls (for example around entities and line breaks), so the
// pieces are joined first and white space is trimmed only at
// the outer ends of the joined text. Trimming every piece on
// its own would glue words together ("Tom &amp; Jerry" would
// become "Tom&Jerry").
//
// $parser : XML parser that fired the event (not used)
// $data   : one piece of character data
//---------------------------------------------------------
function rss_character_data($parser, $data) 
{
	global $rss_parent, $rss_parent_num, $rss_channel, $rss_image, $rss_items, $rss_item_num;

	// Element names stored for an item.
	static $item_fields = array(
		'TITLE', 'LINK', 'DESCRIPTION', 'AUTHOR', 'CATEGORY',
		'COMMENTS', 'ENCLOSURE', 'GUID', 'PUBDATE', 'SOURCE',
		// dc:xxx
		'CREATOR', 'SUBJECT', 'PUBLISHER', 'CONTRIBUTOR', 'DATE',
		'TYPE', 'FORMAT', 'IDENTIFIER', 'LANGUAGE', 'RELATION',
		'COVERAGE', 'RIGHTS',
		// content:encoded
		'ENCODED',
		// others
		'SRC', 'WIDTH', 'HEIGHT'
	);

	// Element names stored for the channel image.
	static $image_fields = array('TITLE', 'LINK', 'URL', 'WIDTH', 'HEIGHT');

	// Element names stored for the channel itself.
	static $channel_fields = array(
		// Required channel elements
		'TITLE', 'LINK', 'DESCRIPTION',
		// Optional channel elements
		'LANGUAGE', 'COPYRIGHT', 'MANAGINGEDITOR', 'WEBMASTER',
		'PUBDATE', 'LASTBUILDDATE', 'CATEGORY', 'GENERATOR', 'DOCS',
		'CLOUD', 'TTL', 'RATING', 'TEXTINPUT', 'SKIPHOURS', 'SKIPDAYS',
		// dc:xxx
		'CREATOR', 'SUBJECT', 'PUBLISHER', 'CONTRIBUTOR', 'DATE',
		'TYPE', 'FORMAT', 'IDENTIFIER', 'SOURCE', 'RELATION',
		'COVERAGE', 'RIGHTS'
	);

	// State of the text run currently being assembled:
	//   $run_slot   - identifies the value being written (null: none)
	//   $run_prefix - value the slot held before this run started
	//   $run_text   - untrimmed text received for this run so far
	static $run_slot   = null;
	static $run_prefix = '';
	static $run_text   = '';

	$parent_1 = isset($rss_parent[1]) ? $rss_parent[1] : '';
	$current  = isset($rss_parent[$rss_parent_num]) ? $rss_parent[$rss_parent_num] : '';

	// The entry at stack position 1 selects the section the text belongs to.
	switch ($parent_1)
	{
		case 'ITEM':
			$section  = 'item';
			$accepted = $item_fields;
			break;

		case 'IMAGE':
			$section  = 'image';
			$accepted = $image_fields;
			break;

		default:
			$section  = 'channel';
			$accepted = $channel_fields;
			break;
	}

	// Text of any other element is dropped; it also ends the current run,
	// so a later element of the same name starts a new one.
	if ( !in_array($current, $accepted, true) )
	{
		$run_slot = null;
		return;
	}

	$field = strtolower( $current );

	if ($section == 'item')
	{
		if ( !isset($rss_items[$rss_item_num]) )
		{
			$rss_items[$rss_item_num] = array();
		}
		$bucket =& $rss_items[$rss_item_num];
		$slot   = 'item:' . $rss_item_num . ':' . $field;
	}
	elseif ($section == 'image')
	{
		$bucket =& $rss_image;
		$slot   = 'image:' . $field;
	}
	else
	{
		$bucket =& $rss_channel;
		$slot   = 'channel:' . $field;
	}

	// Start a new run when the target changed or was reset (a new parse
	// empties the globals). Text of a repeated element is appended to
	// what is already stored.
	if ( $slot !== $run_slot || !isset($bucket[$field]) )
	{
		$run_slot   = $slot;
		$run_prefix = isset($bucket[$field]) ? $bucket[$field] : '';
		$run_text   = '';
	}

	$run_text       .= $data;
	$bucket[$field]  = $run_prefix . trim($run_text);

}

//---------------------------------------------------------
// start namespace handler
//
// Remembers a declared namespace URI, upper-cased, at the end
// of the global list $rss_uris.
//
// $parser : XML parser that fired the event (not used)
// $prefix : namespace prefix (not used)
// $uri    : namespace URI
//---------------------------------------------------------
function rss_ns_start($parser, $prefix, $uri)
{
	global $rss_uris;

	$uri_upper = strtoupper($uri);
	array_push($rss_uris, $uri_upper);
}

//---------------------------------------------------------
// end namespace handler
//
// Drops the last entry of the global namespace URI list
// $rss_uris.
//
// $parser : XML parser that fired the event (not used)
// $prefix : namespace prefix (not used)
//---------------------------------------------------------
function rss_ns_end($parser, $prefix)
{
	global $rss_uris;

	// the removed URI itself is of no interest here
	array_pop($rss_uris);
}

//=========================================================
// class rss_parser
//
// Parses an RSS document with the handler functions defined
// in this file and exposes the collected channel, image and
// item data. The helper methods called on $this that are not
// defined here (get_summary, get_content, format_timestamp,
// parse_w3cdtf, get_unixtime_rfc822, prepare_strip_tags) and
// the property mode_arrange are presumably provided by
// rss_atom_parser_base - confirm against that class.
//=========================================================
class rss_parser extends rss_atom_parser_base
{
	// Message describing why the last parse() failed; '' after a success.
	var $parse_error;

//---------------------------------------------------------
// constructor
//
// PHP 4 style constructor that forwards to the base class.
// NOTE(review): PHP 8 no longer treats a method named after
// the class as a constructor - confirm how the base class is
// initialised there.
//---------------------------------------------------------
function rss_parser()
{
	rss_atom_parser_base::rss_atom_parser_base();
}

//---------------------------------------------------------
// getInstance
//
// Returns one shared rss_parser object, created on first use.
// NOTE(review): declared without "static"; PHP 8 rejects
// static calls to non-static methods - confirm how callers
// invoke this.
//---------------------------------------------------------
function &getInstance()
{
	static $instance;
	if (!isset($instance)) 
	{
		$instance = new rss_parser();
	}

	return $instance;
}

//---------------------------------------------------------
// get_channel
// returns the channel data collected by the last parse()
//---------------------------------------------------------
function get_channel()
{
  	global $rss_channel;
  	return $rss_channel;
}

//---------------------------------------------------------
// get_image
// returns the image data collected by the last parse()
//---------------------------------------------------------
function get_image()
{
	global $rss_image;
	return $rss_image;
}

//---------------------------------------------------------
// get_items
// returns the items collected by the last parse()
//---------------------------------------------------------
function get_items()
{
	global $rss_items;
	return $rss_items;
}

//---------------------------------------------------------
// get_parse_error
// returns the error message of the last parse(), '' if none
//---------------------------------------------------------
function get_parse_error()
{
  return $this->parse_error;
}

//---------------------------------------------------------
// parse
//
// Parses a complete RSS document.
//
// $data  : the XML text
// return : true on success; false when the XML is not well
//          formed or no channel data was found, in which case
//          get_parse_error() returns the reason
//---------------------------------------------------------
function parse($data)
{
	global $rss_item_num, $rss_parent_num, $rss_parent, $rss_channel, $rss_image, $rss_items, $rss_uris;

// global
	// The handler functions communicate through these globals, so every
	// parse starts from a clean state.
	$rss_channel    = array();
	$rss_image      = array();
	$rss_items      = array();
	$rss_item_num   = 0;
	$rss_parent     = array();
	$rss_parent[0]  = '';
	$rss_parent_num = 0;
	$rss_uris       = array();

	$this->parse_error = '';

	$xml_parser = xml_parser_create_ns("UTF-8");
	xml_set_element_handler($xml_parser, "rss_start_element", "rss_end_element");
	xml_set_character_data_handler($xml_parser, "rss_character_data");
	xml_set_start_namespace_decl_handler($xml_parser, "rss_ns_start");
	xml_set_end_namespace_decl_handler($xml_parser, "rss_ns_end");

	// The third argument is the "is final" flag, not a length: the whole
	// document is handed over in this single call.
	if (!xml_parse($xml_parser, $data, true))
	{
		$line  = xml_get_current_line_number($xml_parser);
		$error = xml_error_string(xml_get_error_code($xml_parser));

		if ($line == 1)
		{
			$this->parse_error = 'XML error at line 1, check URL';
		}
		else
		{
			$this->parse_error = sprintf('XML error: %s at line %d', $error, $line );
		}

		xml_parser_free($xml_parser);
		return false;
	}

	xml_parser_free($xml_parser);
  
	// Well-formed XML in which no channel element text was collected
	// is not treated as RSS.
	if (empty($rss_channel))
	{
  		$this->parse_error = 'not RSS format';
		return false;
	}

	return true;
}

//=========================================================
// output option
//=========================================================

//---------------------------------------------------------
// arrange_items
//
// Adds the derived fields unixtime, date, summary, url,
// time_modified, time_issued, content and, when available,
// author_name, author_email, site_title and site_url to
// every item.
//
// $items      : array of items as returned by get_items()
// $site_title : stored as site_title in every item if not empty
// $site_url   : stored as site_url in every item if not empty
// return      : the extended items, or false when $items is
//               empty or not an array
//---------------------------------------------------------
function arrange_items($items, $site_title='', $site_url='')
{
	if ( !is_array($items) || count($items) <= 0 )  return false;

	// Walk the keys that really exist; the item list can have gaps.
	foreach (array_keys($items) as $i)
	{
		$item = $items[$i];

// view
		$unixtime = $this->get_unixtime( $item );
		$items[$i]['unixtime'] = $unixtime;

		$items[$i]['date'] = '';
		if ($unixtime)
		{
			$items[$i]['date'] = $this->format_timestamp($unixtime, "s");
		}

		$items[$i]['summary'] = $this->get_summary( $item );

// save
		// null when the item carries no link
		$items[$i]['url']           = isset($item['link']) ? $item['link'] : null;
		$items[$i]['time_modified'] = $unixtime;
		$items[$i]['time_issued']   = $unixtime;

		if ( isset($item['creator']) )
		{	$items[$i]['author_name'] = $item['creator'];	}

		if ( isset($item['author']) )
		{	$items[$i]['author_email'] = $item['author'];	}

		if ($site_title)
		{	$items[$i]['site_title'] = $site_title;	}

		if ($site_url)
		{	$items[$i]['site_url'] = $site_url;	}

// view
		if ($this->mode_arrange)
		{
			$items[$i]['content'] = $this->get_content( $item );
		}
// save
		else
		{
			$items[$i]['content'] = $this->get_raw_content( $item );
		}

	}

	return $items;
}

//---------------------------------------------------------
// get_unixtime
//
// Returns the timestamp of an item: taken from 'date' via
// parse_w3cdtf() when present, otherwise from 'pubdate' via
// get_unixtime_rfc822(); 0 when the item has neither.
//---------------------------------------------------------
function get_unixtime($item)
{
	$unixtime = 0;

	if ( isset( $item['date'] ) )
	{
		$datetime = $item['date'];
		$time_arr = $this->parse_w3cdtf($datetime);
		$unixtime = $time_arr['timestamp'];
	}
	elseif ( isset( $item['pubdate'] ) )
	{
		$datetime = $item['pubdate'];
		$unixtime = $this->get_unixtime_rfc822( $datetime );
	}

	return $unixtime;
}

//---------------------------------------------------------
// get_raw_content
//
// Returns the unmodified body of an item: 'encoded' when
// present, otherwise 'description', otherwise ''.
//---------------------------------------------------------
function get_raw_content($item)
{
	$content= '';
	if ( isset($item['encoded']) )
	{
		$content = $item['encoded'];
	}
	elseif ( isset($item['description']) )
	{
		$content = $item['description'];
	}

	return $content;
}

//---------------------------------------------------------
// get_raw_summary
//
// Returns the raw content of an item passed through
// prepare_strip_tags() and strip_tags().
//---------------------------------------------------------
function get_raw_summary($item)
{
	$summary = $this->get_raw_content( $item );
	$summary = $this->prepare_strip_tags( $summary );
	$summary = strip_tags( $summary );

	return $summary;
}

//=========================================================
// print option
//=========================================================

// --- class end ---
}

?>
