<?php
// $Id: class.rss_atom_parser_base.php,v 1.1.1.1 2005/02/16 17:32:31 ohwada Exp $

// 2005-01-20 K.OHWADA
// add set_mode_content()

// 2005-01-01 K.OHWADA
// add get_title()

//=========================================================
// class rss_atom_parser_base
// divided from class.rss_paser.php
// for PHP generally
// 2004-11-28 K.OHWADA
//=========================================================
//=========================================================
// class rss_atom_parser_base
//
// Common base for RSS / Atom feed parsers.
// This class fetches a remote feed, normalises it to UTF-8,
// and offers helpers to print a channel and its items as HTML.
// The actual XML parsing and data access are left to subclasses,
// which override parse(), get_parse_error(), get_channel(),
// get_image(), get_items(), get_raw_content(),
// get_raw_summary() and get_unixtime().
//
// The syntax is deliberately kept PHP 4 style ("var", no visibility
// keywords) so that existing subclasses keep working unchanged.
//=========================================================
class rss_atom_parser_base
{
	// XML text stored by read_remote_xml() (converted to UTF-8, control codes stripped)
	var $xml_data;

	// message describing the last read_remote_xml() failure ('' when none)
	var $xml_error = '';

	// 0: strip HTML tags from titles, otherwise: output titles as they are
	var $mode_title;

	// 0: strip HTML tags from content and shorten it, otherwise: output content as it is
	var $mode_content;

	// arrange mode; only stored by this class (the constructor labels 0 as "raw data")
	var $mode_arrange;

	// maximum length handed to shorten_text() for content / summary
	var $max_content;
	var $max_summary;

	// number of the last item printed by print_list_by_url()
	var $max_item;

	// title used when an entry has no title
	var $title_default;

	// value of the "target" attribute of generated links ('' = no attribute)
	var $target;

	// non-zero: echo read / parse errors as HTML
	var $flag_print_error;

// multibyte
	// encoding the UTF-8 feed data is converted to for output
	var $user_encode;


	//---------------------------------------------------------
	// constructor
	//---------------------------------------------------------
	// __construct() is what PHP 5 and later call on "new".
	// PHP 8 no longer treats a method named after the class as a
	// constructor, so without it none of the defaults below would be set.
	//---------------------------------------------------------
	function __construct()
	{
		$this->set_mode_title(   0 );	// not allow HTML tag
		$this->set_mode_content( 0 );	// not allow HTML tag
		$this->set_mode_arrange( 0 );	// raw data
		$this->set_max_content( 100 );
		$this->set_max_summary( 100 );

		$this->set_max_item( 10 );
		$this->set_title_default( "---" );
		$this->set_target();

// for debug
		$this->flag_print_error = 1;

// for Japanese
		$this->set_user_encode( "EUC-JP" );
	}

	//---------------------------------------------------------
	// PHP 4 style constructor
	// kept so that subclasses calling $this->rss_atom_parser_base()
	// continue to work
	//---------------------------------------------------------
	function rss_atom_parser_base()
	{
		// class-qualified call: a subclass overriding __construct()
		// must not be re-entered from here
		rss_atom_parser_base::__construct();
	}

	//=========================================================
	// parse
	//=========================================================

	//---------------------------------------------------------
	// parse_url
	// read the feed at $url and parse it
	// return: result of parse_data(), or false when the read fails
	//---------------------------------------------------------
	function parse_url($url)
	{
		if ( !$this->read_remote_xml($url) )
		{
			$this->_print_error( $this->xml_error );
			return false;
		}

		return $this->parse_data($this->xml_data);
	}

	//---------------------------------------------------------
	// parse_data
	// parse XML text through the subclass' parse()
	// return: true on success, false when parse() reports failure
	//---------------------------------------------------------
	function parse_data($xml_data)
	{
		if ( !$this->parse($xml_data) )
		{
			$this->_print_error( $this->get_parse_error() );
			return false;
		}

		return true;
	}

	function parse($xml_data)
	{
// dummy : override
	}

	function get_parse_error()
	{
// dummy : override
	}

	//---------------------------------------------------------
	// _print_error (internal)
	// echo an error message in red when flag_print_error is set
	// the message can contain the requested URL, so it is escaped
	//---------------------------------------------------------
	function _print_error($message)
	{
		if ( !$this->flag_print_error )  return;

		$message = htmlspecialchars( strval($message), ENT_QUOTES );
		echo "<font color='red'>".$message."</font><br />\n";
	}

	//=========================================================
	// input option
	//=========================================================

	//---------------------------------------------------------
	// read_find_rss_atom_link
	// fetch an HTML page and look for its feed auto-discovery links
	// return: array( rss url, atom url ), '' for each one not found
	//---------------------------------------------------------
	function read_find_rss_atom_link($url_html)
	{
		$data_html = $this->read_remote_file($url_html);
		if ( empty($data_html) )
		{
			return array('', '');
		}

		return $this->find_rss_atom_link($data_html);
	}

	//---------------------------------------------------------
	// read_remote_xml
	// fetch a feed, convert it to UTF-8 and strip control codes
	// the result is also stored in $this->xml_data
	// return: the XML text, or false with $this->xml_error set
	//---------------------------------------------------------
	function read_remote_xml($url_rss)
	{
		$this->xml_error = '';

		if (empty($url_rss))
		{
			$this->xml_error = "empty XML url";
			return false;
		}

		$xml_data = $this->read_remote_file($url_rss);

		// strict comparison: an empty document is not an open failure
		if ($xml_data === false)
		{
			$this->xml_error = "can't open XML url: $url_rss";
			return false;
		}

		if (empty($xml_data))
		{
			$this->xml_error = "no XML data";
			return false;
		}

		$encode = $this->find_encoding($xml_data);
		if ( $encode && ( strtoupper($encode) != "UTF-8" ))
		{
			$xml_data = $this->convert_encode_to_utf8($xml_data, $encode);
		}

		$xml_data = $this->strip_control_code($xml_data);
		$this->xml_data = $xml_data;

		return $xml_data;
	}

	// --------------------------------------------------------
	// strip control code
	// replace every control code with a space
	// except 0A,0D (return code)
	// --------------------------------------------------------
	function strip_control_code($text)
	{
		return preg_replace('/[\x00-\x09\x0B\x0C\x0E-\x1F\x7F]/', ' ', $text);
	}

	//=========================================================
	// print option
	//=========================================================

	//---------------------------------------------------------
	// print_list_by_url
	// read and parse the feed at $url, then print the channel title
	// followed by the items:
	//   items 1 .. $num            with content
	//   items $num+1 .. max_item   title and date only
	// return: false when the feed cannot be read or parsed
	//---------------------------------------------------------
	function print_list_by_url($url, $num=3)
	{
		if ( !$this->read_remote_xml($url) )
		{
			$this->_print_error( $this->xml_error );
			return false;
		}

		// parse_data() prints its own error
		if ( !$this->parse_data($this->xml_data) )
		{
			return false;
		}

		$channel = $this->get_channel();
//		$image   = $this->get_image();
		$items   = $this->get_items();

		$channel = $this->convert_array_from_utf8( $channel );
//		$image   = $this->convert_array_from_utf8( $image );
		$items   = $this->convert_array_array_from_utf8( $items );

		$this->set_target("_blank");

		print "<h4>";
		$this->print_channel($channel);
		print "</h4>\n";

		$this->print_items($items, 1,      $num,            1);
		$this->print_items($items, $num+1, $this->max_item, 0);
	}

	//---------------------------------------------------------
	// print_channel
	//---------------------------------------------------------
	function print_channel($channel)
	{
		print $this->get_title_link($channel);
	}

	//---------------------------------------------------------
	// print_items
	// print the items numbered $start .. $end (1-based, inclusive)
	// $flag_cont: non-zero to print the content of each item too
	//---------------------------------------------------------
	function print_items($items, $start=1, $end=100, $flag_cont=0)
	{
		if ( !is_array($items) )  return;

		$first = max(0, $start - 1);
		$last  = min(count($items), $end);

		for ($i=$first; $i<$last; $i++)
		{
			$this->print_item($items[$i], $flag_cont);
		}
	}

	//---------------------------------------------------------
	// print_item
	// print one item: linked title, date, and optionally content
	//---------------------------------------------------------
	function print_item($item, $flag_cont=0)
	{
		// NOTE(review): the same id is emitted for every item;
		// left unchanged because existing style sheets may rely on it
		print "<p id='rss'>";
		print $this->get_title_link( $item );
		print " ";
		print $this->get_date_parenthesis( $item );
		print "</p>\n";

		if ($flag_cont)
		{
			print "<font size='-1'>";
			print $this->get_content( $item );
			print "</font><br>\n";
		}
	}

	//---------------------------------------------------------
	// get_title_link
	// build <a href='link' target='...'>title</a> from an entry
	// link and target come from feed data / settings and sit in
	// single-quoted attributes, so quotes are escaped as well
	//---------------------------------------------------------
	function get_title_link($arr)
	{
		$title = $this->get_title($arr);

		$link = '';
		if ( isset($arr['link']) )
		{
			$link = htmlspecialchars( $arr['link'], ENT_QUOTES );
		}

		$target = '';
		if ($this->target)
		{
			$target = "target='".htmlspecialchars( $this->target, ENT_QUOTES )."'";
		}

		return "<a href='$link' $target>$title</a>";
	}

	//---------------------------------------------------------
	// get_title
	// return the title of an entry, or title_default when it has none
	// unless mode_title is set, HTML tags are stripped and the
	// remaining text is escaped
	//---------------------------------------------------------
	function get_title($arr)
	{
		if ( empty($arr['title']) )
		{
			return $this->title_default;
		}

		$title = $arr['title'];

// some site have title with html tag
		if ( !$this->mode_title )
		{
			$title = strip_tags( $title );
			$title = htmlspecialchars( $title );
		}

		return $title;
	}

	//---------------------------------------------------------
	// get_date_parenthesis
	// return the date of an item as "(Y-m-d)",
	// or '' when the item has no usable timestamp
	//---------------------------------------------------------
	function get_date_parenthesis($item)
	{
		$unixtime = $this->get_unixtime( $item );

		// without a timestamp date() would show 1970-01-01 or today
		if ( empty($unixtime) )
		{
			return '';
		}

		$date = $this->format_timestamp($unixtime, "s");
		if ($date)
		{
			$date = "($date)";
		}

		return $date;
	}

	//---------------------------------------------------------
	// get_content
	// return the content of an item
	// unless mode_content is set, tags are stripped, the text is
	// shortened to max_content and then escaped
	//---------------------------------------------------------
	function get_content($item)
	{
		$content = $this->get_raw_content($item);

		if ( !$this->mode_content )
		{
			$content = $this->prepare_strip_tags( $content );
			$content = strip_tags( $content );
			$content = $this->shorten_text($content, $this->max_content);
			$content = htmlspecialchars( $content );
		}

		return $content;
	}

	//---------------------------------------------------------
	// get_summary
	// return the summary of an item, shortened to max_summary and escaped
	//---------------------------------------------------------
	function get_summary($item)
	{
		$summary = $this->get_raw_summary($item);
		$summary = $this->shorten_text($summary, $this->max_summary);
		$summary = htmlspecialchars( $summary );

		return $summary;
	}

	//---------------------------------------------------------
	// prepare_strip_tags
	// put a space after every ">" so that the text of adjacent
	// elements does not run together once the tags are removed
	//---------------------------------------------------------
	function prepare_strip_tags($text)
	{
		return str_replace(">", "> ", $text);
	}

	//---------------------------------------------------------
	// get_xxx
	// override
	//---------------------------------------------------------
	function get_channel()
	{
// dummy : override
	}

	function get_image()
	{
// dummy : override
	}

	function get_items()
	{
// dummy : override
	}

	function get_raw_content($item)
	{
// dummy : override
	}

	// hook used by get_summary()
	// delegates to the historical, misspelled hook so that subclasses
	// which overrode get_raw_sumary() are still honoured
	function get_raw_summary($item)
	{
		return $this->get_raw_sumary($item);
	}

	// misspelled name kept for backward compatibility
	function get_raw_sumary($item)
	{
// dummy : override
	}

	function get_unixtime($item)
	{
// dummy : override
	}

	//=========================================================
	// set and get property
	//=========================================================
	function set_mode_title($value)
	{
		$this->mode_title = intval($value);
	}

	function set_mode_content($value)
	{
		$this->mode_content = intval($value);
	}

	function set_mode_arrange($value)
	{
		$this->mode_arrange = intval($value);
	}

	function set_max_content($value)
	{
		$this->max_content = intval($value);
	}

	function set_max_summary($value)
	{
		$this->max_summary = intval($value);
	}

	function set_max_item($value)
	{
		$this->max_item = intval($value);
	}

	function set_title_default($value='')
	{
		$this->title_default = $value;
	}

	function set_target($value='')
	{
		$this->target = $value;
	}

	function set_user_encode($value)
	{
		$this->user_encode = $value;
	}

	// return the message of the last read_remote_xml() failure
	function get_error()
	{
		return $this->xml_error;
	}

	//=========================================================
	// convert_language
	//=========================================================

	//---------------------------------------------------------
	// convert_array_array_from_utf8
	// apply convert_array_from_utf8() to every element
	// a non-array input yields an empty array
	//---------------------------------------------------------
	function convert_array_array_from_utf8($in_arr)
	{
		$out_arr = array();
		if ( !is_array($in_arr) )  return $out_arr;

		foreach ($in_arr as $key => $value)
		{
			$out_arr[$key] = $this->convert_array_from_utf8($value);
		}

		return $out_arr;
	}

	//---------------------------------------------------------
	// convert_array_from_utf8
	// apply convert_from_utf8() to every element
	// a non-array input yields an empty array
	//---------------------------------------------------------
	function convert_array_from_utf8($in_arr)
	{
		$out_arr = array();
		if ( !is_array($in_arr) )  return $out_arr;

		foreach ($in_arr as $key => $value)
		{
			$out_arr[$key] = $this->convert_from_utf8($value);
		}

		return $out_arr;
	}

	//=========================================================
	// multibyte
	//=========================================================

	//---------------------------------------------------------
	// convert_from_utf8
	// convert UTF-8 text to user_encode
	// without mbstring, utf8_decode() is used instead
	//---------------------------------------------------------
	function convert_from_utf8($text)
	{
		if ( function_exists('mb_convert_encoding') )
		{
			return mb_convert_encoding($text, $this->user_encode, "UTF-8");
		}

		return utf8_decode($text);
	}

	//---------------------------------------------------------
	// convert_encode_to_utf8
	// convert text from $encode to UTF-8
	// without mbstring, utf8_encode() is used and $encode is ignored
	//---------------------------------------------------------
	function convert_encode_to_utf8($text, $encode)
	{
		if ( function_exists('mb_convert_encoding') )
		{
			return mb_convert_encoding($text, "UTF-8", $encode);
		}

		return utf8_encode($text);
	}

	//---------------------------------------------------------
	// shorten_text
	// cut text longer than $max bytes and mark the cut with " ..."
	// mb_strimwidth() is used when available
	//---------------------------------------------------------
	function shorten_text($text, $max=100)
	{
		if ( strlen($text) <= $max )
		{
			return $text;
		}

		if (function_exists('mb_strimwidth'))
		{
			return mb_strimwidth( $text, 0, $max, " ..." );
		}

		return substr( $text, 0, $max )." ...";
	}


	//=========================================================
	// find in HTML & XML
	//=========================================================

	//---------------------------------------------------------
	// get html content by url
	// return: the whole content as a string, or false when
	//         the url cannot be opened
	// NOTE(review): $url is handed to fopen() as it is, so local
	// paths and every enabled stream wrapper are accepted;
	// callers have to validate urls that come from users
	//---------------------------------------------------------
	function read_remote_file($url)
	{
		$fp = fopen($url,"r");
		if (!$fp) return false;

		$text = '';
		while ( !feof($fp) )
		{
			$line = fgets($fp,4096);

			// a read error / timeout returns false without reaching EOF
			if ($line === false) break;

			$text .= $line;
		}

		fclose($fp);

		return $text;
	}

	//---------------------------------------------------------
	// find_rss_atom_link in HTML
	//---------------------------------------------------------
	// <link rel="alternate" type="application/rss+xml" title="RSS" href="xxx" />
	//---------------------------------------------------------
	// return: array( rss href, atom href )
	//         the first matching link of each type, '' when not found
	//---------------------------------------------------------
	function find_rss_atom_link($html)
	{
		$href_rss  = '';
		$href_atom = '';

// collect the attribute part of all <link> tags
		preg_match_all('/<link\s+(.*?)\s*\/?>/si', $html, $match);

// name = "value" | name = 'value' | name = value
		$attr_pattern = '/([^\s=\'"<>\/]+)\s*=\s*(?:"([^"]*)"|\'([^\']*)\'|([^\s"\'>]+))/s';

		foreach ($match[1] as $link_tag)
		{
			$attr = array();
			preg_match_all($attr_pattern, $link_tag, $pairs, PREG_SET_ORDER);

			foreach ($pairs as $pair)
			{
				// trailing groups that did not take part are absent,
				// so the highest index present holds the value
				if     ( isset($pair[4]) )  $value = $pair[4];
				elseif ( isset($pair[3]) )  $value = $pair[3];
				else                        $value = $pair[2];

				$attr[ strtolower($pair[1]) ] = $value;
			}

			if ( !isset($attr['rel'], $attr['type'], $attr['href']) )  continue;
			if ( strtolower($attr['rel']) != 'alternate' )  continue;

			$type = strtolower( $attr['type'] );

			if (empty($href_rss) && ($type == 'application/rss+xml'))
			{
				$href_rss = $attr['href'];
			}
			elseif (empty($href_atom) && ($type == 'application/atom+xml'))
			{
				$href_atom = $attr['href'];
			}
		}

		return array($href_rss, $href_atom);
	}

	//---------------------------------------------------------
	// find_encoding in XML
	// return: the encoding named in the <?xml ... ?> declaration,
	//         '' when there is no declaration or no encoding
	//---------------------------------------------------------
	function find_encoding($text)
	{
		if ( !preg_match('/<\?xml(.*?)\?>/si', $text, $match1) )
		{
			return '';
		}

		// the value can be quoted with either " or '
		if ( preg_match('/encoding\s*=\s*(["\'])(.*?)\1/si', $match1[1], $match2) )
		{
			return trim( $match2[2] );
		}

		return '';
	}

	//=========================================================
	// datetime
	//=========================================================

	//--------------------------------------------------------
	// formatted times
	// port from xoops functions.php
	//--------------------------------------------------------
	// $format: 'mysql' Y-m-d H:i:s
	//          'm'     Y-m-d H:i
	//          's'     Y-m-d
	//          'r'     RFC 822
	//          ''      Y-m-d H:i:s
	//          anything else is used as a date() format string
	// the keywords are matched without regard to case
	//--------------------------------------------------------
	function format_timestamp($unixtime, $format="l")
	{
		switch (strtolower($format))
		{

// 2001-01-02 03:04:05
		case 'mysql':
			$datestring = "Y-m-d H:i:s";
			break;

// 2001-01-02 03:04
		case 'm':
			$datestring = "Y-m-d H:i";
			break;

// 2001-01-02
		case 's':
			$datestring = "Y-m-d";
			break;

// RFC 822 : Thu, 21 Dec 2000 16:01:07 +0200
		case 'r':
			$datestring = "r";
			break;

		default:
			$datestring = ($format != '') ? $format : "Y-m-d H:i:s";
			break;
		}

		return date($datestring, $unixtime);
	}


	//--------------------------------------------------------
	// get unixtime from RFC822
	// return: unix time, 0 when the text cannot be parsed
	//--------------------------------------------------------
	function get_unixtime_rfc822( $datetime )
	{
		$unixtime = strtotime($datetime);

// maybe undefined time zone
		if ( !$this->_is_valid_unixtime($unixtime) )
		{

// delete time zone
			$datetime = preg_replace("/ [a-zA-Z]{3,}$/", '', $datetime);
			$unixtime = strtotime( $datetime );

// give up
			if ( !$this->_is_valid_unixtime($unixtime) )
			{
				$unixtime = 0;
			}
		}

		return $unixtime;
	}

	//--------------------------------------------------------
	// _is_valid_unixtime (internal)
	// strtotime() reports failure with false (PHP 5.1 and later)
	// or with -1 (earlier versions)
	//--------------------------------------------------------
	function _is_valid_unixtime($unixtime)
	{
		return ( $unixtime !== false ) && ( $unixtime != -1 );
	}

	// -------------------------------------------------------------------------
	// http://www.arielworks.net/articles/2004/0224c/
	// array parse_w3cdtf(string datetime)
	// -------------------------------------------------------------------------
	// http://www.w3.org/TR/NOTE-datetime
	//  Year:
	//      YYYY (eg 1997)
	//   Year and month:
	//      YYYY-MM (eg 1997-07)
	//   Complete date:
	//      YYYY-MM-DD (eg 1997-07-16)
	//   Complete date plus hours and minutes:
	//      YYYY-MM-DDThh:mmTZD (eg 1997-07-16T19:20+01:00)
	//   Complete date plus hours, minutes and seconds:
	//      YYYY-MM-DDThh:mm:ssTZD (eg 1997-07-16T19:20:30+01:00)
	//   Complete date plus hours, minutes, seconds and a decimal fraction of a second
	//      YYYY-MM-DDThh:mm:ss.sTZD (eg 1997-07-16T19:20:30.45+01:00)
	// -------------------------------------------------------------------------
	// return: false when $datetime is not W3C-DTF, otherwise an array with
	//         year, month, day, hour, minute, second, fraction, timezone
	//         (the matched text, 0 for parts that are absent) and
	//         timestamp (unix time in UTC)
	// -------------------------------------------------------------------------
	function parse_w3cdtf($datetime)
	{

// 2004-10-24 K.OHWADA
// suppress warning
		$year     = 0;
		$month    = 0;
		$day      = 0;
		$hour     = 0;
		$minute   = 0;
		$second   = 0;
		$fraction = 0;
		$timezone = 0;

		// shared pattern parts; the capture group order is part of the logic
		$ym   = '([0-9]{4})-(0[1-9]|1[0-2])';
		$ymd  = $ym.'-(0[1-9]|[1-2][0-9]|3[0-1])';
		$hm   = '([0-5][0-9]):([0-5][0-9])';
		$sec  = '([0-5][0-9])';
		$tzd  = '(Z|(\+|-)[0-5][0-9]:[0-5][0-9])';

		// Year
		if (preg_match('/^(\d{4})$/', $datetime, $val)) {
			$year = $val[1];

		// Year and month
		} elseif (preg_match('/^'.$ym.'$/', $datetime, $val)) {
			$year  = $val[1];
			$month = $val[2];

		// Complete date
		} elseif (preg_match('/^'.$ymd.'$/', $datetime, $val)) {
			$year  = $val[1];
			$month = $val[2];
			$day   = $val[3];

		// Complete date plus hours and minutes
		} elseif (preg_match('/^'.$ymd.'T'.$hm.$tzd.'$/', $datetime, $val)) {
			$year     = $val[1];
			$month    = $val[2];
			$day      = $val[3];
			$hour     = $val[4];
			$minute   = $val[5];
			$timezone = $val[6];

		// Complete date plus hours, minutes and seconds
		} elseif (preg_match('/^'.$ymd.'T'.$hm.':'.$sec.$tzd.'$/', $datetime, $val)) {
			$year     = $val[1];
			$month    = $val[2];
			$day      = $val[3];
			$hour     = $val[4];
			$minute   = $val[5];
			$second   = $val[6];
			$timezone = $val[7];

		// Complete date plus hours, minutes, seconds and a decimal fraction of a second
		// the separator has to be a literal dot
		} elseif (preg_match('/^'.$ymd.'T'.$hm.':'.$sec.'\.([0-9]+)'.$tzd.'$/', $datetime, $val)) {
			$year     = $val[1];
			$month    = $val[2];
			$day      = $val[3];
			$hour     = $val[4];
			$minute   = $val[5];
			$second   = $val[6];
			$fraction = $val[7];
			$timezone = $val[8];

		// Not W3C-DTF
		} else {
			return false;
		}

		// Offset of Timezone in seconds; UTC = local time - offset
		$offset = 0;
		if ( is_string($timezone) && ($timezone != "Z") ) {
			$sign   = ( substr($timezone, 0, 1) == '-' ) ? -1 : 1;
			$offset = $sign * ( intval( substr($timezone, 1, 2) ) * 3600
			                  + intval( substr($timezone, 4, 2) ) * 60 );
		}

		// an absent month / day means the first one; passing 0 to
		// gmmktime() would move the date back into the previous year
		$timestamp = gmmktime(
			intval($hour), intval($minute), intval($second),
			max(1, intval($month)), max(1, intval($day)), intval($year)
		) - $offset;

		$result = array("year" => $year, "month" => $month, "day" => $day,
		                "hour" => $hour, "minute" => $minute, "second" => $second,
		                "fraction" => $fraction, "timezone" => $timezone, "timestamp" => $timestamp);

		return $result;
	}

// --- class end ---
}

?>
