<?php
// $Id: rssc_refresh_handler.php,v 1.13 2006/11/09 16:40:37 ohwada Exp $

// 2006-11-08 K.OHWADA
// add set_proxy()

// 2006-09-20 K.OHWADA
// add refresh_link_for_add_link()
// use RSSC_CODE_DB_ERROR
// use RSSC_CODE_PARSE_NOT_READ_XML

// 2006-07-18 K.OHWADA
// REQ 4146: some feed have no link

// 2006-07-10 K.OHWADA
// use happy_linux_error happy_linux_strings
// store image to channel
// change _parse_xml_by_url(), _update_link()

// 2006-06-04 K.OHWADA
// use link_basic feed_basic handler
// suppress notice : Only variable references should be returned by reference

// 2006-01-20 K.OHWADA
// small change

//=========================================================
// Rss Center Module
// 2006-01-01 K.OHWADA
//=========================================================

// === class begin ===
if( !class_exists('rssc_refresh_handler') ) 
{

//=========================================================
// class rssc_refresh_handler
// this class is used by command line
//=========================================================
class rssc_refresh_handler extends happy_linux_error
{
// handlers: all assigned in the constructor
// (rssc_get_handler() for the *_basic handlers, getInstance() for the parse handler)
	var $_config_handler;
	var $_link_handler;
	var $_feed_handler;
	var $_black_handler;
	var $_white_handler;
	var $_parse_handler;

// utility class instances: assigned in the constructor via getInstance()
	var $_xml_utility;
	var $_strings;

// objects
// _link_obj : link object cached by refresh_by_obj() / get_link();
//             unset by the _update_link*() methods after a successful update
// _parse_obj: object returned by the parse handler in _parse_xml_by_url()
	var $_link_obj;
	var $_parse_obj;

// black & white table
// the *_flag members record that the list has already been loaded
// by _get_black_list() / _get_white_list()
	var $_black_list_flag = false;
	var $_white_list_flag = false;
	var $_black_list = array();
	var $_white_list = array();

// result
// _parsed_data, _items_for_store, _xml_data, _xml_encoding: set by _parse_xml_by_url()
// _rdf_url, _rss_url, _atom_url: set by _discover_rssurl()
// _rssurl_list: not referenced by the methods of this class
	var $_parsed_data = array();
	var $_items_for_store = array();
	var $_rdf_url;
	var $_rss_url;
	var $_atom_url;
	var $_xml_data;
	var $_xml_encoding;
	var $_rssurl_list;

// _parse_error_code: error code of the parse handler, stored when parsing fails
// _parse_result    : value of the parse handler's get_parse_result(), stored when parsing succeeds
	var $_parse_error_code = 0;
	var $_parse_result     = null;

// basic config
//	var $_sel_rss_atom = RSSC_C_SEL_ATOM;

// xml mode found by _discover_rssurl(); 0 until a discovery has succeeded
	var $_xml_mode  = 0;

// debug switches (changed through the set_*() methods)
	var $_flag_link_update    = true;	// refreshArchive() updates the link record
	var $_flag_feed_update    = true;	// refreshArchive() updates the feed records
	var $_flag_force_discover = false;
	var $_flag_force_refresh  = false;

// debug parse: when the flag is set, refreshArchive() uses these values
// in place of the mode / encoding / url of the link record
	var $_flag_debug_parse = false;
	var $_debug_xml_url    = '';
	var $_debug_encoding   = '';
	var $_debug_xml_mode   = '';


//---------------------------------------------------------
// constructor
//---------------------------------------------------------
// PHP 5+ constructor.
// PHP 8 no longer treats a method named after the class as the constructor,
// so without this the handlers below would never be created.
// It is declared BEFORE the class-named constructor on purpose: PHP 5 raises
// E_STRICT "Redefining already defined constructor" for the opposite order.
// $dirname: module directory name, passed to rssc_get_handler()
function __construct( $dirname )
{
	$this->rssc_refresh_handler( $dirname );
}

// PHP 4 style constructor, kept so that PHP 4 and existing callers
// (for example a subclass calling it explicitly) continue to work.
// Creates the handlers and utility instances, then applies the module config.
// $dirname: module directory name, passed to rssc_get_handler()
function rssc_refresh_handler( $dirname )
{
	$this->happy_linux_error();

// handler
	$this->_config_handler =& rssc_get_handler('config_basic', $dirname);
	$this->_link_handler   =& rssc_get_handler('link_basic',   $dirname);
	$this->_feed_handler   =& rssc_get_handler('feed_basic',   $dirname);
	$this->_black_handler  =& rssc_get_handler('black_basic',  $dirname);
	$this->_white_handler  =& rssc_get_handler('white_basic',  $dirname);
	$this->_parse_handler  =& rssc_parse_handler::getInstance();

// class instance
	$this->_xml_utility  =& rssc_xml_utility::getInstance();
	$this->_strings      =& happy_linux_strings::getInstance();

// needs the handlers above: reads the config and sets up xml utility / proxy
	$this->_init_param();
}

//=========================================================
// public
//=========================================================

//---------------------------------------------------------
// refresh_link_for_add_link
// return code
// RSSC_CODE_PARSE_MSG:     set by this function
// RSSC_CODE_PARSE_FAILED:  set by _parse_xml_by_url()
// RSSC_CODE_REFRESH_ERROR: set by refresh()
// RSSC_CODE_DB_ERROR:      set by _update_link() _update_feed() etc
//---------------------------------------------------------
// Refresh one link and report the outcome as a code.
// $lid: link id
// return: the recorded error code when refresh() fails,
//         RSSC_CODE_PARSE_MSG when the refresh succeeded but a parse result is set,
//         otherwise 0
function refresh_link_for_add_link( $lid )
{
// refresh failed: hand back whatever error code has been recorded
	if ( !$this->refresh( $lid ) )
	{
		return $this->getErrorCode();
	}

// refresh succeeded: a non-empty parse result is reported as a message code
	return $this->_parse_result ? RSSC_CODE_PARSE_MSG : 0;
}

// Error code of the parse handler, stored by _parse_xml_by_url() when parsing fails
// (0 if no failure has been stored).
function get_parse_error_code()
{
	$code = $this->_parse_error_code;
	return $code;
}

// Parse result stored by _parse_xml_by_url() after a successful parse
// (null if nothing has been stored).
function get_parse_result()
{
	$result = $this->_parse_result;
	return $result;
}

//---------------------------------------------------------
// refresh one link
//---------------------------------------------------------
// Refresh the link identified by $lid.
// $lid: link id
// return: result of refresh_by_obj(), or false when no link record exists
// On every failure the error code is guaranteed to be non-zero:
// if no more specific code has been recorded, RSSC_CODE_REFRESH_ERROR is set.
// (Previously the "no link record" path left the code at 0, so
//  refresh_link_for_add_link() reported the failure as 0.)
function refresh($lid)
{
	$this->_set_log_func_name('refresh');

	$link_obj =& $this->_link_handler->get_object_by_id($lid);
	if ( is_object($link_obj) )
	{
		$ret = $this->refresh_by_obj($link_obj);
	}
	else
	{
		$this->_set_errors( "no link record: lid = $lid" );
		$ret = false;
	}

// keep a more specific code (parse / db error) when one was already recorded
	if ( !$ret && ( $this->getErrorCode() == 0 ) )
	{
		$this->_set_error_code( RSSC_CODE_REFRESH_ERROR );
	}

	return $ret;
}

//---------------------------------------------------------
// entry point for refresh all
//---------------------------------------------------------
// Refresh one link given its link object.
// The link is processed only when it reports itself as expired
// or when force refresh is switched on; otherwise nothing is done.
// $link_obj: link object (must provide get() and refresh_expired())
// return: true when nothing had to be done or both steps succeeded, false otherwise
function refresh_by_obj($link_obj)
{
	$this->_set_log_func_name('refresh_by_obj');

// keep the object so that get_link() can reuse it
	$this->_link_obj = $link_obj;

	$lid     = $link_obj->get('lid');
	$expired = $link_obj->refresh_expired();

	$this->_clear_count();

	if ( !$this->_flag_force_refresh && !$expired )
	{
		return true;	// not due yet
	}

// discover the xml url first, then fetch and store the archive
	return ( $this->refreshXmlUrl($lid) && $this->refreshArchive($lid) );
}

//---------------------------------------------------------
// refresh 
//---------------------------------------------------------
// Run RSS auto discovery for a link and store the discovered urls.
// Discovery runs when the link mode is RSSC_C_MODE_AUTO or force discover is on.
// $lid: link id
// return: false when the link is missing, discovery fails or the update fails; true otherwise
function refreshXmlUrl($lid)
{
	$this->_set_log_func_name('refreshXmlUrl');

	$link =& $this->get_link($lid);
	if ( !is_object($link) )
	{
		return false;
	}

	$mode = $link->get('mode');
	$url  = $link->get('url');

// RSS auto discovery is needed only in auto mode or when forced
	$need_discover = ( $this->_flag_force_discover || ( $mode == RSSC_C_MODE_AUTO ) );
	if ( !$need_discover )
	{
		return true;
	}

	if ( !$this->discoverXmlUrl($url) )
	{
		return false;
	}

// no xml mode reported by the discovery: nothing to store
	if ( !$this->_xml_mode )
	{
		return true;
	}

	$stored = $this->updateXmlUrl($lid, $this->_xml_mode, $this->_rdf_url, $this->_rss_url, $this->_atom_url);
	if ( !$stored )
	{
		$this->_set_errors( 'cannot update xmlurl' );
		return false;
	}

	return true;
}

// Fetch and parse the feed of a link, then store the result.
// Steps: parse the xml url, store the detected encoding when the link has none,
// update the link record and the feed records (each can be switched off by a debug flag).
// $lid: link id
// return: true on success or when the link mode is not RDF / RSS / ATOM, false on any failure
function refreshArchive($lid)
{
	$this->_set_log_func_name('refreshArchive');

// read the link again: the cached object is dropped after an auto discovery update
	$link =& $this->get_link($lid);
	if ( !is_object($link) )
	{
		return false;
	}

	$mode     = $link->get('mode');
	$encoding = $link->get('encoding');
	$feed_url = $link->get_rssurl_by_mode();

// debug parse: use the values given to set_debug_parse() instead of the link record
	if ( $this->_flag_debug_parse )
	{
		$mode     = $this->_debug_xml_mode;
		$encoding = $this->_debug_encoding;
		$feed_url = $this->_debug_xml_url;
	}

// only these modes have a feed url to read
	$feed_modes = array( RSSC_C_MODE_RDF, RSSC_C_MODE_RSS, RSSC_C_MODE_ATOM );
	if ( !in_array($mode, $feed_modes) )
	{
		return true;	// not execute
	}

// get and parse XML
	if ( !$this->parseXmlByUrl($feed_url, $encoding, $mode) )
	{
		return false;
	}

// no encoding given: store the one found while parsing
	if ( empty($encoding) && !$this->updateLinkEncoding($lid, $this->_xml_encoding) )
	{
		$this->_set_errors( 'cannot update link encoding' );
		return false;
	}

// update archive: link record
	if ( $this->_flag_link_update && !$this->updateLink($lid, $this->_parsed_data, $this->_xml_data, time() ) )
	{
		$this->_set_errors( 'cannot update link' );
		return false;
	}

// update archive: feed records
	if ( $this->_flag_feed_update && !$this->updateFeeds($lid, $this->_items_for_store, $this->_xml_data, $this->_xml_encoding, time() ) )
	{
		$this->_set_errors( 'cannot update feeds' );
		return false;
	}

	return true;
}

// Public wrapper of _discover_rssurl().
// $html_url    : url of the page to search for feed links
// $sel_rss_atom: selector passed through to the xml utility
// return: result of _discover_rssurl()
function discoverXmlUrl($html_url, $sel_rss_atom='')
{
	return $this->_discover_rssurl($html_url, $sel_rss_atom);
}

// Public wrapper of _update_link_xmlurl().
// return: result of _update_link_xmlurl()
function updateXmlUrl($lid, $rss_mode, $rdf_url, $rss_url, $atom_url)
{
	return $this->_update_link_xmlurl($lid, $rss_mode, $rdf_url, $rss_url, $atom_url);
}

// Public wrapper of _parse_xml_by_url().
// return: result of _parse_xml_by_url()
function parseXmlByUrl($xml_url, $xml_encoding='', $xml_mode=0)
{
	return $this->_parse_xml_by_url($xml_url, $xml_encoding, $xml_mode);
}

// Public wrapper of _update_link_encoding().
// return: result of _update_link_encoding()
function updateLinkEncoding($lid, $xml_encoding)
{
	return $this->_update_link_encoding($lid, $xml_encoding);
}

// Public wrapper of _update_link().
// return: result of _update_link()
function updateLink($lid, $parsed_data, $xml_data, $updated='')
{
	return $this->_update_link($lid, $parsed_data, $xml_data, $updated);
}

// Public wrapper of _update_feeds().
// return: result of _update_feeds()
function updateFeeds($lid, $items_for_store, $xml_data, $xml_encoding, $updated='')
{
	return $this->_update_feeds($lid, $items_for_store, $xml_data, $xml_encoding, $updated);
}

//---------------------------------------------------------
// get link
//---------------------------------------------------------
// Return the link object for $lid.
// The cached object is returned when there is one; otherwise the record is read
// through the link handler and cached. An error is recorded when no object is found.
// $lid: link id
// return: link object, or the handler's non-object result when the record is missing
function &get_link($lid)
{
// cached object available: use it
	if ( isset($this->_link_obj) && is_object($this->_link_obj) )
	{
		$cached = $this->_link_obj;
		return $cached;
	}

	$fetched =& $this->_link_handler->get_object_by_id($lid);
	if ( !is_object($fetched) )
	{
		$this->_set_errors( "no link record: lid = $lid" );
		return $fetched;
	}

// save object for the following calls
	$this->_link_obj =& $fetched;
	return $fetched;
}

//---------------------------------------------------------
// get result
//---------------------------------------------------------
// Returns, by reference, the parsed data stored by _parse_xml_by_url()
// on a successful parse; an empty array if nothing has been stored yet.
function &getData()
{
	return $this->_parsed_data;
}

// Return the feed handler's get_count_refresh_feed() value
// (the handler's counter is cleared by _clear_count()).
function get_count_feed()
{
	return $this->_feed_handler->get_count_refresh_feed();
}

//---------------------------------------------------------
// set and get property
// for xml_utility
//---------------------------------------------------------
// Pass the rss/atom priority on to the xml utility.
// $value: priority value (the constructor passes the 'basic_rss_atom' config value)
function setPriorityRssAtom($value)
{
	$utility =& $this->_xml_utility;
	$utility->set_priority($value);
}

//function setRssParser($value)
//{
//	$this->_xml_utility->set_rss_parser($value);
//}

//function setAtomParser($value)
//{
//	$this->_xml_utility->set_atom_parser($value);
//}

// Pass the proxy server settings on to the xml utility.
// $host, $port, $user, $pass: proxy settings, handed over unchanged
function set_proxy( $host, $port='8080', $user='', $pass='' )
{
	$utility =& $this->_xml_utility;
	$utility->set_proxy( $host, $port, $user, $pass );
}

//---------------------------------------------------------
// set debug parameter
//---------------------------------------------------------
// Switch the link record update in refreshArchive() on or off.
// $value: any value, stored as boolean
function set_link_update($value)
{
	$this->_flag_link_update = ( $value ? true : false );
}

// Pass the xml save setting on to the link handler.
// $value: setting, handed over unchanged (the constructor passes the 'basic_xml_save' config value)
function set_link_xml_save($value)
{
	$handler =& $this->_link_handler;
	$handler->set_xml_save($value);
}

// Switch the feed record update in refreshArchive() on or off.
// $value: any value, stored as boolean
function set_feed_update($value)
{
	$this->_flag_feed_update = ( $value ? true : false );
}

// When on, refreshXmlUrl() runs the auto discovery regardless of the link mode.
// $value: any value, stored as boolean
function set_force_discover($value)
{
	$this->_flag_force_discover = ( $value ? true : false );
}

// When on, refresh_by_obj() refreshes the link even if it is not expired.
// $value: any value, stored as boolean
function set_force_refresh($value)
{
	$this->_flag_force_refresh = ( $value ? true : false );
}

// Pass the force overwrite setting on to the feed handler.
// $value: setting, handed over unchanged
function set_force_overwrite($value)
{
	$handler =& $this->_feed_handler;
	$handler->set_force_overwrite($value);
}

// Set the debug parse parameters.
// While the flag is on, refreshArchive() uses the url, encoding and mode given here
// in place of the values of the link record.
// $flag    : any value, stored as boolean
// $url     : xml url to parse
// $encoding: encoding passed to the parser
// $mode    : xml mode checked by refreshArchive()
function set_debug_parse($flag, $url='', $encoding='', $mode='')
{
	$this->_debug_xml_url    = $url;
	$this->_debug_encoding   = $encoding;
	$this->_debug_xml_mode   = $mode;
	$this->_flag_debug_parse = ( $flag ? true : false );
}

//=========================================================
// override
//=========================================================
// Set the "print log" debug flag on this object and hand the same boolean
// to the link handler, the feed handler and the xml utility.
// $value: any value, converted to boolean
function set_debug_print_log($value)
{
	$flag = ( $value ? true : false );

	$this->_flag_debug_print_log = $flag;

	$this->_link_handler->set_debug_print_log($flag);
	$this->_feed_handler->set_debug_print_log($flag);
	$this->_xml_utility->set_debug_print_log($flag);
}

// Set the "print error" debug flag on this object and hand the same boolean
// to the link handler, the feed handler and the xml utility.
// $value: any value, converted to boolean
function set_debug_print_error($value)
{
	$flag = ( $value ? true : false );

	$this->_flag_debug_print_error = $flag;

	$this->_link_handler->set_debug_print_error($flag);
	$this->_feed_handler->set_debug_print_error($flag);
	$this->_xml_utility->set_debug_print_error($flag);
}

//=========================================================
// private
//=========================================================
//---------------------------------------------------------
// initial
//---------------------------------------------------------
// Clear the counter of the feed handler (read back through get_count_feed()).
function _clear_count()
{
	$this->_set_log_func_name('_clear_count');

	$handler =& $this->_feed_handler;
	$handler->clear_count();
}

//---------------------------------------------------------
// config
//---------------------------------------------------------
// Apply the module config: xml save setting, rss/atom priority
// and, when 'proxy_use' is set, the proxy server.
function _init_param()
{
	$conf =& $this->_config_handler->get_conf();

// link handler
	$this->set_link_xml_save( $conf['basic_xml_save'] );

// xml utility
	$this->setPriorityRssAtom( $conf['basic_rss_atom'] );
//	$this->setRssParser(  $conf['basic_parser_rss'] );
//	$this->setAtomParser( $conf['basic_parser_atom'] );

// proxy server: only when enabled in the config
	if ( !$conf['proxy_use'] )
	{
		return;
	}

	$this->set_proxy(
		$conf['proxy_host'],
		$conf['proxy_port'],
		$conf['proxy_user'],
		$conf['proxy_pass']
	);
}

//---------------------------------------------------------
// discover RSS URL
//---------------------------------------------------------
// Ask the xml utility to discover the feed urls of a page and keep the result
// in _xml_mode, _rdf_url, _rss_url and _atom_url.
// $html_url: url of the page
// $sel     : selector passed through to the xml utility
// return: true on success; false (with the utility's errors recorded) on failure
function _discover_rssurl($html_url, $sel='')
{
	$this->_set_log_func_name('_discover_rssurl');

	$utility =& $this->_xml_utility;

	$found = $utility->discover($html_url, $sel);
	if ( !$found )
	{
		$this->_set_errors( "cannot discover xml link" );
		$this->_set_errors( $utility->getErrors() );
		return false;
	}

// keep what the utility found
	$this->_xml_mode = $utility->get_xml_mode();
	$this->_rdf_url  = $utility->get_rdf_url();
	$this->_rss_url  = $utility->get_rss_url();
	$this->_atom_url = $utility->get_atom_url();

	return true;
}

//---------------------------------------------------------
// update XmlUrl
//---------------------------------------------------------
// Store the xml mode and the feed urls in the link record.
// return: the link handler's result on success;
//         false on failure, with RSSC_CODE_DB_ERROR and the handler's errors recorded
function _update_link_xmlurl($lid, $rss_mode, $rdf_url, $rss_url, $atom_url)
{
	$result = $this->_link_handler->update_xml_url($lid, $rss_mode, $rdf_url, $rss_url, $atom_url);
	if ( $result )
	{
// record changed: drop the cached object so that get_link() reads it again
		unset( $this->_link_obj );
		return $result;
	}

	$this->_set_error_code( RSSC_CODE_DB_ERROR );
	$this->_set_errors( $this->_link_handler->getErrors() );
	return false;
}

//---------------------------------------------------------
// parse XML
//---------------------------------------------------------
// Fetch and parse the feed at $xml_url through the parse handler and keep the result.
// $xml_url     : url of the feed
// $xml_encoding: encoding of the feed; when empty the one reported by the parse handler is kept
// $xml_mode    : not used here
// return: true on success;
//         false on failure, with the error code set to RSSC_CODE_PARSE_NOT_READ_XML_URL
//         when the handler reported that code and to RSSC_CODE_PARSE_FAILED in every other case
function _parse_xml_by_url($xml_url, $xml_encoding='', $xml_mode=0)
{
	$this->_set_log_func_name('_parse_xml_by_url');

	$handler =& $this->_parse_handler;

	$parse_obj =& $handler->parse_by_url($xml_url, $xml_encoding);
	if ( !is_object($parse_obj) )
	{
// keep the handler's own code, report a reduced set of codes to the caller
		$this->_parse_error_code = $handler->getErrorCode();

		if ( $this->_parse_error_code == RSSC_CODE_PARSE_NOT_READ_XML_URL )
		{
			$code = RSSC_CODE_PARSE_NOT_READ_XML_URL;
		}
		else
		{
			$code = RSSC_CODE_PARSE_FAILED;
		}

		$this->_set_error_code( $code );
		$this->_set_errors( $handler->getErrors() );
		return false;
	}

// keep the results of the parse
	$this->_parse_result    = $handler->get_parse_result();
	$this->_xml_data        = $handler->get_xml_data();
	$this->_parse_obj       = $parse_obj;
	$this->_parsed_data     = $parse_obj->get_converted_data();
	$this->_items_for_store = $parse_obj->get_items();

// a given encoding wins over the one found by the parse handler
	$this->_xml_encoding = $xml_encoding ? $xml_encoding : $handler->get_xml_encoding();

	return true;
}

//---------------------------------------------------------
// update link encoding
//---------------------------------------------------------
// Store the encoding in the link record.
// return: the link handler's result on success;
//         false on failure, with RSSC_CODE_DB_ERROR and the handler's errors recorded
function _update_link_encoding($lid, $encoding)
{
	$result = $this->_link_handler->update_encoding($lid, $encoding);
	if ( $result )
	{
// record changed: drop the cached object so that get_link() reads it again
		unset( $this->_link_obj );
		return $result;
	}

	$this->_set_error_code( RSSC_CODE_DB_ERROR );
	$this->_set_errors( $this->_link_handler->getErrors() );
	return false;
}

//---------------------------------------------------------
// update archive
//---------------------------------------------------------
// Store the channel part of the parsed data and the raw xml in the link record.
// Only the non-empty array sections 'channel', 'image' and 'textinput' are stored.
// $lid        : link id
// $parsed_data: parsed feed data
// $xml_data   : xml data, handed to the link handler
// $updated    : update time, handed to the link handler
// return: true on success;
//         false on failure, with RSSC_CODE_DB_ERROR and the handler's errors recorded
function _update_link($lid, $parsed_data, $xml_data, $updated='' )
{
// store channel image textinput to channel field
	$channel = array();
	foreach ( array('channel', 'image', 'textinput') as $section )
	{
		if ( !isset($parsed_data[$section]) || !is_array($parsed_data[$section]) )
		{
			continue;
		}

		if ( count($parsed_data[$section]) > 0 )
		{
			$channel[$section] = $parsed_data[$section];
		}
	}

	if ( !$this->_link_handler->update_xml($lid, $channel, $xml_data, $updated) )
	{
		$this->_set_error_code( RSSC_CODE_DB_ERROR );
		$this->_set_errors( $this->_link_handler->getErrors() );
		return false;
	}

// record changed: drop the cached object so that get_link() reads it again
	unset( $this->_link_obj );
	return true;
}

// Store all feed items of a link.
// Every item is attempted even when an earlier one failed.
// $lid            : link id
// $items_for_store: array of feed items
// $xml_data, $xml_encoding, $updated: not used here
// return: false when the link is missing or at least one item failed;
//         true otherwise (also when there are no items)
function _update_feeds($lid, $items_for_store, $xml_data, $xml_encoding, $updated='' )
{
// get new object
	$link =& $this->get_link($lid);
	if ( !is_object($link) )
	{
		return false;
	}

// some rss has no feed
	$has_items = ( is_array($items_for_store) && ( count($items_for_store) != 0 ) );
	if ( !$has_items )
	{
		return true;
	}

// values of the link record stored with each feed
	$uid = $link->get('uid');
	$mid = $link->get('mid');
	$p1  = $link->get('p1');
	$p2  = $link->get('p2');
	$p3  = $link->get('p3');

// load the url lists used by _update_feed()
	$this->_get_black_list();
	$this->_get_white_list();

// store each item; remember a failure but keep going
	$all_ok = true;
	foreach( $items_for_store as $item )
	{
		$stored = $this->_update_feed($lid, $uid, $mid, $p1, $p2, $p3, $item);
		if ( !$stored )
		{
			$all_ok = false;
		}
	}

	return $all_ok;
}

// Store one feed item.
// $lid, $uid, $mid, $p1, $p2, $p3: values of the link record, handed to the feed handler
// $item: feed item (array); 'link' is required, 'title' is used only in the error message
// return: false when the item has no link or the feed handler fails
//         (RSSC_CODE_DB_ERROR is set in the latter case);
//         true when the item was stored or skipped because of the black list
function _update_feed($lid, $uid, $mid, $p1, $p2, $p3, $item)
{
// some feed has no link
	if ( empty($item['link']) )
	{
// such an item may have no title either: guard the read
// so that the message is built without an undefined index notice
		$title = isset($item['title']) ? $item['title'] : '';
		$this->_set_errors( 'link is empty: title='.$title );
		return false;
	}

	if ( $this->_check_black( $item['link'] ) )
	{
		return true;	// no action
	}

	if ( !$this->_feed_handler->refresh($lid, $uid, $mid, $p1, $p2, $p3, $item) )
	{
		$this->_set_error_code( RSSC_CODE_DB_ERROR );
		$this->_set_errors( $this->_feed_handler->getErrors() );
		return false;
	}

	return true;
}

// Load the black list from the black handler, once per object.
function _get_black_list()
{
// already loaded
	if ( $this->_black_list_flag )
	{
		return;
	}

	$this->_black_list_flag = true;
	$this->_black_list = $this->_black_handler->get_url_list();
}

// Load the white list from the white handler, once per object.
function _get_white_list()
{
// already loaded
	if ( $this->_white_list_flag )
	{
		return;
	}

	$this->_white_list_flag = true;
	$this->_white_list = $this->_white_handler->get_url_list();
}

// Check a url against the black list.
// A url is rejected when it matches a black list entry and no white list entry.
// Each entry is used as a case-insensitive regular expression.
// NOTE(review): the entry is put into the pattern unescaped, with '|' as delimiter;
//               an entry containing '|' would make the pattern invalid - confirm the list format
// $url: url to check
// return: true when the url is rejected, false otherwise
function _check_black($url)
{
	if ( count($this->_black_list) == 0 )
	{
		return false;
	}

	foreach ( $this->_black_list as $black_url )
	{
// not in this black list entry
		if ( !preg_match("|$black_url|i", $url) )
		{
			continue;
		}

// the white list overrides the black list
		if ( $this->_check_white($url) )
		{
			continue;
		}

		$this->_set_log( "match black list: $url" );
		return true;
	}

	return false;
}

// Check a url against the white list.
// Each entry is used as a case-insensitive regular expression.
// NOTE(review): the entry is put into the pattern unescaped, with '|' as delimiter;
//               an entry containing '|' would make the pattern invalid - confirm the list format
// $url: url to check
// return: true when the url matches a white list entry, false otherwise
function _check_white($url)
{
	$matched = false;

	if ( count($this->_white_list) != 0 )
	{
		foreach ( $this->_white_list as $white_url )
		{
// stop at the first matching entry
			if ( preg_match("|$white_url|i", $url) )
			{
				$this->_set_log( "match white list: $url" );
				$matched = true;
				break;
			}
		}
	}

	return $matched;
}

// --- class end ---
}

// === class end ===
}

?>