<?php
/**
 * CodeIgniter library that fetches blog listings and bookmark statistics
 * from external blog servers (currently only Hatena is handled).
 */
class Webscraper {
  /**
   * Loads the 'htmlscraping' library that getXMLFromURL() relies on.
   *
   * Declared as __construct() because PHP 8 no longer treats a method named
   * after the class as a constructor; with the old spelling the library was
   * never loaded on PHP 8.
   */
  public function __construct() {
    $CI =& get_instance();
    $CI->load->library('htmlscraping');
  }

  /**
   * Builds the <blogs> document for one server entry.
   *
   * @param mixed $server_xml Server description exposing 'id' and
   *                          'bloglist_url' via array access (presumably a
   *                          SimpleXMLElement -- confirm against callers).
   * @return SimpleXMLElement <blogs> element with one <blog> child per weblog.
   */
  public function getBlogsXMLFromURL($server_xml) {
    $server_id = (int)$server_xml['id'];
    $url = (string)$server_xml['bloglist_url'];

    $source_xml = $this->getXMLFromURL($url);
    // TODO: dispatch to a server-specific builder for servers other than Hatena.
    return $this->makeBlogsXMLFromHatena($source_xml, $server_id);
  }

  /**
   * Fetches time-series data for the given blog URLs from the named server.
   *
   * @param string $server_name Server identifier; only "hatena" is supported.
   * @param mixed  $blogurls    Blog URLs, passed through to the server-specific fetcher.
   * @return mixed The fetcher's result, or null when the server is not supported.
   */
  public function getBlogTimeSeries($server_name, $blogurls) {
    // Initialised up front so an unsupported server yields null instead of
    // reading an undefined variable.
    $response = null;

    switch ($server_name) {
      case "hatena":
        $response = $this->getBlogTimeSeriesFromHatena($blogurls);
        break;
      default:
        break;
    }

    return $response;
  }

  /**
   * Calls the 'bookmark.getCount' XML-RPC method on b.hatena.ne.jp.
   *
   * @param mixed $blogurls Request parameters handed to the XML-RPC client
   *                        (presumably an array of URLs -- confirm).
   * @return mixed display_response() on success, display_error() when the
   *               request could not be sent.
   */
  public function getBlogTimeSeriesFromHatena($blogurls) {
    // TODO: implementation is still in progress.
    $CI =& get_instance();
    $CI->load->library('xmlrpc');
    $CI->xmlrpc->server('http://b.hatena.ne.jp/xmlrpc');
    $CI->xmlrpc->method('bookmark.getCount');

    $CI->xmlrpc->request($blogurls);

    if (!$CI->xmlrpc->send_request()) {
      // TODO: proper error handling; for now the error text is returned.
      return $CI->xmlrpc->display_error();
    }

    return $CI->xmlrpc->display_response();
  }

  /**
   * Retrieves the document at $url through the htmlscraping library.
   *
   * If the library throws, the request is terminated with a
   * "400 Bad Request" plain-text response carrying the exception message.
   *
   * @param string $url Address to fetch.
   * @return mixed Whatever htmlscraping->getXmlObject() returns.
   */
  private function getXMLFromURL($url) {
    $CI =& get_instance();

    try {
      $xml = $CI->htmlscraping->getXmlObject($url);
    } catch (Exception $e) {
      // SERVER_PROTOCOL is not set under the CLI; fall back to a sane default
      // rather than emitting a malformed status line.
      $protocol = isset($_SERVER['SERVER_PROTOCOL']) ? $_SERVER['SERVER_PROTOCOL'] : 'HTTP/1.1';
      header("$protocol 400 Bad Request");
      header('Content-Type: text/plain;charset=UTF-8');
      exit($e->getMessage());
    }

    return $xml;
  }

  /**
   * Converts a Hatena weblog listing into the internal <blogs> format.
   *
   * Each <weblog> child of $source_xml becomes a <blog> element carrying the
   * attributes server_id, author, title, url and lastupdate.
   *
   * @param SimpleXMLElement $source_xml Listing with an 'updated' attribute and
   *                                     <weblog> children having 'name', 'url'
   *                                     and 'when' attributes.
   * @param int              $server_id  Identifier copied onto every <blog>.
   * @return SimpleXMLElement The populated <blogs> element.
   */
  private function makeBlogsXMLFromHatena($source_xml, $server_id) {
    $CI =& get_instance();
    $CI->load->helper('string');

    $blogs_xml = new SimpleXMLElement('<blogs></blogs>');
    $date_format = 'Y-m-d H:i:s';
    $url_pattern_author = '/\/[^\.]+\//';

    // strtotime() returns false on failure; the cast keeps the subtraction
    // below well-defined (false behaves as 0, as it did implicitly before).
    $server_update = (int)strtotime((string)$source_xml['updated']);

    foreach ($source_xml->weblog as $xml_weblog) {
      $blog_xml = $blogs_xml->addChild('blog');

      // 'when' is subtracted from the server timestamp (presumably seconds
      // elapsed since the listing was generated -- confirm). Cast to int so a
      // missing or non-numeric attribute cannot raise a warning/TypeError.
      $blog_update = $server_update - (int)(string)$xml_weblog['when'];

      $title = (string)$xml_weblog['name'];
      $url = (string)$xml_weblog['url'];
      $lastupdate = date($date_format, $blog_update);

      // The author is the first slash-delimited, dot-free URL segment; fall
      // back to an empty string when the URL has no such segment.
      $author = '';
      if (preg_match($url_pattern_author, $url, $matches) === 1) {
        $author = trim_slashes($matches[0]);
      }

      $blog_xml['server_id'] = $server_id;
      $blog_xml['author'] = $author;
      $blog_xml['title'] = $title;
      $blog_xml['url'] = $url;
      $blog_xml['lastupdate'] = $lastupdate;
    }

    return $blogs_xml;
  }
}
?>