<?php

require_once('file_util.inc');
require_once('pair.inc');


////////////////////////////
// Struct-like (data holder) classes
////////////////////////////

// Class for the stock data (the results collected during analysis)
// Result container used while analyzing: an ordered list of pair objects plus
// a scratch buffer in which the text of the current token is accumulated.
class lexical_analysis_stock_data
{
//private:
// Ordered list of pair objects created by push_data().
private $data_ = array();
// Scratch buffer holding the text gathered so far.
private $buf_ = "";

//public:
// Scratch buffer accessors.
public function get_buf()
{
  return $this->buf_;
}
public function set_buf($s)
{
  $this->buf_ = $s;
}
// Appends $c to the end of the scratch buffer.
public function add_buf($c)
{
  $this->buf_ .= $c;
}

// Returns every pair pushed so far, in push order.
public function get_data()
{
  return $this->data_;
}

// Builds a pair from name $n and value $v and appends it to the list.
public function push_data($n, $v)
{
  $entry = new pair;
  $entry->set($n, $v);
  $this->data_[] = $entry;
}

// Pushes the buffer content under name $n (even when the buffer is empty),
// then clears the buffer.
public function push_data_from_buf($n)
{
  $value = $this->get_buf();
  $this->push_data($n, $value);
  $this->set_buf("");
}

// Same as push_data_from_buf(), but does nothing when the buffer is empty.
public function push_isdata_from_buf($n)
{
  if ("" == $this->get_buf()) {
    return;
  }
  $this->push_data($n, $this->get_buf());
  $this->set_buf("");
}
} // end of class


// Class that holds the source data being analyzed
// Holds the text to analyze together with a cursor (count) that is advanced
// one position at a time by inc_count().
class lexical_analysis_data
{
//public:
public function __construct()
{
  $this->init();
}

// Resets the object to an empty text.
public function init()
{
  $this->set_data("");
}

// Length of the stored text, as computed by set_data() with strlen().
public function set_length($i) { $this->length_ = $i; }
public function get_length() { return $this->length_; }

// Returns the whole stored text.
public function get_data() { return $this->data_; }

// Stores $s, records its strlen() and rewinds the cursor.
public function set_data($s)
{
  $this->data_ = $s;
  $this->set_length(strlen($s));
  // The cursor starts at -1 because callers increment first and read second.
  $this->set_count(-1);
}

// Returns true while the cursor is still below the stored length,
// false once it has reached or passed the end.
public function is_length()
{
  return $this->get_count() < $this->get_length();
}

// Cursor accessors.
public function set_count($i) { $this->count_ = $i; }
public function get_count() { return $this->count_; }

// Advances the cursor by one and returns is_length() for the new position,
// so it can be used directly as a loop condition.
public function inc_count()
{
  $this->set_count($this->get_count() + 1);
  return $this->is_length();
}

// Returns the character under the cursor, or "" when the cursor is not on a
// character (before the first inc_count() or past the end).
public function get_element()
{
  return $this->char_at($this->get_count());
}

// Returns the character just after the cursor, or "" at the end of the text.
public function get_element_next()
{
  return $this->char_at($this->get_count() + 1);
}

//private:
// Bounds-checked read of one character. Negative indexes are rejected
// explicitly: PHP >= 7.1 would otherwise count them from the end of the
// string and return the last character for the initial cursor value -1.
private function char_at($i)
{
  if ($i < 0 || !isset($this->data_[$i])) {
    return "";
  }
  return $this->data_[$i];
}

private $data_;		// text being analyzed
private $length_;	// strlen() of $data_, set by set_data()
private $count_;	// current cursor position
} // end of class




////////////////////////////
// Processing classes
////////////////////////////

// Handles the processing of one character within a mode
// One rule of a mode: the character it applies to, the list of actions to
// execute when that character is met, and the name of the mode to switch to.
//
// Actions (joined with "&" in the configuration string):
//   error             throw an Exception
//   no                do nothing
//   +char             append the current input character to the buffer
//   +'x'              append the literal character x to the buffer
//   push:'name',data    push the buffer under 'name', then clear the buffer
//   push:'name',isdata  same, but only when the buffer is not empty
//   push:'name'         push an empty string under 'name'
class lexical_analysis_engine_unit
{
//public:
public function set_next_mode($s) { $this->next_mode_ = $s; }
public function set_processing_list($a) { $this->processing_list_ = $a; }

// Returns the character (or keyword) this rule is registered under.
public function get_my_char() { return $this->my_char_; }

// Stores the rule key. A value starting with a single quote (e.g. 'a') is
// reduced to the one character following the quote; anything else is kept
// verbatim.
public function set_my_char($s)
{
  if (is_string($s) && 0 === strncmp($s, "'", 1)) {
    // A lone quote has no following character: store "" instead of reading
    // an offset that does not exist.
    $this->my_char_ = isset($s[1]) ? $s[1] : "";
  } else {
    $this->my_char_ = $s;
  }
}

// Splits the "&"-separated action string into the action list.
public function set_processing_string($s)
{
  $this->set_processing_list( explode("&", $s) );
}

// Executes every action of this rule in order.
//   $data_obj  : must provide get_element() (current input character)
//   $stock_obj : must provide add_buf(), push_data(), push_data_from_buf()
//                and push_isdata_from_buf()
// Returns the name of the next mode.
// Throws Exception when the action list contains "error".
public function run($data_obj, $stock_obj)
{
  foreach ($this->processing_list_ as $processing) {
    $processing = (string)$processing;

    // Empty entries (empty action field, doubled or trailing "&") and the
    // explicit "no" keyword are both no-ops.
    if ("" === $processing || "no" === $processing) {
      continue;
    }

    if ("error" === $processing) {
      throw new Exception( "error!! (" . $this->next_mode_ . ")" );
    }

    // Append actions.
    if ("+" === $processing[0]) {
      if ("+char" === $processing) {
        $stock_obj->add_buf($data_obj->get_element());
      } else if (isset($processing[2])) {
        // +'x' : the literal sits right after the opening quote.
        $stock_obj->add_buf($processing[2]);
      }
      continue;
    }

    // Push actions.
    if (0 === strncmp($processing, "push", 4)) {
      $push_data = explode(",", $processing);
      $push_kind = isset($push_data[1]) ? $push_data[1] : "";

      // The name is the text between the first pair of single quotes;
      // it stays null when the action carries no quoted name.
      $abuf = explode("'", $push_data[0]);
      $push_name = isset($abuf[1]) ? $abuf[1] : NULL;

      if ("data" === $push_kind) {
        $stock_obj->push_data_from_buf($push_name);
      } else if ("isdata" === $push_kind) {
        $stock_obj->push_isdata_from_buf($push_name);
      } else {
        // No recognized data part: push an empty string under the name.
        $stock_obj->push_data($push_name, "");
      }
    }
  }

  return $this->next_mode_;
}

//private:
private $my_char_;	// key of this rule (a character or a keyword)
private $next_mode_;	// name of the mode to switch to after run()
private $processing_list_ = array();	// list of action strings
}



// Handles the processing of one mode
// One mode of the analyzer: a table of rules keyed by input character, plus
// the special keys "hs" (space or tab), "crlf" (line break) and "any"
// (fallback used when no other rule matches).
class lexical_analysis_engine
{

// Extracts the mode name from a mode header line: only the first
// tab-separated field is used.
public function set_mode_name_string($s)
{
  $abuf = explode("\t", $s);
  $this->mode_name_ = $abuf[0];
}
public function get_mode_name() { return $this->mode_name_; }

// Registers one rule from a configuration line of the form
//   <TAB>key<TAB>actions<TAB>next-mode
// A line whose key field starts with "#" is a comment and is ignored.
public function set_processing_data($s)
{
  // Pad to four fields so that short lines (for example an "error" rule
  // without a next mode) never read an undefined index.
  $abuf = array_pad(explode("\t", $s), 4, "");

  if (0 === strncmp($abuf[1], "#", 1)) {
    return ;
  }

  $obj = new lexical_analysis_engine_unit;
  $obj->set_my_char($abuf[1]);
  $obj->set_processing_string($abuf[2]);
  $obj->set_next_mode($abuf[3]);

  // Register the rule under its key and remember whether one of the
  // special multi-character keys is present.
  $this->processing_obj_[ $obj->get_my_char() ] = $obj;
  if ("hs" == $obj->get_my_char()) {
    $this->hs_flg_ = true;
  }
  if ("crlf" == $obj->get_my_char()) {
    $this->crlf_flg_ = true;
  }
  return ;
}


// Selects the rule matching the current character of $data_obj, runs it and
// returns the name of the next mode.
// Lookup order: exact character, then "hs" / "crlf" when such rules exist,
// then "any".
// Throws Exception when nothing matches and the mode has no "any" rule, or
// when the selected rule itself throws.
public function analysis($data_obj, $stock_obj)
{
  $itr = $data_obj->get_element();

  $do_obj = NULL;
  if (isset($this->processing_obj_[$itr])) {
    $do_obj = $this->processing_obj_[$itr];
  } else {
    // "hs": horizontal white space, i.e. space or tab (line breaks excluded).
    if ( $this->hs_flg_ && ((' ' === $itr) || ("\t" === $itr)) ) {
      $do_obj = $this->processing_obj_['hs'];
    }
    // "crlf": LF, CR or CR+LF. The escapes must be double-quoted:
    // single-quoted '\n' is a two-character string and never matches.
    if ( $this->crlf_flg_ ) {
      if ("\n" === $itr) {
        $do_obj = $this->processing_obj_['crlf'];
      } else if ("\r" === $itr) {
        // Treat CR+LF as one line break by consuming the LF as well.
        // Only peek when a following character actually exists.
        $has_next = ($data_obj->get_count() + 1) < $data_obj->get_length();
        if ($has_next && ("\n" === $data_obj->get_element_next())) {
          $data_obj->inc_count();
        }
        $do_obj = $this->processing_obj_['crlf'];
      }
    }
  }

  // Nothing matched: fall back to the "any" rule.
  if (is_null($do_obj)) {
    if (!isset($this->processing_obj_['any'])) {
      throw new Exception( "no rule for this character and no 'any' rule (" . $this->mode_name_ . ")" );
    }
    $do_obj = $this->processing_obj_['any'];
  }

  return $do_obj->run($data_obj, $stock_obj);
}

//private:
private $mode_name_;			// name of this mode
private $processing_obj_ = array();	// rule table: key => rule object
private $hs_flg_ = false;		// true once an "hs" rule is registered
private $crlf_flg_ = false;		// true once a "crlf" rule is registered
} // end of class


// Main class
// Entry point of the analyzer: loads the mode table from a configuration
// file and runs it over the input text, one character at a time.
class lexical_analysis
{
//public:
// Path of the configuration file (the getter is only used internally).
private function get_config_file() { return $this->config_fn_; }
public function set_config_file($s) { $this->config_fn_ = $s; }

// Text to analyze (the getter is only used internally).
public function set_data($s) { $this->data_ = $s; }
private function get_data() { return $this->data_; }

// Reads the text to analyze from a file via file_util::read_file().
public function set_data_file($filename)
{
  $this->set_data(file_util::read_file($filename));
}


// Runs the analysis.
//   $data : text to analyze; when empty, the text given through set_data()
//           or set_data_file() is used instead.
// Returns the list of pair objects collected by the rules, or NULL when an
// error occurred (a rule requested "error", a mode name is unknown, or the
// configuration is malformed); in that case the details are printed.
// Note: the configuration file is read again on every call.
public function parse($data = "")
{
  if ("" == $data) {
    $data = $this->get_data();
  }

  try {
    $this->load_config();

    $data_obj = new lexical_analysis_data;
    $data_obj->set_data($data);

    $stock_obj = new lexical_analysis_stock_data;
    // Analysis always starts in the mode named "base".
    $mode = "base";

    while( $data_obj->inc_count() ) {
      // Report an unknown mode (typo in the config, or config not loaded)
      // through the normal error path instead of calling a method on null.
      if (!isset($this->analysis_obj_list_[$mode])) {
        throw new Exception( "undefined mode (" . $mode . ")" );
      }
      // Each step returns the name of the mode to use for the next character.
      $mode = $this->analysis_obj_list_[$mode]->analysis($data_obj, $stock_obj);
    }
  } catch (Exception $e) {
    // Any error aborts the analysis.
print "get exception!!!\n";
print $e->getMessage() . "\n";
print $e->getCode() . "\n";
print $e->getFile() . "\n";
print $e->getLine() . "\n";
    return NULL;
  }

  return $stock_obj->get_data();
}


//private:
// Reads the configuration file and fills the mode table.
// Format: empty lines and lines starting with "#" are skipped; a line that
// does not start with a tab opens a mode; a line that starts with a tab adds
// a rule to the mode opened last.
// Throws Exception when a rule line appears before any mode line.
private function load_config()
{
  $config_fn = $this->get_config_file();
  if (!is_string($config_fn) || "" === $config_fn) {
    // No configuration file was given: nothing to load.
    return;
  }

  $fh = fopen($config_fn, "r");
  if (!$fh) {
    // fopen() has already raised a warning; parse() reports the missing
    // "base" mode if there is any input to analyze.
    return;
  }

  $mode = "";
  while (false !== ($wk = fgets($fh))) {
    // Drop the line break and any other trailing white space.
    $wk = rtrim($wk);

    if ("" === $wk || "#" === $wk[0]) {
      continue;
    }

    if ("\t" !== $wk[0]) {
      // Mode header line.
      $obj = new lexical_analysis_engine;
      $obj->set_mode_name_string($wk);
      $mode = $obj->get_mode_name();

      // A mode that is already registered is kept; the rules that follow
      // are added to the existing object.
      if (!isset($this->analysis_obj_list_[$mode])) {
        $this->analysis_obj_list_[$mode] = $obj;
      }
    } else {
      // Rule line for the current mode.
      if (!isset($this->analysis_obj_list_[$mode])) {
        fclose($fh);
        throw new Exception( "config error: rule line before any mode line" );
      }
      $this->analysis_obj_list_[$mode]->set_processing_data($wk);
    }
  }
  fclose($fh);
}

private $config_fn_;	// path of the configuration file
private $data_ = "";	// text to analyze
private $length_;	// not referenced by any method of this class

private $analysis_obj_list_ = array();	// mode table: mode name => engine object
} // end of class


