/*
  Ngram     N-gram counter (the original non-ASCII description was lost to mis-encoding)
  Ngram.cc

 Copyright (C) 2006 Masahiko Higashiyama  All rights reserved.
 This is free software with ABSOLUTELY NO WARRANTY.

 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation; either version 2 of the License, or
 (at your option) any later version.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program; if not, write to the Free Software
 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
 02111-1307, USA
*/

#include<cstdlib>
#include<cstring>
#include<fstream>
#include<iostream>
#include<map>
#include<sstream>
#include<string>
#include<vector>

// Input stream that read_file() reads from; main() assigns it before
// calling read_file().
std::istream *ifs;
// Next unused integer id.  Ids start at 1 so that 0 -- the value
// std::map::operator[] default-inserts -- can mean "no id assigned yet".
int num = 1;
// String -> integer id table.  Holds the reserved tags registered by main(),
// the context keys built by read_file() and the individual words; read_file()
// dumps it to "tag.txt".
std::map <std::string, int> bf2num;


// Appends to `v` the second field ("base") of every line in `sentence`
// whose fourth field ("pos") is one of the accepted part-of-speech tags.
//
// Each line is split on whitespace into surface, base, pronounce and pos.
// A line with fewer than four fields leaves `pos` empty and is skipped.
//
// NOTE(review): the tag literals below look mis-encoded (presumably they were
// Japanese part-of-speech names in a legacy encoding) -- confirm them against
// the encoding of the input data before touching them.
inline void
parse_string(std::vector<std::string>& sentence, std::vector<std::string>& v){
  typedef std::vector<std::string>::size_type index_t;

  for(index_t i = 0; i < sentence.size(); ++i){
    std::string surface, base, pronounce, pos;
    std::istringstream fields(sentence[i]);
    fields >> surface >> base >> pronounce >> pos;

    const bool wanted =
      pos == "ư" || pos == "ƻ" || pos == "̾" || pos == "̤θ";
    if(!wanted){
      continue;
    }

    v.push_back(base);
  }
}


/*
  Counts N-grams over the sentences read from the global stream *ifs.

  Input is consumed line by line; a line consisting of "EOS" ends a sentence.
  Each sentence is reduced by parse_string() to a word list, prefixed with
  gram-1 "BOS" markers.  For every window of `gram` words the first gram-1
  words (joined with ':') form the context and the last word is the outcome.
  Contexts and words are given integer ids through the global bf2num/num.

  Output:
    - "tag.txt": one "string<TAB>id" line per entry of bf2num.
    - stdout: one line per context id, followed by tab-separated
      "id:value" columns.  With out_prob == 1 the value is
      count(word | context) / total(context) and the total itself is not
      printed; otherwise the raw counts are printed, including the total
      under the id of the "%%count%%" tag.

  gram      N-gram order; must be >= 1 (otherwise an error is reported and
            nothing is read or written).
  out_prob  1 to print probabilities, anything else to print counts.
*/
void
read_file(int gram, int out_prob){

  if(gram < 1){
    std::cerr << "gram must be >= 1 (got " << gram << ")" << std::endl;
    return;
  }
  if(ifs == 0){
    std::cerr << "No input stream" << std::endl;
    return;
  }

  // Per-context totals are stored in the same row as the word counts, under
  // the id of the reserved "%%count%%" tag.  Resolve that id once so that the
  // accumulation and the output below cannot disagree about which key holds
  // the total.  If the tag is not registered yet, register it here.
  int& count_slot = bf2num["%%count%%"];
  if(count_slot == 0){
    count_slot = num++;
  }
  const int count_id = count_slot;

  // context id -> (word id -> count), plus count_id -> total for the context.
  std::map<int, std::map <int, int> > m;

  std::string line;
  bool more = true;
  while(more){
    // Collect the lines of one sentence, up to and including "EOS".  The
    // result of getline itself is tested (not eof()), so a read error ends
    // the loop instead of spinning forever, and no bogus line is recorded
    // after the final read fails.
    std::vector<std::string> sentence;
    while((more = !std::getline(*ifs, line).fail())){
      sentence.push_back(line + "\n");
      if(line == "EOS"){
        break;
      }
    }
    if(sentence.empty()){
      continue;
    }

    std::vector<std::string> v(gram - 1, std::string("BOS"));
    parse_string(sentence, v);

    // v always holds at least the gram-1 BOS markers, so `last` never
    // precedes v.begin().  (Computing end() - gram first could.)
    const std::vector<std::string>::iterator last = v.end() - (gram - 1);
    for(std::vector<std::string>::iterator itr = v.begin();
        itr != last; ++itr){
      std::string key;
      for(int j = 0; j < gram - 1; j++){
        key += *(itr + j);
        key += ':';
      }
      const std::string& word = *(itr + (gram - 1));

      // operator[] default-inserts 0, which means "no id assigned yet".
      // References into a std::map stay valid across later insertions.
      int& key_num = bf2num[key];
      if(key_num == 0){
        key_num = num++;
      }
      int& word_num = bf2num[word];
      if(word_num == 0){
        word_num = num++;
      }

      std::map<int, int>& row = m[key_num];
      row[word_num] += 1;
      row[count_id] += 1;
    }
  }

  std::ofstream ofs("tag.txt");
  if(!ofs){
    std::cerr << "Can't open tag.txt" << std::endl;
  }
  for(std::map<std::string, int>::const_iterator value_itr = bf2num.begin();
      value_itr != bf2num.end(); ++value_itr){
    ofs << value_itr->first << '\t' << value_itr->second << '\n';
  }

  for(std::map<int, std::map<int, int> >::const_iterator itr = m.begin();
      itr != m.end(); ++itr){
    const std::map<int, int>& m2 = itr->second;
    std::cout << itr->first;
    if(out_prob == 1){
      // Every row was created together with its total, so the lookup
      // always succeeds and the total is at least 1.
      const double count = static_cast<double>(m2.find(count_id)->second);
      for(std::map<int, int>::const_iterator itr2 = m2.begin();
          itr2 != m2.end(); ++itr2){
        if(itr2->first == count_id){
          continue;   // the total is the denominator, not an outcome
        }
        const double probability = static_cast<double>(itr2->second) / count;
        std::cout << '\t' << itr2->first << ':' << probability;
      }
    }else{
      for(std::map<int, int>::const_iterator itr2 = m2.begin();
          itr2 != m2.end(); ++itr2){
        std::cout << '\t' << itr2->first << ':' << itr2->second;
      }
    }
    std::cout << '\n';
  }

}



/*
  Entry point.

  Usage: <program> gram file [out_prob]
    gram      N-gram order, a positive integer.
    file      input file handed to read_file() through the global `ifs`.
    out_prob  optional; 1 makes read_file() print probabilities instead of
              counts (default 0).

  Returns -1 (the same status the original exit(-1) produced) on a usage
  error or when the input file cannot be opened, 0 otherwise.
*/
int main(int argc, char* argv[]){

  // Both gram and file are needed: argv[2] is only valid when argc > 2.
  if(argc < 3){
    std::cerr << "Usage: " << (argc > 0 ? argv[0] : "ngram")
              << " gram file [out_prob]" << std::endl;
    return -1;
  }

  const int gram = std::atoi(argv[1]);
  if(gram < 1){
    // atoi yields 0 for non-numeric text; an order below 1 is meaningless.
    std::cerr << "gram must be a positive integer: " << argv[1] << std::endl;
    return -1;
  }

  int out_prob = 0;
  // argv[3] exists only when argc > 3; argv[argc] is a null pointer.
  if(argc > 3){
    out_prob = std::atoi(argv[3]);
  }

  // Reserved tags get the first two ids.
  bf2num["BOS"] = num++;
  bf2num["%%count%%"] = num++;

  // The stream lives on the stack, so it is closed on every return path.
  std::ifstream input(argv[2]);
  if(!input){
    std::cerr << "Can't open " << argv[2] << std::endl;
    return -1;
  }

  ifs = &input;
  read_file(gram, out_prob);
  ifs = 0;   // do not leave a pointer to the soon-to-be-destroyed stream

  return 0;
}
