﻿using UnityEngine;
using System.Collections;
using System.IO;
using NMeCab;
using NMeCab.Core;

namespace UMeCab
{
    /// <summary>
    /// Unity component that wraps an NMeCab <see cref="MeCabTagger"/> and opens it from
    /// in-memory dictionary data held in <see cref="TextAsset"/> fields rather than from
    /// files on disk.
    /// </summary>
    /// <remarks>
    /// All four asset fields must be assigned before <c>Start</c> runs; otherwise
    /// <see cref="Open"/> throws a <see cref="MeCabException"/> naming the missing field.
    /// </remarks>
    public class UMeCabTagger : MonoBehaviour
    {
        MeCabTagger tagger = new MeCabTagger();

        /// <summary>Binary data passed to the tokenizer's character property reader.</summary>
        public TextAsset charBin;

        /// <summary>Binary dictionaries: one must be of type Unk, the other of type Sys.</summary>
        public TextAsset unkDicAsset, sysDicAsset;

        /// <summary>Binary data passed to the Viterbi connector.</summary>
        public TextAsset matrixBin;

        /// <summary>
        /// Builds a <see cref="MeCabParam"/> and opens the tagger with it.
        /// </summary>
        void Start()
        {
            var param = new MeCabParam();

            // NOTE(review): nothing is loaded into this IniParser before it is queried, so
            // both lookups presumably yield null ("cost-factor" then falls back to "0") —
            // confirm whether a dicrc source was meant to be read here.
            IniParser ini = new IniParser();

            // Configuration values are machine-readable, so parse them culture-independently.
            param.CostFactor = int.Parse(
                ini ["cost-factor"] ?? "0",
                System.Globalization.CultureInfo.InvariantCulture);
            param.BosFeature = ini ["bos-feature"];

            Open(param);
        }

        /// <summary>
        /// Opens the tokenizer, the connection matrix and the writer, and copies the
        /// Viterbi settings from <paramref name="param"/>.
        /// </summary>
        /// <param name="param">Settings applied to the tokenizer, Viterbi and writer.</param>
        /// <exception cref="MeCabException">
        /// A required asset is not assigned, a dictionary has the wrong type, or an
        /// unknown-word category cannot be found in the unknown-word dictionary.
        /// </exception>
        protected void Open(MeCabParam param)
        {
            OpenTokenizer(param);

            using (BinaryReader reader = CreateReader(matrixBin, "matrixBin"))
            {
                viterbi.connector.Open(reader);
            }

            viterbi.costFactor = param.CostFactor;
            viterbi.Theta = param.Theta;
            viterbi.LatticeLevel = param.LatticeLevel;
            viterbi.Partial = param.Partial;
            viterbi.AllMorphs = param.AllMorphs;

            writer.Open(param);
        }

        /// <summary>
        /// Loads the character properties and both dictionaries into the tokenizer, builds
        /// the per-category unknown-word token table, and applies the tokenizer settings
        /// from <paramref name="param"/>.
        /// </summary>
        void OpenTokenizer(MeCabParam param)
        {
            // Only a single system dictionary is supported; no user dictionaries are loaded.
            viterbi.tokenizer.dic = new MeCabDictionary[1];

            OpenCharProperty(param.DicDir);

            OpenDictionary(viterbi.tokenizer.unkDic, unkDicAsset, "unkDicAsset");
            if (viterbi.tokenizer.unkDic.Type != DictionaryType.Unk)
            {
                throw new MeCabException("not a unk dictionary");
            }

            MeCabDictionary sysDic = new MeCabDictionary();
            OpenDictionary(sysDic, sysDicAsset, "sysDicAsset");
            if (sysDic.Type != DictionaryType.Sys)
            {
                throw new MeCabException("not a system dictionary");
            }
            viterbi.tokenizer.dic [0] = sysDic;

            // Every character category must have a matching entry in the unknown-word dictionary.
            viterbi.tokenizer.unkTokens = new Token[viterbi.tokenizer.property.Size][];
            for (int i = 0; i < viterbi.tokenizer.unkTokens.Length; i++)
            {
                string key = viterbi.tokenizer.property.Name(i);
                DoubleArray.ResultPair n = viterbi.tokenizer.unkDic.ExactMatchSearch(key);
                if (n.Value == -1)
                {
                    throw new MeCabException("cannot find UNK category: " + key);
                }
                viterbi.tokenizer.unkTokens [i] = viterbi.tokenizer.unkDic.GetToken(n);
            }

            viterbi.tokenizer.space = viterbi.tokenizer.property.GetCharInfo(' ');

            viterbi.tokenizer.bosFeature = param.BosFeature;
            viterbi.tokenizer.unkFeature = param.UnkFeature;

            // Fall back to 24 when the parameter does not give a positive grouping size.
            viterbi.tokenizer.maxGroupingSize = param.MaxGroupingSize;
            if (viterbi.tokenizer.maxGroupingSize <= 0)
            {
                viterbi.tokenizer.maxGroupingSize = 24;
            }
        }

        /// <summary>
        /// Opens <paramref name="dic"/> from the bytes of <paramref name="asset"/>.
        /// </summary>
        /// <param name="dic">Dictionary instance to open.</param>
        /// <param name="asset">Asset holding the binary dictionary data.</param>
        /// <param name="fieldName">
        /// Name of the field that holds <paramref name="asset"/>, used in the error message
        /// when the asset is not assigned.
        /// </param>
        void OpenDictionary(MeCabDictionary dic, TextAsset asset, string fieldName = "dictionary asset")
        {
            using (BinaryReader reader = CreateReader(asset, fieldName))
            {
                dic.Open(reader);
            }
        }

        /// <summary>
        /// Opens the tokenizer's character property table from <see cref="charBin"/>.
        /// </summary>
        /// <param name="dicDir">Not used; the data always comes from <see cref="charBin"/>.</param>
        void OpenCharProperty(string dicDir)
        {
            using (BinaryReader reader = CreateReader(charBin, "charBin"))
            {
                viterbi.tokenizer.property.Open(reader);
            }
        }

        /// <summary>
        /// Creates a reader over the bytes of <paramref name="asset"/>. The caller owns the
        /// returned reader and must dispose it.
        /// </summary>
        /// <exception cref="MeCabException"><paramref name="asset"/> is not assigned.</exception>
        static BinaryReader CreateReader(TextAsset asset, string fieldName)
        {
            // Use == rather than ReferenceEquals: UnityEngine.Object overloads == so that
            // unassigned or destroyed objects also compare equal to null.
            if (asset == null)
            {
                throw new MeCabException("UMeCabTagger: TextAsset '" + fieldName + "' is not assigned");
            }

            return new BinaryReader(new MemoryStream(asset.bytes));
        }

        /// <summary>
        /// Passes <paramref name="str"/> to the wrapped tagger and returns its result.
        /// </summary>
        /// <param name="str">Text to analyse.</param>
        /// <returns>The string returned by <c>MeCabTagger.Parse</c>.</returns>
        public string Parse(string str)
        {
            return tagger.Parse(str);
        }

        /// <summary>Shortcut to the wrapped tagger's Viterbi instance.</summary>
        Viterbi viterbi
        {
            get
            {
                return tagger.viterbi;
            }
        }

        /// <summary>Shortcut to the wrapped tagger's writer instance.</summary>
        Writer writer
        {
            get
            {
                return tagger.writer;
            }
        }
    }
}
