package net.reduls.igo;

import java.io.IOException;
import java.util.List;
import java.util.ArrayList;
import net.reduls.igo.dictionary.Matrix;
import net.reduls.igo.dictionary.WordDic;
import net.reduls.igo.dictionary.Unknown;
import net.reduls.igo.dictionary.ViterbiNode;

/**
 * Splits text into morphemes.
 *
 * <p>For every text position that can be reached, candidate nodes are collected
 * from the word dictionary and the unknown-word handler. Each candidate is linked
 * to the cheapest predecessor ending at that position, and the cheapest chain from
 * the BOS node to the EOS node is returned as the analysis result.
 */
public final class Tagger {
    /**
     * Lattice column for position 0: a single BOS node, shared by every call.
     *
     * NOTE(review): this list is handed to the lattice as-is. It stays unmodified
     * only because nodes are appended to column {@code i + vn.length}; this assumes
     * the dictionaries never return a zero-length node -- confirm.
     */
    private static final List<ViterbiNode> BOS_NODES = new ArrayList<ViterbiNode>(1);
    static {
        BOS_NODES.add(ViterbiNode.makeBOSEOS());
    }

    private final WordDic wdc;
    private final Unknown unk;
    private final Matrix  mtx;

    /**
     * Creates a tagger backed by the dictionary data identified by {@code dataDir}.
     *
     * @param dataDir location passed to the {@code WordDic}, {@code Unknown} and
     *                {@code Matrix} constructors
     * @throws IOException propagated from the dictionary constructors
     */
    public Tagger(String dataDir) throws IOException {
        wdc = new WordDic(dataDir);
        unk = new Unknown(dataDir);
        mtx = new Matrix(dataDir);
    }

    /**
     * Analyses {@code text} and returns its morphemes in a newly created list.
     *
     * @param text the text to analyse; must not be {@code null}
     * @return the morphemes in order of appearance; empty for an empty text
     * @throws NullPointerException if {@code text} is {@code null}
     */
    public List<Morpheme> parse(String text) {
        final List<Morpheme> result = new ArrayList<Morpheme>();
        parse(text, result);
        return result;
    }

    /**
     * Analyses {@code text} and appends its morphemes to {@code result}.
     *
     * <p>Each morpheme pairs the surface substring with the word data string the
     * word dictionary returns for the node's word id.
     *
     * @param text   the text to analyse; must not be {@code null}
     * @param result the list the morphemes are appended to, in order of appearance
     * @throws NullPointerException if {@code text} is {@code null}
     */
    public void parse(String text, List<Morpheme> result) {
        final List<ViterbiNode> path = parseImpl(text);

        // parseImpl returns the path last-node-first, so walk it backwards.
        for (int i = path.size() - 1; i >= 0; i--) {
            final ViterbiNode vn = path.get(i);
            final String surface = surfaceOf(text, vn);
            final String info    = wdc.wordData(vn.wordId);
            result.add(new Morpheme(surface, info));
        }
    }

    /**
     * Splits {@code text} into surface strings and returns them in a newly created list.
     *
     * @param text the text to split; must not be {@code null}
     * @return the surface strings in order of appearance; empty for an empty text
     * @throws NullPointerException if {@code text} is {@code null}
     */
    public List<String> wakati(String text) {
        final List<String> result = new ArrayList<String>();
        wakati(text, result);
        return result;
    }

    /**
     * Splits {@code text} into surface strings and appends them to {@code result}.
     *
     * @param text   the text to split; must not be {@code null}
     * @param result the list the surface strings are appended to, in order of appearance
     * @throws NullPointerException if {@code text} is {@code null}
     */
    public void wakati(String text, List<String> result) {
        final List<ViterbiNode> path = parseImpl(text);

        // parseImpl returns the path last-node-first, so walk it backwards.
        for (int i = path.size() - 1; i >= 0; i--) {
            result.add(surfaceOf(text, path.get(i)));
        }
    }

    /** Returns the part of {@code text} covered by {@code vn}. */
    private static String surfaceOf(String text, ViterbiNode vn) {
        return text.substring(vn.start, vn.start + vn.length);
    }

    /**
     * Builds the lattice for {@code text} and traces the cheapest path back from EOS.
     *
     * @param text the text to analyse
     * @return the nodes on the cheapest path, excluding BOS and EOS, ordered from the
     *         last node to the first
     * @throws NullPointerException if {@code text} is {@code null}
     */
    private List<ViterbiNode> parseImpl(String text) {
        if (text == null) {
            // Fail up front with a clear message instead of a bare NPE from text.length().
            throw new NullPointerException("text must not be null");
        }

        final int len = text.length();

        // lattice.get(p) holds the nodes whose surface ends at text position p.
        final List<List<ViterbiNode>> lattice = new ArrayList<List<ViterbiNode>>(len + 1);
        lattice.add(BOS_NODES);
        for (int i = 1; i <= len; i++) {
            lattice.add(new ArrayList<ViterbiNode>());
        }

        // Kept as ArrayList: the signatures of the search methods are not visible here.
        final ArrayList<ViterbiNode> candidates = new ArrayList<ViterbiNode>();

        for (int i = 0; i < len; i++) {
            final List<ViterbiNode> prevs = lattice.get(i);
            if (prevs.isEmpty()) {
                continue;   // no node ends here, so nothing can start here either
            }

            candidates.clear();
            wdc.search(text, i, candidates);
            unk.search(text, i, wdc, candidates);

            for (int j = 0; j < candidates.size(); j++) {
                final ViterbiNode vn = candidates.get(j);
                final List<ViterbiNode> column = lattice.get(i + vn.length);
                if (vn.isSpace) {
                    // Space nodes are not linked into the path; the predecessors are
                    // carried over to the position after the space instead.
                    column.addAll(prevs);
                } else {
                    column.add(setMincostNode(vn, prevs));
                }
            }
        }

        // EOS: link a terminal node to the cheapest node ending at the end of the text.
        // NOTE(review): setMincostNode reads prevs.get(0), so this assumes the last
        // column is never empty, i.e. the searches always reach the end of the text.
        final ViterbiNode eos = setMincostNode(ViterbiNode.makeBOSEOS(), lattice.get(len));

        // Follow the prev links back; the BOS node (prev == null) is left out.
        final List<ViterbiNode> path = new ArrayList<ViterbiNode>(len / 2);
        for (ViterbiNode cur = eos.prev; cur.prev != null; cur = cur.prev) {
            path.add(cur);
        }
        return path;
    }

    /**
     * Links {@code vn} to the cheapest node in {@code prevs} and sets its total cost.
     *
     * <p>The cost through a predecessor is that predecessor's cost plus the matrix
     * link cost between its right id and {@code vn}'s left id. On a tie the earliest
     * predecessor in {@code prevs} wins. The dictionary cost of {@code vn}'s own word
     * id is added afterwards.
     *
     * @param vn    the node to link; its {@code prev} and {@code cost} are overwritten
     * @param prevs the candidate predecessors; must contain at least one node
     * @return {@code vn}
     */
    private ViterbiNode setMincostNode(ViterbiNode vn, List<ViterbiNode> prevs) {
        ViterbiNode best = prevs.get(0);
        int bestCost = best.cost + mtx.linkCost(best.rightId, vn.leftId);

        for (int i = 1; i < prevs.size(); i++) {
            final ViterbiNode p = prevs.get(i);
            final int cost = p.cost + mtx.linkCost(p.rightId, vn.leftId);
            if (cost < bestCost) {
                bestCost = cost;
                best = p;
            }
        }

        vn.prev = best;
        vn.cost = bestCost;
        vn.cost += wdc.cost(vn.wordId);
        return vn;
    }
}