/*
 * blanco Framework
 * Copyright (C) 2004-2009 IGA Tosiki
 * 
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 */
package org.omegat.core.matching;

import java.util.ArrayList;
import java.util.List;

import org.omegat.core.data.LegacyTM;
import org.omegat.core.data.StringData;
import org.omegat.core.data.StringEntry;
import org.omegat.util.OConsts;
import org.omegat.util.StringUtil;
import org.omegat.util.Token;

/**
 * Fuzzy matcher that compares one source entry against the entries of a list
 * of legacy translation memories and returns the best-ranked near strings.
 * <p>
 * Thread-safety note: {@link #match(StringEntry, List)} keeps the tokens of
 * the entry being matched in instance fields, and {@link #getInstance()} hands
 * out one shared instance. Concurrent calls on the same instance would
 * therefore overwrite each other's token state; callers sharing the instance
 * have to serialize their calls.
 */
public class FuzzyMatcherWrapper extends FuzzyMatcher {

    /** The shared instance returned by {@link #getInstance()}. */
    private static final FuzzyMatcherWrapper INSTANCE = new FuzzyMatcherWrapper();

    /**
     * Returns the shared matcher instance.
     * 
     * @return the shared instance (always the same object)
     */
    public static FuzzyMatcherWrapper getInstance() {
        return INSTANCE;
    }

    /** Tokenizer used for both the original string and the candidates. */
    private final ITokenizer tokenizer = new Tokenizer();

    /** Edit-distance calculator used to derive the similarity percentage. */
    private final ISimilarityCalculator distance = new LevenshteinDistance();

    /** Tokens for original string, with and without stems. */
    private Token[] strTokensStem, strTokensNoStem;

    /** Tokens for original string, includes numbers and tags. */
    private Token[] strTokensAll;

    /**
     * Finds the near strings for one entry in the given translation memories.
     * 
     * @param tuString
     *            entry whose source text is matched
     * @param tmStrings
     *            translation memories to search; may be <code>null</code>
     * @return near strings ordered best first, at most
     *         {@link OConsts#MAX_NEAR_STRINGS} of them; empty if
     *         <code>tmStrings</code> is <code>null</code> or the source text
     *         yields no word tokens
     */
    public List<NearString> match(StringEntry tuString, List<LegacyTM> tmStrings) {
        List<NearString> result = new ArrayList<NearString>(
                OConsts.MAX_NEAR_STRINGS + 1);

        if (tmStrings == null) {
            return result;
        }

        // get tokens for original string
        strTokensStem = tokenizer.tokenizeWords(tuString.getSrcText(),
                ITokenizer.StemmingMode.MATCHING);
        if (strTokensStem.length == 0) {
            // HP: maybe also test the complete token list here; that could
            // give better number/non-word matching.
            return result;
        }
        strTokensNoStem = tokenizer.tokenizeWords(tuString.getSrcText(),
                ITokenizer.StemmingMode.NONE);
        // HP: includes non-word tokens
        strTokensAll = tokenizer.tokenizeAllExactly(tuString.getSrcText());

        // travel by translation memories
        for (LegacyTM mem : tmStrings) {
            for (StringEntry candEntry : mem.getStrings()) {
                // candidates without a translation are of no use as matches
                if (StringUtil.isEmpty(candEntry.getTranslation())) {
                    continue;
                }
                processEntry(candEntry, mem.getName(), result);
            }
        }

        // fill similarity data only for result
        for (NearString near : result) {
            // fix for bug 1586397
            // NOTE(review): this calls the superclass's static method, as the
            // original code did, although this class declares a static method
            // of the same name - confirm which one is intended.
            near.attr = FuzzyMatcher.buildSimilarityData(strTokensAll,
                    tokenizer.tokenizeAllExactly(near.str.getSrcText()));
        }

        return result;
    }

    /**
     * Compare one entry with original entry and add it to the result list if
     * it ranks high enough.
     * 
     * @param candEntry
     *            entry to compare
     * @param tmxName
     *            name of the translation memory the entry comes from
     * @param result
     *            result list, updated in place
     */
    protected void processEntry(final StringEntry candEntry,
            final String tmxName, List<NearString> result) {
        Token[] candTokens = tokenizer.tokenizeWords(candEntry.getSrcText(),
                ITokenizer.StemmingMode.MATCHING);
        if (candTokens.length == 0) {
            return;
        }

        int similarityStem = calcSimilarity(strTokensStem, candTokens);
        if (similarityStem < OConsts.FUZZY_MATCH_THRESHOLD) {
            return;
        }

        Token[] candTokensNoStem = tokenizer.tokenizeWords(
                candEntry.getSrcText(), ITokenizer.StemmingMode.NONE);
        int similarityNoStem = calcSimilarity(strTokensNoStem, candTokensNoStem);

        if (!haveChanceToAdd(similarityStem, similarityNoStem, result)) {
            return;
        }

        // the third score is only computed for candidates that may be kept
        Token[] candTokensAll = tokenizer.tokenizeAllExactly(candEntry
                .getSrcText());
        int simAdjusted = calcSimilarity(strTokensAll, candTokensAll);

        // similarity data is filled in later by match(), hence null here
        addNearString(candEntry, similarityStem, similarityNoStem,
                simAdjusted, null, tmxName, result);
    }

    /**
     * Calculate similarity for tokens arrays(percent).
     * 
     * @param str
     *            original string tokens
     * @param cand
     *            candidate string tokens
     * @return similarity in percents; 0 if both arrays are empty, because no
     *         similarity can be calculated then (this used to fail with a
     *         division by zero)
     */
    protected int calcSimilarity(final Token[] str, final Token[] cand) {
        final int longest = Math.max(str.length, cand.length);
        if (longest == 0) {
            return 0;
        }
        final int ld = distance.compute(str, cand);
        return (100 * (longest - ld)) / longest;
    }

    /**
     * Check if entry have a chance to be added to result list. If no, there is
     * no sense to calculate other parameters.
     * <p>
     * While the list holds fewer than {@link OConsts#MAX_NEAR_STRINGS} entries
     * every candidate qualifies; afterwards the candidate is compared with the
     * last entry of the list.
     * 
     * @param similarity
     *            calculated similarity (with stemming)
     * @param similarityNoStem
     *            calculated similarity without stemming
     * @param result
     *            current result list
     * @return true if additional calculation need
     */
    protected boolean haveChanceToAdd(final int similarity,
            final int similarityNoStem, List<NearString> result) {
        if (result.size() < OConsts.MAX_NEAR_STRINGS) {
            return true;
        }
        NearString last = result.get(result.size() - 1);
        if (last.score != similarity) {
            return last.score < similarity;
        }
        // same stemmed score: the no-stem score decides, ties still qualify
        return last.scoreNoStem <= similarityNoStem;
    }

    /**
     * Add near string into result list. Near strings are sorted by
     * "similarity, similarityNoStem, simAdjusted", best first; a new entry is
     * placed after existing entries with identical scores. If the list grows
     * beyond {@link OConsts#MAX_NEAR_STRINGS}, its last entry is dropped.
     * 
     * @param candEntry
     *            matched entry
     * @param similarity
     *            similarity with stemming
     * @param similarityNoStem
     *            similarity without stemming
     * @param simAdjusted
     *            similarity over all tokens
     * @param similarityData
     *            similarity data stored with the near string
     * @param tmxName
     *            name of the translation memory the entry comes from
     * @param result
     *            result list, updated in place
     */
    protected void addNearString(final StringEntry candEntry,
            final int similarity, final int similarityNoStem,
            final int simAdjusted, final byte[] similarityData,
            final String tmxName, List<NearString> result) {
        // find position for new data: the first entry ranking strictly lower
        int pos = 0;
        while (pos < result.size()
                && !ranksBelow(result.get(pos), similarity, similarityNoStem,
                        simAdjusted)) {
            pos++;
        }

        result.add(pos, new NearString(candEntry, similarity, similarityNoStem,
                simAdjusted, similarityData, tmxName));
        if (result.size() > OConsts.MAX_NEAR_STRINGS) {
            result.remove(result.size() - 1);
        }
    }

    /**
     * Tells whether an existing near string ranks strictly below the given
     * scores, comparing score, then no-stem score, then adjusted score.
     */
    private static boolean ranksBelow(final NearString st,
            final int similarity, final int similarityNoStem,
            final int simAdjusted) {
        if (st.score != similarity) {
            return st.score < similarity;
        }
        if (st.scoreNoStem != similarityNoStem) {
            return st.scoreNoStem < similarityNoStem;
        }
        return st.adjustedScore < simAdjusted;
    }

    /**
     * Builds the similarity data for color highlight in match window.
     * <p>
     * One byte is produced per match token: {@link StringData#UNIQ} if the
     * token is not contained in the source tokens, {@link StringData#PAIR} if
     * it is contained but its left or right neighbour is not, and 0 otherwise.
     * The missing neighbours of the first and last token count as found.
     * 
     * @param sourceTokens
     *            tokens of the original string
     * @param matchTokens
     *            tokens of the matched string
     * @return array with one entry per match token
     */
    public static byte[] buildSimilarityData(Token[] sourceTokens,
            Token[] matchTokens) {
        int len = matchTokens.length;
        byte[] result = new byte[len];

        boolean leftfound = true;
        for (int i = 0; i < len; i++) {
            result[i] = 0;

            boolean isLast = (i + 1 == len);
            boolean rightfound = isLast
                    || Tokenizer.isContains(sourceTokens, matchTokens[i + 1]);

            boolean found = Tokenizer.isContains(sourceTokens, matchTokens[i]);

            if (!found) {
                result[i] = StringData.UNIQ;
            } else if (!leftfound || !rightfound) {
                result[i] = StringData.PAIR;
            }

            leftfound = found;
        }
        return result;
    }
}
