/**************************************************************************
 OmegaT - Computer Assisted Translation (CAT) tool 
          with fuzzy matching, translation memory, keyword search, 
          glossaries, and translation leveraging into updated projects.

 Copyright (C) 2000-2006 Keith Godfrey and Maxym Mykhalchuk
               Home page: http://www.omegat.org/
               Support center: http://groups.yahoo.com/group/OmegaT/

 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation; either version 2 of the License, or
 (at your option) any later version.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program; if not, write to the Free Software
 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 **************************************************************************/

package blanco.omegat.util.glossary;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.List;

import org.omegat.core.data.StringEntry;
import org.omegat.core.matching.ITokenizer;
import org.omegat.core.matching.Tokenizer;
import org.omegat.gui.glossary.GlossaryEntry;
import org.omegat.util.Token;

/**
 * Loads glossary files and finds the glossary entries whose source term
 * occurs in a given source string.
 * <p>
 * This class contains no synchronization code. Entries are appended to and
 * cleared from a plain {@link ArrayList} in place, so loading and querying
 * should not happen concurrently without external coordination.
 * 
 * @author Keith Godfrey
 * @author Maxym Mykhalchuk
 */
public class MyGlossaryManager {
    /** Extension of tab-separated glossaries read in the default system encoding. */
    private static final String EXT_DEF_ENC = ".tab"; // NOI18N

    /** Extension of tab-separated glossaries read as UTF-8. */
    private static final String EXT_UTF8_ENC = ".utf8"; // NOI18N

    /** Alternative extension of tab-separated glossaries read as UTF-8. */
    private static final String EXT_UTF8_ENC_TSV = ".tsv"; // NOI18N

    /** Tokenizer used for both the source strings and the glossary terms. */
    private final ITokenizer tokenizer = new Tokenizer();

    /** All entries loaded so far, in the order in which they were read. */
    private final List<GlossaryEntry> glossaryEntries = new ArrayList<GlossaryEntry>();

    /** Creates a new, empty instance of MyGlossaryManager. */
    public MyGlossaryManager() {
    }

    /**
     * Loads the glossary files of a folder and appends their entries to the
     * cache of glossary entries. Supports
     * <ul>
     * <li>tab-separated files in default system encoding - with .tab extension
     * <li>tab-separated files in utf-8 encoding - with .utf8 or .tsv extension
     * </ul>
     * Files with other extensions are ignored. Loading is best-effort: a file
     * that cannot be read is reported on standard error and skipped, and the
     * remaining files are still loaded. Entries read from a file before it
     * failed are kept.
     * 
     * @param folder
     *            - folder to look for the glossary files; nothing is loaded
     *            if it is <code>null</code> or not a directory
     */
    public void loadGlossaryFiles(File folder) {
        if (folder == null || !folder.isDirectory()) {
            return;
        }
        try {
            // File.list() returns null (rather than throwing) on an I/O error.
            String[] names = folder.list();
            if (names == null) {
                System.err.println("Cannot list glossary folder: " // NOI18N
                        + folder.getAbsolutePath());
                return;
            }
            for (String name : names) {
                // ignoring files with unrecognized extensions -
                // http://sf.net/tracker/index.php?func=detail&aid=1088247&group_id=68187&atid=520347
                if (!hasGlossaryExtension(name.toLowerCase())) {
                    continue;
                }
                String path = folder.getAbsolutePath() + File.separator + name;
                try {
                    loadGlossaryFile(new File(path), glossaryEntries);
                } catch (IOException ex) {
                    // One unreadable file must not prevent the others from
                    // being loaded.
                    ex.printStackTrace();
                }
            }
        } catch (Exception ex) {
            // Deliberately broad: glossary loading must never abort the
            // caller, whatever goes wrong.
            ex.printStackTrace();
        }
    }

    /**
     * Tells whether a lower-cased file name carries one of the recognized
     * glossary extensions.
     */
    private static boolean hasGlossaryExtension(String lowerCaseName) {
        return lowerCaseName.endsWith(EXT_DEF_ENC)
                || lowerCaseName.endsWith(EXT_UTF8_ENC)
                || lowerCaseName.endsWith(EXT_UTF8_ENC_TSV);
    }

    /**
     * Loads one glossary file. Chooses the encoding from the file extension
     * (default system encoding for .tab, UTF-8 for .utf8 and .tsv) and
     * appends one entry per usable line to <code>result</code>. A file with
     * any other extension is ignored.
     * <p>
     * A line is skipped when it starts with '#', when it has fewer than two
     * tab-separated fields, or when its first field is empty. The third
     * field, if present, becomes the entry's comment.
     * 
     * @param file
     *            glossary file to read
     * @param result
     *            list the entries are appended to
     * @throws IOException
     *             if the file cannot be opened or read; the file is closed
     *             in every case
     */
    private void loadGlossaryFile(final File file,
            final List<GlossaryEntry> result) throws FileNotFoundException,
            UnsupportedEncodingException, IOException {
        String nameLower = file.getName().toLowerCase();
        final boolean utf8;
        if (nameLower.endsWith(EXT_DEF_ENC)) {
            utf8 = false;
        } else if (nameLower.endsWith(EXT_UTF8_ENC)
                || nameLower.endsWith(EXT_UTF8_ENC_TSV)) {
            utf8 = true;
        } else {
            // Unknown format: there is no encoding to read it with.
            return;
        }

        InputStream fis = new FileInputStream(file);
        try {
            InputStreamReader reader = utf8
                    ? new InputStreamReader(fis, "UTF-8") // NOI18N
                    : new InputStreamReader(fis);
            BufferedReader in = new BufferedReader(reader);

            // BOM (byte order mark) bugfix: consume a leading U+FEFF,
            // otherwise rewind to the very first character.
            in.mark(1);
            int ch = in.read();
            if (ch != 0xFEFF) {
                in.reset();
            }

            for (String s = in.readLine(); s != null; s = in.readLine()) {
                // skip lines that start with '#'
                if (s.startsWith("#")) { // NOI18N
                    continue;
                }

                // divide lines on tabs
                String[] tokens = s.split("\t"); // NOI18N
                // check token list to see if it has a valid string
                if (tokens.length < 2 || tokens[0].length() == 0) {
                    continue;
                }

                // creating glossary entry and add it to the list
                // (even if it's already there!)
                String comment = ""; // NOI18N
                if (tokens.length >= 3) {
                    comment = tokens[2];
                }
                result.add(new GlossaryEntry(tokens[0], tokens[1], comment));
            }
        } finally {
            // Closing the underlying stream releases the file handle even
            // when reading failed half-way.
            fis.close();
        }
    }

    /**
     * Finds the glossary entries that apply to a source string: an entry
     * matches when the tokenizer reports that the tokens of the source
     * string contain all tokens of the entry's source term. Entries whose
     * source term yields no tokens never match.
     * <p>
     * Matches are returned in the order in which the entries were loaded;
     * duplicates are not removed and the result is not sorted.
     * 
     * @param strEntry
     *            source string to find glossary entries for
     * @return a new list with the matching entries, possibly empty
     */
    public List<GlossaryEntry> getGlossary(StringEntry strEntry) {
        List<GlossaryEntry> result = new ArrayList<GlossaryEntry>();
        // compute source entry tokens
        Token[] strTokens = tokenizer.tokenizeWords(strEntry.getSrcText(),
                ITokenizer.StemmingMode.GLOSSARY);

        List<GlossaryEntry> entries = getGlossaryEntries();
        if (entries == null) {
            return result;
        }

        for (GlossaryEntry glosEntry : entries) {
            // compute glossary entry tokens
            Token[] glosTokens = tokenizer.tokenizeWords(
                    glosEntry.getSrcText(), ITokenizer.StemmingMode.GLOSSARY);
            if (glosTokens.length == 0) {
                continue;
            }

            if (Tokenizer.isContainsAll(strTokens, glosTokens)) {
                result.add(glosEntry);
            }
        }

        return result;
    }

    /**
     * Get glossary entries.
     * 
     * @return all entries; this is the live internal list, not a copy
     */
    public List<GlossaryEntry> getGlossaryEntries() {
        return glossaryEntries;
    }

    /**
     * Clear data about glossaries.
     */
    public void clear() {
        glossaryEntries.clear();
    }

}
