/*******************************************************************************
 * Copyright (c) 2008 IGA Tosiki, NTT DATA BUSINESS BRAINS Corp.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors:
 *    IGA Tosiki (NTT DATA BUSINESS BRAINS Corp.) - initial API and implementation
 *******************************************************************************/
/*
 * blanco Framework
 * Copyright (C) 2008 NTT DATA BUSINESS BRAINS CORPORATION
 * 
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 */
package blanco.tmx.util.task;

import java.io.File;
import java.io.IOException;
import java.math.BigDecimal;
import java.math.RoundingMode;
import java.text.NumberFormat;
import java.util.HashMap;
import java.util.Map;
import java.util.StringTokenizer;

import blanco.commons.util.BlancoStringUtil;
import blanco.tmx.BlancoTmxParser;
import blanco.tmx.util.message.BlancoTmxUtilMessage;
import blanco.tmx.util.task.valueobject.BlancoTmxUtilStatisticsProcessInput;
import blanco.tmx.valueobject.BlancoTmx;
import blanco.tmx.valueobject.BlancoTmxTu;
import blanco.tmx.valueobject.BlancoTmxTuv;

/**
 * Scans a directory for <code>*.tmx</code> files, parses each of them and
 * prints summary statistics (file, tu and tuv counts, duplicate tuv counts and
 * an English word count) to standard output.
 *
 * <p>
 * The counters and the duplicate-detection map are instance fields that are
 * never reset, so calling {@link #execute} more than once on the same instance
 * reports cumulative figures.
 * </p>
 */
public class BlancoTmxUtilStatisticsProcessImpl implements
        BlancoTmxUtilStatisticsProcess {
    /** Message catalog used to build the exception messages. */
    private final BlancoTmxUtilMessage fMsg = new BlancoTmxUtilMessage();

    /** Characters treated as separators when counting words in a segment. */
    private static final String WORD_DELIMITER = " ,.!?:;/|";

    /** Prefix (upper case) a language tag must start with to count as English. */
    private static final String ENGLISH_LANG_PREFIX = "EN";

    /** Number of .tmx files processed. */
    private int fTmxCount = 0;

    /** Number of tu elements seen. */
    private int fTmxTuCount = 0;

    /** Number of tuv elements seen (all languages). */
    private int fTmxTuvCount = 0;

    /** Number of tuv elements whose language tag starts with "EN". */
    private int fTmxTuvEnCount = 0;

    /** Number of English tuv elements whose (lang, seg) pair was already seen. */
    private int fTmxTuvDupEn = 0;

    /** Number of non-English tuv elements whose (lang, seg) pair was already seen. */
    private int fTmxTuvDupOther = 0;

    /** Total number of words found in English segments. */
    private int fEnWordsCount = 0;

    /** First occurrence of every (lang, seg) pair, keyed by "[lang]:[seg]". */
    private final Map<java.lang.String, BlancoTmxTuv> fMapTuv = new HashMap<java.lang.String, BlancoTmxTuv>();

    /**
     * Processes every regular file ending in ".tmx" directly inside the
     * configured directory and prints the resulting statistics.
     *
     * @param input
     *            process input; its tmxdir value names the directory to scan.
     * @return always 0.
     * @throws IOException
     *             propagated from parsing a TMX file.
     * @throws IllegalArgumentException
     *             if the directory does not exist or is not a directory.
     */
    public int execute(final BlancoTmxUtilStatisticsProcessInput input)
            throws IOException, IllegalArgumentException {

        // Validate the input directory before doing any work.
        final File fileTmxdir = new File(input.getTmxdir());
        if (!fileTmxdir.exists()) {
            throw new IllegalArgumentException(fMsg.getMbtmut11(fileTmxdir
                    .getAbsolutePath()));
        }
        if (!fileTmxdir.isDirectory()) {
            throw new IllegalArgumentException(fMsg.getMbtmut12(fileTmxdir
                    .getAbsolutePath()));
        }

        final File[] files = fileTmxdir.listFiles();
        if (files == null) {
            // listFiles() could not enumerate the directory; nothing to report.
            return 0;
        }
        for (File file : files) {
            if (file.isFile() && file.getName().endsWith(".tmx")) {
                process(file);
            }
        }

        printStatistics();

        return 0;
    }

    /**
     * Writes the collected counters to standard output.
     */
    private void printStatistics() {
        final NumberFormat numFormat = NumberFormat.getNumberInstance();

        System.out.println("[tmx]");
        System.out.println("- tmx :" + pad(numFormat, fTmxCount, 6));
        System.out.println("[tu]");
        System.out.println("- tu  :" + pad(numFormat, fTmxTuCount, 10));
        System.out.println("- tuv :" + pad(numFormat, fTmxTuvCount, 10));
        System.out.println("[tuv]");
        System.out.println("- en       :" + pad(numFormat, fTmxTuvEnCount, 10));
        System.out.println("- dup-en   :" + pad(numFormat, fTmxTuvDupEn, 10));
        System.out.println("- dup-other:"
                + pad(numFormat, fTmxTuvDupOther, 10));
        System.out.println("[words]");
        System.out.println("- en       :" + pad(numFormat, fEnWordsCount, 10)
                + " (" + wordsPerKeyRate() + " words/key)");
    }

    /**
     * Formats a counter with the given number format and left-pads it with
     * spaces to the requested width.
     */
    private static String pad(final NumberFormat numFormat, final int value,
            final int width) {
        return BlancoStringUtil.padLeft(numFormat.format(value), width, ' ');
    }

    /**
     * Computes the average number of English words per English tuv, truncated
     * to two decimal places.
     *
     * @return the rate as a string, or an empty string when no English tuv was
     *         counted (the rate is undefined in that case).
     */
    private String wordsPerKeyRate() {
        if (fTmxTuvEnCount == 0) {
            // Explicit guard instead of catching the division-by-zero error.
            return "";
        }
        return BigDecimal.valueOf(fEnWordsCount).setScale(2).divide(
                BigDecimal.valueOf(fTmxTuvEnCount), RoundingMode.DOWN)
                .toString();
    }

    /**
     * Parses one TMX file and accumulates statistics for every tu in its body.
     *
     * @param fileTarget
     *            the TMX file to parse.
     * @throws IOException
     *             propagated from the parser.
     */
    private void process(final File fileTarget) throws IOException {
        fTmxCount++;

        final BlancoTmxParser parser = new BlancoTmxParser();
        final BlancoTmx tmx = parser.parse(fileTarget);

        for (BlancoTmxTu tu : tmx.getBody().getTuList()) {
            fTmxTuCount++;
            processTu(tu);
        }
    }

    /**
     * Accumulates statistics for every tuv of one tu: total and English tuv
     * counts, English word count and duplicate (lang, seg) pairs.
     *
     * @param tu
     *            the translation unit to inspect.
     */
    private void processTu(final BlancoTmxTu tu) {
        for (BlancoTmxTuv tuv : tu.getTuvList()) {
            fTmxTuvCount++;

            final boolean english = isEnglish(tuv);
            if (english) {
                fTmxTuvEnCount++;

                // A tuv without segment text contributes zero words; passing
                // null to StringTokenizer would throw NullPointerException.
                fEnWordsCount += new StringTokenizer(BlancoStringUtil
                        .null2Blank(tuv.getSeg()), WORD_DELIMITER)
                        .countTokens();
            }

            final String key = "[" + tuv.getLang() + "]:[" + tuv.getSeg()
                    + "]";
            if (!fMapTuv.containsKey(key)) {
                // First occurrence of this (lang, seg) pair.
                fMapTuv.put(key, tuv);
            } else if (english) {
                // Duplicate English entry.
                fTmxTuvDupEn++;
            } else {
                // Duplicate entry in any other language.
                fTmxTuvDupOther++;
            }
        }
    }

    /**
     * Tells whether the tuv's language tag starts with "EN", ignoring case. A
     * missing language tag is treated as non-English.
     */
    private static boolean isEnglish(final BlancoTmxTuv tuv) {
        return BlancoStringUtil.null2Blank(tuv.getLang()).toUpperCase()
                .startsWith(ENGLISH_LANG_PREFIX);
    }
}
