package zephyr.util;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.Set;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Line-oriented filter that inserts extra {@code <key>} elements into
 * {@code <dt>...</dt>...<dd>...</dd>} entries.
 *
 * <p>For every input line that matches {@link #DT_DD}, the words found in the
 * {@code <dd>} body (as defined by the {@link #WORD} pattern) are turned into
 * {@code <key type="..." title="...">word</key>} elements, which are inserted
 * immediately before the {@code <dd>} tag. Lines that do not match are passed
 * through (after unescaping) without added keys.
 *
 * <p>The number of generated keys is accumulated in the static counter
 * {@link #nKey}, which is shared by all instances and is not synchronized.
 */
public class AppendLatinKeys {

    /** Running total of keys generated by every instance of this class. */
    protected static int nKey = 0;

    /** Marker prepended to a key in the key set when the word matched with the optional group 1. */
    public static final String SUPERPREFIX = "* ";

    /**
     * Returns the text to use as the body of a {@code <key>} element.
     *
     * @param key a key from the key set, possibly starting with {@link #SUPERPREFIX}
     * @return {@code key} with a leading {@link #SUPERPREFIX} removed, or {@code key} itself
     */
    protected String makeKey(String key) {
        if (!key.startsWith(SUPERPREFIX)) {
            return key;
        }
        return key.substring(SUPERPREFIX.length());
    }

    /**
     * Builds the value of the {@code title} attribute of a {@code <key>} element.
     *
     * @param dt  the content of the entry's {@code <dt>} element
     * @param key the key as stored in the key set (any {@link #SUPERPREFIX} is kept)
     * @return {@code key + " - " + dt}
     */
    protected String makeKeyTitle(String dt, String key) {
        return key + " - " + dt;
    }

    /**
     * Renders one {@code <key>} element per entry of {@code keySet} and adds the
     * number of rendered elements to {@link #nKey}.
     *
     * @param dt     the content of the entry's {@code <dt>} element, used in the title
     * @param dtKey  key derived from {@code dt}, or {@code null}; set entries whose
     *               {@code ZephyrUtil.makeKey} value equals it are skipped
     * @param type   value written to the {@code type} attribute
     * @param keySet the keys to render, in the set's iteration order
     * @return the concatenated {@code <key>} elements, or an empty string if none were rendered
     */
    public String makeKeys(String dt, String dtKey, String type, Set<String> keySet) {
        // A method-local buffer needs no synchronization, so StringBuilder is sufficient.
        StringBuilder out = new StringBuilder();
        for (String key : keySet) {
            // NOTE(review): ZephyrUtil.makeKey is defined outside this file; presumably it
            // normalizes the key the same way dtKey was normalized - confirm.
            if (ZephyrUtil.makeKey(key).equals(dtKey)) {
                continue;
            }
            out.append("<key type=\"")
               .append(type)
               .append("\" title=\"")
               .append(makeKeyTitle(dt, key))
               .append("\">")
               .append(makeKey(key))
               .append("</key>");
            nKey++;
        }
        return out.toString();
    }

    /**
     * Collects the distinct words of {@code dd} matched by {@code pat}.
     *
     * <p>Group 2 of each match is the word. When group 1 participated in the match
     * the word is stored with {@link #SUPERPREFIX} and any plain copy of it is
     * removed; otherwise the plain word is stored unless the prefixed form is
     * already present. Each word therefore appears at most once, and the prefixed
     * form wins.
     *
     * @param dt  the content of the entry's {@code <dt>} element (not used by this implementation)
     * @param dd  the text to scan
     * @param pat pattern with an optional group 1 and the word in group 2
     * @return a sorted set of keys; empty when nothing matched
     */
    public Set<String> makeKeySet(String dt, String dd, Pattern pat) {
        Set<String> keySet = new TreeSet<String>();
        Matcher m = pat.matcher(dd);
        while (m.find()) {
            String word = m.group(2);
            String marked = SUPERPREFIX + word;
            if (m.group(1) != null) {
                // The marked form replaces a previously stored plain form.
                keySet.remove(word);
                keySet.add(marked);
            } else if (!keySet.contains(marked)) {
                keySet.add(word);
            }
        }
        return keySet;
    }

    /**
     * Extracts the keys from {@code dd} and renders them as {@code <key>} elements.
     *
     * <p>The first match of {@code pat} in {@code dt}, if any, is passed through
     * {@code ZephyrUtil.makeKey} and used to suppress keys that equal it.
     *
     * @param dt   the content of the entry's {@code <dt>} element
     * @param dd   the text to scan for keys
     * @param pat  the word pattern (see {@link #makeKeySet(String, String, Pattern)})
     * @param type value written to the {@code type} attribute
     * @return the rendered keys, or an empty string when {@code dd} yields no keys
     */
    public String makeKeys(String dt, String dd, Pattern pat, String type) {
        Set<String> keySet = makeKeySet(dt, dd, pat);
        if (keySet.isEmpty()) {
            return "";
        }
        Matcher m = pat.matcher(dt);
        String dtKey = (m.find() ? ZephyrUtil.makeKey(m.group(2)) : null);
        return makeKeys(dt, dtKey, type, keySet);
    }

    /**
     * @return the current value of the shared key counter {@link #nKey}
     */
    public int getKeyNum() {
        return nKey;
    }

    /**
     * Regular expression for one entry: group 1 is the {@code <dt>} content,
     * group 2 the whole {@code <dd>...</dd>} element and group 3 its content.
     * The {@code <dt} tag must carry at least one further character (for example
     * an attribute) before its closing {@code >}. The {@code <dd>} content is
     * matched greedily, so it extends to the last {@code </dd>} of the input.
     */
    public static final String DT_DD_REGEXP = "<dt[^>]+>(.+?)</dt>.*?(<dd>(.*)</dd>)";
    /** Compiled form of {@link #DT_DD_REGEXP}. */
    public static final Pattern DT_DD = Pattern.compile(DT_DD_REGEXP);

    // Default word pattern: an optional leading <b> (group 1) followed by a run of
    // two or more ASCII letters, characters in the range U+00C0..U+017E, or
    // apostrophes (group 2).
    // NOTE: U+00D7 (×) and U+00F7 (÷) lie inside that range and are therefore
    // matched as word characters as well.
    static final String WORD_REGEXP = "(<b>)?([A-Za-z\u00C0-\u017E']{2,}+)";
    /** Word pattern used by {@link #parse(String, String)}. */
    protected final Pattern WORD;

    /** Language label used in the summary message; empty, or the trimmed label plus one space. */
    private final String keyLang;

    /**
     * Creates an instance with a custom word pattern and language label.
     *
     * @param wordPat pattern with an optional group 1 and the word in group 2;
     *                {@code null} selects the default {@link #WORD_REGEXP}
     * @param keyLang label inserted into the summary message; {@code null} or
     *                empty means no label
     */
    protected AppendLatinKeys(Pattern wordPat, String keyLang) {
        this.WORD = (wordPat != null ? wordPat : Pattern.compile(WORD_REGEXP));
        if (keyLang == null || keyLang.isEmpty()) {
            this.keyLang = "";
        } else {
            this.keyLang = keyLang.trim() + " ";
        }
    }

    /**
     * Creates an instance with the default word pattern and the given language label.
     *
     * @param keyLang label inserted into the summary message; may be {@code null} or empty
     */
    protected AppendLatinKeys(String keyLang) {
        this(null, keyLang);
    }

    /** Creates an instance with the default word pattern and no language label. */
    public AppendLatinKeys() {
        this(null, "");
    }

    /**
     * Hook that converts the {@code <dd>} content into the text scanned for keys.
     * This implementation returns its argument unchanged.
     *
     * @param dd the content of the {@code <dd>} element
     * @return the text to scan
     */
    protected String makePlainText(String dd) {
        return dd;
    }

    /**
     * Processes one input line.
     *
     * <p>The line is first passed through {@code UnescapeChars.unescape}. If it
     * then contains an entry matching {@link #DT_DD}, the generated keys are
     * inserted directly before the {@code <dd>} tag; all other text of the line,
     * including anything that follows the closing {@code </dd>}, is preserved.
     *
     * @param line the raw input line
     * @param type value written to the {@code type} attribute of each key
     * @return the unescaped line, with keys inserted when the line contains an entry
     */
    protected String parse(String line, String type) {
        line = UnescapeChars.unescape(line);
        Matcher m = DT_DD.matcher(line);
        if (!m.find()) {
            return line;
        }
        String dt = m.group(1);
        String txt = makePlainText(m.group(3));
        String keys = makeKeys(dt, txt, WORD, type);
        // Splice the keys in at the start of <dd> and keep the remainder of the line
        // verbatim, so that text after the final </dd> is not lost.
        int ddStart = m.start(2);
        return line.substring(0, ddStart) + keys + line.substring(ddStart);
    }

    /**
     * Validates a key type given on the command line.
     *
     * <p>An invalid value is reported on standard error and terminates the JVM
     * with exit status 1.
     *
     * @param key the requested key type
     * @return {@code key} when it is one of the two accepted values
     */
    public static String getKeyType(String key) {
        if (key.equals("表記") || key.equals("条件")) {
            return key;
        } else {
            System.err.println("invalid keytype: " + key + ", must be 表記 or 条件");
            System.exit(1);
            return null; // not reached; required by the compiler
        }
    }

    /**
     * Reads UTF-8 text line by line, writes each processed line to standard
     * output and finally reports the number of generated keys on standard error.
     *
     * @param args {@code args[0]} is the input file, or {@code "-"} for standard
     *             input; {@code args[1]} is the key type (see {@link #getKeyType(String)})
     * @throws Exception if the input cannot be opened or read
     */
    protected void appendKeys(String[] args) throws Exception {
        boolean fromStdin = args[0].equals("-");
        InputStream in = fromStdin ? System.in : new FileInputStream(new File(args[0]));
        try {
            String type = getKeyType(args[1]);
            BufferedReader br = new BufferedReader(new InputStreamReader(in, "UTF-8"));
            String line;

            while ((line = br.readLine()) != null) {
                System.out.println(parse(line, type));
            }

            if (nKey > 0) {
                System.err.println("append " + nKey + " extra " + keyLang + type + " keys");
            }
        } finally {
            // Release the file handle; standard input is left open for the caller.
            if (!fromStdin) {
                in.close();
            }
        }
    }

    /**
     * Command-line entry point.
     *
     * @param args input file (or {@code "-"}) followed by the key type
     */
    public static void main(String[] args) {
        if (args.length < 2) {
            System.err.println("Usage: java AppendLatinKeys utf8file.html [表記|条件]");
            return;
        }

        ZephyrUtil.setUTF8Ouput();
        AppendLatinKeys app = new AppendLatinKeys();
        try {
            app.appendKeys(args);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

}
