package zephyr.util;

import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import zephyr.util.AppendLatinKeys;
import zephyr.util.ZephyrUtil;

/**
 * Key appender for Japanese headwords.
 *
 * <p>Extends {@link AppendLatinKeys} with the Japanese word pattern {@link #JPWORD} and,
 * for every key that consists solely of katakana, emits an additional hiragana
 * ("かな") key.
 */
public class AppendJPKeys extends AppendLatinKeys {

    // NOTE: U+30FB is the middle dot "・"; the ranges below skip it.
    // Full-width punctuation such as "；" lives around U+FF1x.
    // A single character is enough to form a headword (e.g. "猫"), but the first
    // character must not be a half-width digit.
    public static final String JPWORD_REGEXP =
            "(<b>)?([\u3041-\u30FA\u30FC-\u9FFF][0-9\u3041-\u30FA\u30FC-\u9FFF]*)";
    public static final Pattern JPWORD = Pattern.compile(JPWORD_REGEXP);

    /** Bracket pairs whose delimiters are stripped by {@link #dropBracket(String)}. */
    private static final Pattern[] BRACKET_PATTERNS = {
        Pattern.compile("《(.+?)》"),
        Pattern.compile("〔(.+?)〕"),
        Pattern.compile("\\((.+?)\\)"),
        Pattern.compile("\\[(.+?)\\]"),
    };

    /** Replacement that keeps the bracketed text and pads it with spaces. */
    private static final String BRACKET_REPLACEMENT = " $1 ";

    private static final String SENSE_REGEXP = "(<indent val=\"([0-9])\">)?<p>(.+?)</p>";
    private static final Pattern SENSE = Pattern.compile(SENSE_REGEXP);

    /**
     * Creates the appender, passing the Japanese word pattern and the label
     * {@code "Japanese"} to the superclass.
     */
    protected AppendJPKeys() {
        super(JPWORD, "Japanese");
    }

    /**
     * Tells whether {@code c} is a katakana letter in the range 'ァ'..'ヶ' or the
     * prolonged sound mark 'ー'.
     *
     * @param c character to test
     * @return {@code true} if {@code c} is in that set
     */
    public static boolean isKataKana(char c) {
        return ('ァ' <= c && c <= 'ヶ') || c == 'ー';
    }

    /**
     * Tells whether {@code c} lies in the contiguous range 'ぁ'..'ー'
     * (U+3041..U+30FC), which spans both hiragana and katakana.
     *
     * <p>NOTE(review): this range also contains the middle dot U+30FB, which
     * {@link #JPWORD_REGEXP} skips; confirm whether that is intended.
     *
     * @param c character to test
     * @return {@code true} if {@code c} is within the range
     */
    public static boolean isKana(char c) {
        return 'ぁ' <= c && c <= 'ー';
    }

    /**
     * Converts a katakana letter to the hiragana letter at the same offset.
     *
     * <p>Only characters in 'ァ'..'ヶ' are shifted. Everything else, including the
     * prolonged sound mark 'ー', is returned unchanged so that non-katakana input
     * is never mapped onto an unrelated code point.
     *
     * @param c character to convert
     * @return the hiragana counterpart, or {@code c} itself if it is not in 'ァ'..'ヶ'
     */
    public static char kata2hira(char c) {
        if ('ァ' <= c && c <= 'ヶ') {
            return (char) (c - 'ァ' + 'ぁ');
        }
        return c;
    }

    /**
     * Derives a hiragana reading from a headword written entirely in katakana.
     *
     * <p>Headwords without furigana are either all hiragana, all katakana, or an
     * exception; only the all-katakana case yields a reading here.
     *
     * @param dt headword text
     * @return the hiragana form of {@code dt}, or {@code null} if {@code dt} is
     *     {@code null}, empty, or contains any character rejected by
     *     {@link #isKataKana(char)}
     */
    public static String makeKana(String dt) {
        if (dt == null || dt.isEmpty()) {
            // Nothing to read: avoid producing an empty key downstream.
            return null;
        }
        StringBuilder hira = new StringBuilder(dt.length());
        for (int i = 0; i < dt.length(); i++) {
            char c = dt.charAt(i);
            if (!isKataKana(c)) {
                return null;
            }
            hira.append(kata2hira(c));
        }
        return hira.toString();
    }

    /**
     * Removes the delimiters of 《…》, 〔…〕, (…) and […] groups, keeping the enclosed
     * text surrounded by single spaces.
     *
     * @param line text to process
     * @return {@code line} with each bracketed group replaced by {@code " content "}
     */
    public static String dropBracket(String line) {
        String result = line;
        for (Pattern bracket : BRACKET_PATTERNS) {
            result = bracket.matcher(result).replaceAll(BRACKET_REPLACEMENT);
        }
        return result;
    }

    /**
     * Extracts the text of {@code <p>…</p>} elements from a definition body.
     *
     * <p>Brackets are dropped first via {@link #dropBracket(String)}. An optional
     * {@code <indent val="N">} immediately before a paragraph sets the current indent
     * level, which stays in effect until another such tag is seen. Paragraphs
     * at indent level 2 or higher (example sentences) are skipped.
     *
     * @param dd definition markup
     * @return the retained paragraph texts, each followed by one space
     */
    protected String makePlainText(String dd) {
        Matcher sense = SENSE.matcher(dropBracket(dd));
        StringBuilder plain = new StringBuilder();
        int indent = 1;
        while (sense.find()) {
            String level = sense.group(2);
            if (level != null) {
                indent = Integer.parseInt(level);
            }
            if (indent <= 1) {
                // Sentences at indent 2 or deeper are examples and are ignored.
                plain.append(sense.group(3)).append(' ');
            }
        }
        return plain.toString();
    }

    /**
     * Builds the key markup produced by the superclass and appends one
     * {@code <key type="かな">} element for every entry of {@code keySet} that
     * {@link #makeKana(String)} can convert.
     *
     * @param dt headword text, forwarded to the superclass and to {@code makeKeyTitle}
     * @param dtKey forwarded to the superclass
     * @param type forwarded to the superclass
     * @param keySet keys to inspect for all-katakana entries; the superclass call
     *     receives the same set before it is iterated here
     * @return the superclass result followed by any hiragana key elements
     */
    public String makeKeys(String dt, String dtKey, String type, Set<String> keySet) {
        String key = super.makeKeys(dt, dtKey, type, keySet);
        StringBuilder kanaKeys = new StringBuilder();
        for (String k : keySet) {
            String kana = makeKana(k);
            if (kana != null) {
                kanaKeys.append(
                        String.format("<key type=\"かな\" title=\"%s\">%s</key>",
                                makeKeyTitle(dt, k), kana));
            }
        }
        if (kanaKeys.length() == 0) {
            return key;
        }
        // Plain concatenation keeps the original result even if the superclass returned null.
        return key + kanaKeys;
    }

    /**
     * Command-line entry point. Requires at least two arguments, which are handed
     * to {@code appendKeys}; otherwise prints a usage message to standard error.
     *
     * @param args command-line arguments
     */
    public static void main(String[] args) {
        if (args.length < 2) {
            System.err.println("Usage: java AppendJPKeys [body-lajputf8.html|-] [表記|条件]");
            return;
        }

        ZephyrUtil.setUTF8Ouput();
        AppendJPKeys app = new AppendJPKeys();
        try {
            app.appendKeys(args);
        } catch (Exception e) {
            // Top-level boundary: report the failure instead of letting it propagate.
            e.printStackTrace();
        }
    }

}
