package zephyr.obunsha.petitroyal;

import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import zephyr.util.AppendLatinKeys;
import zephyr.util.ZephyrUtil;

/**
 * Line-oriented filter for dictionary body HTML.
 *
 * <p>Reads lines from standard input (decoded as UTF-8), applies the literal and
 * regular-expression replacements in {@link #REPLACES} / {@link #REG_REPLACES} through
 * {@code ZephyrUtil.sed}, strips {@code <ykg>...</ykg>} tag pairs while keeping their
 * content, and inserts the output of {@code AppendLatinKeys.makeKeys} for the collected
 * Japanese keys in front of the first {@code <dd>} of the line. Each resulting line is
 * printed to standard output.
 *
 * <p>Not thread-safe: the number of inserted keys is accumulated in a static counter.
 */
public class SedBodyHtml {

    /**
     * Matches a {@code <dt ...>} element. Group 2 is the element text without an optional
     * leading {@code <b>} followed by asterisks/spaces and without an optional trailing
     * {@code </b>}.
     */
    private static final String DT_ID_REGEXP = "<dt[^>]+>(<b>\\** *)?(.+?)(</b>)?</dt>";
    private static final Pattern DT_ID = Pattern.compile(DT_ID_REGEXP);

    /** Matches one {@code <ykg>...</ykg>} span; group 1 is the content between the tags. */
    private static final String YKG_REGEXP = "<ykg>(.*?)</ykg>";
    private static final Pattern YKG = Pattern.compile(YKG_REGEXP);

    /**
     * Matches one {@code <b>...</b>} run; group 1 is its text with any leading
     * half-width middle dots removed.
     */
    private static final String YKG_BOLD_REGEXP = "<b>･*(.*?)</b>";
    private static final Pattern YKG_BOLD = Pattern.compile(YKG_BOLD_REGEXP);

    /**
     * Derives the key for the content of one {@code <ykg>} span.
     *
     * <p>Example: for a line containing
     * {@code <ykg><b>アフリカ</b>(Afrique)<b>の</b></ykg>} the argument is
     * {@code <b>アフリカ</b>(Afrique)<b>の</b>} and the bold runs concatenate to
     * {@code アフリカの}, so the result is {@code "* アフリカの"}.
     *
     * @param ykg the text between {@code <ykg>} and {@code </ykg>}
     * @return {@code "* "} followed by the concatenated bold text, or {@code ykg} itself
     *         when the bold runs are absent or all empty
     */
    private static String getJPKey(String ykg) {
        Matcher bold = YKG_BOLD.matcher(ykg);
        StringBuilder key = new StringBuilder();
        while (bold.find()) {
            key.append(bold.group(1));
        }
        if (key.length() == 0) {
            return ykg;
        }
        return "* " + key;
    }

    private static final AppendLatinKeys LATKEY = new AppendLatinKeys();

    /** Running total of keys handed to {@code LATKEY.makeKeys}; reported on stderr at the end. */
    private static int nJPKey = 0;

    /**
     * Removes the {@code <ykg>} tag pairs from a line and, if any non-empty keys were found
     * in them, inserts the text produced by {@code LATKEY.makeKeys} for those keys.
     *
     * @param line    one line of input, already passed through the replacement tables
     * @param keyType key type forwarded unchanged to {@code LATKEY.makeKeys}
     * @return the line without {@code <ykg>}/{@code </ykg>} tags; when keys were collected,
     *         the generated key text is placed immediately before the first {@code <dd>},
     *         or at the very start of the line if there is no {@code <dd>}
     */
    private static String fixJPKey(String line, String keyType) {
        // Headword text of the entry, or null when the line carries no <dt> element.
        Matcher dtMatcher = DT_ID.matcher(line);
        String dt = dtMatcher.find() ? dtMatcher.group(2) : null;

        // Copy the line while dropping the <ykg>, </ykg> tags and collecting distinct keys.
        StringBuilder stripped = new StringBuilder(line.length());
        TreeSet<String> keys = new TreeSet<String>();
        Matcher ykgMatcher = YKG.matcher(line);
        int copied = 0;
        while (ykgMatcher.find()) {
            String ykg = ykgMatcher.group(1);
            stripped.append(line, copied, ykgMatcher.start()).append(ykg);
            String key = getJPKey(ykg);
            if (!key.isEmpty()) {
                keys.add(key); // the set already ignores duplicates
            }
            copied = ykgMatcher.end();
        }
        stripped.append(line, copied, line.length());
        String untagged = stripped.toString();
        if (keys.isEmpty()) {
            return untagged;
        }

        // Insert before the first <dd>; with no <dd> the keys go to the front of the line.
        int insertAt = Math.max(untagged.indexOf("<dd>"), 0);
        StringBuilder out = new StringBuilder();
        out.append(untagged, 0, insertAt);
        // NOTE(review): dt is null here when the line has <ykg> spans but no <dt>; whether
        // ZephyrUtil.makeKey / makeKeys accept null is not visible from this file - confirm.
        out.append(LATKEY.makeKeys(dt, ZephyrUtil.makeKey(dt), keyType, keys));
        nJPKey += keys.size();
        out.append(untagged, insertAt, untagged.length());
        return out.toString();
    }

    /**
     * Literal {@code {from, to}} replacement pairs handed to {@code ZephyrUtil.sed}: mostly
     * HTML character entities mapped to the characters they stand for, plus a few markup
     * clean-ups. Presumably applied in array order (which is why {@code &amp;amp;} precedes
     * {@code &amp;}) - verify against {@code ZephyrUtil.sed}.
     */
    private final static String[][] REPLACES = { { "&lt;", "<" }, { "&gt;", ">" },
            { "&amp;amp;", "&" }, { "&amp;", "&" }, { "&hellip;", "･･･" }, { "&rarr;", "→" },
            { "&larr;", "←" }, { "&hArr;", "⇔" }, { "&rArr;", "⇒" }, { "&dagger;", "†" },
            { "&minus;", "-" }, { "&mdash;", "―" }, { "℃&deg;C", "℃" },
            { "</p><p><indent val=\"2\"><p>&nbsp;</p><p>", "</p><indent val=\"2\"><p><p>" },
            { "<b>［用例］</b>", "［用例］" } };

    /** Regular-expression {@code {pattern, replacement}} pairs handed to {@code ZephyrUtil.sed}. */
    private final static String[][] REG_REPLACES = { { "<p> +", "<p>" } };

    /**
     * Transforms a single line: replacement tables first, then {@code <ykg>} handling.
     *
     * @param line    raw input line
     * @param keyType key type forwarded to {@link #fixJPKey(String, String)}
     * @return the transformed line
     */
    private static String sed(String line, String keyType) {
        return fixJPKey(ZephyrUtil.sed(line, REPLACES, REG_REPLACES), keyType);
    }

    /**
     * Filters standard input to standard output line by line and, if any keys were added,
     * reports their count on standard error.
     *
     * @param args command-line arguments; when present, {@code args[0]} is converted to the
     *             key type by {@code AppendLatinKeys.getKeyType}, otherwise {@code "表記"} is used
     * @throws Exception if reading the input or transforming a line fails
     */
    private static void sed(String[] args) throws Exception {
        String keyType = args.length > 0 ? AppendLatinKeys.getKeyType(args[0]) : "表記";
        BufferedReader br = new BufferedReader(new InputStreamReader(System.in, "UTF-8"));
        String line;
        while ((line = br.readLine()) != null) {
            // fixJPKey always returns a non-null string, so every input line yields one output line.
            System.out.println(sed(line, keyType));
        }
        if (nJPKey > 0) {
            System.err.println("append " + nJPKey + " extra Japanese " + keyType + " keys");
        }
    }

    /**
     * Command-line entry point.
     *
     * @param args optional; {@code args[0]} selects the key type
     */
    public static void main(String[] args) {
        ZephyrUtil.setUTF8Ouput();
        try {
            sed(args);
        } catch (Exception e) {
            e.printStackTrace();
            // The output is incomplete at this point: flush what was produced and signal
            // the failure to the caller instead of exiting with status 0.
            System.out.flush();
            System.exit(1);
        }
    }

}
