/*
 * Decompiled with CFR 0.152.
 */
package zephyr.kenkyusya.lajp;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import zephyr.util.MultiHashMap;
import zephyr.util.ZephyrUtil;

/**
 * Command-line tool that writes an HTML morphology listing to standard output.
 *
 * <p>It reads three inputs, in this order:
 * <ol>
 *   <li>an HTML "body" file whose {@code <dt id="N">headword</dt>} lines are indexed by a
 *       key derived from the headword,</li>
 *   <li>an HTML dictionary file whose {@code <dt id="N">headword*</dt>} lines are indexed by
 *       headword,</li>
 *   <li>a morphology text file with one {@code form,lemma description} record per line.</li>
 * </ol>
 * For every morphology record it prints one {@code <dt>form</dt><dd>...</dd>} line containing
 * links into the two HTML files followed by the description with its grammatical
 * abbreviations replaced through {@link #morphKeyMap}.
 *
 * <p>Progress and diagnostics go to standard error.
 */
public class MakeMorphHtml {
    /** Matches a definition-term line; group 1 is the numeric id, group 2 the headword. */
    private static final String DT_ID_REGEXP = "<dt id=\"([0-9]+)\">(.+?)</dt>";
    private static final Pattern DT_ID = Pattern.compile(DT_ID_REGEXP);

    /** Like {@link #DT_ID_REGEXP}, but the headword must be followed by a literal asterisk. */
    private static final String DICT_ENT_REGEXP = "<dt id=\"([0-9]+)\">(.+?)\\*</dt>";
    private static final Pattern DICT_ENT = Pattern.compile(DICT_ENT_REGEXP);

    /** Matches either an abbreviation token (group 1) or a complete anchor element (group 2). */
    private static final String MORPHKEY_REGEXP = "([a-z123]+)|(<a .+?</a>)";
    private static final Pattern MORPHKEY = Pattern.compile(MORPHKEY_REGEXP);

    /** Abbreviation token to replacement text used by {@link #formatMorphDesc(String)}. */
    private static final HashMap<String, String> morphKeyMap = new HashMap<>();

    static {
        morphKeyMap.put("noun", "\u540d");
        morphKeyMap.put("pron", "\u4ee3");
        morphKeyMap.put("adj", "\u5f62");
        morphKeyMap.put("num", "\u6570");
        morphKeyMap.put("adv", "\u526f");
        morphKeyMap.put("verb", "\u52d5");
        morphKeyMap.put("part", "\u5206\u8a5e");
        morphKeyMap.put("prep", "\u524d");
        morphKeyMap.put("conj", "\u63a5");
        morphKeyMap.put("inter", "\u611f");
        morphKeyMap.put("m", "\u7537");
        morphKeyMap.put("f", "\u5973");
        morphKeyMap.put("n", "\u4e2d");
        morphKeyMap.put("nom", "\u4e3b");
        morphKeyMap.put("voc", "\u547c");
        morphKeyMap.put("gen", "\u5c5e");
        morphKeyMap.put("dat", "\u4e0e");
        morphKeyMap.put("acc", "\u5bfe");
        morphKeyMap.put("abl", "\u596a");
        morphKeyMap.put("loc", "\u5730");
        morphKeyMap.put("sg", "\u5358");
        morphKeyMap.put("pl", "\u8907");
        morphKeyMap.put("1st", "\uff11");
        morphKeyMap.put("2nd", "\uff12");
        morphKeyMap.put("3rd", "\uff13");
        morphKeyMap.put("comp", "\u6bd4");
        morphKeyMap.put("super", "\u6700");
        morphKeyMap.put("card", "\u57fa\u6570\u8a5e");
        morphKeyMap.put("ord", "\u5e8f\u6570\u8a5e");
        morphKeyMap.put("dist", "\u914d\u5206\u8a5e");
        morphKeyMap.put("adverb", "\u6570\u526f\u8a5e");
        morphKeyMap.put("pres", "\u73fe");
        morphKeyMap.put("impf", "\u672a\u5b8c");
        morphKeyMap.put("fut", "\u672a\u6765");
        morphKeyMap.put("perf", "\u5b8c");
        morphKeyMap.put("plup", "\u904e\u5b8c");
        morphKeyMap.put("futp", "\u672a\u6765\u5b8c");
        morphKeyMap.put("act", "\u80fd");
        morphKeyMap.put("pass", "\u53d7");
        morphKeyMap.put("ind", "\u76f4");
        morphKeyMap.put("sub", "\u63a5");
        morphKeyMap.put("imp", "\u547d");
        morphKeyMap.put("inf", "\u4e0d");
    }

    /** Dictionary headword to the ids of its entries in the dictionary HTML file. */
    private final MultiHashMap<String, String> dictIdMap = new MultiHashMap<>();

    /** Dictionary headwords whose source line contained the text {@code DEP"}. */
    private final HashSet<String> depSet = new HashSet<>();

    /** Body-file key (see {@link #loadLajpBody(String)}) to the ids of matching entries. */
    private final MultiHashMap<String, String> bodyIdMap = new MultiHashMap<>();

    /**
     * Indexes every {@code <dt id="N">...</dt>} line of the body file.
     *
     * <p>The headword is passed through {@code ZephyrUtil.hex2uni} and then
     * {@code ZephyrUtil.makeKey(..., true)}; the result is the map key. Only the first match
     * on each line is used. The file is decoded as Windows-31J.
     *
     * @param bodyFile path of the body HTML file
     * @throws Exception if the file cannot be opened or read
     */
    private void loadLajpBody(String bodyFile) throws Exception {
        try (FileInputStream in = new FileInputStream(bodyFile);
             BufferedReader br = new BufferedReader(new InputStreamReader(in, "Windows-31J"))) {
            String line;
            while ((line = br.readLine()) != null) {
                Matcher m = DT_ID.matcher(line);
                if (!m.find()) {
                    continue;
                }
                String dt = ZephyrUtil.hex2uni(m.group(2));
                String key = ZephyrUtil.makeKey(dt, true);
                this.bodyIdMap.put(key, m.group(1));
            }
        }
        System.err.println("loaded " + this.bodyIdMap.size() + " entries from " + bodyFile);
    }

    /**
     * Indexes every {@code <dt id="N">headword*</dt>} line of the dictionary file by headword
     * and records in {@link #depSet} the headwords whose line contains {@code DEP"}.
     *
     * <p>The file is decoded as ASCII.
     *
     * @param dictFile path of the dictionary HTML file
     * @throws Exception if the file cannot be opened or read
     */
    private void loadWhitackerDict(String dictFile) throws Exception {
        try (FileInputStream in = new FileInputStream(dictFile);
             BufferedReader br = new BufferedReader(new InputStreamReader(in, "ASCII"))) {
            String line;
            while ((line = br.readLine()) != null) {
                Matcher m = DICT_ENT.matcher(line);
                if (!m.find()) {
                    continue;
                }
                String dt = m.group(2);
                this.dictIdMap.put(dt, m.group(1));
                if (line.contains("DEP\"")) {
                    this.depSet.add(dt);
                }
            }
        }
        System.err.println("loaded " + this.dictIdMap.size() + " entries from " + dictFile);
    }

    /**
     * Rewrites a morphology description by replacing abbreviation tokens.
     *
     * <p>Each run of {@code [a-z123]} characters is looked up in {@link #morphKeyMap} and
     * replaced when found, otherwise copied unchanged. Complete anchor elements and all text
     * between matches are copied unchanged. The token {@code part} is not emitted itself:
     * it suppresses the blank text that follows it and causes a one-character suffix to be
     * appended after the next token.
     *
     * @param desc the raw description text
     * @return the description with abbreviations replaced
     */
    private String formatMorphDesc(String desc) {
        StringBuilder out = new StringBuilder(desc.length());
        Matcher m = MORPHKEY.matcher(desc);
        int copiedUpTo = 0;
        boolean pendingPart = false;
        while (m.find()) {
            if (copiedUpTo < m.start()) {
                String gap = desc.substring(copiedUpTo, m.start());
                // After "part" the separating blank is dropped so the suffix attaches directly.
                if (!pendingPart || !gap.trim().isEmpty()) {
                    out.append(gap);
                }
            }
            String key = m.group(1);
            if (key == null) {
                // Anchor element: copy verbatim.
                out.append(m.group(2));
            } else if (key.equals("part")) {
                pendingPart = true;
            } else {
                String replacement = morphKeyMap.get(key);
                out.append(replacement != null ? replacement : key);
                if (pendingPart) {
                    out.append("\u5206");
                    pendingPart = false;
                }
            }
            copiedUpTo = m.end();
        }
        // NOTE(review): a "part" token with no following token produces no output at all;
        // confirm whether that is intended.
        if (copiedUpTo < desc.length()) {
            out.append(desc.substring(copiedUpTo));
        }
        return out.toString();
    }

    /**
     * Prints one link into the body file for every id stored under {@code lemma}.
     * Prints nothing when the lemma is not in {@link #bodyIdMap}.
     *
     * @param lemma the lookup key, also used as the link text
     */
    private void outBodyRef(String lemma) {
        LinkedList<String> bodyIds = this.bodyIdMap.getList(lemma);
        if (bodyIds == null) {
            return;
        }
        for (String id : bodyIds) {
            System.out.print(String.format("<a href=\"body-lajp.html#%s\">%s</a> ", id, lemma));
        }
    }

    /**
     * Prints body links for the spelling variant of {@code lemma} obtained by replacing every
     * {@code c1} with {@code c2}. Does nothing when the replacement leaves the lemma
     * unchanged, because that spelling has already been looked up by the caller.
     *
     * @param lemma the original lemma
     * @param c1 the character to replace
     * @param c2 the replacement character
     */
    private void findFromBody(String lemma, char c1, char c2) {
        String variant = lemma.replace(c1, c2);
        // Compare against the ORIGINAL lemma; comparing the variant with itself (as the code
        // previously did) is always equal and disabled every variant lookup.
        if (!variant.equals(lemma)) {
            this.outBodyRef(variant);
        }
    }

    /**
     * Prints body links for {@code lemma}, for {@code lemma + "r"} when the lemma is in
     * {@link #depSet}, and for its i/j and u/v spelling variants.
     *
     * @param lemma the lemma taken from the morphology file
     */
    private void findFromBody(String lemma) {
        this.outBodyRef(lemma);
        if (this.depSet.contains(lemma)) {
            this.outBodyRef(lemma + "r");
        }
        this.findFromBody(lemma, 'i', 'j');
        this.findFromBody(lemma, 'j', 'i');
        this.findFromBody(lemma, 'u', 'v');
        this.findFromBody(lemma, 'v', 'u');
    }

    /**
     * Reads the morphology file and writes the HTML listing to standard output.
     *
     * <p>Each record has the shape {@code form,lemma description}: the form ends at the first
     * comma, the lemma at the first space after that comma, and the rest is the description.
     * Lines that do not have this shape are skipped instead of aborting the run; blank lines
     * are skipped silently and other malformed lines are summarized on standard error.
     * A digit is printed to standard error every 100000 entries and a dot every 10000.
     *
     * @param morphFile path of the morphology text file (decoded as ASCII)
     * @throws Exception if the file cannot be opened or read
     */
    private void loadMorph(String morphFile) throws Exception {
        int entnum = 0;
        int lineNo = 0;
        int malformed = 0;
        int firstMalformed = 0;
        try (FileInputStream in = new FileInputStream(morphFile);
             BufferedReader br = new BufferedReader(new InputStreamReader(in, "ASCII"))) {
            System.err.print("creating morphology file ");
            System.out.println("<html><body>");
            String line;
            while ((line = br.readLine()) != null) {
                ++lineNo;
                int comma = line.indexOf(',');
                int space = comma < 0 ? -1 : line.indexOf(' ', comma + 1);
                if (space < 0) {
                    if (!line.trim().isEmpty()) {
                        if (malformed++ == 0) {
                            firstMalformed = lineNo;
                        }
                    }
                    continue;
                }
                String morph = line.substring(0, comma);
                String lemma = line.substring(comma + 1, space);
                String desc = line.substring(space + 1);

                System.out.print("<dt>");
                System.out.print(morph);
                System.out.print("</dt><dd>");
                this.findFromBody(lemma);
                LinkedList<String> dictIds = this.dictIdMap.getList(lemma);
                if (dictIds != null) {
                    for (String id : dictIds) {
                        System.out.print(
                                String.format("<a href=\"whitaker-dict.html#%s\">%s*</a> ", id, lemma));
                    }
                }
                System.out.print(this.formatMorphDesc(desc));
                System.out.println("</dd>");

                ++entnum;
                if (entnum % 100000 == 0) {
                    System.err.print(entnum / 100000);
                } else if (entnum % 10000 == 0) {
                    System.err.print(".");
                }
            }
        }
        System.out.println("</body></html>");
        System.out.flush();
        System.err.println();
        if (malformed > 0) {
            System.err.println("warning: skipped " + malformed
                    + " malformed line(s); first at line " + firstMalformed);
        }
        System.err.println("done. " + entnum + " entries");
    }

    /**
     * Runs the three loading stages in order.
     *
     * @param args {@code args[0]} body file, {@code args[1]} dictionary file,
     *             {@code args[2]} morphology file
     * @throws Exception if any input cannot be read
     */
    private void makeMorph(String[] args) throws Exception {
        this.loadLajpBody(args[0]);
        this.loadWhitackerDict(args[1]);
        this.loadMorph(args[2]);
    }

    /**
     * Program entry point. Prints a usage message and exits with status 1 when fewer than
     * three arguments are given; exits with status 1 after printing the stack trace when
     * processing fails.
     *
     * @param args body file, dictionary file and morphology file paths
     */
    public static void main(String[] args) {
        if (args.length < 3) {
            System.err.println("Usage: java MakeMorphHtml body-lajp.html whitaker-dict.html whitaker.morph.txt");
            System.exit(1);
            return;
        }
        MakeMorphHtml app = new MakeMorphHtml();
        try {
            ZephyrUtil.setShiftJisOuput();
            app.makeMorph(args);
        } catch (Exception e) {
            e.printStackTrace();
            // Push out whatever was already written before signalling failure to the caller.
            System.out.flush();
            System.exit(1);
        }
    }
}

