/*
 * Decompiled with CFR 0.152.
 */
package kea.main;

import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.ObjectOutputStream;
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.Vector;
import kea.filters.KEAFilter;
import kea.stemmers.SremovalStemmer;
import kea.stemmers.Stemmer;
import kea.stopwords.Stopwords;
import kea.stopwords.StopwordsEnglish;
import weka.core.Attribute;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.Utils;

public class KEAModelBuilder
implements OptionHandler {
    String m_dirName = null;
    String m_modelName = null;
    String m_vocabulary = null;
    String m_vocabularyFormat = null;
    String m_documentLanguage = "en";
    String m_encoding = "default";
    boolean m_debug = false;
    boolean m_useKFrequency = false;
    boolean m_disallowIPeriods = false;
    private int m_MaxPhraseLength = 5;
    private int m_MinPhraseLength = 1;
    private int m_MinNumOccur = 2;
    KEAFilter m_KEAFilter = null;
    private Stemmer m_Stemmer = new SremovalStemmer();
    private Stopwords m_Stopwords = new StopwordsEnglish();
    private boolean m_CheckForProperNouns = true;

    public boolean getCheckForProperNouns() {
        return this.m_CheckForProperNouns;
    }

    public void setCheckForProperNouns(boolean newM_CheckProperNouns) {
        this.m_CheckForProperNouns = newM_CheckProperNouns;
    }

    public Stopwords getStopwords() {
        return this.m_Stopwords;
    }

    public void setStopwords(Stopwords newM_Stopwords) {
        this.m_Stopwords = newM_Stopwords;
    }

    public Stemmer getStemmer() {
        return this.m_Stemmer;
    }

    public void setStemmer(Stemmer newStemmer) {
        this.m_Stemmer = newStemmer;
    }

    public int getMinNumOccur() {
        return this.m_MinNumOccur;
    }

    public void setMinNumOccur(int newMinNumOccur) {
        this.m_MinNumOccur = newMinNumOccur;
    }

    public int getMaxPhraseLength() {
        return this.m_MaxPhraseLength;
    }

    public void setMaxPhraseLength(int newMaxPhraseLength) {
        this.m_MaxPhraseLength = newMaxPhraseLength;
    }

    public int getMinPhraseLength() {
        return this.m_MinPhraseLength;
    }

    public void setMinPhraseLength(int newMinPhraseLength) {
        this.m_MinPhraseLength = newMinPhraseLength;
    }

    public boolean getDisallowIPeriods() {
        return this.m_disallowIPeriods;
    }

    public void setDisallowIPeriods(boolean newdisallowIPeriods) {
        this.m_disallowIPeriods = newdisallowIPeriods;
    }

    public boolean getUseKFrequency() {
        return this.m_useKFrequency;
    }

    public void setUseKFrequency(boolean newuseKFrequency) {
        this.m_useKFrequency = newuseKFrequency;
    }

    public boolean getDebug() {
        return this.m_debug;
    }

    public void setDebug(boolean newdebug) {
        this.m_debug = newdebug;
    }

    public String getEncoding() {
        return this.m_encoding;
    }

    public void setEncoding(String newencoding) {
        this.m_encoding = newencoding;
    }

    public String getVocabulary() {
        return this.m_vocabulary;
    }

    public void setVocabulary(String newvocabulary) {
        this.m_vocabulary = newvocabulary;
    }

    public String getDocumentLanguage() {
        return this.m_documentLanguage;
    }

    public void setDocumentLanguage(String newdocumentLanguage) {
        this.m_documentLanguage = newdocumentLanguage;
    }

    public String getVocabularyFormat() {
        return this.m_vocabularyFormat;
    }

    public void setVocabularyFormat(String newvocabularyFormat) {
        this.m_vocabularyFormat = newvocabularyFormat;
    }

    public String getModelName() {
        return this.m_modelName;
    }

    public void setModelName(String newmodelName) {
        this.m_modelName = newmodelName;
    }

    public String getDirName() {
        return this.m_dirName;
    }

    public void setDirName(String newdirName) {
        this.m_dirName = newdirName;
    }

    /*
     * Enabled aggressive block sorting
     */
    public void setOptions(String[] options) throws Exception {
        String stemmerString;
        String dirName = Utils.getOption((char)'l', (String[])options);
        if (dirName.length() <= 0) {
            this.setDirName(null);
            throw new Exception("Name of directory required argument.");
        }
        this.setDirName(dirName);
        String modelName = Utils.getOption((char)'m', (String[])options);
        if (modelName.length() <= 0) {
            this.setModelName(null);
            throw new Exception("Name of model required argument.");
        }
        this.setModelName(modelName);
        String vocabularyName = Utils.getOption((char)'v', (String[])options);
        if (vocabularyName.length() <= 0) {
            this.setVocabulary(null);
            throw new Exception("Name of vocabulary required argument.");
        }
        this.setVocabulary(vocabularyName);
        String vocabularyFormat = Utils.getOption((char)'f', (String[])options);
        if (!this.getVocabulary().equals("none")) {
            if (vocabularyFormat.length() <= 0) {
                this.setVocabularyFormat(null);
                throw new Exception("If a controlled vocabulary is used, format of vocabulary required argument (skos or text).");
            }
            if (!vocabularyFormat.equals("skos") && !vocabularyFormat.equals("text")) {
                throw new Exception("Unsupported format of vocabulary. It should be either \"skos\" or \"text\".");
            }
            this.setVocabularyFormat(vocabularyFormat);
        } else {
            this.setVocabularyFormat(null);
        }
        String encoding = Utils.getOption((char)'e', (String[])options);
        if (encoding.length() > 0) {
            this.setEncoding(encoding);
        } else {
            this.setEncoding("default");
        }
        String documentLanguage = Utils.getOption((char)'i', (String[])options);
        if (documentLanguage.length() > 0) {
            this.setDocumentLanguage(documentLanguage);
        } else {
            this.setDocumentLanguage("en");
        }
        String maxPhraseLengthString = Utils.getOption((char)'x', (String[])options);
        if (maxPhraseLengthString.length() > 0) {
            this.setMaxPhraseLength(Integer.parseInt(maxPhraseLengthString));
        } else {
            this.setMaxPhraseLength(5);
        }
        String minPhraseLengthString = Utils.getOption((char)'y', (String[])options);
        if (minPhraseLengthString.length() > 0) {
            this.setMinPhraseLength(Integer.parseInt(minPhraseLengthString));
        } else {
            this.setMinPhraseLength(1);
        }
        String minNumOccurString = Utils.getOption((char)'o', (String[])options);
        if (minNumOccurString.length() > 0) {
            this.setMinNumOccur(Integer.parseInt(minNumOccurString));
        } else {
            this.setMinNumOccur(2);
        }
        String stopwordsString = Utils.getOption((char)'s', (String[])options);
        if (stopwordsString.length() > 0) {
            stopwordsString = "kea.stopwords.".concat(stopwordsString);
            this.setStopwords((Stopwords)Class.forName(stopwordsString).newInstance());
        }
        if ((stemmerString = Utils.getOption((char)'t', (String[])options)).length() > 0) {
            stemmerString = "kea.stemmers.".concat(stemmerString);
            this.setStemmer((Stemmer)Class.forName(stemmerString).newInstance());
        }
        this.setDebug(Utils.getFlag((char)'d', (String[])options));
        this.setUseKFrequency(Utils.getFlag((char)'k', (String[])options));
        this.setDisallowIPeriods(Utils.getFlag((char)'p', (String[])options));
        this.setCheckForProperNouns(!Utils.getFlag((char)'n', (String[])options));
        Utils.checkForRemainingOptions((String[])options);
    }

    public String[] getOptions() {
        String[] options = new String[26];
        int current = 0;
        options[current++] = "-l";
        options[current++] = this.getDirName();
        options[current++] = "-m";
        options[current++] = this.getModelName();
        options[current++] = "-v";
        options[current++] = this.getVocabulary();
        options[current++] = "-f";
        options[current++] = this.getVocabularyFormat();
        options[current++] = "-e";
        options[current++] = this.getEncoding();
        options[current++] = "-i";
        options[current++] = this.getDocumentLanguage();
        if (this.getUseKFrequency()) {
            options[current++] = "-k";
        }
        if (this.getDebug()) {
            options[current++] = "-d";
        }
        if (this.getDisallowIPeriods()) {
            options[current++] = "-p";
        }
        options[current++] = "-x";
        options[current++] = "" + this.getMaxPhraseLength();
        options[current++] = "-y";
        options[current++] = "" + this.getMinPhraseLength();
        options[current++] = "-o";
        options[current++] = "" + this.getMinNumOccur();
        options[current++] = "-s";
        options[current++] = this.getStopwords().getClass().getName();
        options[current++] = "-t";
        options[current++] = this.getStemmer().getClass().getName();
        if (this.getCheckForProperNouns()) {
            options[current++] = "-n";
        }
        while (current < options.length) {
            options[current++] = "";
        }
        return options;
    }

    public Enumeration listOptions() {
        Vector<Option> newVector = new Vector<Option>(14);
        newVector.addElement(new Option("\tSpecifies name of directory.", "l", 1, "-l <directory name>"));
        newVector.addElement(new Option("\tSpecifies name of model.", "m", 1, "-m <model name>"));
        newVector.addElement(new Option("\tSpecifies vocabulary name.", "v", 1, "-v <vocabulary name>"));
        newVector.addElement(new Option("\tSpecifies vocabulary format (text or skos or none).", "f", 1, "-f <vocabulary format>"));
        newVector.addElement(new Option("\tSpecifies document language (en (default), es, de, fr).", "i", 1, "-i <document language>"));
        newVector.addElement(new Option("\tSpecifies encoding.", "e", 1, "-e <encoding>"));
        newVector.addElement(new Option("\tTurns debugging mode on.", "d", 0, "-d"));
        newVector.addElement(new Option("\tUse keyphrase frequency statistic.", "k", 0, "-k"));
        newVector.addElement(new Option("\tDisallow internal periods.", "p", 0, "-p"));
        newVector.addElement(new Option("\tSets the maximum phrase length (default: 5).", "x", 1, "-x <length>"));
        newVector.addElement(new Option("\tSets the minimum phrase length (default: 1).", "y", 1, "-y <length>"));
        newVector.addElement(new Option("\tSet the minimum number of occurences (default: 2).", "o", 1, "-o"));
        newVector.addElement(new Option("\tSets the list of stopwords to use (default: StopwordsEnglish).", "s", 1, "-s <name of stopwords class>"));
        newVector.addElement(new Option("\tSet the stemmer to use (default: SremovalStemmer).", "t", 1, "-t <name of stemmer class>"));
        newVector.addElement(new Option("\tDo not check for proper nouns.", "n", 0, "-n"));
        return newVector.elements();
    }

    public Hashtable collectStems() throws Exception {
        Hashtable<String, Double> stems = new Hashtable<String, Double>();
        try {
            File dir = new File(this.m_dirName);
            String[] files = dir.list();
            int i = 0;
            while (i < files.length) {
                String stem;
                if ((files[i].endsWith(".key") || files[i].endsWith(".txt")) && !stems.containsKey(stem = files[i].substring(0, files[i].length() - 4))) {
                    stems.put(stem, new Double(0.0));
                }
                ++i;
            }
        }
        catch (Exception e) {
            throw new Exception("Problem opening directory " + this.m_dirName);
        }
        return stems;
    }

    public void buildModel(Hashtable stems) throws Exception {
        if (stems.size() == 0) {
            throw new Exception("Couldn't find any data!");
        }
        FastVector atts = new FastVector(2);
        atts.addElement((Object)new Attribute("doc", null));
        atts.addElement((Object)new Attribute("keyphrases", null));
        Instances data = new Instances("keyphrase_training_data", atts, 0);
        this.m_KEAFilter = new KEAFilter();
        this.m_KEAFilter.setDebug(this.m_debug);
        this.m_KEAFilter.setDisallowInternalPeriods(this.getDisallowIPeriods());
        this.m_KEAFilter.setKFused(this.getUseKFrequency());
        this.m_KEAFilter.setMaxPhraseLength(this.getMaxPhraseLength());
        this.m_KEAFilter.setMinPhraseLength(this.getMinPhraseLength());
        this.m_KEAFilter.setMinNumOccur(this.getMinNumOccur());
        this.m_KEAFilter.setStemmer(this.getStemmer());
        this.m_KEAFilter.setDocumentLanguage(this.getDocumentLanguage());
        this.m_KEAFilter.setVocabulary(this.getVocabulary());
        this.m_KEAFilter.setVocabularyFormat(this.getVocabularyFormat());
        this.m_KEAFilter.setStopwords(this.getStopwords());
        this.m_KEAFilter.setCheckForProperNouns(this.getCheckForProperNouns());
        this.m_KEAFilter.setInputFormat(data);
        if (this.getVocabulary().equals("none")) {
            this.m_KEAFilter.m_NODEfeature = false;
        } else {
            this.m_KEAFilter.loadThesaurus(this.getStemmer(), this.getStopwords());
        }
        this.m_KEAFilter.setNumFeature();
        System.err.println("-- Reading the Documents... ");
        Enumeration elem = stems.keys();
        while (elem.hasMoreElements()) {
            int c;
            InputStreamReader is;
            String str = (String)elem.nextElement();
            double[] newInst = new double[2];
            try {
                File txt = new File(String.valueOf(this.m_dirName) + "/" + str + ".txt");
                is = !this.m_encoding.equals("default") ? new InputStreamReader((InputStream)new FileInputStream(txt), this.m_encoding) : new InputStreamReader(new FileInputStream(txt));
                StringBuffer txtStr = new StringBuffer();
                while ((c = is.read()) != -1) {
                    txtStr.append((char)c);
                }
                is.close();
                newInst[0] = data.attribute(0).addStringValue(txtStr.toString());
            }
            catch (Exception e) {
                if (this.m_debug) {
                    System.err.println("Can't find document for stem " + str + ".");
                }
                newInst[0] = Instance.missingValue();
            }
            try {
                File key = new File(String.valueOf(this.m_dirName) + "/" + str + ".key");
                is = !this.m_encoding.equals("default") ? new InputStreamReader((InputStream)new FileInputStream(key), this.m_encoding) : new InputStreamReader(new FileInputStream(key));
                StringBuffer keyStr = new StringBuffer();
                while ((c = is.read()) != -1) {
                    keyStr.append((char)c);
                }
                newInst[1] = data.attribute(1).addStringValue(keyStr.toString());
            }
            catch (Exception e) {
                if (this.m_debug) {
                    System.err.println("Can't find keyphrases for stem " + str + ".");
                }
                newInst[1] = Instance.missingValue();
            }
            data.add(new Instance(1.0, newInst));
            this.m_KEAFilter.input(data.instance(0));
            data = data.stringFreeStructure();
        }
        this.m_KEAFilter.batchFinished();
        while (this.m_KEAFilter.output() != null) {
        }
    }

    public void saveModel() throws Exception {
        BufferedOutputStream bufferedOut = new BufferedOutputStream(new FileOutputStream(this.m_modelName));
        ObjectOutputStream out = new ObjectOutputStream(bufferedOut);
        out.writeObject((Object)this.m_KEAFilter);
        out.flush();
        out.close();
    }

    /*
     * Unable to fully structure code
     */
    public static void main(String[] ops) {
        block4: {
            kmb = new KEAModelBuilder();
            try {
                kmb.setOptions(ops);
                System.err.print("Building model with options: ");
                optionSettings = kmb.getOptions();
                i = 0;
                while (i < optionSettings.length) {
                    System.err.print(String.valueOf(optionSettings[i]) + " ");
                    ++i;
                }
                System.err.println();
                kmb.buildModel(kmb.collectStems());
                kmb.saveModel();
                break block4;
            }
            catch (Exception e) {
                e.printStackTrace();
                System.err.println(e.getMessage());
                System.err.println("\nOptions:\n");
                en = kmb.listOptions();
                ** while (en.hasMoreElements())
            }
lbl-1000:
            // 1 sources

            {
                option = (Option)en.nextElement();
                System.err.println(option.synopsis());
                System.err.println(option.description());
                continue;
            }
        }
    }
}

