/*
 * Decompiled with CFR 0.152.
 */
package org.apache.ctakes.core.sentence;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.Reader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import opennlp.maxent.GIS;
import opennlp.maxent.GISModel;
import opennlp.model.AbstractModel;
import opennlp.model.EventStream;
import opennlp.model.MaxentModel;
import opennlp.tools.dictionary.Dictionary;
import opennlp.tools.sentdetect.EndOfSentenceScanner;
import opennlp.tools.sentdetect.SDContextGenerator;
import opennlp.tools.sentdetect.SDEventStream;
import opennlp.tools.sentdetect.SentenceModel;
import opennlp.tools.sentdetect.SentenceSample;
import opennlp.tools.sentdetect.SentenceSampleStream;
import opennlp.tools.sentdetect.lang.Factory;
import opennlp.tools.util.HashSumEventStream;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
import opennlp.tools.util.StringUtil;
import opennlp.tools.util.model.ModelUtil;

/**
 * Sentence detector that asks a maximum-entropy model, for every candidate
 * end-of-sentence character reported by an {@link EndOfSentenceScanner},
 * whether the text should be split there.
 *
 * <p>An instance keeps the probabilities of the splits found by the most
 * recent {@link #sentPosDetect(String)} call; every call clears and refills
 * that list. No synchronization is performed in this class.
 */
public class SentenceDetectorCtakes {
    /** Model outcome label that means "split at this candidate". */
    public static final String SPLIT = "s";
    /** Model outcome label that means "do not split at this candidate". */
    public static final String NO_SPLIT = "n";
    /** Boxed 1.0; not referenced anywhere else in this class. */
    private static final Double ONE = Double.valueOf(1.0);
    /** Model used to score each candidate's context. */
    private final MaxentModel model;
    /** Produces the feature context handed to the model for a candidate. */
    private final SDContextGenerator cgen;
    /** Finds the candidate end-of-sentence offsets in the input text. */
    private final EndOfSentenceScanner scanner;
    /** Probability of each split accepted by the last {@link #sentPosDetect(String)} call. */
    private final List<Double> sentProbs = new ArrayList<Double>();
    /**
     * Selects where a break is recorded: when {@code true}, after the
     * whitespace that follows the token containing the candidate; when
     * {@code false}, at the first non-whitespace character at or after the
     * candidate itself. The constructor sets it to {@code false}.
     */
    protected boolean useTokenEnd;

    /**
     * Creates a detector from its three collaborators.
     *
     * @param model model whose best outcome decides whether to split
     * @param cg    context generator used to build the model's input features
     * @param eoss  scanner that proposes candidate end-of-sentence offsets
     */
    public SentenceDetectorCtakes(MaxentModel model, SDContextGenerator cg, EndOfSentenceScanner eoss) {
        this.model = model;
        this.cgen = cg;
        this.scanner = eoss;
        this.useTokenEnd = false;
    }

    /**
     * Splits {@code s} into sentences using the breaks from
     * {@link #sentPosDetect(String)}.
     *
     * <p>Each piece runs from the end of the previous piece up to one
     * character past the reported break, limited to the end of {@code s}.
     * Text after the last reported break is not returned.
     *
     * @param s the text to split
     * @return one string per detected break; empty if no break was detected
     */
    public String[] sentDetect(String s) {
        int[] endsOfSentences = this.sentPosDetect(s);
        String[] sentences = new String[endsOfSentences.length];
        int begin = 0;
        for (int si = 0; si < endsOfSentences.length; ++si) {
            // A break may already equal s.length() (text ending on a split
            // delimiter); without the clamp the extra "+ 1" makes substring
            // throw StringIndexOutOfBoundsException.
            int end = Math.min(endsOfSentences[si] + 1, s.length());
            sentences[si] = s.substring(begin, end);
            begin = end;
        }
        return sentences;
    }

    /**
     * Returns the index of the first whitespace character at or after
     * {@code pos}, or {@code s.length()} if there is none.
     */
    private int getFirstWS(String s, int pos) {
        while (pos < s.length() && !StringUtil.isWhitespace(s.charAt(pos))) {
            ++pos;
        }
        return pos;
    }

    /**
     * Returns the index of the first non-whitespace character at or after
     * {@code pos}, or {@code s.length()} if there is none.
     */
    private int getFirstNonWS(String s, int pos) {
        while (pos < s.length() && StringUtil.isWhitespace(s.charAt(pos))) {
            ++pos;
        }
        return pos;
    }

    /**
     * Finds the sentence breaks in {@code s}.
     *
     * <p>Side effect: replaces the probabilities returned by
     * {@link #getSentenceProbabilities()} with those of the breaks found here.
     *
     * @param s the text to analyse
     * @return for each accepted split, the recorded position plus one, in the
     *         order the candidates were supplied by the scanner
     */
    public int[] sentPosDetect(String s) {
        this.sentProbs.clear();
        // The context generator is handed a buffer copy of the text.
        StringBuffer sb = new StringBuffer(s);
        // NOTE(review): assumes the scanner yields ascending, in-range offsets - confirm.
        List<Integer> enders = this.scanner.getPositions(s);
        List<Integer> positions = new ArrayList<Integer>(enders.size());
        int end = enders.size();
        // Start of the sentence currently being scanned.
        int index = 0;
        for (int i = 0; i < end; ++i) {
            int cint = enders.get(i);
            int fws = this.getFirstWS(s, cint + 1);
            // Skip a candidate when the next candidate comes before the next
            // whitespace, i.e. both sit in the same whitespace-delimited token;
            // only the last candidate of a token is evaluated.
            if (i + 1 < end && enders.get(i + 1) < fws) {
                continue;
            }
            double[] probs = this.model.eval(this.cgen.getContext(sb, cint));
            String bestOutcome = this.model.getBestOutcome(probs);
            double bestProb = probs[this.model.getIndex(bestOutcome)];
            if (!SPLIT.equals(bestOutcome) || !this.isAcceptableBreak(s, index, cint)) {
                continue;
            }
            // A split on the very first character of the current sentence
            // records no break but still moves the sentence start forward.
            if (index != cint) {
                int searchFrom = this.useTokenEnd ? fws : cint;
                positions.add(this.getFirstNonWS(s, searchFrom));
                this.sentProbs.add(bestProb);
            }
            index = cint + 1;
        }
        int[] sentenceBreaks = new int[positions.size()];
        for (int i = 0; i < sentenceBreaks.length; ++i) {
            sentenceBreaks[i] = positions.get(i) + 1;
        }
        return sentenceBreaks;
    }

    /**
     * Returns the model probabilities of the breaks found by the most recent
     * {@link #sentPosDetect(String)} call, one per break and in the same order.
     *
     * @return a freshly allocated array; empty if no break has been recorded
     */
    public double[] getSentenceProbabilities() {
        double[] sentProbArray = new double[this.sentProbs.size()];
        for (int i = 0; i < sentProbArray.length; ++i) {
            sentProbArray[i] = this.sentProbs.get(i);
        }
        return sentProbArray;
    }

    /**
     * Hook consulted before a split proposed by the model is accepted.
     * This implementation accepts every split.
     *
     * @param s              the text being analysed
     * @param fromIndex      start of the sentence currently being scanned
     * @param candidateIndex offset of the candidate end-of-sentence character
     * @return {@code true} to allow the split
     */
    protected boolean isAcceptableBreak(String s, int fromIndex, int candidateIndex) {
        return true;
    }

    /**
     * Trains a sentence model with a cutoff of 5 and 100 iterations.
     *
     * @param languageCode  language passed to the OpenNLP factory and stored in the model
     * @param samples       training samples
     * @param useTokenEnd   flag stored in the resulting model
     * @param abbreviations abbreviation dictionary stored in the model; {@link #main} passes {@code null}
     * @return the trained model
     * @throws IOException declared by the training overload this delegates to
     */
    public static SentenceModel train(String languageCode, ObjectStream<SentenceSample> samples, boolean useTokenEnd, Dictionary abbreviations) throws IOException {
        return SentenceDetectorCtakes.train(languageCode, samples, useTokenEnd, abbreviations, 5, 100);
    }

    /**
     * Trains a sentence model with GIS from the given samples.
     *
     * <p>The cutoff, the iteration count and a hash of the training events
     * (under the key {@code Training-Eventhash}) are recorded in the model's
     * manifest entries.
     *
     * @param languageCode  language passed to the OpenNLP factory and stored in the model
     * @param samples       training samples
     * @param useTokenEnd   flag stored in the resulting model
     * @param abbreviations abbreviation dictionary stored in the model; {@link #main} passes {@code null}
     * @param cutoff        cutoff handed to the GIS trainer
     * @param iterations    number of GIS iterations
     * @return the trained model
     * @throws IOException declared for failures of the underlying OpenNLP training calls
     */
    public static SentenceModel train(String languageCode, ObjectStream<SentenceSample> samples, boolean useTokenEnd, Dictionary abbreviations, int cutoff, int iterations) throws IOException {
        HashMap<String, String> manifestInfoEntries = new HashMap<String, String>();
        ModelUtil.addCutoffAndIterations(manifestInfoEntries, cutoff, iterations);
        Factory factory = new Factory();
        EventStream eventStream = new SDEventStream(samples, factory.createSentenceContextGenerator(languageCode), factory.createEndOfSentenceScanner(languageCode));
        HashSumEventStream hses = new HashSumEventStream(eventStream);
        AbstractModel sentModel = GIS.trainModel(hses, iterations, cutoff);
        manifestInfoEntries.put("Training-Eventhash", hses.calculateHashSum().toString(16));
        return new SentenceModel(languageCode, sentModel, useTokenEnd, abbreviations, manifestInfoEntries);
    }

    /**
     * Prints the command-line help to standard error and terminates the JVM
     * with exit status 1.
     */
    private static void usage() {
        // Optional arguments are listed in the order main() actually reads them.
        System.err.println("Usage: SentenceDetectorME -encoding charset -lang language trainData modelName [iterations cutoff]");
        System.err.println("-encoding charset specifies the encoding which should be used ");
        System.err.println("                  for reading and writing text.");
        System.err.println("-lang language    specifies the language which ");
        System.err.println("                  is being processed.");
        System.err.println("trainData         specifies the name of the input training file");
        System.err.println("                  to train the resulting model.");
        System.err.println("modelName         specifies the resulting saved model after");
        System.err.println("                  training.");
        System.exit(1);
    }

    /**
     * Command-line trainer: reads the training file, trains a model and
     * writes it to the model file.
     *
     * <p>Arguments: {@code -encoding charset -lang language trainData
     * modelName [iterations [cutoff]]}. Iterations default to 100 and cutoff
     * to 4. Missing or malformed arguments print the usage text and exit with
     * status 1. Failures during training or saving are printed as a stack
     * trace.
     *
     * @param args command-line arguments as described above
     * @throws IOException declared for compatibility; I/O failures during
     *                     training and saving are caught and printed
     */
    public static void main(String[] args) throws IOException {
        int ai = 0;
        String encoding = null;
        String lang = null;
        while (ai < args.length && args[ai].startsWith("-")) {
            String option = args[ai++];
            boolean isEncoding = "-encoding".equals(option);
            boolean isLang = "-lang".equals(option);
            // Unknown option, or a known option with no value after it.
            if (!(isEncoding || isLang) || ai >= args.length) {
                SentenceDetectorCtakes.usage();
                return;
            }
            if (isEncoding) {
                encoding = args[ai++];
            } else {
                lang = args[ai++];
            }
        }
        // Both options and both file names are mandatory.
        if (lang == null || encoding == null || args.length - ai < 2) {
            SentenceDetectorCtakes.usage();
            return;
        }
        File inFile = new File(args[ai++]);
        File outFile = new File(args[ai++]);
        int iters = 100;
        int cutoff = 4;
        try {
            if (ai < args.length) {
                iters = SentenceDetectorCtakes.convertToInt(args[ai++]);
            }
            if (ai < args.length) {
                cutoff = SentenceDetectorCtakes.convertToInt(args[ai++]);
            }
        }
        catch (NumberFormatException e) {
            System.err.println("Invalid numeric argument: " + e.getMessage());
            SentenceDetectorCtakes.usage();
            return;
        }
        try {
            SentenceModel model;
            InputStream in = new FileInputStream(inFile);
            try {
                Reader reader = new InputStreamReader(in, encoding);
                ObjectStream<SentenceSample> samples = new SentenceSampleStream(new PlainTextByLineStream(reader));
                model = SentenceDetectorCtakes.train(lang, samples, true, null, cutoff, iters);
            }
            finally {
                // Release the training file even if training fails.
                in.close();
            }
            System.out.println("Saving the model as: " + outFile);
            OutputStream out = new FileOutputStream(outFile);
            try {
                model.serialize(out);
            }
            finally {
                // Release the model file whether or not serialization succeeded.
                out.close();
            }
        }
        catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Parses a decimal integer.
     *
     * @param s text to parse
     * @return the parsed value
     * @throws NumberFormatException if {@code s} is not a valid integer
     */
    private static int convertToInt(String s) {
        return Integer.parseInt(s);
    }
}

