package quipu.grok.preprocess.sentdetect;

import com.sun.xml.tree.XmlDocument;
import gnu.regexp.REMatch;
import java.io.File;
import java.io.FileReader;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Set;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import quipu.maxent.ContextGenerator;
import quipu.maxent.DataIndexer;
import quipu.maxent.GIS;
import quipu.maxent.GISModel;
import quipu.maxent.MaxentModel;
import quipu.maxent.PerlHelp;
import quipu.opennlp.preprocess.SentenceDetector;
import quipu.opennlp.util.Pair;
import quipu.opennlp.xml.NLPDocument;
import quipu.opennlp.xml.XmlUtils;

/* loaded from: input_file:quipu/grok/preprocess/sentdetect/SentenceDetectorME.class */
/**
 * Sentence detector driven by a {@link MaxentModel}.
 *
 * <p>Every position matched by {@code PerlHelp.peqRE} is treated as a candidate
 * sentence boundary. The model is evaluated on the context produced by
 * {@link SDContextGenerator} for that position, and the text is split right
 * after the candidate whenever the model's best outcome is {@code "T"}.
 *
 * <p>NOTE(review): the {@code probs} field is still written on every
 * evaluation, so instances should not be assumed safe for concurrent use.
 */
public class SentenceDetectorME implements SentenceDetector {
    /** Builds the model context for a (text, candidate index) pair. */
    private static final ContextGenerator cgen = new SDContextGenerator();

    /** Model used to classify boundary candidates; stays null if loading failed in the (String, String) constructor. */
    private MaxentModel model;

    /** Outcome probabilities returned by the most recent model evaluation. */
    double[] probs;

    /**
     * No longer used by {@link #sentDetect(String)}; kept only so that any
     * package-local code referring to this field keeps compiling. It was
     * always empty between calls, and still is.
     */
    ArrayList sents = new ArrayList();

    /**
     * Replaces each {@code SENT} element of the document with one new
     * {@code SENT} element per sentence detected in its {@code LEX} value.
     *
     * @param nLPDocument document whose {@code SENT} elements are re-segmented in place
     */
    @Override // quipu.opennlp.Pipelink
    public void process(NLPDocument nLPDocument) {
        XmlDocument nLPDoc = nLPDocument.getNLPDoc();
        NodeList sentElements = nLPDoc.getElementsByTagName("SENT");
        int count = sentElements.getLength();

        // Snapshot the nodes first: the loop below inserts and removes SENT
        // elements, so iterating the NodeList directly would be unreliable.
        Node[] snapshot = new Node[count];
        for (int i = 0; i < count; i++) {
            snapshot[i] = sentElements.item(i);
        }

        for (int i = 0; i < count; i++) {
            Node original = snapshot[i];
            Node parent = original.getParentNode();
            String text = XmlUtils.getValue((Element) original, "LEX");
            if (text == null) {
                // Defensive: no text could be extracted, so leave the node
                // untouched rather than dropping it from the document.
                continue;
            }
            String[] sentences = sentDetect(text);
            for (int j = 0; j < sentences.length; j++) {
                Element replacement = XmlUtils.createSENT(nLPDoc, sentences[j]);
                if (replacement != null) {
                    parent.insertBefore(replacement, original);
                }
            }
            parent.removeChild(original);
        }
    }

    /**
     * Returns the prerequisites of this pipeline link.
     *
     * @return a new, empty set
     */
    @Override // quipu.opennlp.Pipelink
    public Set requires() {
        return new HashSet();
    }

    /**
     * Splits a text into sentences.
     *
     * <p>Results are collected in a method-local list, so a failure during one
     * call cannot leak partial results into a later call.
     *
     * @param str text to split; {@code null} yields an empty array
     * @return the trimmed, non-empty sentences in order of appearance
     */
    @Override // quipu.opennlp.preprocess.SentenceDetector
    public String[] sentDetect(String str) {
        if (str == null) {
            return new String[0];
        }

        // The context generator is handed the StringBuffer itself, so the
        // buffer type is kept as-is.
        StringBuffer text = new StringBuffer(str);
        ArrayList<String> sentences = new ArrayList<String>();
        int sentenceStart = 0;

        for (REMatch candidate : PerlHelp.peqRE.getAllMatches(text)) {
            int boundary = candidate.getStartIndex();
            this.probs = this.model.eval(cgen.getContext(new Pair(text, Integer.valueOf(boundary))));
            if ("T".equals(this.model.getBestOutcome(this.probs))) {
                // The sentence includes the boundary character itself.
                addIfNotBlank(sentences, text.substring(sentenceStart, boundary + 1));
                sentenceStart = boundary + 1;
            }
        }

        // Whatever follows the last accepted boundary forms the final sentence.
        if (sentenceStart < text.length()) {
            addIfNotBlank(sentences, text.substring(sentenceStart));
        }
        return sentences.toArray(new String[sentences.size()]);
    }

    /** Appends the trimmed form of {@code raw} to {@code target} unless it is empty. */
    private static void addIfNotBlank(ArrayList<String> target, String raw) {
        String trimmed = raw.trim();
        if (trimmed.length() > 0) {
            target.add(trimmed);
        }
    }

    /**
     * Trains a model from the events collected out of a training file.
     *
     * <p>{@code strArr[0]} is the training file; {@code strArr[1]} and
     * {@code strArr[2]} are passed unchanged to {@code GIS.trainModel}.
     * NOTE(review): the constants 3 and 100 are presumably a feature cutoff
     * and an iteration count; confirm against the quipu.maxent API.
     *
     * @param strArr command-line arguments; at least three are required
     */
    public static void main(String[] strArr) {
        if (strArr == null || strArr.length < 3) {
            System.err.println("Usage: SentenceDetectorME <training-file> <model-arg-1> <model-arg-2>");
            return;
        }
        try {
            FileReader trainingData = new FileReader(new File(strArr[0]));
            try {
                GIS.trainModel(strArr[1], strArr[2], new DataIndexer(new SDEventCollector(trainingData).getEvents(), 3), 100);
            } finally {
                // Release the file handle whether or not training succeeded.
                trainingData.close();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Creates a detector whose model is built by {@code GISModel(String, String)}.
     *
     * <p>If the model cannot be built, the stack trace is printed and the
     * detector is left without a model; later calls to
     * {@link #sentDetect(String)} that reach a boundary candidate will then fail.
     *
     * @param str  first argument passed to the {@code GISModel} constructor
     * @param str2 second argument passed to the {@code GISModel} constructor
     */
    public SentenceDetectorME(String str, String str2) {
        try {
            this.model = new GISModel(str, str2);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Creates a detector whose model is built by
     * {@code GISModel(InputStream, InputStream)}.
     *
     * @param inputStream  first stream passed to the {@code GISModel} constructor
     * @param inputStream2 second stream passed to the {@code GISModel} constructor
     */
    public SentenceDetectorME(InputStream inputStream, InputStream inputStream2) {
        this.model = new GISModel(inputStream, inputStream2);
    }
}
