package opennlp.grok.preprocess.sentdetect;

import com.sun.xml.tree.XmlDocument;
import gnu.regexp.REException;
import gnu.regexp.REMatch;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.Set;
import opennlp.common.preprocess.SentenceDetector;
import opennlp.common.util.Pair;
import opennlp.common.xml.NLPDocument;
import opennlp.common.xml.XmlUtils;
import opennlp.maxent.ContextGenerator;
import opennlp.maxent.EventStream;
import opennlp.maxent.GIS;
import opennlp.maxent.GISModel;
import opennlp.maxent.MaxentModel;
import opennlp.maxent.PerlHelp;
import opennlp.maxent.PlainTextByLineDataStream;
import opennlp.maxent.io.SuffixSensitiveGISModelWriter;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

/* loaded from: input_file:opennlp/grok/preprocess/sentdetect/SentenceDetectorME.class */
/**
 * Sentence detector that asks a maximum-entropy model, for every candidate
 * position matched by {@code PerlHelp.peqRE}, whether the text should be split
 * there. The model is queried with the context produced by a
 * {@link ContextGenerator} for the pair (text buffer, candidate offset).
 */
public class SentenceDetectorME implements SentenceDetector {
    /** Model outcome that this class treats as "split the text here". */
    private static final String SPLIT_OUTCOME = "T";

    /** Model used to classify every candidate position. */
    private final MaxentModel model;

    /** Produces the model context for a (text buffer, offset) pair. */
    private final ContextGenerator cgen;

    /**
     * Creates a detector that uses a new {@code SDContextGenerator}.
     *
     * @param maxentModel model used to classify candidate positions
     */
    public SentenceDetectorME(MaxentModel maxentModel) {
        this(maxentModel, new SDContextGenerator());
    }

    /**
     * Creates a detector with an explicit context generator.
     *
     * @param maxentModel model used to classify candidate positions
     * @param contextGenerator generator of the contexts handed to the model
     */
    public SentenceDetectorME(MaxentModel maxentModel, ContextGenerator contextGenerator) {
        this.model = maxentModel;
        this.cgen = contextGenerator;
    }

    /**
     * Returns the index of the first non-whitespace character at or after
     * {@code i}, or {@code str.length()} when only whitespace remains.
     */
    private int getFirstNonWS(String str, int i) {
        while (i < str.length() && Character.isWhitespace(str.charAt(i))) {
            i++;
        }
        return i;
    }

    /** Returns true when the model's best outcome for the candidate at {@code pos} is the split outcome. */
    private boolean isBoundary(StringBuffer text, int pos) {
        Pair query = new Pair(text, Integer.valueOf(pos));
        String outcome = this.model.getBestOutcome(this.model.eval(this.cgen.getContext(query)));
        return outcome.equals(SPLIT_OUTCOME);
    }

    /** Returns true when the three characters right after {@code pos} are exactly "com". */
    private static boolean isFollowedByCom(StringBuffer text, int pos) {
        return text.length() >= pos + 4 && text.substring(pos + 1, pos + 4).equals("com");
    }

    /** Appends the trimmed form of {@code raw} to {@code sink} unless it is empty after trimming. */
    private static void addTrimmedIfNotEmpty(LinkedList sink, String raw) {
        String trimmed = raw.trim();
        if (trimmed.length() > 0) {
            sink.add(trimmed);
        }
    }

    /**
     * Trains a model from a training file and saves it.
     *
     * <p>Arguments: {@code strArr[0]} is the training file, {@code strArr[1]}
     * the file the model is written to. When more than three arguments are
     * given, {@code strArr[2]} and {@code strArr[3]} are parsed as integers and
     * passed on to {@link #train(EventStream, int, int)}; otherwise 100 and 5
     * are used. Any failure is reported by printing its stack trace.
     *
     * @param strArr command-line arguments as described above
     */
    public static void main(String[] strArr) throws IOException, REException {
        if (strArr.length < 2) {
            // Previously this case surfaced only as an ArrayIndexOutOfBoundsException stack trace.
            System.err.println("Usage: SentenceDetectorME <training-file> <model-file> [<iterations> <cutoff>]");
            return;
        }
        try {
            File trainingFile = new File(strArr[0]);
            File modelFile = new File(strArr[1]);
            GISModel trained;
            FileReader trainingReader = new FileReader(trainingFile);
            try {
                SDEventStream events = new SDEventStream(new PlainTextByLineDataStream(trainingReader));
                if (strArr.length > 3) {
                    trained = train(events, Integer.parseInt(strArr[2]), Integer.parseInt(strArr[3]));
                } else {
                    trained = train(events, 100, 5);
                }
            } finally {
                // The reader is only needed while training; release the file handle
                // even when training fails.
                trainingReader.close();
            }
            System.out.println("Saving the model as: " + strArr[1]);
            new SuffixSensitiveGISModelWriter(trained, modelFile).persist();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Replaces every {@code SENT} element of the document by one new
     * {@code SENT} element per detected sentence.
     *
     * <p>For each existing {@code SENT} element the value obtained through
     * {@code XmlUtils.getValue(element, "LEX")} is split with
     * {@link #sentDetect(String)}; the resulting elements are inserted before
     * the original element, which is then removed from its parent.
     *
     * @param nLPDocument document whose {@code SENT} elements are rewritten
     */
    public void process(NLPDocument nLPDocument) {
        XmlDocument nLPDoc = nLPDocument.getNLPDoc();
        NodeList sentElements = nLPDoc.getElementsByTagName("SENT");
        int count = sentElements.getLength();

        // Copy the nodes first: the loop below adds and removes SENT elements,
        // so the list must not be consulted while the tree is being modified.
        Node[] originals = new Node[count];
        for (int n = 0; n < count; n++) {
            originals[n] = sentElements.item(n);
        }

        for (int n = 0; n < count; n++) {
            Node original = originals[n];
            Node parent = original.getParentNode();
            String[] sentences = sentDetect(XmlUtils.getValue((Element) original, "LEX"));
            for (int s = 0; s < sentences.length; s++) {
                Element replacement = XmlUtils.createSENT(nLPDoc, sentences[s]);
                if (replacement != null) {
                    parent.insertBefore(replacement, original);
                }
            }
            parent.removeChild(original);
        }
    }

    /**
     * Returns a new, empty set.
     *
     * @return an empty {@link HashSet}
     */
    public Set requires() {
        return new HashSet();
    }

    /**
     * Splits a text into sentences.
     *
     * <p>Every match of {@code PerlHelp.peqRE} is a candidate. The text is cut
     * right after a candidate when the model's best outcome for it is "T" and
     * the candidate is not immediately followed by "com". Each piece is
     * trimmed, and pieces that are empty after trimming are dropped. Text
     * after the last cut is returned as a final sentence.
     *
     * @param str text to split
     * @return the trimmed sentences in order of appearance; empty when the
     *         text contains only whitespace
     */
    public String[] sentDetect(String str) {
        StringBuffer text = new StringBuffer(str);
        REMatch[] candidates = PerlHelp.peqRE.getAllMatches(text);
        LinkedList sentences = new LinkedList();
        int sentenceStart = 0;
        for (int c = 0; c < candidates.length; c++) {
            int pos = candidates[c].getStartIndex();
            if (isBoundary(text, pos) && !isFollowedByCom(text, pos)) {
                addTrimmedIfNotEmpty(sentences, text.substring(sentenceStart, pos + 1));
                sentenceStart = pos + 1;
            }
        }
        if (sentenceStart < text.length()) {
            addTrimmedIfNotEmpty(sentences, text.substring(sentenceStart));
        }
        return (String[]) sentences.toArray(new String[sentences.size()]);
    }

    /**
     * Returns the offsets at which sentences start.
     *
     * <p>The first entry is always the offset of the first non-whitespace
     * character of the text (or the text length when there is none). For each
     * examined candidate that the model classifies as "T", the offset of the
     * first non-whitespace character after it is appended, unless the
     * candidate directly follows the previous accepted candidate.
     *
     * @param str text to analyse
     * @return sentence start offsets, containing at least one entry
     */
    public int[] sentPosDetect(String str) {
        StringBuffer text = new StringBuffer(str);
        REMatch[] candidates = PerlHelp.peqRE.getAllMatches(text);
        LinkedList starts = new LinkedList();
        starts.addLast(Integer.valueOf(getFirstNonWS(str, 0)));

        // NOTE(review): the last match is never examined. Presumably this avoids
        // reporting a "start" at the end of text that finishes with punctuation,
        // but it also means text continuing after the last match yields no
        // position for it, unlike sentDetect. Behaviour kept as is; confirm.
        // NOTE(review): unlike sentDetect, there is no "com" guard here; confirm.
        int examined = candidates.length - 1;
        int afterPreviousBoundary = 0;
        for (int c = 0; c < examined; c++) {
            int pos = candidates[c].getStartIndex();
            if (!isBoundary(text, pos)) {
                continue;
            }
            // A boundary directly after the previous one (or at offset 0) does
            // not open a new sentence.
            if (afterPreviousBoundary != pos) {
                starts.addLast(Integer.valueOf(getFirstNonWS(str, pos + 1)));
            }
            afterPreviousBoundary = pos + 1;
        }

        int[] offsets = new int[starts.size()];
        for (int k = 0; k < offsets.length; k++) {
            offsets[k] = ((Integer) starts.removeFirst()).intValue();
        }
        return offsets;
    }

    /**
     * Trains a model from a file read line by line.
     *
     * <p>The file is wrapped in a {@code PlainTextByLineDataStream} and an
     * {@code SDEventStream} built with {@code str}, and the result of
     * {@code GIS.trainModel} is returned. The file reader is closed before
     * this method returns, whether or not training succeeds.
     *
     * @param file training data file
     * @param i second argument of {@code GIS.trainModel}
     * @param i2 third argument of {@code GIS.trainModel}
     * @param str second constructor argument of {@code SDEventStream}
     * @return the trained model
     * @throws IOException if the file cannot be opened or closed
     */
    public static GISModel train(File file, int i, int i2, String str) throws IOException, REException {
        FileReader trainingReader = new FileReader(file);
        try {
            return GIS.trainModel(new SDEventStream(new PlainTextByLineDataStream(trainingReader), str), i, i2);
        } finally {
            trainingReader.close();
        }
    }

    /**
     * Trains a model from an event stream by delegating to
     * {@code GIS.trainModel(eventStream, i, i2)}.
     *
     * @param eventStream training events
     * @param i second argument of {@code GIS.trainModel}
     * @param i2 third argument of {@code GIS.trainModel}
     * @return the trained model
     */
    public static GISModel train(EventStream eventStream, int i, int i2) throws IOException {
        return GIS.trainModel(eventStream, i, i2);
    }
}
