package quipu.grok.preprocess.tokenize;

import com.sun.xml.tree.XmlDocument;
import gnu.regexp.REMatch;
import java.io.File;
import java.io.FileReader;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Set;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import quipu.maxent.ContextGenerator;
import quipu.maxent.DataIndexer;
import quipu.maxent.GIS;
import quipu.maxent.GISModel;
import quipu.maxent.MaxentModel;
import quipu.maxent.PerlHelp;
import quipu.opennlp.preprocess.Tokenizer;
import quipu.opennlp.util.Pair;
import quipu.opennlp.xml.NLPDocument;
import quipu.opennlp.xml.XmlUtils;

/* loaded from: input_file:quipu/grok/preprocess/tokenize/TokenizerME.class */
/**
 * A {@link Tokenizer} that decides where to split text by consulting a
 * {@link MaxentModel}.
 *
 * <p>Contexts for the model are produced by a shared
 * {@code TokContextGenerator}. Instances keep no per-call state, so a call to
 * {@link #tokenize(String)} that fails part-way cannot affect later calls.
 */
public class TokenizerME implements Tokenizer {
    /** Builds the model contexts for each candidate split position. */
    private static final ContextGenerator cg = new TokContextGenerator();

    /** Lazily resolved class object returned by {@link #requires()}. */
    private static Class sentenceDetectorClass;

    /**
     * The model consulted for split decisions. May be {@code null} if the
     * {@link #TokenizerME(String, String)} constructor failed to load it.
     */
    private MaxentModel model;

    /**
     * Creates a tokenizer whose model is built with
     * {@code new GISModel(str, str2)}.
     *
     * <p>Any exception raised while building the model is printed to standard
     * error and otherwise ignored; in that case the model stays {@code null}
     * and a later {@link #tokenize(String)} call that needs the model will
     * fail with a {@link NullPointerException}.
     *
     * @param str  first argument handed to the {@code GISModel} constructor
     * @param str2 second argument handed to the {@code GISModel} constructor
     */
    public TokenizerME(String str, String str2) {
        try {
            this.model = new GISModel(str, str2);
        } catch (Exception e) {
            // NOTE(review): the failure is only reported, not propagated, so
            // callers cannot detect it at construction time. Kept as-is
            // because existing callers may rely on construction not throwing.
            e.printStackTrace();
        }
    }

    /**
     * Creates a tokenizer whose model is built with
     * {@code new GISModel(inputStream, inputStream2)}.
     *
     * @param inputStream  first stream handed to the {@code GISModel} constructor
     * @param inputStream2 second stream handed to the {@code GISModel} constructor
     */
    public TokenizerME(InputStream inputStream, InputStream inputStream2) {
        this.model = new GISModel(inputStream, inputStream2);
    }

    /**
     * Replaces every {@code TOK} element of the document with new {@code TOK}
     * elements, one for each string that {@link #tokenize(String)} returns for
     * the element's {@code "LEX"} value.
     *
     * <p>New elements are inserted before the element they replace, in token
     * order; the original element is then removed from its parent. Tokens for
     * which {@code XmlUtils.createTOK} returns {@code null} are skipped.
     *
     * @param nLPDocument the document whose {@code TOK} elements are rewritten
     */
    @Override // quipu.opennlp.Pipelink
    public void process(NLPDocument nLPDocument) {
        XmlDocument doc = nLPDocument.getNLPDoc();
        NodeList found = doc.getElementsByTagName("TOK");

        // Copy the nodes out before touching the tree: the loop below inserts
        // and removes TOK elements, and DOM node lists are presumably live.
        int count = found.getLength();
        Node[] originals = new Node[count];
        for (int n = 0; n < count; n++) {
            originals[n] = found.item(n);
        }

        for (int n = 0; n < count; n++) {
            Node original = originals[n];
            Node parent = original.getParentNode();
            String[] pieces = tokenize(XmlUtils.getValue((Element) original, "LEX"));
            for (String piece : pieces) {
                Element replacement = XmlUtils.createTOK(doc, piece);
                if (replacement != null) {
                    parent.insertBefore(replacement, original);
                }
            }
            parent.removeChild(original);
        }
    }

    /**
     * Returns the set of classes this pipeline link depends on.
     *
     * @return a new {@link HashSet} holding the single class object loaded for
     *         {@code quipu.opennlp.preprocess.SentenceDetector}
     */
    @Override // quipu.opennlp.Pipelink
    public Set requires() {
        if (sentenceDetectorClass == null) {
            sentenceDetectorClass = class$("quipu.opennlp.preprocess.SentenceDetector");
        }
        Set required = new HashSet();
        required.add(sentenceDetectorClass);
        return required;
    }

    /**
     * Splits a string into tokens.
     *
     * <p>The text is first cut at every match of {@code PerlHelp.wsRE}. Each
     * chunk between two matches is handled as follows:
     * <ul>
     *   <li>If the trimmed chunk matches {@code PerlHelp.alphanumRE} it is
     *       emitted unchanged as one token.</li>
     *   <li>Otherwise every position of the chunk (including the position
     *       where the next {@code wsRE} match starts) is offered to the model.
     *       The chunk is split before a position when the model's best outcome
     *       is {@code "T"} or the character there is {@code '?'}, provided at
     *       least one character is pending and the extra period rule below
     *       holds. Whatever remains of the chunk is then emitted, which may be
     *       an empty string.</li>
     * </ul>
     * The text after the last {@code wsRE} match is appended as a final token
     * without consulting the model.
     *
     * @param str the text to tokenize; must not be {@code null}
     * @return the tokens in order of appearance; never {@code null}
     */
    @Override // quipu.opennlp.preprocess.Tokenizer
    public String[] tokenize(String str) {
        // Local accumulator: an exception thrown below cannot leave partial
        // results behind for the next call, and concurrent calls do not share it.
        ArrayList<String> tokens = new ArrayList<String>();

        StringBuffer text = new StringBuffer(str);
        REMatch[] gaps = PerlHelp.wsRE.getAllMatches(text);
        int tokStart = 0;

        for (int g = 0; g < gaps.length; g++) {
            int tokEnd = gaps[g].getStartIndex();
            String chunk = text.substring(tokStart, tokEnd).trim();

            if (PerlHelp.alphanumRE.isMatch(chunk)) {
                tokens.add(chunk);
            } else {
                for (int pos = tokStart; pos <= tokEnd; pos++) {
                    String outcome = this.model.getBestOutcome(
                            this.model.eval(cg.getContext(new Pair(text, Integer.valueOf(pos)))));
                    char current = text.charAt(pos);

                    boolean splitRequested = outcome.equals("T") || current == '?';
                    boolean hasPendingText = tokStart < pos;
                    // NOTE(review): a split before '.' is only permitted when the
                    // index of the current wsRE match equals the index of the
                    // last token collected so far. Preserved exactly from the
                    // original; confirm the intent before changing it.
                    boolean periodRuleHolds = current != '.' || g == tokens.size() - 1;

                    if (splitRequested && hasPendingText && periodRuleHolds) {
                        tokens.add(text.substring(tokStart, pos));
                        tokStart = pos;
                    }
                }
                if (tokStart <= tokEnd) {
                    tokens.add(text.substring(tokStart, tokEnd));
                }
            }
            // Resume scanning after the matched separator.
            tokStart = gaps[g].getEndIndex();
        }

        tokens.add(text.substring(tokStart));
        return tokens.toArray(new String[tokens.size()]);
    }

    /**
     * Trains a model from the events collected from a file.
     *
     * <p>The file named by {@code strArr[0]} is read through a
     * {@code TokEventCollector}; its events are indexed with a
     * {@code DataIndexer} (second argument {@code 10}) and passed to
     * {@code GIS.trainModel} together with {@code strArr[1]},
     * {@code strArr[2]} and the value {@code 100}. The reader is always closed
     * once training has finished or failed. Any exception is printed to
     * standard error and not rethrown.
     *
     * @param strArr {@code [0]} path of the file to read; {@code [1]} and
     *               {@code [2]} are forwarded to {@code GIS.trainModel}
     */
    public static void train(String[] strArr) {
        try {
            FileReader reader = new FileReader(new File(strArr[0]));
            try {
                GIS.trainModel(
                        strArr[1],
                        strArr[2],
                        new DataIndexer(new TokEventCollector(reader).getEvents(), 10),
                        100);
            } finally {
                // The original never closed the reader and leaked the file handle.
                reader.close();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Demo entry point: tokenizes a fixed sample sentence with an
     * {@code EnglishTokenizerME} and prints one token per line.
     *
     * @param strArr ignored
     */
    public static void main(String[] strArr) {
        String[] pieces = new EnglishTokenizerME()
                .tokenize("This, my friend, is a string with $5.00 in it, and Mr. Brown knows that.");
        for (String piece : pieces) {
            System.out.println(piece);
        }
    }

    /**
     * Loads a class by name, converting a failed lookup into an error.
     *
     * @param str the fully qualified class name
     * @return the class object returned by {@link Class#forName(String)}
     * @throws NoClassDefFoundError if the class cannot be found; the
     *         underlying {@link ClassNotFoundException} is attached as cause
     */
    static Class class$(String str) {
        try {
            return Class.forName(str);
        } catch (ClassNotFoundException e) {
            NoClassDefFoundError error = new NoClassDefFoundError(e.getMessage());
            error.initCause(e); // keep the original stack trace for diagnosis
            throw error;
        }
    }
}
