package edu.stanford.nlp.ling.tokensregex.demo;

import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.util.CoreMap;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.Iterator;
import java.util.List;
import java.util.Properties;

/* loaded from: input_file:edu/stanford/nlp/ling/tokensregex/demo/TokensRegexRetokenizeDemo.class */
/**
 * Demo that runs the same text through two CoreNLP pipelines and prints the
 * resulting tokens: one pipeline with the stock tokenizer, and one with an
 * additional {@code retokenize} step backed by
 * {@code edu.stanford.nlp.pipeline.TokensRegexAnnotator} and a rules file.
 *
 * <p>Command-line arguments (all optional, positional):
 * <ol>
 *   <li>path of the retokenize rules file</li>
 *   <li>path of a text file to annotate</li>
 *   <li>path of an output file (defaults to standard output)</li>
 * </ol>
 */
public class TokensRegexRetokenizeDemo {
    /**
     * Annotates {@code str} with the given pipeline and prints the top-level
     * annotation followed by one line per token (word, lemma, POS tag,
     * named-entity tag and normalized named-entity tag).
     *
     * @param stanfordCoreNLP pipeline used to annotate the text
     * @param str             text to annotate
     * @param printWriter     destination of the report; flushed before returning
     */
    public static void runPipeline(StanfordCoreNLP stanfordCoreNLP, String str, PrintWriter printWriter) {
        Annotation annotation = new Annotation(str);
        stanfordCoreNLP.annotate(annotation);

        printWriter.println();
        printWriter.println("The top level annotation");
        printWriter.println(annotation.toShorterString(new String[0]));

        List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
        // A pipeline without a sentence splitter leaves this annotation unset;
        // in that case there is nothing per-token to report.
        if (sentences != null) {
            for (CoreMap sentence : sentences) {
                List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
                if (tokens == null) {
                    continue;
                }
                for (CoreLabel token : tokens) {
                    printWriter.println("token: word=" + token.get(CoreAnnotations.TextAnnotation.class)
                            + ", lemma=" + token.get(CoreAnnotations.LemmaAnnotation.class)
                            + ", pos=" + token.get(CoreAnnotations.PartOfSpeechAnnotation.class)
                            + ", ne=" + token.get(CoreAnnotations.NamedEntityTagAnnotation.class)
                            + ", normalized=" + token.get(CoreAnnotations.NormalizedNamedEntityTagAnnotation.class));
                }
            }
        }
        printWriter.flush();
    }

    /**
     * Entry point: prints the tokenization of the input text with the default
     * pipeline, then with the TokensRegex-based retokenizing pipeline.
     *
     * @param strArr optional positional arguments: rules file, input text file, output file
     * @throws IOException if the output file cannot be opened
     */
    public static void main(String[] strArr) throws IOException {
        String rulesPath = strArr.length > 0
                ? strArr[0]
                : "edu/stanford/nlp/ling/tokensregex/demo/rules/retokenize.rules.txt";

        boolean writesToFile = strArr.length > 2;
        PrintWriter out = writesToFile ? new PrintWriter(strArr[2]) : new PrintWriter(System.out);
        try {
            String text = strArr.length > 1
                    ? IOUtils.slurpFileNoExceptions(strArr[1])
                    : "Do we tokenize on hyphens? one-two-three-four.  How about dates? 03-16-2015.";

            // Baseline pipeline. The properties must actually be handed to the
            // constructor; previously they were built and then thrown away, so
            // the annotator list configured here was never applied.
            Properties defaultProps = new Properties();
            defaultProps.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner");
            StanfordCoreNLP defaultPipeline = new StanfordCoreNLP(defaultProps);
            out.println("Default tokenization: ");
            runPipeline(defaultPipeline, text, out);

            // Same annotators plus a custom "retokenize" stage right after tokenize.
            Properties retokenizeProps = new Properties();
            retokenizeProps.setProperty("annotators", "tokenize,retokenize,ssplit,pos,lemma,ner");
            retokenizeProps.setProperty("customAnnotatorClass.retokenize", "edu.stanford.nlp.pipeline.TokensRegexAnnotator");
            retokenizeProps.setProperty("retokenize.rules", rulesPath);
            StanfordCoreNLP retokenizePipeline = new StanfordCoreNLP(retokenizeProps);
            out.println();
            out.println("Always tokenize hyphens: ");
            runPipeline(retokenizePipeline, text, out);
        } finally {
            if (writesToFile) {
                // Release the file handle we opened ourselves.
                out.close();
            } else {
                // Never close System.out; just push out anything still buffered.
                out.flush();
            }
        }
    }
}
