package edu.stanford.nlp.pipeline;

import edu.stanford.nlp.classify.LinearClassifier;
import edu.stanford.nlp.ie.pascal.PascalTemplate;
import edu.stanford.nlp.international.morph.MorphoFeatures;
import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.ling.CoreAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.semgraph.semgrex.ssurgeon.AddDep;
import edu.stanford.nlp.semgraph.semgrex.ssurgeon.SsurgeonPattern;
import edu.stanford.nlp.sequences.SeqClassifierFlags;
import edu.stanford.nlp.tagger.maxent.TaggerConfig;
import edu.stanford.nlp.util.StringUtils;
import java.io.File;
import java.io.PrintWriter;
import java.util.List;
import java.util.Properties;
import junit.framework.TestCase;

/**
 * Integration tests for {@link TokensRegexNERAnnotator}.
 *
 * <p>Decompiled source; loaded from
 * input_file:edu/stanford/nlp/pipeline/TokensRegexNERAnnotatorITest.class
 */
public class TokensRegexNERAnnotatorITest extends TestCase {
    /** Annotator name handed to the property-based {@code TokensRegexNERAnnotator} constructor. */
    private static final String REGEX_ANNOTATOR_NAME = "tokensregexner";
    /** Absolute path of the mapping file used to build the shared {@code cased}/{@code caseless} annotators. */
    private static final String MAPPING = "/u/nlp/data/TAC-KBP2010/sentence_extraction/itest_map";
    /** Shared base pipeline; created once by {@code setUp()} and used by {@code createDocument}. */
    private static StanfordCoreNLP pipeline;
    /**
     * Annotator built from {@code MAPPING} with the boolean constructor argument set to {@code true}
     * (presumably "ignore case" - confirm against the constructor).
     */
    private static Annotator caseless;
    /** Annotator built from {@code MAPPING} via the single-argument constructor. */
    private static Annotator cased;
    /** Annotator used by the mapping-file tests; {@code setUp()} points it at {@code cased}. */
    private static Annotator annotator;

    /**
     * Custom annotation key with {@code String} values. {@code testTokensRegexCustomAnnotate}
     * refers to it by its binary class name to check that a mapping column can be written to a
     * user-supplied annotation.
     *
     * <p>Decompiled source; loaded from
     * input_file:edu/stanford/nlp/pipeline/TokensRegexNERAnnotatorITest$TestAnnotation.class
     */
    public static class TestAnnotation implements CoreAnnotation<String> {
        /**
         * @return the value type stored under this key, always {@code String.class}
         */
        @Override
        public Class<String> getType() {
            final Class<String> valueType = String.class;
            return valueType;
        }
    }

    /**
     * Lazily builds the shared pipeline and the mapping-file annotators.
     *
     * <p>The work is done only while {@code pipeline} is still {@code null}; later calls return
     * immediately. Initialisation is guarded by the class lock.
     *
     * @throws Exception if building the pipeline or either annotator fails
     */
    public void setUp() throws Exception {
        synchronized (TokensRegexNERAnnotatorITest.class) {
            if (pipeline != null) {
                // Already initialised by an earlier test.
                return;
            }
            Properties pipelineProps = new Properties();
            pipelineProps.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner");
            pipeline = new StanfordCoreNLP(pipelineProps);
            caseless = new TokensRegexNERAnnotator(MAPPING, true);
            cased = new TokensRegexNERAnnotator(MAPPING);
            annotator = cased;
        }
    }

    /**
     * Creates an annotator named {@code REGEX_ANNOTATOR_NAME} from the given properties.
     *
     * @param properties configuration passed straight to the annotator constructor
     * @return the newly constructed annotator
     */
    protected static TokensRegexNERAnnotator getTokensRegexNerAnnotator(Properties properties) {
        TokensRegexNERAnnotator created = new TokensRegexNERAnnotator(REGEX_ANNOTATOR_NAME, properties);
        return created;
    }

    /**
     * Creates an annotator from in-memory mapping rows, starting from empty properties.
     *
     * @param strArr mapping rows, one {@code String[]} of column values per row
     * @param z value stored under the {@code tokensregexner.ignorecase} property
     * @return the newly constructed annotator
     * @throws Exception if the temporary mapping file cannot be written
     */
    protected static TokensRegexNERAnnotator getTokensRegexNerAnnotator(String[][] strArr, boolean z) throws Exception {
        Properties emptyProps = new Properties();
        return getTokensRegexNerAnnotator(emptyProps, strArr, z);
    }

    /**
     * Writes the given mapping rows to a temporary file and builds an annotator that reads it.
     *
     * <p>Each row is written as one line, its columns joined with
     * {@code LinearClassifier.TEXT_SERIALIZATION_DELIMITER}. The supplied {@code properties}
     * object is modified in place: {@code tokensregexner.mapping} is set to the temporary file's
     * path and {@code tokensregexner.ignorecase} to {@code z}.
     *
     * @param properties base configuration; mutated as described above
     * @param strArr mapping rows, one {@code String[]} of column values per row
     * @param z value stored under the {@code tokensregexner.ignorecase} property
     * @return the newly constructed annotator
     * @throws Exception if the temporary file cannot be created or written
     */
    protected static TokensRegexNERAnnotator getTokensRegexNerAnnotator(Properties properties, String[][] strArr, boolean z) throws Exception {
        File patternFile = File.createTempFile("tokensregexnertest.patterns", "txt");
        patternFile.deleteOnExit();
        String patternPath = patternFile.getAbsolutePath();
        // try-with-resources closes the writer even when writing a row throws.
        try (PrintWriter writer = IOUtils.getPrintWriter(patternPath)) {
            for (String[] row : strArr) {
                writer.println(StringUtils.join(row, LinearClassifier.TEXT_SERIALIZATION_DELIMITER));
            }
        }
        properties.setProperty("tokensregexner.mapping", patternPath);
        properties.setProperty("tokensregexner.ignorecase", String.valueOf(z));
        return new TokensRegexNERAnnotator(REGEX_ANNOTATOR_NAME, properties);
    }

    /**
     * Wraps the text in an {@code Annotation} and runs the shared base pipeline over it.
     *
     * <p>Requires {@code pipeline} to have been initialised by {@code setUp()}.
     *
     * @param str raw text of the document
     * @return the annotated document
     */
    protected static Annotation createDocument(String str) {
        Annotation document = new Annotation(str);
        pipeline.annotate(document);
        return document;
    }

    /**
     * Asserts that the tokens carry exactly the expected named-entity tags, position by position.
     *
     * @param list tokens to inspect
     * @param strArr expected NER tag for each token, in token order
     */
    private static void checkNerTags(List<CoreLabel> list, String... strArr) {
        assertEquals(strArr.length, list.size());
        int index = 0;
        for (String expectedTag : strArr) {
            CoreLabel token = list.get(index);
            String message = "Mismatch for token tag NER " + index + AddDep.ATOM_DELIMITER + token;
            String actualTag = token.get(CoreAnnotations.NamedEntityTagAnnotation.class);
            assertEquals(message, expectedTag, actualTag);
            index++;
        }
    }

    /**
     * Asserts that each token holds the expected value under the given annotation key.
     *
     * @param list tokens to inspect
     * @param cls annotation key whose value is compared
     * @param strArr expected value for each token, in token order; entries may be {@code null}
     */
    private static void checkTags(List<CoreLabel> list, Class cls, String... strArr) {
        assertEquals(strArr.length, list.size());
        int index = 0;
        while (index < strArr.length) {
            CoreLabel token = list.get(index);
            String message = "Mismatch for token tag " + cls + AddDep.ATOM_DELIMITER + index + AddDep.ATOM_DELIMITER + token;
            assertEquals(message, strArr[index], token.get(cls));
            index++;
        }
    }

    /**
     * Overwrites the value stored under the given annotation key on every token.
     *
     * @param list tokens to modify
     * @param cls annotation key to set
     * @param strArr new value for each token, in token order; must match the token count
     */
    private static void reannotate(List<CoreLabel> list, Class cls, String... strArr) {
        assertEquals(strArr.length, list.size());
        int position = 0;
        for (String value : strArr) {
            list.get(position).set(cls, value);
            position++;
        }
    }

    /* JADX WARN: Type inference failed for: r0v1, types: [java.lang.String[], java.lang.String[][]] */
    public void testTokensRegexSyntax() throws Exception {
        ?? r0 = {new String[]{"( /University/ /of/ [ {ner:LOCATION} ] )", "SCHOOL"}};
        TokensRegexNERAnnotator tokensRegexNerAnnotator = getTokensRegexNerAnnotator(r0, false);
        Annotation createDocument = createDocument("University of Alaska is located in Alaska.");
        tokensRegexNerAnnotator.annotate(createDocument);
        List list = (List) createDocument.get(CoreAnnotations.TokensAnnotation.class);
        checkNerTags(list, "ORGANIZATION", "ORGANIZATION", "ORGANIZATION", SeqClassifierFlags.DEFAULT_BACKGROUND_SYMBOL, SeqClassifierFlags.DEFAULT_BACKGROUND_SYMBOL, SeqClassifierFlags.DEFAULT_BACKGROUND_SYMBOL, "LOCATION", SeqClassifierFlags.DEFAULT_BACKGROUND_SYMBOL);
        reannotate(list, CoreAnnotations.NamedEntityTagAnnotation.class, SeqClassifierFlags.DEFAULT_BACKGROUND_SYMBOL, SeqClassifierFlags.DEFAULT_BACKGROUND_SYMBOL, "LOCATION", SeqClassifierFlags.DEFAULT_BACKGROUND_SYMBOL, SeqClassifierFlags.DEFAULT_BACKGROUND_SYMBOL, SeqClassifierFlags.DEFAULT_BACKGROUND_SYMBOL, "LOCATION", SeqClassifierFlags.DEFAULT_BACKGROUND_SYMBOL);
        tokensRegexNerAnnotator.annotate(createDocument);
        checkNerTags(list, "SCHOOL", "SCHOOL", "SCHOOL", SeqClassifierFlags.DEFAULT_BACKGROUND_SYMBOL, SeqClassifierFlags.DEFAULT_BACKGROUND_SYMBOL, SeqClassifierFlags.DEFAULT_BACKGROUND_SYMBOL, "LOCATION", SeqClassifierFlags.DEFAULT_BACKGROUND_SYMBOL);
        TokensRegexNERAnnotator tokensRegexNerAnnotator2 = getTokensRegexNerAnnotator(r0, true);
        Annotation createDocument2 = createDocument("university of alaska is located in alaska.");
        List list2 = (List) createDocument2.get(CoreAnnotations.TokensAnnotation.class);
        checkNerTags(list2, SeqClassifierFlags.DEFAULT_BACKGROUND_SYMBOL, SeqClassifierFlags.DEFAULT_BACKGROUND_SYMBOL, "LOCATION", SeqClassifierFlags.DEFAULT_BACKGROUND_SYMBOL, SeqClassifierFlags.DEFAULT_BACKGROUND_SYMBOL, SeqClassifierFlags.DEFAULT_BACKGROUND_SYMBOL, "LOCATION", SeqClassifierFlags.DEFAULT_BACKGROUND_SYMBOL);
        tokensRegexNerAnnotator.annotate(createDocument2);
        checkNerTags(list2, SeqClassifierFlags.DEFAULT_BACKGROUND_SYMBOL, SeqClassifierFlags.DEFAULT_BACKGROUND_SYMBOL, "LOCATION", SeqClassifierFlags.DEFAULT_BACKGROUND_SYMBOL, SeqClassifierFlags.DEFAULT_BACKGROUND_SYMBOL, SeqClassifierFlags.DEFAULT_BACKGROUND_SYMBOL, "LOCATION", SeqClassifierFlags.DEFAULT_BACKGROUND_SYMBOL);
        tokensRegexNerAnnotator2.annotate(createDocument2);
        checkNerTags(list2, "SCHOOL", "SCHOOL", "SCHOOL", SeqClassifierFlags.DEFAULT_BACKGROUND_SYMBOL, SeqClassifierFlags.DEFAULT_BACKGROUND_SYMBOL, SeqClassifierFlags.DEFAULT_BACKGROUND_SYMBOL, "LOCATION", SeqClassifierFlags.DEFAULT_BACKGROUND_SYMBOL);
    }

    /**
     * Checks that only part of a matched pattern is tagged: for "the movie Mud ..." only the
     * token "Mud" is expected to become {@code MOVIE}.
     *
     * <p>NOTE(review): {@code PascalTemplate.BACKGROUND_SYMBOL} and {@code TaggerConfig.NTHREADS}
     * look like decompiler substitutions for plain string literals in the mapping row - confirm
     * their values before relying on them.
     *
     * @throws Exception if the annotator cannot be built
     */
    public void testTokensRegexMatchGroup() throws Exception {
        final String background = SeqClassifierFlags.DEFAULT_BACKGROUND_SYMBOL;
        String[][] regexes = {
            {"( /the/? /movie/ (/[A-Z].*/+) )", "MOVIE", "", PascalTemplate.BACKGROUND_SYMBOL, TaggerConfig.NTHREADS}
        };
        TokensRegexNERAnnotator matchGroupAnnotator = getTokensRegexNerAnnotator(regexes, false);
        Annotation document = createDocument("the movie Mud was very muddy");
        matchGroupAnnotator.annotate(document);
        List<CoreLabel> tokens = document.get(CoreAnnotations.TokensAnnotation.class);
        checkNerTags(tokens, background, background, "MOVIE", background, background, background);
    }

    /**
     * Checks that a mapping with a {@code normalized} column sets both the NER tag and the
     * normalized NER tag on matching tokens and leaves the normalized tag {@code null} elsewhere.
     *
     * <p>NOTE(review): {@code PascalTemplate.BACKGROUND_SYMBOL}, {@code MorphoFeatures.KEY_VAL_DELIM}
     * and {@code SsurgeonPattern.PREDICATE_AND_TAG} look like decompiler substitutions for plain
     * string literals - confirm their values.
     *
     * @throws Exception if the annotator cannot be built
     */
    public void testTokensRegexNormalizedAnnotate() throws Exception {
        final String background = SeqClassifierFlags.DEFAULT_BACKGROUND_SYMBOL;
        Properties props = new Properties();
        props.setProperty("tokensregexner.mapping.header", "pattern,ner,normalized,overwrite,priority,group");
        String[][] regexes = {
            {"blue", "COLOR", "B", "", PascalTemplate.BACKGROUND_SYMBOL},
            {"red", "COLOR", "R", "", PascalTemplate.BACKGROUND_SYMBOL},
            {"green", "COLOR", "G", "", PascalTemplate.BACKGROUND_SYMBOL}
        };
        TokensRegexNERAnnotator colorAnnotator = getTokensRegexNerAnnotator(props, regexes, false);
        Annotation document = createDocument("These are all colors: blue, red, and green.");
        colorAnnotator.annotate(document);
        List<CoreLabel> tokens = document.get(CoreAnnotations.TokensAnnotation.class);
        // First confirm the tokenisation, so the positional expectations below line up.
        checkTags(tokens, CoreAnnotations.TextAnnotation.class, "These", "are", "all", "colors", MorphoFeatures.KEY_VAL_DELIM, "blue", ",", "red", ",", SsurgeonPattern.PREDICATE_AND_TAG, "green", ".");
        checkTags(tokens, CoreAnnotations.NamedEntityTagAnnotation.class, background, background, background, background, background, "COLOR", background, "COLOR", background, background, "COLOR", background);
        checkTags(tokens, CoreAnnotations.NormalizedNamedEntityTagAnnotation.class, null, null, null, null, null, "B", null, "R", null, null, "G", null);
    }

    /**
     * Checks that a mapping column can be routed to a custom annotation key: the {@code test}
     * column is bound to {@link TestAnnotation}, and both occurrences of "test" are expected to
     * carry the value {@code TEST} under that key.
     *
     * <p>NOTE(review): {@code PascalTemplate.BACKGROUND_SYMBOL} looks like a decompiler
     * substitution for a plain string literal - confirm its value.
     *
     * @throws Exception if the annotator cannot be built
     */
    public void testTokensRegexCustomAnnotate() throws Exception {
        Properties props = new Properties();
        props.setProperty("tokensregexner.mapping.header", "pattern,test,overwrite,priority,group");
        props.setProperty("tokensregexner.mapping.field.test", "edu.stanford.nlp.pipeline.TokensRegexNERAnnotatorITest$TestAnnotation");
        String[][] regexes = {{"test", "TEST", "", PascalTemplate.BACKGROUND_SYMBOL}};
        TokensRegexNERAnnotator customAnnotator = getTokensRegexNerAnnotator(props, regexes, true);
        Annotation document = createDocument("Marking all test as test");
        customAnnotator.annotate(document);
        List<CoreLabel> tokens = document.get(CoreAnnotations.TokensAnnotation.class);
        // First confirm the tokenisation, so the positional expectations below line up.
        checkTags(tokens, CoreAnnotations.TextAnnotation.class, "Marking", "all", "test", "as", "test");
        checkTags(tokens, TestAnnotation.class, null, null, "TEST", null, "TEST");
    }

    /**
     * Runs the mapping-file annotator over a simple sentence and checks the resulting NER tags.
     *
     * @throws Exception if annotation fails
     */
    public void testBasicMatching() throws Exception {
        final String background = SeqClassifierFlags.DEFAULT_BACKGROUND_SYMBOL;
        Annotation document = createDocument("President Barack Obama lives in Chicago , Illinois , and is a practicing Christian .");
        annotator.annotate(document);
        List<CoreLabel> tokens = document.get(CoreAnnotations.TokensAnnotation.class);
        checkNerTags(tokens,
                "TITLE", "PERSON", "PERSON", background, background,
                "LOCATION", background, "STATE_OR_PROVINCE", background, background,
                background, background, background, "IDEOLOGY", background);
    }

    /**
     * Checks which tags the mapping-file annotator replaces and which it leaves alone, using a
     * sentence that mentions "Ontario" twice and "Native American Church".
     *
     * @throws Exception if annotation fails
     */
    public void testOverwrite() throws Exception {
        final String background = SeqClassifierFlags.DEFAULT_BACKGROUND_SYMBOL;
        Annotation document = createDocument("I like Ontario Bank and Ontario Lake , and I like the Native American Church , too .");
        annotator.annotate(document);
        List<CoreLabel> tokens = document.get(CoreAnnotations.TokensAnnotation.class);
        checkNerTags(tokens,
                background, background, "ORGANIZATION", "ORGANIZATION", background,
                "STATE_OR_PROVINCE", "LOCATION", background, background, background,
                background, background, "RELIGION", "RELIGION", "RELIGION",
                background, background, background);
    }

    /**
     * Checks rule priority: in "Early Christianity" only the token "Christianity" is expected to
     * be tagged {@code RELIGION}.
     *
     * @throws Exception if annotation fails
     */
    public void testPriority() throws Exception {
        final String background = SeqClassifierFlags.DEFAULT_BACKGROUND_SYMBOL;
        Annotation document = createDocument("Christianity is of higher regex priority than Early Christianity . ");
        annotator.annotate(document);
        List<CoreLabel> tokens = document.get(CoreAnnotations.TokensAnnotation.class);
        checkNerTags(tokens,
                "RELIGION", background, background, background, background,
                background, background, background, "RELIGION", background);
    }

    /**
     * Checks that annotating a document which never went through the base pipeline is rejected
     * with a {@code RuntimeException}.
     *
     * @throws Exception declared for consistency with the other tests
     */
    public void testEmptyAnnotation() throws Exception {
        boolean rejected = false;
        try {
            annotator.annotate(new Annotation(""));
        } catch (RuntimeException expected) {
            // This is the outcome the test is looking for.
            rejected = true;
        }
        if (!rejected) {
            fail("Never expected to get this far... the annotator should have thrown an exception by now");
        }
    }
}
