package edu.stanford.nlp.process;

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.Annotator;
import edu.stanford.nlp.pipeline.TokenizerAnnotator;
import edu.stanford.nlp.process.WordToSentenceProcessor;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.PropertiesUtils;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Set;
import junit.framework.TestCase;

/* loaded from: input_file:edu/stanford/nlp/process/WordToSentenceProcessorTest.class */
/**
 * JUnit 3 tests for {@link WordToSentenceProcessor}.
 *
 * <p>Each test tokenizes an input string, runs the resulting tokens through a
 * sentence splitter, and compares the produced sentences word by word against
 * independently tokenized expected sentences (see
 * {@link #checkResult(WordToSentenceProcessor, Annotator, String, String...)}).
 */
public class WordToSentenceProcessorTest extends TestCase {
    /** English tokenizer with default options; the default tokenizer for expected sentences. */
    private static final Annotator ptb = new TokenizerAnnotator(false, "en");

    /** English tokenizer configured with {@code tokenizeNLs=true}; always used on the test input. */
    private static final Annotator ptbNL = new TokenizerAnnotator(false, "en", "invertible,ptb3Escaping=true,tokenizeNLs=true");

    /** Whitespace tokenizer configured with {@code tokenizeNLs=true}; used for the Chinese expected sentences. */
    private static final Annotator wsNL = new TokenizerAnnotator(false, PropertiesUtils.asProperties("tokenize.whitespace", "true", "invertible", "true", "tokenizeNLs", "true"));

    /** Splitter built with the no-argument constructor. */
    private static final WordToSentenceProcessor<CoreLabel> wts = new WordToSentenceProcessor<>();

    /** Splitter built with the {@code boolean} constructor; {@link #testNullSplitter()} expects it not to split. */
    private static final WordToSentenceProcessor<CoreLabel> wtsNull = new WordToSentenceProcessor<>(true);

    /** Splitter with a boundary regex that also covers full-width CJK punctuation. */
    private static final WordToSentenceProcessor<CoreLabel> cwts = new WordToSentenceProcessor<>("[.。]|[!?！？]+", WordToSentenceProcessor.NewlineIsSentenceBreak.TWO_CONSECUTIVE, false);

    /**
     * Convenience overload of
     * {@link #checkResult(WordToSentenceProcessor, Annotator, String, String...)}
     * that tokenizes the expected sentences with {@link #ptb}.
     *
     * @param processor         sentence splitter under test
     * @param text              raw input text
     * @param expectedSentences expected sentences, in order
     */
    private static void checkResult(WordToSentenceProcessor<CoreLabel> processor, String text, String... expectedSentences) {
        checkResult(processor, ptb, text, expectedSentences);
    }

    /**
     * Tokenizes {@code text}, splits it with {@code processor}, and asserts that
     * the number of sentences, the length of each sentence, and the word of each
     * token match the expected sentences after tokenizing them with
     * {@code goldTokenizer}.
     *
     * @param processor         sentence splitter under test
     * @param goldTokenizer     tokenizer applied to each expected sentence
     * @param text              raw input text
     * @param expectedSentences expected sentences, in order
     */
    private static void checkResult(WordToSentenceProcessor<CoreLabel> processor, Annotator goldTokenizer, String text, String... expectedSentences) {
        // NOTE(review): the input is always tokenized with ptbNL, regardless of
        // goldTokenizer. This looks intentional (the default gold tokenizer is not
        // configured with tokenizeNLs, while several tests feed newlines to the
        // splitter) -- confirm before changing.
        Annotation input = new Annotation(text);
        ptbNL.annotate(input);
        List<CoreLabel> inputTokens = input.get(CoreAnnotations.TokensAnnotation.class);
        List<List<CoreLabel>> sentences = processor.process(inputTokens);

        assertEquals("Output number of sentences didn't match:\n" + Arrays.toString(expectedSentences) + " vs. \n" + sentences + '\n', expectedSentences.length, sentences.size());

        for (int i = 0; i < expectedSentences.length; i++) {
            Annotation gold = new Annotation(expectedSentences[i]);
            goldTokenizer.annotate(gold);
            List<CoreLabel> goldTokens = gold.get(CoreAnnotations.TokensAnnotation.class);
            List<CoreLabel> actualTokens = sentences.get(i);

            int goldSize = goldTokens.size();
            assertEquals("Sentence lengths didn't match:\n" + goldTokens + " vs. \n" + actualTokens + '\n', goldSize, actualTokens.size());

            for (int j = 0; j < goldSize; j++) {
                assertEquals(goldTokens.get(j).word(), actualTokens.get(j).word());
            }
        }
    }

    /** A single sentence must come back as a single sentence. */
    public void testNoSplitting() {
        checkResult(wts, "This should only be one sentence.", "This should only be one sentence.");
    }

    /** Text is split after {@code .}, {@code !}, {@code ?} and after a run of mixed {@code !}/{@code ?}. */
    public void testTwoSentences() {
        checkResult(wts, "This should be two sentences.  There is a split.", "This should be two sentences.", "There is a split.");
        checkResult(wts, "This should be two sentences!  There is a split.", "This should be two sentences!", "There is a split.");
        checkResult(wts, "This should be two sentences?  There is a split.", "This should be two sentences?", "There is a split.");
        checkResult(wts, "This should be two sentences!!!?!!  There is a split.", "This should be two sentences!!!?!!", "There is a split.");
    }

    /**
     * Covers unterminated final sentences, closing brackets/quotes after the
     * terminator, whitespace-only input, an embedded newline, and leading
     * closing punctuation.
     */
    public void testEdgeCases() {
        checkResult(wts, "This should be two sentences.  Second one incomplete", "This should be two sentences.", "Second one incomplete");
        checkResult(wts, "One incomplete sentence", "One incomplete sentence");
        checkResult(wts, "(Break after a parenthesis.)  (Or after \"quoted stuff!\")", "(Break after a parenthesis.)", "(Or after \"quoted stuff!\")");
        // Whitespace-only input is expected to yield no sentences at all.
        checkResult(wts, "  ");
        checkResult(wts, "This should be\n one sentence.", "This should be one sentence.");
        checkResult(wts, "'') Funny stuff joined on.", "'') Funny stuff joined on.");
    }

    /** The period in the abbreviation "Mr." must not end the sentence. */
    public void testMr() {
        checkResult(wts, "Mr. White got a loaf of bread", "Mr. White got a loaf of bread");
    }

    /** {@link #wtsNull} is expected to return the whole input as one sentence. */
    public void testNullSplitter() {
        checkResult(wtsNull, "This should be one sentence.  There is no split.", "This should be one sentence.  There is no split.");
    }

    /**
     * Exercises the three {@code NewlineIsSentenceBreak} settings on an input
     * containing a blank line and on an input containing only single newlines.
     */
    public void testParagraphStrategies() {
        WordToSentenceProcessor<CoreLabel> neverBreak = new WordToSentenceProcessor<>(WordToSentenceProcessor.NewlineIsSentenceBreak.NEVER);
        WordToSentenceProcessor<CoreLabel> alwaysBreak = new WordToSentenceProcessor<>(WordToSentenceProcessor.NewlineIsSentenceBreak.ALWAYS);
        WordToSentenceProcessor<CoreLabel> breakOnTwo = new WordToSentenceProcessor<>(WordToSentenceProcessor.NewlineIsSentenceBreak.TWO_CONSECUTIVE);

        String withBlankLine = "Depending on the options,\nthis could be all sorts of things,\n\n as I like chocolate. And cookies.";
        String singleNewlines = "Depending on the options,\nthis could be all sorts of things,\n as I like chocolate. And cookies.";

        checkResult(neverBreak, withBlankLine, "Depending on the options,\nthis could be all sorts of things,\n\nas I like chocolate.", "And cookies.");
        checkResult(alwaysBreak, withBlankLine, "Depending on the options,", "this could be all sorts of things,", "as I like chocolate.", "And cookies.");
        checkResult(breakOnTwo, withBlankLine, "Depending on the options, this could be all sorts of things,", "as I like chocolate.", "And cookies.");

        checkResult(neverBreak, singleNewlines, "Depending on the options,\nthis could be all sorts of things,\nas I like chocolate.", "And cookies.");
        checkResult(alwaysBreak, singleNewlines, "Depending on the options,", "this could be all sorts of things,", "as I like chocolate.", "And cookies.");
        checkResult(breakOnTwo, singleNewlines, "Depending on the options,\nthis could be all sorts of things,\nas I like chocolate.", "And cookies.");
    }

    /** With {@code p} and {@code chapter} passed as an element-name set, each element's text is split separately. */
    public void testXmlElements() {
        WordToSentenceProcessor<CoreLabel> xmlSplitter = new WordToSentenceProcessor<>(null, null, null, Generics.newHashSet(Arrays.asList("p", "chapter")), WordToSentenceProcessor.NewlineIsSentenceBreak.NEVER, null, null);
        checkResult(xmlSplitter, "<chapter>Chapter 1</chapter><p>This is text. So is this.</p> <p>One without end</p><p>Another</p><p>And another</p>", "Chapter 1", "This is text.", "So is this.", "One without end", "Another", "And another");
    }

    /**
     * With the pattern {@code chapter|preface} passed to the constructor, only
     * text inside those elements is expected in the output; the {@code title}
     * and {@code coda} text is expected to be dropped.
     */
    public void testRegion() {
        WordToSentenceProcessor<CoreLabel> regionSplitter = new WordToSentenceProcessor<>(WordToSentenceProcessor.DEFAULT_BOUNDARY_REGEX, WordToSentenceProcessor.DEFAULT_BOUNDARY_FOLLOWERS_REGEX, WordToSentenceProcessor.DEFAULT_SENTENCE_BOUNDARIES_TO_DISCARD, Generics.newHashSet(Collections.singletonList("p")), "chapter|preface", WordToSentenceProcessor.NewlineIsSentenceBreak.NEVER, null, null, false, false);
        checkResult(regionSplitter, "<title>Chris rules!</title><preface><p>Para one</p><p>Para two</p></preface><chapter><p>Text we like. Two sentences \n\n in it.</p></chapter><coda>Some more text here</coda>", "Para one", "Para two", "Text we like.", "Two sentences in it.");
    }

    /**
     * Uses the constructor that takes only a set of boundary tokens: each line
     * is expected to become one sentence, with blank lines kept as empty
     * sentences and sentence-final punctuation not splitting a line.
     */
    public void testBlankLines() {
        WordToSentenceProcessor<CoreLabel> lineSplitter = new WordToSentenceProcessor<>((Set<String>) Generics.newHashSet(WordToSentenceProcessor.DEFAULT_SENTENCE_BOUNDARIES_TO_DISCARD));
        String text = "Depending on the options,\nthis could be all sorts of things,\n\n as I like chocolate. And cookies.";

        checkResult(lineSplitter, text, "Depending on the options,", "this could be all sorts of things,", "", "as I like chocolate. And cookies.");
        // A single trailing newline is expected not to add an empty sentence...
        checkResult(lineSplitter, text + "\n", "Depending on the options,", "this could be all sorts of things,", "", "as I like chocolate. And cookies.");
        // ...whereas two trailing newlines are expected to add one.
        checkResult(lineSplitter, text + "\n\n", "Depending on the options,", "this could be all sorts of things,", "", "as I like chocolate. And cookies.", "");
    }

    /** The tokenizer must keep {@code !!} together as one token. */
    public void testExclamationPoint() {
        Annotation annotation = new Annotation("Foo!!");
        ptb.annotate(annotation);
        List<CoreLabel> tokens = annotation.get(CoreAnnotations.TokensAnnotation.class);
        assertEquals("Wrong double bang", "[Foo, !!]", tokens.toString());
    }

    /** Pre-segmented Chinese text is split on full-width terminators, with a following closing bracket kept in the same sentence. */
    public void testChinese() {
        checkResult(cwts, wsNL, "巴拉特 说 ： 「 我们 未 再 获得 任何 结果 。 」 ＜ 金融时报 ？ ＞ 《 金融时报 》 周三", "巴拉特 说 ： 「 我们 未 再 获得 任何 结果 。 」", "＜ 金融时报 ？ ＞", "《 金融时报 》 周三");
    }
}
