package edu.stanford.nlp.pipeline;

import edu.stanford.nlp.ling.CoreAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.tokensregex.TokenSequencePattern;
import edu.stanford.nlp.process.PTBTokenizer;
import edu.stanford.nlp.process.WordToSentenceProcessor;
import edu.stanford.nlp.util.ArraySet;
import edu.stanford.nlp.util.ArrayUtils;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.logging.Redwood;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Properties;
import java.util.Set;

/* loaded from: input_file:edu/stanford/nlp/pipeline/WordsToSentencesAnnotator.class */
/**
 * Annotator that groups the tokens of a document into sentences.
 *
 * <p>It reads the document text ({@code TextAnnotation}) and tokens
 * ({@code TokensAnnotation}), delegates the actual splitting to a
 * {@link WordToSentenceProcessor}, and stores the resulting list of sentence
 * annotations under {@code SentencesAnnotation}. Each sentence carries its
 * character offsets, token offsets, sentence index and (optionally) line number,
 * and every token is stamped with its 1-based index within the sentence, the
 * sentence index and (when available) the document id.
 */
public class WordsToSentencesAnnotator implements Annotator {
    private static final Redwood.RedwoodChannels log = Redwood.channels(WordsToSentencesAnnotator.class);

    /** Performs the actual token-list to sentence-list splitting. */
    private final WordToSentenceProcessor<CoreLabel> wts;

    /** When true, a progress message is logged at the start of {@link #annotate}. */
    private final boolean verbose;

    /**
     * When true, every list returned by the processor (including empty ones) counts as a
     * line, sentences get a {@code LineNumberAnnotation}, and empty lists are tolerated.
     */
    private final boolean countLineNumbers;

    /** Creates a non-verbose annotator backed by a default {@link WordToSentenceProcessor}. */
    public WordsToSentencesAnnotator() {
        this(false);
    }

    /**
     * Creates an annotator configured from properties.
     *
     * <ul>
     *   <li>If {@code StanfordCoreNLP.NEWLINE_SPLITTER_PROPERTY} is true, sentences are split
     *       on newline tokens only and line numbers are counted. The newline token set depends
     *       on {@code tokenize.whitespace} (see {@link #newlineProcessor}).</li>
     *   <li>Otherwise, if {@code ssplit.isOneSentence} is true, the processor is created in
     *       its one-sentence mode.</li>
     *   <li>Otherwise the processor is built from the {@code ssplit.*} boundary properties
     *       (see {@link #configuredProcessor}).</li>
     * </ul>
     *
     * The resulting annotator is never verbose.
     *
     * @param properties configuration to read; must not be null
     */
    public WordsToSentencesAnnotator(Properties properties) {
        boolean newlineSplitting =
                Boolean.parseBoolean(properties.getProperty(StanfordCoreNLP.NEWLINE_SPLITTER_PROPERTY, "false"));

        WordToSentenceProcessor<CoreLabel> processor;
        if (newlineSplitting) {
            processor = newlineProcessor(properties);
        } else if (Boolean.parseBoolean(properties.getProperty("ssplit.isOneSentence"))) {
            processor = new WordToSentenceProcessor<>(true);
        } else {
            processor = configuredProcessor(properties);
        }

        this.verbose = false;
        // Line numbers are only meaningful (and only counted) in newline-splitting mode.
        this.countLineNumbers = newlineSplitting;
        this.wts = processor;
    }

    /**
     * Creates an annotator backed by a default {@link WordToSentenceProcessor}.
     *
     * @param verbose whether to log a message each time {@link #annotate} runs
     */
    public WordsToSentencesAnnotator(boolean verbose) {
        this(verbose, false, new WordToSentenceProcessor<>());
    }

    /**
     * Creates an annotator with an explicitly configured {@link WordToSentenceProcessor}.
     * The arguments are forwarded to the processor in the same positions that the
     * {@link #WordsToSentencesAnnotator(Properties)} constructor uses for the corresponding
     * {@code ssplit.*} properties.
     *
     * @param verbose whether to log a message each time {@link #annotate} runs
     * @param boundaryTokenRegex first processor argument (cf. {@code ssplit.boundaryTokenRegex})
     * @param boundaryToDiscard third processor argument (cf. {@code ssplit.boundariesToDiscard})
     * @param htmlElementsToDiscard fourth processor argument
     *        (cf. {@code ssplit.htmlBoundariesToDiscard})
     * @param newlineIsSentenceBreak string converted with
     *        {@code WordToSentenceProcessor.stringToNewlineIsSentenceBreak}
     * @param boundaryMultiTokenRegex pattern compiled with {@link TokenSequencePattern#compile};
     *        may be null, in which case no pattern is passed
     * @param tokenRegexesToDiscard last processor argument
     *        (cf. {@code ssplit.tokenPatternsToDiscard})
     */
    public WordsToSentencesAnnotator(boolean verbose, String boundaryTokenRegex, Set<String> boundaryToDiscard,
            Set<String> htmlElementsToDiscard, String newlineIsSentenceBreak, String boundaryMultiTokenRegex,
            Set<String> tokenRegexesToDiscard) {
        this(verbose, false, new WordToSentenceProcessor<>(
                boundaryTokenRegex,
                null,
                boundaryToDiscard,
                htmlElementsToDiscard,
                WordToSentenceProcessor.stringToNewlineIsSentenceBreak(newlineIsSentenceBreak),
                boundaryMultiTokenRegex != null ? TokenSequencePattern.compile(boundaryMultiTokenRegex) : null,
                tokenRegexesToDiscard));
    }

    /** Canonical constructor; all other construction paths end up assigning these three fields. */
    private WordsToSentencesAnnotator(boolean verbose, boolean countLineNumbers,
            WordToSentenceProcessor<CoreLabel> wts) {
        this.verbose = verbose;
        this.countLineNumbers = countLineNumbers;
        this.wts = wts;
    }

    /**
     * Creates a non-verbose, line-counting annotator whose processor is constructed from the
     * given set of newline tokens.
     *
     * @param strArr the tokens handed to the processor as its boundary set
     * @return the new annotator
     */
    public static WordsToSentencesAnnotator newlineSplitter(String... strArr) {
        Set<String> newlineTokens = ArrayUtils.asImmutableSet(strArr);
        return new WordsToSentencesAnnotator(false, true, new WordToSentenceProcessor<>(newlineTokens));
    }

    /**
     * Creates a non-verbose annotator whose processor is constructed in its one-sentence mode
     * ({@code new WordToSentenceProcessor(true)}).
     *
     * @return the new annotator
     */
    public static WordsToSentencesAnnotator nonSplitter() {
        return new WordsToSentencesAnnotator(false, false, new WordToSentenceProcessor<>(true));
    }

    /**
     * Builds the processor used in newline-splitting mode. Without whitespace tokenization the
     * boundary is the tokenizer's newline token; with it, the boundary is {@code "\n"} plus the
     * platform line separator when that differs from {@code "\n"}.
     */
    private static WordToSentenceProcessor<CoreLabel> newlineProcessor(Properties properties) {
        boolean whitespaceTokenization =
                Boolean.parseBoolean(properties.getProperty("tokenize.whitespace", "false"));

        String[] newlineTokens;
        if (!whitespaceTokenization) {
            newlineTokens = new String[] {PTBTokenizer.getNewlineToken()};
        } else if (System.lineSeparator().equals("\n")) {
            newlineTokens = new String[] {"\n"};
        } else {
            newlineTokens = new String[] {System.lineSeparator(), "\n"};
        }
        Set<String> boundaries = ArrayUtils.asImmutableSet(newlineTokens);
        return new WordToSentenceProcessor<>(boundaries);
    }

    /** Builds the processor from the {@code ssplit.*} boundary properties. */
    private static WordToSentenceProcessor<CoreLabel> configuredProcessor(Properties properties) {
        String boundaryMultiTokenRegex = properties.getProperty("ssplit.boundaryMultiTokenRegex");
        Set<String> tokenRegexesToDiscard = commaSeparatedSet(properties.getProperty("ssplit.tokenPatternsToDiscard"));
        String boundaryTokenRegex = properties.getProperty("ssplit.boundaryTokenRegex");
        Set<String> boundariesToDiscard = commaSeparatedSet(properties.getProperty("ssplit.boundariesToDiscard"));
        Set<String> htmlBoundariesToDiscard =
                commaSeparatedSet(properties.getProperty("ssplit.htmlBoundariesToDiscard"));
        String newlineIsSentenceBreak = properties.getProperty(
                StanfordCoreNLP.NEWLINE_IS_SENTENCE_BREAK_PROPERTY,
                StanfordCoreNLP.DEFAULT_NEWLINE_IS_SENTENCE_BREAK);

        return new WordToSentenceProcessor<>(
                boundaryTokenRegex,
                null,
                boundariesToDiscard,
                htmlBoundariesToDiscard,
                WordToSentenceProcessor.stringToNewlineIsSentenceBreak(newlineIsSentenceBreak),
                boundaryMultiTokenRegex != null ? TokenSequencePattern.compile(boundaryMultiTokenRegex) : null,
                tokenRegexesToDiscard);
    }

    /**
     * Splits a property value on commas into a set.
     *
     * @return the set of comma-separated pieces, or null when {@code value} is null
     *         (null means "property not set" and is passed through to the processor as-is)
     */
    private static Set<String> commaSeparatedSet(String value) {
        if (value == null) {
            return null;
        }
        return Generics.newHashSet(Arrays.asList(value.split(",")));
    }

    /**
     * Splits the tokens of {@code annotation} into sentences and stores them under
     * {@code SentencesAnnotation}.
     *
     * @param annotation document annotation holding {@code TokensAnnotation} and, whenever at
     *        least one sentence is produced, {@code TextAnnotation}
     * @throws IllegalArgumentException if the annotation has no tokens key, or if a sentence
     *         has to be built but the annotation has no text
     * @throws IllegalStateException if the processor returns an empty sentence while line
     *         numbers are not being counted
     */
    @Override // edu.stanford.nlp.pipeline.Annotator
    public void annotate(Annotation annotation) {
        if (this.verbose) {
            log.info("Sentence splitting ...");
        }
        if (!annotation.containsKey(CoreAnnotations.TokensAnnotation.class)) {
            throw new IllegalArgumentException("WordsToSentencesAnnotator: unable to find words/tokens in: " + annotation);
        }

        String text = annotation.get(CoreAnnotations.TextAnnotation.class);
        List<CoreLabel> tokens = annotation.get(CoreAnnotations.TokensAnnotation.class);
        String docID = annotation.get(CoreAnnotations.DocIDAnnotation.class);

        // Running count of tokens placed into sentences so far; supplies TokenBegin/TokenEnd.
        // NOTE(review): if the processor drops tokens, these offsets presumably no longer line
        // up with positions in the document's own token list - confirm against callers.
        int tokenOffset = 0;
        int lineNumber = 0;
        // Annotations of the section currently open; copied onto each sentence inside it.
        CoreMap sectionAnnotations = null;
        List<CoreMap> sentences = new ArrayList<>();

        for (List<CoreLabel> sentenceTokens : this.wts.process(tokens)) {
            if (this.countLineNumbers) {
                lineNumber++;
            }
            if (sentenceTokens.isEmpty()) {
                if (!this.countLineNumbers) {
                    throw new IllegalStateException("unexpected empty sentence: " + sentenceTokens);
                }
                // In line-counting mode an empty list is just a line without a sentence.
                continue;
            }
            if (text == null) {
                // Fail with a clear message instead of a bare NullPointerException from substring.
                throw new IllegalArgumentException("WordsToSentencesAnnotator: unable to find text in: " + annotation);
            }

            CoreLabel firstToken = sentenceTokens.get(0);
            CoreLabel lastToken = sentenceTokens.get(sentenceTokens.size() - 1);
            int sentenceIndex = sentences.size();

            // The sentence text is the span from the first token's start to the last token's end.
            int begin = firstToken.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class);
            int end = lastToken.get(CoreAnnotations.CharacterOffsetEndAnnotation.class);
            Annotation sentence = new Annotation(text.substring(begin, end));
            sentence.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, begin);
            sentence.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, end);
            sentence.set(CoreAnnotations.TokensAnnotation.class, sentenceTokens);
            sentence.set(CoreAnnotations.TokenBeginAnnotation.class, tokenOffset);
            tokenOffset += sentenceTokens.size();
            sentence.set(CoreAnnotations.TokenEndAnnotation.class, tokenOffset);
            sentence.set(CoreAnnotations.SentenceIndexAnnotation.class, sentenceIndex);
            if (this.countLineNumbers) {
                sentence.set(CoreAnnotations.LineNumberAnnotation.class, lineNumber);
            }

            // A section opens on a sentence's first token and closes on a sentence's last token;
            // every sentence in between inherits the section's annotations it has not set itself.
            CoreMap sectionStart = firstToken.get(CoreAnnotations.SectionStartAnnotation.class);
            if (sectionStart != null) {
                sectionAnnotations = sectionStart;
            }
            if (sectionAnnotations != null) {
                ChunkAnnotationUtils.copyUnsetAnnotations(sectionAnnotations, sentence);
            }
            String sectionEnd = lastToken.get(CoreAnnotations.SectionEndAnnotation.class);
            if (sectionEnd != null) {
                sectionAnnotations = null;
            }

            // Set after the section copy so the document's own id always wins.
            if (docID != null) {
                sentence.set(CoreAnnotations.DocIDAnnotation.class, docID);
            }

            indexTokens(sentenceTokens, sentenceIndex, docID);
            sentences.add(sentence);
        }
        annotation.set(CoreAnnotations.SentencesAnnotation.class, sentences);
    }

    /**
     * Stamps each token with its 1-based position in the sentence, the sentence index and,
     * when {@code docID} is non-null, the document id.
     */
    private static void indexTokens(List<CoreLabel> sentenceTokens, int sentenceIndex, String docID) {
        int tokenIndex = 1;
        for (CoreLabel token : sentenceTokens) {
            token.setIndex(tokenIndex++);
            token.setSentIndex(sentenceIndex);
            if (docID != null) {
                token.setDocID(docID);
            }
        }
    }

    /**
     * @return an unmodifiable set of the annotation keys this annotator requires: text, tokens
     *         and the begin/end character offsets
     */
    @Override // edu.stanford.nlp.pipeline.Annotator
    public Set<Class<? extends CoreAnnotation>> requires() {
        return Collections.unmodifiableSet(new ArraySet<>(Arrays.asList(
                CoreAnnotations.TextAnnotation.class,
                CoreAnnotations.TokensAnnotation.class,
                CoreAnnotations.CharacterOffsetBeginAnnotation.class,
                CoreAnnotations.CharacterOffsetEndAnnotation.class)));
    }

    /**
     * @return a fresh, mutable set of the annotation keys this annotator provides: sentences
     *         and the sentence index
     */
    @Override // edu.stanford.nlp.pipeline.Annotator
    public Set<Class<? extends CoreAnnotation>> requirementsSatisfied() {
        return new HashSet<>(Arrays.asList(
                CoreAnnotations.SentencesAnnotation.class,
                CoreAnnotations.SentenceIndexAnnotation.class));
    }
}
