package edu.stanford.nlp.patterns;

import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.tokensregex.SequenceMatchRules;
import edu.stanford.nlp.patterns.Pattern;
import edu.stanford.nlp.patterns.PatternsAnnotations;
import edu.stanford.nlp.patterns.surface.Token;
import edu.stanford.nlp.pipeline.CoreNLPProtos;
import edu.stanford.nlp.pipeline.ProtobufAnnotationSerializer;
import edu.stanford.nlp.util.ArgumentParser;
import edu.stanford.nlp.util.CollectionValuedMap;
import edu.stanford.nlp.util.LuceneFieldType;
import edu.stanford.nlp.util.logging.Redwood;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.function.Function;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.Version;

/* loaded from: input_file:edu/stanford/nlp/patterns/LuceneSentenceIndex.class */
/**
 * A {@link SentenceIndex} stored in a Lucene index on disk.
 *
 * <p>Every sentence is written as one Lucene document. Each token contributes the string fields
 * produced by the transformer function handed to the constructor (and, on request, its processed
 * text), and the document carries its sentence id in the stored {@code "sentid"} field. Queries
 * are conjunctions of exact-term matches and return the ids of the matching sentences.
 *
 * <p>The writer is opened lazily by {@link #setIndexWriter()} and released by
 * {@link #closeIndexWriter()}; the reader/searcher pair is opened or refreshed by
 * {@link #setIndexReaderSearcher()}.
 *
 * @param <E> the pattern type the index is queried with
 */
public class LuceneSentenceIndex<E extends Pattern> extends SentenceIndex<E> {

    /** Name of the stored field that holds the sentence id of every document. */
    private static final String SENTENCE_ID_FIELD = "sentid";

    /** When true, the serialized tokens of each sentence are stored alongside its fields. */
    @ArgumentParser.Option(name = "saveTokens")
    boolean saveTokens;

    /** Writer currently open on {@link #indexDir}, or {@code null} when none is open. */
    IndexWriter indexWriter;

    /** Directory on disk that contains the Lucene index. */
    File indexDir;

    /** Lucene directory backing {@link #indexWriter}; {@code null} while no writer is open. */
    Directory dir;

    /** Analyzer handed to every writer configuration. */
    Analyzer analyzer = new KeywordAnalyzer();

    /** Configuration of the most recently opened writer. */
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_42, this.analyzer);

    /** Reader over the index, opened lazily by {@link #setIndexReaderSearcher()}. */
    DirectoryReader reader;

    /** Searcher wrapped around {@link #reader}. */
    IndexSearcher searcher;

    /** Serializer used to turn tokens into protobuf messages. */
    ProtobufAnnotationSerializer p = new ProtobufAnnotationSerializer();

    /**
     * Creates an index object bound to the given directory; nothing is opened on disk yet.
     *
     * @param properties options applied to this object through {@link ArgumentParser#fillOptions}
     * @param set stop words, passed to the superclass
     * @param str path of the directory holding the Lucene index
     * @param function maps a token to the field-name/value pairs that get indexed for it
     */
    public LuceneSentenceIndex(Properties properties, Set<String> set, String str, Function<CoreLabel, Map<String, String>> function) {
        super(set, function);
        ArgumentParser.fillOptions(this, properties);
        this.indexDir = new File(str);
    }

    /**
     * Opens the reader and searcher on first use, and afterwards swaps in a fresh reader whenever
     * the index on disk has changed.
     *
     * @throws IOException if the index cannot be opened or refreshed
     */
    void setIndexReaderSearcher() throws IOException {
        if (this.reader == null) {
            // Open the directory only here: opening it on every call would leak one handle per refresh.
            this.reader = DirectoryReader.open(FSDirectory.open(this.indexDir));
            this.searcher = new IndexSearcher(this.reader);
            return;
        }
        DirectoryReader refreshed = DirectoryReader.openIfChanged(this.reader);
        if (refreshed != null) {
            this.reader.close();
            this.reader = refreshed;
            this.searcher = new IndexSearcher(this.reader);
        }
    }

    /**
     * Returns the ids of all sentences whose documents contain every given field/value pair.
     * Values of the processed-text field that are stop words are left out of the query.
     *
     * @param collectionValuedMap field name mapped to the values that must all be present
     * @return the sentence ids of the matching documents
     * @throws IOException if the index cannot be read
     * @throws ParseException declared for callers; not thrown by the code in this method
     * @throws RuntimeException if no document matches
     */
    Set<String> queryIndexGetSentences(CollectionValuedMap<String, String> collectionValuedMap) throws IOException, ParseException {
        setIndexReaderSearcher();
        BooleanQuery query = new BooleanQuery();
        String processedTextKey = Token.getKeyForClass(PatternsAnnotations.ProcessedTextAnnotation.class);
        for (Map.Entry<String, Collection<String>> entry : collectionValuedMap.entrySet()) {
            boolean isProcessedText = entry.getKey().equals(processedTextKey);
            for (String value : entry.getValue()) {
                // Stop words are never indexed under the processed-text field, so requiring them would match nothing.
                if (isProcessedText && this.stopWords.contains(value.toLowerCase())) {
                    continue;
                }
                query.add(new BooleanClause(new TermQuery(new Term(entry.getKey(), value)), BooleanClause.Occur.MUST));
            }
        }
        TopDocs hits = this.searcher.search(query, Integer.MAX_VALUE);
        if (hits.totalHits <= 0) {
            throw new RuntimeException("how come no documents for " + collectionValuedMap + ". Query formed is " + query);
        }
        Set<String> sentenceIds = new HashSet<>();
        for (ScoreDoc hit : hits.scoreDocs) {
            sentenceIds.add(this.searcher.doc(hit.doc).get(SENTENCE_ID_FIELD));
        }
        return sentenceIds;
    }

    /**
     * Indexes every sentence of the map, then commits and closes the writer.
     *
     * @param map sentence id mapped to the data instance whose tokens are indexed
     * @param z whether the processed text of each token is indexed as well
     * @throws RuntimeException wrapping any {@link IOException} raised by Lucene
     */
    @Override
    public void add(Map<String, DataInstance> map, boolean z) {
        try {
            setIndexWriter();
            for (Map.Entry<String, DataInstance> sentence : map.entrySet()) {
                add(sentence.getValue().getTokens(), sentence.getKey(), z);
            }
            this.indexWriter.commit();
            closeIndexWriter();
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Looks up the matching sentences for each pattern, using its relevant words as the query.
     *
     * @param collection the patterns to look up
     * @return each pattern mapped to the ids of the sentences matching its relevant words
     * @throws RuntimeException wrapping any I/O or parse failure, or if a pattern matches nothing
     */
    @Override
    public Map<E, Set<String>> queryIndex(Collection<E> collection) {
        try {
            Map<E, Set<String>> sentencesByPattern = new HashMap<>();
            for (E pattern : collection) {
                sentencesByPattern.put(pattern, queryIndexGetSentences(pattern.getRelevantWords()));
            }
            return sentencesByPattern;
        } catch (ParseException | IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Prints the sentence id of every live document in the index to standard output.
     *
     * @throws IOException if the index cannot be read
     */
    public void listAllDocuments() throws IOException {
        setIndexReaderSearcher();
        // Document ids run up to maxDoc() and include deleted documents (update() deletes before
        // re-adding), so walk the whole id range and skip whatever is no longer live.
        Bits liveDocs = MultiFields.getLiveDocs(this.reader);
        int maxDoc = this.reader.maxDoc();
        for (int docId = 0; docId < maxDoc; docId++) {
            if (liveDocs != null && !liveDocs.get(docId)) {
                continue;
            }
            System.out.println(this.searcher.doc(docId).get(SENTENCE_ID_FIELD));
        }
    }

    /**
     * Decodes a sequence of length-delimited protobuf tokens back into core labels.
     *
     * @param bArr bytes as produced by {@link #getProtoBufAnnotation(List)}
     * @return the decoded tokens, in stream order
     * @throws IOException if the bytes cannot be parsed
     */
    private List<CoreLabel> readProtoBufAnnotation(byte[] bArr) throws IOException {
        ProtobufAnnotationSerializer serializer = new ProtobufAnnotationSerializer();
        List<CoreLabel> tokens = new ArrayList<>();
        ByteArrayInputStream in = new ByteArrayInputStream(bArr);
        CoreNLPProtos.Token proto;
        // parseDelimitedFrom yields null once the stream is exhausted.
        while ((proto = CoreNLPProtos.Token.parseDelimitedFrom(in)) != null) {
            tokens.add(serializer.fromProto(proto));
        }
        return tokens;
    }

    /**
     * Serializes the tokens as consecutive length-delimited protobuf messages.
     *
     * @param list the tokens to serialize
     * @return the concatenated serialized tokens
     * @throws IOException if writing a token fails
     */
    byte[] getProtoBufAnnotation(List<CoreLabel> list) throws IOException {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        for (CoreLabel token : list) {
            this.p.toProto(token).writeDelimitedTo(out);
        }
        out.flush();
        return out.toByteArray();
    }

    /**
     * Adds one sentence to the index as a single document. The writer is left open; the change
     * becomes durable on the next commit.
     *
     * @param list tokens of the sentence; {@code null} produces a document carrying only the id
     * @param str the sentence id
     * @param z whether the processed text of each non-stop-word token is indexed as well
     * @throws RuntimeException wrapping any {@link IOException} raised by Lucene
     */
    @Override
    protected void add(List<CoreLabel> list, String str, boolean z) {
        try {
            setIndexWriter();
            Document document = new Document();
            // Guard the loop itself: the null check used to come only after the list had been iterated.
            if (list != null) {
                for (CoreLabel token : list) {
                    for (Map.Entry<String, String> field : this.transformCoreLabeltoString.apply(token).entrySet()) {
                        document.add(new StringField(field.getKey(), field.getValue(), Field.Store.YES));
                    }
                    if (z) {
                        String processedText = (String) token.get(PatternsAnnotations.ProcessedTextAnnotation.class);
                        // A token without processed text has nothing to index under that field.
                        if (processedText != null && !this.stopWords.contains(processedText.toLowerCase())) {
                            document.add(new StringField(Token.getKeyForClass(PatternsAnnotations.ProcessedTextAnnotation.class), processedText, Field.Store.YES));
                        }
                    }
                }
            }
            document.add(new StringField(SENTENCE_ID_FIELD, str, Field.Store.YES));
            if (list != null && this.saveTokens) {
                document.add(new Field(SequenceMatchRules.TOKEN_PATTERN_RULE_TYPE, getProtoBufAnnotation(list), LuceneFieldType.NOT_INDEXED));
            }
            this.indexWriter.addDocument(document);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Commits pending changes, if a writer is open, and closes the writer.
     *
     * @throws RuntimeException wrapping any {@link IOException} raised by Lucene
     */
    @Override
    public void finishUpdating() {
        if (this.indexWriter != null) {
            try {
                this.indexWriter.commit();
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }
        closeIndexWriter();
    }

    /**
     * Replaces the document of a sentence: deletes every document carrying the id, then adds the
     * sentence again with its processed text. The writer is left open.
     *
     * @param list the new tokens of the sentence
     * @param str the sentence id
     * @throws RuntimeException wrapping any {@link IOException} raised by Lucene
     */
    @Override
    public void update(List<CoreLabel> list, String str) {
        try {
            setIndexWriter();
            this.indexWriter.deleteDocuments(new TermQuery(new Term(SENTENCE_ID_FIELD, str)));
            add(list, str, true);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Opens a writer on the index directory unless one is already open.
     *
     * @throws RuntimeException wrapping any {@link IOException} raised while opening
     */
    void setIndexWriter() {
        try {
            if (this.indexWriter == null) {
                this.dir = FSDirectory.open(this.indexDir);
                Redwood.log(Redwood.DBG, "Updating lucene index at " + this.indexDir);
                // Build a fresh configuration for every writer: later Lucene 4.x releases bind a
                // config to the first writer that receives it and reject any reuse.
                this.iwc = new IndexWriterConfig(Version.LUCENE_42, this.analyzer);
                this.indexWriter = new IndexWriter(this.dir, this.iwc);
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Closes the writer and its directory if they are open. If closing the writer fails, the
     * writer reference is kept so the close can be attempted again.
     *
     * @throws RuntimeException wrapping any {@link IOException} raised while closing
     */
    void closeIndexWriter() {
        try {
            if (this.indexWriter != null) {
                this.indexWriter.close();
            }
            this.indexWriter = null;
            if (this.dir != null) {
                this.dir.close();
                // Drop the closed handle so it can be neither reused nor closed a second time.
                this.dir = null;
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Copies the index directory to another location; does nothing when the target equals the
     * path of the current index directory.
     *
     * @param str path of the target directory
     * @throws RuntimeException wrapping any {@link IOException} raised by the copy
     */
    @Override
    public void saveIndex(String str) {
        if (this.indexDir.toString().equals(str)) {
            return;
        }
        try {
            IOUtils.cp(this.indexDir, new File(str), true);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Deletes whatever exists at the index location and builds a new index from the sentences.
     *
     * @param map sentence id mapped to the tokens of the sentence; {@code null} indexes nothing
     * @param properties options forwarded to the constructor
     * @param set stop words
     * @param str path of the index directory
     * @param function maps a token to the field-name/value pairs that get indexed for it
     * @return the new index
     * @throws RuntimeException wrapping any {@link IOException} raised while building
     */
    public static LuceneSentenceIndex createIndex(Map<String, List<CoreLabel>> map, Properties properties, Set<String> set, String str, Function<CoreLabel, Map<String, String>> function) {
        try {
            LuceneSentenceIndex<Pattern> index = new LuceneSentenceIndex<>(properties, set, str, function);
            System.out.println("Creating lucene index at " + str);
            IOUtils.deleteDirRecursively(index.indexDir);
            if (map != null) {
                index.setIndexWriter();
                // Index sentence by sentence. Routing this map through add(Map, boolean) would
                // treat the token lists as DataInstance objects and fail with a ClassCastException.
                for (Map.Entry<String, ?> sentence : map.entrySet()) {
                    index.add(tokensOf(sentence.getValue()), sentence.getKey(), true);
                }
                index.finishUpdating();
                index.setIndexReaderSearcher();
                System.out.println("Number of documents added are " + index.reader.numDocs());
                index.numAllSentences += index.reader.numDocs();
            }
            return index;
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Extracts the token list from a map value handed to {@link #createIndex}. Accepts a
     * {@link DataInstance} as well, for callers that passed such values through a raw map.
     */
    private static List<CoreLabel> tokensOf(Object sentence) {
        if (sentence instanceof DataInstance) {
            return ((DataInstance) sentence).getTokens();
        }
        @SuppressWarnings("unchecked") // the declared value type of the map is List<CoreLabel>
        List<CoreLabel> tokens = (List<CoreLabel>) sentence;
        return tokens;
    }

    /**
     * Opens an existing index from disk.
     *
     * @param properties options forwarded to the constructor
     * @param set stop words
     * @param str path of the index directory
     * @param function maps a token to the field-name/value pairs that get indexed for it
     * @return the loaded index
     * @throws RuntimeException wrapping any {@link IOException} raised while opening
     */
    public static LuceneSentenceIndex loadIndex(Properties properties, Set<String> set, String str, Function<CoreLabel, Map<String, String>> function) {
        try {
            LuceneSentenceIndex<Pattern> index = new LuceneSentenceIndex<>(properties, set, str, function);
            index.setIndexReaderSearcher();
            System.out.println("Number of documents read from the index " + str + " are " + index.reader.numDocs());
            index.numAllSentences += index.reader.numDocs();
            return index;
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }
}
