package edu.stanford.nlp.patterns;

import edu.stanford.nlp.classify.Classifier;
import edu.stanford.nlp.classify.Dataset;
import edu.stanford.nlp.classify.GeneralDataset;
import edu.stanford.nlp.classify.LinearClassifier;
import edu.stanford.nlp.classify.LinearClassifierFactory;
import edu.stanford.nlp.classify.LogPrior;
import edu.stanford.nlp.classify.LogisticClassifier;
import edu.stanford.nlp.classify.LogisticClassifierFactory;
import edu.stanford.nlp.classify.MultinomialLogisticClassifier;
import edu.stanford.nlp.classify.RVFDataset;
import edu.stanford.nlp.classify.SVMLightClassifier;
import edu.stanford.nlp.classify.SVMLightClassifierFactory;
import edu.stanford.nlp.classify.ShiftParamsLogisticClassifierFactory;
import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.ling.BasicDatum;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.ling.RVFDatum;
import edu.stanford.nlp.patterns.ConstantsAndVariables;
import edu.stanford.nlp.patterns.Pattern;
import edu.stanford.nlp.patterns.PatternFactory;
import edu.stanford.nlp.patterns.PatternsAnnotations;
import edu.stanford.nlp.patterns.PhraseScorer;
import edu.stanford.nlp.patterns.dep.DataInstanceDep;
import edu.stanford.nlp.patterns.dep.ExtractPhraseFromPattern;
import edu.stanford.nlp.patterns.dep.ExtractedPhrase;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.semgrex.ssurgeon.AddDep;
import edu.stanford.nlp.stats.ClassicCounter;
import edu.stanford.nlp.stats.Counter;
import edu.stanford.nlp.stats.Counters;
import edu.stanford.nlp.stats.TwoDimensionalCounter;
import edu.stanford.nlp.util.ArgumentParser;
import edu.stanford.nlp.util.ArrayUtils;
import edu.stanford.nlp.util.BinaryHeapPriorityQueue;
import edu.stanford.nlp.util.CollectionUtils;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.Pair;
import edu.stanford.nlp.util.Quintuple;
import edu.stanford.nlp.util.StringUtils;
import edu.stanford.nlp.util.TypesafeMap;
import edu.stanford.nlp.util.concurrent.AtomicDouble;
import edu.stanford.nlp.util.concurrent.ConcurrentHashCounter;
import edu.stanford.nlp.util.logging.Redwood;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.function.Function;
import java.util.stream.Collectors;

/* loaded from: input_file:edu/stanford/nlp/patterns/ScorePhrasesLearnFeatWt.class */
public class ScorePhrasesLearnFeatWt<E extends Pattern> extends PhraseScorer<E> {

    // Classifier family trained by learnClassifier; the constructor sets it to LR.
    @ArgumentParser.Option(name = "scoreClassifierType")
    ClassifierType scoreClassifierType;
    // Word -> embedding vector. Filled once by the constructor from constVars.wordVectorFile
    // (first whitespace-separated token is the word, remaining tokens are parsed as doubles).
    static Map<String, double[]> wordVectors;
    // Raw per-phrase feature values; cleared at the start of learnClassifier and dumped to
    // tempscorestrainer.txt at its end.
    public TwoDimensionalCounter<CandidatePhrase, ConstantsAndVariables.ScorePhraseMeasures> phraseScoresRaw;
    // Cache of wordClass(...) results keyed by phrase.
    ConcurrentHashMap<CandidatePhrase, Counter<Integer>> wordClassClustersForPhrase;
    // Both are reset to null by the constructor; where they are populated is not visible in this part of the file.
    Counter<CandidatePhrase> closeToPositivesFirstIter;
    Counter<CandidatePhrase> closeToNegativesFirstIter;
    // Memoised cosine similarities keyed by an order-insensitive pair of phrase strings.
    static Counter<PhrasePair> cacheSimilarities;
    // phrase text -> label -> statistics array indexed by PhraseScorer.Similarities ordinals
    // (NUMITEMS, AVGSIM, MAXSIM are the slots written by computeSimWithWordVectors).
    static Map<String, Map<String, double[]>> similaritiesWithLabeledPhrases;
    // Decompiler rendering of the compiler-generated assertion switch; read by the explicit
    // "if (!$assertionsDisabled && ...) throw new AssertionError" checks in this class.
    static final /* synthetic */ boolean $assertionsDisabled;

    /* loaded from: input_file:edu/stanford/nlp/patterns/ScorePhrasesLearnFeatWt$ChooseDatumsThread.class */
    /**
     * Worker that scans the sentences named by {@code keys} and gathers candidate phrases for one label.
     *
     * <p>{@link #call()} returns a five-part result: phrases taken from tokens carrying the label,
     * sampled negative phrases, the phrases returned by {@code chooseUnknownPhrases}, a counter of
     * phrases whose similarity to the positives exceeds the high-precision threshold, and a counter of
     * phrases whose similarity to the negatives exceeds it.
     *
     * <p>NOTE(review): the class implements the raw {@code Callable} type; callers have to cast the result.
     */
    public class ChooseDatumsThread implements Callable {
        // Sentence ids this worker is responsible for (looked up in sents).
        Collection<String> keys;
        Map<String, DataInstance> sents;
        // Annotation key under which a token's label for answerLabel is stored.
        Class answerClass;
        String answerLabel;
        // Stored by the constructor but not read inside call().
        TwoDimensionalCounter<CandidatePhrase, E> wordsPatExtracted;
        Counter<E> allSelectedPatterns;
        Counter<Integer> wordClassClustersOfPositive;
        // Passed to hasElement(...) to skip phrases that are already known, and forwarded as the
        // known-negative map to computeSimWithWordVectors.
        Map<String, Collection<CandidatePhrase>> allPossiblePhrases;
        boolean expandPos;
        boolean expandNeg;
        // Decompiler rendering of the compiler-generated assertion switch.
        static final /* synthetic */ boolean $assertionsDisabled;

        /**
         * @param str label to collect phrases for; also used to look up the answer annotation class
         * @param map sentence id -> sentence
         * @param collection ids of the sentences this worker processes
         * @param twoDimensionalCounter stored as wordsPatExtracted
         * @param counter stored as allSelectedPatterns
         * @param counter2 stored as wordClassClustersOfPositive
         * @param map2 stored as allPossiblePhrases
         * @param z whether to collect phrases similar to the positives (expandPos)
         * @param z2 whether to collect phrases similar to the negatives (expandNeg)
         */
        public ChooseDatumsThread(String str, Map<String, DataInstance> map, Collection<String> collection, TwoDimensionalCounter<CandidatePhrase, E> twoDimensionalCounter, Counter<E> counter, Counter<Integer> counter2, Map<String, Collection<CandidatePhrase>> map2, boolean z, boolean z2) {
            this.answerLabel = str;
            this.sents = map;
            this.keys = collection;
            this.wordsPatExtracted = twoDimensionalCounter;
            this.allSelectedPatterns = counter;
            this.wordClassClustersOfPositive = counter2;
            this.allPossiblePhrases = map2;
            this.answerClass = ScorePhrasesLearnFeatWt.this.constVars.getAnswerClass().get(this.answerLabel);
            this.expandNeg = z2;
            this.expandPos = z;
        }

        /**
         * Walks every token of every assigned sentence and sorts the token's phrase into the result sets.
         *
         * @return (positives, sampled negatives, phrases from chooseUnknownPhrases,
         *         close-to-positive similarities, close-to-negative similarities)
         * @throws RuntimeException if a token carries the label but has no longest-matched phrase for it
         */
        /* JADX WARN: Multi-variable type inference failed */
        @Override // java.util.concurrent.Callable
        public Quintuple<Set<CandidatePhrase>, Set<CandidatePhrase>, Set<CandidatePhrase>, Counter<CandidatePhrase>, Counter<CandidatePhrase>> call() throws Exception {
            // Both generators use the fixed seed 10, so the sampling sequence is the same on every run.
            // random feeds chooseUnknownPhrases; random2 drives negative sampling.
            Random random = new Random(10L);
            Random random2 = new Random(10L);
            // hashSet: positives, hashSet2: negatives, hashSet3: unknown phrases.
            HashSet hashSet = new HashSet();
            HashSet hashSet2 = new HashSet();
            HashSet hashSet3 = new HashSet();
            // classicCounter: close to positives, classicCounter2: close to negatives.
            ClassicCounter classicCounter = new ClassicCounter();
            ClassicCounter classicCounter2 = new ClassicCounter();
            // All phrases currently considered positive: learned words plus the seed dictionary.
            Set unionAsSet = CollectionUtils.unionAsSet(ScorePhrasesLearnFeatWt.this.constVars.getLearnedWords(this.answerLabel).keySet(), ScorePhrasesLearnFeatWt.this.constVars.getSeedLabelDictionary().get(this.answerLabel));
            // Phrases whose similarity has already been evaluated by this worker.
            HashSet hashSet4 = new HashSet();
            // NOTE(review): map is dereferenced below without a null check — confirm an entry exists for every label.
            Map<Class, Object> map = ScorePhrasesLearnFeatWt.this.constVars.getIgnoreWordswithClassesDuringSelection().get(this.answerLabel);
            // NOTE(review): i counts positive tokens but is never read.
            int i = 0;
            for (String str : this.keys) {
                DataInstance dataInstance = this.sents.get(str);
                List<CoreLabel> tokens = dataInstance.getTokens();
                CoreLabel[] coreLabelArr = (CoreLabel[]) tokens.toArray(new CoreLabel[tokens.size()]);
                for (int i2 = 0; i2 < coreLabelArr.length; i2++) {
                    CoreLabel coreLabel = coreLabelArr[i2];
                    // NOTE(review): throws NullPointerException if the token has no value for answerClass.
                    if (coreLabel.get(this.answerClass).equals(this.answerLabel)) {
                        // Token is labeled with answerLabel: treat its longest matched phrase as a positive.
                        i++;
                        CandidatePhrase candidatePhrase = (CandidatePhrase) ((Map) coreLabel.get(PatternsAnnotations.LongestMatchedPhraseForEachLabel.class)).get(this.answerLabel);
                        if (candidatePhrase == null) {
                            throw new RuntimeException("for sentence id " + str + " and token id " + i2 + " candidate is null for " + coreLabel.word() + " and longest matching" + coreLabel.get(PatternsAnnotations.LongestMatchedPhraseForEachLabel.class) + " and matched phrases are " + coreLabel.get(PatternsAnnotations.MatchedPhrases.class));
                        }
                        // Fall back to the single word when the matched phrase has no raw frequency entry.
                        if (!Data.rawFreq.containsKey(candidatePhrase)) {
                            candidatePhrase = CandidatePhrase.createOrGet(coreLabel.word());
                        }
                        // hasElement is defined outside this view; presumably it tests membership of the phrase
                        // in allPossiblePhrases for the label — TODO confirm.
                        if (!ScorePhrasesLearnFeatWt.hasElement(this.allPossiblePhrases, candidatePhrase, this.answerLabel) && !PatternFactory.ignoreWordRegex.matcher(candidatePhrase.getPhrase()).matches()) {
                            hashSet.add(candidatePhrase);
                        }
                    } else {
                        Map map2 = (Map) coreLabel.get(PatternsAnnotations.LongestMatchedPhraseForEachLabel.class);
                        // z: token is flagged by one of the ignore classes, or is a function word.
                        boolean z = false;
                        CandidatePhrase createOrGet = CandidatePhrase.createOrGet(coreLabel.word());
                        Iterator<Class> it = map.keySet().iterator();
                        while (true) {
                            if (!it.hasNext()) {
                                break;
                            }
                            // NOTE(review): unboxing throws NullPointerException if the annotation is absent on the token.
                            if (((Boolean) coreLabel.get(it.next())).booleanValue()) {
                                z = true;
                                // Prefer the phrase matched under "OTHERSEM" when one exists.
                                createOrGet = map2.containsKey("OTHERSEM") ? (CandidatePhrase) map2.get("OTHERSEM") : createOrGet;
                            }
                        }
                        if (!z) {
                            z = ScorePhrasesLearnFeatWt.this.constVars.functionWords.contains(coreLabel.word());
                        }
                        // z2: token has a matched phrase for some other label; z3: token is a negative candidate.
                        boolean z2 = false;
                        boolean z3 = false;
                        for (Map.Entry entry : map2.entrySet()) {
                            if (!((String) entry.getKey()).equals(this.answerLabel) && entry.getValue() != null) {
                                z2 = true;
                                z3 = true;
                                if (Data.rawFreq.containsKey(entry.getValue())) {
                                    createOrGet = (CandidatePhrase) entry.getValue();
                                }
                            }
                        }
                        if (!z2 && z) {
                            z3 = true;
                        }
                        // Keep a negative candidate with probability perSelectNeg.
                        if (z3 && random2.nextDouble() < ScorePhrasesLearnFeatWt.this.constVars.perSelectNeg) {
                            if (!$assertionsDisabled && createOrGet.getPhrase().isEmpty()) {
                                throw new AssertionError();
                            }
                            hashSet2.add(createOrGet);
                        }
                        // Token is neither another label's phrase nor ignorable: optionally test how similar it is
                        // to the known positives/negatives, once per distinct phrase.
                        if (!z2 && !z && ((this.expandPos || this.expandNeg) && !ScorePhrasesLearnFeatWt.hasElement(this.allPossiblePhrases, createOrGet, this.answerLabel) && !PatternFactory.ignoreWordRegex.matcher(createOrGet.getPhrase()).matches() && !hashSet4.contains(createOrGet))) {
                            if (!$assertionsDisabled && createOrGet == null) {
                                throw new AssertionError();
                            }
                            Pair<Counter<CandidatePhrase>, Counter<CandidatePhrase>> computeSimWithWordVectors = ScorePhrasesLearnFeatWt.this.constVars.useWordVectorsToComputeSim ? ScorePhrasesLearnFeatWt.this.computeSimWithWordVectors((List<CandidatePhrase>) Arrays.asList(createOrGet), unionAsSet, this.allPossiblePhrases, this.answerLabel) : ScorePhrasesLearnFeatWt.this.computeSimWithWordCluster(Arrays.asList(createOrGet), unionAsSet, new AtomicDouble());
                            // z4: phrase was accepted as close to the positives, so it is not tested against negatives.
                            boolean z4 = false;
                            if (this.expandPos) {
                                double count = computeSimWithWordVectors.first().getCount(createOrGet);
                                if (count > ScorePhrasesLearnFeatWt.this.constVars.similarityThresholdHighPrecision) {
                                    classicCounter.setCount(createOrGet, count);
                                    z4 = true;
                                }
                            }
                            if (this.expandNeg && !z4) {
                                // NOTE(review): computeSimWithWordCluster returns a pair whose second element is null,
                                // so this line throws NullPointerException when word vectors are disabled.
                                double count2 = computeSimWithWordVectors.second().getCount(createOrGet);
                                if (count2 > ScorePhrasesLearnFeatWt.this.constVars.similarityThresholdHighPrecision) {
                                    classicCounter2.setCount(createOrGet, count2);
                                }
                            }
                            hashSet4.add(createOrGet);
                        }
                    }
                }
                // The last argument always evaluates to Integer.MAX_VALUE.
                hashSet3.addAll(ScorePhrasesLearnFeatWt.this.chooseUnknownPhrases(dataInstance, random, ScorePhrasesLearnFeatWt.this.constVars.perSelectRand, ScorePhrasesLearnFeatWt.this.constVars.getAnswerClass().get(this.answerLabel), this.answerLabel, Math.max(0, Integer.MAX_VALUE)));
            }
            return new Quintuple<>(hashSet, hashSet2, hashSet3, classicCounter, classicCounter2);
        }

        // Decompiler rendering of the assertion-status initialisation the compiler generates.
        static {
            $assertionsDisabled = !ScorePhrasesLearnFeatWt.class.desiredAssertionStatus();
        }
    }

    /* loaded from: input_file:edu/stanford/nlp/patterns/ScorePhrasesLearnFeatWt$ClassifierType.class */
    /**
     * Classifier families selectable through the {@code scoreClassifierType} option.
     *
     * <p>{@code learnClassifier} has training branches for {@code LR}, {@code SVM}, {@code SHIFTLR}
     * and {@code LINEAR}; any other value ({@code DT}, {@code RF}) makes it throw a
     * {@code RuntimeException} ("cannot identify classifier ...").
     */
    public enum ClassifierType {
        DT,
        // Default chosen by the ScorePhrasesLearnFeatWt constructor.
        LR,
        RF,
        SVM,
        SHIFTLR,
        LINEAR
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: input_file:edu/stanford/nlp/patterns/ScorePhrasesLearnFeatWt$ComputeSim.class */
    /**
     * Task that scores a batch of candidate phrases against the positive phrases, using either
     * word vectors or word-class clusters depending on {@code constVars.useWordVectorsToComputeSim}.
     */
    public class ComputeSim implements Callable<Pair<Counter<CandidatePhrase>, Counter<CandidatePhrase>>> {
        List<CandidatePhrase> candidatePhrases;
        String label;
        // Updated by the word-cluster path with the largest similarity seen so far.
        AtomicDouble allMaxSim;
        Collection<CandidatePhrase> positivePhrases;
        // Only consulted by the word-vector path.
        Map<String, Collection<CandidatePhrase>> knownNegativePhrases;

        public ComputeSim(String str, List<CandidatePhrase> list, AtomicDouble atomicDouble, Collection<CandidatePhrase> collection, Map<String, Collection<CandidatePhrase>> map) {
            this.candidatePhrases = list;
            this.positivePhrases = collection;
            this.knownNegativePhrases = map;
            this.allMaxSim = atomicDouble;
            this.label = str;
        }

        /**
         * @return similarities to the positive phrases (first) and to the negative phrases (second);
         *         the second element is whatever the chosen similarity routine returns for it
         */
        /* JADX WARN: Can't rename method to resolve collision */
        @Override // java.util.concurrent.Callable
        public Pair<Counter<CandidatePhrase>, Counter<CandidatePhrase>> call() throws Exception {
            final ScorePhrasesLearnFeatWt<E> scorer = ScorePhrasesLearnFeatWt.this;
            if (scorer.constVars.useWordVectorsToComputeSim) {
                Pair<Counter<CandidatePhrase>, Counter<CandidatePhrase>> similarities =
                        scorer.computeSimWithWordVectors(this.candidatePhrases, this.positivePhrases, this.knownNegativePhrases, this.label);
                Redwood.log(Redwood.DBG, "Computed similarities with positive and negative phrases");
                return similarities;
            }
            return scorer.computeSimWithWordCluster(this.candidatePhrases, this.positivePhrases, this.allMaxSim);
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:edu/stanford/nlp/patterns/ScorePhrasesLearnFeatWt$PhrasePair.class */
    /**
     * Unordered pair of phrase strings: the two members are stored in lexicographic order so that
     * {@code (a, b)} and {@code (b, a)} are equal and hash alike.
     */
    public static class PhrasePair {
        final String p1;
        final String p2;
        final int hashCode;

        /**
         * @param str one phrase (must not be null)
         * @param str2 the other phrase (must not be null)
         */
        public PhrasePair(String str, String str2) {
            final boolean alreadyOrdered = str.compareTo(str2) <= 0;
            this.p1 = alreadyOrdered ? str : str2;
            this.p2 = alreadyOrdered ? str2 : str;
            // Sum of both hashes plus a constant; independent of argument order.
            this.hashCode = 331 + this.p1.hashCode() + this.p2.hashCode();
        }

        /** Returns the hash computed once at construction time. */
        @Override
        public int hashCode() {
            return this.hashCode;
        }

        /** Two pairs are equal when their ordered members are equal. */
        @Override
        public boolean equals(Object obj) {
            if (obj instanceof PhrasePair) {
                PhrasePair other = (PhrasePair) obj;
                boolean sameFirst = other.getPhrase1().equals(getPhrase1());
                return sameFirst && other.getPhrase2().equals(getPhrase2());
            }
            return false;
        }

        /** Returns the lexicographically smaller (or equal) phrase. */
        public String getPhrase1() {
            return this.p1;
        }

        /** Returns the lexicographically larger (or equal) phrase. */
        public String getPhrase2() {
            return this.p2;
        }
    }

    /**
     * Creates the scorer and, when word-vector similarity is enabled and needed by one of the
     * sampling/evaluation options, loads the shared word-vector table once.
     *
     * <p>Each line of {@code constVars.wordVectorFile} is split on whitespace: the first token is the
     * word and the remaining tokens are parsed as doubles. A vector is kept only when the word has a
     * raw frequency, is a stop word, is a known English word, or is a seed/other-semantic word;
     * otherwise the phrase created for it is deleted again.
     *
     * @param constantsAndVariables shared configuration handed to the superclass
     */
    public ScorePhrasesLearnFeatWt(ConstantsAndVariables constantsAndVariables) {
        super(constantsAndVariables);
        this.scoreClassifierType = ClassifierType.LR;
        this.phraseScoresRaw = new TwoDimensionalCounter<>();
        this.wordClassClustersForPhrase = new ConcurrentHashMap<>();
        this.closeToPositivesFirstIter = null;
        this.closeToNegativesFirstIter = null;

        boolean vectorsRequested = constantsAndVariables.useWordVectorsToComputeSim
                && (constantsAndVariables.subsampleUnkAsNegUsingSim
                        || constantsAndVariables.expandPositivesWhenSampling
                        || constantsAndVariables.expandNegativesWhenSampling
                        || this.constVars.usePhraseEvalWordVector);
        if (vectorsRequested && wordVectors == null) {
            if (Data.rawFreq == null) {
                Data.rawFreq = new ClassicCounter<>();
                Data.computeRawFreqIfNull(PatternFactory.numWordsCompoundMax, constantsAndVariables.batchProcessSents);
            }
            Redwood.log(Redwood.DBG, "Reading word vectors");
            wordVectors = new HashMap<>();
            for (String line : IOUtils.readLines(this.constVars.wordVectorFile)) {
                String[] columns = line.split("\\s+");
                String word = columns[0];
                CandidatePhrase phrase = CandidatePhrase.createOrGet(word);
                boolean keep = Data.rawFreq.containsKey(phrase)
                        || ConstantsAndVariables.getStopWords().contains(phrase)
                        || constantsAndVariables.getEnglishWords().contains(word)
                        || constantsAndVariables.hasSeedWordOrOtherSem(phrase);
                if (!keep) {
                    // The phrase was only created to run the checks above; drop it again.
                    CandidatePhrase.deletePhrase(phrase);
                    continue;
                }
                double[] vector = new double[columns.length - 1];
                for (int dim = 0; dim < vector.length; dim++) {
                    vector[dim] = Double.parseDouble(columns[dim + 1]);
                }
                wordVectors.put(word, vector);
            }
            Redwood.log(Redwood.DBG, "Read " + wordVectors.size() + " word vectors");
        }

        // Out-of-vocabulary defaults inherited from PhraseScorer are all zero for this scorer.
        this.OOVExternalFeatWt = 0.0d;
        this.OOVdictOdds = 0.0d;
        this.OOVDomainNgramScore = 0.0d;
        this.OOVGoogleNgramScore = 0.0d;
    }

    /**
     * Builds a training set with {@code choosedatums} and trains the classifier selected by
     * {@code scoreClassifierType}, logging the (up to 600) strongest feature weights.
     *
     * <p>Side effects: clears {@code phraseScoresRaw} and {@code learnedScores}, loads the domain
     * n-grams when {@code Data.domainNGramsFile} is set, creates {@code Data.rawFreq} when it is
     * null, and writes the raw phrase scores to {@code tempscorestrainer.txt} in the working directory.
     *
     * @param str label to train for
     * @param z forwarded unchanged as the first argument of {@code choosedatums}
     * @param twoDimensionalCounter phrase/pattern counts forwarded to {@code choosedatums}
     * @param counter pattern counter forwarded to {@code choosedatums}
     * @return the trained classifier
     * @throws IOException if the score dump cannot be written
     * @throws RuntimeException if {@code scoreClassifierType} is not LR, SVM, SHIFTLR or LINEAR
     */
    public Classifier learnClassifier(String str, boolean z, TwoDimensionalCounter<CandidatePhrase, E> twoDimensionalCounter, Counter<E> counter) throws IOException, ClassNotFoundException {
        Classifier trainClassifier;
        this.phraseScoresRaw.clear();
        this.learnedScores.clear();
        if (Data.domainNGramsFile != null) {
            Data.loadDomainNGrams();
        }
        // Tell choosedatums whether the raw-frequency counter was created here (and is therefore empty).
        boolean rawFreqCreatedHere = false;
        if (Data.rawFreq == null) {
            Data.rawFreq = new ClassicCounter();
            rawFreqCreatedHere = true;
        }
        GeneralDataset<String, ConstantsAndVariables.ScorePhraseMeasures> choosedatums = choosedatums(z, str, twoDimensionalCounter, counter, rawFreqCreatedHere);
        if (this.scoreClassifierType.equals(ClassifierType.LR)) {
            LogisticClassifierFactory logisticClassifierFactory = new LogisticClassifierFactory();
            LogPrior logPrior = new LogPrior();
            logPrior.setSigma(this.constVars.LRSigma);
            trainClassifier = logisticClassifierFactory.trainClassifier((GeneralDataset) choosedatums, logPrior, false);
            LogisticClassifier logisticClassifier = (LogisticClassifier) trainClassifier;
            String positiveLabel = (String) logisticClassifier.getLabelForInternalPositiveClass();
            Counter weightsAsCounter = logisticClassifier.weightsAsCounter();
            // Flip the signs when the classifier's internal positive class is "false" so that the
            // logged weights are always expressed with respect to the "true" class.
            if (positiveLabel.equals(Boolean.FALSE.toString())) {
                Counters.multiplyInPlace(weightsAsCounter, -1.0d);
            }
            List sortedWeights = Counters.toDescendingMagnitudeSortedListWithCounts(weightsAsCounter);
            Redwood.log(ConstantsAndVariables.minimaldebug, "The weights are " + StringUtils.join(sortedWeights.subList(0, Math.min(sortedWeights.size(), 600)), "\n"));
        } else if (this.scoreClassifierType.equals(ClassifierType.SVM)) {
            trainClassifier = new SVMLightClassifierFactory(true).trainClassifier((GeneralDataset) choosedatums);
            Redwood.log(ConstantsAndVariables.minimaldebug, "The weights are " + StringUtils.join(((SVMLightClassifier) trainClassifier).getTopFeatures(Generics.newHashSet(Arrays.asList("true")), 0.0d, true, 600, true), "\n"));
        } else if (this.scoreClassifierType.equals(ClassifierType.SHIFTLR)) {
            // This factory is trained on a Dataset of BasicDatum, so convert the real-valued datums
            // to their feature sets first.
            Dataset dataset = new Dataset();
            Iterator<RVFDatum<String, ConstantsAndVariables.ScorePhraseMeasures>> it = choosedatums.iterator();
            while (it.hasNext()) {
                RVFDatum<String, ConstantsAndVariables.ScorePhraseMeasures> next = it.next();
                dataset.add(new BasicDatum(next.asFeatures(), next.label()));
            }
            trainClassifier = new ShiftParamsLogisticClassifierFactory().trainClassifier((GeneralDataset) dataset);
            List sortedWeights = Counters.toDescendingMagnitudeSortedListWithCounts((Counter) ((MultinomialLogisticClassifier) trainClassifier).weightsAsGenericCounter().get("true"));
            Redwood.log(ConstantsAndVariables.minimaldebug, "The weights are " + StringUtils.join(sortedWeights.subList(0, Math.min(sortedWeights.size(), 600)), "\n"));
        } else {
            if (!this.scoreClassifierType.equals(ClassifierType.LINEAR)) {
                throw new RuntimeException("cannot identify classifier " + this.scoreClassifierType);
            }
            trainClassifier = new LinearClassifierFactory().trainClassifier((GeneralDataset) choosedatums);
            Redwood.log(ConstantsAndVariables.minimaldebug, "The weights are " + StringUtils.join(((LinearClassifier) trainClassifier).getTopFeatures(Generics.newHashSet(Arrays.asList("true")), 0.0d, true, 600, true), "\n"));
        }
        // try-with-resources closes the writer even when a write fails part-way through,
        // which the previous explicit close() at the end did not guarantee.
        try (BufferedWriter bufferedWriter = new BufferedWriter(new FileWriter("tempscorestrainer.txt"))) {
            System.out.println("size of learned scores is " + this.phraseScoresRaw.size());
            for (CandidatePhrase candidatePhrase : this.phraseScoresRaw.firstKeySet()) {
                bufferedWriter.write(candidatePhrase + LinearClassifier.TEXT_SERIALIZATION_DELIMITER + this.phraseScoresRaw.getCounter((TwoDimensionalCounter<CandidatePhrase, ConstantsAndVariables.ScorePhraseMeasures>) candidatePhrase) + "\n");
            }
        }
        return trainClassifier;
    }

    /**
     * Logs, at debug level, each selected phrase with its score and the raw feature values
     * recorded for it in {@code phraseScoresRaw}.
     *
     * @param counter selected phrases and their scores
     */
    @Override // edu.stanford.nlp.patterns.PhraseScorer
    public void printReasonForChoosing(Counter<CandidatePhrase> counter) {
        final String delimiter = LinearClassifier.TEXT_SERIALIZATION_DELIMITER;
        Redwood.log(Redwood.DBG, "Features of selected phrases");
        for (Map.Entry<CandidatePhrase, Double> selected : counter.entrySet()) {
            CandidatePhrase phrase = selected.getKey();
            Double score = selected.getValue();
            String line = phrase.getPhrase() + delimiter + score + delimiter
                    + this.phraseScoresRaw.getCounter((TwoDimensionalCounter<CandidatePhrase, ConstantsAndVariables.ScorePhraseMeasures>) phrase);
            Redwood.log(Redwood.DBG, line);
        }
    }

    /**
     * Trains a classifier for the label and scores every phrase in {@code twoDimensionalCounter}
     * with it; phrases whose score is NaN or infinite are logged and left out of the result.
     *
     * @param str label being scored
     * @param twoDimensionalCounter phrases to score together with the patterns that extracted them
     * @param twoDimensionalCounter2 phrase/pattern counts handed to {@code learnClassifier}
     * @param counter pattern counter handed to {@code learnClassifier} and to the scoring call
     * @param set not read by this method
     * @param z flag forwarded to {@code learnClassifier} and {@code scoreUsingClassifer}
     * @return phrase -> finite classifier score
     */
    @Override // edu.stanford.nlp.patterns.PhraseScorer
    public Counter<CandidatePhrase> scorePhrases(String str, TwoDimensionalCounter<CandidatePhrase, E> twoDimensionalCounter, TwoDimensionalCounter<CandidatePhrase, E> twoDimensionalCounter2, Counter<E> counter, Set<CandidatePhrase> set, boolean z) throws IOException, ClassNotFoundException {
        getAllLabeledWordsCluster();
        ClassicCounter<CandidatePhrase> finiteScores = new ClassicCounter<>();
        Classifier classifier = learnClassifier(str, z, twoDimensionalCounter2, counter);
        for (Map.Entry<CandidatePhrase, ClassicCounter<E>> extracted : twoDimensionalCounter.entrySet()) {
            CandidatePhrase phrase = extracted.getKey();
            double score = scoreUsingClassifer(classifier, phrase, str, z, extracted.getValue(), counter);
            if (!Double.isNaN(score) && !Double.isInfinite(score)) {
                finiteScores.setCount(phrase, score);
                continue;
            }
            Redwood.log(Redwood.DBG, "Ignoring " + phrase + " because score is " + score);
        }
        return finiteScores;
    }

    /**
     * Trains a classifier for the label without pattern information and scores each given phrase.
     * Unlike the pattern-aware overload, every score is stored as returned, with no NaN/infinity filter.
     *
     * @param str label being scored
     * @param set phrases to score
     * @param z flag forwarded to {@code learnClassifier} and {@code scoreUsingClassifer}
     * @return phrase -> classifier score
     */
    @Override // edu.stanford.nlp.patterns.PhraseScorer
    public Counter<CandidatePhrase> scorePhrases(String str, Set<CandidatePhrase> set, boolean z) throws IOException, ClassNotFoundException {
        getAllLabeledWordsCluster();
        ClassicCounter<CandidatePhrase> scores = new ClassicCounter<>();
        Classifier classifier = learnClassifier(str, z, null, null);
        for (CandidatePhrase phrase : set) {
            double score = scoreUsingClassifer(classifier, phrase, str, z, null, null);
            scores.setCount(phrase, score);
        }
        return scores;
    }

    /**
     * Draws one float from {@code random} and reports whether it falls below {@code d}.
     *
     * @param random source of randomness; exactly one {@code nextFloat()} is consumed
     * @param d threshold; values of 1 or more always yield {@code true}, values of 0 or less always {@code false}
     * @return {@code true} when the drawn value is strictly less than {@code d}
     */
    public static boolean getRandomBoolean(Random random, double d) {
        final float draw = random.nextFloat();
        return d > draw;
    }

    /**
     * Standard logistic (sigmoid) function, {@code 1 / (1 + e^(-d))}.
     *
     * @param d input value
     * @return a value in {@code [0, 1]}; NaN when {@code d} is NaN
     */
    static double logistic(double d) {
        final double decay = Math.exp(-d);
        return 1.0d / (decay + 1.0d);
    }

    /**
     * Counts the word-class clusters of the tokens of a phrase.
     *
     * <p>For each whitespace-separated token the cluster is looked up, in order, under the token, the
     * lemma token at the same position, the lower-cased token, and the lower-cased lemma token; the
     * first hit increments that cluster's count. Tokens without any hit are ignored.
     *
     * @param str phrase text (must not be null)
     * @param str2 lemmatised phrase text, or null when no lemma is available
     * @return cluster id -> number of tokens mapped to it (empty when nothing matched)
     */
    Counter<Integer> wordClass(String str, String str2) {
        ClassicCounter<Integer> clusterCounts = new ClassicCounter<>();
        String[] lemmaTokens = str2 != null ? str2.split("\\s+") : null;
        String[] tokens = str.split("\\s+");
        for (int pos = 0; pos < tokens.length; pos++) {
            String token = tokens[pos];
            // The lemma string can split into fewer tokens than the phrase (e.g. different
            // leading whitespace); indexing it blindly threw ArrayIndexOutOfBoundsException.
            String lemma = (lemmaTokens != null && pos < lemmaTokens.length) ? lemmaTokens[pos] : null;

            Integer cluster = this.constVars.getWordClassClusters().get(token);
            if (cluster == null && lemma != null) {
                cluster = this.constVars.getWordClassClusters().get(lemma);
            }
            if (cluster == null) {
                cluster = this.constVars.getWordClassClusters().get(token.toLowerCase());
                if (cluster == null && lemma != null) {
                    cluster = this.constVars.getWordClassClusters().get(lemma.toLowerCase());
                }
            }
            if (cluster != null) {
                clusterCounts.incrementCount(cluster);
            }
        }
        return clusterCounts;
    }

    /**
     * Fills {@code wordClassClustersForPhrase} with the cluster counts of every learned word and
     * every seed-dictionary phrase of every label, overwriting existing entries.
     */
    void getAllLabeledWordsCluster() {
        for (String label : this.constVars.getLabels()) {
            for (CandidatePhrase learned : this.constVars.getLearnedWords(label).keySet()) {
                Counter<Integer> clusters = wordClass(learned.getPhrase(), learned.getPhraseLemma());
                this.wordClassClustersForPhrase.put(learned, clusters);
            }
            for (CandidatePhrase seed : this.constVars.getSeedLabelDictionary().get(label)) {
                Counter<Integer> clusters = wordClass(seed.getPhrase(), seed.getPhraseLemma());
                this.wordClassClustersForPhrase.put(seed, clusters);
            }
        }
    }

    /**
     * Scores each candidate by its cosine similarity to the given labeled phrases.
     *
     * <p>For a candidate that has a word vector (and, when {@code z} is set, does not match
     * {@code PatternFactory.ignoreWordRegex}) the result is the mean similarity of its
     * {@code constVars.expandPhrasesNumTopSimilar} most similar labeled phrases. A candidate that is
     * itself one of the labeled phrases stops the scan and receives no score; a candidate without a
     * usable vector receives {@code Double.MIN_VALUE}.
     *
     * <p>Side effects: pairwise similarities are memoised in {@code cacheSimilarities}, and the
     * per-phrase, per-label statistics in {@code similaritiesWithLabeledPhrases} (NUMITEMS, AVGSIM,
     * MAXSIM slots) are updated for every candidate.
     *
     * @param collection candidate phrases to score
     * @param collection2 labeled phrases to compare against
     * @param z when true, candidates matching the ignore-word regex are treated like candidates without a vector
     * @param str label under which the statistics are stored
     * @return candidate -> similarity score
     */
    private Counter<CandidatePhrase> computeSimWithWordVectors(Collection<CandidatePhrase> collection, Collection<CandidatePhrase> collection2, boolean z, String str) {
        final int numItemsSlot = PhraseScorer.Similarities.NUMITEMS.ordinal();
        final int avgSimSlot = PhraseScorer.Similarities.AVGSIM.ordinal();
        final int maxSimSlot = PhraseScorer.Similarities.MAXSIM.ordinal();

        ClassicCounter<CandidatePhrase> scores = new ClassicCounter<>(collection.size());
        for (CandidatePhrase candidate : collection) {
            Map<String, double[]> statsByLabel = similaritiesWithLabeledPhrases.get(candidate.getPhrase());
            if (statsByLabel == null) {
                statsByLabel = new HashMap<>();
            }
            double[] stats = statsByLabel.get(str);
            if (stats == null) {
                stats = new double[PhraseScorer.Similarities.values().length];
            }

            // The regex is only consulted for candidates that do have a vector.
            if (wordVectors.containsKey(candidate.getPhrase()) && !(z && PatternFactory.ignoreWordRegex.matcher(candidate.getPhrase()).matches())) {
                double[] candidateVector = wordVectors.get(candidate.getPhrase());
                BinaryHeapPriorityQueue<CandidatePhrase> mostSimilar = new BinaryHeapPriorityQueue<>(this.constVars.expandPhrasesNumTopSimilar);
                double similaritySum = 0.0d;
                double similarityMax = Double.MIN_VALUE;
                boolean candidateIsLabeled = false;

                for (CandidatePhrase labeled : collection2) {
                    if (candidate.equals(labeled)) {
                        candidateIsLabeled = true;
                        break;
                    }
                    if (!wordVectors.containsKey(labeled.getPhrase())) {
                        continue;
                    }
                    PhrasePair pair = new PhrasePair(candidate.getPhrase(), labeled.getPhrase());
                    double similarity;
                    if (!cacheSimilarities.containsKey(pair)) {
                        // Cosine similarity: dot product over the product of the two norms.
                        double[] labeledVector = wordVectors.get(labeled.getPhrase());
                        double dot = 0.0d;
                        double candidateNormSq = 0.0d;
                        double labeledNormSq = 0.0d;
                        for (int dim = 0; dim < candidateVector.length; dim++) {
                            dot += candidateVector[dim] * labeledVector[dim];
                            candidateNormSq += candidateVector[dim] * candidateVector[dim];
                            labeledNormSq += labeledVector[dim] * labeledVector[dim];
                        }
                        similarity = dot / (Math.sqrt(candidateNormSq) * Math.sqrt(labeledNormSq));
                        cacheSimilarities.setCount(pair, similarity);
                    } else {
                        similarity = cacheSimilarities.getCount(pair);
                    }
                    // Keep the queue bounded to the configured number of top entries.
                    mostSimilar.add(labeled, similarity);
                    if (mostSimilar.size() > this.constVars.expandPhrasesNumTopSimilar) {
                        mostSimilar.removeLastEntry();
                    }
                    similaritySum += similarity;
                    if (similarity > similarityMax) {
                        similarityMax = similarity;
                    }
                }

                // Drain the queue to average the retained similarities.
                double retainedSum = 0.0d;
                int retainedCount = 0;
                while (mostSimilar.hasNext()) {
                    retainedSum += mostSimilar.getPriority();
                    mostSimilar.next();
                    retainedCount++;
                }
                double retainedAverage = retainedSum / retainedCount;

                // Fold this batch into the running statistics for (candidate, label).
                double previousItems = stats[numItemsSlot];
                double previousAverage = stats[avgSimSlot];
                double previousMax = stats[maxSimSlot];
                double totalItems = previousItems + collection2.size();
                double updatedAverage = ((previousAverage * previousItems) + similaritySum) / totalItems;
                double updatedMax = previousMax > similarityMax ? previousMax : similarityMax;
                stats[numItemsSlot] = totalItems;
                stats[avgSimSlot] = updatedAverage;
                stats[maxSimSlot] = updatedMax;

                if (!candidateIsLabeled) {
                    scores.setCount(candidate, retainedAverage);
                }
            } else {
                scores.setCount(candidate, Double.MIN_VALUE);
            }

            statsByLabel.put(str, stats);
            similaritiesWithLabeledPhrases.put(candidate.getPhrase(), statsByLabel);
        }
        return scores;
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Computes word-vector similarities of the candidates to the positive phrases and to the known
     * negative phrases of every label in {@code map}.
     *
     * <p>Negative similarities are summed over all labels. A candidate is kept in the positive
     * counter only when its positive similarity is at least its summed negative similarity.
     *
     * @param list candidate phrases
     * @param collection positive phrases for {@code str}
     * @param map label -> known negative phrases (must not be null)
     * @param str label of the positive phrases
     * @return (filtered positive similarities, summed negative similarities)
     */
    public Pair<Counter<CandidatePhrase>, Counter<CandidatePhrase>> computeSimWithWordVectors(List<CandidatePhrase> list, Collection<CandidatePhrase> collection, Map<String, Collection<CandidatePhrase>> map, String str) {
        if (!$assertionsDisabled && wordVectors == null) {
            throw new AssertionError("Why are word vectors null?");
        }
        final Counter<CandidatePhrase> positiveSims = computeSimWithWordVectors(list, collection, true, str);
        final ClassicCounter<CandidatePhrase> negativeSims = new ClassicCounter<>();
        map.forEach((negativeLabel, negativePhrases) ->
                negativeSims.addAll(computeSimWithWordVectors(list, negativePhrases, true, negativeLabel)));
        Counters.retainKeys(positiveSims, candidate -> positiveSims.getCount(candidate) >= negativeSims.getCount(candidate));
        return new Pair<>(positiveSims, negativeSims);
    }

    /**
     * Scores each candidate by the average Jaccard coefficient between its word-class cluster counts
     * and those of the positive phrases.
     *
     * <p>Cluster counts are taken from {@code wordClassClustersForPhrase} and computed (and cached)
     * on demand. The sum of coefficients is divided by {@code collection2.size()}, i.e. positives
     * that were skipped still count in the denominator. A candidate with no clusters scores 0.
     * NOTE: when a candidate has clusters and {@code collection2} is empty, the division yields NaN.
     *
     * @param collection candidate phrases to score
     * @param collection2 positive phrases to compare against
     * @param atomicDouble raised to the largest score produced here if that exceeds its current value
     * @return a pair whose first element maps candidate -> score and whose second element is null
     */
    Pair<Counter<CandidatePhrase>, Counter<CandidatePhrase>> computeSimWithWordCluster(Collection<CandidatePhrase> collection, Collection<CandidatePhrase> collection2, AtomicDouble atomicDouble) {
        ClassicCounter<CandidatePhrase> scores = new ClassicCounter<>(collection.size());
        for (CandidatePhrase candidate : collection) {
            Counter<Integer> candidateClusters = this.wordClassClustersForPhrase.get(candidate);
            if (candidateClusters == null) {
                candidateClusters = wordClass(candidate.getPhrase(), candidate.getPhraseLemma());
                this.wordClassClustersForPhrase.put(candidate, candidateClusters);
            }
            double score = 0.0d;
            if (candidateClusters.size() > 0) {
                for (CandidatePhrase positive : collection2) {
                    if (candidate.equals(positive)) {
                        continue;
                    }
                    Counter<Integer> positiveClusters = this.wordClassClustersForPhrase.get(positive);
                    if (positiveClusters == null) {
                        positiveClusters = wordClass(positive.getPhrase(), positive.getPhraseLemma());
                        // Cache the positive phrase's own clusters. The previous code stored the
                        // candidate's clusters under this key, corrupting every later lookup.
                        this.wordClassClustersForPhrase.put(positive, positiveClusters);
                    }
                    if (positiveClusters.size() > 0) {
                        double jaccard = Counters.jaccardCoefficient(positiveClusters, candidateClusters);
                        if (!Double.isInfinite(jaccard) && !Double.isNaN(jaccard)) {
                            score += jaccard;
                        }
                    }
                }
                score /= collection2.size();
            }
            scores.setCount(candidate, score);
            if (atomicDouble.get() < score) {
                atomicDouble.set(score);
            }
        }
        return new Pair<>(scores, null);
    }

    /**
     * Selects, from the unknown phrases, those that may be used as negatives according to their
     * similarity scores.
     *
     * <p>The unknown phrases are split into batches that are scored by {@code ComputeSim} tasks on
     * a fixed thread pool. Phrases whose score is not below
     * {@code constVars.positiveSimilarityThresholdLowPrecision} are dropped via
     * {@code Counters.retainBelow}; the dropped phrases are printed and, when both a log writer
     * and word vectors are available, written to the log with a {@code -PN} suffix.
     *
     * @param set            unknown phrases to choose from
     * @param str            label being learned
     * @param collection     positive phrases handed to the similarity tasks
     * @param map            known negative phrases keyed by name, handed to the similarity tasks
     * @param bufferedWriter optional log for the vectors of removed phrases; may be {@code null}
     * @return the phrases remaining in the similarity counter
     * @throws IOException if writing to {@code bufferedWriter} fails
     */
    Set<CandidatePhrase> chooseUnknownAsNegatives(Set<CandidatePhrase> set, String str, Collection<CandidatePhrase> collection, Map<String, Collection<CandidatePhrase>> map, BufferedWriter bufferedWriter) throws IOException {
        final List batches = GetPatternsFromDataMultiClass.getThreadBatches(CollectionUtils.toList(set), this.constVars.numThreads);
        final Counter<CandidatePhrase> similarities = new ClassicCounter<>();
        final AtomicDouble maxSimilarity = new AtomicDouble(Double.MIN_VALUE);
        final ExecutorService pool = Executors.newFixedThreadPool(this.constVars.numThreads);
        final List<Future> pending = new ArrayList<>();
        for (Object batch : batches) {
            pending.add(pool.submit(new ComputeSim(str, (List) batch, maxSimilarity, collection, map)));
        }
        for (Future task : pending) {
            try {
                similarities.addAll((Counter<CandidatePhrase>) ((Pair) task.get()).first());
            } catch (Exception e) {
                pool.shutdownNow();
                throw new RuntimeException(e);
            }
        }
        pool.shutdown();

        // The sentinel is untouched when no task ever raised the running maximum.
        if (maxSimilarity.get() == Double.MIN_VALUE) {
            Redwood.log(Redwood.DBG, "No similarity recorded between the positives and the unknown!");
        }
        final CandidatePhrase mostSimilar = Counters.argmax(similarities);
        System.out.println("Maximum similarity was " + similarities.getCount(mostSimilar) + " for word " + mostSimilar);
        final Counter<CandidatePhrase> removed = Counters.retainBelow(similarities, this.constVars.positiveSimilarityThresholdLowPrecision);
        System.out.println("removing phrases as negative phrases that were higher that positive similarity threshold of " + this.constVars.positiveSimilarityThresholdLowPrecision + removed);

        if (bufferedWriter == null || wordVectors == null) {
            return similarities.keySet();
        }
        for (Map.Entry<CandidatePhrase, Double> removedEntry : removed.entrySet()) {
            final CandidatePhrase phrase = removedEntry.getKey();
            if (wordVectors.containsKey(phrase.getPhrase())) {
                bufferedWriter.write(phrase + "-PN " + ArrayUtils.toString(wordVectors.get(phrase.getPhrase()), AddDep.ATOM_DELIMITER) + "\n");
            }
        }
        return similarities.keySet();
    }

    /**
     * Randomly samples phrases from a sentence that are not labeled with {@code str}.
     *
     * <p>A phrase length is drawn (with a fixed seed of 0) from 1 up to
     * {@code PatternFactory.numWordsCompoundMapped.get(str)}. For dependency patterns, tokens are
     * sampled without replacement and phrases are extracted from the semantic graph around each
     * accepted token. For surface patterns, each token is selected with probability {@code d} and
     * a window of the drawn length around it becomes a phrase, unless the window reaches a token
     * labeled {@code str}, is blank, or is a function word.
     *
     * @param dataInstance sentence to sample from
     * @param random       source of randomness for token selection
     * @param d            per-token selection probability (also scales the DEP sample size)
     * @param cls          annotation key under which token labels are stored
     * @param str          label whose tokens must not be sampled
     * @param i            maximum number of tokens to sample in DEP mode; 0 disables sampling entirely
     * @return the sampled candidate phrases (possibly empty)
     * @throws RuntimeException if the pattern type is neither DEP nor SURFACE
     */
    Set<CandidatePhrase> chooseUnknownPhrases(DataInstance dataInstance, Random random, double d, Class cls, String str, int i) {
        final Set<CandidatePhrase> sampled = new HashSet<>();
        if (i == 0) {
            return sampled;
        }
        // A token is acceptable when it is neither labeled with str nor a function word.
        final Function<CoreLabel, Boolean> acceptToken = token ->
                !token.get(cls).equals(str) && !this.constVars.functionWords.contains(token.word());

        final Random lengthRandom = new Random(0L);
        final List<Integer> lengths = new ArrayList<>();
        for (int len = 1; len <= PatternFactory.numWordsCompoundMapped.get(str).intValue(); len++) {
            lengths.add(len);
        }
        final int phraseLength = CollectionUtils.sample(lengths, lengthRandom).intValue();

        if (this.constVars.patternType.equals(PatternFactory.PatternType.DEP)) {
            final ExtractPhraseFromPattern extractor = new ExtractPhraseFromPattern(true, phraseLength);
            final SemanticGraph graph = ((DataInstanceDep) dataInstance).getGraph();
            final int sampleSize = Math.min(i, (int) (d * dataInstance.getTokens().size()));
            final Collection<CoreLabel> sampledTokens = CollectionUtils.sampleWithoutReplacement(dataInstance.getTokens(), sampleSize, random);
            final List<String> words = (List) dataInstance.getTokens().stream().map(token -> token.word()).collect(Collectors.toList());
            for (CoreLabel token : sampledTokens) {
                if (!acceptToken.apply(token).booleanValue()) {
                    continue;
                }
                final IndexedWord node = graph.getNodeByIndex(token.index());
                final ArrayList<ExtractedPhrase> extracted = new ArrayList();
                extractor.printSubGraph(graph, node, new ArrayList(), words, new ArrayList(), new ArrayList(), new ArrayList(), new ArrayList(), false, extracted, null, acceptToken);
                for (ExtractedPhrase phrase : extracted) {
                    sampled.add(CandidatePhrase.createOrGet(phrase.getValue(), null, phrase.getFeatures()));
                }
            }
        } else if (this.constVars.patternType.equals(PatternFactory.PatternType.SURFACE)) {
            final CoreLabel[] tokens = (CoreLabel[]) dataInstance.getTokens().toArray(new CoreLabel[0]);
            // The window spans `before` tokens to the left and `after` tokens to the right of the chosen token.
            final int before = (int) ((phraseLength - 1) / 2.0d);
            final int after = (phraseLength - 1) - before;
            for (int center = 0; center < tokens.length; center++) {
                if (!(random.nextDouble() < d)) {
                    continue;
                }
                final StringBuilder window = new StringBuilder();
                boolean touchesLabel = false;
                for (int pos = Math.max(0, center - before); pos < tokens.length && pos <= center + after; pos++) {
                    if (tokens[pos].get(cls).equals(str)) {
                        touchesLabel = true;
                        break;
                    }
                    window.append(AddDep.ATOM_DELIMITER).append(tokens[pos].word());
                }
                final String phrase = window.toString().trim();
                if (!touchesLabel && !phrase.isEmpty() && !this.constVars.functionWords.contains(phrase)) {
                    sampled.add(CandidatePhrase.createOrGet(phrase));
                }
            }
        } else {
            throw new RuntimeException("not yet implemented");
        }
        return sampled;
    }

    /**
     * Tells whether {@code f} occurs in the collection of any map entry whose key differs from {@code e}.
     *
     * @param map collections keyed by some identifier
     * @param f   element to look for
     * @param e   key whose own collection is ignored
     * @return {@code true} if some entry with a key other than {@code e} contains {@code f}
     */
    static <E, F> boolean hasElement(Map<E, Collection<F>> map, F f, E e) {
        return map.entrySet().stream()
                .anyMatch(entry -> !entry.getKey().equals(e) && entry.getValue().contains(f));
    }

    /**
     * Counts, per label, the tokens in all batched sentences whose answer annotation equals that label.
     *
     * @return counter from label to the number of tokens carrying it
     */
    Counter<String> numLabeledTokens() {
        final Counter<String> labelCounts = new ClassicCounter<>();
        final ConstantsAndVariables.DataSentsIterator sentenceBatches = new ConstantsAndVariables.DataSentsIterator(this.constVars.batchProcessSents);
        while (sentenceBatches.hasNext()) {
            for (DataInstance sentence : sentenceBatches.next().first().values()) {
                for (CoreLabel token : sentence.getTokens()) {
                    for (Map.Entry<String, Class<? extends TypesafeMap.Key<String>>> answer : this.constVars.getAnswerClass().entrySet()) {
                        final String label = answer.getKey();
                        final String tokenValue = (String) token.get(answer.getValue());
                        if (tokenValue.equals(label)) {
                            labelCounts.incrementCount(label);
                        }
                    }
                }
            }
        }
        return labelCounts;
    }

    /**
     * Collects every phrase group that can serve as a negative for label {@code str}.
     *
     * <p>The result contains: {@code "NEGATIVE"} (stop words, function words and English words),
     * one entry per other label (its learned words, when present, plus its seed dictionary), and
     * {@code "OTHERSEM"} (the other-semantic-class words).
     *
     * @param str label for which negatives are gathered; it gets no entry of its own
     * @return map from group name to its phrases
     */
    Map<String, Collection<CandidatePhrase>> getAllPossibleNegativePhrases(String str) {
        final Map<String, Collection<CandidatePhrase>> negativesByGroup = new HashMap<>();

        final Set<CandidatePhrase> generalNegatives = new HashSet<>();
        generalNegatives.addAll(ConstantsAndVariables.getStopWords());
        generalNegatives.addAll(CandidatePhrase.convertStringPhrases(this.constVars.functionWords));
        generalNegatives.addAll(CandidatePhrase.convertStringPhrases(this.constVars.getEnglishWords()));
        negativesByGroup.put("NEGATIVE", generalNegatives);

        for (String otherLabel : this.constVars.getLabels()) {
            if (otherLabel.equals(str)) {
                continue;
            }
            final Set<CandidatePhrase> otherLabelPhrases = new HashSet<>();
            negativesByGroup.put(otherLabel, otherLabelPhrases);
            if (this.constVars.getLearnedWordsEachIter().containsKey(otherLabel)) {
                otherLabelPhrases.addAll(this.constVars.getLearnedWords(otherLabel).keySet());
            }
            otherLabelPhrases.addAll(this.constVars.getSeedLabelDictionary().get(otherLabel));
        }

        negativesByGroup.put("OTHERSEM", this.constVars.getOtherSemanticClassesWords());
        return negativesByGroup;
    }

    /* JADX WARN: Multi-variable type inference failed */
    /**
     * Builds the labeled dataset ({@code "true"} / {@code "false"} datums) used to train the phrase classifier.
     *
     * <p>Steps:
     * <ol>
     *   <li>Decide whether positives / negatives are expanded with similar phrases. This is only
     *       requested on the call that first creates {@code closeToPositivesFirstIter} /
     *       {@code closeToNegativesFirstIter}.</li>
     *   <li>Run {@code ChooseDatumsThread} tasks over every sentence batch to collect positive,
     *       negative and unknown phrases (plus the near-positive / near-negative phrases).</li>
     *   <li>Add the learned words to the positives, optionally expand both sets, and turn every
     *       positive phrase into a {@code "true"} datum.</li>
     *   <li>Add unknown phrases to the negatives (optionally sub-sampled by similarity), cap the
     *       negatives at the number of positives, and turn them into {@code "false"} datums.</li>
     *   <li>Apply the feature count threshold and return the dataset.</li>
     * </ol>
     *
     * <p>When {@code constVars.logFileVectorSimilarity} is set, word vectors and feature strings
     * are written to that file and to a sibling {@code _feat} file. Both writers are closed even
     * when an exception interrupts the method.
     *
     * @param z                     if {@code true} features come from {@code getPhraseFeaturesForPattern},
     *                              otherwise from {@code getFeatures}
     * @param str                   label being learned
     * @param twoDimensionalCounter per-phrase counters passed to {@code getFeatures} and to the sampling tasks
     * @param counter               counter passed to {@code getFeatures} and to the sampling tasks
     * @param z2                    if {@code true}, raw frequencies are computed for each batch when missing
     * @return the dataset after feature-count thresholding
     * @throws IOException      if writing one of the log files fails
     * @throws RuntimeException if a sampling task fails
     */
    public GeneralDataset<String, ConstantsAndVariables.ScorePhraseMeasures> choosedatums(boolean z, String str, TwoDimensionalCounter<CandidatePhrase, E> twoDimensionalCounter, Counter<E> counter, boolean z2) throws IOException {
        boolean expandNegatives = false;
        if (this.closeToNegativesFirstIter == null) {
            this.closeToNegativesFirstIter = new ClassicCounter<>();
            if (this.constVars.expandNegativesWhenSampling) {
                expandNegatives = true;
            }
        }
        boolean expandPositives = false;
        if (this.closeToPositivesFirstIter == null) {
            this.closeToPositivesFirstIter = new ClassicCounter<>();
            if (this.constVars.expandPositivesWhenSampling) {
                expandPositives = true;
            }
        }

        // Word-class cluster ids of the known positive phrases; only gathered when expanding without word vectors.
        Counter<Integer> positiveClusterCounts = new ClassicCounter<>();
        if ((expandPositives || expandNegatives) && !this.constVars.useWordVectorsToComputeSim) {
            for (CandidatePhrase positive : CollectionUtils.union(this.constVars.getLearnedWords(str).keySet(), this.constVars.getSeedLabelDictionary().get(str))) {
                String[] words = positive.getPhrase().split("\\s+");
                Integer phraseCluster = this.constVars.getWordClassClusters().get(positive.getPhrase());
                if (phraseCluster == null) {
                    phraseCluster = this.constVars.getWordClassClusters().get(positive.getPhrase().toLowerCase());
                }
                if (phraseCluster == null) {
                    // No cluster for the whole phrase: fall back to the clusters of its individual words.
                    for (String word : words) {
                        Integer wordCluster = this.constVars.getWordClassClusters().get(word);
                        if (wordCluster == null) {
                            wordCluster = this.constVars.getWordClassClusters().get(word.toLowerCase());
                        }
                        if (wordCluster != null) {
                            positiveClusterCounts.incrementCount(wordCluster);
                        }
                    }
                } else {
                    positiveClusterCounts.incrementCount(phraseCluster);
                }
            }
        }

        Map<String, Collection<CandidatePhrase>> allPossibleNegativePhrases = getAllPossibleNegativePhrases(str);
        RVFDataset<String, ConstantsAndVariables.ScorePhraseMeasures> dataset = new RVFDataset<>();
        int numPositives = 0;
        Set<CandidatePhrase> negativePhrases = new HashSet<>();
        Set<CandidatePhrase> unknownPhrases = new HashSet<>();
        Set<CandidatePhrase> positivePhrases = new HashSet<>();

        ConstantsAndVariables.DataSentsIterator dataSentsIterator = new ConstantsAndVariables.DataSentsIterator(this.constVars.batchProcessSents);
        while (dataSentsIterator.hasNext()) {
            Pair<Map<String, DataInstance>, File> next = dataSentsIterator.next();
            Map<String, DataInstance> sentences = next.first();
            Redwood.log(Redwood.DBG, "Sampling datums from " + next.second());
            if (z2) {
                Data.computeRawFreqIfNull(sentences, PatternFactory.numWordsCompoundMax);
            }
            List batches = GetPatternsFromDataMultiClass.getThreadBatches(new ArrayList(sentences.keySet()), this.constVars.numThreads);
            ExecutorService pool = Executors.newFixedThreadPool(this.constVars.numThreads);
            List<Future<?>> pending = new ArrayList<>();
            for (Object batch : batches) {
                pending.add(pool.submit(new ChooseDatumsThread(str, sentences, (List) batch, twoDimensionalCounter, counter, positiveClusterCounts, allPossibleNegativePhrases, expandPositives, expandNegatives)));
            }
            for (Future<?> task : pending) {
                try {
                    // Tuple layout: positives, negatives, unknowns, near-positives, near-negatives.
                    Quintuple result = (Quintuple) task.get();
                    positivePhrases.addAll((Collection) result.first());
                    negativePhrases.addAll((Collection) result.second());
                    unknownPhrases.addAll((Collection) result.third());
                    if (expandPositives) {
                        for (Map.Entry<CandidatePhrase, Double> entry : ((Counter<CandidatePhrase>) result.fourth()).entrySet()) {
                            this.closeToPositivesFirstIter.setCount(entry.getKey(), entry.getValue().doubleValue());
                        }
                    }
                    if (expandNegatives) {
                        for (Map.Entry<CandidatePhrase, Double> entry : ((Counter<CandidatePhrase>) result.fifth()).entrySet()) {
                            this.closeToNegativesFirstIter.setCount(entry.getKey(), entry.getValue().doubleValue());
                        }
                    }
                } catch (Exception e) {
                    pool.shutdownNow();
                    throw new RuntimeException(e);
                }
            }
            pool.shutdown();
        }
        positivePhrases.addAll(this.constVars.getLearnedWords(str).keySet());

        BufferedWriter vectorLog = null;
        BufferedWriter featureLog = null;
        try {
            if (this.constVars.logFileVectorSimilarity != null) {
                vectorLog = new BufferedWriter(new FileWriter(this.constVars.logFileVectorSimilarity));
                featureLog = new BufferedWriter(new FileWriter(this.constVars.logFileVectorSimilarity + "_feat"));
                if (wordVectors != null) {
                    for (CandidatePhrase positive : positivePhrases) {
                        if (wordVectors.containsKey(positive.getPhrase())) {
                            vectorLog.write(positive.getPhrase() + "-P " + ArrayUtils.toString(wordVectors.get(positive.getPhrase()), AddDep.ATOM_DELIMITER) + "\n");
                        }
                    }
                }
            }
            if (this.constVars.expandPositivesWhenSampling) {
                Redwood.log("Expanding positives by adding " + Counters.toSortedString(this.closeToPositivesFirstIter, this.closeToPositivesFirstIter.size(), "%1$s:%2$f", LinearClassifier.TEXT_SERIALIZATION_DELIMITER) + " phrases");
                positivePhrases.addAll(this.closeToPositivesFirstIter.keySet());
                // NOTE(review): this log is gated on the negative-expansion flag, exactly as before; confirm that is intended.
                if (vectorLog != null && wordVectors != null && expandNegatives) {
                    for (CandidatePhrase nearPositive : this.closeToPositivesFirstIter.keySet()) {
                        if (wordVectors.containsKey(nearPositive.getPhrase())) {
                            vectorLog.write(nearPositive.getPhrase() + "-PP " + ArrayUtils.toString(wordVectors.get(nearPositive.getPhrase()), AddDep.ATOM_DELIMITER) + "\n");
                        }
                    }
                }
            }
            if (this.constVars.expandNegativesWhenSampling) {
                Redwood.log("Expanding negatives by adding " + Counters.toSortedString(this.closeToNegativesFirstIter, this.closeToNegativesFirstIter.size(), "%1$s:%2$f", LinearClassifier.TEXT_SERIALIZATION_DELIMITER) + " phrases");
                negativePhrases.addAll(this.closeToNegativesFirstIter.keySet());
                if (vectorLog != null && wordVectors != null && expandNegatives) {
                    for (CandidatePhrase nearNegative : this.closeToNegativesFirstIter.keySet()) {
                        if (wordVectors.containsKey(nearNegative.getPhrase())) {
                            vectorLog.write(nearNegative.getPhrase() + "-NN " + ArrayUtils.toString(wordVectors.get(nearNegative.getPhrase()), AddDep.ATOM_DELIMITER) + "\n");
                        }
                    }
                }
            }

            System.out.println("all positive phrases of size " + positivePhrases.size() + " are  " + positivePhrases);
            for (CandidatePhrase positive : positivePhrases) {
                Counter<ConstantsAndVariables.ScorePhraseMeasures> features = z ? getPhraseFeaturesForPattern(str, positive) : getFeatures(str, positive, twoDimensionalCounter.getCounter(positive), counter);
                dataset.add(new RVFDatum<>(features, "true"));
                numPositives++;
                if (featureLog != null) {
                    featureLog.write("POSITIVE " + positive.getPhrase() + LinearClassifier.TEXT_SERIALIZATION_DELIMITER + Counters.toSortedByKeysString(features, "%1$s:%2$.0f", ";", "%s") + "\n");
                }
            }

            Redwood.log(Redwood.DBG, "Number of pure negative phrases is " + negativePhrases.size());
            Redwood.log(Redwood.DBG, "Number of unknown phrases is " + unknownPhrases.size());
            if (this.constVars.subsampleUnkAsNegUsingSim) {
                Set<CandidatePhrase> chosenUnknowns = chooseUnknownAsNegatives(unknownPhrases, str, positivePhrases, allPossibleNegativePhrases, vectorLog);
                Redwood.log(Redwood.DBG, "Choosing " + chosenUnknowns.size() + " unknowns as negative based to their similarity to the positive phrases");
                negativePhrases.addAll(chosenUnknowns);
            } else {
                negativePhrases.addAll(unknownPhrases);
            }

            if (negativePhrases.size() > numPositives) {
                // Floating-point ratio: integer division would throw when there are no positives at all.
                Redwood.log(Redwood.WARN, "Num of negative (" + negativePhrases.size() + ") is higher than number of positive phrases (" + numPositives + ") = " + ((double) negativePhrases.size() / numPositives) + ". Capping the number by taking the first numPositives as negative. Consider decreasing perSelectRand");
                int kept = 0;
                Set<CandidatePhrase> cappedNegatives = new HashSet<>();
                for (CandidatePhrase negative : negativePhrases) {
                    if (kept >= numPositives) {
                        break;
                    }
                    cappedNegatives.add(negative);
                    kept++;
                }
                negativePhrases.clear();
                negativePhrases = cappedNegatives;
            }

            System.out.println("all negative phrases are " + negativePhrases);
            for (CandidatePhrase negative : negativePhrases) {
                Counter<ConstantsAndVariables.ScorePhraseMeasures> features = z ? getPhraseFeaturesForPattern(str, negative) : getFeatures(str, negative, twoDimensionalCounter.getCounter(negative), counter);
                dataset.add(new RVFDatum<>(features, "false"));
                if (vectorLog != null && wordVectors != null && wordVectors.containsKey(negative.getPhrase())) {
                    vectorLog.write(negative.getPhrase() + "-N" + AddDep.ATOM_DELIMITER + ArrayUtils.toString(wordVectors.get(negative.getPhrase()), AddDep.ATOM_DELIMITER) + "\n");
                }
                if (featureLog != null) {
                    featureLog.write("NEGATIVE " + negative.getPhrase() + LinearClassifier.TEXT_SERIALIZATION_DELIMITER + Counters.toSortedByKeysString(features, "%1$s:%2$.0f", ";", "%s") + "\n");
                }
            }
        } finally {
            // Close both logs on every path; the nested finally ensures the second one is closed
            // even if closing the first one fails.
            try {
                if (vectorLog != null) {
                    vectorLog.close();
                }
            } finally {
                if (featureLog != null) {
                    featureLog.close();
                }
            }
        }

        System.out.println("Before feature count threshold, dataset stats are ");
        dataset.summaryStatistics();
        dataset.applyFeatureCountThreshold(this.constVars.featureCountThreshold);
        System.out.println("AFTER feature count threshold of " + this.constVars.featureCountThreshold + ", dataset stats are ");
        dataset.summaryStatistics();
        Redwood.log(Redwood.DBG, "Eventually, number of positive datums:  " + numPositives + " and number of negative datums: " + negativePhrases.size());
        return dataset;
    }

    /**
     * Looks up the similarity statistics stored for a phrase.
     *
     * @param str phrase text used as the key into {@code similaritiesWithLabeledPhrases}
     * @return the per-label statistic arrays stored for that phrase, or whatever the underlying
     *         map yields when nothing has been stored for it
     */
    public Map<String, double[]> getSimilarities(String str) {
        final Map<String, double[]> statsByLabel = similaritiesWithLabeledPhrases.get(str);
        return statsByLabel;
    }

    /**
     * Returns the feature counter of a phrase, using the {@code usePatternEval*} switches of
     * {@code constVars}.
     *
     * <p>Results are cached in {@code phraseScoresRaw}: if the phrase already has a counter there
     * it is returned as is, otherwise the features are computed, stored and returned.
     *
     * @param str             label the phrase is scored for
     * @param candidatePhrase phrase whose features are requested
     * @return counter from feature to value
     * @throws RuntimeException if an enabled n-gram score is infinite or NaN
     */
    Counter<ConstantsAndVariables.ScorePhraseMeasures> getPhraseFeaturesForPattern(String str, CandidatePhrase candidatePhrase) {
        if (this.phraseScoresRaw.containsFirstKey(candidatePhrase)) {
            // The phrase itself is the first-level key; it must not be cast to a counter type.
            return this.phraseScoresRaw.getCounter(candidatePhrase);
        }
        ClassicCounter<ConstantsAndVariables.ScorePhraseMeasures> features = new ClassicCounter<>();
        if (candidatePhrase.getFeatures() != null) {
            // Carry over the features attached to the phrase, converting their names to measures.
            features.addAll(Counters.transform(candidatePhrase.getFeatures(), featureName -> ConstantsAndVariables.ScorePhraseMeasures.create(featureName)));
        } else {
            Redwood.log(ConstantsAndVariables.extremedebug, "features are null for " + candidatePhrase);
        }
        if (this.constVars.usePatternEvalSemanticOdds) {
            features.setCount(ConstantsAndVariables.ScorePhraseMeasures.SEMANTICODDS, getDictOddsScore(candidatePhrase, str, 0.0d));
        }
        if (this.constVars.usePatternEvalGoogleNgram) {
            Double googleScore = Double.valueOf(getGoogleNgramScore(candidatePhrase));
            if (googleScore.isInfinite() || googleScore.isNaN()) {
                throw new RuntimeException("how is the google ngrams score " + googleScore + " for " + candidatePhrase);
            }
            features.setCount(ConstantsAndVariables.ScorePhraseMeasures.GOOGLENGRAM, googleScore.doubleValue());
        }
        if (this.constVars.usePatternEvalDomainNgram) {
            Double domainScore = Double.valueOf(getDomainNgramScore(candidatePhrase.getPhrase()));
            if (domainScore.isInfinite() || domainScore.isNaN()) {
                throw new RuntimeException("how is the domain ngrams score " + domainScore + " for " + candidatePhrase + " when domain raw freq is " + Data.domainNGramRawFreq.getCount(candidatePhrase) + " and raw freq is " + Data.rawFreq.getCount(candidatePhrase));
            }
            features.setCount(ConstantsAndVariables.ScorePhraseMeasures.DOMAINNGRAM, domainScore.doubleValue());
        }
        if (this.constVars.usePatternEvalWordClass) {
            // Look the phrase up as written first, then lower-cased.
            Integer cluster = this.constVars.getWordClassClusters().get(candidatePhrase.getPhrase());
            if (cluster == null) {
                cluster = this.constVars.getWordClassClusters().get(candidatePhrase.getPhrase().toLowerCase());
            }
            features.setCount(ConstantsAndVariables.ScorePhraseMeasures.create(ConstantsAndVariables.ScorePhraseMeasures.DISTSIM.toString() + "-" + cluster), 1.0d);
        }
        if (this.constVars.usePatternEvalEditDistSame) {
            double editDistanceScoresThisClass = this.constVars.getEditDistanceScoresThisClass(str, candidatePhrase.getPhrase());
            if (!$assertionsDisabled && editDistanceScoresThisClass > 1.0d) {
                throw new AssertionError(" how come edit distance from the true class is " + editDistanceScoresThisClass + " for word " + candidatePhrase);
            }
            features.setCount(ConstantsAndVariables.ScorePhraseMeasures.EDITDISTSAME, editDistanceScoresThisClass);
        }
        if (this.constVars.usePatternEvalEditDistOther) {
            double editDistanceScoresOtherClass = this.constVars.getEditDistanceScoresOtherClass(str, candidatePhrase.getPhrase());
            if (!$assertionsDisabled && editDistanceScoresOtherClass > 1.0d) {
                throw new AssertionError(" how come edit distance from the true class is " + editDistanceScoresOtherClass + " for word " + candidatePhrase);
            }
            features.setCount(ConstantsAndVariables.ScorePhraseMeasures.EDITDISTOTHER, editDistanceScoresOtherClass);
        }
        if (this.constVars.usePatternEvalWordShape) {
            features.setCount(ConstantsAndVariables.ScorePhraseMeasures.WORDSHAPE, getWordShapeScore(candidatePhrase.getPhrase(), str));
        }
        if (this.constVars.usePatternEvalWordShapeStr) {
            features.setCount(ConstantsAndVariables.ScorePhraseMeasures.create(ConstantsAndVariables.ScorePhraseMeasures.WORDSHAPESTR + "-" + wordShape(candidatePhrase.getPhrase())), 1.0d);
        }
        if (this.constVars.usePatternEvalFirstCapital) {
            features.setCount(ConstantsAndVariables.ScorePhraseMeasures.ISFIRSTCAPITAL, StringUtils.isCapitalized(candidatePhrase.getPhrase()) ? 1.0d : 0.0d);
        }
        if (this.constVars.usePatternEvalBOW) {
            // One indicator feature per whitespace-separated word of the phrase.
            for (String word : candidatePhrase.getPhrase().split("\\s+")) {
                features.setCount(ConstantsAndVariables.ScorePhraseMeasures.create(ConstantsAndVariables.ScorePhraseMeasures.BOW + "-" + word), 1.0d);
            }
        }
        this.phraseScoresRaw.setCounter(candidatePhrase, features);
        return features;
    }

    /**
     * Scores a phrase with the trained classifier and caches the result in {@code learnedScores}.
     *
     * <p>For {@code LR} the score is the logistic classifier's probability for a datum labeled
     * {@code "true"}; for {@code SHIFTLR} it is the {@code "true"} entry of the multinomial
     * classifier's probabilities over the feature keys; for {@code DT}, {@code SVM}, {@code RF}
     * and {@code LINEAR} it is the {@code "true"} entry of {@code scoresOf}.
     *
     * @param classifier      trained classifier matching {@code scoreClassifierType}
     * @param candidatePhrase phrase to score
     * @param str             label being learned
     * @param z               if {@code true} features come from {@code getPhraseFeaturesForPattern},
     *                        otherwise from {@code getFeatures}
     * @param counter         passed through to {@code getFeatures}
     * @param counter2        passed through to {@code getFeatures}
     * @return the (possibly cached) score of the phrase
     * @throws RuntimeException if the classifier type is not one of the supported ones
     */
    public double scoreUsingClassifer(Classifier classifier, CandidatePhrase candidatePhrase, String str, boolean z, Counter<E> counter, Counter<E> counter2) {
        if (this.learnedScores.containsKey(candidatePhrase)) {
            return this.learnedScores.getCount(candidatePhrase);
        }
        final String positiveLabel = Boolean.TRUE.toString();
        final String negativeLabel = Boolean.FALSE.toString();
        final double score;
        if (this.scoreClassifierType.equals(ClassifierType.LR)) {
            final LogisticClassifier logistic = (LogisticClassifier) classifier;
            final Counter<ConstantsAndVariables.ScorePhraseMeasures> features =
                    z ? getPhraseFeaturesForPattern(str, candidatePhrase) : getFeatures(str, candidatePhrase, counter, counter2);
            score = logistic.probabilityOf(new RVFDatum(features, positiveLabel));
        } else if (this.scoreClassifierType.equals(ClassifierType.SHIFTLR)) {
            final MultinomialLogisticClassifier multinomial = (MultinomialLogisticClassifier) classifier;
            final Counter<ConstantsAndVariables.ScorePhraseMeasures> features =
                    z ? getPhraseFeaturesForPattern(str, candidatePhrase) : getFeatures(str, candidatePhrase, counter, counter2);
            score = multinomial.probabilityOf(new BasicDatum(features.keySet(), negativeLabel)).getCount(positiveLabel);
        } else if (this.scoreClassifierType.equals(ClassifierType.DT)
                || this.scoreClassifierType.equals(ClassifierType.SVM)
                || this.scoreClassifierType.equals(ClassifierType.RF)
                || this.scoreClassifierType.equals(ClassifierType.LINEAR)) {
            // These classifier types all go through the generic scoresOf API.
            final Counter<ConstantsAndVariables.ScorePhraseMeasures> features =
                    z ? getPhraseFeaturesForPattern(str, candidatePhrase) : getFeatures(str, candidatePhrase, counter, counter2);
            score = classifier.scoresOf(new RVFDatum(features, negativeLabel)).getCount(positiveLabel);
        } else {
            throw new RuntimeException("cannot identify classifier " + this.scoreClassifierType);
        }
        this.learnedScores.setCount(candidatePhrase, score);
        return score;
    }

    Counter<ConstantsAndVariables.ScorePhraseMeasures> getFeatures(String str, CandidatePhrase candidatePhrase, Counter<E> counter, Counter<E> counter2) {
        if (this.phraseScoresRaw.containsFirstKey(candidatePhrase)) {
            return this.phraseScoresRaw.getCounter((TwoDimensionalCounter<CandidatePhrase, ConstantsAndVariables.ScorePhraseMeasures>) candidatePhrase);
        }
        ClassicCounter classicCounter = new ClassicCounter();
        if (candidatePhrase.getFeatures() != null) {
            classicCounter.addAll(Counters.transform(candidatePhrase.getFeatures(), str2 -> {
                return ConstantsAndVariables.ScorePhraseMeasures.create(str2);
            }));
        } else {
            Redwood.log(ConstantsAndVariables.extremedebug, "features are null for " + candidatePhrase);
        }
        if (this.constVars.usePhraseEvalPatWtByFreq) {
            classicCounter.setCount(ConstantsAndVariables.ScorePhraseMeasures.PATWTBYFREQ, getPatTFIDFScore(candidatePhrase, counter, counter2));
        }
        if (this.constVars.usePhraseEvalSemanticOdds) {
            classicCounter.setCount(ConstantsAndVariables.ScorePhraseMeasures.SEMANTICODDS, getDictOddsScore(candidatePhrase, str, 0.0d));
        }
        if (this.constVars.usePhraseEvalGoogleNgram) {
            Double valueOf = Double.valueOf(getGoogleNgramScore(candidatePhrase));
            if (valueOf.isInfinite() || valueOf.isNaN()) {
                throw new RuntimeException("how is the google ngrams score " + valueOf + " for " + candidatePhrase);
            }
            classicCounter.setCount(ConstantsAndVariables.ScorePhraseMeasures.GOOGLENGRAM, valueOf.doubleValue());
        }
        if (this.constVars.usePhraseEvalDomainNgram) {
            Double valueOf2 = Double.valueOf(getDomainNgramScore(candidatePhrase.getPhrase()));
            if (valueOf2.isInfinite() || valueOf2.isNaN()) {
                throw new RuntimeException("how is the domain ngrams score " + valueOf2 + " for " + candidatePhrase + " when domain raw freq is " + Data.domainNGramRawFreq.getCount(candidatePhrase) + " and raw freq is " + Data.rawFreq.getCount(candidatePhrase));
            }
            classicCounter.setCount(ConstantsAndVariables.ScorePhraseMeasures.DOMAINNGRAM, valueOf2.doubleValue());
        }
        if (this.constVars.usePhraseEvalWordClass) {
            Integer num = this.constVars.getWordClassClusters().get(candidatePhrase.getPhrase());
            if (num == null) {
                num = this.constVars.getWordClassClusters().get(candidatePhrase.getPhrase().toLowerCase());
            }
            classicCounter.setCount(ConstantsAndVariables.ScorePhraseMeasures.create(ConstantsAndVariables.ScorePhraseMeasures.DISTSIM.toString() + "-" + num), 1.0d);
        }
        if (this.constVars.usePhraseEvalWordVector) {
            Map<String, double[]> similarities = getSimilarities(candidatePhrase.getPhrase());
            if (similarities == null) {
                computeSimWithWordVectors(Arrays.asList(candidatePhrase), CollectionUtils.unionAsSet(this.constVars.getLearnedWords(str).keySet(), this.constVars.getSeedLabelDictionary().get(str)), getAllPossibleNegativePhrases(str), str);
                similarities = getSimilarities(candidatePhrase.getPhrase());
            }
            if (!$assertionsDisabled && similarities == null) {
                throw new AssertionError(" Why are there no similarities for " + candidatePhrase);
            }
            double d = similarities.get(str)[PhraseScorer.Similarities.AVGSIM.ordinal()];
            double d2 = similarities.get(str)[PhraseScorer.Similarities.MAXSIM.ordinal()];
            double d3 = 0.0d;
            double d4 = Double.MIN_VALUE;
            double d5 = 0.0d;
            for (Map.Entry<String, double[]> entry : similarities.entrySet()) {
                if (!entry.getKey().equals(str)) {
                    double d6 = entry.getValue()[PhraseScorer.Similarities.NUMITEMS.ordinal()];
                    d3 += entry.getValue()[PhraseScorer.Similarities.AVGSIM.ordinal()] * d6;
                    d5 += d6;
                    double d7 = entry.getValue()[PhraseScorer.Similarities.MAXSIM.ordinal()];
                    if (d4 < d7) {
                        d4 = d7;
                    }
                }
            }
            classicCounter.setCount(ConstantsAndVariables.ScorePhraseMeasures.WORDVECPOSSIMAVG, d);
            classicCounter.setCount(ConstantsAndVariables.ScorePhraseMeasures.WORDVECPOSSIMMAX, d2);
            classicCounter.setCount(ConstantsAndVariables.ScorePhraseMeasures.WORDVECNEGSIMAVG, d3 / d5);
            classicCounter.setCount(ConstantsAndVariables.ScorePhraseMeasures.WORDVECNEGSIMAVG, d4);
        }
        if (this.constVars.usePhraseEvalEditDistSame) {
            double editDistanceScoresThisClass = this.constVars.getEditDistanceScoresThisClass(str, candidatePhrase.getPhrase());
            if (!$assertionsDisabled && editDistanceScoresThisClass > 1.0d) {
                throw new AssertionError(" how come edit distance from the true class is " + editDistanceScoresThisClass + " for word " + candidatePhrase);
            }
            classicCounter.setCount(ConstantsAndVariables.ScorePhraseMeasures.EDITDISTSAME, editDistanceScoresThisClass);
        }
        if (this.constVars.usePhraseEvalEditDistOther) {
            double editDistanceScoresOtherClass = this.constVars.getEditDistanceScoresOtherClass(str, candidatePhrase.getPhrase());
            if (!$assertionsDisabled && editDistanceScoresOtherClass > 1.0d) {
                throw new AssertionError(" how come edit distance from the true class is " + editDistanceScoresOtherClass + " for word " + candidatePhrase);
            }
            classicCounter.setCount(ConstantsAndVariables.ScorePhraseMeasures.EDITDISTOTHER, editDistanceScoresOtherClass);
        }
        if (this.constVars.usePhraseEvalWordShape) {
            classicCounter.setCount(ConstantsAndVariables.ScorePhraseMeasures.WORDSHAPE, getWordShapeScore(candidatePhrase.getPhrase(), str));
        }
        if (this.constVars.usePhraseEvalWordShapeStr) {
            classicCounter.setCount(ConstantsAndVariables.ScorePhraseMeasures.create(ConstantsAndVariables.ScorePhraseMeasures.WORDSHAPESTR + "-" + wordShape(candidatePhrase.getPhrase())), 1.0d);
        }
        if (this.constVars.usePhraseEvalFirstCapital) {
            classicCounter.setCount(ConstantsAndVariables.ScorePhraseMeasures.ISFIRSTCAPITAL, StringUtils.isCapitalized(candidatePhrase.getPhrase()) ? 1.0d : 0.0d);
        }
        if (this.constVars.usePhraseEvalBOW) {
            for (String str3 : candidatePhrase.getPhrase().split("\\s+")) {
                classicCounter.setCount(ConstantsAndVariables.ScorePhraseMeasures.create(ConstantsAndVariables.ScorePhraseMeasures.BOW + "-" + str3), 1.0d);
            }
        }
        this.phraseScoresRaw.setCounter(candidatePhrase, classicCounter);
        return classicCounter;
    }

    /*
     * Class initializer: records whether assertions are disabled for this class
     * and gives the static word-vector and similarity fields their starting values.
     */
    static {
        // Negation of the class's desired assertion status; guards the
        // assertion checks elsewhere in this class.
        boolean assertionsEnabled = ScorePhrasesLearnFeatWt.class.desiredAssertionStatus();
        $assertionsDisabled = !assertionsEnabled;

        // Fresh instances for the two similarity stores; the two constructions
        // are independent of each other.
        similaritiesWithLabeledPhrases = new ConcurrentHashMap();
        cacheSimilarities = new ConcurrentHashCounter();

        // No word vectors are set at class-initialization time.
        wordVectors = null;
    }
}
