package edu.stanford.nlp.patterns;

import edu.stanford.nlp.patterns.Pattern;
import edu.stanford.nlp.process.WordShapeClassifier;
import edu.stanford.nlp.semgraph.semgrex.ssurgeon.AddDep;
import edu.stanford.nlp.stats.ClassicCounter;
import edu.stanford.nlp.stats.Counter;
import edu.stanford.nlp.stats.Counters;
import edu.stanford.nlp.stats.TwoDimensionalCounter;
import edu.stanford.nlp.util.ArgumentParser;
import edu.stanford.nlp.util.GoogleNGramsSQLBacked;
import edu.stanford.nlp.util.logging.Redwood;
import java.io.IOException;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

/* loaded from: input_file:edu/stanford/nlp/patterns/PhraseScorer.class */
/**
 * Base class for phrase scorers: subclasses implement {@code scorePhrases} and return a
 * {@link Counter} of scores keyed by {@link CandidatePhrase}.
 *
 * <p>This class supplies the individual feature scores that the visible code computes:
 * a pattern-weight score, Google/domain n-gram ratios, a distributional-similarity cluster
 * weight, a word-shape ratio and a dictionary-odds weight. Several of them read the static
 * counters in {@code Data} and the shared {@link ConstantsAndVariables} instance.
 *
 * @param <E> the pattern type the scores are computed over
 */
public abstract class PhraseScorer<E extends Pattern> {
    private static final Redwood.RedwoodChannels log = Redwood.channels(PhraseScorer.class);

    /** Shared configuration and caches (word clusters, word-shape cache, per-label weights). */
    ConstantsAndVariables constVars;

    /** Fallback score used by {@link #getDistSimWtScore} when no cluster weight is available. */
    double OOVExternalFeatWt = 0.5d;

    // The three defaults below are not read anywhere in this class;
    // presumably they are consumed by subclasses — confirm before changing.
    double OOVdictOdds = 1.0E-10d;
    double OOVDomainNgramScore = 1.0E-10d;
    double OOVGoogleNgramScore = 1.0E-10d;

    /** When true, {@link #getPatTFIDFScore} sums per-pattern weights; otherwise each pattern counts as 1. */
    @ArgumentParser.Option(name = "usePatternWeights")
    public boolean usePatternWeights = true;

    // Not read in this class; presumably selects how subclasses normalize word frequencies.
    @ArgumentParser.Option(name = "wordFreqNorm")
    Normalization wordFreqNorm = Normalization.LOG;

    /** When true, multi-word phrases are scored by the mean of their word scores instead of the minimum. */
    @ArgumentParser.Option(name = "useAvgInsteadofMinPhraseScoring")
    boolean useAvgInsteadofMinPhraseScoring = false;

    /** Scores exposed through {@link #getLearnedScores()}; nothing in this class writes to it. */
    Counter<CandidatePhrase> learnedScores = new ClassicCounter<>();

    /** Options for the {@code wordFreqNorm} setting. */
    public enum Normalization {
        NONE,
        SQRT,
        LOG
    }

    /** Similarity modes; not referenced elsewhere in this class. */
    public enum Similarities {
        NUMITEMS,
        AVGSIM,
        MAXSIM
    }

    /**
     * Creates a scorer bound to the given shared configuration.
     *
     * @param constantsAndVariables configuration object stored in {@link #constVars}
     */
    public PhraseScorer(ConstantsAndVariables constantsAndVariables) {
        this.constVars = constantsAndVariables;
    }

    /**
     * Scores phrases using pattern evidence. The meaning of each argument is defined by the
     * implementing subclass; it cannot be determined from this class.
     *
     * @return a counter of scores keyed by candidate phrase
     * @throws IOException declared for implementations
     * @throws ClassNotFoundException declared for implementations
     */
    /* JADX INFO: Access modifiers changed from: package-private */
    public abstract Counter<CandidatePhrase> scorePhrases(String str, TwoDimensionalCounter<CandidatePhrase, E> twoDimensionalCounter, TwoDimensionalCounter<CandidatePhrase, E> twoDimensionalCounter2, Counter<E> counter, Set<CandidatePhrase> set, boolean z) throws IOException, ClassNotFoundException;

    /**
     * Returns the live {@link #learnedScores} counter (not a copy).
     */
    /* JADX INFO: Access modifiers changed from: package-private */
    public Counter<CandidatePhrase> getLearnedScores() {
        return this.learnedScores;
    }

    /**
     * Sums the weights of the patterns that extracted a phrase and divides the sum by the
     * phrase's frequency in {@code Data.processedDataFreq}.
     *
     * <p>Only the keys of {@code counter} are used. With {@link #usePatternWeights} enabled each
     * key contributes its count in {@code counter2}; otherwise each key contributes 1. Keys whose
     * weight is zero are logged and then <em>removed from {@code counter}</em>, so the caller's
     * counter is modified.
     *
     * @param candidatePhrase the phrase being scored
     * @param counter patterns associated with the phrase (mutated: zero-weight patterns are removed)
     * @param counter2 weight of each pattern
     * @return the weighted sum divided by the processed-corpus frequency, or 0 when that frequency is 0
     */
    /* JADX INFO: Access modifiers changed from: package-private */
    public double getPatTFIDFScore(CandidatePhrase candidatePhrase, Counter<E> counter, Counter<E> counter2) {
        if (Data.processedDataFreq.getCount(candidatePhrase) == 0.0d) {
            Redwood.log(Redwood.WARN, "How come the processed corpus freq has count of " + candidatePhrase + " 0. The count in raw freq is " + Data.rawFreq.getCount(candidatePhrase) + " and the Data.rawFreq size is " + Data.rawFreq.size());
            return 0.0d;
        }
        double weightSum = 0.0d;
        // Removal is deferred until after the loop so the counter is not modified while iterating.
        Set<E> zeroWeightPatterns = new HashSet<>();
        for (Map.Entry<E, Double> entry : counter.entrySet()) {
            E pattern = entry.getKey();
            double weight = 1.0d;
            if (this.usePatternWeights) {
                weight = counter2.getCount(pattern);
                if (weight == 0.0d) {
                    Redwood.log(Redwood.FORCE, "Warning: Weight zero for " + pattern + ". May be pattern was removed when choosing other patterns (if subsumed by another pattern).");
                    zeroWeightPatterns.add(pattern);
                }
            }
            weightSum += weight;
        }
        Counters.removeKeys(counter, zeroWeightPatterns);
        return weightSum / Data.processedDataFreq.getCount(candidatePhrase);
    }

    /**
     * Relates the phrase's frequency in {@code Data.rawFreq} to its Google n-gram count.
     *
     * <p>The n-gram count is the sum of the lookups for the lower-cased phrase and for the phrase
     * as written. Each lookup is checked separately for a negative "not found" result before
     * summing: comparing the sum with -1 would miss the case where both lookups fail (sum -2) and
     * would lower a real count by one when only one of them fails.
     *
     * @param candidatePhrase the phrase being scored
     * @return 0 when there is no positive n-gram count; 1 when the phrase is absent from
     *     {@code Data.rawFreq}; otherwise
     *     {@code (1 + rawFreq * sqrt(Data.ratioGoogleNgramFreqWithDataFreq)) / ngramCount}
     */
    public static double getGoogleNgramScore(CandidatePhrase candidatePhrase) {
        String phrase = candidatePhrase.getPhrase();
        double lowerCaseCount = GoogleNGramsSQLBacked.getCount(phrase.toLowerCase());
        double exactCount = GoogleNGramsSQLBacked.getCount(phrase);
        // Negative results are treated as "not found" and contribute nothing to the total.
        double ngramCount = Math.max(lowerCaseCount, 0.0d) + Math.max(exactCount, 0.0d);
        if (ngramCount <= 0.0d) {
            // No usable n-gram evidence; also avoids dividing by zero below.
            return 0.0d;
        }
        if (!Data.rawFreq.containsKey(candidatePhrase)) {
            return 1.0d;
        }
        return (1.0d + (Data.rawFreq.getCount(candidatePhrase) * Math.sqrt(Data.ratioGoogleNgramFreqWithDataFreq))) / ngramCount;
    }

    /**
     * Relates a phrase's frequency in {@code Data.rawFreq} to its frequency in
     * {@code Data.domainNGramRawFreq}.
     *
     * <p>The domain counter is probed with up to three keys, in order: the phrase as given, the
     * phrase with every {@code AddDep.ATOM_DELIMITER} match removed, and the phrase with every
     * hyphen removed. The first key present in the counter is used for both lookups.
     *
     * @param str the phrase text
     * @return {@code (1 + rawFreq * sqrt(Data.ratioDomainNgramFreqWithDataFreq)) / domainFreq} for
     *     the matching key, or 0 (after logging) when none of the keys is present
     */
    public double getDomainNgramScore(String str) {
        String key = str;
        if (!Data.domainNGramRawFreq.containsKey(key)) {
            // NOTE(review): the delimiter constant comes from an unrelated ssurgeon class, which
            // looks like a decompiler constant-substitution artifact — confirm its value.
            key = str.replaceAll(AddDep.ATOM_DELIMITER, "");
        }
        if (!Data.domainNGramRawFreq.containsKey(key)) {
            key = str.replaceAll("-", "");
        }
        if (!Data.domainNGramRawFreq.containsKey(key)) {
            log.info("domain count 0 for " + str);
            return 0.0d;
        }
        // NOTE(review): Data.rawFreq is queried with CandidatePhrase keys elsewhere in this class
        // but with a String here; if it is keyed by CandidatePhrase this lookup cannot match.
        // Left unchanged because the intended key cannot be determined from this file — confirm.
        return (1.0d + (Data.rawFreq.getCount(key) * Math.sqrt(Data.ratioDomainNgramFreqWithDataFreq))) / Data.domainNGramRawFreq.getCount(key);
    }

    /**
     * Returns the distributional-similarity weight of a phrase under a label.
     *
     * <p>If the whole phrase (or its lower-cased form) has a word-class cluster with a weight
     * under {@code str2}, that weight is returned. Otherwise the phrase is split on whitespace;
     * a single-word phrase gets {@link #OOVExternalFeatWt}, and a multi-word phrase gets the
     * minimum of its per-word weights, or their mean when
     * {@link #useAvgInsteadofMinPhraseScoring} is set. Words without a weighted cluster
     * contribute {@link #OOVExternalFeatWt}.
     *
     * @param str the phrase text
     * @param str2 the key into {@code constVars.distSimWeights} (the label)
     * @return the cluster weight as described above
     */
    public double getDistSimWtScore(String str, String str2) {
        Integer phraseCluster = lookupWordCluster(str);
        if (phraseCluster != null && this.constVars.distSimWeights.get(str2).containsKey(phraseCluster)) {
            return this.constVars.distSimWeights.get(str2).getCount(phraseCluster);
        }
        String[] words = str.split("\\s+");
        if (words.length < 2) {
            return this.OOVExternalFeatWt;
        }
        double sum = 0.0d;
        double min = Double.MAX_VALUE;
        for (String word : words) {
            double wordScore = this.OOVExternalFeatWt;
            // The fallback must populate the per-word cluster, not the phrase-level one.
            Integer wordCluster = lookupWordCluster(word);
            if (wordCluster != null && this.constVars.distSimWeights.get(str2).containsKey(wordCluster)) {
                wordScore = this.constVars.distSimWeights.get(str2).getCount(wordCluster);
            }
            if (wordScore < min) {
                min = wordScore;
            }
            sum += wordScore;
        }
        // Average over the number of words, not the number of characters in the phrase.
        return this.useAvgInsteadofMinPhraseScoring ? sum / words.length : min;
    }

    /** Looks up a word-class cluster for the text as given, falling back to its lower-cased form. */
    private Integer lookupWordCluster(String text) {
        Integer cluster = this.constVars.getWordClassClusters().get(text);
        if (cluster == null) {
            cluster = this.constVars.getWordClassClusters().get(text.toLowerCase());
        }
        return cluster;
    }

    /**
     * Returns the word shape of a string, computing it with {@code constVars.wordShaper} on a
     * cache miss and storing the result in the word-shape cache.
     *
     * @param str the text to classify
     * @return the cached or newly computed shape
     */
    public String wordShape(String str) {
        String shape = this.constVars.getWordShapeCache().get(str);
        if (shape == null) {
            shape = WordShapeClassifier.wordShape(str, this.constVars.wordShaper);
            this.constVars.getWordShapeCache().put(str, shape);
        }
        return shape;
    }

    /**
     * Returns how strongly a phrase's word shape is associated with a label.
     *
     * @param str the phrase text whose shape is looked up
     * @param str2 the label
     * @return the shape's count under {@code str2} divided by (its count summed over all labels + 1)
     */
    public double getWordShapeScore(String str, String str2) {
        String shape = wordShape(str);
        double countForLabel = 0.0d;
        double countAllLabels = 0.0d;
        for (Map.Entry<String, Counter<String>> entry : this.constVars.getWordShapesForLabels().entrySet()) {
            double shapeCount = entry.getValue().getCount(shape);
            if (entry.getKey().equals(str2)) {
                countForLabel = shapeCount;
            }
            countAllLabels += shapeCount;
        }
        // The +1 keeps the denominator non-zero when the shape was never seen.
        return countForLabel / (countAllLabels + 1.0d);
    }

    /**
     * Returns the dictionary-odds weight of a phrase under a label.
     *
     * @param candidatePhrase the phrase being scored
     * @param str the label used to select the weights from {@code constVars.dictOddsWeights}
     * @param d default weight for phrases and words missing from the weights
     * @return the phrase's own weight when present, otherwise the value of
     *     {@link #getPhraseWeightFromWords}
     * @throws AssertionError if assertions are enabled and no weights exist for the label
     */
    public double getDictOddsScore(CandidatePhrase candidatePhrase, String str, double d) {
        Counter<CandidatePhrase> weights = this.constVars.dictOddsWeights.get(str);
        assert weights != null : "dictOddsWordWeights is null for label " + str;
        return weights.containsKey(candidatePhrase) ? weights.getCount(candidatePhrase) : getPhraseWeightFromWords(weights, candidatePhrase, d);
    }

    /**
     * Derives a phrase weight from the weights of its whitespace-separated words.
     *
     * <p>A single-word phrase gets its own weight from {@code counter}, or {@code d} when absent.
     * A multi-word phrase gets the minimum of its word weights, or their mean when
     * {@link #useAvgInsteadofMinPhraseScoring} is set; words absent from {@code counter}
     * contribute {@code d}.
     *
     * @param counter weights keyed by candidate phrase
     * @param candidatePhrase the phrase being scored
     * @param d default weight for missing entries
     * @return the derived weight
     */
    public double getPhraseWeightFromWords(Counter<CandidatePhrase> counter, CandidatePhrase candidatePhrase, double d) {
        String[] words = candidatePhrase.getPhrase().split("\\s+");
        if (words.length < 2) {
            return counter.containsKey(candidatePhrase) ? counter.getCount(candidatePhrase) : d;
        }
        double sum = 0.0d;
        double min = Double.MAX_VALUE;
        for (String word : words) {
            // Look the weight up with the same CandidatePhrase key used for the membership test;
            // the counter is keyed by CandidatePhrase, so a raw String key cannot match.
            CandidatePhrase wordPhrase = CandidatePhrase.createOrGet(word);
            double wordWeight = counter.containsKey(wordPhrase) ? counter.getCount(wordPhrase) : d;
            if (wordWeight < min) {
                min = wordWeight;
            }
            sum += wordWeight;
        }
        // Average over the number of words, not the number of characters in the phrase.
        return this.useAvgInsteadofMinPhraseScoring ? sum / words.length : min;
    }

    /**
     * Scores the given phrases without pattern evidence. The meaning of each argument is defined
     * by the implementing subclass.
     *
     * @return a counter of scores keyed by candidate phrase
     * @throws IOException declared for implementations
     * @throws ClassNotFoundException declared for implementations
     */
    public abstract Counter<CandidatePhrase> scorePhrases(String str, Set<CandidatePhrase> set, boolean z) throws IOException, ClassNotFoundException;

    /**
     * Reports why the given phrases were chosen; the output format is defined by the subclass.
     *
     * @param counter the chosen phrases with their scores
     */
    public abstract void printReasonForChoosing(Counter<CandidatePhrase> counter);
}
