package edu.stanford.nlp.parser.lexparser;

import edu.stanford.nlp.ie.ChineseMorphFeatureSets;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.stats.IntCounter;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.international.pennchinese.RadicalMap;
import edu.stanford.nlp.util.Pair;
import edu.stanford.nlp.util.logging.Redwood;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;

/* loaded from: input_file:edu/stanford/nlp/parser/lexparser/ChineseWordFeatureExtractor.class */
/**
 * Produces string-valued features for a single Chinese word, for use by the
 * lexicon of the lexicalized parser.
 *
 * <p>Which feature families are emitted is controlled by a set of boolean
 * switches that are configured together through {@link #setFeatureLevel(int)}.
 * Word frequencies used by the frequency feature are accumulated through the
 * {@code train} methods, and an optional whitelist of features can be
 * installed with {@link #applyFeatureCountThreshold(Collection, int)}.
 */
public class ChineseWordFeatureExtractor implements WordFeatureExtractor {
    private static final Redwood.RedwoodChannels log = Redwood.channels(ChineseWordFeatureExtractor.class);
    private static final long serialVersionUID = -4327267414095852504L;

    /** Emit singleton/affix features looked up in the morphological feature sets. */
    boolean morpho;
    /** Emit first/last/contained character features. */
    boolean chars;
    /** Emit first/last/contained radical features. */
    boolean rads;
    /** Emit the word-length feature. */
    boolean useLength;
    /** Emit the binned word-frequency feature. */
    boolean useFreq;
    /** Emit character-bigram features (only consulted when {@link #chars} is set). */
    boolean bigrams;
    /** Emit pairwise conjunctions of all morphological features. */
    boolean conjunctions;
    /** Emit a prefix-and-suffix conjunction per affix feature (suppressed when {@link #conjunctions} is set). */
    boolean mildConjunctions;

    private static final String featureDir = "gbfeatures";

    /** Features that survived thresholding; {@code null} means "no filtering". */
    private Collection<String> threshedFeatures;

    /** When true, the word-identity and word-frequency features are not emitted. */
    public boolean turnOffWordFeatures = false;

    private ChineseMorphFeatureSets cmfs = null;
    private IntCounter<String> wordCounter = new IntCounter<>();

    /**
     * Resets every feature switch and then enables the switches for the given level.
     *
     * <p>Levels are cumulative: each {@code case} deliberately falls through to
     * the ones below it, so level 3 enables everything that levels 2, 1, 0 and
     * -1 enable. Levels 1 and above also load the morphological feature sets.
     * An unsupported level is logged and replaced by level 0.
     *
     * @param i the feature level, from -1 to 3
     */
    @Override // edu.stanford.nlp.parser.lexparser.WordFeatureExtractor
    public void setFeatureLevel(int i) {
        this.morpho = false;
        this.chars = false;
        this.rads = false;
        this.useLength = false;
        this.useFreq = false;
        this.bigrams = false;
        this.conjunctions = false;
        this.mildConjunctions = false;
        switch (i) {
            case 3:
                this.bigrams = true;
                this.conjunctions = true;
                // fall through
            case 2:
                this.chars = true;
                // fall through
            case 1:
                this.morpho = true;
                this.mildConjunctions = true;
                loadFeatures();
                // fall through
            case 0:
                this.rads = true;
                // fall through
            case -1:
                this.useLength = true;
                this.useFreq = true;
                return;
            default:
                log.info("Feature level " + i + " is not supported in ChineseWordFeatureExtractor.");
                log.info("Using level 0");
                setFeatureLevel(0);
                return;
        }
    }

    /**
     * Creates an extractor configured for the given feature level.
     *
     * @param i the feature level, as accepted by {@link #setFeatureLevel(int)}
     */
    public ChineseWordFeatureExtractor(int i) {
        setFeatureLevel(i);
    }

    /**
     * Adds the words of every tree to the word-frequency counter with weight 1.0.
     *
     * @param collection the trees to count words from
     */
    public void train(Collection<Tree> collection) {
        train(collection, 1.0d);
    }

    /**
     * Adds the words of every tree to the word-frequency counter.
     *
     * @param collection the trees to count words from
     * @param d the weight added per word occurrence
     */
    public void train(Collection<Tree> collection, double d) {
        for (Tree tree : collection) {
            train(tree, d);
        }
    }

    /**
     * Adds the words of one tree's tagged yield to the word-frequency counter.
     *
     * @param tree the tree to count words from
     * @param d the weight added per word occurrence
     */
    public void train(Tree tree, double d) {
        List<TaggedWord> taggedWords = tree.taggedYield();
        train(taggedWords, d);
    }

    /**
     * Adds the words of one tagged sentence to the word-frequency counter.
     *
     * @param list the tagged words to count
     * @param d the weight added per word occurrence
     */
    public void train(List<TaggedWord> list, double d) {
        for (TaggedWord taggedWord : list) {
            // The counter is keyed by the word string itself.
            this.wordCounter.incrementCount(taggedWord.word(), d);
        }
    }

    /** Loads the morphological feature sets once; later calls are no-ops. */
    private void loadFeatures() {
        if (this.cmfs != null) {
            return;
        }
        this.cmfs = new ChineseMorphFeatureSets(featureDir);
        log.info("Total affix features: " + this.cmfs.getAffixFeatures().size());
    }

    /**
     * Counts the features produced for each given word and keeps, as the
     * whitelist for later {@link #makeFeatures(String)} calls, the features
     * the counter reports as being above the threshold.
     *
     * @param collection the words to generate features for
     * @param i the count threshold passed to the counter's {@code keysAbove}
     */
    @Override // edu.stanford.nlp.parser.lexparser.WordFeatureExtractor
    public void applyFeatureCountThreshold(Collection<String> collection, int i) {
        IntCounter<String> featureCounts = new IntCounter<>();
        for (String word : collection) {
            for (String feature : makeFeatures(word)) {
                featureCounts.incrementCount(feature);
            }
        }
        this.threshedFeatures = featureCounts.keysAbove(i);
        log.info((featureCounts.size() - this.threshedFeatures.size()) + " word features removed due to thresholding.");
    }

    /**
     * Builds the feature list for one word according to the enabled switches.
     *
     * <p>Features are appended in a fixed order: morphological features (and
     * their conjunctions), word identity, radicals, characters and bigrams,
     * length, frequency bin, and finally the constant {@code "PR"} feature.
     * If a feature whitelist has been installed, features not on it are
     * dropped. For an empty word the features that inspect individual
     * characters are skipped instead of failing on an out-of-range index.
     *
     * @param str the word to featurize
     * @return a new, mutable collection of feature strings
     */
    @Override // edu.stanford.nlp.parser.lexparser.WordFeatureExtractor
    public Collection<String> makeFeatures(String str) {
        List<String> features = new ArrayList<>();
        // Mirrors the length guard the bigram features already apply.
        boolean hasChars = str.length() > 0;

        if (this.morpho && hasChars) {
            addMorphFeatures(str, features);
        }
        if (!this.turnOffWordFeatures) {
            features.add(str + "-W");
        }
        if (this.rads && hasChars) {
            addRadicalFeatures(str, features);
        }
        if (this.chars && hasChars) {
            addCharFeatures(str, features);
        }
        if (this.useLength) {
            features.add(str.length() + "-L");
        }
        if (this.useFreq && !this.turnOffWordFeatures) {
            features.add(frequencyBin(this.wordCounter.getIntCount(str)) + "-FQ");
        }
        features.add("PR");
        if (this.threshedFeatures != null) {
            features.retainAll(this.threshedFeatures);
        }
        return features;
    }

    /** Appends singleton, prefix/suffix and conjunction features for a non-empty word. */
    private void addMorphFeatures(String str, List<String> features) {
        Character first = Character.valueOf(str.charAt(0));
        Character last = Character.valueOf(str.charAt(str.length() - 1));

        for (Map.Entry<String, Set<Character>> entry : this.cmfs.getSingletonFeatures().entrySet()) {
            if (entry.getValue().contains(first)) {
                features.add(entry.getKey() + "-1");
            }
        }
        for (Map.Entry<String, Pair<Set<Character>, Set<Character>>> entry : this.cmfs.getAffixFeatures().entrySet()) {
            boolean prefixMatch = entry.getValue().first().contains(first);
            boolean suffixMatch = entry.getValue().second().contains(last);
            if (prefixMatch) {
                features.add(entry.getKey() + "-P");
            }
            if (suffixMatch) {
                features.add(entry.getKey() + "-S");
            }
            if (prefixMatch && suffixMatch && this.mildConjunctions && !this.conjunctions) {
                features.add(entry.getKey() + "-PS");
            }
        }
        if (this.conjunctions) {
            // Pair up only the features present before this loop started;
            // the conjunctions being appended are not themselves conjoined.
            int baseCount = features.size();
            for (int i = 1; i < baseCount; i++) {
                String later = features.get(i);
                for (int j = 0; j < i; j++) {
                    features.add(later + "&&" + features.get(j));
                }
            }
        }
    }

    /** Appends first/last/contained radical features for a non-empty word. */
    private void addRadicalFeatures(String str, List<String> features) {
        features.add(RadicalMap.getRadical(str.charAt(0)) + "-FR");
        features.add(RadicalMap.getRadical(str.charAt(str.length() - 1)) + "-LR");
        for (int i = 0; i < str.length(); i++) {
            features.add(RadicalMap.getRadical(str.charAt(i)) + "-CR");
        }
    }

    /** Appends first/last/contained character features and, if enabled, bigram features. */
    private void addCharFeatures(String str, List<String> features) {
        features.add(str.charAt(0) + "-FC");
        features.add(str.charAt(str.length() - 1) + "-LC");
        for (int i = 0; i < str.length(); i++) {
            features.add(str.charAt(i) + "-CC");
        }
        if (this.bigrams && str.length() > 1) {
            features.add(str.substring(0, 2) + "-FB");
            features.add(str.substring(str.length() - 2) + "-LB");
            for (int end = 2; end <= str.length(); end++) {
                features.add(str.substring(end - 2, end) + "-CB");
            }
        }
    }

    /** Maps a raw training count to one of six bins (0 for counts up to 1, 5 for counts above 50). */
    private static int frequencyBin(int count) {
        if (count <= 1) {
            return 0;
        }
        if (count <= 3) {
            return 1;
        }
        if (count <= 6) {
            return 2;
        }
        if (count <= 15) {
            return 3;
        }
        if (count <= 50) {
            return 4;
        }
        return 5;
    }
}
