package edu.stanford.nlp.parser.eval;

import edu.stanford.nlp.international.Language;
import edu.stanford.nlp.international.arabic.ArabicMorphoFeatureSpecification;
import edu.stanford.nlp.international.french.FrenchMorphoFeatureSpecification;
import edu.stanford.nlp.international.morph.MorphoFeatureSpecification;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.Label;
import edu.stanford.nlp.parser.lexparser.TreebankLangParserParams;
import edu.stanford.nlp.quoteattribution.Sieves.MSSieves.BaselineTopSpeakerSieve;
import edu.stanford.nlp.semgraph.semgrex.ssurgeon.AddDep;
import edu.stanford.nlp.stats.ClassicCounter;
import edu.stanford.nlp.stats.TwoDimensionalIntCounter;
import edu.stanford.nlp.trees.DiskTreebank;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.Pair;
import edu.stanford.nlp.util.logging.Redwood;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;

/* loaded from: input_file:edu/stanford/nlp/parser/eval/TreebankFactoredLexiconStats.class */
/**
 * Command-line tool that reads a treebank from disk and prints statistics about its
 * factored lexicon: counts of words, tags, lemmas, morphological ("rich") tags and the
 * reduced feature tags obtained by activating a chosen set of morphological features.
 *
 * <p>Usage: {@code java TreebankFactoredLexiconStats language filename features}, where
 * {@code language} is the name of a {@link Language} constant, {@code filename} is the
 * treebank path and {@code features} is a comma-separated list of
 * {@link MorphoFeatureSpecification.MorphoFeatureType} names.
 *
 * <p>Only two configurations exist: {@link Language#Arabic} selects the Arabic factored
 * options and feature specification; every other language is treated as French.
 */
public class TreebankFactoredLexiconStats {
    /** Logging channel for this class; no code in this class currently writes to it. */
    private static final Redwood.RedwoodChannels log = Redwood.channels(TreebankFactoredLexiconStats.class);

    /**
     * Loads the treebank, accumulates per-token counts and prints the report to standard output.
     *
     * @param strArr exactly three arguments: language, treebank path, comma-separated feature names.
     *               With any other argument count a usage message is printed to standard error
     *               and the JVM exits with status -1.
     * @throws IllegalArgumentException if the language or a feature name is not a valid enum constant
     */
    public static void main(String[] strArr) {
        if (strArr.length != 3) {
            System.err.printf("Usage: java %s language filename features%n", TreebankFactoredLexiconStats.class.getName());
            System.exit(-1);
        }

        Language language = Language.valueOf(strArr[0]);
        boolean isArabic = language == Language.Arabic;
        TreebankLangParserParams tlpp = language.params;
        // Anything that is not Arabic falls back to the French factored configuration.
        tlpp.setOptionFlag(new String[]{isArabic ? "-arabicFactored" : "-frenchFactored"}, 0);

        DiskTreebank treebank = tlpp.diskTreebank();
        treebank.loadPath(strArr[1]);

        MorphoFeatureSpecification morphoSpec;
        if (isArabic) {
            morphoSpec = new ArabicMorphoFeatureSpecification();
        } else {
            morphoSpec = new FrenchMorphoFeatureSpecification();
        }
        for (String featureName : strArr[2].trim().split(",")) {
            morphoSpec.activate(MorphoFeatureSpecification.MorphoFeatureType.valueOf(featureName));
        }

        // The integer constructor arguments are initial capacities only; they do not limit the counters.
        // NOTE(review): FORWARD_WINDOW comes from an unrelated class and is used here purely as a
        // capacity value, which looks like a decompiler constant-folding artifact; confirm and
        // replace with a local literal.
        ClassicCounter<String> wordTagCounter = new ClassicCounter<String>(30000);
        ClassicCounter<String> morphTagCounter = new ClassicCounter<String>(BaselineTopSpeakerSieve.FORWARD_WINDOW);
        ClassicCounter<String> morphCounter = new ClassicCounter<String>(BaselineTopSpeakerSieve.FORWARD_WINDOW);
        ClassicCounter<String> wordCounter = new ClassicCounter<String>(30000);
        ClassicCounter<String> tagCounter = new ClassicCounter<String>(300);
        ClassicCounter<String> lemmaCounter = new ClassicCounter<String>(25000);
        ClassicCounter<String> lemmaTagCounter = new ClassicCounter<String>(25000);
        ClassicCounter<String> richTagCounter = new ClassicCounter<String>(1000);
        ClassicCounter<String> reducedTagCounter = new ClassicCounter<String>(BaselineTopSpeakerSieve.FORWARD_WINDOW);
        ClassicCounter<String> reducedTagLemmaCounter = new ClassicCounter<String>(BaselineTopSpeakerSieve.FORWARD_WINDOW);
        Map<String, Set<String>> wordLemmaMap = Generics.newHashMap();
        TwoDimensionalIntCounter<String, String> lemmaReducedTagCounter = new TwoDimensionalIntCounter<String, String>(30000);
        TwoDimensionalIntCounter<String, String> reducedTagTagCounter = new TwoDimensionalIntCounter<String, String>(BaselineTopSpeakerSieve.FORWARD_WINDOW);
        TwoDimensionalIntCounter<String, String> tagReducedTagCounter = new TwoDimensionalIntCounter<String, String>(300);

        int numTrees = 0;
        for (Tree tree : treebank) {
            // Apply the language-specific transformation to every non-leaf node of the tree.
            for (Tree subTree : tree) {
                if (!subTree.isLeaf()) {
                    tlpp.transformTree(subTree, tree);
                }
            }

            List<Label> pretermYield = tree.preTerminalYield();
            List<Label> leafYield = tree.yield();
            assert leafYield.size() == pretermYield.size();

            int numTokens = leafYield.size();
            for (int idx = 0; idx < numTokens; idx++) {
                String tag = pretermYield.get(idx).value();
                Label leaf = leafYield.get(idx);
                String word = leaf.value();
                // Leaves must be CoreLabels; their original text is handed to splitMorphString
                // as the morphological analysis string.
                String morph = ((CoreLabel) leaf).originalText();

                Pair<String, String> lemmaAndRichTag = MorphoFeatureSpecification.splitMorphString(word, morph);
                String lemma = lemmaAndRichTag.first();
                String richTag = lemmaAndRichTag.second();
                if (tag.contains("MW")) {
                    lemma = lemma + "-MWE";
                }

                lemmaCounter.incrementCount(lemma);
                lemmaTagCounter.incrementCount(lemma + tag);
                richTagCounter.incrementCount(richTag);

                String reducedTag = morphoSpec.strToFeatures(richTag).toString();
                reducedTagCounter.incrementCount(reducedTag);
                reducedTagLemmaCounter.incrementCount(reducedTag + lemma);

                wordTagCounter.incrementCount(word + tag);
                morphTagCounter.incrementCount(morph + tag);
                morphCounter.incrementCount(morph);
                wordCounter.incrementCount(word);
                tagCounter.incrementCount(tag);

                String reducedTagKey = reducedTag.equals("") ? "NONE" : reducedTag;

                // Record the lemma for this word. The set must receive the lemma on the first
                // sighting too; otherwise a word seen once ends up with an empty lemma set.
                Set<String> lemmas = wordLemmaMap.get(word);
                if (lemmas == null) {
                    lemmas = Generics.newHashSet(1);
                    wordLemmaMap.put(word, lemmas);
                }
                lemmas.add(lemma);

                lemmaReducedTagCounter.incrementCount(lemma, reducedTagKey);
                reducedTagTagCounter.incrementCount(lemma + reducedTagKey, tag);
                tagReducedTagCounter.incrementCount(tag, reducedTagKey);
            }
            numTrees++;
        }

        // Summary section.
        System.out.println("Language: " + language);
        System.out.printf("#trees:\t%d%n", numTrees);
        System.out.printf("#tokens:\t%d%n", (int) wordCounter.totalCount());
        System.out.printf("#words:\t%d%n", wordCounter.keySet().size());
        System.out.printf("#tags:\t%d%n", tagCounter.keySet().size());
        System.out.printf("#wordTagPairs:\t%d%n", wordTagCounter.keySet().size());
        System.out.printf("#lemmas:\t%d%n", lemmaCounter.keySet().size());
        System.out.printf("#lemmaTagPairs:\t%d%n", lemmaTagCounter.keySet().size());
        System.out.printf("#feattags:\t%d%n", reducedTagCounter.keySet().size());
        System.out.printf("#feattag+lemmas:\t%d%n", reducedTagLemmaCounter.keySet().size());
        System.out.printf("#richtags:\t%d%n", richTagCounter.keySet().size());
        System.out.printf("#richtag+lemma:\t%d%n", morphCounter.keySet().size());
        System.out.printf("#richtag+lemmaTagPairs:\t%d%n", morphTagCounter.keySet().size());
        System.out.println("==================");

        // Per-word section: words with exactly one lemma are printed immediately when that
        // lemma occurs with more than one reduced tag; the other cases are buffered and
        // printed afterwards.
        StringBuilder noLemmaReport = new StringBuilder();
        StringBuilder multiLemmaReport = new StringBuilder();
        for (Map.Entry<String, Set<String>> entry : wordLemmaMap.entrySet()) {
            String word = entry.getKey();
            Set<String> lemmas = entry.getValue();
            if (lemmas.isEmpty()) {
                // Defensive: every set receives at least one lemma when it is created.
                noLemmaReport.append("NO LEMMAS FOR WORD: ").append(word).append("\n");
            } else if (lemmas.size() > 1) {
                multiLemmaReport.append("MULTIPLE LEMMAS: ").append(word).append(AddDep.ATOM_DELIMITER)
                        .append(setToString(lemmas)).append("\n");
            } else {
                String lemma = lemmas.iterator().next();
                Set<String> reducedTags = lemmaReducedTagCounter.getCounter(lemma).keySet();
                if (reducedTags.size() > 1) {
                    System.out.printf("%s --> %s%n", word, lemma);
                    for (String reducedTag : reducedTags) {
                        int count = lemmaReducedTagCounter.getCount(lemma, reducedTag);
                        String tags = setToString(reducedTagTagCounter.getCounter(lemma + reducedTag).keySet());
                        System.out.printf("\t%s\t%d\t%s%n", reducedTag, count, tags);
                    }
                    System.out.println();
                }
            }
        }
        System.out.println("==================");
        System.out.println(noLemmaReport);
        System.out.println(multiLemmaReport);
        System.out.println("==================");

        // Per-tag section: each tag, in sorted order, with the reduced tags it occurred with.
        List<String> sortedTags = new ArrayList<String>(tagReducedTagCounter.firstKeySet());
        Collections.sort(sortedTags);
        for (String tag : sortedTags) {
            System.out.println(tag);
            for (String reducedTag : tagReducedTagCounter.getCounter(tag).keySet()) {
                System.out.printf("\t%s\t%d%n", reducedTag, tagReducedTagCounter.getCount(tag, reducedTag));
            }
            System.out.println();
        }
        System.out.println("==================");
    }

    /**
     * Renders a set as {@code [} followed by each element and a delimiter, then {@code ]}.
     * A delimiter follows every element, including the last one; elements appear in the
     * set's iteration order.
     *
     * @param set the strings to render
     * @return the bracketed, delimiter-separated rendering
     */
    private static String setToString(Set<String> set) {
        StringBuilder sb = new StringBuilder();
        sb.append("[");
        for (String element : set) {
            sb.append(element).append(AddDep.ATOM_DELIMITER);
        }
        sb.append("]");
        return sb.toString();
    }
}
