package edu.stanford.nlp.tagger.util;

import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.semgraph.semgrex.ssurgeon.AddDep;
import edu.stanford.nlp.tagger.io.TaggedFileReader;
import edu.stanford.nlp.tagger.io.TaggedFileRecord;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.StringUtils;
import edu.stanford.nlp.util.logging.Redwood;
import java.io.IOException;
import java.io.OutputStream;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.TreeSet;

/* loaded from: input_file:edu/stanford/nlp/tagger/util/CountClosedTags.class */
/**
 * Command-line utility that collects, per tag, the set of distinct words
 * seen in tagged data, once for the leading "training" portion of each
 * training file and once for all data (all training sentences plus any test
 * files), and then prints the two counts per tag.
 *
 * <p>Tags whose training-portion word count is non-zero and equal to the
 * count over all data are collected and printed as a list at the end of the
 * report.
 *
 * <p>Recognized properties: {@code trainFile} (required), {@code testFile},
 * {@code closedTags} (whitespace-separated tags to restrict counting to),
 * {@code trainingRatio} and {@code printWords}. {@code encoding} and
 * {@code tagSeparator} are accepted by the argument check but not read by
 * this class directly.
 */
public class CountClosedTags {
    /** Tags to restrict counting to, or {@code null} to count every tag. */
    Set<String> closedTags;
    /** Tag to distinct words seen in the training portion of the training files. */
    Map<String, Set<String>> trainingWords = Generics.newHashMap();
    /** Tag to distinct words seen in any sentence of the training or test files. */
    Map<String, Set<String>> allWords = Generics.newHashMap();
    /** Ratio used when no {@code trainingRatio} property is given (two thirds). */
    static final double DEFAULT_TRAINING_RATIO = 0.6666666666666666d;
    /** Fraction of each training file's sentences that counts as the training portion. */
    final double trainingRatio;
    /** Whether {@link #report()} also lists the words themselves. */
    final boolean printWords;
    private static final String tagSeparator = "_";
    private static final Redwood.RedwoodChannels log = Redwood.channels(CountClosedTags.class);
    public static final String TEST_FILE_PROPERTY = "testFile";
    public static final String TRAIN_FILE_PROPERTY = "trainFile";
    public static final String CLOSED_TAGS_PROPERTY = "closedTags";
    public static final String TRAINING_RATIO_PROPERTY = "trainingRatio";
    public static final String PRINT_WORDS_PROPERTY = "printWords";
    private static final Set<String> knownArgs = Generics.newHashSet(Arrays.asList(TEST_FILE_PROPERTY, TRAIN_FILE_PROPERTY, CLOSED_TAGS_PROPERTY, TRAINING_RATIO_PROPERTY, PRINT_WORDS_PROPERTY, "encoding", "tagSeparator"));

    /**
     * Reads the configuration from the given properties.
     *
     * @param properties source of the {@code closedTags}, {@code trainingRatio}
     *                   and {@code printWords} settings
     * @throws NumberFormatException if {@code trainingRatio} is present but is
     *                               not a parsable double
     */
    private CountClosedTags(Properties properties) {
        String tagList = properties.getProperty(CLOSED_TAGS_PROPERTY);
        if (tagList == null) {
            this.closedTags = null;
        } else {
            Set<String> tags = new TreeSet<String>();
            for (String tag : tagList.split("\\s+")) {
                // split() yields an empty token for leading whitespace or an
                // empty value; an empty string is not a meaningful tag.
                if (!tag.isEmpty()) {
                    tags.add(tag);
                }
            }
            this.closedTags = tags;
        }

        String ratio = properties.getProperty(TRAINING_RATIO_PROPERTY);
        this.trainingRatio = (ratio != null) ? Double.parseDouble(ratio) : DEFAULT_TRAINING_RATIO;

        this.printWords = Boolean.parseBoolean(properties.getProperty(PRINT_WORDS_PROPERTY, "false"));
    }

    /**
     * Counts the sentences in a record by iterating over a fresh reader.
     *
     * @param taggedFileRecord the record to read
     * @return the number of sentences the reader produced
     */
    private static int countSentences(TaggedFileRecord taggedFileRecord) throws IOException {
        int sentences = 0;
        for (List<TaggedWord> ignored : taggedFileRecord.reader()) {
            sentences++;
        }
        return sentences;
    }

    /**
     * Adds each word of the sentence to the word set of its tag in {@code map}.
     * When {@link #closedTags} is non-null, words whose tag is not in it are
     * skipped.
     *
     * @param list the tagged sentence
     * @param map  tag-to-words map to update
     */
    void addTaggedWords(List<TaggedWord> list, Map<String, Set<String>> map) {
        for (TaggedWord taggedWord : list) {
            String tag = taggedWord.tag();
            if (this.closedTags != null && !this.closedTags.contains(tag)) {
                continue;
            }
            Set<String> words = map.get(tag);
            if (words == null) {
                words = new TreeSet<String>();
                map.put(tag, words);
            }
            words.add(taggedWord.word());
        }
    }

    /**
     * Processes one training file: the first
     * {@code (int) (sentenceCount * trainingRatio)} sentences are added to both
     * {@link #trainingWords} and {@link #allWords}; the remaining sentences are
     * added to {@link #allWords} only.
     *
     * @param taggedFileRecord the training file record; it is read twice, once
     *                         to count sentences and once to collect words
     */
    void countTrainingTags(TaggedFileRecord taggedFileRecord) throws IOException {
        int trainingSentences = (int) (countSentences(taggedFileRecord) * this.trainingRatio);
        TaggedFileReader reader = taggedFileRecord.reader();
        for (int i = 0; i < trainingSentences && reader.hasNext(); i++) {
            List<TaggedWord> sentence = reader.next();
            addTaggedWords(sentence, this.trainingWords);
            addTaggedWords(sentence, this.allWords);
        }
        while (reader.hasNext()) {
            addTaggedWords(reader.next(), this.allWords);
        }
    }

    /**
     * Processes one test file: every sentence is added to {@link #allWords}.
     *
     * @param taggedFileRecord the test file record
     */
    void countTestTags(TaggedFileRecord taggedFileRecord) throws IOException {
        for (List<TaggedWord> sentence : taggedFileRecord.reader()) {
            addTaggedWords(sentence, this.allWords);
        }
    }

    /**
     * Prints, for every tag seen (plus every configured closed tag), the tag
     * followed by its training word count and its overall word count. With
     * {@link #printWords} set, the training words are listed next, and the
     * words that appear only outside the training portion follow on a line
     * starting with {@code " *"}. Finally prints the list of tags whose two
     * counts are equal and non-zero.
     */
    void report() {
        List<String> fullyCoveredTags = new ArrayList<String>();
        Set<String> tags = new TreeSet<String>();
        tags.addAll(this.allWords.keySet());
        tags.addAll(this.trainingWords.keySet());
        if (this.closedTags != null) {
            tags.addAll(this.closedTags);
        }
        for (String tag : tags) {
            // A configured closed tag that never occurs in the data has no
            // entry in either map, so both lookups must tolerate null.
            Set<String> trainingSet = this.trainingWords.get(tag);
            if (trainingSet == null) {
                trainingSet = Collections.emptySet();
            }
            Set<String> allSet = this.allWords.get(tag);
            if (allSet == null) {
                allSet = Collections.emptySet();
            }
            int numTraining = trainingSet.size();
            int numTotal = allSet.size();
            if (numTraining == numTotal && numTraining > 0) {
                fullyCoveredTags.add(tag);
            }
            System.out.println(tag + AddDep.ATOM_DELIMITER + numTraining + AddDep.ATOM_DELIMITER + numTotal);
            if (this.printWords) {
                for (String word : trainingSet) {
                    System.out.print(AddDep.ATOM_DELIMITER + word);
                }
                if (numTraining < numTotal) {
                    System.out.println();
                    System.out.print(" *");
                    for (String word : allSet) {
                        if (!trainingSet.contains(word)) {
                            System.out.print(AddDep.ATOM_DELIMITER + word);
                        }
                    }
                }
                System.out.println();
            }
        }
        System.out.println(fullyCoveredTags);
    }

    /**
     * Logs the message, if it is non-empty, and terminates the JVM with exit
     * status 2.
     *
     * @param str message to log; may be {@code null} or empty
     */
    private static void help(String str) {
        if (str != null && !str.isEmpty()) {
            log.info(str);
        }
        System.exit(2);
    }

    /**
     * Exits via {@link #help(String)} if {@code trainFile} is missing or if any
     * property name is not one of the known arguments.
     *
     * @param properties the parsed command-line properties
     */
    private static void checkArgs(Properties properties) {
        if (!properties.containsKey(TRAIN_FILE_PROPERTY)) {
            help("No trainFile specified");
        }
        for (String name : properties.stringPropertyNames()) {
            if (!knownArgs.contains(name)) {
                help("Unknown arg " + name);
            }
        }
    }

    /**
     * Entry point: re-wraps stdout and stderr as UTF-8 streams, parses and
     * checks the arguments, processes the training files and any test files,
     * and prints the report.
     *
     * @param strArr command-line arguments, parsed into properties
     */
    public static void main(String[] strArr) throws Exception {
        System.setOut(new PrintStream(System.out, true, "UTF-8"));
        System.setErr(new PrintStream(System.err, true, "UTF-8"));

        Properties props = StringUtils.argsToProperties(strArr);
        checkArgs(props);

        CountClosedTags counter = new CountClosedTags(props);
        String trainFiles = props.getProperty(TRAIN_FILE_PROPERTY);
        String testFiles = props.getProperty(TEST_FILE_PROPERTY);

        for (TaggedFileRecord record : TaggedFileRecord.createRecords(props, trainFiles)) {
            counter.countTrainingTags(record);
        }
        if (testFiles != null) {
            for (TaggedFileRecord record : TaggedFileRecord.createRecords(props, testFiles)) {
                counter.countTestTags(record);
            }
        }
        counter.report();
    }
}
