package edu.stanford.nlp.international.arabic.pipeline;

import edu.stanford.nlp.international.arabic.pipeline.ATBArabicDataset;
import edu.stanford.nlp.ling.tokensregex.types.Expressions;
import edu.stanford.nlp.time.SUTime;
import edu.stanford.nlp.trees.LabeledScoredTreeFactory;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.international.arabic.ATBTreeUtils;
import edu.stanford.nlp.trees.treebank.ConfigParser;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.logging.Redwood;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

/* loaded from: input_file:edu/stanford/nlp/international/arabic/pipeline/DecimatedArabicDataset.class */
/**
 * Arabic treebank dataset that distributes the loaded trees over three output files
 * (train, dev and test) instead of honouring an explicit split specification.
 *
 * <p>The distribution is driven by a running counter of visited trees, see
 * {@link ArabicTreeDecimatedNormalizer#visitTree(Tree)}. If a tag delimiter option is
 * configured, trees are written as tagged token strings; otherwise they are written as
 * Penn-style bracketed trees.
 */
public class DecimatedArabicDataset extends ATBArabicDataset {
    private static final Redwood.RedwoodChannels log = Redwood.channels(DecimatedArabicDataset.class);

    /** Whether trees are emitted as tagged strings rather than bracketed trees. */
    private boolean taggedOutput = false;

    /** Delimiter placed between a word and its tag when {@link #taggedOutput} is set. */
    private String wordTagDelim = Expressions.VAR_SELF;

    /**
     * Tree visitor that normalizes each tree and appends it to the train, dev or test file.
     * When flat output is requested, the flattened form of each tree is additionally written
     * to a parallel {@code .flat} file.
     */
    public class ArabicTreeDecimatedNormalizer extends ATBArabicDataset.ArabicRawTreeNormalizer {
        /** Number of trees that reached the decimation step so far. */
        private int treesVisited = 0;

        // Suffixes appended to the output prefix; they also serve as keys of the two maps below.
        private final String trainExtension = ".train";
        private final String testExtension = ".test";
        private final String devExtension = ".dev";
        private final String flatExtension = ".flat";

        private boolean makeFlatFile;
        private boolean taggedOutput;

        /** Extension key -> output file name. */
        private Map<String, String> outFilenames;

        /** Extension key -> open writer for that file. */
        private Map<String, PrintWriter> outFiles;

        /**
         * Registers the output file names derived from {@code str} and opens them for writing.
         *
         * @param str prefix of every output file name
         * @param z   whether the additional {@code .flat} files are produced
         * @param z2  whether trees are written as tagged strings instead of bracketed trees
         */
        public ArabicTreeDecimatedNormalizer(String str, boolean z, boolean z2) {
            // The superclass writers are unused: this class manages its own set of writers.
            super(null, null);
            this.makeFlatFile = z;
            this.taggedOutput = z2;
            this.outFilenames = Generics.newHashMap();
            this.outFilenames.put(this.trainExtension, str + this.trainExtension);
            this.outFilenames.put(this.testExtension, str + this.testExtension);
            this.outFilenames.put(this.devExtension, str + this.devExtension);
            if (this.makeFlatFile) {
                this.outFilenames.put(this.trainExtension + this.flatExtension, str + this.trainExtension + this.flatExtension);
                this.outFilenames.put(this.testExtension + this.flatExtension, str + this.testExtension + this.flatExtension);
                this.outFilenames.put(this.devExtension + this.flatExtension, str + this.devExtension + this.flatExtension);
            }
            setupOutputFiles();
        }

        /**
         * Opens a UTF-8 writer for every registered output file.
         *
         * <p>Flat file names are only registered when flat output is enabled, so every
         * registered name is opened unconditionally. (Filtering on the file name instead
         * would wrongly skip files whose output prefix happens to contain ".flat".)
         *
         * <p>Failures are reported on standard error; files that could not be opened are
         * left without a writer.
         */
        private void setupOutputFiles() {
            this.outFiles = Generics.newHashMap();
            // Tracked outside the loop so the error message can name the file that failed.
            String currentName = "";
            try {
                for (Map.Entry<String, String> entry : this.outFilenames.entrySet()) {
                    currentName = entry.getValue();
                    this.outFiles.put(entry.getKey(), new PrintWriter(new BufferedWriter(new OutputStreamWriter(new FileOutputStream(currentName), "UTF-8"))));
                }
            } catch (FileNotFoundException e) {
                System.err.printf("%s: Could not open %s for writing\n", getClass().getName(), currentName);
            } catch (UnsupportedEncodingException e2) {
                System.err.printf("%s: Filesystem does not support UTF-8 output\n", getClass().getName());
                e2.printStackTrace();
            }
        }

        /** Closes every writer that was successfully opened. */
        public void closeOutputFiles() {
            for (PrintWriter writer : this.outFiles.values()) {
                writer.close();
            }
        }

        /**
         * Normalizes one tree and writes it to the train, dev or test file.
         *
         * <p>Null trees and trees whose root value equals {@code SUTime.PAD_FIELD_UNKNOWN}
         * are skipped without advancing the counter. Otherwise the counter is incremented
         * and the tree goes to dev when the counter is divisible by 9, else to test when it
         * is divisible by 10, else to train. The dev check runs first, so a counter divisible
         * by both ends up in dev.
         *
         * @param tree the tree to process; may be {@code null}
         */
        @Override // edu.stanford.nlp.international.arabic.pipeline.ATBArabicDataset.ArabicRawTreeNormalizer, edu.stanford.nlp.trees.TreeVisitor
        public void visitTree(Tree tree) {
            if (tree == null || tree.value().equals(SUTime.PAD_FIELD_UNKNOWN)) {
                return;
            }
            Tree pruned = tree.prune(this.nullFilter, new LabeledScoredTreeFactory());
            Iterator<Tree> nodes = pruned.iterator();
            while (nodes.hasNext()) {
                Tree node = nodes.next();
                if (node.isPreTerminal()) {
                    processPreterminal(node);
                }
            }
            this.treesVisited++;
            String flat = this.makeFlatFile ? ATBTreeUtils.flattenTree(pruned) : null;

            String extension;
            if (this.treesVisited % 9 == 0) {
                extension = this.devExtension;
            } else if (this.treesVisited % 10 == 0) {
                extension = this.testExtension;
            } else {
                extension = this.trainExtension;
            }
            write(pruned, this.outFiles.get(extension));
            if (this.makeFlatFile) {
                this.outFiles.get(extension + this.flatExtension).println(flat);
            }
        }

        /**
         * Writes a tree either as a tagged string or as a bracketed tree, depending on the
         * {@code taggedOutput} flag of this normalizer.
         */
        private void write(Tree tree, PrintWriter printWriter) {
            if (this.taggedOutput) {
                printWriter.println(ATBTreeUtils.taggedStringFromTree(tree, DecimatedArabicDataset.this.removeEscapeTokens, DecimatedArabicDataset.this.wordTagDelim));
            } else {
                tree.pennPrint(printWriter);
            }
        }

        /**
         * Returns the names of all registered output files.
         *
         * @return a new list holding the output file names
         */
        public List<String> getFilenames() {
            return new ArrayList<String>(this.outFilenames.values());
        }

        // Delegating override kept so the method stays declared on this class.
        @Override // edu.stanford.nlp.international.arabic.pipeline.ATBArabicDataset.ArabicRawTreeNormalizer
        public /* bridge */ /* synthetic */ Tree arabicAoverAFilter(Tree tree) {
            return super.arabicAoverAFilter(tree);
        }
    }

    /**
     * Loads all configured treebank paths, runs the decimating normalizer over the trees
     * and records the names of the produced files.
     *
     * <p>The output writers are closed even if tree processing fails.
     */
    @Override // edu.stanford.nlp.international.arabic.pipeline.ATBArabicDataset, edu.stanford.nlp.trees.treebank.AbstractDataset, edu.stanford.nlp.trees.treebank.Dataset
    public void build() {
        // NOTE(review): the tag delimiter is only honoured when no split parameter is
        // present (else-if). Confirm that this coupling is intended.
        if (this.options.containsKey(ConfigParser.paramSplit)) {
            System.err.printf("%s: Ignoring split parameter for this dataset type\n", getClass().getName());
        } else if (this.options.containsKey(ConfigParser.paramTagDelim)) {
            this.wordTagDelim = this.options.getProperty(ConfigParser.paramTagDelim);
            this.taggedOutput = true;
        }
        for (File file : this.pathsToData) {
            int sizeBefore = this.treebank.size();
            this.treebank.loadPath(file, this.treeFileExtension, false);
            this.toStringBuffer.append(String.format(" Loaded %d trees from %s\n", Integer.valueOf(this.treebank.size() - sizeBefore), file.getPath()));
        }
        ArabicTreeDecimatedNormalizer normalizer = new ArabicTreeDecimatedNormalizer(this.outFileName, this.makeFlatFile, this.taggedOutput);
        try {
            this.treebank.apply(normalizer);
            this.outputFileList.addAll(normalizer.getFilenames());
        } finally {
            // Always flush and release the output files, including on failure.
            normalizer.closeOutputFiles();
        }
    }
}
