package edu.stanford.nlp.international.arabic.pipeline;

import edu.stanford.nlp.international.arabic.pipeline.ATBArabicDataset;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.international.arabic.ATBTreeUtils;
import edu.stanford.nlp.trees.international.french.FrenchXMLTreeReader;
import edu.stanford.nlp.util.logging.Redwood;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.UnsupportedEncodingException;
import java.util.regex.Pattern;

/* loaded from: input_file:edu/stanford/nlp/international/arabic/pipeline/LabeledATBDataset.class */
/**
 * An {@link ATBArabicDataset} whose {@link #build()} step runs every loaded tree through a
 * {@link LabelingTreeNormalizer}, which overwrites each preterminal's leaf with one of the
 * labels {@code XSEG}, {@code SEGC}, {@code SEGL} or {@code SEGR} depending on whether the
 * original token starts and/or ends with a hyphen.
 *
 * <p>All dataset state used here ({@code pathsToData}, {@code treebank}, {@code splitFilter},
 * {@code treeFileExtension}, {@code outFileName}, {@code flatFileName}, {@code makeFlatFile},
 * {@code outputFileList}, {@code toStringBuffer}) is inherited; none of it is declared in
 * this class.
 */
public class LabeledATBDataset extends ATBArabicDataset {
    /** Redwood logging channel for this class. Not referenced by the code in this class. */
    private static final Redwood.RedwoodChannels log = Redwood.channels(LabeledATBDataset.class);

    /**
     * Tree normalizer that replaces the leaf under each preterminal with a label derived
     * from the leaf's leading/trailing hyphen.
     *
     * <p>NOTE(review): the field names suggest the hyphens mark clitic attachment points;
     * confirm against the treebank conventions before relying on that reading.
     */
    protected class LabelingTreeNormalizer extends ATBArabicDataset.ArabicRawTreeNormalizer {
        /** Matches a token whose first character is a hyphen. */
        private final Pattern leftClitic = Pattern.compile("^-");
        /** Matches a token whose last character is a hyphen. */
        private final Pattern rightClitic = Pattern.compile("-$");

        /**
         * Creates a normalizer that hands both writers to the superclass constructor.
         *
         * @param printWriter  first writer passed to the superclass; {@link #build()} supplies
         *                     the writer opened on {@code outFileName}
         * @param printWriter2 second writer passed to the superclass; {@link #build()} supplies
         *                     the flat-file writer, or {@code null} when no flat file is made
         */
        public LabelingTreeNormalizer(PrintWriter printWriter, PrintWriter printWriter2) {
            super(printWriter, printWriter2);
        }

        /**
         * Overwrites the value of the preterminal's first child with a segmentation label.
         *
         * <ul>
         *   <li>Tag equals {@code FrenchXMLTreeReader.EMPTY_LEAF}: the node is left untouched.</li>
         *   <li>Tag equals {@code ATBTreeUtils.puncTag}, or the trimmed token neither starts
         *       nor ends with a hyphen: {@code XSEG}.</li>
         *   <li>Token both starts and ends with a hyphen: {@code SEGC}.</li>
         *   <li>Token only ends with a hyphen: {@code SEGL}.</li>
         *   <li>Token only starts with a hyphen: {@code SEGR}.</li>
         * </ul>
         *
         * @param tree the preterminal node; its value is read as the tag and its first child
         *             as the token
         */
        @Override
        public void processPreterminal(Tree tree) {
            String tag = tree.value();
            if (tag.equals(FrenchXMLTreeReader.EMPTY_LEAF)) {
                return;
            }

            Tree leaf = tree.firstChild();
            String word = leaf.value().trim();
            boolean startsWithHyphen = this.leftClitic.matcher(word).find();
            boolean endsWithHyphen = this.rightClitic.matcher(word).find();

            String label;
            if (tag.equals(ATBTreeUtils.puncTag) || !(startsWithHyphen || endsWithHyphen)) {
                label = "XSEG";
            } else if (startsWithHyphen && endsWithHyphen) {
                label = "SEGC";
            } else if (endsWithHyphen) {
                label = "SEGL";
            } else {
                // Only reachable when startsWithHyphen is true: the first branch already
                // handled the case where neither flag is set, so no further check is needed.
                label = "SEGR";
            }
            leaf.setValue(label);
        }
    }

    /**
     * Loads every configured path into the treebank, then applies a
     * {@link LabelingTreeNormalizer} to all trees with freshly opened UTF-8 writers.
     *
     * <p>For each path, a line with the number of newly loaded trees is appended to
     * {@code toStringBuffer}. On success {@code outFileName} (and {@code flatFileName} when
     * {@code makeFlatFile} is set) is added to {@code outputFileList}. If a file cannot be
     * opened or UTF-8 is unsupported, a message is printed to {@code System.err} and the
     * method returns normally. Both writers are always closed before the method exits.
     */
    @Override
    public void build() {
        for (File path : this.pathsToData) {
            int sizeBefore = this.treebank.size();
            if (this.splitFilter == null) {
                this.treebank.loadPath(path, this.treeFileExtension, false);
            } else {
                this.treebank.loadPath(path, this.splitFilter);
            }
            int loaded = this.treebank.size() - sizeBefore;
            this.toStringBuffer.append(String.format(" Loaded %d trees from %s\n", loaded, path.getPath()));
        }

        PrintWriter outWriter = null;
        PrintWriter flatWriter = null;
        try {
            outWriter = new PrintWriter(new BufferedWriter(new OutputStreamWriter(new FileOutputStream(this.outFileName), "UTF-8")));
            if (this.makeFlatFile) {
                flatWriter = new PrintWriter(new BufferedWriter(new OutputStreamWriter(new FileOutputStream(this.flatFileName), "UTF-8")));
            }

            this.treebank.apply(new LabelingTreeNormalizer(outWriter, flatWriter));

            this.outputFileList.add(this.outFileName);
            if (this.makeFlatFile) {
                this.outputFileList.add(this.flatFileName);
                this.toStringBuffer.append(" Made flat files\n");
            }
        } catch (FileNotFoundException e) {
            // outWriter is assigned only after the main output file opened successfully, so a
            // non-null outWriter means the flat file was the one that could not be opened.
            // (Assumes the two FileOutputStream constructors are the only source of this
            // exception in the try body.)
            System.err.printf("%s: Could not open %s for writing\n", getClass().getName(),
                    outWriter == null ? this.outFileName : this.flatFileName);
        } catch (UnsupportedEncodingException e) {
            System.err.printf("%s: Filesystem does not support UTF-8 output\n", getClass().getName());
            e.printStackTrace();
        } finally {
            // Single clean-up point for success, handled failures and propagating exceptions.
            if (outWriter != null) {
                outWriter.close();
            }
            if (flatWriter != null) {
                flatWriter.close();
            }
        }
    }
}
