package edu.stanford.nlp.international.arabic.parsesegment;

import edu.stanford.nlp.ling.CategoryWordTagFactory;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.parser.lexparser.ArabicTreebankParserParams;
import edu.stanford.nlp.parser.lexparser.BiLexPCFGParser;
import edu.stanford.nlp.parser.lexparser.BinaryGrammar;
import edu.stanford.nlp.parser.lexparser.BinaryGrammarExtractor;
import edu.stanford.nlp.parser.lexparser.Debinarizer;
import edu.stanford.nlp.parser.lexparser.DependencyGrammar;
import edu.stanford.nlp.parser.lexparser.Edge;
import edu.stanford.nlp.parser.lexparser.ExhaustiveDependencyParser;
import edu.stanford.nlp.parser.lexparser.ExhaustivePCFGParser;
import edu.stanford.nlp.parser.lexparser.Extractor;
import edu.stanford.nlp.parser.lexparser.Hook;
import edu.stanford.nlp.parser.lexparser.Item;
import edu.stanford.nlp.parser.lexparser.Lattice;
import edu.stanford.nlp.parser.lexparser.LatticeScorer;
import edu.stanford.nlp.parser.lexparser.LatticeXMLReader;
import edu.stanford.nlp.parser.lexparser.LexicalizedParser;
import edu.stanford.nlp.parser.lexparser.Lexicon;
import edu.stanford.nlp.parser.lexparser.Options;
import edu.stanford.nlp.parser.lexparser.ParentAnnotationStats;
import edu.stanford.nlp.parser.lexparser.TreeAnnotatorAndBinarizer;
import edu.stanford.nlp.parser.lexparser.TreebankLangParserParams;
import edu.stanford.nlp.parser.lexparser.UnaryGrammar;
import edu.stanford.nlp.trees.DiskTreebank;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreePrint;
import edu.stanford.nlp.trees.TreeTransformer;
import edu.stanford.nlp.trees.Treebank;
import edu.stanford.nlp.trees.TreebankLanguagePack;
import edu.stanford.nlp.util.HashIndex;
import edu.stanford.nlp.util.Pair;
import edu.stanford.nlp.util.Timing;
import edu.stanford.nlp.util.logging.Redwood;
import java.io.File;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;

/* loaded from: input_file:edu/stanford/nlp/international/arabic/parsesegment/JointParsingModel.class */
public class JointParsingModel {
    // Redwood logging channel used for all progress and summary messages of this class.
    private static Redwood.RedwoodChannels log = Redwood.channels(JointParsingModel.class);
    // PCFG and dependency parsers. They are static because the static nested GenericLatticeScorer
    // reads them directly; makeParsers() (re)assigns them, so they are shared by all instances.
    private static ExhaustivePCFGParser pparser;
    private static ExhaustiveDependencyParser dparser;
    // Factored parser built in makeParsers() on top of pparser, dparser and a GenericLatticeScorer.
    private BiLexPCFGParser bparser;
    // Parser options; only assigned inside run(), so it is null on a freshly constructed instance.
    private Options op;
    // Grammars and lexicon extracted from the training treebank in run().
    private LexicalizedParser lp;
    // Post-processing applied to every best parse before printing (see parse()).
    private TreeTransformer debinarizer;
    private TreeTransformer subcategoryStripper;
    private TreePrint treePrint;
    // Yield of the most recent best PCFG parse; written by parse() and read by
    // GenericLatticeScorer.convertItemSpan(). Static, hence shared by all instances.
    private static List<CoreLabel> bestSegmentationB;
    // Binarized training trees whose yield size minus one exceeds this limit are dropped.
    private static final int trainLengthLimit = 100000;
    // Instance-level verbosity flag, set through setVerbose().
    private boolean VERBOSE = false;
    // Passed as the second argument to LatticeXMLReader.load() in parse().
    private boolean serInput = false;
    // Copied into op.testOptions.maxLength by run().
    private int maxSentLen = 5000;

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:edu/stanford/nlp/international/arabic/parsesegment/JointParsingModel$GenericLatticeScorer.class */
    /**
     * Scorer given to the factored parser. Every query is answered by combining the static PCFG
     * parser (asked about a copy of the item whose span was rewritten by
     * {@link #convertItemSpan(Item)}) with the static dependency parser (asked about the item as is).
     */
    public static class GenericLatticeScorer implements LatticeScorer {
        private GenericLatticeScorer() {
        }

        /**
         * Rewrites the span of {@code item} in place using the begin/end positions stored on the
         * labels of the current best segmentation.
         *
         * @param item the item to modify; its {@code start} and {@code end} index the segmentation
         * @return the same (mutated) item
         * @throws RuntimeException if no best segmentation has been recorded yet
         */
        @Override
        public Item convertItemSpan(Item item) {
            List<CoreLabel> segmentation = JointParsingModel.bestSegmentationB;
            if (segmentation == null || segmentation.isEmpty()) {
                throw new RuntimeException(getClass().getName() + ": No 1best segmentation available");
            }
            // start is replaced first; end is looked up from its own (still unmodified) value.
            CoreLabel firstLabel = segmentation.get(item.start);
            item.start = firstLabel.beginPosition();
            CoreLabel lastLabel = segmentation.get(item.end - 1);
            item.end = lastLabel.endPosition();
            return item;
        }

        /** Sum of the PCFG outside score (converted span) and the dependency outside score. */
        @Override
        public double oScore(Edge edge) {
            Edge latticeEdge = (Edge) convertItemSpan(new Edge(edge));
            double pcfgScore = JointParsingModel.pparser.oScore(latticeEdge);
            double depScore = JointParsingModel.dparser.oScore(edge);
            return pcfgScore + depScore;
        }

        /** Sum of the PCFG inside score (converted span) and the dependency inside score. */
        @Override
        public double iScore(Edge edge) {
            Edge latticeEdge = (Edge) convertItemSpan(new Edge(edge));
            double pcfgScore = JointParsingModel.pparser.iScore(latticeEdge);
            double depScore = JointParsingModel.dparser.iScore(edge);
            return pcfgScore + depScore;
        }

        /** True only if both parsers report the hook as outside-possible; the PCFG parser is asked first. */
        @Override
        public boolean oPossible(Hook hook) {
            Hook latticeHook = (Hook) convertItemSpan(new Hook(hook));
            if (!JointParsingModel.pparser.oPossible(latticeHook)) {
                return false;
            }
            return JointParsingModel.dparser.oPossible(hook);
        }

        /** True only if both parsers report the hook as inside-possible; the PCFG parser is asked first. */
        @Override
        public boolean iPossible(Hook hook) {
            Hook latticeHook = (Hook) convertItemSpan(new Hook(hook));
            if (!JointParsingModel.pparser.iPossible(latticeHook)) {
                return false;
            }
            return JointParsingModel.dparser.iPossible(hook);
        }

        /**
         * Not supported by this scorer.
         *
         * @throws UnsupportedOperationException always
         */
        @Override
        public boolean parse(List<? extends HasWord> list) {
            String message = getClass().getName() + ": Does not support parse operation.";
            throw new UnsupportedOperationException(message);
        }
    }

    /**
     * Turns verbose output on or off.
     *
     * <p>The flag is always stored on this instance. If the parser options already exist, the
     * flag is also copied into the test option {@code verbose} and the train options
     * {@code printAnnotatedStateCounts} / {@code printAnnotatedRuleCounts}.
     *
     * @param z {@code true} to enable verbose output
     */
    public void setVerbose(boolean z) {
        this.VERBOSE = z;
        // op is only created inside run(); without this guard, configuring a freshly
        // constructed model threw a NullPointerException.
        if (this.op == null) {
            return;
        }
        this.op.testOptions.verbose = z;
        this.op.trainOptions.printAnnotatedStateCounts = z;
        this.op.trainOptions.printAnnotatedRuleCounts = z;
    }

    /**
     * Stores the flag that parse() forwards as the second argument of
     * {@code LatticeXMLReader.load}.
     *
     * @param z the new value of the flag
     */
    public void setSerInput(boolean z) {
        serInput = z;
    }

    /**
     * Sets the length limit that run() copies into {@code op.testOptions.maxLength}.
     *
     * @param i the new limit
     */
    public void setMaxEvalSentLen(int i) {
        maxSentLen = i;
    }

    /**
     * Removes from {@code op.trainOptions.splitters} every entry matched by an entry of
     * {@code op.trainOptions.deleteSplitters}. Does nothing when no delete-splitters are set.
     *
     * <p>A delete-splitter that is itself a basic category removes all splitters sharing that
     * basic category; any other delete-splitter removes only the exactly equal splitter.
     *
     * @param treebankLanguagePack used to compute basic categories
     */
    private void removeDeleteSplittersFromSplitters(TreebankLanguagePack treebankLanguagePack) {
        if (this.op.trainOptions.deleteSplitters == null) {
            return;
        }
        List<String> removed = new ArrayList<String>();
        for (String toDelete : this.op.trainOptions.deleteSplitters) {
            String baseCategory = treebankLanguagePack.basicCategory(toDelete);
            boolean deleteIsBasic = toDelete.equals(baseCategory);
            // Explicit iterator so that matching entries can be removed while iterating.
            for (Iterator<String> splitterIt = this.op.trainOptions.splitters.iterator(); splitterIt.hasNext(); ) {
                String splitter = splitterIt.next();
                boolean sharesBase = deleteIsBasic
                        && treebankLanguagePack.basicCategory(splitter).equals(baseCategory);
                if (sharesBase || splitter.equals(toDelete)) {
                    splitterIt.remove();
                    removed.add(splitter);
                }
            }
        }
        if (this.op.testOptions.verbose) {
            log.info("Removed from vertical splitters: " + removed);
        }
    }

    /**
     * Annotates and binarizes every tree of the given treebank.
     *
     * <p>When selective splitting is enabled, the split categories are computed from the treebank
     * first (minus the configured delete-splitters). When {@code hSelSplit} is enabled, the
     * treebank is run through the binarizer once with selective splitting switched off and tree
     * printing suppressed, before the pass that produces the result.
     *
     * @param treebank the trees to transform
     * @return the transformed trees whose yield size minus one does not exceed the training length limit
     */
    public List<Tree> getAnnotatedBinaryTreebankFromTreebank(Treebank treebank) {
        TreebankLangParserParams params = this.op.tlpParams;
        TreebankLanguagePack languagePack = params.treebankLanguagePack();

        if (this.VERBOSE) {
            log.info("\n\n" + treebank.textualSummary(languagePack));
        }

        log.info("Binarizing trees...");
        boolean insideFactor = !this.op.trainOptions.outsideFactor();
        TreeAnnotatorAndBinarizer binarizer =
                new TreeAnnotatorAndBinarizer(params, this.op.forceCNF, insideFactor, true, this.op);
        Timing.tick("done.");

        if (this.op.trainOptions.selectiveSplit) {
            this.op.trainOptions.splitters = ParentAnnotationStats.getSplitCategories(
                    treebank,
                    this.op.trainOptions.tagSelectiveSplit,
                    0,
                    this.op.trainOptions.selectiveSplitCutOff,
                    this.op.trainOptions.tagSelectiveSplitCutOff,
                    languagePack);
            removeDeleteSplittersFromSplitters(languagePack);
            if (this.op.testOptions.verbose) {
                List<String> sortedSplitters = new ArrayList<String>(this.op.trainOptions.splitters);
                Collections.sort(sortedSplitters);
                log.info("Parent split categories: " + sortedSplitters);
            }
        }

        if (this.op.trainOptions.hSelSplit) {
            // Preliminary pass: results are discarded, printing is silenced and restored afterwards.
            int savedPrintSetting = this.op.trainOptions.printTreeTransformations;
            this.op.trainOptions.printTreeTransformations = 0;
            binarizer.setDoSelectiveSplit(false);
            for (Tree rawTree : treebank) {
                binarizer.transformTree(rawTree);
            }
            binarizer.setDoSelectiveSplit(true);
            this.op.trainOptions.printTreeTransformations = savedPrintSetting;
        }

        List<Tree> binaryTrees = new ArrayList<Tree>();
        for (Tree rawTree : treebank) {
            Tree binarized = binarizer.transformTree(rawTree);
            int length = binarized.yield().size() - 1;
            if (length > trainLengthLimit) {
                continue;
            }
            binaryTrees.add(binarized);
        }
        return binaryTrees;
    }

    /**
     * Builds a {@link LexicalizedParser} from a training treebank: binarizes the trees, extracts
     * the unary/binary grammars, trains the lexicon and, if {@code op.doDep} is set, extracts the
     * dependency grammar.
     *
     * @param treebank the training trees
     * @return a parser holding the extracted grammars, lexicon and indices; its dependency
     *     grammar is {@code null} when {@code op.doDep} is false
     */
    public LexicalizedParser getParserDataFromTreebank(Treebank treebank) {
        log.info("Binarizing training trees...");
        List<Tree> binaryTrees = getAnnotatedBinaryTreebankFromTreebank(treebank);
        Timing.tick("done.");

        HashIndex<String> stateIndex = new HashIndex<String>();
        log.info("Extracting PCFG...");
        BinaryGrammarExtractor grammarExtractor = new BinaryGrammarExtractor(this.op, stateIndex);
        Pair<UnaryGrammar, BinaryGrammar> grammars = grammarExtractor.extract(binaryTrees);
        BinaryGrammar bg = grammars.second();
        bg.splitRules();
        UnaryGrammar ug = grammars.first();
        ug.purgeRules();
        Timing.tick("done.");

        log.info("Extracting Lexicon...");
        HashIndex<String> wordIndex = new HashIndex<String>();
        HashIndex<String> tagIndex = new HashIndex<String>();
        Lexicon lexicon = this.op.tlpParams.lex(this.op, wordIndex, tagIndex);
        lexicon.initializeTraining(binaryTrees.size());
        lexicon.train(binaryTrees);
        lexicon.finishTraining();
        Timing.tick("done.");

        // The extractor is created unconditionally; it is only used when doDep is set.
        Extractor<DependencyGrammar> depExtractor =
                this.op.tlpParams.dependencyGrammarExtractor(this.op, wordIndex, tagIndex);
        DependencyGrammar dg = null;
        if (this.op.doDep) {
            log.info("Extracting Dependencies...");
            dg = depExtractor.extract(binaryTrees);
            dg.setLexicon(lexicon);
            Timing.tick("done.");
        }

        log.info("Done extracting grammars and lexicon.");
        return new LexicalizedParser(lexicon, bg, ug, dg, stateIndex, wordIndex, tagIndex, this.op);
    }

    /**
     * Creates the PCFG, dependency and factored parsers from the grammars held by {@code lp}.
     * The PCFG and dependency parsers are stored in static fields; the factored parser is stored
     * on this instance and scores through a new {@link GenericLatticeScorer}.
     *
     * @throws RuntimeException if {@code lp} has not been set
     */
    private void makeParsers() {
        LexicalizedParser grammar = this.lp;
        if (grammar == null) {
            throw new RuntimeException(getClass().getName() + ": Parser grammar does not exist");
        }

        pparser = new ExhaustivePCFGParser(
                grammar.bg, grammar.ug, grammar.lex, this.op,
                grammar.stateIndex, grammar.wordIndex, grammar.tagIndex);

        dparser = new ExhaustiveDependencyParser(
                grammar.dg, grammar.lex, this.op, grammar.wordIndex, grammar.tagIndex);

        GenericLatticeScorer scorer = new GenericLatticeScorer();
        this.bparser = new BiLexPCFGParser(
                scorer, pparser, dparser,
                grammar.bg, grammar.ug, grammar.dg, grammar.lex, this.op,
                grammar.stateIndex, grammar.wordIndex, grammar.tagIndex);
    }

    /**
     * Reads lattices from the given stream and parses each one.
     *
     * <p>For every lattice within the size limit, the PCFG parser is tried first. On success its
     * best parse is kept, its yield becomes the current best segmentation, and (if enabled) the
     * dependency parser and then the factored parser are run on that segmentation; a successful
     * factored parse replaces the PCFG parse. The resulting tree is debinarized, stripped of
     * subcategories and printed. Exceptions raised while handling one lattice are reported and
     * the loop continues with the next lattice. A summary is logged at the end.
     *
     * @param inputStream source of the lattice XML
     * @return {@code false} if the lattices could not be loaded, {@code true} otherwise
     */
    private boolean parse(InputStream inputStream) {
        final String className = getClass().getName();

        LatticeXMLReader reader = new LatticeXMLReader();
        boolean loaded = reader.load(inputStream, this.serInput);
        if (!loaded) {
            System.err.printf("%s: Error loading input lattice xml from stdin%n", className);
            return false;
        }
        System.err.printf("%s: Entering main parsing loop...%n", className);

        int latticeNum = 0;       // index of the current lattice, counting skipped ones
        int parseable = 0;        // lattices within the size limit
        int successes = 0;        // lattices for which a tree was printed
        int factoredSuccesses = 0; // lattices for which the factored parser succeeded

        for (Lattice lattice : reader) {
            if (lattice.getNumNodes() > this.op.testOptions.maxLength + 1) {
                System.err.printf("%s: Lattice %d too big! (%d nodes)%n", className, latticeNum, lattice.getNumNodes());
                latticeNum++;
                continue;
            }

            parseable++;
            try {
                Tree bestParse = null;
                if (!this.op.doPCFG || !pparser.parse(lattice)) {
                    System.out.printf("%s: WARNING: parsing failed for lattice %d%n", className, latticeNum);
                } else {
                    bestParse = pparser.getBestParse();
                    List<CoreLabel> segmentation = new ArrayList<CoreLabel>();
                    bestSegmentationB = bestParse.yield(segmentation);
                    if (!this.op.doDep || !dparser.parse(bestSegmentationB)) {
                        System.out.printf("%s: Dependency parse failed. Backing off to PCFG...%n", className);
                    } else {
                        System.err.printf("%s: Dependency parse succeeded!%n", className);
                        if (this.bparser.parse(bestSegmentationB)) {
                            System.err.printf("%s: Factored parse succeeded!%n", className);
                            bestParse = this.bparser.getBestParse();
                            factoredSuccesses++;
                        }
                    }
                }

                if (bestParse != null) {
                    Tree debinarized = this.debinarizer.transformTree(bestParse);
                    Tree stripped = this.subcategoryStripper.transformTree(debinarized);
                    this.treePrint.printTree(stripped);
                    successes++;
                } else {
                    System.out.printf("%s: WARNING: Could not extract best parse for lattice %d%n", className, latticeNum);
                }
            } catch (Exception e) {
                System.out.printf("%s: WARNING: Could not extract best parse for lattice %d%n", className, latticeNum);
                e.printStackTrace();
            }
            latticeNum++;
        }

        // Percentage of parseable lattices that produced a tree, truncated to two decimals.
        double parsedPercent = ((int) ((successes * 10000.0d) / parseable)) / 100.0d;

        log.info("===================================================================");
        log.info("===================================================================");
        log.info("Post mortem:");
        log.info("  Input:     " + latticeNum);
        log.info("  Parseable: " + parseable);
        log.info("  Parsed:    " + successes);
        log.info("  f_Parsed:  " + factoredSuccesses);
        log.info("  String %:  " + parsedPercent);
        return true;
    }

    /**
     * Trains the parser on a treebank and then parses the lattices read from a stream.
     *
     * <p>A fresh {@link Options} object is created on every call and configured for the Arabic
     * factored parser. Because the options are recreated here, the verbosity chosen through
     * {@code setVerbose} is re-applied to the new options so that it actually takes effect
     * during training and parsing.
     *
     * @param file path of the training treebank
     * @param file2 not used by this method
     * @param inputStream source of the lattice XML to parse
     * @return {@code false} if the lattice input could not be loaded, {@code true} otherwise
     */
    public boolean run(File file, File file2, InputStream inputStream) {
        this.op = new Options();
        this.op.tlpParams = new ArabicTreebankParserParams();
        this.op.setOptions("-arabicFactored");
        this.op.testOptions.maxLength = this.maxSentLen;
        this.op.testOptions.MAX_ITEMS = 5000000;
        this.op.testOptions.outputFormatOptions = "removeTopBracket,includePunctuationDependencies";
        this.op.testOptions.maxSpanForTags = 80;
        if (this.VERBOSE) {
            // The Options object above is brand new, so any verbosity flags written into a
            // previous Options instance are gone; carry the instance-level flag over.
            this.op.testOptions.verbose = true;
            this.op.trainOptions.printAnnotatedStateCounts = true;
            this.op.trainOptions.printAnnotatedRuleCounts = true;
        }

        this.treePrint = this.op.testOptions.treePrint(this.op.tlpParams);
        this.debinarizer = new Debinarizer(this.op.forceCNF, new CategoryWordTagFactory());
        this.subcategoryStripper = this.op.tlpParams.subcategoryStripper();

        Timing.startTime();
        DiskTreebank trainTreebank = this.op.tlpParams.diskTreebank();
        trainTreebank.loadPath(file);
        this.lp = getParserDataFromTreebank(trainTreebank);
        makeParsers();

        if (this.VERBOSE) {
            this.op.display();
            boolean haveParser = pparser != null;
            String lexRules = haveParser ? Integer.toString(this.lp.lex.numRules()) : "";
            String unaryRules = haveParser ? Integer.toString(this.lp.ug.numRules()) : "";
            String binaryRules = haveParser ? Integer.toString(this.lp.bg.numRules()) : "";
            log.info("Grammar\tStates\tTags\tWords\tUnaryR\tBinaryR\tTaggings");
            log.info("Grammar\t" + this.lp.stateIndex.size() + '\t' + this.lp.tagIndex.size() + '\t'
                    + this.lp.wordIndex.size() + '\t' + unaryRules + '\t' + binaryRules + '\t' + lexRules);
            log.info("ParserPack is " + this.op.tlpParams.getClass().getName());
            log.info("Lexicon is " + this.lp.lex.getClass().getName());
        }
        return parse(inputStream);
    }
}
