package edu.stanford.nlp.time;

import edu.stanford.nlp.ie.regexp.ChineseNumberSequenceClassifier;
import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.io.RuntimeIOException;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeCoreAnnotations;
import edu.stanford.nlp.util.ArrayCoreMap;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.Iterables;
import edu.stanford.nlp.util.logging.Redwood;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import nu.xom.Builder;
import nu.xom.Element;
import nu.xom.Elements;
import nu.xom.ParsingException;

/* loaded from: input_file:edu/stanford/nlp/time/ParsedGigawordReader.class */
/**
 * Iterates over the documents found in a directory tree of parsed, SGML-like files and
 * exposes each {@code <DOC>...</DOC>} section as an {@link Annotation}.
 *
 * <p>Each document's raw text is patched into well-formed XML (ampersands escaped,
 * {@code num=} / {@code sid=} attribute values quoted, a missing {@code </TEXT>} close tag
 * inserted) and then converted by {@link #toAnnotation(String)}.
 */
public class ParsedGigawordReader implements Iterable<Annotation> {
    private final Iterable<File> files;
    private static final Redwood.RedwoodChannels log = Redwood.channels(ParsedGigawordReader.class);

    /** Extracts the digit run from a document id shaped like {@code word_word_digits.}. */
    private static final Pattern datePattern = Pattern.compile("^\\w+_\\w+_(\\d+)\\.");

    /** Matches an unquoted {@code num=123 ...} attribute so its value can be quoted. */
    private static final Pattern UNQUOTED_NUM = Pattern.compile("num=([0-9]+) (.*)");

    /** Matches an unquoted {@code sid=...>} attribute so its value can be quoted. */
    private static final Pattern UNQUOTED_SID = Pattern.compile("sid=(.*)>");

    /**
     * Creates a reader over every file returned by {@code IOUtils.iterFilesRecursive(file)}.
     *
     * @param file the file or directory to read from
     */
    public ParsedGigawordReader(File file) {
        this.files = IOUtils.iterFilesRecursive(file);
    }

    /**
     * Returns an iterator that yields one {@link Annotation} per document, reading the
     * underlying files one after another. The iterator does not support {@code remove()}.
     *
     * @return a fresh iterator over all documents
     * @throws RuntimeIOException (from the iterator) if reading a file fails
     */
    @Override // java.lang.Iterable
    public Iterator<Annotation> iterator() {
        return new Iterator<Annotation>() {
            // Declaration order matters: field initializers run top to bottom, so `readers`
            // must exist before findReader() is called, and `reader` before findAnnotation().
            private final Iterator<BufferedReader> readers =
                    Iterables.transform(ParsedGigawordReader.this.files,
                            file -> IOUtils.readerFromFile(file)).iterator();
            private BufferedReader reader = findReader();
            private Annotation annotation = findAnnotation();

            @Override // java.util.Iterator
            public boolean hasNext() {
                return this.annotation != null;
            }

            @Override // java.util.Iterator
            public Annotation next() {
                if (this.annotation == null) {
                    throw new NoSuchElementException();
                }
                Annotation current = this.annotation;
                // Read one document ahead so hasNext() can answer without doing I/O.
                this.annotation = findAnnotation();
                return current;
            }

            @Override // java.util.Iterator
            public void remove() {
                throw new UnsupportedOperationException();
            }

            /** Returns the reader for the next file, or {@code null} when no files remain. */
            private BufferedReader findReader() {
                return this.readers.hasNext() ? this.readers.next() : null;
            }

            /**
             * Reads the next non-empty document from the current reader, moving on to the
             * next file when the current one is exhausted.
             *
             * @return the next document, or {@code null} when all files have been consumed
             */
            private Annotation findAnnotation() {
                try {
                    // Loop (rather than recurse) past empty chunks so a long run of empty
                    // files cannot exhaust the stack.
                    while (this.reader != null) {
                        StringBuilder doc = new StringBuilder();
                        String line;
                        while ((line = this.reader.readLine()) != null) {
                            doc.append(line).append('\n');
                            if (line.equals("</DOC>")) {
                                break;
                            }
                            // A close tag sharing a line with other content is not supported.
                            if (line.contains("</DOC>")) {
                                throw new RuntimeException(String.format("invalid line '%s'", line));
                            }
                        }

                        // End of file: release this reader and advance to the next file.
                        if (line == null) {
                            this.reader.close();
                            this.reader = findReader();
                        }

                        String xml = doc.toString().replace("&", "&amp;");
                        if (xml.isEmpty()) {
                            continue;
                        }

                        // Patch the input into well-formed XML.
                        xml = UNQUOTED_NUM.matcher(xml).replaceAll("num=\"$1\" $2");
                        xml = UNQUOTED_SID.matcher(xml).replaceAll("sid=\"$1\">");
                        xml = xml.replace("</SENT>\n</DOC>", "</SENT>\n</TEXT>\n</DOC>");
                        // NOTE(review): encodes with the platform default charset and decodes
                        // as UTF-8; this is an identity only on UTF-8 platforms. Kept as-is to
                        // preserve existing behavior -- confirm whether it is intentional.
                        xml = new String(xml.getBytes(), "UTF8");
                        return ParsedGigawordReader.toAnnotation(xml);
                    }
                    return null;
                } catch (IOException e) {
                    throw new RuntimeIOException(e);
                }
            }
        };
    }

    /**
     * Converts one document's XML into an {@link Annotation}.
     *
     * <p>The root element's {@code TEXT} child is expected to contain {@code SENT} elements
     * whose first child holds a bracketed parse tree. Tokens are the leaves under each
     * preterminal; the document text is rebuilt by joining tokens with single spaces and
     * ending each sentence with a newline, and character offsets refer to that rebuilt text.
     * The root element's {@code id} attribute becomes the document id, and the digit run
     * captured from it is handed to {@link Timex} to obtain the document date.
     *
     * @param str the XML for a single document
     * @return the annotation carrying text, doc id, calendar date and sentences
     * @throws IOException declared for interface compatibility; I/O failures raised while
     *     parsing are rethrown as {@link RuntimeException}
     * @throws RuntimeException if the XML cannot be parsed
     * @throws IllegalStateException if the document id does not match the expected shape
     */
    public static Annotation toAnnotation(String str) throws IOException {
        try {
            Element docElem = new Builder().build(new StringReader(str)).getRootElement();
            Element textElem = docElem.getFirstChildElement("TEXT");
            StringBuilder text = new StringBuilder();
            int offset = 0;
            List<CoreMap> sentences = new ArrayList<>();
            Elements sentenceElements = textElem.getChildElements("SENT");
            for (int s = 0; s < sentenceElements.size(); s++) {
                Element sentElem = sentenceElements.get(s);
                CoreMap sentence = new ArrayCoreMap();
                sentence.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, offset);
                Tree tree = Tree.valueOf(sentElem.getChild(0).getValue());
                List<CoreLabel> tokens = new ArrayList<>();
                List<Tree> preTerminals = preTerminals(tree);
                for (Tree preTerminal : preTerminals) {
                    String posTag = preTerminal.value();
                    for (Tree wordTree : preTerminal.children()) {
                        String word = wordTree.value();
                        CoreLabel token = new CoreLabel();
                        token.set(CoreAnnotations.TextAnnotation.class, word);
                        token.set(CoreAnnotations.PartOfSpeechAnnotation.class, posTag);
                        token.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, offset);
                        offset += word.length();
                        token.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, offset);
                        text.append(word);
                        text.append(' ');
                        offset += 1; // account for the separator just appended
                        tokens.add(token);
                    }
                }
                if (!preTerminals.isEmpty()) {
                    // Turn the trailing token separator into the sentence terminator.
                    text.setCharAt(text.length() - 1, '\n');
                }
                // offset already points past the trailing separator, hence the -1.
                sentence.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, offset - 1);
                sentence.set(CoreAnnotations.TokensAnnotation.class, tokens);
                sentence.set(TreeCoreAnnotations.TreeAnnotation.class, tree);
                sentences.add(sentence);
            }

            String docID = docElem.getAttributeValue("id");
            Matcher matcher = datePattern.matcher(docID);
            if (!matcher.find()) {
                throw new IllegalStateException(
                        String.format("cannot extract date from document id '%s'", docID));
            }
            Calendar docDate =
                    new Timex(ChineseNumberSequenceClassifier.DATE_TAG, matcher.group(1)).getDate();

            Annotation document = new Annotation(text.toString());
            document.set(CoreAnnotations.DocIDAnnotation.class, docID);
            document.set(CoreAnnotations.CalendarAnnotation.class, docDate);
            document.set(CoreAnnotations.SentencesAnnotation.class, sentences);
            return document;
        } catch (IOException | ParsingException e) {
            // Keep the original message format, but preserve the cause for debugging.
            throw new RuntimeException(String.format("error:\n%s\ninput:\n%s", e, str), e);
        }
    }

    /** Collects every preterminal node of {@code tree}, in the tree's iteration order. */
    private static List<Tree> preTerminals(Tree tree) {
        List<Tree> found = new ArrayList<>();
        for (Tree node : tree) {
            if (isPreterminal(node)) {
                found.add(node);
            }
        }
        return found;
    }

    /** Returns true when {@code tree} is not a leaf and all of its children are leaves. */
    private static boolean isPreterminal(Tree tree) {
        if (tree.isLeaf()) {
            return false;
        }
        for (Tree child : tree.children()) {
            if (!child.isLeaf()) {
                return false;
            }
        }
        return true;
    }
}
