package edu.stanford.nlp.sentiment;

import edu.stanford.nlp.classify.Classifier;
import edu.stanford.nlp.classify.GeneralDataset;
import edu.stanford.nlp.classify.LinearClassifier;
import edu.stanford.nlp.classify.LinearClassifierFactory;
import edu.stanford.nlp.classify.RVFDataset;
import edu.stanford.nlp.ie.NERClassifierCombiner;
import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.io.RuntimeIOException;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.RVFDatum;
import edu.stanford.nlp.optimization.QNMinimizer;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.simple.Document;
import edu.stanford.nlp.simple.SentimentClass;
import edu.stanford.nlp.stats.ClassicCounter;
import edu.stanford.nlp.stats.Counter;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.Lazy;
import edu.stanford.nlp.util.Pair;
import edu.stanford.nlp.util.StringUtils;
import edu.stanford.nlp.util.logging.Redwood;
import edu.stanford.nlp.util.logging.RedwoodConfiguration;
import java.io.File;
import java.io.IOException;
import java.io.ObjectOutputStream;
import java.io.OutputStream;
import java.lang.invoke.SerializedLambda;
import java.text.DecimalFormat;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Optional;
import java.util.Properties;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.regex.Pattern;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;

/* loaded from: input_file:edu/stanford/nlp/sentiment/SimpleSentiment.class */
public class SimpleSentiment {
    /** Redwood logging channel used by this class. */
    private static final Redwood.RedwoodChannels log = Redwood.channels(SimpleSentiment.class);

    /**
     * Lazily built annotation pipeline shared by training and classification.
     * It runs tokenization, sentence splitting, POS tagging and lemmatization,
     * and is configured to treat each input text as a single sentence.
     * The reference is never reassigned, so it is declared {@code final}.
     */
    private static final Lazy<StanfordCoreNLP> pipeline = Lazy.of(() -> {
        Properties properties = new Properties();
        properties.setProperty("annotators", "tokenize,ssplit,pos,lemma");
        properties.setProperty(NERClassifierCombiner.NER_LANGUAGE_PROPERTY_BASE, "english");
        properties.setProperty("ssplit.isOneSentence", "true");
        properties.setProperty("tokenize.class", "PTBTokenizer");
        properties.setProperty("tokenize.language", "en");
        return new StanfordCoreNLP(properties);
    });

    /** Matches tokens made up only of ASCII letters; such tokens take part in bigram features. */
    private static final Pattern alpha = Pattern.compile("[a-zA-Z]+");

    /** Matches tokens made up only of ASCII digits; such tokens are collapsed into one feature. */
    private static final Pattern number = Pattern.compile("[0-9]+");

    /** The trained classifier that {@code classify} delegates to. */
    private final Classifier<SentimentClass, String> impl;

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:edu/stanford/nlp/sentiment/SimpleSentiment$SentimentDatum.class */
    /**
     * A single labelled training example: a piece of text and its sentiment label.
     */
    public static class SentimentDatum {
        /** The raw text of the example. */
        public final String sentence;
        /** The sentiment label attached to {@link #sentence}. */
        public final SentimentClass sentiment;

        private SentimentDatum(String str, SentimentClass sentimentClass) {
            this.sentence = str;
            this.sentiment = sentimentClass;
        }

        /**
         * Annotates this example with the shared pipeline and returns its first sentence.
         * Blank text is replaced by a canned sentence matching the label, so the
         * pipeline always has something to annotate.
         *
         * @return the first annotated sentence of the (possibly substituted) text
         * @throws IllegalStateException if the text is blank and the label is not one of the
         *         five handled sentiment classes
         */
        CoreMap asCoreMap() {
            String text = sentence.trim().isEmpty() ? placeholderText() : sentence;
            Annotation document = new Annotation(text);
            SimpleSentiment.pipeline.get().annotate(document);
            List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
            return sentences.get(0);
        }

        /** Returns the canned stand-in sentence for this example's label. */
        private String placeholderText() {
            switch (sentiment) {
                case VERY_POSITIVE:
                    return "cats are super awesome!";
                case POSITIVE:
                    return "cats are great";
                case NEUTRAL:
                    return "cats have tails";
                case NEGATIVE:
                    return "cats suck";
                case VERY_NEGATIVE:
                    return "cats are literally the worst, I can't even.";
                default:
                    throw new IllegalStateException();
            }
        }
    }

    /**
     * Turns an annotated sentence into a bag of unigram and bigram features.
     *
     * <p>Each token contributes its lower-cased lemma as a unigram, except that
     * all-digit lemmas are collapsed into the single feature {@code **num**}.
     * Purely alphabetic lemmas also contribute a bigram {@code previous__current},
     * where the chain starts at {@code ^} and is closed with {@code __$}.
     * Non-alphabetic tokens do not advance the bigram chain.
     *
     * <p>Lower-casing uses {@link Locale#ROOT} so the features do not depend on
     * the JVM's default locale (under a Turkish locale "I" would otherwise
     * lower-case to a dotless "ı" and fail the alphabetic check).
     *
     * @param coreMap a sentence carrying token and lemma annotations
     * @return the feature counts for the sentence
     */
    private static Counter<String> featurize(CoreMap coreMap) {
        ClassicCounter<String> features = new ClassicCounter<>();
        String previous = "^";
        List<CoreLabel> tokens = coreMap.get(CoreAnnotations.TokensAnnotation.class);
        for (CoreLabel token : tokens) {
            String lemma = token.lemma().toLowerCase(Locale.ROOT);
            if (number.matcher(lemma).matches()) {
                features.incrementCount("**num**");
            } else {
                features.incrementCount(lemma);
            }
            if (alpha.matcher(lemma).matches()) {
                features.incrementCount(previous + "__" + lemma);
                previous = lemma;
            }
        }
        // Close the bigram chain with an end-of-sentence marker.
        features.incrementCount(previous + "__$");
        return features;
    }

    /**
     * Wraps an already-trained classifier. Private: within this class the only
     * caller is {@code train}, which passes the classifier it has just fitted.
     *
     * @param classifier the classifier used to answer {@code classify} calls
     */
    private SimpleSentiment(Classifier<SentimentClass, String> classifier) {
        this.impl = classifier;
    }

    /**
     * Classifies an already-annotated sentence.
     *
     * @param coreMap a sentence carrying the token and lemma annotations that
     *                feature extraction reads
     * @return the sentiment class chosen by the underlying classifier
     */
    public SentimentClass classify(CoreMap coreMap) {
        Counter<String> features = featurize(coreMap);
        RVFDatum<SentimentClass, String> datum = new RVFDatum<>(features);
        return impl.classOf(datum);
    }

    /**
     * Classifies raw text. The text is annotated with the shared pipeline and
     * only its first sentence is classified.
     *
     * @param str the text to classify
     * @return the sentiment class chosen for the first annotated sentence
     */
    public SentimentClass classify(String str) {
        Annotation document = new Annotation(str);
        pipeline.get().annotate(document);
        List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
        return classify(sentences.get(0));
    }

    /**
     * Trains a sentiment classifier from a stream of labelled examples.
     *
     * <p>Steps, in order:
     * <ol>
     *   <li>featurize every datum in parallel and collect the results in one dataset;</li>
     *   <li>log how many examples each sentiment class has;</li>
     *   <li>drop features below the count threshold, shuffle with a fixed seed,
     *       and fit a linear classifier with a {@link QNMinimizer};</li>
     *   <li>if a stream is supplied, serialize the classifier to it;</li>
     *   <li>run a 4-fold evaluation and log accuracy plus per-class precision,
     *       recall and F1.</li>
     * </ol>
     *
     * @param stream   the training examples; consumed unordered and in parallel
     * @param optional if present, receives the Java-serialized classifier. The stream is
     *                 closed once writing finishes or fails. A write failure is logged,
     *                 not rethrown.
     * @return a model wrapping the classifier trained on the full dataset
     */
    public static SimpleSentiment train(Stream<SentimentDatum> stream, Optional<OutputStream> optional) {
        // Hyper-parameters. Kept as plain (effectively final) locals so the
        // minimizer lambda below captures them as values.
        boolean useOwlqn = true;
        double sigma = 1.0d;
        int featureCountThreshold = 5;
        int numFolds = 4;

        // --- Featurize ---
        Redwood.Util.forceTrack("Featurizing");
        RVFDataset<SentimentClass, String> dataset = new RVFDataset<>();
        AtomicInteger datumsSeen = new AtomicInteger(0);
        Counter<SentimentClass> distribution = new ClassicCounter<>();
        stream.unordered().parallel()
            .map(datum -> {
                // Use the value returned by the increment so the logged number
                // is the one that actually hit the multiple of 10000.
                int seen = datumsSeen.incrementAndGet();
                if (seen % 10000 == 0) {
                    Redwood.Util.log("Added " + seen + " datums");
                }
                return new RVFDatum<SentimentClass, String>(featurize(datum.asCoreMap()), datum.sentiment);
            })
            .forEach(rvfDatum -> {
                // The dataset and counter are shared across worker threads.
                synchronized (dataset) {
                    distribution.incrementCount(rvfDatum.label());
                    dataset.add(rvfDatum);
                }
            });
        Redwood.Util.endTrack("Featurizing");

        // --- Label distribution ---
        Redwood.Util.startTrack("Distribution");
        for (SentimentClass label : SentimentClass.values()) {
            Redwood.Util.log(String.format("%7d", (int) distribution.getCount(label)) + "   " + label);
        }
        Redwood.Util.endTrack("Distribution");

        // --- Train ---
        Redwood.Util.forceTrack("Training");
        if (featureCountThreshold > 1) {
            dataset.applyFeatureCountThreshold(featureCountThreshold);
        }
        dataset.randomize(42L);
        LinearClassifierFactory<SentimentClass, String> factory = new LinearClassifierFactory<>();
        factory.setVerbose(true);
        try {
            factory.setMinimizerCreator(() -> {
                QNMinimizer minimizer = new QNMinimizer();
                if (useOwlqn) {
                    minimizer.useOWLQN(true, 1.0d / (sigma * sigma));
                } else {
                    factory.setSigma(sigma);
                }
                return minimizer;
            });
        } catch (Exception e) {
            // Best effort: training can still proceed with whatever minimizer
            // the factory already has, but do not hide the failure.
            log.warn("Could not install the custom minimizer; continuing with the factory's existing one", e);
        }
        factory.setSigma(sigma);
        LinearClassifier<SentimentClass, String> classifier = factory.trainClassifier(dataset);

        // --- Save (optional) ---
        optional.ifPresent(out -> {
            // try-with-resources closes the stream even when writeObject fails.
            try (ObjectOutputStream objectOut = new ObjectOutputStream(out)) {
                objectOut.writeObject(classifier);
            } catch (IOException e) {
                log.err("Could not save model to stream!", e);
            }
        });
        Redwood.Util.endTrack("Training");

        // --- Evaluate ---
        Redwood.Util.forceTrack("Evaluating");
        factory.setVerbose(false);
        double sumAccuracy = 0.0d;
        Counter<SentimentClass> sumPrecision = new ClassicCounter<>();
        Counter<SentimentClass> sumRecall = new ClassicCounter<>();
        for (int fold = 0; fold < numFolds; fold++) {
            Pair<GeneralDataset<SentimentClass, String>, GeneralDataset<SentimentClass, String>> split =
                dataset.splitOutFold(fold, numFolds);
            // Fit on the first part, warm-started from the full model; score on the second part.
            LinearClassifier<SentimentClass, String> foldClassifier =
                factory.trainClassifierWithInitialWeights(split.first, classifier);
            sumAccuracy += foldClassifier.evaluateAccuracy(split.second);
            for (SentimentClass label : SentimentClass.values()) {
                Pair<Double, Double> precisionRecall = foldClassifier.evaluatePrecisionAndRecall(split.second, label);
                sumPrecision.incrementCount(label, precisionRecall.first.doubleValue());
                // Recall goes to its own counter; it used to be added to the precision counter.
                sumRecall.incrementCount(label, precisionRecall.second.doubleValue());
            }
        }
        DecimalFormat percent = new DecimalFormat("0.000%");
        log.info("----------");
        log.info(numFolds + "-fold accuracy: " + percent.format(sumAccuracy / numFolds));
        log.info("");
        for (SentimentClass label : SentimentClass.values()) {
            double precision = sumPrecision.getCount(label) / numFolds;
            double recall = sumRecall.getCount(label) / numFolds;
            // Avoid 0/0 when a class has neither precision nor recall.
            double denominator = precision + recall;
            double f1 = denominator == 0.0d ? 0.0d : ((2.0d * precision) * recall) / denominator;
            log.info(label + " (P)  = " + percent.format(precision));
            log.info(label + " (R)  = " + percent.format(recall));
            log.info(label + " (F1) = " + percent.format(f1));
            log.info("");
        }
        log.info("----------");
        Redwood.Util.endTrack("Evaluating");
        return new SimpleSentiment(classifier);
    }

    /**
     * Streams every file found recursively under a directory as one training
     * example, all carrying the same label.
     *
     * @param str            path of the directory to walk
     * @param sentimentClass the label given to every file's contents
     * @return a parallel stream with one datum per file; a read failure surfaces
     *         as a {@link RuntimeIOException} when the stream is consumed
     */
    private static Stream<SentimentDatum> imdb(String str, SentimentClass sentimentClass) {
        Iterable<File> reviewFiles = IOUtils.iterFilesRecursive(new File(str));
        return StreamSupport
            .stream(reviewFiles.spliterator(), true)
            .map(reviewFile -> {
                String contents;
                try {
                    contents = IOUtils.slurpFile(reviewFile);
                } catch (IOException e) {
                    throw new RuntimeIOException(e);
                }
                return new SentimentDatum(contents, sentimentClass);
            });
    }

    /**
     * Streams training examples from a delimited text file, one per line.
     * Column index 2 holds the text and column index 3 the integer label.
     * Lines with fewer than four columns, the header line (whose label column
     * reads "Sentiment") and lines with empty text are replaced by a fixed
     * neutral filler example rather than dropped.
     *
     * <p>NOTE(review): the delimiter is {@code LinearClassifier.TEXT_SERIALIZATION_DELIMITER};
     * presumably a tab, given that {@code main} passes {@code .tsv} files. Confirm.
     *
     * @param str path of the file to read
     * @return a parallel stream with one datum per input line
     */
    private static Stream<SentimentDatum> stanford(String str) {
        Iterable<String> lines = IOUtils.readLines(str);
        return StreamSupport.stream(lines.spliterator(), true).map(line -> {
            String[] columns = line.split(LinearClassifier.TEXT_SERIALIZATION_DELIMITER);
            boolean unusable = columns.length < 4
                || "Sentiment".equalsIgnoreCase(columns[3])
                || columns[2].equals("");
            if (unusable) {
                return new SentimentDatum("Cats have tails", SentimentClass.NEUTRAL);
            }
            int label = Integer.parseInt(columns[3]);
            return new SentimentDatum(columns[2], SentimentClass.fromInt(label));
        });
    }

    /**
     * Streams training examples from a comma-separated file, one per line.
     * Field index 1 holds the integer label; the text starts at field index 3
     * and may itself contain commas, so all remaining fields are re-joined.
     * The header line (whose label field reads "Sentiment"), lines with fewer
     * than four fields and lines with empty text are replaced by a fixed
     * neutral filler example rather than dropped.
     *
     * @param str path of the file to read
     * @return a parallel stream with one datum per input line
     */
    private static Stream<SentimentDatum> twitter(String str) {
        return StreamSupport.stream(IOUtils.readLines(str).spliterator(), true).map(line -> {
            List<String> fields = Arrays.asList(line.split(","));
            // Index 3 is read below, so at least four fields are required.
            // String.split drops trailing empty fields, so a row whose text is
            // empty can arrive here with exactly three.
            if (fields.size() < 4 || "Sentiment".equalsIgnoreCase(fields.get(1)) || fields.get(3).equals("")) {
                return new SentimentDatum("Cats have tails", SentimentClass.NEUTRAL);
            }
            String text = StringUtils.join(fields.subList(3, fields.size()), ",");
            return new SentimentDatum(text, SentimentClass.fromInt(Integer.parseInt(fields.get(1))));
        });
    }

    /**
     * Streams every sentence of every file found recursively under a directory
     * as a neutral training example.
     *
     * @param str path of the directory to walk
     * @return a parallel stream with one neutral datum per sentence
     * @throws IOException declared by the signature; nothing in this body throws it directly
     */
    private static Stream<SentimentDatum> unlabelled(String str) throws IOException {
        Iterable<File> files = IOUtils.iterFilesRecursive(new File(str));
        return StreamSupport
            .stream(files.spliterator(), true)
            .flatMap(file -> new Document(IOUtils.slurpReader(IOUtils.readerFromFile(file))).sentences().stream())
            .map(sentence -> new SentimentDatum(sentence.text(), SentimentClass.NEUTRAL));
    }

    /**
     * Trains a model from the hard-coded corpora under {@code /users/gabor/tmp},
     * writes it to {@code /users/gabor/tmp/model.ser.gz}, and logs the
     * prediction for one sample sentence.
     *
     * @param strArr command-line arguments; not used
     * @throws IOException if a corpus or the model file cannot be opened or closed
     */
    public static void main(String[] strArr) throws IOException {
        RedwoodConfiguration.standard().apply();
        Redwood.Util.startTrack("main");

        Stream<SentimentDatum> imdbData = Stream.concat(
            Stream.concat(
                imdb("/users/gabor/tmp/aclImdb/train/pos", SentimentClass.POSITIVE),
                imdb("/users/gabor/tmp/aclImdb/train/neg", SentimentClass.NEGATIVE)),
            Stream.concat(
                imdb("/users/gabor/tmp/aclImdb/test/pos", SentimentClass.POSITIVE),
                imdb("/users/gabor/tmp/aclImdb/test/neg", SentimentClass.NEGATIVE)));
        Stream<SentimentDatum> otherData = Stream.concat(
            Stream.concat(
                stanford("/users/gabor/tmp/train.tsv"),
                stanford("/users/gabor/tmp/test.tsv")),
            Stream.concat(
                twitter("/users/gabor/tmp/twitter.csv"),
                unlabelled("/users/gabor/tmp/wikipedia")));
        Stream<SentimentDatum> data = Stream.concat(imdbData, otherData);

        SimpleSentiment model;
        // try-with-resources releases the model file even if training throws.
        try (OutputStream modelOut = IOUtils.getFileOutputStream("/users/gabor/tmp/model.ser.gz")) {
            model = train(data, Optional.of(modelOut));
        }
        log.info(model.classify("I think life is great"));
        Redwood.Util.endTrack("main");
    }

    /**
     * Rebuilds a serialized lambda belonging to this class. The method is
     * marked synthetic in the decompiled output and recognizes exactly one
     * lambda: the minimizer factory created in {@code train}.
     *
     * <p>NOTE(review): this looks like compiler-generated code rendered by the
     * decompiler rather than hand-written source; {@code boolean z = -1} and
     * {@code switch (z)} are presumably artifacts of an int selector. Confirm
     * before relying on or editing this method.
     *
     * @param serializedLambda the serialized form describing the lambda to restore
     * @return a factory lambda equivalent to the one created in {@code train}
     * @throws IllegalArgumentException if the description matches no known lambda
     */
    private static /* synthetic */ Object $deserializeLambda$(SerializedLambda serializedLambda) {
        String implMethodName = serializedLambda.getImplMethodName();
        boolean z = -1;
        // First switch: map the implementation method name to a selector.
        switch (implMethodName.hashCode()) {
            case -1243084963:
                if (implMethodName.equals("lambda$train$946015b$1")) {
                    z = false;
                    break;
                }
                break;
        }
        // Second switch: verify the full lambda shape, then rebuild it.
        switch (z) {
            case false:
                if (serializedLambda.getImplMethodKind() == 6 && serializedLambda.getFunctionalInterfaceClass().equals("edu/stanford/nlp/util/Factory") && serializedLambda.getFunctionalInterfaceMethodName().equals("create") && serializedLambda.getFunctionalInterfaceMethodSignature().equals("()Ljava/lang/Object;") && serializedLambda.getImplClass().equals("edu/stanford/nlp/sentiment/SimpleSentiment") && serializedLambda.getImplMethodSignature().equals("(ZDLedu/stanford/nlp/classify/LinearClassifierFactory;)Ledu/stanford/nlp/optimization/Minimizer;")) {
                    // Captured arguments, in order: boolean flag, sigma, classifier factory.
                    boolean booleanValue = ((Boolean) serializedLambda.getCapturedArg(0)).booleanValue();
                    double doubleValue = ((Double) serializedLambda.getCapturedArg(1)).doubleValue();
                    LinearClassifierFactory linearClassifierFactory = (LinearClassifierFactory) serializedLambda.getCapturedArg(2);
                    // Same body as the minimizer lambda in train.
                    return () -> {
                        QNMinimizer qNMinimizer = new QNMinimizer();
                        if (booleanValue) {
                            qNMinimizer.useOWLQN(true, 1.0d / (doubleValue * doubleValue));
                        } else {
                            linearClassifierFactory.setSigma(doubleValue);
                        }
                        return qNMinimizer;
                    };
                }
                break;
        }
        throw new IllegalArgumentException("Invalid lambda deserialization");
    }
}
