package edu.stanford.nlp.international.arabic.process;

import edu.stanford.nlp.international.arabic.pipeline.DefaultLexicalMapper;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.SentenceUtils;
import edu.stanford.nlp.process.TokenizerFactory;
import edu.stanford.nlp.semgraph.semgrex.ssurgeon.AddDep;
import edu.stanford.nlp.util.logging.Redwood;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.io.UnsupportedEncodingException;
import java.util.Arrays;
import java.util.List;

/* loaded from: input_file:edu/stanford/nlp/international/arabic/process/ArabicTokenizerTester.class */
/**
 * Command-line driver that compares the output of {@link ArabicTokenizer} with the output of
 * {@link DefaultLexicalMapper} on every line of a UTF-8 text file.
 *
 * <p>For each input line the tokenized form is printed to standard output. The same line is also
 * split on whitespace and each piece is passed through a {@code DefaultLexicalMapper} configured
 * with the {@code StripSegMarkersInUTF8} and {@code StripMorphMarkersInUTF8} options. Differences
 * in token count or in individual tokens are reported on standard error.
 *
 * <p>Usage: {@code java ArabicTokenizerTester OPTS filename}, where {@code OPTS} is the option
 * string handed to the tokenizer factory.
 */
public class ArabicTokenizerTester {
    private static final Redwood.RedwoodChannels log = Redwood.channels(ArabicTokenizerTester.class);

    /**
     * Program entry point.
     *
     * @param args exactly two elements: the tokenizer option string and the path of the input file
     */
    public static void main(String[] args) {
        if (args.length != 2) {
            // Usage errors go to stderr so they never mix with tokenized output on stdout.
            System.err.printf("Usage: java %s OPTS filename%n", ArabicTokenizerTester.class.getName());
            System.exit(-1);
        }
        String options = args[0];
        File file = new File(args[1]);
        log.info("Reading from: " + file.getPath());

        // try-with-resources guarantees the underlying file handle is released on every exit path.
        try (BufferedReader reader =
                new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF-8"))) {
            TokenizerFactory<CoreLabel> factory = ArabicTokenizer.factory();
            factory.setOptions(options);
            DefaultLexicalMapper mapper = new DefaultLexicalMapper();
            mapper.setup(null, "StripSegMarkersInUTF8", "StripMorphMarkersInUTF8");

            int lineCount = 0;
            for (String raw = reader.readLine(); raw != null; raw = reader.readLine()) {
                String line = raw.trim();
                List<CoreLabel> tokens = factory.getTokenizer(new StringReader(line)).tokenize();
                System.out.println(SentenceUtils.listToString(tokens));
                reportMismatches(line, tokens, mapLine(mapper, line));
                lineCount++;
            }
            System.err.printf("Read %d lines.%n", lineCount);
        } catch (IOException e) {
            // FileNotFoundException and UnsupportedEncodingException are IOException subclasses,
            // so a single handler covers every failure the read loop can raise.
            System.err.printf("Error while reading %s: %s%n", file.getPath(), e);
            System.exit(1);
        }
    }

    /**
     * Runs every whitespace-separated piece of {@code line} through {@code mapper} and returns the
     * whitespace-separated pieces of the joined result.
     *
     * @param mapper the configured lexical mapper
     * @param line the trimmed input line
     * @return the mapped tokens; an empty list when the mapped line contains no text
     */
    private static List<String> mapLine(DefaultLexicalMapper mapper, String line) {
        StringBuilder joined = new StringBuilder();
        for (String word : line.split("\\s+")) {
            // NOTE(review): ATOM_DELIMITER is presumably a single space (the result is trimmed and
            // re-split on whitespace below) -- confirm against AddDep.
            joined.append(mapper.map(null, word)).append(AddDep.ATOM_DELIMITER);
        }
        String mapped = joined.toString().trim();
        if (mapped.isEmpty()) {
            // "".split(...) yields one empty string, which would be counted as a token and
            // trigger a bogus length mismatch for blank lines.
            return Arrays.<String>asList();
        }
        return Arrays.asList(mapped.split("\\s+"));
    }

    /**
     * Writes a report to standard error when the tokenizer output and the mapper output disagree,
     * either in the number of tokens or in the text of a token at the same position.
     *
     * @param line the trimmed input line, echoed in the report
     * @param tokens the tokenizer output for {@code line}
     * @param mapped the mapper output for {@code line}
     */
    private static void reportMismatches(String line, List<CoreLabel> tokens, List<String> mapped) {
        if (mapped.size() != tokens.size()) {
            System.err.printf("Line length mismatch:%norig: %s%ntok: %s%nmap: %s%n%n",
                    line, SentenceUtils.listToString(tokens), SentenceUtils.listToString(mapped));
            return;
        }

        boolean anyMismatch = false;
        for (int idx = 0; idx < mapped.size(); idx++) {
            String mappedWord = mapped.get(idx);
            String tokenWord = tokens.get(idx).word();
            if (!mappedWord.equals(tokenWord)) {
                System.err.printf("Token mismatch:%nmap: %s%ntok: %s%n", mappedWord, tokenWord);
                anyMismatch = true;
            }
        }
        if (anyMismatch) {
            // Print the full line once after the per-token reports, for context.
            System.err.printf("orig: %s%ntok: %s%nmap: %s%n%n",
                    line, SentenceUtils.listToString(tokens), SentenceUtils.listToString(mapped));
        }
    }
}
