package edu.stanford.nlp.trees.international.arabic;

import edu.stanford.nlp.ie.pascal.PascalTemplate;
import edu.stanford.nlp.io.EncodingPrintWriter;
import edu.stanford.nlp.naturalli.demo.CORSFilter;
import edu.stanford.nlp.semgraph.semgrex.ssurgeon.AddDep;
import edu.stanford.nlp.tagger.maxent.TaggerConfig;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.StringUtils;
import edu.stanford.nlp.util.logging.Redwood;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.regex.Pattern;

/* loaded from: input_file:edu/stanford/nlp/trees/international/arabic/ArabicUtils.class */
/**
 * Static helpers for normalizing Arabic text with regex-based rewrite rules.
 *
 * <p>Two rule tables are provided: one that rewrites Arabic presentation-form
 * code points to basic-range letters ({@link #presToLogicalMap()}), and one
 * that collapses letter variants, strips diacritics and tatweel, and maps
 * punctuation and digits to ASCII ({@link #getArabicIBMNormalizerMap()}).
 * {@link #normalize(String)} applies both tables to a string.
 */
public class ArabicUtils {
    private static final Redwood.RedwoodChannels log = Redwood.channels(ArabicUtils.class);

    /** A single rewrite rule: a regex compiled once, plus its replacement string. */
    private static final class Rule {
        private final Pattern pattern;
        private final String replacement;

        Rule(String regex, String replacement) {
            this.pattern = Pattern.compile(regex);
            this.replacement = replacement;
        }

        /** Replaces every match of this rule's pattern in {@code text}. */
        String apply(String text) {
            // Same semantics as text.replaceAll(regex, replacement), minus the recompilation.
            return pattern.matcher(text).replaceAll(replacement);
        }
    }

    /**
     * Holds the compiled rule list. Being a nested class, it is initialized on
     * the first call to {@link #normalize(String)} rather than when
     * {@code ArabicUtils} itself is loaded.
     */
    private static final class RuleHolder {
        static final List<Rule> RULES = compileRules();
    }

    /**
     * Merges both rule tables and compiles every key into a {@link Pattern}.
     *
     * <p>The merged map is built with exactly the operations the per-call code
     * used previously (normalizer map first, then {@code putAll} of the
     * presentation-form map), and the rules are stored in that map's iteration
     * order, so rules are applied in the same sequence as before.
     */
    private static List<Rule> compileRules() {
        Map<String, String> merged = getArabicIBMNormalizerMap();
        merged.putAll(presToLogicalMap());
        List<Rule> compiled = new ArrayList<>(merged.size());
        for (Map.Entry<String, String> entry : merged.entrySet()) {
            compiled.add(new Rule(entry.getKey(), entry.getValue()));
        }
        return Collections.unmodifiableList(compiled);
    }

    /**
     * Builds the table that rewrites presentation-form code points
     * (U+FC5E..U+FC62 and U+FE80..U+FEFC) to basic-range Arabic characters.
     *
     * @return a new, mutable map on every call; each key is a regular
     *         expression (a single escaped code point or a character class)
     *         and each value is the replacement string
     */
    public static Map<String, String> presToLogicalMap() {
        Map<String, String> rules = Generics.newHashMap();
        // Shadda ligatures: the replacement is a space followed by two combining marks.
        rules.put("\\ufc5e", " ٌّ");
        rules.put("\\ufc5f", " ٍّ");
        rules.put("\\ufc60", " َّ");
        rules.put("\\ufc61", " ُّ");
        rules.put("\\ufc62", " ِّ");
        // Positional letter forms collapse to the single basic letter.
        rules.put("\\ufe80", "ء");
        rules.put("[\\ufe81\\ufe82]", "آ");
        rules.put("[\\ufe83\\ufe84]", "أ");
        rules.put("[\\ufe85\\ufe86]", "ؤ");
        rules.put("[\\ufe87\\ufe88]", "إ");
        rules.put("[\\ufe89\\ufe8a\\ufe8b\\ufe8c]", "ئ");
        rules.put("[\\ufe8d\\ufe8e]", "ا");
        rules.put("[\\ufe8f\\ufe90\\ufe91\\ufe92]", "ب");
        rules.put("[\\ufe93\\ufe94]", "ة");
        rules.put("[\\ufe95\\ufe96\\ufe97\\ufe98]", "ت");
        rules.put("[\\ufe99\\ufe9a\\ufe9b\\ufe9c]", "ث");
        rules.put("[\\ufe9d\\ufe9e\\ufe9f\\ufea0]", "ج");
        rules.put("[\\ufea1\\ufea2\\ufea3\\ufea4]", "ح");
        rules.put("[\\ufea5\\ufea6\\ufea7\\ufea8]", "خ");
        rules.put("[\\ufea9\\ufeaa]", "د");
        rules.put("[\\ufeab\\ufeac]", "ذ");
        rules.put("[\\ufead\\ufeae]", "ر");
        rules.put("[\\ufeaf\\ufeb0]", "ز");
        rules.put("[\\ufeb1\\ufeb2\\ufeb3\\ufeb4]", "س");
        rules.put("[\\ufeb5\\ufeb6\\ufeb7\\ufeb8]", "ش");
        rules.put("[\\ufeb9\\ufeba\\ufebb\\ufebc]", "ص");
        rules.put("[\\ufebd\\ufebe\\ufebf\\ufec0]", "ض");
        rules.put("[\\ufec1\\ufec2\\ufec3\\ufec4]", "ط");
        rules.put("[\\ufec5\\ufec6\\ufec7\\ufec8]", "ظ");
        rules.put("[\\ufec9\\ufeca\\ufecb\\ufecc]", "ع");
        rules.put("[\\ufecd\\ufece\\ufecf\\ufed0]", "غ");
        rules.put("[\\ufed1\\ufed2\\ufed3\\ufed4]", "ف");
        rules.put("[\\ufed5\\ufed6\\ufed7\\ufed8]", "ق");
        rules.put("[\\ufed9\\ufeda\\ufedb\\ufedc]", "ك");
        rules.put("[\\ufedd\\ufede\\ufedf\\ufee0]", "ل");
        rules.put("[\\ufee1\\ufee2\\ufee3\\ufee4]", "م");
        rules.put("[\\ufee5\\ufee6\\ufee7\\ufee8]", "ن");
        rules.put("[\\ufee9\\ufeea\\ufeeb\\ufeec]", "ه");
        rules.put("[\\ufeed\\ufeee]", "و");
        rules.put("[\\ufeef\\ufef0]", "ى");
        rules.put("[\\ufef1\\ufef2\\ufef3\\ufef4]", "ي");
        // Lam-alef ligatures expand to two letters.
        rules.put("[\\ufef5\\ufef6]", "لآ");
        rules.put("[\\ufef7\\ufef8]", "لأ");
        rules.put("[\\ufef9\\ufefa]", "لإ");
        rules.put("[\\ufefb\\ufefc]", "لا");
        return rules;
    }

    /**
     * Builds the normalization table: alef variants to bare alef, U+0649 to
     * U+064A, removal of diacritics and tatweel (U+0640), and Arabic
     * punctuation, digits, dashes and quotes to ASCII equivalents.
     *
     * <p>Any exception raised while filling the table is logged and swallowed,
     * in which case the map filled so far is returned.
     *
     * @return a new, mutable map on every call; each key is a regular
     *         expression and each value is a replacement string in
     *         {@code replaceAll} syntax (one value uses the group reference
     *         {@code $1})
     */
    public static Map<String, String> getArabicIBMNormalizerMap() {
        Map<String, String> rules = Generics.newHashMap();
        try {
            rules.put("[\\u0622\\u0623\\u0625]", "ا");
            rules.put("[\\u0649]", "ي");
            rules.put("[\\u064B\\u064C\\u064D\\u064E\\u064F\\u0650\\u0651\\u0652\\u0653\\u0670]", "");
            // Tatweel: dropped when more non-space text follows, or when it follows a non-space char.
            rules.put("\\u0640(?=\\s*\\S)", "");
            rules.put("(\\S)\\u0640", "$1");
            // NOTE(review): the replacements for this rule, U+066D and the digits 0-2 come from
            // constants of unrelated classes (AddDep, CORSFilter, PascalTemplate, TaggerConfig).
            // This looks like a decompiler constant-matching artifact; presumably the intended
            // literals are " ", "*", "0", "1" and "2" -- confirm and replace with literals.
            rules.put("[\\ufeff\\u00a0]", AddDep.ATOM_DELIMITER);
            rules.put("\\u060c", ",");
            rules.put("\\u061b", ";");
            rules.put("\\u061f", "?");
            rules.put("\\u066a", "%");
            rules.put("\\u066b", ".");
            rules.put("\\u066c", ",");
            rules.put("\\u066d", CORSFilter.DEFAULT_ALLOWED_ORIGINS);
            rules.put("\\u06d4", ".");
            // Each digit class covers three code-point ranges: U+066x, U+06Fx and U+096x.
            rules.put("[\\u0660\\u06f0\\u0966]", PascalTemplate.BACKGROUND_SYMBOL);
            rules.put("[\\u0661\\u06f1\\u0967]", TaggerConfig.NTHREADS);
            rules.put("[\\u0662\\u06f2\\u0968]", TaggerConfig.CUR_WORD_MIN_FEATURE_THRESH);
            rules.put("[\\u0663\\u06f3\\u0969]", "3");
            rules.put("[\\u0664\\u06f4\\u096a]", "4");
            rules.put("[\\u0665\\u06f5\\u096b]", "5");
            rules.put("[\\u0666\\u06f6\\u096c]", "6");
            rules.put("[\\u0667\\u06f7\\u096d]", "7");
            rules.put("[\\u0668\\u06f8\\u096e]", "8");
            rules.put("[\\u0669\\u06f9\\u096f]", "9");
            rules.put("[\\u0654\\u0655\\u0670]", "");
            rules.put("\\u064A\\u0621", "ئ");
            rules.put("\\u2013", "-");
            rules.put("\\u2014", "--");
            rules.put("[\\u0091\\u0092\\u2018\\u2019]", "'");
            rules.put("[\\u0093\\u0094\\u201C\\u201D]", "\"");
        } catch (Exception e) {
            log.info("Caught exception creating Arabic normalizer map: " + e.toString());
        }
        return rules;
    }

    /**
     * Applies every rule from {@link #getArabicIBMNormalizerMap()} and
     * {@link #presToLogicalMap()} to the input, one after another, each rule
     * operating on the output of the previous one.
     *
     * <p>The rules are compiled once and reused across calls instead of being
     * rebuilt and recompiled for every string.
     *
     * <p>NOTE(review): rules run in the iteration order of the merged map, not
     * in declaration order. Some rules produce characters that other rules
     * consume, so for such inputs the result depends on that order -- confirm
     * whether a fixed order was intended.
     *
     * @param str the text to normalize; must not be {@code null}
     * @return the text after all rules have been applied
     * @throws NullPointerException if {@code str} is {@code null}
     */
    public static String normalize(String str) {
        String result = str;
        for (Rule rule : RuleHolder.RULES) {
            result = rule.apply(result);
        }
        return result;
    }

    /**
     * Command-line entry point: reads the UTF-8 file named by the
     * {@code input} property, normalizes it line by line and prints each
     * result as UTF-8. Returns silently when no {@code input} is given.
     *
     * @param strArr command-line arguments, parsed with
     *               {@code StringUtils.argsToProperties}
     * @throws IOException if the input file cannot be opened or read
     */
    public static void main(String[] strArr) throws IOException {
        Properties options = StringUtils.argsToProperties(strArr);
        if (!options.containsKey("input")) {
            return;
        }
        // try-with-resources closes the file even if reading or normalizing fails.
        try (FileInputStream fileIn = new FileInputStream(options.getProperty("input"));
             BufferedReader reader = new BufferedReader(new InputStreamReader(fileIn, "UTF-8"))) {
            for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                EncodingPrintWriter.out.println(normalize(line), "UTF-8");
            }
        }
    }
}
