package edu.stanford.nlp.process;

import edu.stanford.nlp.ie.pascal.PascalTemplate;
import edu.stanford.nlp.international.morph.MorphoFeatures;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.Label;
import edu.stanford.nlp.ling.SentenceUtils;
import edu.stanford.nlp.parser.lexparser.LatticeXMLReader;
import edu.stanford.nlp.pipeline.Annotator;
import edu.stanford.nlp.quoteattribution.Sieves.Sieve;
import edu.stanford.nlp.semgraph.semgrex.ssurgeon.AddDep;
import edu.stanford.nlp.semgraph.semgrex.ssurgeon.SsurgeonPattern;
import edu.stanford.nlp.tagger.maxent.TaggerConfig;
import edu.stanford.nlp.trees.international.negra.NegraLabel;
import edu.stanford.nlp.trees.international.negra.NegraPennLanguagePack;
import java.io.StringReader;
import java.util.Arrays;
import java.util.List;
import org.junit.Assert;
import org.junit.Test;

/* loaded from: input_file:edu/stanford/nlp/process/PTBTokenizerTest.class */
public class PTBTokenizerTest {
    // Tokenized texts fed to PTBTokenizer.ptb2Text by testUntok; assigned in the static initializer.
    private static final String[] untokInputs;
    // Expected ptb2Text results, compared index-by-index with untokInputs; assigned in the static initializer.
    private static final String[] untokOutputs;
    // Decompiler-exposed flag: the static initializer sets it to the negation of this class's desiredAssertionStatus().
    static final /* synthetic */ boolean $assertionsDisabled;
    private final String[] ptbInputs = {"This is a sentence.", "U.S. insurance: Conseco acquires Kemper Corp. \n</HEADLINE>\n<P>\nU.S insurance", "Based in Eugene,Ore., PakTech needs a new distributor after Sydney-based Creative Pack Pty. Ltd. went into voluntary administration.", "The Iron Age (ca. 1300 – ca. 300 BC).", "Indo\u00adnesian ship\u00adping \u00ad", "Gimme a phone, I'm gonna call.", "\"John & Mary's dog,\" Jane thought (to herself).\n\"What a #$%!\na- ``I like AT&T''.\"", "I said at 4:45pm.", "I can't believe they wanna keep 40% of that.\"\n``Whatcha think?''\n\"I don't --- think so...,\"", "You `paid' US$170,000?!\nYou should've paid only$16.75.", "1. Buy a new Chevrolet (37%-owned in the U.S..) . 15%", "I like you ;-) but do you care :(. I'm happy ^_^ but shy (x.x)!", "Diamond (``Not even the chair'') lives near Udaipur (84km). {1. A potential Palmer trade:}", "No. I like No. 24 and no.47.", "You can get a B.S. or a B. A. or a Ph.D (sometimes a Ph. D) from Stanford.", "@Harry_Styles didn`t like Mu`ammar al-Qaddafi", "Kenneth liked Windows 3.1, Windows 3.x, and Mesa A.B as I remember things.", "I like programming in F# more than C#.", "NBC Live will be available free through the Yahoo! Chat Web site. E! Entertainment said ``Jeopardy!'' is a game show.", "I lived in O’Malley and read OK! Magazine.", "I lived in O\u0092Malley and read OK! Magazine.", "I like: •wine, \u0095cheese, ‣salami, & ⁃speck.", "I don't give a f**k about your sh*tty life.", "First sentence.... Second sentence.", "First sentence . . . . Second sentence.", "I wasn’t really ... well, what I mean...see . . . what I'm saying, the thing is . . . I didn’t mean it.", "This is a url test. Here is one: http://google.com.", "This is a url test. Here is one: htvp://google.com.", "Download from ftp://myname@host.dom/%2Fetc/motd", "Download from svn://user@location.edu/path/to/magic/unicorns", "Download from svn+ssh://user@location.edu/path/to/magic/unicorns", "We traveled from No. Korea to So. 
Calif. yesterday.", "I dunno.", "The o-kay was received by the anti-acquisition front on its foolishness-filled fish market.", "We ran the pre-tests through the post-scripted centrifuge.", "School-aged parents should be aware of the unique problems that they face.", "I dispute Art. 53 of the convention.", "I like Art. And I like History."};
    private final String[][] ptbGold = {new String[]{"This", "is", "a", LatticeXMLReader.SENTENCE, "."}, new String[]{"U.S.", "insurance", MorphoFeatures.KEY_VAL_DELIM, "Conseco", "acquires", "Kemper", "Corp.", ".", "</HEADLINE>", "<P>", "U.S", "insurance"}, new String[]{"Based", "in", "Eugene", ",", "Ore.", ",", "PakTech", "needs", "a", "new", "distributor", "after", "Sydney-based", "Creative", "Pack", "Pty.", "Ltd.", "went", "into", "voluntary", "administration", "."}, new String[]{"The", "Iron", "Age", "-LRB-", "ca.", "1300", "--", "ca.", "300", "BC", "-RRB-", "."}, new String[]{"Indonesian", "shipping", "-"}, new String[]{"Gim", "me", "a", "phone", ",", "I", "'m", "gon", "na", "call", "."}, new String[]{"``", "John", "&", "Mary", "'s", "dog", ",", "''", "Jane", "thought", "-LRB-", LatticeXMLReader.TO_NODE, "herself", "-RRB-", ".", "``", "What", "a", NegraLabel.FEATURE_SEP, "$", "%", "!", "a", "-", "``", "I", "like", "AT&T", "''", ".", "''"}, new String[]{"I", "said", "at", "4:45", "pm", "."}, new String[]{"I", "ca", "n't", "believe", "they", "wan", "na", "keep", TaggerConfig.CLOSED_CLASS_THRESHOLD, "%", "of", "that", ".", "''", "``", "Whatcha", "think", "?", "''", "``", "I", "do", "n't", "--", "think", "so", "...", ",", "''"}, new String[]{"You", "`", "paid", "'", "US$", "170,000", "?!", "You", "should", "'ve", "paid", "only", "$", "16.75", "."}, new String[]{TaggerConfig.NTHREADS, ".", "Buy", "a", "new", "Chevrolet", "-LRB-", "37", "%", "-", "owned", "in", "the", "U.S.", ".", "-RRB-", ".", "15", "%"}, new String[]{"I", "like", "you", ";--RRB-", "but", "do", "you", "care", ":-LRB-", ".", "I", "'m", "happy", "^_^", "but", "shy", "-LRB-x.x-RRB-", "!"}, new String[]{"Diamond", "-LRB-", "``", "Not", "even", "the", "chair", "''", "-RRB-", "lives", "near", "Udaipur", "-LRB-", "84km", "-RRB-", ".", "-LCB-", TaggerConfig.NTHREADS, ".", "A", "potential", "Palmer", "trade", MorphoFeatures.KEY_VAL_DELIM, "-RCB-"}, new String[]{"No", ".", "I", "like", "No.", "24", 
SsurgeonPattern.PREDICATE_AND_TAG, "no.", "47", "."}, new String[]{"You", "can", "get", "a", "B.S.", SsurgeonPattern.PREDICATE_OR_TAG, "a", "B.", "A.", SsurgeonPattern.PREDICATE_OR_TAG, "a", "Ph.D", "-LRB-", "sometimes", "a", "Ph.", "D", "-RRB-", LatticeXMLReader.FROM_NODE, "Stanford", "."}, new String[]{"@Harry_Styles", "did", "n`t", "like", "Mu`ammar", "al-Qaddafi"}, new String[]{"Kenneth", "liked", "Windows", "3.1", ",", "Windows", "3.x", ",", SsurgeonPattern.PREDICATE_AND_TAG, "Mesa", "A.B", "as", "I", "remember", "things", "."}, new String[]{"I", "like", "programming", "in", "F#", "more", "than", "C#", "."}, new String[]{"NBC", "Live", "will", "be", "available", "free", "through", "the", "Yahoo!", "Chat", "Web", "site", ".", "E!", "Entertainment", "said", "``", "Jeopardy!", "''", "is", "a", "game", "show", "."}, new String[]{"I", "lived", "in", "O'Malley", SsurgeonPattern.PREDICATE_AND_TAG, "read", "OK!", "Magazine", "."}, new String[]{"I", "lived", "in", "O'Malley", SsurgeonPattern.PREDICATE_AND_TAG, "read", "OK!", "Magazine", "."}, new String[]{"I", "like", MorphoFeatures.KEY_VAL_DELIM, "•", "wine", ",", "•", "cheese", ",", "‣", "salami", ",", "&", "⁃", "speck", "."}, new String[]{"I", "do", "n't", "give", "a", "f**k", "about", "your", "sh*tty", "life", "."}, new String[]{"First", LatticeXMLReader.SENTENCE, "...", ".", "Second", LatticeXMLReader.SENTENCE, "."}, new String[]{"First", LatticeXMLReader.SENTENCE, "...", ".", "Second", LatticeXMLReader.SENTENCE, "."}, new String[]{"I", "was", "n't", "really", "...", "well", ",", "what", "I", "mean", "...", "see", "...", "what", "I", "'m", "saying", ",", "the", "thing", "is", "...", "I", "did", "n't", "mean", "it", "."}, new String[]{"This", "is", "a", "url", "test", ".", "Here", "is", "one", MorphoFeatures.KEY_VAL_DELIM, "http://google.com", "."}, new String[]{"This", "is", "a", "url", "test", ".", "Here", "is", "one", MorphoFeatures.KEY_VAL_DELIM, "htvp", MorphoFeatures.KEY_VAL_DELIM, "/", "/", "google.com", 
"."}, new String[]{"Download", LatticeXMLReader.FROM_NODE, "ftp://myname@host.dom/%2Fetc/motd"}, new String[]{"Download", LatticeXMLReader.FROM_NODE, "svn://user@location.edu/path/to/magic/unicorns"}, new String[]{"Download", LatticeXMLReader.FROM_NODE, "svn+ssh://user@location.edu/path/to/magic/unicorns"}, new String[]{"We", "traveled", LatticeXMLReader.FROM_NODE, "No.", "Korea", LatticeXMLReader.TO_NODE, "So.", "Calif.", "yesterday", "."}, new String[]{"I", "du", "n", "no", "."}, new String[]{"The", "o-kay", "was", "received", "by", "the", "anti-acquisition", "front", "on", "its", "foolishness-filled", "fish", "market", "."}, new String[]{"We", "ran", "the", "pre-tests", "through", "the", "post-scripted", "centrifuge", "."}, new String[]{"School-aged", "parents", "should", "be", "aware", "of", "the", "unique", "problems", "that", "they", "face", "."}, new String[]{"I", "dispute", "Art.", "53", "of", "the", "convention", "."}, new String[]{"I", "like", "Art", ".", "And", "I", "like", "History", "."}};
    private final String[][] ptbGoldSplitHyphenated = {new String[]{"This", "is", "a", LatticeXMLReader.SENTENCE, "."}, new String[]{"U.S.", "insurance", MorphoFeatures.KEY_VAL_DELIM, "Conseco", "acquires", "Kemper", "Corp.", ".", "</HEADLINE>", "<P>", "U.S", "insurance"}, new String[]{"Based", "in", "Eugene", ",", "Ore.", ",", "PakTech", "needs", "a", "new", "distributor", "after", "Sydney", "-", "based", "Creative", "Pack", "Pty.", "Ltd.", "went", "into", "voluntary", "administration", "."}, new String[]{"The", "Iron", "Age", "-LRB-", "ca.", "1300", "--", "ca.", "300", "BC", "-RRB-", "."}, new String[]{"Indonesian", "shipping", "-"}, new String[]{"Gim", "me", "a", "phone", ",", "I", "'m", "gon", "na", "call", "."}, new String[]{"``", "John", "&", "Mary", "'s", "dog", ",", "''", "Jane", "thought", "-LRB-", LatticeXMLReader.TO_NODE, "herself", "-RRB-", ".", "``", "What", "a", NegraLabel.FEATURE_SEP, "$", "%", "!", "a", "-", "``", "I", "like", "AT&T", "''", ".", "''"}, new String[]{"I", "said", "at", "4:45", "pm", "."}, new String[]{"I", "ca", "n't", "believe", "they", "wan", "na", "keep", TaggerConfig.CLOSED_CLASS_THRESHOLD, "%", "of", "that", ".", "''", "``", "Whatcha", "think", "?", "''", "``", "I", "do", "n't", "--", "think", "so", "...", ",", "''"}, new String[]{"You", "`", "paid", "'", "US$", "170,000", "?!", "You", "should", "'ve", "paid", "only", "$", "16.75", "."}, new String[]{TaggerConfig.NTHREADS, ".", "Buy", "a", "new", "Chevrolet", "-LRB-", "37", "%", "-", "owned", "in", "the", "U.S.", ".", "-RRB-", ".", "15", "%"}, new String[]{"I", "like", "you", ";--RRB-", "but", "do", "you", "care", ":-LRB-", ".", "I", "'m", "happy", "^_^", "but", "shy", "-LRB-x.x-RRB-", "!"}, new String[]{"Diamond", "-LRB-", "``", "Not", "even", "the", "chair", "''", "-RRB-", "lives", "near", "Udaipur", "-LRB-", "84km", "-RRB-", ".", "-LCB-", TaggerConfig.NTHREADS, ".", "A", "potential", "Palmer", "trade", MorphoFeatures.KEY_VAL_DELIM, "-RCB-"}, new String[]{"No", ".", "I", 
"like", "No.", "24", SsurgeonPattern.PREDICATE_AND_TAG, "no.", "47", "."}, new String[]{"You", "can", "get", "a", "B.S.", SsurgeonPattern.PREDICATE_OR_TAG, "a", "B.", "A.", SsurgeonPattern.PREDICATE_OR_TAG, "a", "Ph.D", "-LRB-", "sometimes", "a", "Ph.", "D", "-RRB-", LatticeXMLReader.FROM_NODE, "Stanford", "."}, new String[]{"@Harry_Styles", "did", "n`t", "like", "Mu`ammar", "al", "-", "Qaddafi"}, new String[]{"Kenneth", "liked", "Windows", "3.1", ",", "Windows", "3.x", ",", SsurgeonPattern.PREDICATE_AND_TAG, "Mesa", "A.B", "as", "I", "remember", "things", "."}, new String[]{"I", "like", "programming", "in", "F#", "more", "than", "C#", "."}, new String[]{"NBC", "Live", "will", "be", "available", "free", "through", "the", "Yahoo!", "Chat", "Web", "site", ".", "E!", "Entertainment", "said", "``", "Jeopardy!", "''", "is", "a", "game", "show", "."}, new String[]{"I", "lived", "in", "O'Malley", SsurgeonPattern.PREDICATE_AND_TAG, "read", "OK!", "Magazine", "."}, new String[]{"I", "lived", "in", "O'Malley", SsurgeonPattern.PREDICATE_AND_TAG, "read", "OK!", "Magazine", "."}, new String[]{"I", "like", MorphoFeatures.KEY_VAL_DELIM, "•", "wine", ",", "•", "cheese", ",", "‣", "salami", ",", "&", "⁃", "speck", "."}, new String[]{"I", "do", "n't", "give", "a", "f**k", "about", "your", "sh*tty", "life", "."}, new String[]{"First", LatticeXMLReader.SENTENCE, "...", ".", "Second", LatticeXMLReader.SENTENCE, "."}, new String[]{"First", LatticeXMLReader.SENTENCE, "...", ".", "Second", LatticeXMLReader.SENTENCE, "."}, new String[]{"I", "was", "n't", "really", "...", "well", ",", "what", "I", "mean", "...", "see", "...", "what", "I", "'m", "saying", ",", "the", "thing", "is", "...", "I", "did", "n't", "mean", "it", "."}, new String[]{"This", "is", "a", "url", "test", ".", "Here", "is", "one", MorphoFeatures.KEY_VAL_DELIM, "http://google.com", "."}, new String[]{"This", "is", "a", "url", "test", ".", "Here", "is", "one", MorphoFeatures.KEY_VAL_DELIM, "htvp", 
MorphoFeatures.KEY_VAL_DELIM, "/", "/", "google.com", "."}, new String[]{"Download", LatticeXMLReader.FROM_NODE, "ftp://myname@host.dom/%2Fetc/motd"}, new String[]{"Download", LatticeXMLReader.FROM_NODE, "svn://user@location.edu/path/to/magic/unicorns"}, new String[]{"Download", LatticeXMLReader.FROM_NODE, "svn+ssh://user@location.edu/path/to/magic/unicorns"}, new String[]{"We", "traveled", LatticeXMLReader.FROM_NODE, "No.", "Korea", LatticeXMLReader.TO_NODE, "So.", "Calif.", "yesterday", "."}, new String[]{"I", "du", "n", "no", "."}, new String[]{"The", "o-kay", "was", "received", "by", "the", "anti-acquisition", "front", "on", "its", "foolishness", "-", "filled", "fish", "market", "."}, new String[]{"We", "ran", "the", "pre-tests", "through", "the", "post-scripted", "centrifuge", "."}, new String[]{"School", "-", "aged", "parents", "should", "be", "aware", "of", "the", "unique", "problems", "that", "they", "face", "."}, new String[]{"I", "dispute", "Art.", "53", "of", "the", "convention", "."}, new String[]{"I", "like", "Art", ".", "And", "I", "like", "History", "."}};
    // The same sentence twice: once ending right after "Corp." and once followed by trailing spaces.
    // testCorp selects the input with index i / 2.
    private final String[] corpInputs = {"So, too, many analysts predict, will Exxon Corp., Chevron Corp. and Amoco Corp.", "So, too, many analysts predict, will Exxon Corp., Chevron Corp. and Amoco Corp.   "};
    // Expected tokens for corpInputs, selected in testCorp with index i % 2:
    // row 0 is used with the "strictTreebank3" option string, row 1 with the empty option string.
    // The rows differ only in the second-to-last token ("Corp" vs "Corp.").
    private final String[][] corpGold = {new String[]{"So", ",", "too", ",", "many", "analysts", "predict", ",", "will", "Exxon", "Corp.", ",", "Chevron", "Corp.", SsurgeonPattern.PREDICATE_AND_TAG, "Amoco", "Corp", "."}, new String[]{"So", ",", "too", ",", "many", "analysts", "predict", ",", "will", "Exxon", "Corp.", ",", "Chevron", "Corp.", SsurgeonPattern.PREDICATE_AND_TAG, "Amoco", "Corp.", "."}};
    private final String[] sgmlInputs = {"Significant improvements in peak FEV1 were demonstrated with tiotropium/olodaterol 5/2 μg (p\u2009=\u20090.008), 5/5 μg (p\u2009=\u20090.012), and 5/10 μg (p\u2009<\u20090.0001) versus tiotropium monotherapy [51].", "Panasonic brand products are produced by Samsung Electronics Co. Ltd. Sanyo products aren't.", "Oesophageal acid exposure (% time <pH 4) was similar in patients with or without complications (19.2% v 19.3% p>0.05).", "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Strict//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">", "Hi! <foo bar=\"baz xy = foo !$*) 422\" > <?PITarget PIContent?> <?PITarget PIContent> Hi!", "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n<?xml-stylesheet type=\"text/xsl\" href=\"style.xsl\"?>\n<book xml:id=\"simple_book\" xmlns=\"http://docbook.org/ns/docbook\" version=\"5.0\">\n", "<chapter xml:id=\"chapter_1\"><?php echo $a; ?>\n<!-- This is an SGML/XML comment \"Hi!\" -->\n<p> </p> <p-fix / >", "<a href=\"http:\\\\it's\\here\"> <quote orig_author='some \"dude'/> <not sgmltag", "<quote previouspost=\"\n&gt; &gt; I really don't want to process this junk.\n&gt; No one said you did, runny.  What's got you so scared, anyway?-\n\">", "&lt;b...@canada.com&gt; funky@thedismalscience.net <myemail@where.com>", "<DOC> <DOCID> nyt960102.0516 </DOCID><STORYID cat=w pri=u> A0264 </STORYID> <SLUG fv=ttj-z> ", "<!-- copy from here --> <a href=\"http://strategis.gc.ca/epic/internet/inabc-eac.nsf/en/home\"><img src=\"id-images/ad-220x80_01e.jpg\" alt=\"Aboriginal Business Canada:\nOpening New Doors for Your Business\" width=\"220\" height=\"80\" border=\"0\"></a> <!-- copy to here --> Small ABC Graphic Instructions 1.", "We traveled from No.\nKorea to the U.S.A.\nWhy?"};
    private final String[][] sgmlGold = {new String[]{"Significant", "improvements", "in", "peak", "FEV1", "were", "demonstrated", "with", "tiotropium/olodaterol", "5/2", "μg", "-LRB-", "p", AddDep.TUPLE_DELIMITER, "0.008", "-RRB-", ",", "5/5", "μg", "-LRB-", "p", AddDep.TUPLE_DELIMITER, "0.012", "-RRB-", ",", SsurgeonPattern.PREDICATE_AND_TAG, "5/10", "μg", "-LRB-", "p", "<", "0.0001", "-RRB-", "versus", "tiotropium", "monotherapy", "-LSB-", "51", "-RSB-", "."}, new String[]{"Panasonic", "brand", "products", "are", "produced", "by", "Samsung", "Electronics", "Co.", "Ltd.", ".", "Sanyo", "products", "are", "n't", "."}, new String[]{"Oesophageal", "acid", "exposure", "-LRB-", "%", "time", "<", "pH", "4", "-RRB-", "was", "similar", "in", "patients", "with", SsurgeonPattern.PREDICATE_OR_TAG, "without", "complications", "-LRB-", "19.2", "%", "v", "19.3", "%", "p", ">", "0.05", "-RRB-", "."}, new String[]{"<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Strict//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">"}, new String[]{"Hi", "!", "<foo bar=\"baz xy = foo !$*) 422\" >", "<?PITarget PIContent?>", "<?PITarget PIContent>", "Hi", "!"}, new String[]{"<?xml version=\"1.0\" encoding=\"UTF-8\" ?>", "<?xml-stylesheet type=\"text/xsl\" href=\"style.xsl\"?>", "<book xml:id=\"simple_book\" xmlns=\"http://docbook.org/ns/docbook\" version=\"5.0\">"}, new String[]{"<chapter xml:id=\"chapter_1\">", "<?php echo $a; ?>", "<!-- This is an SGML/XML comment \"Hi!\" -->", "<p>", "</p>", "<p-fix / >"}, new String[]{"<a href=\"http:\\\\it's\\here\">", "<quote orig_author='some \"dude'/>", "<", "not", "sgmltag"}, new String[]{"<quote previouspost=\" &gt; &gt; I really don't want to process this junk. &gt; No one said you did, runny.  
What's got you so scared, anyway?- \">"}, new String[]{"&lt;b...@canada.com&gt;", "funky@thedismalscience.net", "<myemail@where.com>"}, new String[]{"<DOC>", "<DOCID>", "nyt960102", ".0516", "</DOCID>", "<STORYID cat=w pri=u>", "A0264", "</STORYID>", "<SLUG fv=ttj-z>"}, new String[]{"<!-- copy from here -->", "<a href=\"http://strategis.gc.ca/epic/internet/inabc-eac.nsf/en/home\">", "<img src=\"id-images/ad-220x80_01e.jpg\" alt=\"Aboriginal Business Canada: Opening New Doors for Your Business\" width=\"220\" height=\"80\" border=\"0\">", "</a>", "<!-- copy to here -->", "Small", "ABC", "Graphic", "Instructions", TaggerConfig.NTHREADS, "."}, new String[]{"We", "traveled", LatticeXMLReader.FROM_NODE, "No.", "Korea", LatticeXMLReader.TO_NODE, "the", "U.S.A.", ".", "Why", "?"}};
    private final String[][] sgmlPerLineGold = {new String[]{"Significant", "improvements", "in", "peak", "FEV1", "were", "demonstrated", "with", "tiotropium/olodaterol", "5/2", "μg", "-LRB-", "p", AddDep.TUPLE_DELIMITER, "0.008", "-RRB-", ",", "5/5", "μg", "-LRB-", "p", AddDep.TUPLE_DELIMITER, "0.012", "-RRB-", ",", SsurgeonPattern.PREDICATE_AND_TAG, "5/10", "μg", "-LRB-", "p", "<", "0.0001", "-RRB-", "versus", "tiotropium", "monotherapy", "-LSB-", "51", "-RSB-", "."}, new String[]{"Panasonic", "brand", "products", "are", "produced", "by", "Samsung", "Electronics", "Co.", "Ltd.", ".", "Sanyo", "products", "are", "n't", "."}, new String[]{"Oesophageal", "acid", "exposure", "-LRB-", "%", "time", "<", "pH", "4", "-RRB-", "was", "similar", "in", "patients", "with", SsurgeonPattern.PREDICATE_OR_TAG, "without", "complications", "-LRB-", "19.2", "%", "v", "19.3", "%", "p", ">", "0.05", "-RRB-", "."}, new String[]{"<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Strict//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">"}, new String[]{"Hi", "!", "<foo bar=\"baz xy = foo !$*) 422\" >", "<?PITarget PIContent?>", "<?PITarget PIContent>", "Hi", "!"}, new String[]{"<?xml version=\"1.0\" encoding=\"UTF-8\" ?>", "<?xml-stylesheet type=\"text/xsl\" href=\"style.xsl\"?>", "<book xml:id=\"simple_book\" xmlns=\"http://docbook.org/ns/docbook\" version=\"5.0\">"}, new String[]{"<chapter xml:id=\"chapter_1\">", "<?php echo $a; ?>", "<!-- This is an SGML/XML comment \"Hi!\" -->", "<p>", "</p>", "<p-fix / >"}, new String[]{"<a href=\"http:\\\\it's\\here\">", "<quote orig_author='some \"dude'/>", "<", "not", "sgmltag"}, new String[]{"<", Annotator.STANFORD_QUOTE, "previouspost", AddDep.TUPLE_DELIMITER, "''", ">", ">", "I", "really", "do", "n't", "want", LatticeXMLReader.TO_NODE, "process", "this", "junk", ".", ">", "No", "one", "said", "you", "did", ",", "runny", ".", "What", "'s", "got", "you", "so", "scared", ",", "anyway", "?", "-", "''", ">"}, new String[]{"&lt;b...@canada.com&gt;", 
"funky@thedismalscience.net", "<myemail@where.com>"}, new String[]{"<DOC>", "<DOCID>", "nyt960102", ".0516", "</DOCID>", "<STORYID cat=w pri=u>", "A0264", "</STORYID>", "<SLUG fv=ttj-z>"}, new String[]{"<!-- copy from here -->", "<a href=\"http://strategis.gc.ca/epic/internet/inabc-eac.nsf/en/home\">", "<", "img", "src", AddDep.TUPLE_DELIMITER, "``", "id-images/ad-220x80_01e.jpg", "''", "alt", AddDep.TUPLE_DELIMITER, "``", "Aboriginal", "Business", "Canada", MorphoFeatures.KEY_VAL_DELIM, "Opening", "New", "Doors", "for", "Your", "Business", "''", "width", AddDep.TUPLE_DELIMITER, "``", "220", "''", "height", AddDep.TUPLE_DELIMITER, "``", "80", "''", "border", AddDep.TUPLE_DELIMITER, "``", PascalTemplate.BACKGROUND_SYMBOL, "''", ">", "</a>", "<!-- copy to here -->", "Small", "ABC", "Graphic", "Instructions", TaggerConfig.NTHREADS, "."}, new String[]{"We", "traveled", LatticeXMLReader.FROM_NODE, "No", ".", "Korea", LatticeXMLReader.TO_NODE, "the", "U.S.A.", "Why", "?"}};
    private final String[] mtInputs = {"Enter an option [?/Current]:{1}", "for example, {1}http://www.autodesk.com{2}, or a path", "enter {3}@{4} at the Of prompt.", "{1}block name={2}", "1202-03-04 5:32:56 2004-03-04T18:32:56", "20°C is 68°F because 0℃ is 32℉", "a.jpg a-b.jpg a.b.jpg a-b.jpg a_b.jpg a-b-c.jpg 0-1-2.jpg a-b/c-d_e.jpg a-b/c-9a9_9a.jpg\n", "#hashtag #Azərbaycanca #mûǁae #Čeština #日本語ハッシュタグ #1 #23 #Trump2016 @3 @acl_2016", "Sect. 793 of the Penal Code"};
    private final String[][] mtGold = {new String[]{"Enter", "an", "option", "-LSB-", "?", "/", "Current", "-RSB-", MorphoFeatures.KEY_VAL_DELIM, "-LCB-", TaggerConfig.NTHREADS, "-RCB-"}, new String[]{"for", "example", ",", "-LCB-", TaggerConfig.NTHREADS, "-RCB-", "http://www.autodesk.com", "-LCB-", TaggerConfig.CUR_WORD_MIN_FEATURE_THRESH, "-RCB-", ",", SsurgeonPattern.PREDICATE_OR_TAG, "a", "path"}, new String[]{"enter", "-LCB-", "3", "-RCB-", "@", "-LCB-", "4", "-RCB-", "at", "the", "Of", "prompt", "."}, new String[]{"-LCB-", TaggerConfig.NTHREADS, "-RCB-", "block", Sieve.NAME, AddDep.TUPLE_DELIMITER, "-LCB-", TaggerConfig.CUR_WORD_MIN_FEATURE_THRESH, "-RCB-"}, new String[]{"1202-03-04", "5:32:56", "2004-03-04T18:32:56"}, new String[]{"20", "°C", "is", "68", "°F", "because", PascalTemplate.BACKGROUND_SYMBOL, "℃", "is", "32", "℉"}, new String[]{"a.jpg", "a-b.jpg", "a.b.jpg", "a-b.jpg", "a_b.jpg", "a-b-c.jpg", "0-1-2.jpg", "a-b/c-d_e.jpg", "a-b/c-9a9_9a.jpg"}, new String[]{"#hashtag", "#Azərbaycanca", "#mûǁae", "#Čeština", "#日本語ハッシュタグ", NegraLabel.FEATURE_SEP, TaggerConfig.NTHREADS, NegraLabel.FEATURE_SEP, "23", "#Trump2016", "@", "3", "@acl_2016"}, new String[]{"Sect.", "793", "of", "the", "Penal", "Code"}};

    /** Runs the default {@code PTBTokenizer.factory()} over {@code ptbInputs} and checks the tokens against {@code ptbGold}. */
    @Test
    public void testPTBTokenizerWord() {
        final String[] sentences = this.ptbInputs;
        final String[][] expectedTokens = this.ptbGold;
        runOnTwoArrays(PTBTokenizer.factory(), sentences, expectedTokens);
    }

    /**
     * Tokenizes each entry of {@code corpInputs} twice, once with the {@code "strictTreebank3"}
     * option string and once with an empty option string, and compares the words with the
     * matching row of {@code corpGold}.
     *
     * <p>Iteration {@code i} uses input {@code i / 2}; even {@code i} selects
     * {@code "strictTreebank3"} and gold row 0, odd {@code i} selects {@code ""} and gold row 1.
     * When the token count is wrong, the gold and produced token sequences are printed to stdout
     * before the count assertion fails.
     */
    @Test
    public void testCorp() {
        for (int i = 0; i < 4; i++) {
            String input = this.corpInputs[i / 2];
            String options = (i % 2 == 0) ? "strictTreebank3" : "";
            String[] gold = this.corpGold[i % 2];

            PTBTokenizer<CoreLabel> tokenizer =
                    new PTBTokenizer<CoreLabel>(new StringReader(input), new CoreLabelTokenFactory(), options);
            int count = 0;
            while (tokenizer.hasNext()) {
                CoreLabel token = tokenizer.next();
                // Surplus tokens are only counted here; the size assertion below reports them.
                if (count < gold.length) {
                    Assert.assertEquals("PTBTokenizer problem", gold[count], token.word());
                }
                count++;
            }

            if (count != gold.length) {
                // Re-tokenize so the complete guess can be shown next to the gold tokens.
                List<CoreLabel> guess =
                        new PTBTokenizer<CoreLabel>(new StringReader(input), new CoreLabelTokenFactory(), options).tokenize();
                System.out.println("Gold: " + Arrays.toString(gold));
                System.out.println("Guess: " + SentenceUtils.listToString(guess));
                System.out.flush();
            }
            // Expected value first, so a failure message reads correctly.
            Assert.assertEquals("PTBTokenizer num tokens problem", gold.length, count);
        }
    }

    /** Checks that {@code "it's"} tokenizes to the same list with and without surrounding spaces. */
    @Test
    public void testJacobEisensteinApostropheCase() {
        List<?> bareTokens = PTBTokenizer.newPTBTokenizer(new StringReader("it's")).tokenize();
        List<?> paddedTokens = PTBTokenizer.newPTBTokenizer(new StringReader(" it's ")).tokenize();
        Assert.assertEquals(bareTokens, paddedTokens);
    }

    /**
     * Checks that {@code PTBTokenizer.ptb2Text} turns each entry of {@code untokInputs} into the
     * corresponding entry of {@code untokOutputs}.
     */
    @Test
    public void testUntok() {
        // Checked unconditionally (not only when JVM assertions are enabled), so a mismatch in the
        // test data is reported as a test failure instead of an index error or a silent pass.
        Assert.assertEquals("Test data arrays don't match in length", untokInputs.length, untokOutputs.length);
        for (int i = 0; i < untokInputs.length; i++) {
            Assert.assertEquals("untok gave the wrong result", untokOutputs[i], PTBTokenizer.ptb2Text(untokInputs[i]));
        }
    }

    /**
     * Checks that the tokenizer built by {@code newPTBTokenizer(reader, false, true)} records the
     * surrounding whitespace, original text and character offsets of each token, and that the
     * input can be rebuilt exactly from those annotations.
     */
    @Test
    public void testInvertible() {
        String text = "  This     is     a      colourful sentence.    ";
        // NOTE(review): the two flags are presumably tokenizeNLs=false and invertible=true;
        // confirm against PTBTokenizer.newPTBTokenizer.
        List<CoreLabel> tokens = PTBTokenizer.newPTBTokenizer(new StringReader(text), false, true).tokenize();
        Assert.assertEquals(6L, tokens.size());

        CoreLabel first = tokens.get(0);
        Assert.assertEquals("  ", first.get(CoreAnnotations.BeforeAnnotation.class));
        Assert.assertEquals("     ", first.get(CoreAnnotations.AfterAnnotation.class));
        Assert.assertEquals("Wrong begin char offset", 2L,
                first.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class).intValue());
        Assert.assertEquals("Wrong end char offset", 6L,
                first.get(CoreAnnotations.CharacterOffsetEndAnnotation.class).intValue());
        Assert.assertEquals("This", first.get(CoreAnnotations.OriginalTextAnnotation.class));

        Assert.assertEquals("     ", tokens.get(1).get(CoreAnnotations.BeforeAnnotation.class));
        Assert.assertEquals("colourful", tokens.get(3).get(CoreAnnotations.TextAnnotation.class));
        Assert.assertEquals("colourful", tokens.get(3).get(CoreAnnotations.OriginalTextAnnotation.class));
        Assert.assertEquals("", tokens.get(4).after());
        Assert.assertEquals("", tokens.get(5).before());
        Assert.assertEquals("    ", tokens.get(5).get(CoreAnnotations.AfterAnnotation.class));

        // Rebuild the input: leading whitespace, then each token's original text plus what follows it.
        StringBuilder rebuilt = new StringBuilder();
        rebuilt.append(first.get(CoreAnnotations.BeforeAnnotation.class));
        for (CoreLabel token : tokens) {
            rebuilt.append(token.get(CoreAnnotations.OriginalTextAnnotation.class));
            String after = token.get(CoreAnnotations.AfterAnnotation.class);
            if (after != null) {
                rebuilt.append(after);
            }
        }
        Assert.assertEquals(text, rebuilt.toString());

        // The whitespace after one token must equal the whitespace before the next.
        for (int i = 0; i < tokens.size() - 1; i++) {
            Assert.assertEquals(tokens.get(i).get(CoreAnnotations.AfterAnnotation.class),
                    tokens.get(i + 1).get(CoreAnnotations.BeforeAnnotation.class));
        }
    }

    /** Tokenizes {@code sgmlInputs} with the default CoreLabel factory and compares against {@code sgmlGold}. */
    @Test
    public void testPTBTokenizerSGML() {
        TokenizerFactory<CoreLabel> factory = PTBTokenizer.coreLabelFactory();
        runOnTwoArrays(factory, this.sgmlInputs, this.sgmlGold);
    }

    /** Tokenizes {@code sgmlInputs} with the {@code tokenizePerLine=true} option and compares against {@code sgmlPerLineGold}. */
    @Test
    public void testPTBTokenizerTokenizePerLineSGML() {
        TokenizerFactory<CoreLabel> perLineFactory = PTBTokenizer.coreLabelFactory("tokenizePerLine=true");
        runOnTwoArrays(perLineFactory, this.sgmlInputs, this.sgmlPerLineGold);
    }

    /** Tokenizes {@code ptbInputs} with the {@code splitHyphenated=true} option and compares against {@code ptbGoldSplitHyphenated}. */
    @Test
    public void testPTBTokenizerTokenizeSplitHyphens() {
        TokenizerFactory<CoreLabel> splittingFactory = PTBTokenizer.coreLabelFactory("splitHyphenated=true");
        runOnTwoArrays(splittingFactory, this.ptbInputs, this.ptbGoldSplitHyphenated);
    }

    /* JADX WARN: Type inference failed for: r0v3, types: [java.lang.String[], java.lang.String[][]] */
    /* JADX WARN: Type inference failed for: r0v5, types: [java.lang.String[], java.lang.String[][]] */
    @Test
    public void testFractions() {
        String[] strArr = {"5-1/4 plus 2 3/16 = 7 7/16 in the U.S.S.R. Why not?"};
        ?? r0 = {new String[]{"5-1/4", "plus", "2 3/16", AddDep.TUPLE_DELIMITER, "7 7/16", "in", "the", "U.S.S.R.", ".", "Why", "not", "?"}};
        ?? r02 = {new String[]{"5-1/4", "plus", TaggerConfig.CUR_WORD_MIN_FEATURE_THRESH, "3/16", AddDep.TUPLE_DELIMITER, "7", "7/16", "in", "the", "U.S.S.R", ".", "Why", "not", "?"}};
        TokenizerFactory<CoreLabel> coreLabelFactory = PTBTokenizer.coreLabelFactory();
        TokenizerFactory<CoreLabel> coreLabelFactory2 = PTBTokenizer.coreLabelFactory("strictTreebank3");
        runOnTwoArrays(coreLabelFactory, strArr, r0);
        runOnTwoArrays(coreLabelFactory2, strArr, r02);
    }

    /**
     * Tokenizes every input string with a tokenizer from the given factory and asserts that the
     * token values match the corresponding gold row exactly, in order and in number.
     *
     * @param tokenizerFactory factory used to create one tokenizer per input string
     * @param strArr input strings
     * @param strArr2 expected token values; row {@code i} belongs to {@code strArr[i]}
     */
    private static <T extends Label> void runOnTwoArrays(TokenizerFactory<T> tokenizerFactory, String[] strArr, String[][] strArr2) {
        Assert.assertEquals("Test data arrays don't match in length", strArr.length, strArr2.length);
        for (int i = 0; i < strArr.length; i++) {
            Tokenizer<T> tokenizer = tokenizerFactory.getTokenizer(new StringReader(strArr[i]));
            String[] gold = strArr2[i];
            int j = 0;
            // Continue while either side still has tokens, so both "too few" and "too many" are
            // detected; the loop ends once the tokenizer and the gold row are both exhausted.
            while (tokenizer.hasNext() || j < gold.length) {
                if (!tokenizer.hasNext()) {
                    Assert.fail("PTBTokenizer generated too few tokens for sentence " + i + "! Missing " + gold[j]);
                }
                T next = tokenizer.next();
                if (j >= gold.length) {
                    Assert.fail("PTBTokenizer generated too many tokens for sentence " + i + "! Added " + next.value());
                } else {
                    Assert.assertEquals("PTBTokenizer got wrong token", gold[j], next.value());
                }
                j++;
            }
        }
    }

    /**
     * Tokenizes two German sentences with the tokenizer factory obtained from
     * {@code NegraPennLanguagePack} and compares the tokens with the expected values.
     */
    @Test
    public void testPTBTokenizerGerman() {
        String[] sentences = {
            "Das TV-Duell von Kanzlerin Merkel und SPD-Herausforderer Steinbrück war eher lahm - können es die Spitzenleute der kleinen Parteien besser? ",
            "Die erquickende Sicherheit und Festigkeit in der Bewegung, den Vorrat von Kraft, kann ja die Versammlung nicht fühlen, hören will sie sie nicht, also muß sie sie sehen; und die sehe man einmal in einem Paar spitzen Schultern, zylindrischen Schenkeln, oder leeren Ärmeln, oder lattenförmigen Beinen."
        };
        // A two-dimensional array: one row of expected tokens per sentence.
        String[][] expectedTokens = {
            {"Das", "TV-Duell", "von", "Kanzlerin", "Merkel", "und", "SPD-Herausforderer", "Steinbrück", "war", "eher", "lahm", "-", "können", "es", "die", "Spitzenleute", "der", "kleinen", "Parteien", "besser", "?"},
            {"Die", "erquickende", "Sicherheit", "und", "Festigkeit", "in", "der", "Bewegung", ",", "den", "Vorrat", "von", "Kraft", ",", "kann", "ja", "die", "Versammlung", "nicht", "fühlen", ",", "hören", "will", "sie", "sie", "nicht", ",", "also", "muß", "sie", "sie", "sehen", ";", "und", "die", "sehe", "man", "einmal", "in", "einem", "Paar", "spitzen", "Schultern", ",", "zylindrischen", "Schenkeln", ",", "oder", "leeren", "Ärmeln", ",", "oder", "lattenförmigen", "Beinen", "."}
        };
        runOnTwoArrays(new NegraPennLanguagePack().getTokenizerFactory(), sentences, expectedTokens);
    }

    /** Runs the default {@code PTBTokenizer.factory()} over {@code mtInputs} and checks the tokens against {@code mtGold}. */
    @Test
    public void testPTBTokenizerMT() {
        final String[] sentences = this.mtInputs;
        final String[][] expectedTokens = this.mtGold;
        runOnTwoArrays(PTBTokenizer.factory(), sentences, expectedTokens);
    }

    // Initializes the static fixtures: the ptb2Text inputs, their expected outputs, and the assertion flag.
    static {
        // Tokenized text, with punctuation and hyphens separated by spaces.
        untokInputs = new String[]{
            "London - AFP reported junk .",
            "Paris - Reuters reported news .",
            "Sydney - News said - something .",
            "HEADLINE - New Android phone !",
            "I did it 'cause I wanted to , and you 'n' me know that .",
            "He said that `` Luxembourg needs surface - to - air missiles . ''"
        };
        // Expected ptb2Text output for the entry at the same index above.
        untokOutputs = new String[]{
            "London - AFP reported junk.",
            "Paris - Reuters reported news.",
            "Sydney - News said - something.",
            "HEADLINE - New Android phone!",
            "I did it 'cause I wanted to, and you 'n' me know that.",
            "He said that \"Luxembourg needs surface-to-air missiles.\""
        };
        // True when JVM assertions are NOT enabled for this class.
        $assertionsDisabled = !PTBTokenizerTest.class.desiredAssertionStatus();
    }
}
