public class ChineseTreebankLanguagePack extends AbstractTreebankLanguagePack
Modifier and Type | Field and Description |
---|---|
static String |
ENCODING |
DEFAULT_ENCODING, DEFAULT_GF_CHAR, gfCharacter
Constructor and Description |
---|
ChineseTreebankLanguagePack() |
Modifier and Type | Method and Description |
---|---|
static java.util.function.Predicate<String> |
chineseColonAcceptFilter() |
static java.util.function.Predicate<String> |
chineseCommaAcceptFilter() |
static java.util.function.Predicate<String> |
chineseDashAcceptFilter() |
static java.util.function.Predicate<String> |
chineseDouHaoAcceptFilter() |
static java.util.function.Predicate<String> |
chineseEndSentenceAcceptFilter() |
static java.util.function.Predicate<String> |
chineseLeftParenthesisAcceptFilter() |
static java.util.function.Predicate<String> |
chineseLeftQuoteMarkAcceptFilter() |
static java.util.function.Predicate<String> |
chineseOtherAcceptFilter() |
static java.util.function.Predicate<String> |
chineseParenthesisAcceptFilter() |
static java.util.function.Predicate<String> |
chineseQuoteMarkAcceptFilter() |
static java.util.function.Predicate<String> |
chineseRightParenthesisAcceptFilter() |
static java.util.function.Predicate<String> |
chineseRightQuoteMarkAcceptFilter() |
String |
getEncoding()
Return the input Charset encoding for the Treebank.
|
TokenizerFactory<? extends HasWord> |
getTokenizerFactory()
Return a tokenizer which might be suitable for tokenizing text that
will be used with this Treebank/Language pair, without tokenizing carriage returns (i.e., treating them as white space).
|
GrammaticalStructureFactory |
grammaticalStructureFactory()
Return a GrammaticalStructureFactory suitable for this language/treebank.
|
GrammaticalStructureFactory |
grammaticalStructureFactory(java.util.function.Predicate<String> puncFilt)
Return a GrammaticalStructureFactory suitable for this language/treebank.
|
GrammaticalStructureFactory |
grammaticalStructureFactory(java.util.function.Predicate<String> puncFilt,
HeadFinder hf)
Return a GrammaticalStructureFactory suitable for this language/treebank.
|
HeadFinder |
headFinder()
The HeadFinder to use for your treebank.
|
boolean |
isEvalBIgnoredPunctuationTag(String str)
Accepts a String that is a punctuation
tag that should be ignored by EVALB-style evaluation,
and rejects everything else.
|
boolean |
isPunctuationTag(String str)
Accepts a String that is a punctuation
tag name, and rejects everything else.
|
boolean |
isPunctuationWord(String str)
Accepts a String that is a punctuation
word, and rejects everything else.
|
boolean |
isSentenceFinalPunctuationTag(String str)
Accepts a String that is a sentence end
punctuation tag, and rejects everything else.
|
char[] |
labelAnnotationIntroducingCharacters()
Return an array of characters at which a String should be
truncated to give the basic syntactic category of a label.
|
String[] |
punctuationTags()
Returns a String array of punctuation tags for this treebank/language.
|
String[] |
punctuationWords()
Returns a String array of punctuation words for this treebank/language.
|
String[] |
sentenceFinalPunctuationTags()
Returns a String array of sentence final punctuation tags for this
treebank/language.
|
String[] |
sentenceFinalPunctuationWords()
Returns a String array of sentence final punctuation words for this
treebank/language.
|
void |
setTokenizerFactory(TokenizerFactory<? extends HasWord> tf) |
String[] |
startSymbols()
Returns a String array of treebank start symbols.
|
boolean |
supportsGrammaticalStructures()
Whether or not we have typed dependencies for this language.
|
String |
treebankFileExtension()
Returns the extension of treebank files for this treebank.
|
TreeReaderFactory |
treeReaderFactory()
Returns a TreeReaderFactory suitable for general purpose use
with this language/treebank.
|
HeadFinder |
typedDependencyHeadFinder()
The HeadFinder to use when making typed dependencies.
|
basicCategory, categoryAndFunction, evalBIgnoredPunctuationTagAcceptFilter, evalBIgnoredPunctuationTagRejectFilter, evalBIgnoredPunctuationTags, getBasicCategoryFunction, getCategoryAndFunctionFunction, getGfCharacter, isLabelAnnotationIntroducingCharacter, isStartSymbol, morphFeatureSpec, punctuationTagAcceptFilter, punctuationTagRejectFilter, punctuationWordAcceptFilter, punctuationWordRejectFilter, sentenceFinalPunctuationTagAcceptFilter, setGfCharacter, startSymbol, startSymbolAcceptFilter, stripGF, treeTokenizerFactory
public static final String ENCODING
public void setTokenizerFactory(TokenizerFactory<? extends HasWord> tf)
public TokenizerFactory<? extends HasWord> getTokenizerFactory()
AbstractTreebankLanguagePack
WhitespaceTokenizer
.getTokenizerFactory
in interface TreebankLanguagePack
getTokenizerFactory
in class AbstractTreebankLanguagePack
public String getEncoding()
Charset
class.getEncoding
in interface TreebankLanguagePack
getEncoding
in class AbstractTreebankLanguagePack
public boolean isPunctuationTag(String str)
isPunctuationTag
in interface TreebankLanguagePack
isPunctuationTag
in class AbstractTreebankLanguagePack
str
- The string to check
public boolean isPunctuationWord(String str)
isPunctuationWord
in interface TreebankLanguagePack
isPunctuationWord
in class AbstractTreebankLanguagePack
str
- The string to check
public boolean isSentenceFinalPunctuationTag(String str)
isSentenceFinalPunctuationTag
in interface TreebankLanguagePack
isSentenceFinalPunctuationTag
in class AbstractTreebankLanguagePack
str
- The string to check
public String[] punctuationTags()
punctuationTags
in interface TreebankLanguagePack
punctuationTags
in class AbstractTreebankLanguagePack
public String[] punctuationWords()
punctuationWords
in interface TreebankLanguagePack
punctuationWords
in class AbstractTreebankLanguagePack
public String[] sentenceFinalPunctuationTags()
sentenceFinalPunctuationTags
in interface TreebankLanguagePack
sentenceFinalPunctuationTags
in class AbstractTreebankLanguagePack
public String[] sentenceFinalPunctuationWords()
public boolean isEvalBIgnoredPunctuationTag(String str)
isEvalBIgnoredPunctuationTag
in interface TreebankLanguagePack
isEvalBIgnoredPunctuationTag
in class AbstractTreebankLanguagePack
str
- The string to check
public char[] labelAnnotationIntroducingCharacters()
labelAnnotationIntroducingCharacters
in interface TreebankLanguagePack
labelAnnotationIntroducingCharacters
in class AbstractTreebankLanguagePack
public String[] startSymbols()
startSymbols
in interface TreebankLanguagePack
startSymbols
in class AbstractTreebankLanguagePack
public static java.util.function.Predicate<String> chineseCommaAcceptFilter()
public static java.util.function.Predicate<String> chineseEndSentenceAcceptFilter()
public static java.util.function.Predicate<String> chineseDouHaoAcceptFilter()
public static java.util.function.Predicate<String> chineseQuoteMarkAcceptFilter()
public static java.util.function.Predicate<String> chineseParenthesisAcceptFilter()
public static java.util.function.Predicate<String> chineseColonAcceptFilter()
public static java.util.function.Predicate<String> chineseDashAcceptFilter()
public static java.util.function.Predicate<String> chineseOtherAcceptFilter()
public static java.util.function.Predicate<String> chineseLeftParenthesisAcceptFilter()
public static java.util.function.Predicate<String> chineseRightParenthesisAcceptFilter()
public static java.util.function.Predicate<String> chineseLeftQuoteMarkAcceptFilter()
public static java.util.function.Predicate<String> chineseRightQuoteMarkAcceptFilter()
public String treebankFileExtension()
public GrammaticalStructureFactory grammaticalStructureFactory()
AbstractTreebankLanguagePack
grammaticalStructureFactory
in interface TreebankLanguagePack
grammaticalStructureFactory
in class AbstractTreebankLanguagePack
public GrammaticalStructureFactory grammaticalStructureFactory(java.util.function.Predicate<String> puncFilt)
AbstractTreebankLanguagePack
grammaticalStructureFactory
in interface TreebankLanguagePack
grammaticalStructureFactory
in class AbstractTreebankLanguagePack
puncFilt
- A filter which should reject punctuation words (as Strings)
public GrammaticalStructureFactory grammaticalStructureFactory(java.util.function.Predicate<String> puncFilt, HeadFinder hf)
AbstractTreebankLanguagePack
grammaticalStructureFactory
in interface TreebankLanguagePack
grammaticalStructureFactory
in class AbstractTreebankLanguagePack
puncFilt
- A filter which should reject punctuation words (as Strings)
hf
- A HeadFinder which finds heads for typed dependencies
public boolean supportsGrammaticalStructures()
TreebankLanguagePack
supportsGrammaticalStructures
in interface TreebankLanguagePack
supportsGrammaticalStructures
in class AbstractTreebankLanguagePack
public TreeReaderFactory treeReaderFactory()
AbstractTreebankLanguagePack
treeReaderFactory
in interface TreebankLanguagePack
treeReaderFactory
in class AbstractTreebankLanguagePack
public HeadFinder headFinder()
public HeadFinder typedDependencyHeadFinder()