public class ChineseDictionary
extends java.lang.Object
See also: Sighan2005DocumentReaderAndWriter.addDictionaryFeatures(edu.stanford.nlp.wordseg.ChineseDictionary, java.lang.Class<? extends edu.stanford.nlp.ling.CoreAnnotation<java.lang.String>>, java.lang.Class<? extends edu.stanford.nlp.ling.CoreAnnotation<java.lang.String>>, java.lang.Class<? extends edu.stanford.nlp.ling.CoreAnnotation<java.lang.String>>, java.util.List<edu.stanford.nlp.ling.CoreLabel>).
Modifier and Type | Field and Description |
---|---|
static int | MAX_LEXICON_LENGTH |
Constructor and Description |
---|
ChineseDictionary(java.lang.String dict) |
ChineseDictionary(java.lang.String[] dicts) |
ChineseDictionary(java.lang.String[] dicts, ChineseDocumentToSentenceProcessor cdtos) |
ChineseDictionary(java.lang.String[] dicts, ChineseDocumentToSentenceProcessor cdtos, boolean expandMidDot) |
ChineseDictionary(java.lang.String serDicts, ChineseDocumentToSentenceProcessor cdtos, boolean expandMidDot) - The first argument can be one file path, or multiple files separated by commas. |
Modifier and Type | Method and Description |
---|---|
boolean | contains(java.lang.String word) |
static void | main(java.lang.String[] args) - This program creates or expands a Chinese dictionary, primarily for use in the CRF segmentation tool. |
public static final int MAX_LEXICON_LENGTH
public ChineseDictionary(java.lang.String dict)
public ChineseDictionary(java.lang.String[] dicts)
public ChineseDictionary(java.lang.String[] dicts, ChineseDocumentToSentenceProcessor cdtos)
public ChineseDictionary(java.lang.String serDicts, ChineseDocumentToSentenceProcessor cdtos, boolean expandMidDot)
public ChineseDictionary(java.lang.String[] dicts, ChineseDocumentToSentenceProcessor cdtos, boolean expandMidDot)
public boolean contains(java.lang.String word)
public static void main(java.lang.String[] args)
Usage examples:
java edu.stanford.nlp.wordseg.ChineseDictionary -inputDicts foo.txt,bar.txt -output mydict.ser.gz
java edu.stanford.nlp.wordseg.ChineseDictionary -inputDicts edu/stanford/nlp/models/segmenter/chinese/dict-chris6.ser.gz,new_words.txt -output mydict.ser.gz