|
|||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object
edu.stanford.nlp.parser.lexparser.ChineseCharacterBasedLexicon
public class ChineseCharacterBasedLexicon
Field Summary | |
---|---|
protected static java.text.NumberFormat |
formatter
|
protected static java.io.PrintWriter |
pw
|
Fields inherited from interface edu.stanford.nlp.parser.lexparser.Lexicon |
---|
BOUNDARY, BOUNDARY_TAG, UNKNOWN_WORD |
Constructor Summary | |
---|---|
ChineseCharacterBasedLexicon()
|
Method Summary | |
---|---|
Distribution<java.lang.String> |
getPOSDistribution()
|
UnknownWordModel |
getUnknownWordModel()
|
static boolean |
isForeign(java.lang.String s)
|
boolean |
isKnown(int word)
Checks whether a word is in the lexicon. |
boolean |
isKnown(java.lang.String word)
Checks whether a word is in the lexicon. |
static void |
main(java.lang.String[] args)
|
int |
numRules()
Returns the number of rules (tag rewrites as word) in the Lexicon. |
static void |
printStats(java.util.Collection<Tree> trees)
|
void |
readData(java.io.BufferedReader in)
Read the lexicon from the BufferedReader in the format written by writeData. |
java.util.Iterator<IntTaggedWord> |
ruleIteratorByWord(int word,
int loc,
java.lang.String featureSpec)
Get an iterator over all rules (pairs of (word, POS)) for this word. |
java.lang.String |
sampleFrom()
Samples over words regardless of POS: first samples POS, then samples word according to that POS. |
java.lang.String |
sampleFrom(java.lang.String tag)
Samples from the distribution over words with this POS according to the lexicon. |
float |
score(IntTaggedWord iTW,
int loc)
Get the score of this word with this tag (as an IntTaggedWord) at this loc. |
void |
setUnknownWordModel(UnknownWordModel uwm)
|
void |
train(java.util.Collection<Tree> trees)
Trains this lexicon on the Collection of trees. |
void |
tune(java.util.List trees)
|
void |
writeData(java.io.Writer w)
Write the lexicon in human-readable format to the Writer. |
Methods inherited from class java.lang.Object |
---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
Field Detail |
---|
protected static java.io.PrintWriter pw
protected static final java.text.NumberFormat formatter
Constructor Detail |
---|
public ChineseCharacterBasedLexicon()
Method Detail |
---|
public static void printStats(java.util.Collection<Tree> trees)
public void train(java.util.Collection<Tree> trees)
Lexicon
train
in interface Lexicon
trees
- Trees to train on
public Distribution<java.lang.String> getPOSDistribution()
public static boolean isForeign(java.lang.String s)
public float score(IntTaggedWord iTW, int loc)
Lexicon
score
in interface Lexicon
iTW
- An IntTaggedWord pairing a word and POS tag
loc
- The position in the sentence. In the default implementation
this is used only for unknown words to change their
probability distribution when sentence initial.
public java.lang.String sampleFrom(java.lang.String tag)
tag
- the POS of the word to sample
public java.lang.String sampleFrom()
public java.util.Iterator<IntTaggedWord> ruleIteratorByWord(int word, int loc, java.lang.String featureSpec)
Lexicon
ruleIteratorByWord
in interface Lexicon
word
- The word, represented as an integer in Numberer
loc
- The position of the word in the sentence (counting from 0).
Implementation note: The BaseLexicon class doesn't
actually make use of this position information.
featureSpec
- Additional word features like morphosyntactic information.
tag -> word rule.)
public int numRules()
numRules
in interface Lexicon
public void tune(java.util.List trees)
public static void main(java.lang.String[] args) throws java.io.IOException
java.io.IOException
public void readData(java.io.BufferedReader in) throws java.io.IOException
Lexicon
readData
in interface Lexicon
in
- The BufferedReader to read from
java.io.IOException
- If any I/O problem
public void writeData(java.io.Writer w) throws java.io.IOException
Lexicon
writeData
in interface Lexicon
w
- The writer to output to
java.io.IOException
- If any I/O problem
public boolean isKnown(int word)
Lexicon
isKnown
in interface Lexicon
word
- The word as an int
public boolean isKnown(java.lang.String word)
Lexicon
isKnown
in interface Lexicon
word
- The word as a String
public UnknownWordModel getUnknownWordModel()
getUnknownWordModel
in interface Lexicon
public void setUnknownWordModel(UnknownWordModel uwm)
setUnknownWordModel
in interface Lexicon
|
|||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |