Serialized Form


Package edu.stanford.nlp.international.morph

Class edu.stanford.nlp.international.morph.MorphoFeatures extends Object implements Serializable

serialVersionUID: -3893316324305154940L

Serialized Fields

fSpec

Map<K,V> fSpec

altTag

String altTag

Class edu.stanford.nlp.international.morph.MorphoFeatureSpecification extends Object implements Serializable

serialVersionUID: -5720683653931585664L

Serialized Fields

activeFeatures

Set<E> activeFeatures

Package edu.stanford.nlp.io

Class edu.stanford.nlp.io.RuntimeIOException extends RuntimeException implements Serializable

serialVersionUID: -8572218999165094626L


Package edu.stanford.nlp.ling

Class edu.stanford.nlp.ling.CategoryWordTag extends StringLabel implements Serializable

serialVersionUID: -745085381666943254L

Serialized Fields

word

String word

tag

String tag

Class edu.stanford.nlp.ling.CoreLabel extends ArrayCoreMap implements Serializable

serialVersionUID: 2L

Class edu.stanford.nlp.ling.CyclicCoreLabel extends CoreLabel implements Serializable

serialVersionUID: 1L

Class edu.stanford.nlp.ling.LabeledWord extends Word implements Serializable

serialVersionUID: -7252006452127051085L

Serialized Fields

tag

Label tag

Class edu.stanford.nlp.ling.StringLabel extends ValueLabel implements Serializable

serialVersionUID: -4153619273767524247L

Serialized Fields

str

String str

beginPosition

int beginPosition
Start position of the word in the original input string


endPosition

int endPosition
End position of the word in the original input string

Class edu.stanford.nlp.ling.TaggedWord extends Word implements Serializable

serialVersionUID: -7252006452127051085L

Serialized Fields

tag

String tag

Class edu.stanford.nlp.ling.ValueLabel extends Object implements Serializable

serialVersionUID: -1413303679077285530L

Class edu.stanford.nlp.ling.Word extends StringLabel implements Serializable

serialVersionUID: -4817252915997034058L

Class edu.stanford.nlp.ling.WordLemmaTag extends Object implements Serializable

serialVersionUID: -5993410244163988138L

Serialized Fields

word

String word

lemma

String lemma

tag

String tag

Class edu.stanford.nlp.ling.WordTag extends Object implements Serializable

serialVersionUID: -1859527239216813742L

Serialized Fields

word

String word

tag

String tag

Package edu.stanford.nlp.objectbank

Class edu.stanford.nlp.objectbank.LineIterator.LineIteratorFactory extends Object implements Serializable

serialVersionUID: 1L

Serialized Fields

oper

Function<T1,T2> oper

Class edu.stanford.nlp.objectbank.ObjectBank extends Object implements Serializable

serialVersionUID: -4030295596701541770L

Serialized Fields

rif

ReaderIteratorFactory rif

ifrf

IteratorFromReaderFactory<T> ifrf

contents

List<E> contents

keepInMemory

boolean keepInMemory

Package edu.stanford.nlp.optimization

Class edu.stanford.nlp.optimization.QNMinimizer.SurpriseConvergence extends Throwable implements Serializable

serialVersionUID: 4290178321643529559L


Package edu.stanford.nlp.parser.lexparser

Class edu.stanford.nlp.parser.lexparser.AbstractDependencyGrammar extends Object implements Serializable

serialVersionUID: 3L

Serialization Methods

readObject

private void readObject(ObjectInputStream ois)
                 throws IOException,
                        ClassNotFoundException
Throws:
IOException
ClassNotFoundException
Serialized Fields

tagProjection

TagProjection tagProjection

tagIndex

Index<E> tagIndex

wordIndex

Index<E> wordIndex

numTagBins

int numTagBins

tagBin

int[] tagBin

tlp

TreebankLanguagePack tlp

directional

boolean directional

useDistance

boolean useDistance

useCoarseDistance

boolean useCoarseDistance

lex

Lexicon lex

stopTW

IntTaggedWord stopTW

wildTW

IntTaggedWord wildTW

coarseDistanceBins

int[] coarseDistanceBins

regDistanceBins

int[] regDistanceBins

op

Options op

Class edu.stanford.nlp.parser.lexparser.AbstractTreebankParserParams extends Object implements Serializable

serialVersionUID: 4299501909017975915L

Serialized Fields

evalGF

boolean evalGF
If true, then evaluation is over grammatical functions as well as the labels. If false, then grammatical functions are stripped for evaluation. This really only makes sense if you've trained with grammatical functions but want to evaluate without them.


inputEncoding

String inputEncoding

outputEncoding

String outputEncoding

tlp

TreebankLanguagePack tlp

Class edu.stanford.nlp.parser.lexparser.AbstractTreebankParserParams.AnnotatePunctuationFunction extends Object implements Serializable

serialVersionUID: 1L

Serialized Fields

key

String key

annotationMark

String annotationMark

Class edu.stanford.nlp.parser.lexparser.BaseLexicon extends Object implements Serializable

serialVersionUID: 40L

Serialization Methods

readObject

private void readObject(ObjectInputStream ois)
                 throws IOException,
                        ClassNotFoundException
Throws:
IOException
ClassNotFoundException
Serialized Fields

uwModel

UnknownWordModel uwModel

trainOptions

TrainOptions trainOptions

testOptions

TestOptions testOptions

smoothInUnknownsThreshold

int smoothInUnknownsThreshold
If a word has been seen more than this many times, then relative frequencies of tags are used for POS assignment; if not, they are smoothed with tag priors.


smartMutation

boolean smartMutation
Have tags changeable based on statistics on word types having various taggings.


wordIndex

Index<E> wordIndex

tagIndex

Index<E> tagIndex

seenCounter

ClassicCounter<E> seenCounter
Records the number of times word/tag pair was seen in training data. Includes word/tag pairs where one is a wildcard not a real word/tag.


smooth

double[] smooth

flexiTag

boolean flexiTag

useSignatureForKnownSmoothing

boolean useSignatureForKnownSmoothing

tagsToBaseTags

int[] tagsToBaseTags

Class edu.stanford.nlp.parser.lexparser.EnglishTreebankParserParams extends AbstractTreebankParserParams implements Serializable

serialVersionUID: 4153878351331522581L

Serialized Fields

headFinder

HeadFinder headFinder

englishTrain

EnglishTreebankParserParams.EnglishTrain englishTrain

englishTest

EnglishTreebankParserParams.EnglishTest englishTest

Class edu.stanford.nlp.parser.lexparser.EnglishTreebankParserParams.EnglishTest extends Object implements Serializable

serialVersionUID: 183157656745674521L

Serialized Fields

retainNPTMPSubcategories

boolean retainNPTMPSubcategories

retainTMPSubcategories

boolean retainTMPSubcategories

retainADVSubcategories

boolean retainADVSubcategories

makeCopulaHead

boolean makeCopulaHead

Class edu.stanford.nlp.parser.lexparser.EnglishTreebankParserParams.EnglishTrain extends Object implements Serializable

serialVersionUID: 1831576434872643L

Serialized Fields

leaveItAll

int leaveItAll
if true, leave all PTB (functional tag) annotations (bad)


splitIN

int splitIN
Annotate prepositions into subcategories. Values: 0 = no annotation. 1 = IN with a ^S.* parent (putative subordinating conjunctions) marked differently from others (real prepositions). OK. 2 = Annotate IN prepositions 3 ways: ^S.* parent, ^N.* parent or rest (generally predicative ADJP, VP). Better than sIN=1. Good. 3 = Annotate prepositions 6 ways: real feature engineering. Great. 4 = Refinement of 3: allows -SC under SINV, WHADVP for -T and no -SCC if the parent is an NP. 5 = Like 4 but maps TO to IN in a "nominal" (N*, P*, A*) context. 6 = 4, but mark V/A complement and leave noun ones unmarked instead.


splitQuotes

boolean splitQuotes
Mark quote marks for single vs. double so don't get mismatched ones.


splitSFP

boolean splitSFP
Separate out sentence final punct. (. ! ?). Doesn't help.


splitPercent

boolean splitPercent
Mark the nouns that are percent signs. Slightly good.


splitNPpercent

int splitNPpercent
Mark phrases that are headed by %. A value of 0 = do nothing, 1 = only NP, 2 = NP and ADJP, 3 = NP, ADJP and QP, 4 = any phrase.


tagRBGPA

boolean tagRBGPA
Grand parent annotate RB to try to distinguish sentential ones and ones in places like NP post modifier (things like 'very' are already distinguished as their parent is ADJP).


splitNNP

int splitNNP
Mark NNP words as to position in phrase (single, left, right, inside) or subcategorize NNP(S) as initials or initial/final in NP.


joinPound

boolean joinPound
Join pound with dollar.


joinJJ

boolean joinJJ
Join comparative and superlative adjectives with positive.


joinNounTags

boolean joinNounTags
Join proper nouns with common nouns. This isn't to improve performance, but because Genia doesn't use proper noun tags in general.


splitPPJJ

boolean splitPPJJ
A special test for "such" mainly ("such as Fred"). A wash, so omit


splitTRJJ

boolean splitTRJJ
Put a special tag on 'transitive adjectives' with NP complement, like 'due May 15' -- it also catches 'such' in 'such as NP', which may be good. Matches 658 times in 2-21 training corpus. Wash.


splitJJCOMP

boolean splitJJCOMP
Put a special tag on 'adjectives with complements'. This acts as a general subcat feature for adjectives.


splitMoreLess

boolean splitMoreLess
Specially mark the comparative/superlative words: less, least, more, most


unaryDT

boolean unaryDT
Mark "Intransitive" DT. Good.


unaryRB

boolean unaryRB
Mark "Intransitive" RB. Good.


unaryPRP

boolean unaryPRP
"Intransitive" PRP. Wash -- basically a no-op really.


markReflexivePRP

boolean markReflexivePRP
Mark reflexive PRP words.


unaryIN

boolean unaryIN
Mark "Intransitive" IN. Minutely negative.


splitCC

int splitCC
Provide annotation of conjunctions. Gives modest gains (numbers show F1 increase with respect to goodPCFG in June 2005). A value of 1 annotates both "and" and "or" as "CC-C" (+0.29%), 2 annotates "but" and "&" separately (+0.17%), 3 annotates just "and" (equalsIgnoreCase) (+0.11%), 0 annotates nothing (+0.00%).


splitNOT

boolean splitNOT
Annotates forms of "not" specially as tag "NOT". BAD


splitRB

boolean splitRB
Split modifier (NP, AdjP) adverbs from others. This does nothing if you're already doing tagPA.


splitAux

int splitAux
Make special tags for forms of BE and HAVE (and maybe DO/HELP, etc.). A value of 0 is do nothing. A value of 1 is the basic form. Positive PCFG effect, but neutral to negative in Factored, and impossible if you use gPA. A value of 2 adds in "s" = "'s" and delves further to disambiguate "'s" as BE or HAVE. Theoretically good, but no practical gains. A value of 3 adds DO. A value of 4 adds HELP (which also takes VB form complement) as DO. A value of 5 adds LET (which also takes VB form complement) as DO. A value of 6 adds MAKE (which also takes VB form complement) as DO. A value of 7 adds WATCH, SEE (which also take VB form complement) as DO. A value of 8 adds come, go, but not inflections (which colloquially can take a VB form complement) as DO. A value of 9 adds GET as BE. Differences are small. You get about 0.3 F1 by doing something; the best appear to be 2 or 3 for sentence exact and 7 or 8 for LP/LR F1.


vpSubCat

boolean vpSubCat
Pitiful attempt at marking V* preterms with their surface subcat frames. Bad so far.


markDitransV

int markDitransV
Attempt to record ditransitive verbs. The value 0 means do nothing; 1 records two or more NP or S* arguments, and 2 means to only record two or more NP arguments (that aren't NP-TMP). 1 gave neutral to bad results.


splitVP

int splitVP
Add (head) tags to VPs. An argument of 0 = no head-subcategorization of VPs, 1 = add head tags (anything, as given by HeadFinder), 2 = add head tags, but collapse finite verb tags (VBP, VBD, VBZ, MD) together, 3 = only annotate verbal tags, and collapse finite verb tags (annotation is VBF, TO, VBG, VBN, VB, or zero), 4 = only split on categories of VBF, TO, VBG, VBN, VB, and map cases that are not headed by a verbal category to an appropriate category based on word suffix (ing, d, t, s, to) or to VB otherwise. We usually use a value of 3; 2 or 3 is much better than 0. See also splitVPNPAgr. If it is true, its effects override any value set for this parameter.


splitVPNPAgr

boolean splitVPNPAgr
Put enough marking on VP and NP to permit "agreement".


splitSTag

int splitSTag
Mark S/SINV/SQ nodes according to verbal tag. Meanings are: 0 = no subcategorization. 1 = mark with head tag. 2 = mark only -VBF if VBZ/VBD/VBP/MD tag. 3 = as 2 and mark -VBNF if TO/VBG/VBN/VB. 4 = as 2 but only mark S not SINV/SQ. 5 = as 3 but only mark S not SINV/SQ. Previously seen as bad. Option 4 might be promising now.


markContainedVP

boolean markContainedVP

splitNPPRP

boolean splitNPPRP

dominatesV

int dominatesV
Verbal distance -- mark whether symbol dominates a verb (V*, MD). Very good.


dominatesI

boolean dominatesI
Verbal distance -- mark whether symbol dominates a preposition (IN)


dominatesC

boolean dominatesC
Verbal distance -- mark whether symbol dominates a conjunction (CC)


markCC

int markCC
Mark phrases which are conjunctions. 0 = No marking. 1 = Any phrase with a CC daughter that isn't first or last. Possibly marginally positive. 2 = As 1 but also a non-marginal CONJP daughter. In principle good, but no gains. 3 = More like Charniak. Not yet implemented. Need to annotate _before_ annotate children! np or vp with two or more np/vp children, a comma, cc or conjp, and nothing else.


splitSGapped

int splitSGapped
Mark specially S nodes with "gapped" subject (control, raising). 1 is basic version. 2 is better mark S nodes with "gapped" subject. 3 seems best on small training set, but all of these are too similar; 4 can't be differentiated. 5 is done on tree before empty splitting. (Bad!?)


splitNumNP

boolean splitNumNP
Mark "numeric NPs". Probably bad?


splitPoss

int splitPoss
Give a special tag to NPs which are possessive NPs (end in 's). A value of 0 means do nothing, 1 means tagging possessive NPs with "-P", 2 means restructure possessive NPs so that they introduce a POSSP node that takes as children the POS and a regularly structured NP. I.e., recover standard good linguistic practice circa 1985. This seems a good idea, but is almost a no-op (modulo fine points of markovization), since the previous NP-P phrase already uniquely captured what is now a POSSP.


splitBaseNP

int splitBaseNP
Mark base NPs. A value of 0 = no marking, 1 = marking baseNP (ones which rewrite just as preterminals), and 2 = doing Collins-style marking, where an extra NP node is inserted above a baseNP, if it isn't already in an NP over NP construction, as in Collins 1999. This option shouldn't really be in EnglishTrain since it's needed at parsing time. But we don't currently use it.... A value of 1 is good.


splitTMP

int splitTMP
Retain NP-TMP (or maybe PP-TMP) annotation. Good. The values for this parameter are defined in NPTmpRetainingTreeNormalizer.


splitSbar

int splitSbar
Split SBAR nodes. 1 = mark 'in order to' purpose clauses; this is actually a small and inconsistent part of what is marked SBAR-PRP in the treebank, which is mainly 'because' reason clauses. 2 = mark all infinitive SBAR. 3 = do 1 and 2. A value of 1 seems minutely positive; 2 and 3 seem negative. Also get 'in case Sfin', 'In order to', and on one occasion 'in order that'


splitNPADV

int splitNPADV
Retain NP-ADV annotation. 0 means strip "-ADV" annotation. 1 means to retain it, and to percolate it down to a head tag providing it can do it through a path of only NP nodes.


splitNPNNP

int splitNPNNP
Mark NP-NNP. 0 is nothing; 1 is only NNP head, 2 is NNP and NNPS head; 3 is NNP or NNPS anywhere in local NP. All bad!


correctTags

boolean correctTags
'Correct' tags to produce verbs in VPs, etc. where possible


rightPhrasal

boolean rightPhrasal
Right edge has a phrasal node. Bad?


sisterSplitLevel

int sisterSplitLevel
Set the support * KL cutoff level (1-4) for sister splitting -- don't use it, as far as we can tell so far


gpaRootVP

boolean gpaRootVP
Grand-parent annotate (root mark) VP below ROOT. Seems negative.


makePPTOintoIN

int makePPTOintoIN
Change TO inside PP to IN.

Class edu.stanford.nlp.parser.lexparser.IntDependency extends Object implements Serializable

serialVersionUID: 1L

Serialized Fields

head

IntTaggedWord head

arg

IntTaggedWord arg

leftHeaded

boolean leftHeaded

distance

short distance

Class edu.stanford.nlp.parser.lexparser.IntTaggedWord extends Object implements Serializable

serialVersionUID: 1L

Serialized Fields

word

int word

tag

short tag

Class edu.stanford.nlp.parser.lexparser.MLEDependencyGrammar extends AbstractDependencyGrammar implements Serializable

serialVersionUID: 1L

Serialization Methods

readObject

private void readObject(ObjectInputStream stream)
                 throws IOException,
                        ClassNotFoundException
Throws:
IOException
ClassNotFoundException

writeObject

private void writeObject(ObjectOutputStream stream)
                  throws IOException
Throws:
IOException
Serialized Fields

useSmoothTagProjection

boolean useSmoothTagProjection

useUnigramWordSmoothing

boolean useUnigramWordSmoothing

numWordTokens

int numWordTokens

argCounter

ClassicCounter<E> argCounter
Stores all the counts for dependencies (with and without the word being a wildcard) in the reduced tag space.


stopCounter

ClassicCounter<E> stopCounter

smooth_aT_hTWd

double smooth_aT_hTWd
Bayesian m-estimate prior for aT given hTWd against base distribution of aT given hTd. TODO: Note that these values are overwritten in the constructor. Find what is best and then maybe remove these defaults!


smooth_aTW_hTWd

double smooth_aTW_hTWd
Bayesian m-estimate prior for aTW given hTWd against base distribution of aTW given hTd.


smooth_stop

double smooth_stop

interp

double interp
Interpolation between model that directly predicts aTW and model that predicts aT and then aW given aT. This percent of the mass is on the model directly predicting aTW.


smooth_aTW_aT

double smooth_aTW_aT

smooth_aTW_hTd

double smooth_aTW_hTd

smooth_aT_hTd

double smooth_aT_hTd

smooth_aPTW_aPT

double smooth_aPTW_aPT

smoothTP

TagProjection smoothTP

smoothTPIndex

Index<E> smoothTPIndex

Class edu.stanford.nlp.parser.lexparser.Options extends Object implements Serializable

serialVersionUID: 4L

Serialization Methods

readObject

private void readObject(ObjectInputStream in)
                 throws IOException,
                        ClassNotFoundException
Making the TestOptions transient means it won't even be constructed when you deserialize an Options, so we need to construct it on our own when deserializing

Throws:
IOException
ClassNotFoundException
Serialized Fields

lexOptions

Options.LexOptions lexOptions

tlpParams

TreebankLangParserParams tlpParams
The treebank-specific parser parameters to use.


forceCNF

boolean forceCNF
Forces parsing with strictly CNF grammar -- unary chains are converted to XP&YP symbols and back


doPCFG

boolean doPCFG
Do a PCFG parse of the sentence. If both variables are on, also do a combined parse of the sentence.


doDep

boolean doDep
Do a dependency parse of the sentence.


freeDependencies

boolean freeDependencies
if true, any child can be the head (seems rather bad!)


directional

boolean directional
Whether dependency grammar considers left/right direction. Good.


genStop

boolean genStop

useSmoothTagProjection

boolean useSmoothTagProjection

useUnigramWordSmoothing

boolean useUnigramWordSmoothing

distance

boolean distance
Use distance bins in the dependency calculations


coarseDistance

boolean coarseDistance
Use coarser distance (4 bins) in dependency calculations


dcTags

boolean dcTags
"double count" tags rewrites as word in PCFG and Dep parser. Good for combined parsing only (it used to not kick in for PCFG parsing). This option is only used at Test time, but it is now in Options, so the correct choice for a grammar is recorded by a serialized parser. You should turn this off for a vanilla PCFG parser.


nodePrune

boolean nodePrune
If true, inside the factored parser, remove any node from the final chosen tree which improves the PCFG score. This was added as the dependency factor tends to encourage 'deep' trees.


trainOptions

TrainOptions trainOptions

wordFunction

Function<T1,T2> wordFunction
A function that maps words used in training and testing to new words. For example, it could be a function to lowercase text, such as edu.stanford.nlp.util.LowercaseFunction (which makes the parser case insensitive). This function is applied in LexicalizedParserQuery.parse and in the training methods which build a new parser.

Class edu.stanford.nlp.parser.lexparser.Options.LexOptions extends Object implements Serializable

serialVersionUID: 2805351374506855632L

Serialized Fields

useUnknownWordSignatures

int useUnknownWordSignatures
Whether to use suffix and capitalization information for unknowns. Within the BaseLexicon model options have the following meaning: 0 means a single unknown token. 1 uses suffix, and capitalization. 2 uses a variant (richer) form of signature. Good. Use this one. Using the richer signatures in versions 3 or 4 seems to have very marginal or no positive value. 3 uses a richer form of signature that mimics the NER word type patterns. 4 is a variant of 2. 5 is another with more English specific morphology (good for English unknowns!). 6-9 are options for Arabic. 9 codes some patterns for numbers and derivational morphology, but also supports unknownPrefixSize and unknownSuffixSize. For German, 0 means a single unknown token, and non-zero means to use capitalization of first letter and a suffix of length unknownSuffixSize.


smoothInUnknownsThreshold

int smoothInUnknownsThreshold
Words more common than this are tagged with MLE P(t|w). Default 100. The smoothing is sufficiently slight that changing this has little effect. But set this to 0 to be able to use the parser as a vanilla PCFG with no smoothing (not as a practical parser but for exposition or debugging).


smartMutation

boolean smartMutation
Smarter smoothing for rare words.


useUnicodeType

boolean useUnicodeType
Make use of unicode code point types in smoothing.


unknownSuffixSize

int unknownSuffixSize
For certain Lexicons, a certain number of word-final letters are used to subclassify the unknown token. This gives the number of letters.


unknownPrefixSize

int unknownPrefixSize
For certain Lexicons, a certain number of word-initial letters are used to subclassify the unknown token. This gives the number of letters.


uwModel

String uwModel
Model for unknown words that the lexicon should use


flexiTag

boolean flexiTag

useSignatureForKnownSmoothing

boolean useSignatureForKnownSmoothing
Whether to use signature rather than just being unknown as prior in known word smoothing. Currently only works if turned on for English.

Class edu.stanford.nlp.parser.lexparser.TestOptions extends Object implements Serializable

serialVersionUID: 7256526346598L

Serialized Fields

noRecoveryTagging

boolean noRecoveryTagging
If false, then failure of the PCFG parser to parse a sentence will trigger allowing all tags for words in parse recovery mode, with a log probability of -1000. If true, these extra taggings are not added. It is false by default. Use option -noRecoveryTagging to set to true.


doRecovery

boolean doRecovery
If true, then failure of the PCFG factor to parse a sentence will trigger parse recovery mode.


useN5

boolean useN5
If true, the n^4 "speed-up" is not used with the Factored Parser.


useFastFactored

boolean useFastFactored
If true, use approximate factored algorithm, which just rescores PCFG k best, rather than exact factored algorithm. This algorithm requires the dependency grammar to exist for rescoring, but not for the dependency grammar to be run. Hence the correct usage for guarding code only required for exact A* factored parsing is now if (op.doPCFG && op.doDep && ! Test.useFastFactored).


iterativeCKY

boolean iterativeCKY
If true, use faster iterative deepening CKY algorithm.


maxLength

int maxLength
The maximum sentence length (including punctuation, etc.) to parse.


MAX_ITEMS

int MAX_ITEMS
The maximum number of edges and hooks combined that the factored parser will build before giving up. This number should probably be relative to the sentence length parsed. In general, though, if the parser cannot parse a sentence after this much work then there is no good parse consistent between the PCFG and Dependency parsers. (Normally, depending on other flags, the parser will then just return the best PCFG parse.)


unseenSmooth

double unseenSmooth
The amount of smoothing put in (as an m-estimate) for unknown words. If negative, set by the code in the lexicon class.


increasingLength

boolean increasingLength
Parse trees in test treebank in order of increasing length.


preTag

boolean preTag
Tag the sentences first, then parse given those (coarse) tags.


forceTags

boolean forceTags
Parse using only tags given from correct answer or the POS tagger


forceTagBeginnings

boolean forceTagBeginnings

taggerSerializedFile

String taggerSerializedFile
POS tagger model used when preTag is enabled.


noFunctionalForcing

boolean noFunctionalForcing
Only valid with force tags - strips away functionals when forcing the tags, meaning tags have to start appropriately but the parser will assign the functional part.


evalb

boolean evalb
Write EvalB-readable output files.


verbose

boolean verbose
Print a lot of extra output as you parse.


exhaustiveTest

boolean exhaustiveTest

pcfgThreshold

boolean pcfgThreshold
If this variable is true, and the sum of the inside and outside score for a constituent is worse than the best known score for a sentence by more than pcfgThresholdValue, then -Inf is returned as the outside Score by oScore() (while otherwise the true outside score is returned).


pcfgThresholdValue

double pcfgThresholdValue

printAllBestParses

boolean printAllBestParses
Print out all best PCFG parses.


depWeight

double depWeight
Weighting on dependency log probs. The dependency grammar negative log probability scores are simply multiplied by this number.


prunePunc

boolean prunePunc

addMissingFinalPunctuation

boolean addMissingFinalPunctuation
If a token list does not have sentence final punctuation near the end, then automatically add the default one. This might help parsing if the treebank is all punctuated. Not done if reading a treebank.


outputFormat

String outputFormat
Determines format of output trees: choose among penn, oneline


outputFormatOptions

String outputFormatOptions

writeOutputFiles

boolean writeOutputFiles
If true, write files parsed to a new file with the same name except for an added ".stp" extension.


outputFilesDirectory

String outputFilesDirectory
If the writeOutputFiles option is true, then output files appear in this directory. An unset value (null) means to use the directory of the source files. Use "" or . for the current directory.


outputFilesExtension

String outputFilesExtension
If the writeOutputFiles option is true, then output files appear with this extension. Use "" for no extension.


outputFilesPrefix

String outputFilesPrefix
If the writeOutputFiles option is true, then output files appear with this prefix.


outputkBestEquivocation

String outputkBestEquivocation
If this option is not null, output the k-best equivocation. Must be specified with printPCFGkBest.


maxSpanForTags

int maxSpanForTags
The largest span to consider for word-hood. Used for parsing unsegmented Chinese text and parsing lattices. Keep it at 1 unless you know what you're doing.


lengthNormalization

boolean lengthNormalization
Turns on normalizing scores for sentence length. Makes no difference (except decreased efficiency) unless maxSpanForTags is greater than one. Works only for PCFG (so far).


sample

boolean sample
Used when you want to generate sample parses instead of finding the best parse. (NOT YET USED.)


printPCFGkBest

int printPCFGkBest
Printing k-best parses from PCFG, when k > 0.


printFactoredKGood

int printFactoredKGood
Printing k good parses from the factored parser, when k > 0.


evals

Properties evals
What evaluations to report and how to report them (using LexicalizedParser). Known evaluations are: pcfgLB, pcfgCB, pcfgDA, pcfgTA, pcfgLL, pcfgRUO, pcfgCUO, pcfgCatE, depDA, depTA, depLL, factLB, factCB, factDA, factTA, factLL. The default is pcfgLB,depDA,factLB,factTA. You need to negate those ones out (e.g., -evals "depDA=false") if you don't want them. LB = ParseEval labeled bracketing, CB = crossing brackets and zero crossing bracket rate, DA = dependency accuracy, TA = tagging accuracy, LL = log likelihood score, RUO/CUO = rules/categories under and over proposed, CatE = evaluation by phrasal category. Known styles are: runningAverages, summary, tsv. The default style is summary. You need to negate it out if you don't want it. Invalid names in the argument to this option are not reported!


fastFactoredCandidateMultiplier

int fastFactoredCandidateMultiplier
This variable says to find k good fast factored parses, how many times k of the best PCFG parses should be examined.


fastFactoredCandidateAddend

int fastFactoredCandidateAddend
This variable says to find k good factored parses, how many added on best PCFG parses should be examined.


useLexiconToScoreDependencyPwGt

boolean useLexiconToScoreDependencyPwGt
If this is true, the Lexicon is used to score P(w|t) in the backoff inside the dependency grammar. (Otherwise, an MLE is used if w is seen, and a constant if w is unseen.)


useNonProjectiveDependencyParser

boolean useNonProjectiveDependencyParser
If this is true, perform non-projective dependency parsing.

Class edu.stanford.nlp.parser.lexparser.TrainOptions extends Object implements Serializable

serialVersionUID: 72571349843538L

Serialized Fields

trainTreeFile

String trainTreeFile

trainLengthLimit

int trainLengthLimit

cheatPCFG

boolean cheatPCFG
Add all test set trees to training data for PCFG. (Currently only supported in FactoredParser main.)


markovFactor

boolean markovFactor
Whether to do "horizontal Markovization" (as in ACL 2003 paper). False means regular PCFG expansions.


markovOrder

int markovOrder

hSelSplit

boolean hSelSplit

HSEL_CUT

int HSEL_CUT

markFinalStates

boolean markFinalStates
Whether or not to mark final states in binarized grammar. This must be off to get most value out of grammar compaction.


openClassTypesThreshold

int openClassTypesThreshold
A POS tag has to have been attributed to more than this number of word types before it is regarded as an open-class tag. Unknown words will only possibly be tagged as open-class tags (unless flexiTag is on). If flexiTag is on, unknown words will be able to be tagged any POS for which the unseenMap has nonzero count (that is, the tag was seen for a new word after unseen signature counting was started).


fractionBeforeUnseenCounting

double fractionBeforeUnseenCounting
Start to aggregate signature-tag pairs only for words unseen in the first this fraction of the data.


PA

boolean PA
This variable controls doing parent annotation of phrasal nodes. Good.


gPA

boolean gPA
This variable controls doing 2 levels of parent annotation. Bad.


postPA

boolean postPA

postGPA

boolean postGPA

selectiveSplit

boolean selectiveSplit
Only split the "common high KL divergence" parent categories.... Good.


selectiveSplitCutOff

double selectiveSplitCutOff

selectivePostSplit

boolean selectivePostSplit

selectivePostSplitCutOff

double selectivePostSplitCutOff

postSplitWithBaseCategory

boolean postSplitWithBaseCategory
Whether, in post-splitting of categories, nodes are annotated with the (grand)parent's base category or with its complete subcategorized category.


sisterAnnotate

boolean sisterAnnotate
Selective Sister annotation.


sisterSplitters

Set<E> sisterSplitters

markUnary

int markUnary
Mark all unary nodes specially. Good for just PCFG. Bad for factored. markUnary affects phrasal nodes. A value of 0 means to do nothing; a value of 1 means to mark the parent (higher) node of a unary rewrite. A value of 2 means to mark the child (lower) node of a unary rewrite. Values of 1 and 2 only apply if the child (lower) node is phrasal. (A value of 1 is better than 2 in combos.) A value of 1 corresponds to the old boolean -unary flag.


markUnaryTags

boolean markUnaryTags
Mark POS tags which are the sole member of their phrasal constituent. This is like markUnary=2, applied to POS tags.


splitPrePreT

boolean splitPrePreT
Mark all pre-preterminals (also does splitBaseNP: don't need both)


tagPA

boolean tagPA
Parent annotation on tags. Good (for PCFG?)


tagSelectiveSplit

boolean tagSelectiveSplit
Do parent annotation on tags selectively. Neutral, but fewer splits.


tagSelectiveSplitCutOff

double tagSelectiveSplitCutOff

tagSelectivePostSplit

boolean tagSelectivePostSplit

tagSelectivePostSplitCutOff

double tagSelectivePostSplitCutOff

rightRec

boolean rightRec
Right edge is right-recursive (X << X) Bad. (NP only is good)


leftRec

boolean leftRec
Left edge is left-recursive (X << X) Bad.


collinsPunc

boolean collinsPunc
Promote/delete punctuation like Collins. Bad (!)


splitters

Set<E> splitters
Set the splitter strings. These are a set of parent and/or grandparent annotated categories which should be split off.


postSplitters

Set<E> postSplitters

deleteSplitters

Set<E> deleteSplitters

printTreeTransformations

int printTreeTransformations
Just for debugging: check that your tree transforms work correctly. This will print the transformations of the first printTreeTransformations trees.


printAnnotatedPW

PrintWriter printAnnotatedPW

printBinarizedPW

PrintWriter printBinarizedPW

printStates

boolean printStates

compactGrammar

int compactGrammar
How to compact grammars as FSMs. 0 = no compaction [uses makeSyntheticLabel1], 1 = no compaction but use label names that wrap from right to left in binarization [uses makeSyntheticLabel2], 2 = wrapping labels and materialize unary at top rewriting passive to active, 3 = ExactGrammarCompactor, 4 = LossyGrammarCompactor, 5 = CategoryMergingGrammarCompactor. (May 2007 CDM note: options 4 and 5 don't seem to be functioning sensibly. 0, 1, and 3 seem to be the 'good' options. 2 is only useful as input to 3. There seems to be no reason not to use 0, despite the default.)


leftToRight

boolean leftToRight

noTagSplit

boolean noTagSplit

ruleSmoothing

boolean ruleSmoothing
Enables linear rule smoothing during grammar extraction but before grammar compaction. The alpha term is the same as that described in Petrov et al. (2006), and has range [0,1].


ruleSmoothingAlpha

double ruleSmoothingAlpha

smoothing

boolean smoothing
TODO wsg2011: This is the old grammar smoothing parameter that no longer does anything in the parser. It should be removed.


ruleDiscount

double ruleDiscount
Discounts the count of BinaryRules (only, apparently) in training data.


printAnnotatedRuleCounts

boolean printAnnotatedRuleCounts

printAnnotatedStateCounts

boolean printAnnotatedStateCounts

basicCategoryTagsInDependencyGrammar

boolean basicCategoryTagsInDependencyGrammar
Whether to use the basic or split tags in the dependency grammar


Package edu.stanford.nlp.process

Class edu.stanford.nlp.process.CoreLabelTokenFactory extends Object implements Serializable

serialVersionUID: 4L

Serialized Fields

addIndices

boolean addIndices

Package edu.stanford.nlp.sequences

Class edu.stanford.nlp.sequences.PlainTextDocumentReaderAndWriter extends Object implements Serializable

serialVersionUID: -2420535144980273136L

Serialized Fields

wts

WordToSentenceProcessor<IN> wts

flags

SeqClassifierFlags flags

tokenizerFactory

TokenizerFactory<T> tokenizerFactory

tokenFactory

CoreTokenFactory<IN extends CoreMap> tokenFactory

Class edu.stanford.nlp.sequences.SeqClassifierFlags extends Object implements Serializable

serialVersionUID: -7076671761070232567L

Serialized Fields

stringRep

String stringRep

useNGrams

boolean useNGrams

conjoinShapeNGrams

boolean conjoinShapeNGrams

lowercaseNGrams

boolean lowercaseNGrams

dehyphenateNGrams

boolean dehyphenateNGrams

usePrev

boolean usePrev

useNext

boolean useNext

useTags

boolean useTags

useWordPairs

boolean useWordPairs

useGazettes

boolean useGazettes

useSequences

boolean useSequences

usePrevSequences

boolean usePrevSequences

useNextSequences

boolean useNextSequences

useLongSequences

boolean useLongSequences

useBoundarySequences

boolean useBoundarySequences

useTaggySequences

boolean useTaggySequences

useExtraTaggySequences

boolean useExtraTaggySequences

dontExtendTaggy

boolean dontExtendTaggy

useTaggySequencesShapeInteraction

boolean useTaggySequencesShapeInteraction

strictlyZeroethOrder

boolean strictlyZeroethOrder

strictlyFirstOrder

boolean strictlyFirstOrder

strictlySecondOrder

boolean strictlySecondOrder

strictlyThirdOrder

boolean strictlyThirdOrder

entitySubclassification

String entitySubclassification

retainEntitySubclassification

boolean retainEntitySubclassification

useGazettePhrases

boolean useGazettePhrases

makeConsistent

boolean makeConsistent

useWordLabelCounts

boolean useWordLabelCounts

useViterbi

boolean useViterbi

binnedLengths

int[] binnedLengths

verboseMode

boolean verboseMode

useSum

boolean useSum

tolerance

double tolerance

printFeatures

String printFeatures

useSymTags

boolean useSymTags

useSymWordPairs

boolean useSymWordPairs
useSymWordPairs Has a small negative effect.


printClassifier

String printClassifier

printClassifierParam

int printClassifierParam

intern

boolean intern

intern2

boolean intern2

selfTest

boolean selfTest

sloppyGazette

boolean sloppyGazette

cleanGazette

boolean cleanGazette

noMidNGrams

boolean noMidNGrams

maxNGramLeng

int maxNGramLeng

useReverse

boolean useReverse

greekifyNGrams

boolean greekifyNGrams

useParenMatching

boolean useParenMatching

useLemmas

boolean useLemmas

usePrevNextLemmas

boolean usePrevNextLemmas

normalizeTerms

boolean normalizeTerms

normalizeTimex

boolean normalizeTimex

useNB

boolean useNB

useQN

boolean useQN

useFloat

boolean useFloat

QNsize

int QNsize

QNsize2

int QNsize2

maxIterations

int maxIterations

wordShape

int wordShape

useShapeStrings

boolean useShapeStrings

useTypeSeqs

boolean useTypeSeqs

useTypeSeqs2

boolean useTypeSeqs2

useTypeSeqs3

boolean useTypeSeqs3

useDisjunctive

boolean useDisjunctive

disjunctionWidth

int disjunctionWidth

useDisjunctiveShapeInteraction

boolean useDisjunctiveShapeInteraction

useDisjShape

boolean useDisjShape

useWord

boolean useWord

useClassFeature

boolean useClassFeature

useShapeConjunctions

boolean useShapeConjunctions

useWordTag

boolean useWordTag

useNPHead

boolean useNPHead

useNPGovernor

boolean useNPGovernor

useHeadGov

boolean useHeadGov

useLastRealWord

boolean useLastRealWord

useNextRealWord

boolean useNextRealWord

useOccurrencePatterns

boolean useOccurrencePatterns

useTypeySequences

boolean useTypeySequences

justify

boolean justify

normalize

boolean normalize

priorType

String priorType

sigma

double sigma

epsilon

double epsilon

beamSize

int beamSize

maxLeft

int maxLeft

maxRight

int maxRight

usePosition

boolean usePosition

useBeginSent

boolean useBeginSent

useGazFeatures

boolean useGazFeatures

useMoreGazFeatures

boolean useMoreGazFeatures

useAbbr

boolean useAbbr

useMinimalAbbr

boolean useMinimalAbbr

useAbbr1

boolean useAbbr1

useMinimalAbbr1

boolean useMinimalAbbr1

useMoreAbbr

boolean useMoreAbbr

deleteBlankLines

boolean deleteBlankLines

useGENIA

boolean useGENIA

useTOK

boolean useTOK

useABSTR

boolean useABSTR

useABSTRFreqDict

boolean useABSTRFreqDict

useABSTRFreq

boolean useABSTRFreq

useFREQ

boolean useFREQ

useABGENE

boolean useABGENE

useWEB

boolean useWEB

useWEBFreqDict

boolean useWEBFreqDict

useIsURL

boolean useIsURL

useURLSequences

boolean useURLSequences

useIsDateRange

boolean useIsDateRange

useEntityTypes

boolean useEntityTypes

useEntityTypeSequences

boolean useEntityTypeSequences

useEntityRule

boolean useEntityRule

useOrdinal

boolean useOrdinal

useACR

boolean useACR

useANTE

boolean useANTE

useMoreTags

boolean useMoreTags

useChunks

boolean useChunks

useChunkySequences

boolean useChunkySequences

usePrevVB

boolean usePrevVB

useNextVB

boolean useNextVB

useVB

boolean useVB

subCWGaz

boolean subCWGaz

documentReader

String documentReader

map

String map

useWideDisjunctive

boolean useWideDisjunctive

wideDisjunctionWidth

int wideDisjunctionWidth

useRadical

boolean useRadical

useBigramInTwoClique

boolean useBigramInTwoClique

morphFeatureFile

String morphFeatureFile

useReverseAffix

boolean useReverseAffix

charHalfWindow

int charHalfWindow

useWord1

boolean useWord1

useWord2

boolean useWord2

useWord3

boolean useWord3

useWord4

boolean useWord4

useRad1

boolean useRad1

useRad2

boolean useRad2

useWordn

boolean useWordn

useCTBPre1

boolean useCTBPre1

useCTBSuf1

boolean useCTBSuf1

useASBCPre1

boolean useASBCPre1

useASBCSuf1

boolean useASBCSuf1

usePKPre1

boolean usePKPre1

usePKSuf1

boolean usePKSuf1

useHKPre1

boolean useHKPre1

useHKSuf1

boolean useHKSuf1

useCTBChar2

boolean useCTBChar2

useASBCChar2

boolean useASBCChar2

useHKChar2

boolean useHKChar2

usePKChar2

boolean usePKChar2

useRule2

boolean useRule2

useDict2

boolean useDict2

useOutDict2

boolean useOutDict2

outDict2

String outDict2

useDictleng

boolean useDictleng

useDictCTB2

boolean useDictCTB2

useDictASBC2

boolean useDictASBC2

useDictPK2

boolean useDictPK2

useDictHK2

boolean useDictHK2

useBig5

boolean useBig5

useNegDict2

boolean useNegDict2

useNegDict3

boolean useNegDict3

useNegDict4

boolean useNegDict4

useNegCTBDict2

boolean useNegCTBDict2

useNegCTBDict3

boolean useNegCTBDict3

useNegCTBDict4

boolean useNegCTBDict4

useNegASBCDict2

boolean useNegASBCDict2

useNegASBCDict3

boolean useNegASBCDict3

useNegASBCDict4

boolean useNegASBCDict4

useNegHKDict2

boolean useNegHKDict2

useNegHKDict3

boolean useNegHKDict3

useNegHKDict4

boolean useNegHKDict4

useNegPKDict2

boolean useNegPKDict2

useNegPKDict3

boolean useNegPKDict3

useNegPKDict4

boolean useNegPKDict4

usePre

boolean usePre

useSuf

boolean useSuf

useRule

boolean useRule

useHk

boolean useHk

useMsr

boolean useMsr

useMSRChar2

boolean useMSRChar2

usePk

boolean usePk

useAs

boolean useAs

useFilter

boolean useFilter

largeChSegFile

boolean largeChSegFile

useRad2b

boolean useRad2b

keepEnglishWhitespaces

boolean keepEnglishWhitespaces
Keep the whitespace between English words in testFile when printing out answers. Doesn't really change the content of the CoreLabels. (For Chinese segmentation.)


keepAllWhitespaces

boolean keepAllWhitespaces
Keep all the whitespace words in testFile when printing out answers. Doesn't really change the content of the CoreLabels. (For Chinese segmentation.)


sighanPostProcessing

boolean sighanPostProcessing

useChPos

boolean useChPos
use POS information (an "open" feature for Chinese segmentation)


normalizationTable

String normalizationTable

dictionary

String dictionary

serializedDictionary

String serializedDictionary

dictionary2

String dictionary2

normTableEncoding

String normTableEncoding

sighanCorporaDict

String sighanCorporaDict
For the Sighan bakeoff 2005, the path to the dictionary of bigrams that appeared in the corpus


useWordShapeGaz

boolean useWordShapeGaz

wordShapeGaz

String wordShapeGaz

splitDocuments

boolean splitDocuments

printXML

boolean printXML

useSeenFeaturesOnly

boolean useSeenFeaturesOnly

lastNameList

String lastNameList

maleNameList

String maleNameList

femaleNameList

String femaleNameList

inputEncoding

String inputEncoding

bioSubmitOutput

boolean bioSubmitOutput

numRuns

int numRuns

answerFile

String answerFile

altAnswerFile

String altAnswerFile

dropGaz

String dropGaz

printGazFeatures

String printGazFeatures

numStartLayers

int numStartLayers

dump

boolean dump

mergeTags

boolean mergeTags

splitOnHead

boolean splitOnHead

featureCountThreshold

int featureCountThreshold

featureWeightThreshold

double featureWeightThreshold

featureFactory

String featureFactory

backgroundSymbol

String backgroundSymbol

useObservedSequencesOnly

boolean useObservedSequencesOnly

maxDocSize

int maxDocSize

printProbs

boolean printProbs

printFirstOrderProbs

boolean printFirstOrderProbs

saveFeatureIndexToDisk

boolean saveFeatureIndexToDisk

removeBackgroundSingletonFeatures

boolean removeBackgroundSingletonFeatures

doGibbs

boolean doGibbs

numSamples

int numSamples

useNERPrior

boolean useNERPrior

useAcqPrior

boolean useAcqPrior

useUniformPrior

boolean useUniformPrior
If true and doGibbs also true, will do generic Gibbs inference without any priors


useMUCFeatures

boolean useMUCFeatures

annealingRate

double annealingRate

annealingType

String annealingType

loadProcessedData

String loadProcessedData

initViterbi

boolean initViterbi

useUnknown

boolean useUnknown

checkNameList

boolean checkNameList

useSemPrior

boolean useSemPrior

useFirstWord

boolean useFirstWord

useNumberFeature

boolean useNumberFeature

ocrFold

int ocrFold

classifierType

String classifierType

svmModelFile

String svmModelFile

inferenceType

String inferenceType

useLemmaAsWord

boolean useLemmaAsWord

type

String type

readerAndWriter

String readerAndWriter

comboProps

List<E> comboProps

usePrediction

boolean usePrediction

useAltGazFeatures

boolean useAltGazFeatures

gazFilesFile

String gazFilesFile

usePrediction2

boolean usePrediction2

baseTrainDir

String baseTrainDir

baseTestDir

String baseTestDir

trainFiles

String trainFiles

trainFileList

String trainFileList

testFiles

String testFiles

trainDirs

String trainDirs

testDirs

String testDirs

useOnlySeenWeights

boolean useOnlySeenWeights

predProp

String predProp

pad

CoreLabel pad

useObservedFeaturesOnly

boolean useObservedFeaturesOnly

distSimLexicon

String distSimLexicon

useDistSim

boolean useDistSim

removeTopN

int removeTopN

numTimesRemoveTopN

int numTimesRemoveTopN

randomizedRatio

double randomizedRatio

removeTopNPercent

double removeTopNPercent

purgeFeatures

int purgeFeatures

booleanFeatures

boolean booleanFeatures

iobWrapper

boolean iobWrapper

iobTags

boolean iobTags

useSegmentation

boolean useSegmentation

memoryThrift

boolean memoryThrift

timitDatum

boolean timitDatum

serializeDatasetsDir

String serializeDatasetsDir

loadDatasetsDir

String loadDatasetsDir

pushDir

String pushDir

purgeDatasets

boolean purgeDatasets

keepOBInMemory

boolean keepOBInMemory

fakeDataset

boolean fakeDataset

restrictTransitionsTimit

boolean restrictTransitionsTimit

numDatasetsPerFile

int numDatasetsPerFile

useTitle

boolean useTitle

lowerNewgeneThreshold

boolean lowerNewgeneThreshold

useEitherSideWord

boolean useEitherSideWord

useEitherSideDisjunctive

boolean useEitherSideDisjunctive

twoStage

boolean twoStage

crfType

String crfType

featureThreshold

int featureThreshold

featThreshFile

String featThreshFile

featureDiffThresh

double featureDiffThresh

numTimesPruneFeatures

int numTimesPruneFeatures

newgeneThreshold

double newgeneThreshold

doAdaptation

boolean doAdaptation

useInternal

boolean useInternal

useExternal

boolean useExternal

selfTrainConfidenceThreshold

double selfTrainConfidenceThreshold

selfTrainIterations

int selfTrainIterations

selfTrainWindowSize

int selfTrainWindowSize

useHuber

boolean useHuber

useQuartic

boolean useQuartic

adaptSigma

double adaptSigma

numFolds

int numFolds

startFold

int startFold

endFold

int endFold

cacheNGrams

boolean cacheNGrams

outputFormat

String outputFormat

useSMD

boolean useSMD

useSGDtoQN

boolean useSGDtoQN

useStochasticQN

boolean useStochasticQN

useScaledSGD

boolean useScaledSGD

scaledSGDMethod

int scaledSGDMethod

SGDPasses

int SGDPasses

QNPasses

int QNPasses

tuneSGD

boolean tuneSGD

stochasticMethod

StochasticCalculateMethods stochasticMethod

initialGain

double initialGain

stochasticBatchSize

int stochasticBatchSize

useSGD

boolean useSGD

gainSGD

double gainSGD

useHybrid

boolean useHybrid

hybridCutoffIteration

int hybridCutoffIteration

outputIterationsToFile

boolean outputIterationsToFile

testObjFunction

boolean testObjFunction

testVariance

boolean testVariance

SGD2QNhessSamples

int SGD2QNhessSamples

testHessSamples

boolean testHessSamples

CRForder

int CRForder

CRFwindow

int CRFwindow

estimateInitial

boolean estimateInitial

outputEncoding

String outputEncoding

useKBest

boolean useKBest

searchGraphPrefix

String searchGraphPrefix

searchGraphPrune

double searchGraphPrune

kBest

int kBest

useFeaturesC4gram

boolean useFeaturesC4gram

useFeaturesC5gram

boolean useFeaturesC5gram

useFeaturesC6gram

boolean useFeaturesC6gram

useFeaturesCpC4gram

boolean useFeaturesCpC4gram

useFeaturesCpC5gram

boolean useFeaturesCpC5gram

useFeaturesCpC6gram

boolean useFeaturesCpC6gram

useUnicodeType

boolean useUnicodeType

useUnicodeType4gram

boolean useUnicodeType4gram

useUnicodeType5gram

boolean useUnicodeType5gram

use4Clique

boolean use4Clique

useUnicodeBlock

boolean useUnicodeBlock

useShapeStrings1

boolean useShapeStrings1

useShapeStrings3

boolean useShapeStrings3

useShapeStrings4

boolean useShapeStrings4

useShapeStrings5

boolean useShapeStrings5

useGoodForNamesCpC

boolean useGoodForNamesCpC

useDictionaryConjunctions

boolean useDictionaryConjunctions

expandMidDot

boolean expandMidDot

printFeaturesUpto

int printFeaturesUpto

useDictionaryConjunctions3

boolean useDictionaryConjunctions3

useWordUTypeConjunctions2

boolean useWordUTypeConjunctions2

useWordUTypeConjunctions3

boolean useWordUTypeConjunctions3

useWordShapeConjunctions2

boolean useWordShapeConjunctions2

useWordShapeConjunctions3

boolean useWordShapeConjunctions3

useMidDotShape

boolean useMidDotShape

augmentedDateChars

boolean augmentedDateChars

suppressMidDotPostprocessing

boolean suppressMidDotPostprocessing

printNR

boolean printNR

classBias

String classBias

printLabelValue

boolean printLabelValue

useRobustQN

boolean useRobustQN

combo

boolean combo

useGenericFeatures

boolean useGenericFeatures

verboseForTrueCasing

boolean verboseForTrueCasing

trainHierarchical

String trainHierarchical

domain

String domain

baseline

boolean baseline

transferSigmas

String transferSigmas

doFE

boolean doFE

restrictLabels

boolean restrictLabels

announceObjectBankEntries

boolean announceObjectBankEntries

usePos

boolean usePos

useAgreement

boolean useAgreement

useAccCase

boolean useAccCase

useInna

boolean useInna

useConcord

boolean useConcord

useFirstNgram

boolean useFirstNgram

useLastNgram

boolean useLastNgram

collapseNN

boolean collapseNN

useConjBreak

boolean useConjBreak

useAuxPairs

boolean useAuxPairs

usePPVBPairs

boolean usePPVBPairs

useAnnexing

boolean useAnnexing

useTemporalNN

boolean useTemporalNN

usePath

boolean usePath

innaPPAttach

boolean innaPPAttach

markProperNN

boolean markProperNN

markMasdar

boolean markMasdar

useSVO

boolean useSVO

numTags

int numTags

useTagsCpC

boolean useTagsCpC

useTagsCpCp2C

boolean useTagsCpCp2C

useTagsCpCp2Cp3C

boolean useTagsCpCp2Cp3C

useTagsCpCp2Cp3Cp4C

boolean useTagsCpCp2Cp3Cp4C

l1reg

double l1reg

mixedCaseMapFile

String mixedCaseMapFile

auxTrueCaseModels

String auxTrueCaseModels

use2W

boolean use2W

useLC

boolean useLC

useYetMoreCpCShapes

boolean useYetMoreCpCShapes

useIfInteger

boolean useIfInteger

exportFeatures

String exportFeatures

useInPlaceSGD

boolean useInPlaceSGD

useTopics

boolean useTopics

evaluateIters

int evaluateIters

evalCmd

String evalCmd

evaluateTrain

boolean evaluateTrain

tuneSampleSize

int tuneSampleSize

usePhraseFeatures

boolean usePhraseFeatures

usePhraseWords

boolean usePhraseWords

usePhraseWordTags

boolean usePhraseWordTags

usePhraseWordSpecialTags

boolean usePhraseWordSpecialTags

useCommonWordsFeature

boolean useCommonWordsFeature

useProtoFeatures

boolean useProtoFeatures

useWordnetFeatures

boolean useWordnetFeatures

tokenFactory

String tokenFactory

tokensAnnotationClassName

String tokensAnnotationClassName

useCorefFeatures

boolean useCorefFeatures

wikiFeatureDbFile

String wikiFeatureDbFile

useNoisyNonNoisyFeature

boolean useNoisyNonNoisyFeature

useYear

boolean useYear

useSentenceNumber

boolean useSentenceNumber

useLabelSource

boolean useLabelSource

casedDistSim

boolean casedDistSim
Whether to (not) lowercase tokens before looking them up in distsim lexicon. By default lowercasing was done, but now it doesn't have to be true :-).


distSimFileFormat

String distSimFileFormat
The format of the distsim file. Known values are: alexClark = TSV file: word TAB clusterNumber [optional other content]; terryKoo = TSV file: clusterBitString TAB word TAB frequency.


distSimMaxBits

int distSimMaxBits
If this number is greater than 0, the distSim class is assumed to be a bit string and is truncated at this many characters. Normal distSim features will then use this amount of resolution. Extra, special distsim features may work at a coarser level of resolution. Since the lexicon only stores this length of bit string, there is then no way to have finer-grained clusters.


numberEquivalenceDistSim

boolean numberEquivalenceDistSim
If this is set to true, all digit characters get mapped to '9' in a distsim lexicon and for lookup. This is a simple word shaping that can shrink distsim lexicons and improve their performance.


unknownWordDistSimClass

String unknownWordDistSimClass
What class to assign to words not found in the dist sim lexicon. You might want to make it a known class, if one is the "default" class.


useNeighborNGrams

boolean useNeighborNGrams
Use prefixes and suffixes from the previous and next word.


wordFunction

Function<T1,T2> wordFunction
This function maps words in the training or test data to new words. They are used at the feature extractor level, i.e., in the FeatureFactory. For now, only the NERFeatureFactory uses this.


Package edu.stanford.nlp.stats

Class edu.stanford.nlp.stats.ClassicCounter extends Object implements Serializable

serialVersionUID: 4L

Serialized Fields

map

Map<K,V> map

mapFactory

MapFactory<K,V> mapFactory

totalCount

double totalCount

defaultValue

double defaultValue

Class edu.stanford.nlp.stats.IntCounter extends AbstractCounter<E> implements Serializable

serialVersionUID: 4L

Serialized Fields

map

Map<K,V> map

mapFactory

MapFactory<K,V> mapFactory

totalCount

int totalCount

defaultValue

int defaultValue

Class edu.stanford.nlp.stats.TwoDimensionalCounter extends Object implements Serializable

serialVersionUID: 1L

Serialized Fields

map

Map<K,V> map

total

double total

outerMF

MapFactory<K,V> outerMF

innerMF

MapFactory<K,V> innerMF

defaultValue

double defaultValue

Package edu.stanford.nlp.tagger.maxent

Class edu.stanford.nlp.tagger.maxent.DictionaryExtractor extends Extractor implements Serializable

serialVersionUID: 692763177746328195L

Class edu.stanford.nlp.tagger.maxent.Extractor extends Object implements Serializable

serialVersionUID: -4694133872973560083L

Serialized Fields

position

int position

isTag

boolean isTag

Class edu.stanford.nlp.tagger.maxent.ExtractorDistsim extends Extractor implements Serializable

serialVersionUID: 1L

Serialized Fields

lexicon

Map<K,V> lexicon

Class edu.stanford.nlp.tagger.maxent.ExtractorDistsim.ExtractorDistsimConjunction extends Extractor implements Serializable

serialVersionUID: 1L

Serialized Fields

lexicon

Map<K,V> lexicon

left

int left

right

int right

name

String name

Class edu.stanford.nlp.tagger.maxent.Extractors extends Object implements Serializable

serialVersionUID: -4777107742414749890L

Serialized Fields

v

Extractor[] v

local

Map<K,V> local

localContext

Map<K,V> localContext

dynamic

Map<K,V> dynamic

Class edu.stanford.nlp.tagger.maxent.ExtractorVerbalVBNZero extends DictionaryExtractor implements Serializable

serialVersionUID: -5881204185400060636L

Serialized Fields

bound

int bound

Class edu.stanford.nlp.tagger.maxent.MaxentTaggerGUI extends JFrame implements Serializable

serialVersionUID: -2574711492469740892L

Serialized Fields

inputBox

JTextArea inputBox

outputBox

JTextArea outputBox

tagButton

JButton tagButton

Class edu.stanford.nlp.tagger.maxent.TaggerConfig extends Properties implements Serializable

serialVersionUID: -4136407850147157497L

Serialized Fields

mode

TaggerConfig.Mode mode

Package edu.stanford.nlp.trees

Class edu.stanford.nlp.trees.AbstractCollinsHeadFinder extends Object implements Serializable

serialVersionUID: -6540278059442931087L

Serialized Fields

tlp

TreebankLanguagePack tlp

nonTerminalInfo

Map<K,V> nonTerminalInfo

defaultRule

String[] defaultRule
Default direction if no rule is found for category. Subclasses can turn it on if they like. If they don't it is an error if no rule is defined for a category (null is returned).


defaultLeftRule

String[] defaultLeftRule
These are built automatically from categoriesToAvoid and used in a fairly different fashion from defaultRule (above). These are used for categories that do have defined rules but where none of them have matched. Rather than picking the rightmost or leftmost child, we will use these to pick the rightmost or leftmost child which isn't in categoriesToAvoid.


defaultRightRule

String[] defaultRightRule

Class edu.stanford.nlp.trees.AbstractTreebankLanguagePack extends Object implements Serializable

serialVersionUID: -6506749780512708352L

Serialized Fields

gfCharacter

char gfCharacter
Default character for indicating that something is a grammatical fn; probably should be overridden by lang specific ones


punctTagStringAcceptFilter

Filter<T> punctTagStringAcceptFilter

punctWordStringAcceptFilter

Filter<T> punctWordStringAcceptFilter

sFPunctTagStringAcceptFilter

Filter<T> sFPunctTagStringAcceptFilter

eIPunctTagStringAcceptFilter

Filter<T> eIPunctTagStringAcceptFilter

startSymbolAcceptFilter

Filter<T> startSymbolAcceptFilter

Class edu.stanford.nlp.trees.BobChrisTreeNormalizer extends TreeNormalizer implements Serializable

serialVersionUID: -1005188028979810143L

Serialized Fields

tlp

TreebankLanguagePack tlp

emptyFilter

Filter<T> emptyFilter

aOverAFilter

Filter<T> aOverAFilter

Class edu.stanford.nlp.trees.BobChrisTreeNormalizer.AOverAFilter extends Object implements Serializable

serialVersionUID: 1L

Class edu.stanford.nlp.trees.BobChrisTreeNormalizer.EmptyFilter extends Object implements Serializable

serialVersionUID: 8914098359495987617L

Class edu.stanford.nlp.trees.CollinsHeadFinder extends AbstractCollinsHeadFinder implements Serializable

serialVersionUID: -8747319554557223437L

Class edu.stanford.nlp.trees.Dependencies.DependentPuncTagRejectFilter extends Object implements Serializable

serialVersionUID: -7732189363171164852L

Serialized Fields

tagRejectFilter

Filter<T> tagRejectFilter

Class edu.stanford.nlp.trees.Dependencies.DependentPuncWordRejectFilter extends Object implements Serializable

serialVersionUID: 1166489968248785287L

Serialized Fields

wordRejectFilter

Filter<T> wordRejectFilter

Class edu.stanford.nlp.trees.EnglishGrammaticalStructure extends GrammaticalStructure implements Serializable

serialVersionUID: -1866362375001969402L

Class edu.stanford.nlp.trees.GrammaticalRelation extends Object implements Serializable

serialVersionUID: 892618003417550128L

Serialized Fields

language

GrammaticalRelation.Language language

shortName

String shortName

longName

String longName

parent

GrammaticalRelation parent

children

List<E> children

sourcePattern

Pattern sourcePattern

targetPatterns

List<E> targetPatterns

specific

String specific

Class edu.stanford.nlp.trees.GrammaticalStructure extends TreeGraph implements Serializable

serialVersionUID: 2286294455343892678L

Serialized Fields

dependencies

Set<E> dependencies

typedDependencies

List<E> typedDependencies

allTypedDependencies

List<E> allTypedDependencies

Class edu.stanford.nlp.trees.LabeledScoredTreeNode extends Tree implements Serializable

serialVersionUID: -8992385140984593817L

Serialized Fields

label

Label label
Label of the parse tree.


score

double score
Score of TreeNode


daughterTrees

Tree[] daughterTrees
Daughters of the parse tree.

Class edu.stanford.nlp.trees.ModCollinsHeadFinder extends CollinsHeadFinder implements Serializable

serialVersionUID: -5870387458902637256L

Class edu.stanford.nlp.trees.NamedDependency extends UnnamedDependency implements Serializable

serialVersionUID: -1635646451505721133L

Serialized Fields

name

Object name

Class edu.stanford.nlp.trees.NPTmpRetainingTreeNormalizer extends BobChrisTreeNormalizer implements Serializable

serialVersionUID: 7548777133196579107L

Serialized Fields

temporalAnnotation

int temporalAnnotation

doSGappedStuff

boolean doSGappedStuff

leaveItAll

int leaveItAll

doAdverbialNP

boolean doAdverbialNP

headFinder

HeadFinder headFinder

Class edu.stanford.nlp.trees.PennTreebankLanguagePack extends AbstractTreebankLanguagePack implements Serializable

serialVersionUID: 9081305982861675328L

Class edu.stanford.nlp.trees.SemanticHeadFinder extends ModCollinsHeadFinder implements Serializable

serialVersionUID: 5721799188009249808L

Serialized Fields

verbalAuxiliaries

HashSet<E> verbalAuxiliaries

copulars

HashSet<E> copulars

passiveAuxiliaries

HashSet<E> passiveAuxiliaries

verbalTags

HashSet<E> verbalTags

Class edu.stanford.nlp.trees.SimpleTree extends Tree implements Serializable

serialVersionUID: -8075763706877132926L

Serialized Fields

daughterTrees

Tree[] daughterTrees
Daughters of the parse tree.

Class edu.stanford.nlp.trees.Tree extends AbstractCollection<Tree> implements Serializable

serialVersionUID: 5441849457648722744L

Serialized Fields

score

double score

Class edu.stanford.nlp.trees.TreeGraph extends Object implements Serializable

serialVersionUID: 1L

Serialized Fields

root

TreeGraphNode root
The root node of this treegraph.


indexMap

Map<K,V> indexMap
A map from arbitrary integer indices to nodes.

Class edu.stanford.nlp.trees.TreeGraphNode extends Tree implements Serializable

serialVersionUID: 5080098143617475328L

Serialized Fields

label

CyclicCoreLabel label
Label for this node.


parent

TreeGraphNode parent
Parent of this node.


children

TreeGraphNode[] children
Children of this node.


tg

TreeGraph tg
The TreeGraph of which this node is part.

Class edu.stanford.nlp.trees.TreeNormalizer extends Object implements Serializable

serialVersionUID: 1540681875853883387L

Class edu.stanford.nlp.trees.TypedDependency extends Object implements Serializable

serialVersionUID: -7690294213151279779L

Serialized Fields

reln

GrammaticalRelation reln

gov

TreeGraphNode gov

dep

TreeGraphNode dep

extra

boolean extra

Class edu.stanford.nlp.trees.UnnamedConcreteDependency extends UnnamedDependency implements Serializable

serialVersionUID: -8836949694741145222L

Serialized Fields

headIndex

int headIndex

depIndex

int depIndex

Class edu.stanford.nlp.trees.UnnamedDependency extends Object implements Serializable

serialVersionUID: -3768440215342256085L

Serialized Fields

regentText

String regentText

dependentText

String dependentText

regent

Label regent

dependent

Label dependent

Package edu.stanford.nlp.trees.international.pennchinese

Class edu.stanford.nlp.trees.international.pennchinese.ChineseEnglishWordMap extends Object implements Serializable

serialVersionUID: 7655332268578049993L

Serialized Fields

map

Map<K,V> map

normalized

boolean normalized

Class edu.stanford.nlp.trees.international.pennchinese.ChineseHeadFinder extends AbstractCollinsHeadFinder implements Serializable

serialVersionUID: 6143632784691159283L

Class edu.stanford.nlp.trees.international.pennchinese.ChineseTreebankLanguagePack extends AbstractTreebankLanguagePack implements Serializable

serialVersionUID: 5757403475523638802L


Package edu.stanford.nlp.trees.tregex

Class edu.stanford.nlp.trees.tregex.ParseException extends Exception implements Serializable

serialVersionUID: 1L

Serialized Fields

currentToken

edu.stanford.nlp.trees.tregex.Token currentToken
This is the last token that has been consumed successfully. If this object has been created due to a parse error, the token following this token will (therefore) be the first error token.


expectedTokenSequences

int[][] expectedTokenSequences
Each entry in this array is an array of integers. Each array of integers represents a sequence of tokens (by their ordinal values) that is expected at this point of the parse.


tokenImage

String[] tokenImage
This is a reference to the "tokenImage" array of the generated parser within which the parse error occurred. This array is defined in the generated ...Constants interface.


eol

String eol
The end of line string for this machine.

Class edu.stanford.nlp.trees.tregex.TregexParseException extends RuntimeException implements Serializable

Class edu.stanford.nlp.trees.tregex.TregexPattern extends Object implements Serializable

serialVersionUID: 5060298043763944913L

Serialized Fields

neg

boolean neg

opt

boolean opt

patternString

String patternString

Package edu.stanford.nlp.trees.tregex.tsurgeon

Class edu.stanford.nlp.trees.tregex.tsurgeon.ParseException extends Exception implements Serializable

serialVersionUID: 1L

Serialized Fields

currentToken

edu.stanford.nlp.trees.tregex.tsurgeon.Token currentToken
This is the last token that has been consumed successfully. If this object has been created due to a parse error, the token following this token will (therefore) be the first error token.


expectedTokenSequences

int[][] expectedTokenSequences
Each entry in this array is an array of integers. Each array of integers represents a sequence of tokens (by their ordinal values) that is expected at this point of the parse.


tokenImage

String[] tokenImage
This is a reference to the "tokenImage" array of the generated parser within which the parse error occurred. This array is defined in the generated ...Constants interface.


eol

String eol
The end of line string for this machine.

Class edu.stanford.nlp.trees.tregex.tsurgeon.TsurgeonParseException extends RuntimeException implements Serializable


Package edu.stanford.nlp.util

Class edu.stanford.nlp.util.ArrayCoreMap extends Object implements Serializable

serialVersionUID: 1L

Serialization Methods

writeObject

private void writeObject(ObjectOutputStream out)
                  throws IOException
Overridden serialization method: compacts our map before writing.

Throws:
IOException - If IO error
Serialized Fields

keys

Class<T>[] keys
Array of keys


values

Object[] values
Array of values


size

int size
Total number of elements actually stored in the keys and values arrays

Class edu.stanford.nlp.util.ArrayMap extends AbstractMap<K,V> implements Serializable

serialVersionUID: 1L

Serialized Fields

entryArray

edu.stanford.nlp.util.ArrayMap.Entry<K,V>[] entryArray

capacity

int capacity

size

int size

hashCodeCache

int hashCodeCache

Class edu.stanford.nlp.util.CollectionFactory extends Object implements Serializable

serialVersionUID: 3711321773145894069L

Class edu.stanford.nlp.util.CollectionFactory.ArrayListFactory extends CollectionFactory<T> implements Serializable

serialVersionUID: 1L

Class edu.stanford.nlp.util.CollectionFactory.HashSetFactory extends CollectionFactory<T> implements Serializable

serialVersionUID: -6268401669449458602L

Class edu.stanford.nlp.util.CollectionFactory.LinkedListFactory extends CollectionFactory<T> implements Serializable

serialVersionUID: -4236184979948498000L

Class edu.stanford.nlp.util.CollectionFactory.SizedArrayListFactory extends CollectionFactory<T> implements Serializable

serialVersionUID: 1L

Serialized Fields

defaultSize

int defaultSize

Class edu.stanford.nlp.util.CollectionFactory.TreeSetFactory extends CollectionFactory<T> implements Serializable

serialVersionUID: -3451920268219478134L

Class edu.stanford.nlp.util.CollectionValuedMap extends Object implements Serializable

serialVersionUID: -9064664153962599076L

Serialized Fields

map

Map<K,V> map

cf

CollectionFactory<T> cf

treatCollectionsAsImmutable

boolean treatCollectionsAsImmutable

mf

MapFactory<K,V> mf

emptyValue

Collection<E> emptyValue
The empty collection to be returned when a get doesn't find the key. The collection returned should be empty, for example Collections.emptySet.

Class edu.stanford.nlp.util.DeltaCollectionValuedMap extends CollectionValuedMap<K,V> implements Serializable

serialVersionUID: 1L

Serialized Fields

originalMap

CollectionValuedMap<K,V> originalMap

deltaMap

Map<K,V> deltaMap

Class edu.stanford.nlp.util.FixedPrioritiesPriorityQueue extends AbstractSet<E> implements Serializable

serialVersionUID: 1L

Serialized Fields

size

int size

capacity

int capacity

elements

List<E> elements

priorities

double[] priorities

Class edu.stanford.nlp.util.HashableCoreMap extends ArrayCoreMap implements Serializable

serialVersionUID: 1L

Serialized Fields

immutableKeys

Set<E> immutableKeys
Set of immutable keys


hashcode

int hashcode
Pre-computed hashcode

Class edu.stanford.nlp.util.HashableCoreMap.HashableCoreMapException extends RuntimeException implements Serializable

serialVersionUID: 1L

Class edu.stanford.nlp.util.HashIndex extends AbstractCollection<E> implements Serializable

serialVersionUID: 5398562825928375260L

Serialized Fields

objects

ArrayList<E> objects

indexes

HashMap<K,V> indexes

locked

boolean locked

semaphore

Semaphore semaphore

Class edu.stanford.nlp.util.IdentityHashSet extends AbstractSet<E> implements Serializable

serialVersionUID: -5024744406713321676L

Serialization Methods

readObject

private void readObject(ObjectInputStream s)
                 throws IOException,
                        ClassNotFoundException
Deserialize this Object in a manner which is binary-compatible with the JDK.

Throws:
IOException
ClassNotFoundException

writeObject

private void writeObject(ObjectOutputStream s)
                  throws IOException
Serialize this Object in a manner which is binary-compatible with the JDK.

Throws:
IOException

Class edu.stanford.nlp.util.Interval extends Pair<E extends Comparable<E>,E extends Comparable<E>> implements Serializable

Serialized Fields

flags

int flags

Class edu.stanford.nlp.util.IntPair extends IntTuple implements Serializable

serialVersionUID: 1L

Class edu.stanford.nlp.util.IntQuadruple extends IntTuple implements Serializable

serialVersionUID: 7154973101012473479L

Class edu.stanford.nlp.util.IntTriple extends IntTuple implements Serializable

serialVersionUID: -3744404627253652799L

Class edu.stanford.nlp.util.IntTuple extends Object implements Serializable

serialVersionUID: 7266305463893511982L

Serialized Fields

elements

int[] elements

Class edu.stanford.nlp.util.IntUni extends IntTuple implements Serializable

serialVersionUID: -7182556672628741200L

Class edu.stanford.nlp.util.LowercaseFunction extends Object implements Serializable

serialVersionUID: 1L

Class edu.stanford.nlp.util.MapFactory extends Object implements Serializable

serialVersionUID: 4529666940763477360L

Class edu.stanford.nlp.util.MetaClass.ClassCreationException extends RuntimeException implements Serializable

serialVersionUID: -5980065992461870357L

Class edu.stanford.nlp.util.MetaClass.ConstructorNotFoundException extends MetaClass.ClassCreationException implements Serializable

serialVersionUID: -5980065992461870357L

Class edu.stanford.nlp.util.MutableDouble extends Number implements Serializable

serialVersionUID: 624465615824626762L

Serialized Fields

d

double d

Class edu.stanford.nlp.util.MutableInteger extends Number implements Serializable

serialVersionUID: 624465615824626762L

Serialized Fields

i

int i

Class edu.stanford.nlp.util.Pair extends Object implements Serializable

serialVersionUID: 1360822168806852921L

Serialized Fields

first

Object first
Direct access is deprecated. Use first().

 

second

Object second
Direct access is deprecated. Use second().

 

Class edu.stanford.nlp.util.ReflectionLoading.ReflectionLoadingException extends RuntimeException implements Serializable

Class edu.stanford.nlp.util.Triple extends Object implements Serializable

serialVersionUID: -4182871682751645440L

Serialized Fields

first

Object first

second

Object second

third

Object third



Stanford NLP Group