public class ConstantsAndVariables
extends java.lang.Object
implements java.io.Serializable
Modifier and Type | Class and Description |
---|---|
static class |
ConstantsAndVariables.DataSentsIterator |
static class |
ConstantsAndVariables.PatternForEachTokenWay |
static class |
ConstantsAndVariables.PatternIndexWay |
static class |
ConstantsAndVariables.ScorePhraseMeasures |
Modifier and Type | Field and Description |
---|---|
boolean |
addIndvWordsFromPhrasesExceptLastAsNeg
For example, if positive seed dict contains "cancer" and "breast cancer" then "breast" is included as negative
|
java.util.Map<java.lang.String,java.util.Set<java.lang.String>> |
allowedNERsforLabels |
java.util.Map<java.lang.String,java.util.Set<java.lang.String>> |
allowedTagsInitials |
java.lang.String |
allPatternsDir
Cached file of all patterns for all tokens
|
static java.lang.String |
backgroundSymbol |
boolean |
batchProcessSents
Use this option if you are limited by memory; ignored if fileFormat is ser.
|
boolean |
clubNeighboringLabeledWords |
java.lang.String |
commonWordsPatternFiles
Words to be ignored when learning phrases if
removePhrasesWithStopWords or
removeStopWordsFromSelectedPhrases is true. |
boolean |
computeAllPatterns
If all patterns should be computed.
|
int |
debug
Debug flag for learning patterns.
|
java.util.Map<java.lang.String,Counter<CandidatePhrase>> |
dictOddsWeights |
java.util.Map<java.lang.String,Counter<java.lang.Integer>> |
distSimWeights |
boolean |
doNotApplyPatterns |
boolean |
doNotExtractPhraseAnyWordLabeledOtherClass
Especially useful for multi word phrase extraction.
|
java.lang.String |
englishWordsFiles
English words that are not labeled when labeling using seed dictionaries
|
java.util.Map<java.lang.String,Env> |
env
Environment for
TokenSequencePattern |
boolean |
evaluate |
boolean |
expandNegativesWhenSampling |
int |
expandPhrasesNumTopSimilar |
boolean |
expandPositivesWhenSampling |
java.lang.String |
externalFeatureWeightsDir |
static java.lang.String |
extremedebug |
int |
featureCountThreshold |
java.util.List<java.lang.String> |
functionWords |
boolean |
fuzzyMatch
Whether to do a fuzzy matching when matching seeds to text.
|
static Env |
globalEnv |
java.lang.String |
goldEntitiesEvalFiles |
java.lang.String |
identifier
Save this run as ...
|
java.util.Map<java.lang.String,java.lang.String> |
ignoreCaseSeedMatch
Ignore case when matching seed words.
|
SentenceIndex |
invertedIndex |
java.lang.Class<? extends SentenceIndex> |
invertedIndexClass |
java.lang.String |
invertedIndexDirectory
Where the inverted index (either in memory or lucene) is stored
|
boolean |
justify |
boolean |
learn |
boolean |
loadInvertedIndex
You can load the inverted index using this file.
|
double |
LRSigma
Sigma for L2 regularization in Logistic regression, if a classifier is
used to score phrases
|
static boolean |
matchLowerCaseContext
Lowercase the context words/lemmas
|
int |
maxExtractNumWords
Maximum number of words to learn
|
static java.lang.String |
minimaldebug |
int |
minLen4FuzzyForPattern
Minimum length of words that can be matched fuzzily
|
int |
minPosPhraseSupportForPat
Remove patterns whose number of positive words is less than this.
|
int |
minUnlabPhraseSupportForPat
Remove patterns whose number of unlabeled words is less than this.
|
java.lang.Integer |
numIterationsForPatterns
Maximum number of iterations to run
|
int |
numPatterns
Maximum number of patterns learned in each iteration
|
int |
numThreads
Number of threads
|
int |
numWordsToAdd
Number of words to learn in each iteration
|
java.lang.String |
otherSemanticClassesFiles
List of dictionary phrases that are negative for all labels to be learned.
|
java.lang.String |
outDir
The output directory where the justifications of learning patterns and
phrases would be saved.
|
GetPatternsFromDataMultiClass.PatternScoring |
patternScoring
Pattern Scoring mechanism.
|
PatternFactory.PatternType |
patternType |
double |
perSelectNeg
These are used to learn weights for features if using logistic regression.
|
double |
perSelectRand
These are used to learn weights for features if using logistic regression.
|
double |
positiveSimilarityThresholdLowPrecision |
boolean |
removeOverLappingLabelsFromSeed
Keeps only one label for each token, whichever has the longest
|
boolean |
removePhrasesWithStopWords |
boolean |
removeStopWordsFromSelectedPhrases |
boolean |
restrictToMatched
Currently, does not work correctly.
|
boolean |
saveInvertedIndex
You can save the inverted index.
|
boolean |
savePatternsWordsDir |
java.lang.String |
sentsOutFile |
double |
similarityThresholdHighPrecision |
boolean |
sqrtPatScore
If score for a pattern is square rooted
|
java.lang.String |
stopWordsPatternFiles
Words that are not learned.
|
ConstantsAndVariables.PatternForEachTokenWay |
storePatsForEachToken |
boolean |
subsampleUnkAsNegUsingSim |
java.lang.String |
targetAllowedNERs
Allowed NERs for labels.
|
java.lang.String |
targetAllowedTagsInitialsStr
Initials of all POS tags to use if
usePOS4Pattern is true, separated by comma. |
double |
thresholdNumPatternsApplied |
double |
thresholdSelectPattern
Threshold for learning a pattern
|
double |
thresholdWordExtract |
boolean |
tuneThresholdKeepRunning
Reduce pattern threshold (=0.8*current_value) to extract as many patterns
as possible (still restricted by
numPatterns) |
boolean |
useMatchingPhrase
Use the actual dictionary matching phrase(s) instead of the token word or
lemma in calculating the stats
|
boolean |
useOtherLabelsWordsasNegative
use the seed dictionaries and the new words learned for the other labels in
the previous iterations as negative
|
boolean |
usePatternEvalBOW
use bag of words
|
boolean |
usePatternEvalDomainNgram
|
boolean |
usePatternEvalEditDistOther
|
boolean |
usePatternEvalEditDistSame
|
boolean |
usePatternEvalFirstCapital |
boolean |
usePatternEvalGoogleNgram
|
boolean |
usePatternEvalSemanticOdds
|
boolean |
usePatternEvalWordClass
|
boolean |
usePatternEvalWordShape
|
boolean |
usePatternEvalWordShapeStr |
boolean |
usePatternResultAsLabel
Label words that are learned so that in further iterations we have more
information
|
boolean |
usePhraseEvalBOW
use bag of words
|
boolean |
usePhraseEvalDomainNgram
use domain tf-idf for learning phrases
|
boolean |
usePhraseEvalEditDistOther
Edit distance between this phrase and other phrases in other dictionaries
|
boolean |
usePhraseEvalEditDistSame
Edit distance between this phrase and the other phrases in the label
dictionary
|
boolean |
usePhraseEvalFirstCapital |
boolean |
usePhraseEvalGoogleNgram
use google tf-idf for learning phrases.
|
boolean |
usePhraseEvalPatWtByFreq
use \sum_allpat pattern_wt_that_extracted_phrase/phrase_freq for learning
phrases
|
boolean |
usePhraseEvalSemanticOdds
odds of the phrase freq in the label dictionary vs other dictionaries
|
boolean |
usePhraseEvalWordClass
Only works if you have single label.
|
boolean |
usePhraseEvalWordShape |
boolean |
usePhraseEvalWordShapeStr |
boolean |
usePhraseEvalWordVector
Only works if you have single label.
|
boolean |
useWordVectorsToComputeSim |
java.lang.String |
wordIgnoreRegex
Do not learn phrases that match this regex.
|
edu.stanford.nlp.patterns.GetPatternsFromDataMultiClass.WordScoring |
wordScoring |
java.lang.String |
wordVectorFile |
boolean |
writeMatchedTokensFiles |
boolean |
writeMatchedTokensIdsForEachPhrase |
Constructor and Description |
---|
ConstantsAndVariables(java.util.Properties props,
java.util.Map<java.lang.String,java.util.Set<CandidatePhrase>> labelDictionary,
java.util.Map<java.lang.String,java.lang.Class<? extends TypesafeMap.Key<java.lang.String>>> answerClass,
java.util.Map<java.lang.String,java.lang.Class> generalizeClasses,
java.util.Map<java.lang.String,java.util.Map<java.lang.Class,java.lang.Object>> ignoreClasses) |
ConstantsAndVariables(java.util.Properties props,
java.util.Set<java.lang.String> labels,
java.util.Map<java.lang.String,java.lang.Class<? extends TypesafeMap.Key<java.lang.String>>> answerClass) |
ConstantsAndVariables(java.util.Properties props,
java.util.Set<java.lang.String> labels,
java.util.Map<java.lang.String,java.lang.Class<? extends TypesafeMap.Key<java.lang.String>>> answerClass,
java.util.Map<java.lang.String,java.lang.Class> generalizeClasses) |
ConstantsAndVariables(java.util.Properties props,
java.util.Set<java.lang.String> labels,
java.util.Map<java.lang.String,java.lang.Class<? extends TypesafeMap.Key<java.lang.String>>> answerClass,
java.util.Map<java.lang.String,java.lang.Class> generalizeClasses,
java.util.Map<java.lang.String,java.util.Map<java.lang.Class,java.lang.Object>> ignoreClasses) |
ConstantsAndVariables(java.util.Properties props,
java.lang.String label,
java.lang.Class<? extends TypesafeMap.Key<java.lang.String>> answerClass) |
Modifier and Type | Method and Description |
---|---|
void |
addSeedWords(java.lang.String label,
java.util.Collection<CandidatePhrase> seeds) |
void |
addWordShapes(java.lang.String label,
java.util.Set<CandidatePhrase> words) |
static CandidatePhrase |
containsFuzzy(java.util.Set<CandidatePhrase> words,
CandidatePhrase w,
int minLen4Fuzzy) |
java.util.Map<java.lang.String,java.lang.String> |
getAllOptions() |
java.util.Map<java.lang.String,java.lang.Class<? extends TypesafeMap.Key<java.lang.String>>> |
getAnswerClass() |
java.util.Set<java.lang.String> |
getCommonEngWords() |
java.util.concurrent.ConcurrentHashMap<java.lang.String,java.lang.Double> |
getEditDistanceFromEnglishWords() |
java.util.concurrent.ConcurrentHashMap<java.lang.String,java.lang.String> |
getEditDistanceFromEnglishWordsMatches() |
Pair<java.lang.String,java.lang.Double> |
getEditDistanceFromOtherClasses(java.lang.String label,
java.lang.String ph,
int minLen) |
Pair<java.lang.String,java.lang.Double> |
getEditDistanceFromThisClass(java.lang.String label,
java.lang.String ph,
int minLen) |
double |
getEditDistanceScoresOtherClass(java.lang.String label,
java.lang.String g) |
double |
getEditDistanceScoresOtherClassThreshold(java.lang.String label,
java.lang.String g)
1 if lies in edit distance, 0 if not close to any words
|
double |
getEditDistanceScoresThisClass(java.lang.String label,
java.lang.String g) |
double |
getEditDistanceScoresThisClassThreshold(java.lang.String label,
java.lang.String g) |
java.util.Set<java.lang.String> |
getEnglishWords() |
static java.util.Map<java.lang.String,java.lang.Class> |
getGeneralizeClasses() |
java.util.Map<java.lang.String,java.lang.Integer> |
getGeneralWordClassClusters() |
java.util.Map<java.lang.String,java.util.Map<java.lang.Class,java.lang.Object>> |
getIgnoreWordswithClassesDuringSelection() |
java.util.Set<java.lang.String> |
getLabels() |
Counter<CandidatePhrase> |
getLearnedWords(java.lang.String label) |
java.lang.String |
getLearnedWordsAsJson() |
java.lang.String |
getLearnedWordsAsJsonLastIteration() |
java.util.Map<java.lang.String,java.util.TreeMap<java.lang.Integer,Counter<CandidatePhrase>>> |
getLearnedWordsEachIter() |
java.util.TreeMap<java.lang.Integer,Counter<CandidatePhrase>> |
getLearnedWordsEachIter(java.lang.String label) |
java.util.Set<CandidatePhrase> |
getOtherSemanticClassesWords() |
java.util.Map<java.lang.String,java.util.Set<CandidatePhrase>> |
getSeedLabelDictionary() |
java.lang.String |
getSetWordsAsJson(java.util.Map<java.lang.String,Counter<CandidatePhrase>> words) |
static java.util.Set<CandidatePhrase> |
getStopWords() |
java.util.Map<java.lang.String,java.lang.Integer> |
getWordClassClusters() |
java.util.Map<java.lang.String,java.lang.String> |
getWordShapeCache() |
java.util.Map<java.lang.String,Counter<java.lang.String>> |
getWordShapesForLabels() |
boolean |
hasSeedWordOrOtherSem(CandidatePhrase p) |
static boolean |
isFuzzyMatch(java.lang.String w1,
java.lang.String w2,
int minLen4Fuzzy) |
static java.lang.Iterable<java.io.File> |
listFileIncludingItself(java.lang.String file) |
void |
setGeneralWordClassClusters(java.util.Map<java.lang.String,java.lang.Integer> generalWordClassClusters) |
void |
setLearnedWordsEachIter(java.util.TreeMap<java.lang.Integer,Counter<CandidatePhrase>> words,
java.lang.String label) |
void |
setOtherSemanticClassesWords(java.util.Set<CandidatePhrase> other) |
void |
setUp(java.util.Properties props) |
@ArgumentParser.Option(name="numIterationsForPatterns") public java.lang.Integer numIterationsForPatterns
@ArgumentParser.Option(name="numPatterns") public int numPatterns
@ArgumentParser.Option(name="outDir") public java.lang.String outDir
@ArgumentParser.Option(name="allPatternsDir") public java.lang.String allPatternsDir
@ArgumentParser.Option(name="computeAllPatterns") public boolean computeAllPatterns
@ArgumentParser.Option(name="patternScoring") public GetPatternsFromDataMultiClass.PatternScoring patternScoring
See GetPatternsFromDataMultiClass.PatternScoring
for options.
@ArgumentParser.Option(name="thresholdSelectPattern") public double thresholdSelectPattern
@ArgumentParser.Option(name="restrictToMatched") public boolean restrictToMatched
@ArgumentParser.Option(name="usePatternResultAsLabel") public boolean usePatternResultAsLabel
@ArgumentParser.Option(name="debug") public int debug
@ArgumentParser.Option(name="identifier") public java.lang.String identifier
@ArgumentParser.Option(name="useMatchingPhrase") public boolean useMatchingPhrase
@ArgumentParser.Option(name="tuneThresholdKeepRunning") public boolean tuneThresholdKeepRunning
numPatterns)
@ArgumentParser.Option(name="maxExtractNumWords") public int maxExtractNumWords
@ArgumentParser.Option(name="useOtherLabelsWordsasNegative") public boolean useOtherLabelsWordsasNegative
@ArgumentParser.Option(name="matchLowerCaseContext") public static boolean matchLowerCaseContext
@ArgumentParser.Option(name="targetAllowedTagsInitialsStr") public java.lang.String targetAllowedTagsInitialsStr
usePOS4Pattern
is true, separated by comma.
public java.util.Map<java.lang.String,java.util.Set<java.lang.String>> allowedTagsInitials
@ArgumentParser.Option(name="targetAllowedNERs") public java.lang.String targetAllowedNERs
useTargetNERRestriction
flag should be true.
public java.util.Map<java.lang.String,java.util.Set<java.lang.String>> allowedNERsforLabels
@ArgumentParser.Option(name="numWordsToAdd") public int numWordsToAdd
@ArgumentParser.Option(name="thresholdNumPatternsApplied") public double thresholdNumPatternsApplied
@ArgumentParser.Option(name="wordScoring") public edu.stanford.nlp.patterns.GetPatternsFromDataMultiClass.WordScoring wordScoring
@ArgumentParser.Option(name="thresholdWordExtract") public double thresholdWordExtract
public boolean justify
@ArgumentParser.Option(name="LRSigma") public double LRSigma
@ArgumentParser.Option(name="englishWordsFiles") public java.lang.String englishWordsFiles
@ArgumentParser.Option(name="commonWordsPatternFiles") public java.lang.String commonWordsPatternFiles
removePhrasesWithStopWords
or
removeStopWordsFromSelectedPhrases
is true. Also, these words
are considered negative when scoring a pattern (similar to
othersemanticclasses).
@ArgumentParser.Option(name="otherSemanticClassesFiles") public java.lang.String otherSemanticClassesFiles
@ArgumentParser.Option(name="minLen4FuzzyForPattern") public int minLen4FuzzyForPattern
@ArgumentParser.Option(name="wordIgnoreRegex") public java.lang.String wordIgnoreRegex
@ArgumentParser.Option(name="numThreads") public int numThreads
@ArgumentParser.Option(name="stopWordsPatternFiles", gloss="stop words") public java.lang.String stopWordsPatternFiles
CreatePatterns
is true.
public java.util.Map<java.lang.String,Env> env
TokenSequencePattern
public static Env globalEnv
@ArgumentParser.Option(name="removeStopWordsFromSelectedPhrases") public boolean removeStopWordsFromSelectedPhrases
@ArgumentParser.Option(name="removePhrasesWithStopWords") public boolean removePhrasesWithStopWords
@ArgumentParser.Option(name="externalFeatureWeightsFile") public java.lang.String externalFeatureWeightsDir
@ArgumentParser.Option(name="doNotApplyPatterns") public boolean doNotApplyPatterns
@ArgumentParser.Option(name="sqrtPatScore") public boolean sqrtPatScore
@ArgumentParser.Option(name="minUnlabPhraseSupportForPat") public int minUnlabPhraseSupportForPat
@ArgumentParser.Option(name="minPosPhraseSupportForPat") public int minPosPhraseSupportForPat
@ArgumentParser.Option(name="addIndvWordsFromPhrasesExceptLastAsNeg") public boolean addIndvWordsFromPhrasesExceptLastAsNeg
public java.util.Map<java.lang.String,Counter<java.lang.Integer>> distSimWeights
public java.util.Map<java.lang.String,Counter<CandidatePhrase>> dictOddsWeights
@ArgumentParser.Option(name="invertedIndexClass", gloss="another option is Lucene backed, which is not included in the CoreNLP release. Contact us to get a copy (distributed under Apache License).") public java.lang.Class<? extends SentenceIndex> invertedIndexClass
@ArgumentParser.Option(name="invertedIndexDirectory") public java.lang.String invertedIndexDirectory
@ArgumentParser.Option(name="clubNeighboringLabeledWords") public boolean clubNeighboringLabeledWords
@ArgumentParser.Option(name="patternType") public PatternFactory.PatternType patternType
@ArgumentParser.Option(name="subsampleUnkAsNegUsingSim", gloss="When learning a classifier, remove phrases from unknown phrases that are too close to the positive phrases") public boolean subsampleUnkAsNegUsingSim
@ArgumentParser.Option(name="expandPositivesWhenSampling", gloss="when sampling for learning feature wts for learning phrases, expand the positives") public boolean expandPositivesWhenSampling
@ArgumentParser.Option(name="expandNegativesWhenSampling", gloss="when sampling for learning feature wts for learning phrases, expand the negatives") public boolean expandNegativesWhenSampling
@ArgumentParser.Option(name="similarityThresholdHighPrecision", gloss="used for expanding positives") public double similarityThresholdHighPrecision
@ArgumentParser.Option(name="positiveSimilarityThresholdLowPrecision", gloss="used for not choosing close unknowns as positives") public double positiveSimilarityThresholdLowPrecision
@ArgumentParser.Option(name="wordVectorFile", gloss="if using word vectors for computing similarities") public java.lang.String wordVectorFile
@ArgumentParser.Option(name="useWordVectorsToComputeSim", gloss="use vectors directly instead of word classes for computing similarity") public boolean useWordVectorsToComputeSim
@ArgumentParser.Option(name="goldEntitiesEvalFiles", gloss="label1,gold_list_of_entities_file;label2,...") public java.lang.String goldEntitiesEvalFiles
@ArgumentParser.Option(name="evaluate") public boolean evaluate
@ArgumentParser.Option(name="featureCountThreshold") public int featureCountThreshold
@ArgumentParser.Option(name="expandPhrasesNumTopSimilar", gloss="k in kNN") public int expandPhrasesNumTopSimilar
@ArgumentParser.Option(name="fuzzyMatch") public boolean fuzzyMatch
@ArgumentParser.Option(name="ignoreCaseSeedMatch") public java.util.Map<java.lang.String,java.lang.String> ignoreCaseSeedMatch
@ArgumentParser.Option(name="sentsOutFile") public java.lang.String sentsOutFile
@ArgumentParser.Option(name="savePatternsWordsDir") public boolean savePatternsWordsDir
@ArgumentParser.Option(name="learn") public boolean learn
@ArgumentParser.Option(name="removeOverLappingLabelsFromSeed") public boolean removeOverLappingLabelsFromSeed
@ArgumentParser.Option(name="usePhraseEvalWordClass") public boolean usePhraseEvalWordClass
@ArgumentParser.Option(name="usePhraseEvalWordVector") public boolean usePhraseEvalWordVector
@ArgumentParser.Option(name="usePhraseEvalGoogleNgram") public boolean usePhraseEvalGoogleNgram
@ArgumentParser.Option(name="usePhraseEvalDomainNgram") public boolean usePhraseEvalDomainNgram
@ArgumentParser.Option(name="usePhraseEvalPatWtByFreq") public boolean usePhraseEvalPatWtByFreq
@ArgumentParser.Option(name="usePhraseEvalSemanticOdds") public boolean usePhraseEvalSemanticOdds
@ArgumentParser.Option(name="usePhraseEvalEditDistSame") public boolean usePhraseEvalEditDistSame
@ArgumentParser.Option(name="usePhraseEvalEditDistOther") public boolean usePhraseEvalEditDistOther
@ArgumentParser.Option(name="usePhraseEvalWordShape", gloss="% of phrases of that label that have the same word shape") public boolean usePhraseEvalWordShape
@ArgumentParser.Option(name="usePhraseEvalWordShapeStr", gloss="uses the word shape str as a feature") public boolean usePhraseEvalWordShapeStr
@ArgumentParser.Option(name="usePhraseEvalFirstCapital", gloss="words starts with a capital letter") public boolean usePhraseEvalFirstCapital
@ArgumentParser.Option(name="usePhraseEvalBOW") public boolean usePhraseEvalBOW
@ArgumentParser.Option(name="usePatternEvalWordClass") public boolean usePatternEvalWordClass
@ArgumentParser.Option(name="usePatternEvalWordShape") public boolean usePatternEvalWordShape
@ArgumentParser.Option(name="usePatternEvalWordShapeStr", gloss="uses the word shape str as a feature") public boolean usePatternEvalWordShapeStr
@ArgumentParser.Option(name="usePatternEvalFirstCapital", gloss="words starts with a capital letter") public boolean usePatternEvalFirstCapital
@ArgumentParser.Option(name="usePatternEvalGoogleNgram") public boolean usePatternEvalGoogleNgram
@ArgumentParser.Option(name="usePatternEvalDomainNgram") public boolean usePatternEvalDomainNgram
patternScoring
is PhEvalInPat
or
PhEvalInPat
. See usePhrase* for meanings. Need to also provide googleNgram_dbname,
googleNgram_username and googleNgram_host.
@ArgumentParser.Option(name="usePatternEvalSemanticOdds") public boolean usePatternEvalSemanticOdds
@ArgumentParser.Option(name="usePatternEvalEditDistSame") public boolean usePatternEvalEditDistSame
@ArgumentParser.Option(name="usePatternEvalEditDistOther") public boolean usePatternEvalEditDistOther
@ArgumentParser.Option(name="usePatternEvalBOW") public boolean usePatternEvalBOW
@ArgumentParser.Option(name="perSelectRand") public double perSelectRand
@ArgumentParser.Option(name="perSelectNeg") public double perSelectNeg
@ArgumentParser.Option(name="doNotExtractPhraseAnyWordLabeledOtherClass") public boolean doNotExtractPhraseAnyWordLabeledOtherClass
@ArgumentParser.Option(name="saveInvertedIndex") public boolean saveInvertedIndex
invertedIndexDirectory
if given.
@ArgumentParser.Option(name="loadInvertedIndex") public boolean loadInvertedIndex
@ArgumentParser.Option(name="storePatsForEachToken", gloss="used for storing patterns in PSQL/MEMORY/LUCENE") public ConstantsAndVariables.PatternForEachTokenWay storePatsForEachToken
public static java.lang.String backgroundSymbol
public SentenceIndex invertedIndex
public static java.lang.String extremedebug
public static java.lang.String minimaldebug
public java.util.List<java.lang.String> functionWords
@ArgumentParser.Option(name="batchProcessSents") public boolean batchProcessSents
@ArgumentParser.Option(name="writeMatchedTokensFiles") public boolean writeMatchedTokensFiles
@ArgumentParser.Option(name="writeMatchedTokensIdsForEachPhrase") public boolean writeMatchedTokensIdsForEachPhrase
public ConstantsAndVariables(java.util.Properties props, java.util.Set<java.lang.String> labels, java.util.Map<java.lang.String,java.lang.Class<? extends TypesafeMap.Key<java.lang.String>>> answerClass, java.util.Map<java.lang.String,java.lang.Class> generalizeClasses, java.util.Map<java.lang.String,java.util.Map<java.lang.Class,java.lang.Object>> ignoreClasses) throws java.io.IOException
java.io.IOException
public ConstantsAndVariables(java.util.Properties props, java.util.Map<java.lang.String,java.util.Set<CandidatePhrase>> labelDictionary, java.util.Map<java.lang.String,java.lang.Class<? extends TypesafeMap.Key<java.lang.String>>> answerClass, java.util.Map<java.lang.String,java.lang.Class> generalizeClasses, java.util.Map<java.lang.String,java.util.Map<java.lang.Class,java.lang.Object>> ignoreClasses) throws java.io.IOException
java.io.IOException
public ConstantsAndVariables(java.util.Properties props, java.util.Set<java.lang.String> labels, java.util.Map<java.lang.String,java.lang.Class<? extends TypesafeMap.Key<java.lang.String>>> answerClass) throws java.io.IOException
java.io.IOException
public ConstantsAndVariables(java.util.Properties props, java.lang.String label, java.lang.Class<? extends TypesafeMap.Key<java.lang.String>> answerClass) throws java.io.IOException
java.io.IOException
public ConstantsAndVariables(java.util.Properties props, java.util.Set<java.lang.String> labels, java.util.Map<java.lang.String,java.lang.Class<? extends TypesafeMap.Key<java.lang.String>>> answerClass, java.util.Map<java.lang.String,java.lang.Class> generalizeClasses) throws java.io.IOException
java.io.IOException
public java.util.Set<java.lang.String> getLabels()
public java.util.Map<java.lang.String,java.lang.String> getAllOptions()
public boolean hasSeedWordOrOtherSem(CandidatePhrase p)
public java.util.TreeMap<java.lang.Integer,Counter<CandidatePhrase>> getLearnedWordsEachIter(java.lang.String label)
public java.util.Map<java.lang.String,java.util.TreeMap<java.lang.Integer,Counter<CandidatePhrase>>> getLearnedWordsEachIter()
public void setLearnedWordsEachIter(java.util.TreeMap<java.lang.Integer,Counter<CandidatePhrase>> words, java.lang.String label)
public void setUp(java.util.Properties props) throws java.io.IOException
java.io.IOException
public static java.lang.Iterable<java.io.File> listFileIncludingItself(java.lang.String file)
public java.util.Map<java.lang.String,Counter<java.lang.String>> getWordShapesForLabels()
public static java.util.Map<java.lang.String,java.lang.Class> getGeneralizeClasses()
public static java.util.Set<CandidatePhrase> getStopWords()
public void addWordShapes(java.lang.String label, java.util.Set<CandidatePhrase> words)
public java.util.Map<java.lang.String,java.util.Set<CandidatePhrase>> getSeedLabelDictionary()
public Counter<CandidatePhrase> getLearnedWords(java.lang.String label)
public java.lang.String getLearnedWordsAsJson()
public java.lang.String getLearnedWordsAsJsonLastIteration()
public java.lang.String getSetWordsAsJson(java.util.Map<java.lang.String,Counter<CandidatePhrase>> words)
public java.util.Set<java.lang.String> getEnglishWords()
public java.util.Set<java.lang.String> getCommonEngWords()
public java.util.Set<CandidatePhrase> getOtherSemanticClassesWords()
public void setOtherSemanticClassesWords(java.util.Set<CandidatePhrase> other)
public java.util.Map<java.lang.String,java.lang.Integer> getWordClassClusters()
public Pair<java.lang.String,java.lang.Double> getEditDistanceFromThisClass(java.lang.String label, java.lang.String ph, int minLen)
public Pair<java.lang.String,java.lang.Double> getEditDistanceFromOtherClasses(java.lang.String label, java.lang.String ph, int minLen)
public java.util.concurrent.ConcurrentHashMap<java.lang.String,java.lang.Double> getEditDistanceFromEnglishWords()
public java.util.concurrent.ConcurrentHashMap<java.lang.String,java.lang.String> getEditDistanceFromEnglishWordsMatches()
public double getEditDistanceScoresOtherClass(java.lang.String label, java.lang.String g)
public double getEditDistanceScoresOtherClassThreshold(java.lang.String label, java.lang.String g)
g -
public double getEditDistanceScoresThisClassThreshold(java.lang.String label, java.lang.String g)
public double getEditDistanceScoresThisClass(java.lang.String label, java.lang.String g)
public static boolean isFuzzyMatch(java.lang.String w1, java.lang.String w2, int minLen4Fuzzy)
public static CandidatePhrase containsFuzzy(java.util.Set<CandidatePhrase> words, CandidatePhrase w, int minLen4Fuzzy)
public java.util.Map<java.lang.String,java.lang.Integer> getGeneralWordClassClusters()
public void setGeneralWordClassClusters(java.util.Map<java.lang.String,java.lang.Integer> generalWordClassClusters)
public java.util.Map<java.lang.String,java.lang.String> getWordShapeCache()
public java.util.Map<java.lang.String,java.lang.Class<? extends TypesafeMap.Key<java.lang.String>>> getAnswerClass()
public java.util.Map<java.lang.String,java.util.Map<java.lang.Class,java.lang.Object>> getIgnoreWordswithClassesDuringSelection()
public void addSeedWords(java.lang.String label, java.util.Collection<CandidatePhrase> seeds) throws java.lang.Exception
java.lang.Exception