public class GetPatternsFromDataMultiClass<E extends Pattern>
extends java.lang.Object
implements java.io.Serializable
The multi-threaded class (nthread parameter for number of
threads) takes as input one or more files (text by default) and, for each label, a file of seed words.
To use the default options, run
java -mx1000m edu.stanford.nlp.patterns.GetPatternsFromDataMultiClass -file text_file -seedWordsFiles label1,seedwordlist1;label2,seedwordlist2;... -outDir output_directory (optional)
fileFormat: (Optional) Default is text. Valid values are text
(or txt) and ser, where the serialized file is of the type Map<String,
List<CoreLabel>>.
file: (Required) Input file(s) (default assumed text). Can be
one or more of (concatenated by comma or semi-colon): file, directory, files
with regex in the filename (for example: "mydir/health-.*-processed.txt")
seedWordsFiles: (Required)
label1,file_seed_words1;label2,file_seed_words2;... where file_seed_words are
files with list of seed words, one in each line
outDir: (Optional) output directory where visualization/output
files are stored
For other flags, see individual comments for each flag.
To use a properties file, see
projects/core/data/edu/stanford/nlp/patterns/surface/example.properties or patterns/example.properties (depends on which codebase you are using)
as an example for the flags and their brief descriptions. Run the code as:
java -mx1000m -cp classpath edu.stanford.nlp.patterns.GetPatternsFromDataMultiClass -props dir-as-above/example.properties
IMPORTANT: Many flags are described in the classes
ConstantsAndVariables, CreatePatterns, and
PhraseScorer.
| Modifier and Type | Class and Description |
|---|---|
static class |
GetPatternsFromDataMultiClass.Flags |
static class |
GetPatternsFromDataMultiClass.LabelWithSeedWords
Warning: sets labels of words that are not in the given seed set as O!!!
|
static class |
GetPatternsFromDataMultiClass.PatternScoring
RlogF is from Riloff 1996, when R's denominator is (pos+neg+unlabeled)
|
| Modifier and Type | Field and Description |
|---|---|
ConstantsAndVariables |
constVars |
CreatePatterns |
createPats |
java.util.Map<java.lang.String,Counter<E>> |
currentPatternWeights |
java.util.Map<java.lang.String,TwoDimensionalCounter<E,CandidatePhrase>> |
patternsandWords |
TwoDimensionalCounter<java.lang.String,ConstantsAndVariables.ScorePhraseMeasures> |
phInPatScoresCache |
ScorePhrases |
scorePhrases |
java.util.Map<java.lang.String,java.util.Set<java.lang.String>> |
wordsForOtherClass |
java.util.Map<java.lang.String,TwoDimensionalCounter<CandidatePhrase,E>> |
wordsPatExtracted |
| Constructor and Description |
|---|
GetPatternsFromDataMultiClass(java.util.Properties props,
java.util.Map<java.lang.String,DataInstance> sents,
java.util.Map<java.lang.String,java.util.Set<CandidatePhrase>> seedSets,
boolean labelUsingSeedSets) |
GetPatternsFromDataMultiClass(java.util.Properties props,
java.util.Map<java.lang.String,DataInstance> sents,
java.util.Map<java.lang.String,java.util.Set<CandidatePhrase>> seedSets,
boolean labelUsingSeedSets,
java.util.Map<java.lang.String,java.lang.Class<? extends TypesafeMap.Key<java.lang.String>>> answerClass) |
GetPatternsFromDataMultiClass(java.util.Properties props,
java.util.Map<java.lang.String,DataInstance> sents,
java.util.Map<java.lang.String,java.util.Set<CandidatePhrase>> seedSets,
boolean labelUsingSeedSets,
java.util.Map<java.lang.String,java.lang.Class<? extends TypesafeMap.Key<java.lang.String>>> answerClass,
java.util.Map<java.lang.String,java.lang.Class> generalizeClasses,
java.util.Map<java.lang.String,java.util.Map<java.lang.Class,java.lang.Object>> ignoreClasses)
generalizeClasses basically maps label strings to a map of generalized
strings and the corresponding class; ignoreClasses have to be boolean.
|
GetPatternsFromDataMultiClass(java.util.Properties props,
java.util.Map<java.lang.String,DataInstance> sents,
java.util.Set<CandidatePhrase> seedSet,
boolean labelUsingSeedSets,
java.lang.Class answerClass,
java.lang.String answerLabel) |
GetPatternsFromDataMultiClass(java.util.Properties props,
java.util.Map<java.lang.String,DataInstance> sents,
java.util.Set<CandidatePhrase> seedSet,
boolean labelUsingSeedSets,
java.lang.Class answerClass,
java.lang.String answerLabel,
java.util.Map<java.lang.String,java.lang.Class> generalizeClasses,
java.util.Map<java.lang.Class,java.lang.Object> ignoreClasses) |
GetPatternsFromDataMultiClass(java.util.Properties props,
java.util.Map<java.lang.String,DataInstance> sents,
java.util.Set<CandidatePhrase> seedSet,
boolean labelUsingSeedSets,
java.lang.String answerLabel) |
GetPatternsFromDataMultiClass(java.util.Properties props,
java.util.Map<java.lang.String,DataInstance> sents,
java.util.Set<CandidatePhrase> seedSet,
boolean labelUsingSeedSets,
java.lang.String answerLabel,
java.util.Map<java.lang.String,java.lang.Class> generalizeClasses,
java.util.Map<java.lang.Class,java.lang.Object> ignoreClasses) |
| Modifier and Type | Method and Description |
|---|---|
static void |
countResults(java.util.List<CoreLabel> doc,
Counter<java.lang.String> entityTP,
Counter<java.lang.String> entityFP,
Counter<java.lang.String> entityFN,
java.lang.String background,
Counter<java.lang.String> wordTP,
Counter<java.lang.String> wordTN,
Counter<java.lang.String> wordFP,
Counter<java.lang.String> wordFN,
java.lang.Class<? extends TypesafeMap.Key<java.lang.String>> whichClassToCompare,
boolean evalPerEntity) |
static boolean |
countResultsPerEntity(java.util.List<CoreLabel> doc,
Counter<java.lang.String> entityTP,
Counter<java.lang.String> entityFP,
Counter<java.lang.String> entityFN,
java.lang.String background,
Counter<java.lang.String> wordTP,
Counter<java.lang.String> wordTN,
Counter<java.lang.String> wordFP,
Counter<java.lang.String> wordFN,
java.lang.Class<? extends TypesafeMap.Key<java.lang.String>> whichClassToCompare)
COPIED from CRFClassifier: Count the successes and failures of the model on
the given document.
|
static void |
countResultsPerToken(java.util.List<CoreLabel> doc,
Counter<java.lang.String> entityTP,
Counter<java.lang.String> entityFP,
Counter<java.lang.String> entityFN,
java.lang.String background,
Counter<java.lang.String> wordTP,
Counter<java.lang.String> wordTN,
Counter<java.lang.String> wordFP,
Counter<java.lang.String> wordFN,
java.lang.Class<? extends TypesafeMap.Key<java.lang.String>> whichClassToCompare)
Count the successes and failures of the model on the given document
***token-based***.
|
static java.lang.String |
elapsedTime(java.util.Date d1,
java.util.Date d2) |
void |
evaluate(java.util.Map<java.lang.String,DataInstance> testSentences,
boolean evalPerEntity) |
static <D> Counter<D> |
FScore(Counter<D> precision,
Counter<D> recall,
double beta) |
double |
FScore(double precision,
double recall,
double beta) |
static java.util.List<java.io.File> |
getAllFiles(java.lang.String file) |
java.util.Map<java.lang.String,java.lang.String> |
getAllOptions() |
static void |
getFeatures(SemanticGraph graph,
IndexedWord vertex,
boolean isHead,
java.util.Collection<java.lang.String> features,
GrammaticalRelation reln) |
java.util.Map<java.lang.String,Counter<E>> |
getLearnedPatterns() |
Counter<E> |
getLearnedPatterns(java.lang.String label) |
java.util.Map<java.lang.String,java.util.Map<java.lang.Integer,Counter<E>>> |
getLearnedPatternsEachIter() |
java.util.Map<java.lang.Integer,Counter<E>> |
getLearnedPatternsEachIter(java.lang.String label) |
java.util.Set<java.lang.String> |
getNonBackgroundLabels(CoreLabel l) |
PatternsForEachToken |
getPatsForEachToken() |
Counter<E> |
getPatterns(java.lang.String label,
java.util.Set<E> alreadyIdentifiedPatterns,
E p0,
Counter<CandidatePhrase> p0Set,
java.util.Set<E> ignorePatterns) |
static java.lang.Class |
getPatternScoringClass(GetPatternsFromDataMultiClass.PatternScoring patternScoring) |
static java.util.List<java.lang.Integer> |
getSubListIndex(java.lang.String[] l1,
java.lang.String[] l2,
java.lang.String[] subl2,
java.util.Set<java.lang.String> doNotLabelTheseWords,
java.util.HashSet<java.lang.String> seenFuzzyMatches,
int minLen4Fuzzy,
boolean fuzzyMatch,
boolean ignoreCaseSeedMatch)
If l1 is a part of l2, it finds the starting index of l1 in l2. If l1 is not
a sub-array of l2, then it returns -1. Note that l2 should have the exact
elements and order as in l1.
|
static <E> java.util.List<java.util.List<E>> |
getThreadBatches(java.util.List<E> keyset,
int numThreads) |
void |
iterateExtractApply() |
void |
iterateExtractApply(java.util.Map<java.lang.String,E> p0,
java.util.Map<java.lang.String,Counter<CandidatePhrase>> p0Set,
java.lang.String wordsOutputFile,
java.lang.String sentsOutFile,
java.lang.String patternsOutFile,
java.util.Map<java.lang.String,java.util.Set<E>> ignorePatterns) |
void |
labelWords(java.lang.String label,
java.util.Map<java.lang.String,DataInstance> sents,
java.util.Collection<CandidatePhrase> identifiedWords) |
void |
labelWords(java.lang.String label,
java.util.Map<java.lang.String,DataInstance> sents,
java.util.Collection<CandidatePhrase> identifiedWords,
java.lang.String outFile,
CollectionValuedMap<E,Triple<java.lang.String,java.lang.Integer,java.lang.Integer>> matchedTokensByPat) |
static <E extends Pattern> |
loadFromSavedPatternsWordsDir(GetPatternsFromDataMultiClass<E> model,
java.util.Properties props,
boolean labelSentsUsingModel,
boolean applyPatsUsingModel,
int numIterationsOfSavedPatternsToLoad) |
static void |
main(java.lang.String[] args) |
static java.lang.String |
matchedTokensByPhraseJsonString() |
static java.lang.String |
matchedTokensByPhraseJsonString(java.lang.String phrase) |
static <E> Counter<E> |
normalizeSoftMaxMinMaxScores(Counter<E> scores,
boolean minMaxNorm,
boolean softmax,
boolean oneMinusSoftMax) |
void |
processSents(java.util.Map<java.lang.String,DataInstance> sents,
java.lang.Boolean deleteExistingIndex) |
static Pair |
processSents(java.util.Properties props,
java.util.Set<java.lang.String> labels) |
static java.util.Map<java.lang.String,java.util.Set<CandidatePhrase>> |
readSeedWords(java.util.Properties props) |
static java.util.Map<java.lang.String,java.util.Set<CandidatePhrase>> |
readSeedWords(java.lang.String seedWordsFiles) |
static java.util.Map<java.lang.String,java.util.Set<CandidatePhrase>> |
readSeedWordsFromJSONString(java.lang.String str) |
void |
removeOverLappingLabels(java.util.Map<java.lang.String,DataInstance> sents)
If a token is labeled for two or more labels, then keep the one that has the longest matching phrase.
|
static <E extends Pattern> |
run(java.util.Properties props)
Execute the system given a properties file or object.
|
static void |
runLabelSeedWords(java.util.Map<java.lang.String,DataInstance> sents,
java.lang.Class answerclass,
java.lang.String label,
java.util.Collection<CandidatePhrase> seedWords,
ConstantsAndVariables constVars,
boolean overwriteExistingLabels)
Warning: sets labels of words that are not in the given seed set as O!!!
|
static java.util.Map<java.lang.String,DataInstance> |
runPOSNEROnTokens(java.util.List<CoreMap> sentsCM,
java.lang.String posModelPath,
boolean useTargetNERRestriction,
java.lang.String prefix,
boolean useTargetParserParentRestriction,
java.lang.String numThreads,
PatternFactory.PatternType type) |
static java.util.Map<java.lang.String,DataInstance> |
runPOSNERParseOnTokens(java.util.Map<java.lang.String,DataInstance> sents,
java.util.Properties propsoriginal) |
void |
setLearnedPatterns(Counter<E> patterns,
java.lang.String label) |
static <E> java.util.List<java.util.List<E>> |
splitIntoNumThreadsWithSampling(java.util.List<E> c,
int n,
int numThreads) |
static int |
tokenize(java.util.Iterator<java.lang.String> textReader,
java.lang.String posModelPath,
boolean lowercase,
boolean useTargetNERRestriction,
java.lang.String sentIDPrefix,
boolean useTargetParserParentRestriction,
java.lang.String numThreads,
boolean batchProcessSents,
int numMaxSentencesPerBatchFile,
java.io.File saveSentencesSerDirFile,
java.util.Map<java.lang.String,DataInstance> sents,
int numFilesTillNow,
PatternFactory.PatternType type) |
static void |
writeColumnOutput(java.lang.String outFile,
boolean batchProcessSents,
java.util.Map<java.lang.String,java.lang.Class<? extends TypesafeMap.Key<java.lang.String>>> answerclasses) |
void |
writeLabeledData(java.lang.String outFile) |
public java.util.Map<java.lang.String,java.util.Set<java.lang.String>> wordsForOtherClass
public java.util.Map<java.lang.String,TwoDimensionalCounter<CandidatePhrase,E extends Pattern>> wordsPatExtracted
public ScorePhrases scorePhrases
public ConstantsAndVariables constVars
public CreatePatterns createPats
public java.util.Map<java.lang.String,TwoDimensionalCounter<E extends Pattern,CandidatePhrase>> patternsandWords
public java.util.Map<java.lang.String,Counter<E extends Pattern>> currentPatternWeights
public TwoDimensionalCounter<java.lang.String,ConstantsAndVariables.ScorePhraseMeasures> phInPatScoresCache
public GetPatternsFromDataMultiClass(java.util.Properties props,
java.util.Map<java.lang.String,DataInstance> sents,
java.util.Set<CandidatePhrase> seedSet,
boolean labelUsingSeedSets,
java.lang.String answerLabel)
throws java.io.IOException,
java.lang.InstantiationException,
java.lang.IllegalAccessException,
java.lang.IllegalArgumentException,
java.lang.reflect.InvocationTargetException,
java.lang.NoSuchMethodException,
java.lang.SecurityException,
java.lang.InterruptedException,
java.util.concurrent.ExecutionException,
java.lang.ClassNotFoundException
Throws: java.io.IOException, java.lang.InstantiationException, java.lang.IllegalAccessException, java.lang.IllegalArgumentException, java.lang.reflect.InvocationTargetException, java.lang.NoSuchMethodException, java.lang.SecurityException, java.lang.InterruptedException, java.util.concurrent.ExecutionException, java.lang.ClassNotFoundException
public GetPatternsFromDataMultiClass(java.util.Properties props,
java.util.Map<java.lang.String,DataInstance> sents,
java.util.Set<CandidatePhrase> seedSet,
boolean labelUsingSeedSets,
java.lang.Class answerClass,
java.lang.String answerLabel)
throws java.io.IOException,
java.lang.InstantiationException,
java.lang.IllegalAccessException,
java.lang.IllegalArgumentException,
java.lang.reflect.InvocationTargetException,
java.lang.NoSuchMethodException,
java.lang.SecurityException,
java.lang.InterruptedException,
java.util.concurrent.ExecutionException,
java.lang.ClassNotFoundException
Throws: java.io.IOException, java.lang.InstantiationException, java.lang.IllegalAccessException, java.lang.IllegalArgumentException, java.lang.reflect.InvocationTargetException, java.lang.NoSuchMethodException, java.lang.SecurityException, java.lang.InterruptedException, java.util.concurrent.ExecutionException, java.lang.ClassNotFoundException
public GetPatternsFromDataMultiClass(java.util.Properties props,
java.util.Map<java.lang.String,DataInstance> sents,
java.util.Set<CandidatePhrase> seedSet,
boolean labelUsingSeedSets,
java.lang.String answerLabel,
java.util.Map<java.lang.String,java.lang.Class> generalizeClasses,
java.util.Map<java.lang.Class,java.lang.Object> ignoreClasses)
throws java.io.IOException,
java.lang.InstantiationException,
java.lang.IllegalAccessException,
java.lang.IllegalArgumentException,
java.lang.reflect.InvocationTargetException,
java.lang.NoSuchMethodException,
java.lang.SecurityException,
java.lang.InterruptedException,
java.util.concurrent.ExecutionException,
java.lang.ClassNotFoundException
Throws: java.io.IOException, java.lang.InstantiationException, java.lang.IllegalAccessException, java.lang.IllegalArgumentException, java.lang.reflect.InvocationTargetException, java.lang.NoSuchMethodException, java.lang.SecurityException, java.lang.InterruptedException, java.util.concurrent.ExecutionException, java.lang.ClassNotFoundException
public GetPatternsFromDataMultiClass(java.util.Properties props,
java.util.Map<java.lang.String,DataInstance> sents,
java.util.Set<CandidatePhrase> seedSet,
boolean labelUsingSeedSets,
java.lang.Class answerClass,
java.lang.String answerLabel,
java.util.Map<java.lang.String,java.lang.Class> generalizeClasses,
java.util.Map<java.lang.Class,java.lang.Object> ignoreClasses)
throws java.io.IOException,
java.lang.InstantiationException,
java.lang.IllegalAccessException,
java.lang.IllegalArgumentException,
java.lang.reflect.InvocationTargetException,
java.lang.NoSuchMethodException,
java.lang.SecurityException,
java.lang.InterruptedException,
java.util.concurrent.ExecutionException,
java.lang.ClassNotFoundException
Throws: java.io.IOException, java.lang.InstantiationException, java.lang.IllegalAccessException, java.lang.IllegalArgumentException, java.lang.reflect.InvocationTargetException, java.lang.NoSuchMethodException, java.lang.SecurityException, java.lang.InterruptedException, java.util.concurrent.ExecutionException, java.lang.ClassNotFoundException
public GetPatternsFromDataMultiClass(java.util.Properties props,
java.util.Map<java.lang.String,DataInstance> sents,
java.util.Map<java.lang.String,java.util.Set<CandidatePhrase>> seedSets,
boolean labelUsingSeedSets)
throws java.io.IOException,
java.lang.InstantiationException,
java.lang.IllegalAccessException,
java.lang.IllegalArgumentException,
java.lang.reflect.InvocationTargetException,
java.lang.NoSuchMethodException,
java.lang.SecurityException,
java.lang.ClassNotFoundException,
java.lang.InterruptedException,
java.util.concurrent.ExecutionException
Throws: java.io.IOException, java.lang.InstantiationException, java.lang.IllegalAccessException, java.lang.IllegalArgumentException, java.lang.reflect.InvocationTargetException, java.lang.NoSuchMethodException, java.lang.SecurityException, java.lang.ClassNotFoundException, java.lang.InterruptedException, java.util.concurrent.ExecutionException
public GetPatternsFromDataMultiClass(java.util.Properties props,
java.util.Map<java.lang.String,DataInstance> sents,
java.util.Map<java.lang.String,java.util.Set<CandidatePhrase>> seedSets,
boolean labelUsingSeedSets,
java.util.Map<java.lang.String,java.lang.Class<? extends TypesafeMap.Key<java.lang.String>>> answerClass)
throws java.io.IOException,
java.lang.InstantiationException,
java.lang.IllegalAccessException,
java.lang.IllegalArgumentException,
java.lang.reflect.InvocationTargetException,
java.lang.NoSuchMethodException,
java.lang.SecurityException,
java.lang.InterruptedException,
java.util.concurrent.ExecutionException,
java.lang.ClassNotFoundException
Throws: java.io.IOException, java.lang.InstantiationException, java.lang.IllegalAccessException, java.lang.IllegalArgumentException, java.lang.reflect.InvocationTargetException, java.lang.NoSuchMethodException, java.lang.SecurityException, java.lang.InterruptedException, java.util.concurrent.ExecutionException, java.lang.ClassNotFoundException
public GetPatternsFromDataMultiClass(java.util.Properties props,
java.util.Map<java.lang.String,DataInstance> sents,
java.util.Map<java.lang.String,java.util.Set<CandidatePhrase>> seedSets,
boolean labelUsingSeedSets,
java.util.Map<java.lang.String,java.lang.Class<? extends TypesafeMap.Key<java.lang.String>>> answerClass,
java.util.Map<java.lang.String,java.lang.Class> generalizeClasses,
java.util.Map<java.lang.String,java.util.Map<java.lang.Class,java.lang.Object>> ignoreClasses)
throws java.io.IOException,
java.lang.InstantiationException,
java.lang.IllegalAccessException,
java.lang.IllegalArgumentException,
java.lang.reflect.InvocationTargetException,
java.lang.NoSuchMethodException,
java.lang.SecurityException,
java.lang.InterruptedException,
java.util.concurrent.ExecutionException,
java.lang.ClassNotFoundException
Throws: java.io.IOException, java.lang.SecurityException, java.lang.NoSuchMethodException, java.lang.reflect.InvocationTargetException, java.lang.IllegalArgumentException, java.lang.IllegalAccessException, java.lang.InstantiationException, java.util.concurrent.ExecutionException, java.lang.InterruptedException, java.lang.ClassNotFoundException
public PatternsForEachToken getPatsForEachToken()
public void removeOverLappingLabels(java.util.Map<java.lang.String,DataInstance> sents)
Requires PatternsAnnotations.Ln to be set, which is already done in the runLabelSeedWords function.
public static java.util.Map<java.lang.String,DataInstance> runPOSNERParseOnTokens(java.util.Map<java.lang.String,DataInstance> sents, java.util.Properties propsoriginal)
public static java.util.Map<java.lang.String,DataInstance> runPOSNEROnTokens(java.util.List<CoreMap> sentsCM, java.lang.String posModelPath, boolean useTargetNERRestriction, java.lang.String prefix, boolean useTargetParserParentRestriction, java.lang.String numThreads, PatternFactory.PatternType type)
public static int tokenize(java.util.Iterator<java.lang.String> textReader,
java.lang.String posModelPath,
boolean lowercase,
boolean useTargetNERRestriction,
java.lang.String sentIDPrefix,
boolean useTargetParserParentRestriction,
java.lang.String numThreads,
boolean batchProcessSents,
int numMaxSentencesPerBatchFile,
java.io.File saveSentencesSerDirFile,
java.util.Map<java.lang.String,DataInstance> sents,
int numFilesTillNow,
PatternFactory.PatternType type)
throws java.lang.InterruptedException,
java.util.concurrent.ExecutionException,
java.io.IOException
Throws: java.lang.InterruptedException, java.util.concurrent.ExecutionException, java.io.IOException
public static java.util.List<java.lang.Integer> getSubListIndex(java.lang.String[] l1,
java.lang.String[] l2,
java.lang.String[] subl2,
java.util.Set<java.lang.String> doNotLabelTheseWords,
java.util.HashSet<java.lang.String> seenFuzzyMatches,
int minLen4Fuzzy,
boolean fuzzyMatch,
boolean ignoreCaseSeedMatch)
Parameters:
l1 - array you want to find in l2
l2 -
public static <E> java.util.List<java.util.List<E>> getThreadBatches(java.util.List<E> keyset,
int numThreads)
public static void runLabelSeedWords(java.util.Map<java.lang.String,DataInstance> sents, java.lang.Class answerclass, java.lang.String label, java.util.Collection<CandidatePhrase> seedWords, ConstantsAndVariables constVars, boolean overwriteExistingLabels) throws java.lang.InterruptedException, java.util.concurrent.ExecutionException, java.io.IOException
Throws: java.lang.InterruptedException, java.util.concurrent.ExecutionException, java.io.IOException
public static void getFeatures(SemanticGraph graph, IndexedWord vertex, boolean isHead, java.util.Collection<java.lang.String> features, GrammaticalRelation reln)
public void processSents(java.util.Map<java.lang.String,DataInstance> sents, java.lang.Boolean deleteExistingIndex) throws java.io.IOException, java.lang.ClassNotFoundException
Throws: java.io.IOException, java.lang.ClassNotFoundException
public Counter<E> getPatterns(java.lang.String label, java.util.Set<E> alreadyIdentifiedPatterns, E p0, Counter<CandidatePhrase> p0Set, java.util.Set<E> ignorePatterns) throws java.io.IOException, java.lang.ClassNotFoundException
Throws: java.io.IOException, java.lang.ClassNotFoundException
public static java.lang.Class getPatternScoringClass(GetPatternsFromDataMultiClass.PatternScoring patternScoring)
public static <E> java.util.List<java.util.List<E>> splitIntoNumThreadsWithSampling(java.util.List<E> c,
int n,
int numThreads)
public static <E> Counter<E> normalizeSoftMaxMinMaxScores(Counter<E> scores, boolean minMaxNorm, boolean softmax, boolean oneMinusSoftMax)
public void labelWords(java.lang.String label,
java.util.Map<java.lang.String,DataInstance> sents,
java.util.Collection<CandidatePhrase> identifiedWords)
throws java.io.IOException
Throws: java.io.IOException
public void labelWords(java.lang.String label,
java.util.Map<java.lang.String,DataInstance> sents,
java.util.Collection<CandidatePhrase> identifiedWords,
java.lang.String outFile,
CollectionValuedMap<E,Triple<java.lang.String,java.lang.Integer,java.lang.Integer>> matchedTokensByPat)
throws java.io.IOException
Throws: java.io.IOException
public void iterateExtractApply()
throws java.io.IOException,
java.lang.ClassNotFoundException
Throws: java.io.IOException, java.lang.ClassNotFoundException
public void iterateExtractApply(java.util.Map<java.lang.String,E> p0, java.util.Map<java.lang.String,Counter<CandidatePhrase>> p0Set, java.lang.String wordsOutputFile, java.lang.String sentsOutFile, java.lang.String patternsOutFile, java.util.Map<java.lang.String,java.util.Set<E>> ignorePatterns) throws java.io.IOException, java.lang.ClassNotFoundException
Parameters:
p0 - Null in most cases. Only used for BPB
p0Set - Null in most cases
wordsOutputFile - If null, output is in the output directory
sentsOutFile -
patternsOutFile -
ignorePatterns -
Throws: java.io.IOException, java.lang.ClassNotFoundException
public static java.lang.String matchedTokensByPhraseJsonString(java.lang.String phrase)
public static java.lang.String matchedTokensByPhraseJsonString()
public java.util.Map<java.lang.String,java.util.Map<java.lang.Integer,Counter<E>>> getLearnedPatternsEachIter()
public java.util.Map<java.lang.Integer,Counter<E>> getLearnedPatternsEachIter(java.lang.String label)
public static boolean countResultsPerEntity(java.util.List<CoreLabel> doc, Counter<java.lang.String> entityTP, Counter<java.lang.String> entityFP, Counter<java.lang.String> entityFN, java.lang.String background, Counter<java.lang.String> wordTP, Counter<java.lang.String> wordTN, Counter<java.lang.String> wordFP, Counter<java.lang.String> wordFN, java.lang.Class<? extends TypesafeMap.Key<java.lang.String>> whichClassToCompare)
public static void countResultsPerToken(java.util.List<CoreLabel> doc, Counter<java.lang.String> entityTP, Counter<java.lang.String> entityFP, Counter<java.lang.String> entityFN, java.lang.String background, Counter<java.lang.String> wordTP, Counter<java.lang.String> wordTN, Counter<java.lang.String> wordFP, Counter<java.lang.String> wordFN, java.lang.Class<? extends TypesafeMap.Key<java.lang.String>> whichClassToCompare)
public static void countResults(java.util.List<CoreLabel> doc, Counter<java.lang.String> entityTP, Counter<java.lang.String> entityFP, Counter<java.lang.String> entityFN, java.lang.String background, Counter<java.lang.String> wordTP, Counter<java.lang.String> wordTN, Counter<java.lang.String> wordFP, Counter<java.lang.String> wordFN, java.lang.Class<? extends TypesafeMap.Key<java.lang.String>> whichClassToCompare, boolean evalPerEntity)
public void writeLabeledData(java.lang.String outFile)
throws java.io.IOException,
java.lang.ClassNotFoundException
Throws: java.io.IOException, java.lang.ClassNotFoundException
public static void writeColumnOutput(java.lang.String outFile,
boolean batchProcessSents,
java.util.Map<java.lang.String,java.lang.Class<? extends TypesafeMap.Key<java.lang.String>>> answerclasses)
throws java.io.IOException,
java.lang.ClassNotFoundException
Throws: java.io.IOException, java.lang.ClassNotFoundException
public void evaluate(java.util.Map<java.lang.String,DataInstance> testSentences, boolean evalPerEntity) throws java.io.IOException
Throws: java.io.IOException
public static java.util.List<java.io.File> getAllFiles(java.lang.String file)
public double FScore(double precision,
double recall,
double beta)
public java.util.Set<java.lang.String> getNonBackgroundLabels(CoreLabel l)
public static java.util.Map<java.lang.String,java.util.Set<CandidatePhrase>> readSeedWordsFromJSONString(java.lang.String str)
public static java.util.Map<java.lang.String,java.util.Set<CandidatePhrase>> readSeedWords(java.util.Properties props)
public static java.util.Map<java.lang.String,java.util.Set<CandidatePhrase>> readSeedWords(java.lang.String seedWordsFiles)
public java.util.Map<java.lang.String,java.lang.String> getAllOptions()
public static Pair processSents(java.util.Properties props, java.util.Set<java.lang.String> labels) throws java.io.IOException, java.util.concurrent.ExecutionException, java.lang.InterruptedException, java.lang.ClassNotFoundException
Throws: java.io.IOException, java.util.concurrent.ExecutionException, java.lang.InterruptedException, java.lang.ClassNotFoundException
public static <E extends Pattern> GetPatternsFromDataMultiClass<E> run(java.util.Properties props) throws java.io.IOException, java.lang.ClassNotFoundException, java.lang.IllegalAccessException, java.lang.InterruptedException, java.util.concurrent.ExecutionException, java.lang.InstantiationException, java.lang.NoSuchMethodException, java.lang.reflect.InvocationTargetException, java.sql.SQLException
Parameters:
props -
Throws: java.io.IOException, java.lang.ClassNotFoundException, java.lang.IllegalAccessException, java.lang.InterruptedException, java.util.concurrent.ExecutionException, java.lang.InstantiationException, java.lang.NoSuchMethodException, java.lang.reflect.InvocationTargetException, java.sql.SQLException
public static <E extends Pattern> java.util.Map<E,java.lang.String> loadFromSavedPatternsWordsDir(GetPatternsFromDataMultiClass<E> model, java.util.Properties props, boolean labelSentsUsingModel, boolean applyPatsUsingModel, int numIterationsOfSavedPatternsToLoad) throws java.io.IOException, java.lang.ClassNotFoundException
Throws: java.io.IOException, java.lang.ClassNotFoundException
public static java.lang.String elapsedTime(java.util.Date d1,
java.util.Date d2)
public static void main(java.lang.String[] args)