public class SeqClassifierFlags
extends java.lang.Object
implements java.io.Serializable
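Flags for sequence classifiers. Documentation for general flags and
flags for NER can be found in the Javadoc of NERFeatureFactory.
Documentation for the flags for Chinese word segmentation can be found
in the Javadoc of ChineseSegmenterFeatureFactory.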
IMPORTANT NOTE IF CHANGING THIS FILE: MAKE SURE TO
ONLY ADD NEW VARIABLES AT THE END OF THE LIST OF VARIABLES (and do not
change existing variables)! Otherwise you usually break all
currently serialized classifiers!!! Search for "ADD VARIABLES ABOVE
HERE" below.
Some general flags are described here:
Property Name | Type | Default Value | Description |
---|---|---|---|
useQN | boolean | true | Use Quasi-Newton (L-BFGS) optimization to find minimum. NOTE: Need to set this to false if using other minimizers such as SGD. |
QNsize | int | 25 | Number of previous iterations of Quasi-Newton to store (this increases memory use, but speeds convergence by letting the Quasi-Newton optimization more effectively approximate the second derivative). |
QNsize2 | int | 25 | Number of previous iterations of Quasi-Newton to store (used when pruning features, after the first iteration - the first iteration is with QNsize). |
useInPlaceSGD | boolean | false | Use SGD (tweaking weights in place) to find minimum (more efficient than the old SGD, faster to converge than Quasi-Newton if there is a very large number of samples). Implemented for CRFClassifier. NOTE: Remember to set useQN to false. |
tuneSampleSize | int | -1 | If this number is greater than 0, specifies the number of samples to use for tuning (default is 1000). |
SGDPasses | int | -1 | If this number is greater than 0, specifies the number of SGD passes (over the entire training set) to do before giving up (default is 50). Can be smaller if sample size is very large. |
useSGD | boolean | false | Use SGD to find minimum (can be slow). NOTE: Remember to set useQN to false |
useSGDtoQN | boolean | false | Use SGD (SGD version selected by useInPlaceSGD or useSGD) for a certain number of passes (SGDPasses) and then switch to QN. Gives the quick initial convergence of SGD, with the desired convergence criterion of QN (there is some ramp-up time for QN). NOTE: Remember to set useQN to false. |
evaluateIters | int | 0 | If this number is greater than 0, evaluates on the test set every so often while minimizing. Implemented for CRFClassifier. |
evalCmd | String |  | If specified (and evaluateIters is set), runs the specified command-line command during evaluation (instead of the default CoNLL-like NER evaluation). |
evaluateTrain | boolean | false | If specified (and evaluateIters is set), also evaluate on training set (can be expensive) |
tokenizerOptions | String | (null) | Extra options to supply to the tokenizer when creating it. |
tokenizerFactory | String | (null) | A different tokenizer factory to use if the ReaderAndWriter in question uses tokenizers. |
Modifier and Type | Class and Description |
---|---|
static class |
SeqClassifierFlags.SlashHyphenEnum |
Constructor and Description |
---|
SeqClassifierFlags()
Create a new SeqClassifierFlags object initialized with default values.
|
SeqClassifierFlags(java.util.Properties props)
Create a new SeqClassifierFlags object and initialize it using values in
the Properties object.
|
SeqClassifierFlags(java.util.Properties props,
boolean printProps)
Create a new SeqClassifierFlags object and initialize it using values in
the Properties object.
|
Modifier and Type | Method and Description |
---|---|
static java.util.Map<java.lang.String,java.lang.Integer> |
flagsToNumArgs() |
java.lang.String |
getNotNullTrueStringRep()
Note that this does *not* return the string representation of arrays, lists, and
enums.
|
void |
setProperties(java.util.Properties props)
Initialize this object using values in Properties object.
|
void |
setProperties(java.util.Properties props,
boolean printProps)
Initialize using values in Properties file.
|
java.lang.String |
toString()
Print the properties specified by this object.
|
public static final java.lang.String DEFAULT_BACKGROUND_SYMBOL
public boolean useNGrams
public boolean conjoinShapeNGrams
public boolean lowercaseNGrams
public boolean dehyphenateNGrams
public boolean usePrev
public boolean useNext
public boolean useTags
public boolean useWordPairs
public boolean useGazettes
public boolean useSequences
public boolean usePrevSequences
public boolean useNextSequences
public boolean useLongSequences
public boolean useBoundarySequences
public boolean useTaggySequences
public boolean useExtraTaggySequences
public boolean dontExtendTaggy
public boolean useTaggySequencesShapeInteraction
public boolean strictlyZeroethOrder
public boolean strictlyFirstOrder
public boolean strictlySecondOrder
public boolean strictlyThirdOrder
public java.lang.String entitySubclassification
public boolean retainEntitySubclassification
public boolean useGazettePhrases
public boolean makeConsistent
public boolean useViterbi
public int[] binnedLengths
public boolean verboseMode
public boolean useSum
public double tolerance
public java.lang.String printFeatures
public boolean useSymTags
public boolean useSymWordPairs
public java.lang.String printClassifier
public int printClassifierParam
public boolean intern
public boolean intern2
public boolean selfTest
public boolean sloppyGazette
public boolean cleanGazette
public boolean noMidNGrams
public int maxNGramLeng
public boolean useReverse
public boolean greekifyNGrams
public boolean useParenMatching
public boolean useLemmas
public boolean usePrevNextLemmas
public boolean normalizeTerms
public boolean normalizeTimex
public boolean useNB
public boolean useQN
public boolean useFloat
public int QNsize
public int QNsize2
public int maxIterations
public int wordShape
public boolean useShapeStrings
public boolean useTypeSeqs
public boolean useTypeSeqs2
public boolean useTypeSeqs3
public boolean useDisjunctive
public int disjunctionWidth
public boolean useDisjunctiveShapeInteraction
public boolean useDisjShape
public boolean useWord
public boolean useClassFeature
public boolean useShapeConjunctions
public boolean useWordTag
public boolean useNPHead
public boolean useNPGovernor
public boolean useHeadGov
public boolean useLastRealWord
public boolean useNextRealWord
public boolean useOccurrencePatterns
public boolean useTypeySequences
public boolean justify
public boolean normalize
public java.lang.String priorType
public double sigma
public double epsilon
public int beamSize
public int maxLeft
public int maxRight
public boolean usePosition
public boolean useBeginSent
public boolean useGazFeatures
public boolean useMoreGazFeatures
public boolean useAbbr
public boolean useMinimalAbbr
public boolean useAbbr1
public boolean useMinimalAbbr1
public boolean useMoreAbbr
public boolean deleteBlankLines
public boolean useGENIA
public boolean useTOK
public boolean useABSTR
public boolean useABSTRFreqDict
public boolean useABSTRFreq
public boolean useFREQ
public boolean useABGENE
public boolean useWEB
public boolean useWEBFreqDict
public boolean useIsURL
public boolean useURLSequences
public boolean useIsDateRange
public boolean useEntityTypes
public boolean useEntityTypeSequences
public boolean useEntityRule
public boolean useOrdinal
public boolean useACR
public boolean useANTE
public boolean useMoreTags
public boolean useChunks
public boolean useChunkySequences
public boolean usePrevVB
public boolean useNextVB
public boolean useVB
public boolean subCWGaz
public java.lang.String documentReader
public java.lang.String map
public boolean useWideDisjunctive
public int wideDisjunctionWidth
public boolean useRadical
public boolean useBigramInTwoClique
public java.lang.String morphFeatureFile
public boolean useReverseAffix
public int charHalfWindow
public boolean useWord1
public boolean useWord2
public boolean useWord3
public boolean useWord4
public boolean useRad1
public boolean useRad2
public boolean useWordn
public boolean useCTBPre1
public boolean useCTBSuf1
public boolean useASBCPre1
public boolean useASBCSuf1
public boolean usePKPre1
public boolean usePKSuf1
public boolean useHKPre1
public boolean useHKSuf1
public boolean useCTBChar2
public boolean useASBCChar2
public boolean useHKChar2
public boolean usePKChar2
public boolean useRule2
public boolean useDict2
public boolean useOutDict2
public java.lang.String outDict2
public boolean useDictleng
public boolean useDictCTB2
public boolean useDictASBC2
public boolean useDictPK2
public boolean useDictHK2
public boolean useBig5
public boolean useNegDict2
public boolean useNegDict3
public boolean useNegDict4
public boolean useNegCTBDict2
public boolean useNegCTBDict3
public boolean useNegCTBDict4
public boolean useNegASBCDict2
public boolean useNegASBCDict3
public boolean useNegASBCDict4
public boolean useNegHKDict2
public boolean useNegHKDict3
public boolean useNegHKDict4
public boolean useNegPKDict2
public boolean useNegPKDict3
public boolean useNegPKDict4
public boolean usePre
public boolean useSuf
public boolean useRule
public boolean useHk
public boolean useMsr
public boolean useMSRChar2
public boolean usePk
public boolean useAs
public boolean useFilter
public boolean largeChSegFile
public boolean useRad2b
public boolean keepEnglishWhitespaces
public boolean keepAllWhitespaces
public boolean sighanPostProcessing
public boolean useChPos
public java.lang.String normalizationTable
public java.lang.String dictionary
public java.lang.String serializedDictionary
public java.lang.String dictionary2
public java.lang.String normTableEncoding
public java.lang.String sighanCorporaDict
public boolean useWordShapeGaz
public java.lang.String wordShapeGaz
public boolean splitDocuments
public boolean printXML
public boolean useSeenFeaturesOnly
public java.lang.String lastNameList
public java.lang.String maleNameList
public java.lang.String femaleNameList
public transient java.lang.String trainFile
public transient java.lang.String adaptFile
public transient java.lang.String devFile
public transient java.lang.String testFile
public transient java.lang.String textFile
public transient java.lang.String textFiles
public transient boolean readStdin
public transient java.lang.String outputFile
public transient java.lang.String loadClassifier
public transient java.lang.String loadTextClassifier
public transient java.lang.String loadJarClassifier
public transient java.lang.String loadAuxClassifier
public transient java.lang.String serializeTo
public transient java.lang.String serializeToText
public transient int interimOutputFreq
public transient java.lang.String initialWeights
public transient java.util.List<java.lang.String> gazettes
public transient java.lang.String selfTrainFile
public java.lang.String inputEncoding
public boolean bioSubmitOutput
public int numRuns
public java.lang.String answerFile
public java.lang.String altAnswerFile
public java.lang.String dropGaz
public java.lang.String printGazFeatures
public int numStartLayers
public boolean dump
public boolean mergeTags
public boolean splitOnHead
public int featureCountThreshold
public double featureWeightThreshold
public java.lang.String featureFactory
public java.lang.Object[] featureFactoryArgs
public java.lang.String backgroundSymbol
public boolean useObservedSequencesOnly
public int maxDocSize
public boolean printProbs
public boolean printFirstOrderProbs
public boolean saveFeatureIndexToDisk
public boolean removeBackgroundSingletonFeatures
public boolean doGibbs
public int numSamples
public boolean useNERPrior
public boolean useAcqPrior
public boolean useUniformPrior
public boolean useMUCFeatures
public double annealingRate
public java.lang.String annealingType
public java.lang.String loadProcessedData
public boolean initViterbi
public boolean useUnknown
public boolean checkNameList
public boolean useSemPrior
public boolean useFirstWord
public boolean useNumberFeature
public int ocrFold
public transient boolean ocrTrain
public java.lang.String classifierType
public java.lang.String svmModelFile
public java.lang.String inferenceType
public boolean useLemmaAsWord
public java.lang.String type
public java.lang.String readerAndWriter
public java.util.List<java.lang.String> comboProps
public boolean usePrediction
public boolean useAltGazFeatures
public java.lang.String gazFilesFile
public boolean usePrediction2
public java.lang.String baseTrainDir
public java.lang.String baseTestDir
public java.lang.String trainFiles
public java.lang.String trainFileList
public java.lang.String testFiles
public java.lang.String trainDirs
public java.lang.String testDirs
public boolean useOnlySeenWeights
public java.lang.String predProp
public CoreLabel pad
public boolean useObservedFeaturesOnly
public java.lang.String distSimLexicon
public boolean useDistSim
public int removeTopN
public int numTimesRemoveTopN
public double randomizedRatio
public double removeTopNPercent
public int purgeFeatures
public boolean booleanFeatures
public boolean iobWrapper
public boolean iobTags
public boolean useSegmentation
public boolean memoryThrift
public boolean timitDatum
public java.lang.String serializeDatasetsDir
public java.lang.String loadDatasetsDir
public java.lang.String pushDir
public boolean purgeDatasets
public boolean keepOBInMemory
public boolean fakeDataset
public boolean restrictTransitionsTimit
public int numDatasetsPerFile
public boolean useTitle
public boolean lowerNewgeneThreshold
public boolean useEitherSideWord
public boolean useEitherSideDisjunctive
public boolean twoStage
public java.lang.String crfType
public int featureThreshold
public java.lang.String featThreshFile
public double featureDiffThresh
public int numTimesPruneFeatures
public double newgeneThreshold
public boolean doAdaptation
public boolean useInternal
public boolean useExternal
public double selfTrainConfidenceThreshold
public int selfTrainIterations
public int selfTrainWindowSize
public boolean useHuber
public boolean useQuartic
public double adaptSigma
public int numFolds
public int startFold
public int endFold
public boolean cacheNGrams
public java.lang.String outputFormat
public boolean useSMD
public boolean useSGDtoQN
public boolean useStochasticQN
public boolean useScaledSGD
public int scaledSGDMethod
public int SGDPasses
public int QNPasses
public boolean tuneSGD
public StochasticCalculateMethods stochasticMethod
public double initialGain
public int stochasticBatchSize
public boolean useSGD
public double gainSGD
public boolean useHybrid
public int hybridCutoffIteration
public boolean outputIterationsToFile
public boolean testObjFunction
public boolean testVariance
public int SGD2QNhessSamples
public boolean testHessSamples
public int CRForder
public int CRFwindow
public boolean estimateInitial
public transient java.lang.String biasedTrainFile
public transient java.lang.String confusionMatrix
public java.lang.String outputEncoding
public boolean useKBest
public java.lang.String searchGraphPrefix
public double searchGraphPrune
public int kBest
public boolean useFeaturesC4gram
public boolean useFeaturesC5gram
public boolean useFeaturesC6gram
public boolean useFeaturesCpC4gram
public boolean useFeaturesCpC5gram
public boolean useFeaturesCpC6gram
public boolean useUnicodeType
public boolean useUnicodeType4gram
public boolean useUnicodeType5gram
public boolean use4Clique
public boolean useUnicodeBlock
public boolean useShapeStrings1
public boolean useShapeStrings3
public boolean useShapeStrings4
public boolean useShapeStrings5
public boolean useGoodForNamesCpC
public boolean useDictionaryConjunctions
public boolean expandMidDot
public int printFeaturesUpto
public boolean useDictionaryConjunctions3
public boolean useWordUTypeConjunctions2
public boolean useWordUTypeConjunctions3
public boolean useWordShapeConjunctions2
public boolean useWordShapeConjunctions3
public boolean useMidDotShape
public boolean augmentedDateChars
public boolean suppressMidDotPostprocessing
public boolean printNR
public java.lang.String classBias
public boolean printLabelValue
public boolean useRobustQN
public boolean combo
public boolean useGenericFeatures
public boolean verboseForTrueCasing
public java.lang.String trainHierarchical
public java.lang.String domain
public boolean baseline
public java.lang.String transferSigmas
public boolean doFE
public boolean restrictLabels
public boolean announceObjectBankEntries
public double l1reg
public java.lang.String mixedCaseMapFile
public java.lang.String auxTrueCaseModels
public boolean use2W
public boolean useLC
public boolean useYetMoreCpCShapes
public boolean useIfInteger
public java.lang.String exportFeatures
public boolean useInPlaceSGD
public boolean useTopics
public int evaluateIters
public java.lang.String evalCmd
public boolean evaluateTrain
public int tuneSampleSize
public boolean usePhraseFeatures
public boolean usePhraseWords
public boolean usePhraseWordTags
public boolean usePhraseWordSpecialTags
public boolean useCommonWordsFeature
public boolean useProtoFeatures
public boolean useWordnetFeatures
public java.lang.String tokenFactory
public java.lang.Object[] tokenFactoryArgs
public java.lang.String tokensAnnotationClassName
public transient java.lang.String tokenizerOptions
public transient java.lang.String tokenizerFactory
public boolean useCorefFeatures
public java.lang.String wikiFeatureDbFile
public boolean useNoisyNonNoisyFeature
public boolean useYear
public boolean useSentenceNumber
public boolean useLabelSource
public boolean casedDistSim
public java.lang.String distSimFileFormat
public int distSimMaxBits
public boolean numberEquivalenceDistSim
public java.lang.String unknownWordDistSimClass
public boolean useNeighborNGrams
public java.util.function.Function<java.lang.String,java.lang.String> wordFunction
public static final java.lang.String DEFAULT_PLAIN_TEXT_READER
public java.lang.String plainTextDocumentReaderAndWriter
public boolean useBagOfWords
public boolean evaluateBackground
public int numLopExpert
public transient java.lang.String initialLopScales
public transient java.lang.String initialLopWeights
public boolean includeFullCRFInLOP
public boolean backpropLopTraining
public boolean randomLopWeights
public boolean randomLopFeatureSplit
public boolean nonLinearCRF
public boolean secondOrderNonLinear
public int numHiddenUnits
public boolean useOutputLayer
public boolean useHiddenLayer
public boolean gradientDebug
public boolean checkGradient
public boolean useSigmoid
public boolean skipOutputRegularization
public boolean sparseOutputLayer
public boolean tieOutputLayer
public boolean blockInitialize
public boolean softmaxOutputLayer
public java.lang.String loadBisequenceClassifierEn
public java.lang.String loadBisequenceClassifierCh
public java.lang.String bisequenceClassifierPropEn
public java.lang.String bisequenceClassifierPropCh
public java.lang.String bisequenceTestFileEn
public java.lang.String bisequenceTestFileCh
public java.lang.String bisequenceTestOutputEn
public java.lang.String bisequenceTestOutputCh
public java.lang.String bisequenceTestAlignmentFile
public java.lang.String bisequenceAlignmentTestOutput
public int bisequencePriorType
public java.lang.String bisequenceAlignmentPriorPenaltyCh
public java.lang.String bisequenceAlignmentPriorPenaltyEn
public double alignmentPruneThreshold
public double alignmentDecodeThreshold
public boolean factorInAlignmentProb
public boolean useChromaticSampling
public boolean useSequentialScanSampling
public int maxAllowedChromaticSize
public boolean keepEmptySentences
public boolean useBilingualNERPrior
public int samplingSpeedUpThreshold
public java.lang.String entityMatrixCh
public java.lang.String entityMatrixEn
public int multiThreadGibbs
public boolean matchNERIncentive
public boolean useEmbedding
public boolean prependEmbedding
public java.lang.String embeddingWords
public java.lang.String embeddingVectors
public boolean transitionEdgeOnly
public double priorLambda
public boolean addCapitalFeatures
public int arbitraryInputLayerSize
public boolean noEdgeFeature
public boolean terminateOnEvalImprovement
public int terminateOnEvalImprovementNumOfEpoch
public boolean useMemoryEvaluator
public boolean suppressTestDebug
public boolean useOWLQN
public boolean printWeights
public int totalDataSlice
public int numOfSlices
public boolean regularizeSoftmaxTieParam
public double softmaxTieLambda
public int totalFeatureSlice
public int numOfFeatureSlices
public boolean addBiasToEmbedding
public boolean hardcodeSoftmaxOutputWeights
public boolean useNERPriorBIO
public java.lang.String entityMatrix
public int multiThreadClassifier
public boolean useDualDecomp
public boolean biAlignmentPriorIsPMI
public boolean dampDDStepSizeWithAlignmentProb
public boolean dualDecompAlignment
public double dualDecompInitialStepSizeAlignment
public boolean dualDecompNotBIO
public java.lang.String berkeleyAlignerLoadPath
public boolean useBerkeleyAlignerForViterbi
public boolean useBerkeleyCompetitivePosterior
public boolean useDenero
public double alignDDAlpha
public boolean factorInBiEdgePotential
public boolean noNeighborConstraints
public boolean includeC2EViterbi
public boolean initWithPosterior
public int nerSkipFirstK
public int nerSlowerTimes
public boolean powerAlignProb
public boolean powerAlignProbAsAddition
public boolean initWithNERPosterior
public boolean applyNERPenalty
public boolean printFactorTable
public boolean useAdaGradFOBOS
public double initRate
public boolean groupByFeatureTemplate
public boolean groupByOutputClass
public double priorAlpha
public java.lang.String splitWordRegex
public boolean groupByInput
public boolean groupByHiddenUnit
public java.lang.String unigramLM
public java.lang.String bigramLM
public int wordSegBeamSize
public java.lang.String vocabFile
public java.lang.String normalizedFile
public boolean averagePerceptron
public java.lang.String loadCRFSegmenterPath
public java.lang.String loadPCTSegmenterPath
public java.lang.String crfSegmenterProp
public java.lang.String pctSegmenterProp
public java.lang.String intermediateSegmenterOut
public java.lang.String intermediateSegmenterModel
public int dualDecompMaxItr
public double dualDecompInitialStepSize
public boolean dualDecompDebug
public boolean useCWSWordFeatures
public boolean useCWSWordFeaturesAll
public boolean useCWSWordFeaturesBigram
public boolean pctSegmenterLenAdjust
public boolean useTrainLexicon
public boolean useCWSFeatures
public boolean appendLC
public boolean perceptronDebug
public boolean pctSegmenterScaleByCRF
public double pctSegmenterScale
public boolean separateASCIIandRange
public double dropoutRate
public double dropoutScale
public int multiThreadGrad
public int maxQNItr
public boolean dropoutApprox
public java.lang.String unsupDropoutFile
public double unsupDropoutScale
public int startEvaluateIters
public int multiThreadPerceptron
public boolean lazyUpdate
public int featureCountThresh
public transient java.lang.String serializeWeightsTo
public boolean geDebug
public boolean doFeatureDiscovery
public transient java.lang.String loadWeightsFrom
public transient java.lang.String loadClassIndexFrom
public transient java.lang.String serializeClassIndexTo
public boolean learnCHBasedOnEN
public boolean learnENBasedOnCH
public java.lang.String loadWeightsFromEN
public java.lang.String loadWeightsFromCH
public java.lang.String serializeToEN
public java.lang.String serializeToCH
public java.lang.String testFileEN
public java.lang.String testFileCH
public java.lang.String unsupFileEN
public java.lang.String unsupFileCH
public java.lang.String unsupAlignFile
public java.lang.String supFileEN
public java.lang.String supFileCH
public transient java.lang.String serializeFeatureIndexTo
public transient java.lang.String serializeFeatureIndexToText
public java.lang.String loadFeatureIndexFromEN
public java.lang.String loadFeatureIndexFromCH
public double lambdaEN
public double lambdaCH
public boolean alternateTraining
public boolean weightByEntropy
public boolean useKL
public boolean useHardGE
public boolean useCRFforUnsup
public boolean useGEforSup
public boolean useKnownLCWords
public java.lang.String[] featureFactories
public java.util.List<java.lang.Object[]> featureFactoriesArgs
public boolean useNoisyLabel
public java.lang.String errorMatrix
public boolean printTrainLabels
public int labelDictionaryCutoff
public boolean useAdaDelta
public boolean useAdaDiff
public double adaGradEps
public double adaDeltaRho
public boolean useRandomSeed
public boolean terminateOnAvgImprovement
public boolean strictGoodCoNLL
public boolean removeStrictGoodCoNLLDuplicates
public java.lang.String priorModelFactory
public boolean useUndirectedDisjunctive
public boolean splitSlashHyphenWords
public int maxAdditionalKnownLCWords
public SeqClassifierFlags.SlashHyphenEnum slashHyphenTreatment
public boolean useTitle2
public boolean showNCCInfo
public boolean showCCInfo
public java.lang.String crfToExamine
public boolean useSUTime
public boolean applyNumericClassifiers
public java.lang.String combinationMode
public java.lang.String nerModel
public boolean useMoreNeighborNGrams
public java.lang.String dict2name
public transient java.util.List<java.lang.String> phraseGazettes
public transient java.util.Properties props
public SeqClassifierFlags()
public SeqClassifierFlags(java.util.Properties props)
props - The properties object used for initialization
public SeqClassifierFlags(java.util.Properties props, boolean printProps)
props - The properties object used for initialization
printProps - Whether to print the properties on construction
public final void setProperties(java.util.Properties props)
props - The properties object used for initialization
public void setProperties(java.util.Properties props, boolean printProps)
props - The properties object used for initialization
printProps - Whether to print the properties to stderr as it works.
public static java.util.Map<java.lang.String,java.lang.Integer> flagsToNumArgs()
public java.lang.String toString()
Overrides: toString in class java.lang.Object
public java.lang.String getNotNullTrueStringRep()
Throws:
java.lang.IllegalAccessException
java.lang.IllegalArgumentException