public abstract class AbstractDataset extends java.lang.Object implements Dataset
Modifier and Type | Class and Description |
---|---|
protected static class |
AbstractDataset.SplitFilter |
Dataset.Encoding
Modifier and Type | Field and Description |
---|---|
protected boolean |
addDeterminer |
protected boolean |
addRoot |
protected java.util.Set<java.lang.String> |
configuredOptions |
protected TreeVisitor |
customTreeVisitor |
protected Dataset.Encoding |
encoding |
protected java.util.regex.Pattern |
fileNameNormalizer |
protected java.lang.String |
flatFileName |
protected java.lang.String |
lexMapOptions |
protected Mapper |
lexMapper |
protected boolean |
makeFlatFile |
protected int |
maxLen |
protected java.lang.String |
morphDelim |
protected java.util.Properties |
options
Provides access for sub-classes to the data set parameters
|
protected java.lang.String |
outFileName |
protected java.util.List<java.lang.String> |
outputFileList |
protected java.util.List<java.io.File> |
pathsToData |
protected java.util.List<java.io.File> |
pathsToMappings |
protected java.lang.String |
posMapOptions |
protected Mapper |
posMapper |
protected boolean |
removeDashTags |
protected boolean |
removeEscapeTokens |
protected java.util.Set<java.lang.String> |
requiredOptions |
protected java.io.FileFilter |
splitFilter |
protected java.lang.StringBuilder |
toStringBuilder |
protected Treebank |
treebank |
protected java.lang.String |
treeFileExtension |
Constructor and Description |
---|
AbstractDataset() |
Modifier and Type | Method and Description |
---|---|
abstract void |
build()
Generic method for loading, processing, and writing a dataset.
|
protected java.util.Set<java.lang.String> |
buildSplitMap(java.lang.String path) |
java.util.List<java.lang.String> |
getFilenames()
Returns the filenames written by Dataset.build(). |
boolean |
setOptions(java.util.Properties opts)
Sets options for a dataset.
|
java.lang.String |
toString() |
protected final java.util.List<java.lang.String> outputFileList
protected Mapper posMapper
protected java.lang.String posMapOptions
protected Mapper lexMapper
protected java.lang.String lexMapOptions
protected Dataset.Encoding encoding
protected final java.util.List<java.io.File> pathsToData
protected final java.util.List<java.io.File> pathsToMappings
protected java.io.FileFilter splitFilter
protected boolean addDeterminer
protected boolean removeDashTags
protected boolean addRoot
protected boolean removeEscapeTokens
protected int maxLen
protected java.lang.String morphDelim
protected TreeVisitor customTreeVisitor
protected java.lang.String outFileName
protected java.lang.String flatFileName
protected boolean makeFlatFile
protected final java.util.regex.Pattern fileNameNormalizer
protected Treebank treebank
protected final java.util.Set<java.lang.String> configuredOptions
protected final java.util.Set<java.lang.String> requiredOptions
protected final java.lang.StringBuilder toStringBuilder
protected java.lang.String treeFileExtension
protected java.util.Properties options
public abstract void build()
Dataset
public boolean setOptions(java.util.Properties opts)
Dataset
setOptions
in interface Dataset
opts
- A map from parameter types defined in ConfigParser to values
protected java.util.Set<java.lang.String> buildSplitMap(java.lang.String path)
public java.util.List<java.lang.String> getFilenames()
Dataset
Returns the filenames written by Dataset.build().
getFilenames
in interface Dataset
public java.lang.String toString()
toString
in class java.lang.Object