edu.stanford.nlp.international.process
Class AbstractDataset

java.lang.Object
  extended by edu.stanford.nlp.international.process.AbstractDataset
All Implemented Interfaces:
Dataset
Direct Known Subclasses:
ATBArabicDataset, FTBDataset

public abstract class AbstractDataset
extends Object
implements Dataset

Author:
Spence Green

Nested Class Summary
protected  class AbstractDataset.SplitFilter
           
 
Nested classes/interfaces inherited from interface edu.stanford.nlp.international.process.Dataset
Dataset.Encoding
 
Field Summary
protected  boolean addDeterminer
           
protected  boolean addRoot
           
protected  Set<String> configuredOptions
           
protected  TreeVisitor customTreeVisitor
           
protected  Dataset.Encoding encoding
           
protected  Pattern fileNameNormalizer
           
protected  String flatFileName
           
protected  String lexMapOptions
           
protected  Mapper lexMapper
           
protected  boolean makeFlatFile
           
protected  int maxLen
           
protected  String morphDelim
           
protected  StringMap options
          Provides subclasses with access to the dataset parameters.
protected  String outFileName
           
protected  List<String> outputFileList
           
protected  List<File> pathsToData
           
protected  List<File> pathsToMappings
           
protected  Mapper posMapper
           
protected  boolean removeDashTags
           
protected  boolean removeEscapeTokens
           
protected  Set<String> requiredOptions
           
protected  FileFilter splitFilter
           
protected  StringBuilder toStringBuffer
           
protected  Treebank treebank
           
protected  String treeFileExtension
           
 
Constructor Summary
AbstractDataset()
           
 
Method Summary
abstract  void build()
          Generic method for loading, processing, and writing a dataset.
protected  StringMap buildSplitMap(File path)
           
 List<String> getFilenames()
          Returns the filenames written by Dataset.build().
 boolean setOptions(StringMap opts)
          Sets options for a dataset.
 String toString()
           
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, wait, wait, wait
 

Field Detail

outputFileList

protected final List<String> outputFileList

posMapper

protected Mapper posMapper

lexMapper

protected Mapper lexMapper

encoding

protected Dataset.Encoding encoding

pathsToData

protected final List<File> pathsToData

pathsToMappings

protected final List<File> pathsToMappings

splitFilter

protected FileFilter splitFilter

addDeterminer

protected boolean addDeterminer

removeDashTags

protected boolean removeDashTags

addRoot

protected boolean addRoot

removeEscapeTokens

protected boolean removeEscapeTokens

lexMapOptions

protected String lexMapOptions

maxLen

protected int maxLen

morphDelim

protected String morphDelim

customTreeVisitor

protected TreeVisitor customTreeVisitor

outFileName

protected String outFileName

flatFileName

protected String flatFileName

makeFlatFile

protected boolean makeFlatFile

fileNameNormalizer

protected final Pattern fileNameNormalizer

treebank

protected Treebank treebank

configuredOptions

protected final Set<String> configuredOptions

requiredOptions

protected final Set<String> requiredOptions

toStringBuffer

protected final StringBuilder toStringBuffer

treeFileExtension

protected String treeFileExtension

options

protected StringMap options
Provides subclasses with access to the dataset parameters.

Constructor Detail

AbstractDataset

public AbstractDataset()
Method Detail

build

public abstract void build()
Description copied from interface: Dataset
Generic method for loading, processing, and writing a dataset.

Specified by:
build in interface Dataset

setOptions

public boolean setOptions(StringMap opts)
Description copied from interface: Dataset
Sets options for a dataset.

Specified by:
setOptions in interface Dataset
Parameters:
opts - A map from parameter types defined in ConfigParser to values
Returns:
true if opts contains all required options; false otherwise.

buildSplitMap

protected StringMap buildSplitMap(File path)

getFilenames

public List<String> getFilenames()
Description copied from interface: Dataset
Returns the filenames written by Dataset.build().

Specified by:
getFilenames in interface Dataset
Returns:
A collection of filenames

toString

public String toString()
Overrides:
toString in class Object


Stanford NLP Group