public class IBMMTArabicDataset extends java.lang.Object implements Dataset
NOTE: This class expects UTF-8 input (not Buckwalter transliteration).
Dataset.Encoding
Modifier and Type | Field and Description |
---|---|
protected java.util.Set<java.lang.String> |
configuredOptions |
protected IBMArabicEscaper |
escaper |
protected java.util.regex.Pattern |
fileNameNormalizer |
protected Mapper |
lexMapper |
protected java.lang.String |
outFileName |
protected java.util.List<java.io.File> |
pathsToData |
protected java.util.Set<java.lang.String> |
requiredOptions |
protected java.lang.StringBuilder |
toStringBuilder |
Constructor and Description |
---|
IBMMTArabicDataset() |
Modifier and Type | Method and Description |
---|---|
void |
build()
Generic method for loading, processing, and writing a dataset.
|
java.util.List<java.lang.String> |
getFilenames()
Returns the filenames written by
Dataset.build(). |
boolean |
setOptions(java.util.Properties opts)
Sets options for a dataset.
|
java.lang.String |
toString() |
protected Mapper lexMapper
protected final java.util.List<java.io.File> pathsToData
protected java.lang.String outFileName
protected final java.util.regex.Pattern fileNameNormalizer
protected final IBMArabicEscaper escaper
protected final java.util.Set<java.lang.String> configuredOptions
protected final java.util.Set<java.lang.String> requiredOptions
protected final java.lang.StringBuilder toStringBuilder
public void build()
Dataset
public java.util.List<java.lang.String> getFilenames()
Dataset
Returns the filenames written by Dataset.build().
Specified by: getFilenames in interface Dataset
public java.lang.String toString()
Overrides: toString in class java.lang.Object
public boolean setOptions(java.util.Properties opts)
Dataset
Specified by: setOptions in interface Dataset
Parameters: opts - A map from parameter types defined in ConfigParser to values