|
|||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object
  edu.stanford.nlp.classify.GeneralDataset<L,F>
L - The type of the labels in the Dataset
F - The type of the features in the Dataset
public abstract class GeneralDataset<L,F>
The purpose of this abstract class is to unify Dataset and RVFDataset.
Field Summary | |
---|---|
protected int[][] |
data
|
Index<F> |
featureIndex
|
Index<L> |
labelIndex
|
protected int[] |
labels
|
protected int |
size
|
Constructor Summary | |
---|---|
GeneralDataset()
|
Method Summary | ||
---|---|---|
abstract void |
add(Datum<L,F> d)
|
|
void |
addAll(Iterable<? extends Datum<L,F>> data)
Adds all Datums in the given collection of data to this dataset |
|
void |
applyFeatureCountThreshold(int k)
Applies a feature count threshold to the Dataset. |
|
void |
applyFeatureMaxCountThreshold(int k)
Applies a max feature count threshold to the Dataset. |
|
void |
clear()
Resets the Dataset so that it is empty and ready to collect data. |
|
void |
clear(int numDatums)
Resets the Dataset so that it is empty and ready to collect data. |
|
Index<F> |
featureIndex()
|
|
int[][] |
getDataArray()
|
|
abstract Datum<L,F> |
getDatum(int index)
|
|
float[] |
getFeatureCounts()
Get the total count (over all data instances) of each feature |
|
int[] |
getLabelsArray()
|
|
abstract RVFDatum<L,F> |
getRVFDatum(int index)
|
|
abstract double[][] |
getValuesArray()
|
|
protected abstract void |
initialize(int numDatums)
This method takes care of resetting values of the dataset such that it is empty with an initial capacity of numDatums. Should be accessed only by appropriate methods within the class, such as clear(), which take care of other parts of the emptying of data. |
|
Iterator<RVFDatum<L,F>> |
iterator()
|
|
Index<L> |
labelIndex()
|
|
Iterator<L> |
labelIterator()
Returns an iterator over the class labels of the Dataset |
|
String[] |
makeSvmLabelMap()
Maps our labels to labels that are compatible with svm_light |
|
GeneralDataset<L,F> |
mapDataset(GeneralDataset<L,F> dataset)
|
|
<L2> GeneralDataset<L2,F> |
mapDataset(GeneralDataset<L,F> dataset,
Index<L2> newLabelIndex,
Map<L,L2> labelMapping,
L2 defaultLabel)
|
|
static <L,L2,F> Datum<L2,F> |
mapDatum(Datum<L,F> d,
Map<L,L2> labelMapping,
L2 defaultLabel)
|
|
int |
numClasses()
|
|
int |
numFeatures()
|
|
int |
numFeatureTokens()
returns the number of feature tokens in the Dataset. |
|
int |
numFeatureTypes()
returns the number of distinct feature types in the Dataset. |
|
void |
printSVMLightFormat()
Dumps the Dataset as a training/test file for SVMLight. |
|
void |
printSVMLightFormat(PrintWriter pw)
Print SVM Light Format file. |
|
void |
randomize(int randomSeed)
Randomizes the data array in place. Note: this cannot change the values array or the datum weights, so redefine this for RVFDataset and WeightedDataset! |
|
GeneralDataset<L,F> |
sampleDataset(int randomSeed,
double sampleFrac,
boolean sampleWithReplacement)
|
|
int |
size()
Returns the number of examples (Datums) in the Dataset. |
|
abstract Pair<GeneralDataset<L,F>,GeneralDataset<L,F>> |
split(double p)
|
|
abstract Pair<GeneralDataset<L,F>,GeneralDataset<L,F>> |
split(int start,
int end)
|
|
abstract void |
summaryStatistics()
Print some statistics summarizing the dataset |
|
protected void |
trimData()
|
|
protected void |
trimLabels()
|
|
protected double[][] |
trimToSize(double[][] i)
|
|
protected int[] |
trimToSize(int[] i)
|
|
protected int[][] |
trimToSize(int[][] i)
|
Methods inherited from class java.lang.Object |
---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
Field Detail |
---|
public Index<L> labelIndex
public Index<F> featureIndex
protected int[] labels
protected int[][] data
protected int size
Constructor Detail |
---|
public GeneralDataset()
Method Detail |
---|
public Index<L> labelIndex()
public Index<F> featureIndex()
public int numFeatures()
public int numClasses()
public int[] getLabelsArray()
public int[][] getDataArray()
public abstract double[][] getValuesArray()
public void clear()
public void clear(int numDatums)
numDatums - initial capacity of dataset
protected abstract void initialize(int numDatums)
numDatums - initial capacity of dataset
public abstract RVFDatum<L,F> getRVFDatum(int index)
public abstract Datum<L,F> getDatum(int index)
public abstract void add(Datum<L,F> d)
public float[] getFeatureCounts()
public void applyFeatureCountThreshold(int k)
public void applyFeatureMaxCountThreshold(int k)
public int numFeatureTokens()
public int numFeatureTypes()
public void addAll(Iterable<? extends Datum<L,F>> data)
data - collection of datums you would like to add to the dataset
public abstract Pair<GeneralDataset<L,F>,GeneralDataset<L,F>> split(int start, int end)
public abstract Pair<GeneralDataset<L,F>,GeneralDataset<L,F>> split(double p)
public int size()
Returns the number of examples (Datums) in the Dataset.
protected void trimData()
protected void trimLabels()
protected int[] trimToSize(int[] i)
protected int[][] trimToSize(int[][] i)
protected double[][] trimToSize(double[][] i)
public void randomize(int randomSeed)
randomSeed -
public GeneralDataset<L,F> sampleDataset(int randomSeed, double sampleFrac, boolean sampleWithReplacement)
public abstract void summaryStatistics()
public Iterator<L> labelIterator()
public GeneralDataset<L,F> mapDataset(GeneralDataset<L,F> dataset)
dataset
-
public static <L,L2,F> Datum<L2,F> mapDatum(Datum<L,F> d, Map<L,L2> labelMapping, L2 defaultLabel)
public <L2> GeneralDataset<L2,F> mapDataset(GeneralDataset<L,F> dataset, Index<L2> newLabelIndex, Map<L,L2> labelMapping, L2 defaultLabel)
dataset
-
public void printSVMLightFormat()
public String[] makeSvmLabelMap()
public void printSVMLightFormat(PrintWriter pw)
public Iterator<RVFDatum<L,F>> iterator()
iterator
in interface Iterable<RVFDatum<L,F>>
|
|||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |