|
|||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object
  edu.stanford.nlp.classify.GeneralDataset<L,F>
L
- The type of the labels in the Dataset
F
- The type of the features in the Dataset
public abstract class GeneralDataset<L,F>
The purpose of this abstract class is to unify Dataset and RVFDataset.
Field Summary | |
---|---|
protected int[][] |
data
|
Index<F> |
featureIndex
|
Index<L> |
labelIndex
|
protected int[] |
labels
|
protected int |
size
|
Constructor Summary | |
---|---|
GeneralDataset()
|
Method Summary | ||
---|---|---|
abstract void |
add(Datum<L,F> d)
|
|
void |
addAll(java.lang.Iterable<? extends Datum<L,F>> data)
Adds all Datums in the given collection of data to this dataset |
|
void |
applyFeatureCountThreshold(int k)
Applies a feature count threshold to the Dataset. |
|
void |
applyFeatureMaxCountThreshold(int k)
Applies a max feature count threshold to the Dataset. |
|
void |
clear()
Resets the Dataset so that it is empty and ready to collect data. |
|
void |
clear(int numDatums)
Resets the Dataset so that it is empty and ready to collect data. |
|
Index<F> |
featureIndex()
|
|
int[][] |
getDataArray()
|
|
abstract Datum<L,F> |
getDatum(int index)
|
|
float[] |
getFeatureCounts()
Get the total count (over all data instances) of each feature |
|
int[] |
getLabelsArray()
|
|
abstract RVFDatum<L,F> |
getRVFDatum(int index)
|
|
abstract double[][] |
getValuesArray()
|
|
protected abstract void |
initialize(int numDatums)
This method takes care of resetting values of the dataset such that it is empty with an initial capacity of numDatums. |
|
java.util.Iterator<RVFDatum<L,F>> |
iterator()
|
|
Index<L> |
labelIndex()
|
|
java.util.Iterator<L> |
labelIterator()
Returns an iterator over the class labels of the Dataset |
|
java.lang.String[] |
makeSvmLabelMap()
Maps our labels to labels that are compatible with svm_light |
|
GeneralDataset<L,F> |
mapDataset(GeneralDataset<L,F> dataset)
|
|
<L2> GeneralDataset<L2,F> |
mapDataset(GeneralDataset<L,F> dataset,
Index<L2> newLabelIndex,
java.util.Map<L,L2> labelMapping,
L2 defaultLabel)
|
|
static <L,L2,F> Datum<L2,F> |
mapDatum(Datum<L,F> d,
java.util.Map<L,L2> labelMapping,
L2 defaultLabel)
|
|
int |
numClasses()
|
|
int |
numFeatures()
|
|
int |
numFeatureTokens()
returns the number of feature tokens in the Dataset. |
|
int |
numFeatureTypes()
returns the number of distinct feature types in the Dataset. |
|
void |
printSVMLightFormat()
Dumps the Dataset as a training/test file for SVMLight. |
|
void |
printSVMLightFormat(java.io.PrintWriter pw)
Print SVM Light Format file. |
|
void |
randomize(int randomSeed)
Randomizes the data array in place. |
|
GeneralDataset<L,F> |
sampleDataset(int randomSeed,
double sampleFrac,
boolean sampleWithReplacement)
|
|
int |
size()
Returns the number of examples (Datums) in the Dataset. |
|
abstract Pair<GeneralDataset<L,F>,GeneralDataset<L,F>> |
split(double p)
|
|
abstract Pair<GeneralDataset<L,F>,GeneralDataset<L,F>> |
split(int start,
int end)
|
|
abstract void |
summaryStatistics()
Print some statistics summarizing the dataset |
|
protected void |
trimData()
|
|
protected void |
trimLabels()
|
|
protected double[][] |
trimToSize(double[][] i)
|
|
protected int[] |
trimToSize(int[] i)
|
|
protected int[][] |
trimToSize(int[][] i)
|
Methods inherited from class java.lang.Object |
---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
Field Detail |
---|
public Index<L> labelIndex
public Index<F> featureIndex
protected int[] labels
protected int[][] data
protected int size
Constructor Detail |
---|
public GeneralDataset()
Method Detail |
---|
public Index<L> labelIndex()
public Index<F> featureIndex()
public int numFeatures()
public int numClasses()
public int[] getLabelsArray()
public int[][] getDataArray()
public abstract double[][] getValuesArray()
public void clear()
public void clear(int numDatums)
numDatums
- initial capacity of dataset
protected abstract void initialize(int numDatums)
numDatums
- initial capacity of dataset
public abstract RVFDatum<L,F> getRVFDatum(int index)
public abstract Datum<L,F> getDatum(int index)
public abstract void add(Datum<L,F> d)
public float[] getFeatureCounts()
public void applyFeatureCountThreshold(int k)
public void applyFeatureMaxCountThreshold(int k)
public int numFeatureTokens()
public int numFeatureTypes()
public void addAll(java.lang.Iterable<? extends Datum<L,F>> data)
data
- collection of datums you would like to add to the dataset
public abstract Pair<GeneralDataset<L,F>,GeneralDataset<L,F>> split(int start, int end)
public abstract Pair<GeneralDataset<L,F>,GeneralDataset<L,F>> split(double p)
public int size()
Returns the number of examples (Datums) in the Dataset.
protected void trimData()
protected void trimLabels()
protected int[] trimToSize(int[] i)
protected int[][] trimToSize(int[][] i)
protected double[][] trimToSize(double[][] i)
public void randomize(int randomSeed)
randomSeed
-
public GeneralDataset<L,F> sampleDataset(int randomSeed, double sampleFrac, boolean sampleWithReplacement)
public abstract void summaryStatistics()
public java.util.Iterator<L> labelIterator()
public GeneralDataset<L,F> mapDataset(GeneralDataset<L,F> dataset)
dataset
-
public static <L,L2,F> Datum<L2,F> mapDatum(Datum<L,F> d, java.util.Map<L,L2> labelMapping, L2 defaultLabel)
public <L2> GeneralDataset<L2,F> mapDataset(GeneralDataset<L,F> dataset, Index<L2> newLabelIndex, java.util.Map<L,L2> labelMapping, L2 defaultLabel)
dataset
-
public void printSVMLightFormat()
public java.lang.String[] makeSvmLabelMap()
public void printSVMLightFormat(java.io.PrintWriter pw)
public java.util.Iterator<RVFDatum<L,F>> iterator()
Specified by: iterator in interface java.lang.Iterable<RVFDatum<L,F>>
|
|||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |