edu.harvard.seas.iis.abilities.classify
Class DataSet

java.lang.Object
  extended by weka.core.Instances
      extended by edu.harvard.seas.iis.abilities.classify.DataSet
All Implemented Interfaces:
java.io.Serializable, weka.core.RevisionHandler
Direct Known Subclasses:
UserDataSet

public class DataSet
extends weka.core.Instances

See Also:
Serialized Form

Field Summary
 
Fields inherited from class weka.core.Instances
ARFF_DATA, ARFF_RELATION, FILE_EXTENSION, SERIALIZED_OBJ_FILE_EXTENSION
 
Constructor Summary
DataSet(weka.core.Instances insts)
           
DataSet(java.lang.String name, weka.core.FastVector baseAttrNames, int suspectedCapacity)
           
 
Method Summary
 void addInstances(weka.core.Instances insts)
           
 double[] attributeToDoubleArray(java.lang.String name)
           
static DataSet fromArffFile(java.io.File f)
          A convenience method that creates an instance of a UserDataSet from an ARFF file
static DataSet fromArffFiles(java.io.File[] files)
          Creates a single UserDataSet object from multiple ARFF files
 DataSet getExplicitInstances()
          Convenience method which returns a subset of the data containing only explicit examples
 DataSet getImplicitInstances()
          Convenience method which returns a subset of the data containing only implicit examples
 DataSet getInstancesForUser(java.lang.String user)
           
 DataSet getInstancesWithAttributeValueEqual(weka.core.Attribute att, java.lang.String attVal)
           
 DataSet getInstancesWithAttributeValueGreaterThan(weka.core.Attribute att, double attVal)
           
 DataSet getInstancesWithAttributeValueNotEqual(weka.core.Attribute att, java.lang.String attVal)
           
 DataSet getInstancesWithAttributeValues(weka.core.Attribute att, java.util.Collection<java.lang.String> values)
           
 int getNumExplicitInstances()
           
 int getNumImplicitInstances()
           
 java.util.Vector<java.lang.String> getValuesOfStringOrNominalAttribute(weka.core.Attribute attr)
           
 void saveAsARFF(java.lang.String outfile)
           
 void saveAsBothARFFandCSV(java.lang.String outFile)
           
 void saveAsCSV(java.lang.String outfile)
          Saves the data set in the CSV format
 void setValue(weka.core.Attribute attr, java.lang.String value, InstanceFilter condition)
          Sets the value of an attribute to a particular value for all instances that match the condition
 
Methods inherited from class weka.core.Instances
add, attribute, attribute, attributeStats, attributeToDoubleArray, checkForAttributeType, checkForStringAttributes, checkInstance, classAttribute, classIndex, compactify, delete, delete, deleteAttributeAt, deleteAttributeType, deleteStringAttributes, deleteWithMissing, deleteWithMissing, deleteWithMissingClass, enumerateAttributes, enumerateInstances, equalHeaders, firstInstance, getRandomNumberGenerator, getRevision, insertAttributeAt, instance, kthSmallestValue, kthSmallestValue, lastInstance, main, meanOrMode, meanOrMode, mergeInstances, numAttributes, numClasses, numDistinctValues, numDistinctValues, numInstances, randomize, readInstance, relationName, renameAttribute, renameAttribute, renameAttributeValue, renameAttributeValue, resample, resampleWithWeights, resampleWithWeights, setClass, setClassIndex, setRelationName, sort, sort, stratify, stringFreeStructure, sumOfWeights, swap, test, testCV, toString, toSummaryString, trainCV, trainCV, variance, variance
 
Methods inherited from class java.lang.Object
equals, getClass, hashCode, notify, notifyAll, wait, wait, wait
 

Constructor Detail

DataSet

public DataSet(java.lang.String name,
               weka.core.FastVector baseAttrNames,
               int suspectedCapacity)

DataSet

public DataSet(weka.core.Instances insts)
Method Detail

addInstances

public void addInstances(weka.core.Instances insts)

getInstancesWithAttributeValueEqual

public DataSet getInstancesWithAttributeValueEqual(weka.core.Attribute att,
                                                   java.lang.String attVal)

getInstancesWithAttributeValueNotEqual

public DataSet getInstancesWithAttributeValueNotEqual(weka.core.Attribute att,
                                                      java.lang.String attVal)

getInstancesWithAttributeValues

public DataSet getInstancesWithAttributeValues(weka.core.Attribute att,
                                               java.util.Collection<java.lang.String> values)

getInstancesWithAttributeValueGreaterThan

public DataSet getInstancesWithAttributeValueGreaterThan(weka.core.Attribute att,
                                                         double attVal)

getExplicitInstances

public DataSet getExplicitInstances()
Convenience method which returns a subset of the data containing only explicit examples

Returns:
a data set containing only the explicit examples from this data set

getImplicitInstances

public DataSet getImplicitInstances()
Convenience method which returns a subset of the data containing only implicit examples

Returns:
a data set containing only the implicit examples from this data set

getInstancesForUser

public DataSet getInstancesForUser(java.lang.String user)

getNumImplicitInstances

public int getNumImplicitInstances()

getNumExplicitInstances

public int getNumExplicitInstances()

getValuesOfStringOrNominalAttribute

public java.util.Vector<java.lang.String> getValuesOfStringOrNominalAttribute(weka.core.Attribute attr)

setValue

public void setValue(weka.core.Attribute attr,
                     java.lang.String value,
                     InstanceFilter condition)
Sets the value of an attribute to a particular value for all instances that match the condition

Parameters:
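attr - the attribute whose value is to be set
value - the value to assign to the attribute in each matching instance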
condition - if null, then all instances are set to the given value

attributeToDoubleArray

public double[] attributeToDoubleArray(java.lang.String name)

saveAsBothARFFandCSV

public void saveAsBothARFFandCSV(java.lang.String outFile)
                          throws java.io.IOException
Throws:
java.io.IOException

saveAsARFF

public void saveAsARFF(java.lang.String outfile)
                throws java.io.IOException
Parameters:
outfile - the file to which the data set is written in the ARFF format
Throws:
java.io.IOException

saveAsCSV

public void saveAsCSV(java.lang.String outfile)
               throws java.io.IOException
Saves the data set in the CSV format

Parameters:
outfile - the file to which the data set is written in the CSV format
Throws:
java.io.IOException

fromArffFile

public static DataSet fromArffFile(java.io.File f)
                            throws java.io.IOException
A convenience method that creates an instance of a UserDataSet from an ARFF file

Parameters:
f - the ARFF file to read
Returns:
the data set created from the contents of the file
Throws:
java.io.IOException

fromArffFiles

public static DataSet fromArffFiles(java.io.File[] files)
                             throws java.io.IOException
Creates a single UserDataSet object from multiple ARFF files

Parameters:
files - the ARFF files to read
Returns:
a single data set created from the contents of all the given files
Throws:
java.io.IOException