package edu.harvard.seas.iis.abilities.classify;

import edu.harvard.seas.iis.util.io.FileManipulation;
import java.io.File;
import weka.classifiers.Classifier;
import weka.core.Instances;

/* loaded from: input_file:edu/harvard/seas/iis/abilities/classify/BuildClassifier.class */
/**
 * Drives the data-preparation pipeline (parse, clean, compute features, clean again,
 * normalize) and then trains, saves and optionally evaluates four
 * {@link PositiveAndUnlabeledClassifier} instances built on a Weka Logistic base classifier.
 *
 * <p>All directory and file locations are read from {@code Settings}.
 */
public class BuildClassifier {
    /*
     * Pipeline stage identifiers. buildClassifier(i, z) runs every preprocessing stage whose
     * identifier is >= i, so passing PARSE runs the whole pipeline and passing TRAIN skips all
     * preprocessing. The fields are left non-final so that existing code that may assign them
     * keeps working.
     */
    public static int PARSE = 1;
    public static int CLEAN1 = 2;
    public static int COMPUTE_FEATURES = 3;
    public static int CLEAN2 = 4;
    public static int NORMALIZE = 5;
    /** Not compared against anywhere in this class: training always runs. */
    public static int TRAIN = 6;

    /** Directory named by {@code Settings.EXPLICIT_DATA_DIRECTORY}; input to the parse stage. */
    protected File explicitDataDir;
    /** Directory named by {@code Settings.NATURAL_DATA_DIRECTORY}; input to the parse stage. */
    protected File naturalDataDir;
    /** Directory named by {@code Settings.OTHER_STUDIES_RELIABLE_DATA_DIRECTORY}; not read by this class. */
    protected File otherStudiesReliableDataDir;
    /** Directory named by {@code Settings.OTHER_STUDIES_UNRELIABLE_DATA_DIRECTORY}; not read by this class. */
    protected File otherStudiesUnreliableDataDir;
    /** Output of the parse stage; cleaned in place by the first clean stage. */
    protected File parsedDataDir;
    /** Output of the feature-computation stage. */
    protected File transformedDataDir;
    /** Output of the second clean stage. */
    protected File cleanDataDir;
    /** Output of the normalization stage. */
    protected File normalizedDataDir;

    /**
     * Creates a builder and resolves all data directories via {@link #prepareFileHandles()}.
     *
     * <p>NOTE(review): this calls an overridable method from the constructor; a subclass override
     * runs before the subclass's own fields are initialized.
     */
    public BuildClassifier() {
        prepareFileHandles();
    }

    /**
     * Initializes the directory fields from {@code Settings}. If {@code Settings.DATA_DIRECTORY}
     * does not exist on disk, the user is asked to pick one and the setting is overwritten with
     * the chosen directory's absolute path.
     */
    protected void prepareFileHandles() {
        if (!new File(Settings.DATA_DIRECTORY).exists()) {
            System.out.println("Select the data directory");
            // NOTE(review): if getUserSpecifiedDirForReading() can return null (e.g. the user
            // cancels), this line throws NullPointerException - confirm against FileManipulation.
            Settings.DATA_DIRECTORY = FileManipulation.getUserSpecifiedDirForReading().getAbsolutePath();
        }
        // NOTE(review): whether the Settings.*_DIRECTORY values below reflect a DATA_DIRECTORY
        // reassigned above depends on how Settings defines them - not visible from this class.
        this.explicitDataDir = new File(Settings.EXPLICIT_DATA_DIRECTORY);
        this.naturalDataDir = new File(Settings.NATURAL_DATA_DIRECTORY);
        this.otherStudiesReliableDataDir = new File(Settings.OTHER_STUDIES_RELIABLE_DATA_DIRECTORY);
        this.otherStudiesUnreliableDataDir = new File(Settings.OTHER_STUDIES_UNRELIABLE_DATA_DIRECTORY);
        this.parsedDataDir = new File(Settings.PARSED_DATA_DIRECTORY);
        this.transformedDataDir = new File(Settings.TRANSFORMED_DATA_DIRECTORY);
        this.cleanDataDir = new File(Settings.CLEAN_DATA_DIRECTORY);
        this.normalizedDataDir = new File(Settings.NORMALIZED_DATA_DIRECTORY);
    }

    /**
     * Runs the preprocessing stages starting at stage {@code i}, then trains and saves the four
     * classifiers and, if requested, evaluates them.
     *
     * @param i first preprocessing stage to run (one of {@link #PARSE} .. {@link #TRAIN});
     *          every stage with an identifier {@code >= i} is executed
     * @param z whether to evaluate the feature sets and print the evaluation report
     * @return the trained classifiers in the order: global, mixed, global target-agnostic,
     *         mixed target-agnostic (saved as c1..c4)
     * @throws Exception if any pipeline step, training, saving or evaluation fails
     */
    public PositiveAndUnlabeledClassifier[] buildClassifier(int i, boolean z) throws Exception {
        runPreprocessing(i);

        System.out.println("** Building classifiers **");
        DataSet globalData = UserDataSet.fromArffFile(arffFile(Settings.GLOBALLY_NORMALIZED_COMBINED_DATA_FILE));
        DataSet mixedData = UserDataSet.fromArffFile(arffFile(Settings.MIXED_NORMALIZED_COMBINED_DATA_FILE));

        // Counted before training, exactly as in the original statement order.
        int explicitCount = globalData.getExplicitInstances().numInstances();
        int implicitCount = globalData.getImplicitInstances().numInstances();

        PositiveAndUnlabeledClassifier globalClassifier =
                new PositiveAndUnlabeledClassifier(getBaseClassifier(), Settings.BEST_FEATURES_LOGISTIC_GLOBAL);
        PositiveAndUnlabeledClassifier mixedClassifier =
                new PositiveAndUnlabeledClassifier(getBaseClassifier(), Settings.BEST_FEATURES_LOGISTIC_MIXED);
        PositiveAndUnlabeledClassifier globalAgnosticClassifier =
                new PositiveAndUnlabeledClassifier(getBaseClassifier(), Settings.BEST_FEATURES_LOGISTIC_GLOBAL_TARGET_AGNOSTIC);
        PositiveAndUnlabeledClassifier mixedAgnosticClassifier =
                new PositiveAndUnlabeledClassifier(getBaseClassifier(), Settings.BEST_FEATURES_LOGISTIC_MIXED_TARGET_AGNOSTIC);

        globalClassifier.buildClassifier(globalData);
        mixedClassifier.buildClassifier(mixedData);
        globalAgnosticClassifier.buildClassifier(globalData);
        mixedAgnosticClassifier.buildClassifier(mixedData);

        PositiveAndUnlabeledClassifier[] trained = new PositiveAndUnlabeledClassifier[]{
            globalClassifier, mixedClassifier, globalAgnosticClassifier, mixedAgnosticClassifier};
        saveClassifiers(trained);

        if (z) {
            evaluateClassifiers();
        }

        System.out.println("\nThere were a total of " + (explicitCount + implicitCount) + " movements (" + explicitCount + " from a formal experiment and " + implicitCount + " from natural observations)");
        return trained;
    }

    /** Runs every preprocessing stage whose identifier is {@code >= firstStage}, in pipeline order. */
    private void runPreprocessing(int firstStage) throws Exception {
        if (firstStage <= PARSE) {
            System.out.println("** Parsing raw data **");
            UserDataSet.parseRawData(this.explicitDataDir, this.naturalDataDir, this.parsedDataDir, Settings.ALL_USER_NAMES);
        }
        if (firstStage <= CLEAN1) {
            System.out.println("** Cleaning the data - removing useless data points **");
            // Source and destination are the same directory: the parsed data is cleaned in place.
            Clean.clean(this.parsedDataDir, this.parsedDataDir, 1);
        }
        if (firstStage <= COMPUTE_FEATURES) {
            System.out.println("** Computing additional features **");
            Transform.computeAdditonalFeatures(this.parsedDataDir, this.transformedDataDir);
            Transform.combineDataSets(this.transformedDataDir, Settings.USERS_WITH_SUFFICIENT_IMPLICIT_AND_EXPLICIT_DATA, new File(Settings.COMBINED_TRANSFORMED_DATA_FILE));
        }
        if (firstStage <= CLEAN2) {
            System.out.println("** Cleaning the data - removing outliers **");
            Clean.clean(this.transformedDataDir, this.cleanDataDir, 2);
            Transform.combineDataSets(this.cleanDataDir, Settings.USERS_WITH_SUFFICIENT_IMPLICIT_AND_EXPLICIT_DATA, new File(Settings.COMBINED_CLEAN_DATA_FILE));
        }
        if (firstStage <= NORMALIZE) {
            System.out.println("** Normalizing data **");
            Transform.normalize(this.cleanDataDir, this.normalizedDataDir, Settings.FEATURES_TO_NORMALIZE, Settings.USERS_WITH_SUFFICIENT_IMPLICIT_DATA);
            Transform.combineDataSets(this.normalizedDataDir, Settings.USERS_WITH_SUFFICIENT_IMPLICIT_DATA, new File(Settings.INDIVIDUALLY_NORMALIZED_COMBINED_DATA_FILE));
            NormalizationConstants normalizationConstants = Transform.createGloballyNormalizedFile(this.cleanDataDir, this.normalizedDataDir, Settings.USERS_WITH_SOME_USEFUL_DATA, Settings.USERS_WITH_SUFFICIENT_IMPLICIT_DATA, Settings.GLOBALLY_NORMALIZED_COMBINED_DATA_FILE, Settings.FEATURES_TO_NORMALIZE);
            Transform.combineGloballyAndIndividuallyNormalizedData(arffFile(Settings.GLOBALLY_NORMALIZED_COMBINED_DATA_FILE), arffFile(Settings.INDIVIDUALLY_NORMALIZED_COMBINED_DATA_FILE), Settings.ALLOWED_FEATURES, Settings.USERS_WITH_SUFFICIENT_IMPLICIT_DATA, Settings.MIXED_NORMALIZED_COMBINED_DATA_FILE);
            System.out.println("Saving the global normalization constants to " + Settings.TRAINED_CLASSIFIER_DIRECTORY);
            FileManipulation.saveObjectToFile(normalizationConstants, trainedClassifierFile("normalizationConstants"));
        }
    }

    /** Writes the trained classifiers to c1.classifier .. cN.classifier in the trained-classifier directory. */
    private void saveClassifiers(PositiveAndUnlabeledClassifier[] trained) throws Exception {
        System.out.println("Saving the classifiers to " + Settings.TRAINED_CLASSIFIER_DIRECTORY);
        for (int index = 0; index < trained.length; index++) {
            FileManipulation.saveObjectToFile(trained[index], trainedClassifierFile("c" + (index + 1) + ".classifier"));
        }
    }

    /**
     * Evaluates each of the four feature sets on a freshly loaded copy of its data set, saves
     * each data set after evaluation as ARFF and CSV, and prints the comparison report.
     */
    private void evaluateClassifiers() throws Exception {
        System.out.println("** Evaluating classifiers **");
        // One unconfigured classifier instance is shared by all four evaluations, as in the original.
        PositiveAndUnlabeledClassifier evaluator = new PositiveAndUnlabeledClassifier(getBaseClassifier());
        FeatureSelection featureSelection = new FeatureSelection();

        DataSet globalData = DataSet.fromArffFile(arffFile(Settings.GLOBALLY_NORMALIZED_COMBINED_DATA_FILE));
        ClassifierEvalStats globalStats = featureSelection.evaluateFeatureSet(Settings.BEST_FEATURES_LOGISTIC_GLOBAL, evaluator, globalData, true);
        globalData.saveAsBothARFFandCSV(classifiedDataPath("allUsersWithGoodImplicitAndExplicitData-globallyNormalized-classified with C1"));

        DataSet mixedData = DataSet.fromArffFile(arffFile(Settings.MIXED_NORMALIZED_COMBINED_DATA_FILE));
        ClassifierEvalStats mixedStats = featureSelection.evaluateFeatureSet(Settings.BEST_FEATURES_LOGISTIC_MIXED, evaluator, mixedData, true);
        mixedData.saveAsBothARFFandCSV(classifiedDataPath("allUsersWithGoodImplicitAndExplicitData-mixed-classified with C2"));

        DataSet globalAgnosticData = DataSet.fromArffFile(arffFile(Settings.GLOBALLY_NORMALIZED_COMBINED_DATA_FILE));
        ClassifierEvalStats globalAgnosticStats = featureSelection.evaluateFeatureSet(Settings.BEST_FEATURES_LOGISTIC_GLOBAL_TARGET_AGNOSTIC, evaluator, globalAgnosticData, true);
        globalAgnosticData.saveAsBothARFFandCSV(classifiedDataPath("allUsersWithGoodImplicitAndExplicitData-globallyNormalized-classified with C3"));

        DataSet mixedAgnosticData = DataSet.fromArffFile(arffFile(Settings.MIXED_NORMALIZED_COMBINED_DATA_FILE));
        ClassifierEvalStats mixedAgnosticStats = featureSelection.evaluateFeatureSet(Settings.BEST_FEATURES_LOGISTIC_MIXED_TARGET_AGNOSTIC, evaluator, mixedAgnosticData, true);
        mixedAgnosticData.saveAsBothARFFandCSV(classifiedDataPath("allUsersWithGoodImplicitAndExplicitData-mixed-classified with C4"));

        System.out.println("\n\nclassifier\t" + globalStats.getHeader1());
        System.out.println("Natural data (unfiltered)\t" + globalStats.getReport1ForNaturalData());
        System.out.println("Logistic, globally normalized\t" + globalStats.getReport1());
        System.out.println("Logistic, globally and individually normalized\t" + mixedStats.getReport1());
        System.out.println("Logistic, globally normalized, target agnostic\t" + globalAgnosticStats.getReport1());
        System.out.println("Logistic, globally and individually normalized, target agnostic\t" + mixedAgnosticStats.getReport1());
        System.out.println("Mean per user Stdev of MT/ID in data from a formal experiment: " + globalStats.explicitMTbyIDstdev);
    }

    /**
     * Builds the ARFF file handle for a combined-data base name by appending
     * {@code Instances.FILE_EXTENSION}. Takes {@code Object} so the concatenation matches the
     * original {@code String.valueOf(...)} form whatever the declared type of the setting is.
     */
    private static File arffFile(Object baseName) {
        return new File(String.valueOf(baseName) + Instances.FILE_EXTENSION);
    }

    /** Resolves {@code fileName} inside {@code Settings.TRAINED_CLASSIFIER_DIRECTORY}. */
    private static File trainedClassifierFile(String fileName) {
        return new File(String.valueOf(Settings.TRAINED_CLASSIFIER_DIRECTORY) + File.separator + fileName);
    }

    /** Resolves {@code baseName} inside {@code Settings.CLASSIFIED_DATA_DIRECTORY} as a path string. */
    private static String classifiedDataPath(String baseName) {
        return String.valueOf(Settings.CLASSIFIED_DATA_DIRECTORY) + File.separator + baseName;
    }

    /**
     * Creates a new Weka Logistic classifier configured with options {@code -R 1.0E-8 -M -1}
     * (per the Weka Logistic documentation: ridge value and maximum iterations, where -1 means
     * iterate until convergence).
     *
     * @return a freshly instantiated base classifier, never {@code null}
     * @throws IllegalStateException if the classifier cannot be instantiated or configured;
     *         previously the failure was printed and {@code null} was returned, which only
     *         surfaced later as an unrelated error in the code consuming the classifier
     */
    protected Classifier getBaseClassifier() {
        try {
            return Classifier.forName("weka.classifiers.functions.Logistic", new String[]{"-R", "1.0E-8", "-M", "-1"});
        } catch (Exception e) {
            throw new IllegalStateException("Unable to create base classifier weka.classifiers.functions.Logistic", e);
        }
    }

    /**
     * Runs the complete pipeline from the parse stage and evaluates the resulting classifiers.
     *
     * @param strArr ignored
     * @throws Exception if any stage of the pipeline fails
     */
    public static void main(String[] strArr) throws Exception {
        new BuildClassifier().buildClassifier(PARSE, true);
    }
}
