package com.matrobot.gha.insights.app.repo;
import java.io.IOException;
import com.matrobot.gha.insights.classifier.BayesClassifier;
import com.matrobot.gha.insights.classifier.Binary1RClassifier;
import com.matrobot.gha.insights.classifier.IBinaryClassifier;
import com.matrobot.gha.insights.classifier.LogisticRegressionClassifier;
import com.matrobot.gha.insights.filter.ClassifyRepositoryFilter;
import com.matrobot.gha.insights.ml.Dataset;
import com.matrobot.gha.insights.ml.EvaluationMetrics;
import com.matrobot.gha.insights.ml.Sample;
/**
 * Command-line tool that loads a repository dataset through
 * {@link ClassifyRepositoryFilter} and prints evaluation metrics for a
 * 1R, a Bayes and a logistic regression classifier.
 *
 * <p>Each classifier is evaluated on the same dataset it was trained on;
 * no separate hold-out set is used by this class.
 */
public class ClassifierEvaluatorApp {

    /** Dataset root used when no directory is supplied on the command line. */
    private static final String DEFAULT_DATA_PATH = "/home/klangner/datasets/github/";

    /**
     * Squared-error cut-off above which a prediction counts as wrong.
     * 0.25 is 0.5 squared, i.e. the prediction is wrong when it differs
     * from the expected output by more than 0.5.
     */
    private static final double MISCLASSIFIED_SQUARED_ERROR = 0.25;

    private final Dataset dataset;
    /** Number of samples processed by the most recent {@link #evaluate} call. */
    private int counter;
    /** Confusion counts collected by the most recent {@link #evaluate} call. */
    private EvaluationMetrics metrics;

    /**
     * Loads the dataset from three sub-directories of the default dataset root.
     *
     * @param firstPath  first sub-directory, relative to the dataset root
     * @param secondPath second sub-directory, relative to the dataset root
     * @param thirdPath  third sub-directory, relative to the dataset root
     * @throws IOException propagated from loading the dataset
     */
    protected ClassifierEvaluatorApp(String firstPath, String secondPath, String thirdPath) throws IOException {
        this(DEFAULT_DATA_PATH, firstPath, secondPath, thirdPath);
    }

    /**
     * Loads the dataset from three sub-directories of {@code dataPath}.
     *
     * @param dataPath   dataset root; a trailing {@code /} is appended when
     *                   missing, an empty string leaves the sub-paths relative
     * @param firstPath  first sub-directory, relative to {@code dataPath}
     * @param secondPath second sub-directory, relative to {@code dataPath}
     * @param thirdPath  third sub-directory, relative to {@code dataPath}
     * @throws IOException propagated from loading the dataset
     */
    protected ClassifierEvaluatorApp(String dataPath, String firstPath, String secondPath, String thirdPath)
            throws IOException {
        // Paths are joined by plain concatenation, so make sure the root ends with a separator.
        String root = (dataPath.isEmpty() || dataPath.endsWith("/")) ? dataPath : dataPath + "/";
        ClassifyRepositoryFilter filter = new ClassifyRepositoryFilter(
                root + firstPath,
                root + secondPath,
                root + thirdPath);
        dataset = filter.getDataset();
        dataset.normalize();
    }

    /**
     * Runs {@code classifier} over every sample of {@code dataset}, storing
     * the confusion counts in {@link #metrics} and the sample count in
     * {@link #counter} (both are reset at the start of each call).
     *
     * @param classifier classifier under evaluation
     * @param dataset    samples to classify
     * @return square root of the mean squared difference between each
     *         sample's expected output and the classifier's result;
     *         {@code NaN} when the dataset contains no samples (0.0 / 0)
     */
    private double evaluate(IBinaryClassifier classifier, Dataset dataset) {
        metrics = new EvaluationMetrics();
        counter = 0;
        double sum = 0;
        for (Sample sample : dataset.getData()) {
            // NOTE(review): the constructor already called dataset.normalize(); if that
            // call rewrites the stored samples, the features are normalized a second
            // time here. Confirm against Dataset before relying on these numbers.
            double confidence = classifier.classify(dataset.normalize(sample.features));
            double error = Math.pow(sample.output - confidence, 2);
            sum += error;
            boolean positive = sample.output == 1;
            if (error > MISCLASSIFIED_SQUARED_ERROR) {
                // Wrong prediction: a missed positive is a false negative,
                // anything else is counted as a false positive.
                if (positive) {
                    metrics.addFalseNegative();
                } else {
                    metrics.addFalsePositive();
                }
            } else {
                if (positive) {
                    metrics.addTruePositive();
                } else {
                    metrics.addTrueNegative();
                }
            }
            counter += 1;
        }
        return Math.sqrt(sum / counter);
    }

    /**
     * Evaluates the three classifiers and prints their metrics to standard output.
     *
     * <p>Feature vector:
     * <ul>
     *   <li>feature[0] = currentActivity in log10 scale</li>
     *   <li>feature[1] = current activity rating (from previous month)</li>
     * </ul>
     *
     * @param args optional; {@code args[0]} overrides the dataset root
     *             directory, otherwise the built-in default is used
     * @throws IOException propagated from loading the dataset
     */
    public static void main(String[] args) throws IOException {
        String dataPath = (args.length > 0) ? args[0] : DEFAULT_DATA_PATH;
        ClassifierEvaluatorApp app =
                new ClassifierEvaluatorApp(dataPath, "2012-1/", "2012-10/", "2012-11/");
        Dataset dataset = app.dataset;

        // 1R classifier (evaluated without a training call)
        System.out.println("1R: ");
        app.evaluate(new Binary1RClassifier(), dataset);
        app.metrics.print();
        System.out.println();

        // Bayes classifier
        System.out.println("Bayes: ");
        BayesClassifier bayes = new BayesClassifier(2);
        System.out.println("Train");
        bayes.train(dataset);
        app.evaluate(bayes, dataset);
        app.metrics.print();
        bayes.printModel();
        System.out.println();

        // Logistic regression
        LogisticRegressionClassifier classifier = new LogisticRegressionClassifier();
        System.out.println("Train");
        classifier.train(dataset);
        System.out.println("Evaluate");
        app.evaluate(classifier, dataset);
        System.out.println("Logistic regression: ");
        app.metrics.print();
        System.out.println();
    }
}