package com.matrobot.gha.insights.app.repo;
import java.io.IOException;
import org.apache.commons.math3.stat.regression.SimpleRegression;
import com.matrobot.gha.Configuration;
import com.matrobot.gha.ICommand;
import com.matrobot.gha.archive.event.EventReader;
import com.matrobot.gha.archive.event.FilteredEventReader;
import com.matrobot.gha.archive.event.IEventReader;
import com.matrobot.gha.archive.repotimeline.ITimelineRepoReader;
import com.matrobot.gha.archive.repotimeline.RepoTimeline;
import com.matrobot.gha.archive.repotimeline.TimelineRepoReader;
import com.matrobot.gha.insights.ml.EvaluationMetrics;
/**
* Hypothesis:
* Linear model will predict if next month activity is higher or lower than current month
*
* Results (2012-5 - 2012-11):
* Accuracy: 0.9491334942467083
* Precision: 0.10961678684274488
* Recall: 0.5852076124567474
* F score: 0.18464687819856707
*
* @author Krzysztof Langner
*/
public class LinearModelApp implements ICommand{

    /** Fewest data points a timeline needs so that "last" and "previous" values both exist. */
    private static final int MIN_DATA_POINTS = 2;

    /** Confusion-matrix counters; replaced with a fresh instance at the start of every run. */
    private EvaluationMetrics metrics;

    /**
     * Reads the repository timelines described by the configuration and scores the
     * linear-model prediction against the actual change for each of them.
     *
     * @param params configuration providing the month folders and optional repository filter
     * @throws IOException declared by the command interface
     */
    @Override
    public void run(Configuration params) throws IOException {
        metrics = new EvaluationMetrics();
        IEventReader eventReader = createEventReader(params);
        ITimelineRepoReader reader = createRepoReader(params, eventReader);
        analyze(reader);
    }

    /**
     * Create the event reader over the configured month folders.
     * If the configuration lists repositories, the reader is wrapped in a
     * filter that has one repo filter added per listed repository.
     */
    private IEventReader createEventReader(Configuration params) {
        IEventReader eventReader = new EventReader(params.getMonthFolders());
        if(params.getRepositories().size() > 0){
            FilteredEventReader filteredEventReader = new FilteredEventReader(eventReader);
            for(String repo : params.getRepositories()){
                filteredEventReader.addRepoFilter(repo);
            }
            eventReader = filteredEventReader;
        }
        return eventReader;
    }

    /**
     * Create the repository timeline reader on top of the event reader.
     * No additional filtering or ordering is applied here; {@code params} is
     * currently unused.
     */
    private ITimelineRepoReader createRepoReader(Configuration params, IEventReader eventReader) {
        return new TimelineRepoReader(eventReader);
    }

    /**
     * Walk all timelines from the reader and update the confusion matrix in
     * {@link #metrics}: the model's prediction is the "predicted" axis, the
     * real change between the last two data points is the "expected" axis.
     * Timelines with fewer than {@link #MIN_DATA_POINTS} values are skipped,
     * because neither a prediction nor an expected value can be derived from them.
     */
    private void analyze(ITimelineRepoReader reader){
        RepoTimeline record;
        while((record = reader.next()) != null){
            int[] values = record.getDataPoints();
            if(values == null || values.length < MIN_DATA_POINTS){
                // Previously this case crashed with ArrayIndexOutOfBoundsException.
                continue;
            }

            boolean isPositive = isPositiveSample(values);
            boolean expected = getExpectedValue(values);
            if(isPositive){
                if(expected){
                    metrics.addTruePositive();
                }
                else{
                    metrics.addFalsePositive();
                }
            }
            else{
                if(expected){
                    metrics.addFalseNegative();
                }
                else{
                    metrics.addTrueNegative();
                }
            }
        }
    }

    /**
     * Model prediction: fit a simple linear regression on every data point
     * except the last one and check whether the value predicted for the last
     * index is greater than the second-to-last value.
     *
     * @param values data points of one timeline; must hold at least {@link #MIN_DATA_POINTS} entries
     * @return true if the model predicts growth for the last data point
     */
    private static boolean isPositiveSample(int[] values) {
        int lastIndex = values.length - 1;
        int prevValue = values[lastIndex - 1];

        SimpleRegression regression = new SimpleRegression();
        for(int i = 0; i < lastIndex; i++){
            regression.addData(i, values[i]);
        }

        // The prediction is deliberately truncated to int, as in the original
        // evaluation, so the published results stay comparable.
        // NOTE(review): with a single fitted point Commons Math documents
        // predict() as returning NaN, which the int cast turns into 0 — confirm
        // that "predict 0" is the intended outcome for two-point timelines.
        int predictedValue = (int) regression.predict(lastIndex);

        // Direct comparison instead of "a - b > 0" to avoid int overflow.
        return predictedValue > prevValue;
    }

    /**
     * Ground truth: did the last data point actually exceed the one before it?
     *
     * @param values data points of one timeline; must hold at least {@link #MIN_DATA_POINTS} entries
     * @return true if the last value is greater than the second-to-last value
     */
    private static boolean getExpectedValue(int[] values) {
        int lastIndex = values.length - 1;
        // Direct comparison instead of "a - b > 0" to avoid int overflow.
        return values[lastIndex] > values[lastIndex - 1];
    }

    /**
     * for local testing
     * @param args
     * @throws IOException
     */
    public static void main(String[] args) throws IOException {
        Configuration params = new Configuration("configs/lm.yaml");
        LinearModelApp app = new LinearModelApp();
        app.run(params);
        app.metrics.print();
    }
}