package com.matrobot.gha.archive.cmd;
import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import com.matrobot.gha.Configuration;
import com.matrobot.gha.ICommand;
import com.matrobot.gha.archive.event.EventReader;
import com.matrobot.gha.archive.event.EventRecord;
import com.matrobot.gha.archive.event.IEventReader;
import com.matrobot.gha.archive.repo.RepositoryRecord;
/**
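* Find repository state by going back through the history of events.
* E.g.
* Find the number of forks in 2011-2 by counting back from the fork total
* reported by a later event and subtracting the ForkEvents seen up to that event.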
*
* @author Krzysztof Langner
*/
public class RepoHistoryCmd implements ICommand{

    /** Event type name used to recognise fork events. */
    private static final String FORK_EVENT = "ForkEvent";

    /**
     * Repositories seen in the first month that have not been estimated yet,
     * keyed by repository id. Entries are removed once an estimate is made.
     */
    private final HashMap<String, RepositoryRecord> repos = new HashMap<String, RepositoryRecord>();

    /** Repositories for which a fork estimate has been computed. */
    private final List<RepositoryRecord> estimatedRepos = new ArrayList<RepositoryRecord>();

    /**
     * Runs the command: collects repositories from the first month folder,
     * estimates their fork counts from the remaining month folders, sorts the
     * result by fork count (descending) and writes it as CSV.
     *
     * @param params configuration supplying the month folders and the output stream
     * @throws IOException declared by {@link ICommand}; propagated from the readers
     * @throws IllegalArgumentException if the configuration lists no month folders
     */
    @Override
    public void run(Configuration params) throws IOException {

        System.out.println("Looking for active repositories in the first month");
        // Work on a private copy: removing the first month must not alter the list
        // handed out by the configuration (it may be the configuration's own list).
        List<String> months = new ArrayList<String>(params.getMonthFolders());
        if(months.isEmpty()){
            throw new IllegalArgumentException("At least one month folder is required");
        }
        String firstMonth = months.remove(0);
        prepareRepos(new EventReader(firstMonth));
        System.out.println("Found " + repos.size() + " repositories");

        System.out.println("Estimating number of forks");
        estimateForks(new EventReader(months));
        sortByForks();

        System.out.println("Saving data");
        saveAsCSV(params.getOutputStream());
        System.out.println("Done");
    }

    /**
     * Find all active repos.
     * Registers every repository id that appears in the reader and counts the
     * fork events seen for it. Events without a repository id are skipped.
     *
     * @param reader source of events; read until it returns {@code null}
     */
    private void prepareRepos(IEventReader reader) {

        EventRecord event;
        while((event = reader.next()) != null){
            String repoName = event.getRepositoryId();
            if(repoName == null){
                continue;
            }
            RepositoryRecord repoRecord = repos.get(repoName);
            if(repoRecord == null){
                repoRecord = new RepositoryRecord(repoName);
                repos.put(repoName, repoRecord);
            }
            // Constant-first equals: an event without a type must not throw.
            if(FORK_EVENT.equals(event.type)){
                repoRecord.forkEventCount ++;
            }
        }
    }

    /**
     * Walks the remaining events. For each repository still waiting for an
     * estimate, fork events are counted until an event carrying repository
     * details shows up; that event is then used to compute the estimate.
     * Repositories that never get such an event stay in {@code repos} and are
     * not part of the output.
     *
     * @param reader source of events; read until it returns {@code null}
     */
    private void estimateForks(IEventReader reader) {

        EventRecord event;
        while((event = reader.next()) != null){
            // A null id simply finds nothing: null keys are never stored in repos.
            RepositoryRecord repoRecord = repos.get(event.getRepositoryId());
            if(repoRecord == null){
                continue;
            }
            if(event.repository != null){
                estimateRepoFork(event, repoRecord);
            }
            else if(FORK_EVENT.equals(event.type)){
                repoRecord.forkEventCount ++;
            }
        }
    }

    /**
     * Replaces the record's running fork-event count with the estimate
     * {@code event.repository.forks - forkEventCount} and moves the record
     * from the pending map to the list of estimated repositories.
     *
     * @param event event carrying repository details (its {@code repository} is non-null)
     * @param repoRecord pending record of the repository the event belongs to
     */
    private void estimateRepoFork(EventRecord event, RepositoryRecord repoRecord) {

        repoRecord.forkEventCount = event.repository.forks - repoRecord.forkEventCount;
        repos.remove(repoRecord.repoName);
        estimatedRepos.add(repoRecord);
    }

    /**
     * Sorts the estimated repositories by fork count, highest first.
     */
    private void sortByForks() {

        Comparator<RepositoryRecord> cmp = new Comparator<RepositoryRecord>() {
            @Override
            public int compare(RepositoryRecord o1, RepositoryRecord o2) {
                int first = o1.forkEventCount;
                int second = o2.forkEventCount;
                // Explicit comparison instead of subtraction, which can overflow
                // and then report the wrong order.
                if(first == second){
                    return 0;
                }
                return (first < second) ? 1 : -1;
            }
        };
        Collections.sort(estimatedRepos, cmp);
    }

    /**
     * Writes the estimated repositories as CSV: a header line followed by one
     * {@code repository,forks} line per record, in the current list order.
     *
     * @param printStream destination of the CSV text; flushed but not closed here
     * @throws IOException kept in the signature for callers; not thrown by this body
     */
    private void saveAsCSV(PrintStream printStream) throws IOException{

        printStream.println("repository,forks");
        for(RepositoryRecord record : estimatedRepos){
            printStream.println(record.repoName + "," + record.forkEventCount);
        }
        // Push buffered rows out; closing is left to whoever owns the stream.
        printStream.flush();
    }

    /**
     * for local testing
     * @param args not used
     * @throws IOException if running the command fails with an I/O error
     */
    public static void main(String[] args) throws IOException {

        Configuration params = new Configuration("configs/forks.yaml");
        RepoHistoryCmd app = new RepoHistoryCmd();
        app.run(params);
    }
}