package org.zalando.catwatch.backend.util; import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.databind.annotation.JsonSerialize; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.zalando.catwatch.backend.model.Project; import java.util.ArrayList; import java.util.Date; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import java.util.TreeSet; public class LanguageStats { private static final Logger logger = LoggerFactory.getLogger(LanguageStats.class); private String languageName; private List<Integer> projectCounts; private List<Date> snapshotDates; public static String UNKNOWN = "unknown"; public LanguageStats(String languageName, List<Integer> projectCounts, List<Date> snapshotDates) { if (languageName == null) { this.languageName = UNKNOWN; } else { this.languageName = languageName; } this.projectCounts = projectCounts; this.snapshotDates = snapshotDates; } /** * Go through all the projects and collect the counts per snapshot and * per language. * * @param projectList * @return */ public static List<LanguageStats> buildStats(List<Project> projectList) { List<Project> projects = filterUniqueSnapshots(projectList); // For each date, we have a map of all the counts. Later we piece the // results together from these pieces of information. Map<Date, Map<String,Integer>> counts = new HashMap<>(); TreeSet<Date> dates = new TreeSet<>(); Set<String> languages = new HashSet<>(); for (Project p: projects) { String language = p.getPrimaryLanguage(); Date date = p.getSnapshotDate(); if (language == null) language = "unknown"; dates.add(date); languages.add(language); Map<String,Integer> hist = counts.get(date); if (hist == null) { hist = new HashMap<>(); counts.put(date, hist); } if (hist.containsKey(language)) { hist.put(language, hist.get(language) + 1); } else { hist.put(language, 1); } } List<LanguageStats> result = new ArrayList<>(); for (String l: languages) { List<Integer> projectCounts = new ArrayList<>(); List<Date> snapshotDates = new ArrayList<>(dates); for(Date d: snapshotDates) { Integer i = counts.get(d).get(l); if (i == null) { projectCounts.add(0); } else { projectCounts.add(i); } } result.add(new LanguageStats(l, projectCounts, snapshotDates)); } return result; } /** * For some reason, there are duplicate snapshots sometimes. This method takes care of that * and removes the duplicates. * * @param projects * @return */ public static List<Project> filterUniqueSnapshots(List<Project> projects) { Set<String> nameAndDateSet = new HashSet<>(); int newCount = 0; int oldCount = 0; List<Project> result = new ArrayList<>(); for (Project p: projects) { String key = p.getPrimaryLanguage() + ":" + p.getName() + ":" + p.getOrganizationName() + ":" + p.getSnapshotDate().getTime(); if (!nameAndDateSet.contains(key)) { newCount++; result.add(p); nameAndDateSet.add(key); } else { oldCount++; } } return result; } @JsonProperty(value="name") public String getLanguageName() { return languageName; } @JsonProperty(value="project_counts") public List<Integer> getProjectCounts() { return projectCounts; } @JsonProperty(value="snapshot_dates") @JsonSerialize(using = JsonDateListSerializer.class) public List<Date> getSnapshotDates() { return snapshotDates; } }