package org.zalando.catwatch.backend.github;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.dataformat.yaml.YAMLFactory;
import com.google.common.collect.Lists;
import org.apache.tomcat.util.http.fileupload.util.Streams;
import org.kohsuke.github.GHObject;
import org.kohsuke.github.GHRepository;
import org.kohsuke.github.GitHub;
import org.kohsuke.github.RateLimitHandler;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.zalando.catwatch.backend.model.CatwatchYaml;
import org.zalando.catwatch.backend.model.Contributor;
import org.zalando.catwatch.backend.model.Language;
import org.zalando.catwatch.backend.model.Project;
import org.zalando.catwatch.backend.model.Statistics;
import org.zalando.catwatch.backend.model.util.Scorer;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
import java.util.IntSummaryStatistics;
import java.util.List;
import java.util.LongSummaryStatistics;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import java.util.concurrent.Callable;
import static java.util.stream.Collectors.*;
/**
 * Task to get organisation snapshot from GitHub using Kohsuke GitHub API.
 * <p>
 * The code of this class is not optimised in terms of number of API requests
 * in favour of code simplicity and readability. However, this should not affect
 * API rate limit if http cache is used. If rate limit is reached the task
 * is blocked until the limit is reset.
 * <p>
 * A snapshot consists of four parts that are collected one after another:
 * organisation-wide statistics, the list of projects, the list of contributors
 * and the list of programming languages.
 *
 * @see RateLimitHandler
 * @see <a href="http://github-api.kohsuke.org">Kohsuke GitHub API</a>
 * @see <a href="https://developer.github.com/v3/#rate-limiting">API documentation from GitHub</a>
 */
public class TakeSnapshotTask implements Callable<Snapshot> {

    private static final Logger logger = LoggerFactory.getLogger(TakeSnapshotTask.class);

    /**
     * Shared YAML mapper used to parse {@code .catwatch.yaml} files. It is created once
     * instead of once per repository; the instance is never reconfigured after construction.
     */
    private static final ObjectMapper YAML_MAPPER = new ObjectMapper(new YAMLFactory());

    private final GitHub gitHub;
    private final String organisationName;
    private final Date snapshotDate;
    private final Scorer scorer;

    /**
     * Creates a task that takes a snapshot of a single organisation.
     *
     * @param gitHub           client used to look up the organisation
     * @param organisationName login name of the organisation to take the snapshot of
     * @param scorer           calculates the score that is stored on every collected project
     * @param snapshotDate     date that is stamped on statistics, projects and contributors
     */
    public TakeSnapshotTask(final GitHub gitHub, final String organisationName, Scorer scorer, Date snapshotDate) {
        this.gitHub = gitHub;
        this.organisationName = organisationName;
        this.scorer = scorer;
        this.snapshotDate = snapshotDate;
    }

    /**
     * Looks up the organisation and collects statistics, projects, contributors and
     * languages (in this order) into one {@link Snapshot}.
     *
     * @return the snapshot of the organisation
     * @throws Exception if the organisation cannot be looked up or any of the collecting steps fails
     */
    @Override
    public Snapshot call() throws Exception {
        logger.info("Taking snapshot of organization '{}'.", organisationName);
        final OrganizationWrapper organization = new OrganizationWrapper(gitHub.getOrganization(organisationName));
        Snapshot snapshot = new Snapshot(
                collectStatistics(organization),
                collectProjects(organization),
                collectContributors(organization),
                collectLanguages(organization));
        logger.info("Successfully taken snapshot of organization '{}'.", organisationName);
        return snapshot;
    }

    /**
     * Aggregates organisation-wide counters: repositories, members, teams, contributors
     * (all and external), stars, forks, size, primary languages and tags.
     *
     * @param organization the organisation to collect the statistics for
     * @return the statistics stamped with the organisation id and the snapshot date
     * @throws IOException declared for callers; kept as part of the existing signature
     */
    Statistics collectStatistics(final OrganizationWrapper organization) throws IOException {
        logger.info("Started collecting statistics for organization '{}'.", organisationName);

        Statistics statistics = new Statistics(organization.getId(), snapshotDate);

        // Fetch the repository list once and reuse it for every aggregate below.
        final Collection<RepositoryWrapper> repositories = organization.listRepositories();

        statistics.setPublicProjectCount(repositories.size());
        statistics.setMembersCount(organization.listMembers().size());
        statistics.setTeamsCount(organization.listTeams().size());

        // Flatten the contributors of all repositories once; both contributor counters use this list.
        final List<GHRepository.Contributor> contributors = repositories.stream()
                .map(RepositoryWrapper::listContributors)
                .flatMap(List::stream)
                .collect(toList());

        // A person contributing to several repositories is counted once (distinct by id).
        statistics.setAllContributorsCount((int) contributors.stream()
                .map(GHRepository.Contributor::getId)
                .distinct()
                .count());

        // External contributors are those who are not members of the organisation.
        statistics.setExternalContributorsCount((int) contributors.stream()
                .filter(contributor -> !organization.contributorIsMember(contributor))
                .map(GHRepository.Contributor::getId)
                .distinct()
                .count());

        statistics.setAllStarsCount(repositories.stream()
                .mapToInt(RepositoryWrapper::getStarsCount)
                .sum());

        statistics.setAllForksCount(repositories.stream()
                .mapToInt(RepositoryWrapper::getForksCount)
                .sum());

        statistics.setAllSizeCount(repositories.stream()
                .mapToInt(RepositoryWrapper::getSize)
                .sum());

        // NOTE(review): if getPrimaryLanguage() can return null for repositories without a
        // detected language, null is counted here as one additional language - confirm intent.
        statistics.setProgramLanguagesCount((int) repositories.stream()
                .map(RepositoryWrapper::getPrimaryLanguage)
                .distinct()
                .count());

        statistics.setTagsCount((int) repositories.stream()
                .map(RepositoryWrapper::listTags)
                .flatMap(List::stream)
                .count());

        statistics.setOrganizationName(organization.getLogin());

        logger.info("Finished collecting statistics for organization '{}'.", organisationName);
        return statistics;
    }

    /**
     * Builds one {@link Project} per repository of the organisation.
     * <p>
     * The score is calculated from the project as populated up to that point, i.e. before
     * maintainers and the {@code .catwatch.yaml} data (title, image) are set.
     *
     * @param organization the organisation whose repositories are converted
     * @return the collected projects, in the order the repositories are listed
     * @throws IOException        declared for callers; kept as part of the existing signature
     * @throws URISyntaxException if a repository URL cannot be converted to a URI
     */
    Collection<Project> collectProjects(OrganizationWrapper organization) throws IOException, URISyntaxException {
        logger.info("Started collecting projects for organization '{}'.", organisationName);

        List<Project> projects = new ArrayList<>();

        for (RepositoryWrapper repository : organization.listRepositories()) {
            Project project = new Project();

            project.setGitHubProjectId(repository.getId());
            project.setSnapshotDate(snapshotDate);
            project.setName(repository.getName());
            project.setUrl(repository.getUrl().toURI().toString());
            project.setDescription(repository.getDescription());
            project.setStarsCount(repository.getStarsCount());
            project.setForksCount(repository.getForksCount());
            project.setLastPushed(repository.getLastPushed().toString());
            project.setPrimaryLanguage(repository.getPrimaryLanguage());
            project.setLanguageList(new ArrayList<>(repository.listLanguages().keySet()));
            project.setOrganizationName(organization.getLogin());
            project.setCommitsCount(repository.listCommits().size());
            project.setContributorsCount(repository.listContributors().size());
            // External contributors are those who are not members of the organisation.
            project.setExternalContributorsCount((int) repository.listContributors().stream()
                    .filter(contributor -> !organization.contributorIsMember(contributor))
                    .map(GHRepository.Contributor::getId)
                    .distinct()
                    .count());
            project.setScore(scorer.score(project));
            project.setMaintainers(getProjectMaintainers(repository));

            readCatwatchYaml(repository, project);

            projects.add(project);
        }

        logger.info("Finished collecting projects for organization '{}'.", organisationName);
        return projects;
    }

    /**
     * Reads the {@code MAINTAINERS} file of the repository and returns its lines.
     * <p>
     * The file is decoded as UTF-8 and split on both LF and CRLF line endings, so entries
     * do not carry a trailing carriage return. A missing file is expected and yields an
     * empty list silently; any other I/O failure is logged and also yields an empty list.
     *
     * @param repository the repository to read the maintainers from
     * @return one entry per line of the file, or an empty list if the file cannot be read
     */
    List<String> getProjectMaintainers(RepositoryWrapper repository) {
        try {
            // Decode explicitly as UTF-8 instead of relying on the platform default charset.
            final String content = Streams.asString(repository.getFileContent("MAINTAINERS"), "UTF-8");
            return Lists.newArrayList(content.split("\\r?\\n"));
        } catch (FileNotFoundException ignored) {
            // ignore 404 for MAINTAINERS: the file is optional
            return Collections.emptyList();
        } catch (IOException ioe) {
            logger.warn("Failed to read MAINTAINERS for '{}'", repository.getName(), ioe);
            return Collections.emptyList();
        }
    }

    /**
     * Reads the optional {@code .catwatch.yaml} file of the repository and, if it can be
     * parsed, copies its title and image to the project. A missing file is ignored silently;
     * any other I/O or parsing failure is logged and the project is left unchanged.
     *
     * @param repository the repository to read the file from
     * @param project    the project that receives title and image
     */
    void readCatwatchYaml(RepositoryWrapper repository, Project project) {
        CatwatchYaml data = null;

        try {
            data = YAML_MAPPER.readValue(repository.getFileContent(".catwatch.yaml"), CatwatchYaml.class);
        } catch (FileNotFoundException ignored) {
            // ignore 404 for .catwatch.yaml
        } catch (IOException ioe) {
            logger.warn("Failed to read .catwatch.yaml for '{}'", repository.getName(), ioe);
        }

        if (null != data) {
            project.setTitle(data.getTitle());
            project.setImage(data.getImage());
        }
    }

    /**
     * Builds one {@link Contributor} per distinct person that contributed to any repository
     * of the organisation. Contributions are summed over all repositories, and the number of
     * repositories a person contributed to is stored as the organisational projects count.
     *
     * @param organization the organisation whose contributors are collected
     * @return the collected contributors, ordered by contributor id
     * @throws IOException        if contributor details cannot be loaded
     * @throws URISyntaxException if a contributor URL cannot be converted to a URI
     */
    @SuppressWarnings("unchecked")
    Collection<Contributor> collectContributors(OrganizationWrapper organization) throws IOException, URISyntaxException {
        logger.info("Started collecting contributors for organization '{}'.", organisationName);

        Collection<Contributor> contributors = new ArrayList<>();

        // Get a list of all contributors of all repositories
        Collection<GHRepository.Contributor> ghContributors = organization.listRepositories().stream()
                .map(RepositoryWrapper::listContributors)
                .flatMap(List::stream)
                .collect(toList());

        // Get a map of <Contributor ID> - <Contributions statistics>
        Map<Integer, IntSummaryStatistics> idStatisticsMap = ghContributors.stream()
                .collect(groupingBy(GHObject::getId, summarizingInt(GHRepository.Contributor::getContributions)));

        // Eliminate duplicates in contributors list (the tree set keeps one entry per id, sorted by id)
        ghContributors = ghContributors.stream()
                .collect(collectingAndThen(toCollection(() ->
                        new TreeSet<>(Comparator.comparingInt(GHObject::getId))), ArrayList::new));

        // Build a list of contributors
        for (GHRepository.Contributor ghContributor : ghContributors) {
            // Sum = contributions over all repositories, count = number of repositories contributed to.
            final IntSummaryStatistics contributions = idStatisticsMap.get(ghContributor.getId());

            Contributor contributor = new Contributor(ghContributor.getId(), organization.getId(), snapshotDate);
            contributor.setName(ghContributor.getName());
            contributor.setUrl(ghContributor.getHtmlUrl().toURI().toString());
            contributor.setOrganizationalCommitsCount((int) contributions.getSum());
            contributor.setOrganizationalProjectsCount((int) contributions.getCount());
            contributor.setPersonalProjectsCount(ghContributor.getPublicRepoCount());
            contributor.setOrganizationName(organisationName);

            contributors.add(contributor);
        }

        // TODO contributor.setPersonalCommitsCount()

        logger.info("Finished collecting contributors for organization '{}'.", organisationName);
        return contributors;
    }

    /**
     * Builds one {@link Language} per programming language used in any repository of the
     * organisation. For every language the number of repositories using it and its share of
     * the summed language sizes (integer percentage, rounded down) are stored.
     *
     * @param organization the organisation whose repositories are analysed
     * @return the collected languages
     */
    @SuppressWarnings("rawtypes")
    Collection<Language> collectLanguages(OrganizationWrapper organization) {
        logger.info("Started collecting languages for organization '{}'.", organisationName);

        Collection<Language> languages = new ArrayList<>();

        // Per language: count = number of repositories using it, sum = total size over all repositories.
        Map<String, LongSummaryStatistics> stat = organization.listRepositories().stream()
                .map(RepositoryWrapper::listLanguages)
                .map(Map::entrySet)
                .flatMap(Set::stream)
                .collect(groupingBy(Map.Entry::getKey,
                        summarizingLong(entry -> ((Number) ((Map.Entry) entry).getValue()).longValue())));

        final long allLanguageSize = stat.values().stream()
                .mapToLong(LongSummaryStatistics::getSum)
                .sum();

        for (Map.Entry<String, LongSummaryStatistics> entry : stat.entrySet()) {
            // Guard against division by zero when every reported language size is 0.
            final int percentage = allLanguageSize == 0
                    ? 0
                    : (int) (entry.getValue().getSum() * 100 / allLanguageSize);

            Language language = new Language();
            language.setName(entry.getKey());
            language.setProjectsCount((int) entry.getValue().getCount());
            language.setPercentage(percentage);

            languages.add(language);
        }

        logger.info("Finished collecting languages for organization '{}'.", organisationName);
        return languages;
    }
}