package sagan.projects.support; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Value; import org.springframework.stereotype.Service; import org.springframework.util.StringUtils; import sagan.Indexer; import sagan.projects.Project; import sagan.search.support.CrawledWebDocumentProcessor; import sagan.search.support.CrawlerService; import sagan.search.support.DocumentProcessor; import sagan.search.support.SearchService; @Service public class ProjectPagesIndexer implements Indexer<Project> { private static final Log logger = LogFactory.getLog(ProjectPagesIndexer.class); @Value(value = "${search.indexer.gh_pages.domains:projects.spring.io}") private String githubPagesDomains; private final ProjectMetadataService metadataService; private final CrawlerService crawlerService; private final SearchService searchService; @Autowired public ProjectPagesIndexer(ProjectMetadataService metadataService, CrawlerService crawlerService, SearchService searchService) { this.metadataService = metadataService; this.crawlerService = crawlerService; this.searchService = searchService; } @Override public Iterable<Project> indexableItems() { return metadataService.getProjectsWithReleases(); } @Override public void indexItem(Project project) { logger.debug("Indexing project page for: " + project.getId()); String projectPageUrl = project.getSiteUrl(); GithubPagesSearchEntryMapper mapper = new GithubPagesSearchEntryMapper(project); DocumentProcessor documentProcessor = new CrawledWebDocumentProcessor(searchService, mapper); if (StringUtils.commaDelimitedListToSet(githubPagesDomains).stream() .anyMatch(domain -> projectPageUrl.startsWith("http://" + domain) || projectPageUrl.startsWith("https://" + domain))) { crawlerService.crawl(projectPageUrl, 0, documentProcessor); } else { logger.debug(projectPageUrl + " does not match allowed domains"); } } @Override public String counterName() { return "projects_pages"; } @Override public String getId(Project project) { return project.getId(); } }