package org.gbif.occurrence.cli.crawl;

import org.gbif.cli.BaseCommand;
import org.gbif.cli.Command;
import org.gbif.common.messaging.DefaultMessagePublisher;
import org.gbif.common.messaging.api.MessagePublisher;

import java.io.IOException;
import java.nio.file.Paths;

import org.codehaus.jackson.map.ObjectMapper;
import org.codehaus.jackson.map.SerializationConfig;
import org.kohsuke.MetaInfServices;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Command to manage occurrence records from previous crawls.
 */
@MetaInfServices(Command.class)
public class PreviousCrawlsManagerCommand extends BaseCommand {

  private static final Logger LOG = LoggerFactory.getLogger(PreviousCrawlsManagerCommand.class);

  private final PreviousCrawlsManagerConfiguration config = new PreviousCrawlsManagerConfiguration();

  public PreviousCrawlsManagerCommand() {
    super("previous-crawls-manager");
  }

  @Override
  protected Object getConfigurationObject() {
    return config;
  }

  @Override
  protected void doRun() {
    MessagePublisher messagePublisher = buildMessagePublisher();
    DeletePreviousCrawlsService deletePreviousCrawlsService = new DeletePreviousCrawlsService(config, messagePublisher);
    // give the manager service the delete service so records from previous crawls can be removed
    PreviousCrawlsManagerService previousCrawlsManagerService =
            new PreviousCrawlsManagerService(config, deletePreviousCrawlsService);
    previousCrawlsManagerService.start(this::printReportToJson);
  }

  /**
   * Builds a {@link MessagePublisher} from the messaging configuration, or returns null if it cannot be created.
   */
  private MessagePublisher buildMessagePublisher() {
    MessagePublisher publisher = null;
    try {
      publisher = new DefaultMessagePublisher(config.messaging.getConnectionParameters());
    } catch (IOException e) {
      LOG.error("Error while building DefaultMessagePublisher", e);
    }
    return publisher;
  }

  /**
   * Writes the report as indented JSON to {@code config.reportLocation} and, if {@code config.displayReport}
   * is set, also prints it to standard output.
   */
  private void printReportToJson(Object report) {
    ObjectMapper om = new ObjectMapper();
    om.configure(SerializationConfig.Feature.INDENT_OUTPUT, true);
    try {
      om.writeValue(Paths.get(config.reportLocation).toFile(), report);
      if (config.displayReport) {
        System.out.print(om.writeValueAsString(report));
      }
    } catch (IOException e) {
      LOG.error("Failed to write report.", e);
    }
  }
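
  // The commented-out methods below look like development helpers rather than part of the command itself:
  // startFromDisk would re-read a previously written JSON report from disk, and analyseReport would
  // summarise how many records and datasets fall below or above config.automaticRecordDeletionThreshold.
  // They are not wired into doRun() and are kept here for reference only.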
//  public void startFromDisk(String reportLocation) {
//    ObjectMapper om = new ObjectMapper();
//    om.configure(DeserializationConfig.Feature.FAIL_ON_UNKNOWN_PROPERTIES, false);
//    try {
//      Map<UUID, PreviousCrawlsManagerService.DatasetRecordCountInfo> allDatasetWithMoreThanOneCrawl =
//              om.readValue(new File(reportLocation),
//                      new TypeReference<Map<UUID, PreviousCrawlsManagerService.DatasetRecordCountInfo>>() {});
//      analyseReport(allDatasetWithMoreThanOneCrawl);
//    } catch (IOException e) {
//      e.printStackTrace();
//    }
//  }
//
//  public void analyseReport(Map<UUID, PreviousCrawlsManagerService.DatasetRecordCountInfo> allDatasetWithMoreThanOneCrawl) {
//    long allRecordsToDelete = allDatasetWithMoreThanOneCrawl.entrySet()
//            .stream()
//            .filter(e -> e.getValue().diffSolrLastCrawlPercentage < config.automaticRecordDeletionThreshold)
//            .mapToLong(e -> e.getValue().getSumAllPreviousCrawl())
//            .sum();
//
//    long allRecordsToDeleteNotAutomatic = allDatasetWithMoreThanOneCrawl.entrySet()
//            .stream()
//            .filter(e -> e.getValue().diffSolrLastCrawlPercentage >= config.automaticRecordDeletionThreshold)
//            .mapToLong(e -> e.getValue().getSumAllPreviousCrawl())
//            .sum();
//
//    long datasetsInvolved = allDatasetWithMoreThanOneCrawl.entrySet()
//            .stream()
//            .filter(e -> e.getValue().diffSolrLastCrawlPercentage < config.automaticRecordDeletionThreshold)
//            .count();
//
//    long datasetsTooHigh = allDatasetWithMoreThanOneCrawl.entrySet()
//            .stream()
//            .filter(e -> e.getValue().diffSolrLastCrawlPercentage >= config.automaticRecordDeletionThreshold)
//            .count();
//
//    System.out.println("Total datasets involved: " + allDatasetWithMoreThanOneCrawl.keySet().size());
//    System.out.println("allRecordsToDelete: " + allRecordsToDelete);
//    System.out.println("allRecordsToDeleteNotAutomatic: " + allRecordsToDeleteNotAutomatic);
//    System.out.println("datasetsInvolved: " + datasetsInvolved);
//    System.out.println("datasetsTooHigh: " + datasetsTooHigh);
//  }
}