package org.gbif.occurrence.download.file.dwca;
import org.gbif.api.model.common.search.Facet;
import org.gbif.api.model.registry.Dataset;
import org.gbif.api.model.registry.DatasetOccurrenceDownloadUsage;
import org.gbif.api.service.registry.DatasetOccurrenceDownloadUsageService;
import org.gbif.api.service.registry.DatasetService;
import java.io.IOException;
import java.util.Map;
import java.util.Map.Entry;
import java.util.UUID;
import com.google.common.base.Charsets;
import com.google.common.base.Throwables;
import org.apache.commons.io.output.FileWriterWithEncoding;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.supercsv.cellprocessor.ParseLong;
import org.supercsv.cellprocessor.constraint.NotNull;
import org.supercsv.cellprocessor.ift.CellProcessor;
import org.supercsv.io.CsvBeanWriter;
import org.supercsv.io.ICsvBeanWriter;
import org.supercsv.prefs.CsvPreference;
/**
* Utility class that creates a dataset citations file from a Map that contains the dataset usages (record count).
* The output file contains a list of datasets keys/uuids and its counts of occurrence records.
*/
public final class CitationsFileWriter {

  private static final Logger LOG = LoggerFactory.getLogger(CitationsFileWriter.class);

  // Bean properties of Facet.Count that are written, in this order, as the columns of the citations file.
  private static final String[] HEADER = {"name", "count"};

  // Cell processors applied to the HEADER columns, position by position.
  private static final CellProcessor[] PROCESSORS = {new NotNull(), new ParseLong()};

  /**
   * Creates the dataset citation file from the per-dataset record counts and registers each dataset usage.
   * <p>
   * The file is written as UTF-8, tab-separated, one row per dataset (dataset key, record count); no header row is
   * written. Entries with a {@code null} key are skipped. Nothing is written, and no file is created, when
   * {@code datasetUsages} is {@code null} or empty.
   *
   * @param datasetUsages record count per dataset
   * @param citationFileName output file name
   * @param datasetOccUsageService usage service, used to persist the usage of each dataset
   * @param datasetService dataset service, used to look up each dataset before persisting its usage
   * @param downloadKey download key
   * @throws RuntimeException wrapping the {@link IOException} raised while writing the file
   */
  public static void createCitationFile(Map<UUID, Long> datasetUsages, String citationFileName,
                                        DatasetOccurrenceDownloadUsageService datasetOccUsageService,
                                        DatasetService datasetService, String downloadKey) {
    if (datasetUsages == null || datasetUsages.isEmpty()) {
      return;
    }

    try (ICsvBeanWriter beanWriter = new CsvBeanWriter(new FileWriterWithEncoding(citationFileName, Charsets.UTF_8),
                                                       CsvPreference.TAB_PREFERENCE)) {
      for (Entry<UUID, Long> entry : datasetUsages.entrySet()) {
        UUID datasetKey = entry.getKey();
        if (datasetKey == null) {
          continue;
        }
        beanWriter.write(new Facet.Count(datasetKey.toString(), entry.getValue()), HEADER, PROCESSORS);
        persistDatasetUsage(entry, downloadKey, datasetOccUsageService, datasetService);
      }
      beanWriter.flush();
    } catch (IOException e) {
      LOG.error("Error creating citations file", e);
      // Same wrapping the deprecated Throwables.propagate performed for a checked exception.
      throw new RuntimeException(e);
    }
  }

  /**
   * Persists the dataset usage information and swallows any exception to avoid an error during the file building.
   * <p>
   * The dataset is looked up through {@code datasetService}; when the lookup returns {@code null} nothing is
   * persisted.
   *
   * @param usage dataset key and its record count
   * @param downloadKey key of the download the usage belongs to
   * @param datasetOccUsageService service used to create the usage record
   * @param datasetService service used to load the dataset (key, DOI, citation and title)
   */
  private static void persistDatasetUsage(Entry<UUID, Long> usage, String downloadKey,
                                          DatasetOccurrenceDownloadUsageService datasetOccUsageService,
                                          DatasetService datasetService) {
    try {
      Dataset dataset = datasetService.get(usage.getKey());
      if (dataset == null) {
        // The service returned no dataset for this key: there is nothing to attach the usage to.
        return;
      }

      DatasetOccurrenceDownloadUsage datasetUsage = new DatasetOccurrenceDownloadUsage();
      datasetUsage.setDatasetKey(dataset.getKey());
      datasetUsage.setNumberRecords(usage.getValue());
      datasetUsage.setDownloadKey(downloadKey);
      datasetUsage.setDatasetDOI(dataset.getDoi());
      // The citation text is optional; it is left unset when the dataset has none.
      if (dataset.getCitation() != null && dataset.getCitation().getText() != null) {
        datasetUsage.setDatasetCitation(dataset.getCitation().getText());
      }
      datasetUsage.setDatasetTitle(dataset.getTitle());
      datasetOccUsageService.create(datasetUsage);
    } catch (Exception e) {
      // Deliberately best-effort: a failure here must not abort the creation of the citations file.
      LOG.error("Error persisting dataset usage information", e);
    }
  }

  /**
   * Private constructor: this utility class only exposes static methods.
   */
  private CitationsFileWriter() {
    // private constructor
  }
}