package org.gbif.occurrence.download.file;
import org.gbif.api.model.occurrence.DownloadFormat;
import org.gbif.common.search.solr.SolrConstants;
import org.gbif.occurrence.download.file.dwca.DownloadDwcaActor;
import org.gbif.occurrence.download.file.simplecsv.SimpleCsvDownloadActor;
import org.gbif.occurrence.download.inject.DownloadWorkflowModule;
import org.gbif.utils.file.FileUtils;
import org.gbif.wrangler.lock.Lock;
import org.gbif.wrangler.lock.LockFactory;
import java.io.File;
import java.io.IOException;
import java.util.List;
import java.util.concurrent.TimeUnit;
import akka.actor.Actor;
import akka.actor.ActorRef;
import akka.actor.Props;
import akka.actor.UntypedActor;
import akka.actor.UntypedActorFactory;
import akka.routing.RoundRobinRouter;
import com.google.common.base.Strings;
import com.google.common.collect.Lists;
import com.google.inject.Inject;
import com.google.inject.name.Named;
import org.apache.commons.lang3.time.StopWatch;
import org.apache.hadoop.fs.Path;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Actor that controls the multi-threaded creation of occurrence downloads.
*/
public class DownloadMaster extends UntypedActor {

  private static final Logger LOG = LoggerFactory.getLogger(DownloadMaster.class);
  private static final String FINISH_MSG_FMT = "Time elapsed %d minutes and %d seconds";

  private final SolrClient solrClient;
  private final Configuration conf;
  private final LockFactory lockFactory;
  private final OccurrenceMapReader occurrenceMapReader;
  private final DownloadAggregator aggregator;
  private final DownloadJobConfiguration jobConfiguration;

  // Per-worker results, accumulated until every spawned worker has reported back.
  private final List<Result> results = Lists.newArrayList();
  // Number of workers actually spawned; exactly one Result is expected from each.
  private int calcNrOfWorkers;
  // Number of Result messages received so far.
  private int nrOfResults;

  /**
   * Default constructor, all collaborators are injected.
   *
   * @param lockFactory factory for the distributed lock that throttles concurrent jobs
   * @param configuration execution settings (worker counts, record limits, lock name)
   * @param solrClient Solr client used to count and page occurrence records
   * @param occurrenceMapReader reader handed to each worker to resolve occurrence records
   * @param jobConfiguration per-download settings (query, paths, format)
   * @param aggregator merges the partial files produced by the workers
   */
  @Inject
  public DownloadMaster(
    LockFactory lockFactory,
    Configuration configuration,
    SolrClient solrClient,
    OccurrenceMapReader occurrenceMapReader,
    DownloadJobConfiguration jobConfiguration,
    DownloadAggregator aggregator
  ) {
    conf = configuration;
    this.jobConfiguration = jobConfiguration;
    this.lockFactory = lockFactory;
    this.solrClient = solrClient;
    this.occurrenceMapReader = occurrenceMapReader;
    this.aggregator = aggregator;
  }

  /**
   * Aggregates the results collected so far and stops this actor (and thereby its children).
   */
  private void aggregateAndShutdown() {
    aggregator.aggregate(results);
    getContext().stop(getSelf());
  }

  /**
   * Handles the two expected messages: {@link Start} launches the workers, {@link Result}
   * collects one worker's output. Once all workers have reported, the results are aggregated
   * and the actor shuts itself down. Anything else is passed to {@link #unhandled(Object)}.
   */
  @Override
  public void onReceive(Object message) throws Exception {
    if (message instanceof Start) {
      runActors();
    } else if (message instanceof Result) {
      results.add((Result) message);
      nrOfResults += 1;
      if (nrOfResults == calcNrOfWorkers) {
        aggregateAndShutdown();
      }
    } else {
      // Publish unexpected messages to the event stream instead of silently dropping them.
      unhandled(message);
    }
  }

  /**
   * Creates and gets a reference to a Lock using the configured lock name.
   */
  private Lock getLock() {
    return lockFactory.makeLock(conf.lockName);
  }

  /**
   * Executes a count-only search (rows=0) and returns the number of matching records.
   * The optional {@code query} is applied as a filter query on top of the default query.
   * On any Solr/IO failure the error is logged and 0 is returned, which causes the download
   * to complete empty rather than fail.
   */
  private Long getSearchCount(String query) {
    try {
      SolrQuery solrQuery = new SolrQuery().setQuery(SolrConstants.DEFAULT_QUERY).setRows(0);
      if (!Strings.isNullOrEmpty(query)) {
        solrQuery.addFilterQuery(query);
      }
      QueryResponse queryResponse = solrClient.query(solrQuery);
      return queryResponse.getResults().getNumFound();
    } catch (SolrServerException | IOException e) {
      LOG.error("Error executing Solr query", e);
      return 0L;
    }
  }

  /**
   * Run the list of jobs. The amount of records is assigned evenly among the worker threads.
   * If the amount of records is not divisible by the calcNrOfWorkers the remaining records
   * are assigned "evenly" among the first jobs.
   */
  private void runActors() {
    StopWatch stopwatch = new StopWatch();
    stopwatch.start();

    // Start from a clean temporary directory.
    File downloadTempDir = new File(jobConfiguration.getDownloadTempDir());
    if (downloadTempDir.exists()) {
      FileUtils.deleteDirectoryRecursively(downloadTempDir);
    }
    downloadTempDir.mkdirs();

    // Keep the count as a long until it has been capped: Solr's numFound is a long and
    // narrowing it to int before clamping would silently truncate for very large indexes.
    final long recordCount = getSearchCount(jobConfiguration.getSolrQuery());
    if (recordCount <= 0) { // no work to do: shut down the system
      aggregateAndShutdown();
    } else {
      // Safe narrowing: the clamped value is bounded by conf.maximumNrOfRecords (an int).
      final int nrOfRecords = (int) Math.min(recordCount, (long) conf.maximumNrOfRecords);
      // Calculates the required workers: a single worker for small downloads, otherwise
      // capped by both the configured maximum and the minimum-records-per-job constraint.
      calcNrOfWorkers =
        conf.minNrOfRecords >= nrOfRecords ? 1 : Math.min(conf.nrOfWorkers, nrOfRecords / conf.minNrOfRecords);

      ActorRef workerRouter =
        getContext().actorOf(new Props(new DownloadActorsFactory(jobConfiguration.getDownloadFormat()))
                               .withRouter(new RoundRobinRouter(calcNrOfWorkers)),
                             "downloadWorkerRouter");

      // Number of records that will be assigned to each job
      int sizeOfChunks = Math.max(nrOfRecords / calcNrOfWorkers, 1);
      // Remaining records, that are not assigned to a job yet
      int remaining = nrOfRecords - (sizeOfChunks * calcNrOfWorkers);
      // How many of the remaining records will be assigned to one job
      int remainingPerJob = remaining > 0 ? Math.max(remaining / calcNrOfWorkers, 1) : 0;

      int to = 0;
      int additionalJobsCnt = 0;
      for (int i = 0; i < calcNrOfWorkers; i++) {
        int from = i == 0 ? 0 : to;
        to = from + sizeOfChunks + remainingPerJob;

        // Calculates the remaining jobs that will be assigned to the new FileJob.
        additionalJobsCnt += remainingPerJob;
        if (remainingPerJob != 0 && additionalJobsCnt > remaining) {
          remainingPerJob = additionalJobsCnt - remaining;
        } else if (additionalJobsCnt == remaining) {
          remainingPerJob = 0;
        }

        // Awaits for an available thread
        Lock lock = getLock();
        DownloadFileWork work = new DownloadFileWork(from,
                                                     to,
                                                     jobConfiguration.getSourceDir()
                                                     + Path.SEPARATOR
                                                     + jobConfiguration.getDownloadKey()
                                                     + Path.SEPARATOR
                                                     + jobConfiguration.getDownloadTableName(),
                                                     i,
                                                     jobConfiguration.getSolrQuery(),
                                                     lock,
                                                     solrClient,
                                                     occurrenceMapReader);
        LOG.info("Requesting a lock for job {}, detail: {}", i, work.toString());
        lock.lock();
        LOG.info("Lock granted for job {}, detail: {}", i, work.toString());
        // Adds the Job to the list. The file name is the output file name + the sequence i
        workerRouter.tell(work, getSelf());
      }
      // NOTE: this measures only partitioning and dispatching of the jobs; the workers run
      // asynchronously and report back via Result messages.
      stopwatch.stop();
      long timeInSeconds = TimeUnit.MILLISECONDS.toSeconds(stopwatch.getTime());
      LOG.info(String.format(FINISH_MSG_FMT, TimeUnit.SECONDS.toMinutes(timeInSeconds), timeInSeconds % 60));
    }
  }

  /**
   * Used as a command to start this master actor.
   */
  public static class Start { }

  /**
   * Creates an instance of the download actor/job to be used, based on the download format.
   */
  private static class DownloadActorsFactory implements UntypedActorFactory {

    private final DownloadFormat downloadFormat;

    DownloadActorsFactory(DownloadFormat downloadFormat) {
      this.downloadFormat = downloadFormat;
    }

    @Override
    public Actor create() throws Exception {
      if (downloadFormat == DownloadFormat.SIMPLE_CSV) {
        return new SimpleCsvDownloadActor();
      } else if (downloadFormat == DownloadFormat.DWCA) {
        return new DownloadDwcaActor();
      }
      // Include the offending value to make the failure diagnosable from the log alone.
      throw new IllegalStateException("Unsupported download format: " + downloadFormat);
    }
  }

  /**
   * Utility class that holds the general execution settings.
   */
  public static class Configuration {

    // Maximum number of workers
    private final int nrOfWorkers;

    // Minimum number of records per job
    private final int minNrOfRecords;

    // Limits the maximum number of records that can be processed.
    // This parameter avoids using this class to create a file with a size beyond a maximum.
    private final int maximumNrOfRecords;

    // Occurrence download lock/counter name
    private final String lockName;

    /**
     * Default/full constructor.
     *
     * @param nrOfWorkers maximum number of concurrent workers
     * @param minNrOfRecords minimum number of records handled by a single job
     * @param maximumNrOfRecords hard cap on the total number of records processed
     * @param lockName name of the ZooKeeper lock/counter used to throttle jobs
     */
    @Inject
    public Configuration(
      @Named(DownloadWorkflowModule.DefaultSettings.MAX_THREADS_KEY) int nrOfWorkers,
      @Named(DownloadWorkflowModule.DefaultSettings.JOB_MIN_RECORDS_KEY) int minNrOfRecords,
      @Named(DownloadWorkflowModule.DefaultSettings.MAX_RECORDS_KEY) int maximumNrOfRecords,
      @Named(DownloadWorkflowModule.DefaultSettings.ZK_LOCK_NAME_KEY) String lockName
    ) {
      this.nrOfWorkers = nrOfWorkers;
      this.minNrOfRecords = minNrOfRecords;
      this.maximumNrOfRecords = maximumNrOfRecords;
      this.lockName = lockName;
    }
  }
}