package gobblin.compaction.mapreduce;
import gobblin.compaction.action.CompactionCompleteAction;
import gobblin.compaction.suite.CompactionSuite;
import gobblin.compaction.suite.CompactionSuiteUtils;
import gobblin.compaction.verify.CompactionVerifier;
import gobblin.dataset.Dataset;
import gobblin.runtime.TaskContext;
import gobblin.runtime.mapreduce.MRTask;
import java.util.List;
import java.io.IOException;
import lombok.extern.slf4j.Slf4j;
import org.apache.hadoop.mapreduce.Job;
/**
* Customized task of type {@link MRTask}, which runs MR job to compact dataset.
* The job creation is delegated to {@link CompactionSuite#createJob(Dataset)}
* After job is created, {@link MRCompactionTask#run()} is invoked and after compaction is finished.
* a callback {@link CompactionSuite#getCompactionCompleteActions()} will be invoked
*/
@Slf4j
public class MRCompactionTask extends MRTask {
protected final CompactionSuite suite;
protected final Dataset dataset;
/**
* Constructor
*/
public MRCompactionTask(TaskContext taskContext) throws IOException {
super(taskContext);
this.suite = CompactionSuiteUtils.getCompactionSuiteFactory (taskContext.getTaskState()).
createSuite(taskContext.getTaskState());
this.dataset = this.suite.load(taskContext.getTaskState());
}
/**
* Below three steps are performed for a compaction task:
* Do verifications before a map-reduce job is launched.
* Start a map-reduce job and wait until it is finished
* Do post-actions after map-reduce job is finished
*/
@Override
public void run() {
List<CompactionVerifier> verifiers = this.suite.getMapReduceVerifiers();
for (CompactionVerifier verifier : verifiers) {
if (!verifier.verify(dataset)) {
log.error("Verification {} for {} is not passed.", verifier.getName(), dataset.datasetURN());
return;
}
}
super.run();
List<CompactionCompleteAction> actions = this.suite.getCompactionCompleteActions();
for (CompactionCompleteAction action: actions) {
action.onCompactionJobComplete(dataset);
}
}
/**
* Create a map-reduce job
* The real job configuration is delegated to {@link CompactionSuite#createJob(Dataset)}
*
* @return a map-reduce job
*/
protected Job createJob() throws IOException {
return this.suite.createJob(dataset);
}
}