package gobblin.compaction.action;

import java.io.IOException;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.Job;

import lombok.AllArgsConstructor;
import lombok.extern.slf4j.Slf4j;

import gobblin.compaction.mapreduce.CompactionAvroJobConfigurator;
import gobblin.compaction.mapreduce.MRCompactorJobRunner;
import gobblin.compaction.mapreduce.avro.AvroKeyMapper;
import gobblin.compaction.parser.CompactionPathParser;
import gobblin.compaction.verify.InputRecordCountHelper;
import gobblin.configuration.State;
import gobblin.dataset.FileSystemDataset;
import gobblin.util.HadoopUtils;
import gobblin.util.WriterUtils;

/**
 * A type of post action {@link CompactionCompleteAction} which focuses on file operations:
 * it publishes the map-reduce output to the dataset's destination directory and records
 * the output record count for the next run.
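 *
 * <p>A minimal usage sketch (illustrative, not the canonical wiring; assumes
 * {@code state}, {@code configurator} and {@code dataset} come from an existing
 * compaction flow):
 * <pre>{@code
 *   CompactionCompleteFileOperationAction action =
 *       new CompactionCompleteFileOperationAction(state, configurator);
 *   action.onCompactionJobComplete(dataset);
 * }</pre>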
*/
@Slf4j
@AllArgsConstructor
public class CompactionCompleteFileOperationAction implements CompactionCompleteAction<FileSystemDataset> {

  protected State state;
  private CompactionAvroJobConfigurator configurator;
  private InputRecordCountHelper helper;
  private FileSystem fs;

  public CompactionCompleteFileOperationAction(State state, CompactionAvroJobConfigurator configurator) {
    this.state = state;
    this.helper = new InputRecordCountHelper(state);
    this.configurator = configurator;
    this.fs = configurator.getFs();
  }

  /**
   * Replace the destination folder with the new output of the map-reduce job,
   * and write a record count file for the next run's count comparison.
   */
  @Override
  public void onCompactionJobComplete(FileSystemDataset dataset) {
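    // Only act if a map-reduce job was actually created for this dataset.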
    if (configurator != null && configurator.isJobCreated()) {
      CompactionPathParser.CompactionParserResult result = new CompactionPathParser(state).parse(dataset);
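      // The MR output is staged under a tmp path and later swapped into the
      // dataset's final publish directory (dstPath) resolved by the path parser.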
      Path tmpPath = configurator.getMrOutputPath();
      Path dstPath = new Path(result.getDstAbsoluteDir());
      try {
        // Read the record count from the map-reduce job's counter.
        Job job = this.configurator.getConfiguredJob();
        Counter counter = job.getCounters().findCounter(AvroKeyMapper.EVENT_COUNTER.RECORD_COUNT);
        long recordCount = counter.getValue();
        // Move the map-reduce output to the final destination defined by the dataset:
        // delete any existing destination first so the rename below can succeed.
        this.fs.delete(dstPath, true);
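        // Recreate missing parent directories with the permission configured for
        // compaction job output, falling back to the filesystem default.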
        FsPermission permission = HadoopUtils.deserializeFsPermission(this.state,
            MRCompactorJobRunner.COMPACTION_JOB_OUTPUT_DIR_PERMISSION,
            FsPermission.getDefault());
        WriterUtils.mkdirsWithRecursivePermission(this.fs, dstPath.getParent(), permission);
        if (!this.fs.rename(tmpPath, dstPath)) {
          throw new IOException(String.format("Unable to move %s to %s", tmpPath, dstPath));
        }
        // Persist the record count so the next run can compare input and output counts.
        InputRecordCountHelper.writeRecordCount(helper.getFs(), dstPath, recordCount);
        log.info("Wrote record count {} to {}", recordCount, dstPath);
      } catch (Exception e) {
        // Log the full exception (with stack trace), not just its message.
        log.error("Compaction complete file operation failed", e);
      }
    }
  }

  public String getName() {
    return CompactionCompleteFileOperationAction.class.getName();
  }
}