/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package gobblin.runtime.mapreduce;
import com.google.common.collect.Maps;
import gobblin.configuration.State;
import gobblin.configuration.WorkUnitState;
import gobblin.metrics.event.EventSubmitter;
import gobblin.runtime.TaskContext;
import gobblin.runtime.task.BaseAbstractTask;
import java.io.IOException;
import java.util.Map;
import lombok.extern.slf4j.Slf4j;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
/**
* A task that runs an MR job.
*
* Usage:
* TaskUtils.setTaskFactoryClass(workUnit, MRTaskFactory.class);
* MRTask.serializeJobToState(workUnit, myJob);
*
* Subclasses can override {@link #createJob()} to customize the way the MR job is prepared.
*/
@Slf4j
public class MRTask extends BaseAbstractTask {
private static final String JOB_CONFIGURATION_PREFIX = "MRTask.jobConfiguration.";
public static class Events {
public static final String MR_JOB_STARTED_EVENT = "MRJobStarted";
public static final String MR_JOB_SUCCESSFUL = "MRJobSuccessful";
public static final String MR_JOB_FAILED = "MRJobFailed";
public static final String JOB_URL = "jobTrackingUrl";
public static final String FAILURE_CONTEXT = "failureContext";
}
public static void serializeJobToState(State state, Job job) {
for (Map.Entry<String, String> entry : job.getConfiguration()) {
state.setProp(JOB_CONFIGURATION_PREFIX + entry.getKey(), entry.getValue());
}
}
private final TaskContext taskContext;
private final EventSubmitter eventSubmitter;
public MRTask(TaskContext taskContext) {
super(taskContext);
this.taskContext = taskContext;
this.eventSubmitter = new EventSubmitter.Builder(this.metricContext, "gobblin.MRTask")
.addMetadata(additionalEventMetadata()).build();
}
@Override
public void run() {
try {
Job job = createJob();
job.submit();
this.eventSubmitter.submit(Events.MR_JOB_STARTED_EVENT, Events.JOB_URL, job.getTrackingURL());
job.waitForCompletion(false);
if (job.isSuccessful()) {
this.eventSubmitter.submit(Events.MR_JOB_SUCCESSFUL, Events.JOB_URL, job.getTrackingURL());
this.workingState = WorkUnitState.WorkingState.SUCCESSFUL;
} else {
this.eventSubmitter.submit(Events.MR_JOB_FAILED, Events.JOB_URL, job.getTrackingURL());
this.workingState = WorkUnitState.WorkingState.FAILED;
}
} catch (Throwable t) {
log.error("Failed to run MR job.", t);
this.eventSubmitter.submit(Events.MR_JOB_FAILED, Events.FAILURE_CONTEXT, t.getMessage());
this.workingState = WorkUnitState.WorkingState.FAILED;
}
}
protected Map<String, String> additionalEventMetadata() {
return Maps.newHashMap();
}
protected Job createJob() throws IOException {
Job job = Job.getInstance(new Configuration());
for (Map.Entry<Object, Object> entry : this.taskContext.getTaskState().getProperties().entrySet()) {
if (entry.getKey() instanceof String && ((String) entry.getKey()).startsWith(JOB_CONFIGURATION_PREFIX)) {
String actualKey = ((String) entry.getKey()).substring(JOB_CONFIGURATION_PREFIX.length());
job.getConfiguration().set(actualKey, (String) entry.getValue());
}
}
return job;
}
}