/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tez.mapreduce.committer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.classification.InterfaceAudience.Public;
import org.apache.hadoop.mapred.FileOutputCommitter;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JobContext;
import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.JobStatus;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.mapreduce.TypeConverter;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.tez.common.TezUtils;
import org.apache.tez.dag.api.TezUncheckedException;
import org.apache.tez.dag.api.UserPayload;
import org.apache.tez.dag.api.client.VertexStatus;
import org.apache.tez.mapreduce.hadoop.MRConfig;
import org.apache.tez.mapreduce.hadoop.MRJobConfig;
import org.apache.tez.runtime.api.OutputCommitter;
import org.apache.tez.runtime.api.OutputCommitterContext;
import java.io.IOException;
/**
 * Implements the {@link OutputCommitter} and provides Map Reduce compatible
 * output commit operations for Map Reduce compatible data sinks.
 *
 * <p>{@link #initialize()} must be invoked before any other operation; the
 * remaining public methods throw {@link IllegalStateException} (a
 * {@link RuntimeException}) when called on an uninitialized instance.
 */
@Public
public class MROutputCommitter extends OutputCommitter {

  private static final Logger LOG = LoggerFactory.getLogger(MROutputCommitter.class);

  /** MapReduce committer that every commit operation is delegated to. */
  private org.apache.hadoop.mapreduce.OutputCommitter committer = null;
  /** Job context handed to the delegate committer's job-level calls. */
  private JobContext jobContext = null;
  /** Set to {@code true} as the last step of {@link #initialize()}. */
  private volatile boolean initialized = false;
  /** Configuration built from the output's user payload. */
  private JobConf jobConf = null;
  /** Whether the new (mapreduce) API committer was selected. */
  private boolean newApiCommitter;

  public MROutputCommitter(OutputCommitterContext committerContext) {
    super(committerContext);
  }

  /**
   * Builds the {@link JobConf} from the output's user payload, merges the
   * current user's credentials into it, records the DAG attempt number as the
   * application attempt id, and creates the delegate committer and job context.
   *
   * @throws IOException if the payload cannot be converted to a configuration
   *     or the current user cannot be determined
   */
  @Override
  public void initialize() throws IOException {
    UserPayload userPayload = getContext().getOutputUserPayload();
    // Defensive: a missing payload object is handled like an empty payload.
    if (userPayload == null || !userPayload.hasPayload()) {
      jobConf = new JobConf();
    } else {
      jobConf = new JobConf(
          TezUtils.createConfFromUserPayload(userPayload));
    }

    // Read all credentials into the credentials instance stored in JobConf.
    jobConf.getCredentials().mergeAll(UserGroupInformation.getCurrentUser().getCredentials());
    jobConf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID,
        getContext().getDAGAttemptNumber());
    committer = getOutputCommitter(getContext());
    jobContext = getJobContextFromVertexContext(getContext());
    // Published last so that other methods only proceed once all fields are set.
    initialized = true;
  }

  /**
   * Delegates to the underlying committer's {@code setupJob}.
   *
   * @throws IllegalStateException if {@link #initialize()} has not completed
   */
  @Override
  public void setupOutput() throws IOException {
    ensureInitialized();
    committer.setupJob(jobContext);
  }

  /**
   * Delegates to the underlying committer's {@code commitJob}.
   *
   * @throws IllegalStateException if {@link #initialize()} has not completed
   */
  @Override
  public void commitOutput() throws IOException {
    ensureInitialized();
    committer.commitJob(jobContext);
  }

  /**
   * Translates the vertex state into a MapReduce job state and delegates to
   * the underlying committer's {@code abortJob}.
   *
   * @param finalState final state of the vertex
   * @throws IllegalStateException if {@link #initialize()} has not completed
   * @throws TezUncheckedException if {@code finalState} has no job-state mapping
   */
  @Override
  public void abortOutput(VertexStatus.State finalState) throws IOException {
    ensureInitialized();
    JobStatus.State jobState = getJobStateFromVertexStatusState(finalState);
    committer.abortJob(jobContext, jobState);
  }

  /**
   * Creates the MapReduce committer for this output.
   *
   * <p>The new API is used when either {@code mapred.reducer.new-api} or
   * {@code mapred.mapper.new-api} is set to true in the job configuration; in
   * that case the committer is obtained from the configured output format.
   * Otherwise the class named by {@code mapred.output.committer.class}
   * (default {@link FileOutputCommitter}) is instantiated.
   *
   * @param context committer context supplying application, vertex and output info
   * @return the committer instance
   * @throws TezUncheckedException if the new-API committer cannot be created
   */
  private org.apache.hadoop.mapreduce.OutputCommitter
      getOutputCommitter(OutputCommitterContext context) {
    newApiCommitter = jobConf.getBoolean("mapred.reducer.new-api", false)
        || jobConf.getBoolean("mapred.mapper.new-api", false);
    LOG.info("Committer for {}:{} using {} mapred API",
        context.getVertexName(), context.getOutputName(),
        newApiCommitter ? "new" : "old");

    org.apache.hadoop.mapreduce.OutputCommitter created;
    if (newApiCommitter) {
      TaskAttemptID taskAttemptID = new TaskAttemptID(
          Long.toString(context.getApplicationId().getClusterTimestamp()),
          context.getApplicationId().getId(),
          getTaskType(),
          0, context.getDAGAttemptNumber());
      TaskAttemptContext taskContext = new TaskAttemptContextImpl(jobConf,
          taskAttemptID);
      try {
        OutputFormat<?, ?> outputFormat = ReflectionUtils.newInstance(
            taskContext.getOutputFormatClass(), jobConf);
        created = outputFormat.getOutputCommitter(taskContext);
      } catch (Exception e) {
        if (e instanceof InterruptedException) {
          // Keep the interrupt status visible to code further up the stack.
          Thread.currentThread().interrupt();
        }
        throw new TezUncheckedException(e);
      }
    } else {
      created = ReflectionUtils.newInstance(jobConf.getClass(
          "mapred.output.committer.class", FileOutputCommitter.class,
          org.apache.hadoop.mapred.OutputCommitter.class), jobConf);
    }
    LOG.info("OutputCommitter for outputName={}, vertexName={}, outputCommitterClass={}",
        context.getOutputName(), context.getVertexName(), created.getClass().getName());
    return created;
  }

  // FIXME we are using ApplicationId as DAG id
  /**
   * Builds a job context whose job id is derived from the application id.
   *
   * @param context committer context supplying the application id
   * @return job context wrapping {@link #jobConf}
   */
  private JobContext getJobContextFromVertexContext(OutputCommitterContext context)
      throws IOException {
    JobID jobId = TypeConverter.fromYarn(
        context.getApplicationId());
    return new MRJobContextImpl(jobConf, jobId);
  }

  /**
   * Maps a vertex state onto the corresponding MapReduce job state.
   * {@code FAILED} and {@code ERROR} both map to {@code JobStatus.State.FAILED}.
   *
   * @param state vertex state to translate
   * @return the matching job state
   * @throws TezUncheckedException if the state has no mapping
   */
  private JobStatus.State getJobStateFromVertexStatusState(VertexStatus.State state) {
    switch (state) {
      case INITED:
        return JobStatus.State.PREP;
      case RUNNING:
        return JobStatus.State.RUNNING;
      case SUCCEEDED:
        return JobStatus.State.SUCCEEDED;
      case KILLED:
        return JobStatus.State.KILLED;
      case FAILED:
      case ERROR:
        return JobStatus.State.FAILED;
      default:
        throw new TezUncheckedException("Unknown VertexStatus.State: " + state);
    }
  }

  /** Local subclass of the mapred {@code JobContextImpl} used to build the job context. */
  private static class MRJobContextImpl
      extends org.apache.hadoop.mapred.JobContextImpl {

    public MRJobContextImpl(JobConf jobConf, JobID jobId) {
      super(jobConf, jobId);
    }
  }

  /**
   * Reports whether the underlying committer supports recovery.
   *
   * @return the delegate committer's {@code isRecoverySupported()} result
   * @throws IllegalStateException if {@link #initialize()} has not completed
   */
  @SuppressWarnings("deprecation")
  @Override
  public boolean isTaskRecoverySupported() {
    ensureInitialized();
    return committer.isRecoverySupported();
  }

  /**
   * Asks the underlying committer to recover the given task attempt.
   *
   * @param taskIndex index of the task to recover
   * @param attemptId attempt number of the task
   * @throws IllegalStateException if {@link #initialize()} has not completed
   */
  @Override
  public void recoverTask(int taskIndex, int attemptId) throws IOException {
    ensureInitialized();
    // The identifier is the cluster timestamp immediately followed by the vertex index.
    String jtIdentifier =
        Long.toString(getContext().getApplicationId().getClusterTimestamp())
            + String.valueOf(getContext().getVertexIndex());
    TaskAttemptID taskAttemptID = new TaskAttemptID(
        jtIdentifier,
        getContext().getApplicationId().getId(),
        getTaskType(),
        taskIndex, attemptId);
    TaskAttemptContext taskContext = new TaskAttemptContextImpl(jobConf,
        taskAttemptID);
    committer.recoverTask(taskContext);
  }

  /** Fails fast when an operation is attempted before {@link #initialize()} completed. */
  private void ensureInitialized() {
    if (!initialized) {
      throw new IllegalStateException("Committer not initialized");
    }
  }

  /** Returns MAP when the configuration marks a map processor, REDUCE otherwise. */
  private TaskType getTaskType() {
    return jobConf.getBoolean(MRConfig.IS_MAP_PROCESSOR, false)
        ? TaskType.MAP : TaskType.REDUCE;
  }
}