/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tez.test;
import static org.junit.Assert.assertEquals;
import java.io.IOException;
import java.util.ArrayList;
import java.util.EnumSet;
import java.util.List;
import java.util.Random;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.tez.client.TezClient;
import org.apache.tez.client.TezClientUtils;
import org.apache.tez.common.TezCommonUtils;
import org.apache.tez.common.TezUtils;
import org.apache.tez.common.counters.DAGCounter;
import org.apache.tez.common.counters.TezCounter;
import org.apache.tez.common.counters.TezCounters;
import org.apache.tez.dag.api.DAG;
import org.apache.tez.dag.api.Edge;
import org.apache.tez.dag.api.EdgeProperty;
import org.apache.tez.dag.api.EdgeProperty.DataMovementType;
import org.apache.tez.dag.api.EdgeProperty.DataSourceType;
import org.apache.tez.dag.api.EdgeProperty.SchedulingType;
import org.apache.tez.dag.api.InputDescriptor;
import org.apache.tez.dag.api.ProcessorDescriptor;
import org.apache.tez.dag.api.TezConfiguration;
import org.apache.tez.dag.api.TezConstants;
import org.apache.tez.dag.api.UserPayload;
import org.apache.tez.dag.api.Vertex;
import org.apache.tez.dag.api.VertexManagerPlugin;
import org.apache.tez.dag.api.VertexManagerPluginContext;
import org.apache.tez.dag.api.VertexManagerPluginContext.ScheduleTaskRequest;
import org.apache.tez.dag.api.VertexManagerPluginDescriptor;
import org.apache.tez.dag.api.client.DAGClient;
import org.apache.tez.dag.api.client.DAGStatus;
import org.apache.tez.dag.api.client.StatusGetOpts;
import org.apache.tez.dag.app.RecoveryParser;
import org.apache.tez.dag.app.dag.impl.ImmediateStartVertexManager;
import org.apache.tez.dag.history.HistoryEvent;
import org.apache.tez.dag.history.HistoryEventType;
import org.apache.tez.dag.history.events.TaskAttemptFinishedEvent;
import org.apache.tez.dag.history.recovery.RecoveryService;
import org.apache.tez.dag.library.vertexmanager.InputReadyVertexManager;
import org.apache.tez.dag.library.vertexmanager.ShuffleVertexManager;
import org.apache.tez.runtime.api.Event;
import org.apache.tez.runtime.api.ProcessorContext;
import org.apache.tez.runtime.api.TaskAttemptIdentifier;
import org.apache.tez.runtime.api.events.VertexManagerEvent;
import org.apache.tez.runtime.library.processor.SimpleProcessor;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import com.google.common.collect.Lists;
public class TestAMRecovery {
private static final Logger LOG = LoggerFactory.getLogger(TestAMRecovery.class);
// Base configuration; the mini DFS cluster is built from it and the mini Tez
// cluster configuration is copied from it.
private static final Configuration conf = new Configuration();
// Per-test session configuration, rebuilt in setup().
private static TezConfiguration tezConf;
// Used both as the YARN RM max AM attempts and as the Tez AM max app attempts.
private static final int MAX_AM_ATTEMPT = 10;
private static MiniTezCluster miniTezCluster = null;
// Base directory of the mini DFS cluster and parent of the per-test staging dirs.
private static final String TEST_ROOT_DIR = "target" + Path.SEPARATOR
    + TestAMRecovery.class.getName() + "-tmpDir";
private static MiniDFSCluster dfsCluster = null;
private static TezClient tezSession = null;
private static FileSystem remoteFs = null;
// Configuration keys handed to the test vertex managers through their user payload.
private static final String FAIL_ON_PARTIAL_FINISHED = "FAIL_ON_PARTIAL_COMPLETED";
private static final String FAIL_ON_ATTEMPT = "FAIL_ON_ATTEMPT";
/**
 * Starts the mini DFS cluster and, if it is not already running, the mini Tez
 * cluster shared by all tests of this class.
 *
 * @throws Exception if the mini Tez cluster cannot be initialized or started;
 *           a failure to start the mini DFS cluster is rethrown as a
 *           {@link RuntimeException}
 */
@BeforeClass
public static void beforeClass() throws Exception {
  LOG.info("Starting mini clusters");
  try {
    conf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, TEST_ROOT_DIR);
    dfsCluster =
        new MiniDFSCluster.Builder(conf).numDataNodes(3).format(true)
            .racks(null).build();
    remoteFs = dfsCluster.getFileSystem();
  } catch (IOException io) {
    throw new RuntimeException("problem starting mini dfs cluster", io);
  }
  if (miniTezCluster == null) {
    miniTezCluster =
        new MiniTezCluster(TestAMRecovery.class.getName(), 1, 1, 1);
    Configuration miniTezconf = new Configuration(conf);
    miniTezconf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, MAX_AM_ATTEMPT);
    miniTezconf.set("fs.defaultFS", remoteFs.getUri().toString()); // use HDFS
    // This must be set on miniTezconf: it is a copy of conf taken above, so a
    // value written to conf at this point would never reach the cluster.
    miniTezconf.setLong(TezConfiguration.TEZ_AM_SLEEP_TIME_BEFORE_EXIT_MILLIS, 500);
    miniTezCluster.init(miniTezconf);
    miniTezCluster.start();
  }
}
/**
 * Shuts down, in this order, any Tez session still open, the mini Tez cluster
 * and the mini DFS cluster. Each shutdown is best effort: a failure is printed
 * and the remaining components are still stopped.
 */
@AfterClass
public static void afterClass() throws InterruptedException {
  try {
    if (tezSession != null) {
      LOG.info("Stopping Tez Session");
      tezSession.stop();
    }
  } catch (Exception sessionFailure) {
    sessionFailure.printStackTrace();
  }

  try {
    if (miniTezCluster != null) {
      LOG.info("Stopping MiniTezCluster");
      miniTezCluster.stop();
      // Only cleared when stop() succeeded.
      miniTezCluster = null;
    }
  } catch (Exception tezClusterFailure) {
    tezClusterFailure.printStackTrace();
  }

  try {
    if (dfsCluster != null) {
      LOG.info("Stopping DFSCluster");
      dfsCluster.shutdown();
    }
  } catch (Exception dfsFailure) {
    dfsFailure.printStackTrace();
  }
}
/**
 * Creates a fresh, randomly named staging directory on the remote file system,
 * builds the per-test Tez configuration on top of the mini cluster's
 * configuration and starts a new Tez session with it.
 *
 * @throws Exception if the staging directory or the session cannot be set up
 */
@Before
public void setup() throws Exception {
  LOG.info("Starting session");
  String stagingDirName = String.valueOf(new Random().nextInt(100000));
  Path remoteStagingDir =
      remoteFs.makeQualified(new Path(TEST_ROOT_DIR, stagingDirName));
  TezClientUtils.ensureStagingDirExists(conf, remoteStagingDir);

  tezConf = new TezConfiguration(miniTezCluster.getConfig());

  // Recovery-related settings.
  tezConf.setInt(TezConfiguration.DAG_RECOVERY_MAX_UNFLUSHED_EVENTS, 0);
  tezConf.setBoolean(
      RecoveryService.TEZ_TEST_RECOVERY_DRAIN_EVENTS_WHEN_STOPPED, true);
  tezConf.setBoolean(
      TezConfiguration.TEZ_AM_STAGING_SCRATCH_DATA_AUTO_DELETE, false);
  tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, remoteStagingDir.toString());

  // AM settings.
  tezConf.set(TezConfiguration.TEZ_AM_LOG_LEVEL, "INFO");
  tezConf.setBoolean(TezConfiguration.TEZ_AM_NODE_BLACKLISTING_ENABLED, false);
  tezConf.setInt(TezConfiguration.TEZ_AM_MAX_APP_ATTEMPTS, MAX_AM_ATTEMPT);
  tezConf.setInt(TezConfiguration.TEZ_AM_RESOURCE_MEMORY_MB, 500);
  tezConf.set(TezConfiguration.TEZ_AM_LAUNCH_CMD_OPTS, " -Xmx256m");
  tezConf.setBoolean(TezConfiguration.TEZ_AM_SESSION_MODE, true);

  tezSession = TezClient.create("TestDAGRecovery", tezConf);
  tezSession.start();
}
/**
 * Stops the Tez session started by {@code setup()}, if there is one, and clears
 * the reference. A failure while stopping is printed and otherwise ignored.
 */
@After
public void teardown() throws InterruptedException {
  try {
    if (tezSession != null) {
      LOG.info("Stopping Tez Session");
      tezSession.stop();
    }
  } catch (Exception stopFailure) {
    stopFailure.printStackTrace();
  }
  tezSession = null;
}
/**
 * Fine-grained, task-level recovery over a BROADCAST edge. In vertex v1, task 0
 * has finished and task 1 has not been started when the history is flushed and
 * the AM dies. Once the AM is recovered, task 0 is not re-run and task 1 is run.
 *
 * @throws Exception
 */
@Test(timeout = 120000)
public void testVertexPartiallyFinished_Broadcast() throws Exception {
  DAG dag =
      createDAG("VertexPartiallyFinished_Broadcast", ControlledImmediateStartVertexManager.class,
          DataMovementType.BROADCAST, true);
  TezCounters counters = runDAGAndVerify(dag, DAGStatus.State.SUCCEEDED);
  assertEquals(4, counters.findCounter(DAGCounter.NUM_SUCCEEDED_TASKS).getValue());
  assertEquals(2, counters.findCounter(TestCounter.Counter_1).getValue());

  // readRecoveryLog(n) returns the events of AM attempts 1..n combined.
  List<HistoryEvent> historyEvents1 = readRecoveryLog(1);
  List<HistoryEvent> historyEvents2 = readRecoveryLog(2);
  printHistoryEvents(historyEvents1, 1);
  // Print the list read for attempt 2, not the attempt-1 list a second time.
  printHistoryEvents(historyEvents2, 2);

  // In AM attempt 1, task_0 of v1 is finished and task_1 of v1 is not.
  assertEquals(1, findTaskAttemptFinishedEvent(historyEvents1, 0, 0).size());
  assertEquals(0, findTaskAttemptFinishedEvent(historyEvents1, 0, 1).size());
  // Over AM attempts 1 and 2, task_0 of v1 still has a single finished event
  // (it was not re-run) and task_1 of v1 has finished in attempt 2.
  assertEquals(1, findTaskAttemptFinishedEvent(historyEvents2, 0, 0).size());
  assertEquals(1, findTaskAttemptFinishedEvent(historyEvents2, 0, 1).size());
}
/**
 * Fine-grained, task-level recovery over a BROADCAST edge. In vertex v1, both
 * task 0 and task 1 have finished when the history is flushed and the AM dies.
 * Once the AM is recovered, neither task 0 nor task 1 is re-run.
 *
 * @throws Exception
 */
@Test(timeout = 120000)
public void testVertexCompletelyFinished_Broadcast() throws Exception {
  DAG dag =
      createDAG("VertexCompletelyFinished_Broadcast", ControlledImmediateStartVertexManager.class,
          DataMovementType.BROADCAST, false);
  TezCounters counters = runDAGAndVerify(dag, DAGStatus.State.SUCCEEDED);
  assertEquals(4, counters.findCounter(DAGCounter.NUM_SUCCEEDED_TASKS).getValue());
  assertEquals(2, counters.findCounter(TestCounter.Counter_1).getValue());

  // readRecoveryLog(n) returns the events of AM attempts 1..n combined.
  List<HistoryEvent> historyEvents1 = readRecoveryLog(1);
  List<HistoryEvent> historyEvents2 = readRecoveryLog(2);
  printHistoryEvents(historyEvents1, 1);
  // Print the list read for attempt 2, not the attempt-1 list a second time.
  printHistoryEvents(historyEvents2, 2);

  // In AM attempt 1, both task_0 and task_1 of v1 are finished.
  assertEquals(1, findTaskAttemptFinishedEvent(historyEvents1, 0, 0).size());
  assertEquals(1, findTaskAttemptFinishedEvent(historyEvents1, 0, 1).size());
  // Over AM attempts 1 and 2, each task of v1 still has a single finished
  // event, i.e. neither of them was re-run after recovery.
  assertEquals(1, findTaskAttemptFinishedEvent(historyEvents2, 0, 0).size());
  assertEquals(1, findTaskAttemptFinishedEvent(historyEvents2, 0, 1).size());
}
/**
 * Fine-grained, task-level recovery over a ONE_TO_ONE edge. In vertex v1, task 0
 * has finished and task 1 has not been started when the history is flushed and
 * the AM dies. Once the AM is recovered, task 0 is not re-run and task 1 is run.
 *
 * @throws Exception
 */
@Test(timeout = 120000)
public void testVertexPartialFinished_One2One() throws Exception {
  DAG dag =
      createDAG("VertexPartialFinished_One2One", ControlledInputReadyVertexManager.class,
          DataMovementType.ONE_TO_ONE, true);
  TezCounters counters = runDAGAndVerify(dag, DAGStatus.State.SUCCEEDED);
  assertEquals(4, counters.findCounter(DAGCounter.NUM_SUCCEEDED_TASKS).getValue());
  assertEquals(2, counters.findCounter(TestCounter.Counter_1).getValue());

  // readRecoveryLog(n) returns the events of AM attempts 1..n combined.
  List<HistoryEvent> historyEvents1 = readRecoveryLog(1);
  List<HistoryEvent> historyEvents2 = readRecoveryLog(2);
  printHistoryEvents(historyEvents1, 1);
  // Print the list read for attempt 2, not the attempt-1 list a second time.
  printHistoryEvents(historyEvents2, 2);

  // In AM attempt 1, task_0 of v1 is finished and task_1 of v1 is not.
  assertEquals(1, findTaskAttemptFinishedEvent(historyEvents1, 0, 0).size());
  assertEquals(0, findTaskAttemptFinishedEvent(historyEvents1, 0, 1).size());
  // Over AM attempts 1 and 2, task_0 of v1 still has a single finished event
  // (it was not re-run) and task_1 of v1 has finished in attempt 2.
  assertEquals(1, findTaskAttemptFinishedEvent(historyEvents2, 0, 0).size());
  assertEquals(1, findTaskAttemptFinishedEvent(historyEvents2, 0, 1).size());
}
/**
 * Fine-grained, task-level recovery over a ONE_TO_ONE edge. In vertex v1, both
 * task 0 and task 1 have finished when the history is flushed and the AM dies.
 * Once the AM is recovered, neither task 0 nor task 1 is re-run.
 *
 * @throws Exception
 */
@Test(timeout = 120000)
public void testVertexCompletelyFinished_One2One() throws Exception {
  DAG dag =
      createDAG("VertexCompletelyFinished_One2One", ControlledInputReadyVertexManager.class,
          DataMovementType.ONE_TO_ONE, false);
  TezCounters counters = runDAGAndVerify(dag, DAGStatus.State.SUCCEEDED);
  assertEquals(4, counters.findCounter(DAGCounter.NUM_SUCCEEDED_TASKS).getValue());
  assertEquals(2, counters.findCounter(TestCounter.Counter_1).getValue());

  // readRecoveryLog(n) returns the events of AM attempts 1..n combined.
  List<HistoryEvent> historyEvents1 = readRecoveryLog(1);
  List<HistoryEvent> historyEvents2 = readRecoveryLog(2);
  printHistoryEvents(historyEvents1, 1);
  // Print the list read for attempt 2, not the attempt-1 list a second time.
  printHistoryEvents(historyEvents2, 2);

  // In AM attempt 1, both task_0 and task_1 of v1 are finished.
  assertEquals(1, findTaskAttemptFinishedEvent(historyEvents1, 0, 0).size());
  assertEquals(1, findTaskAttemptFinishedEvent(historyEvents1, 0, 1).size());
  // Over AM attempts 1 and 2, each task of v1 still has a single finished
  // event, i.e. neither of them was re-run after recovery.
  assertEquals(1, findTaskAttemptFinishedEvent(historyEvents2, 0, 0).size());
  assertEquals(1, findTaskAttemptFinishedEvent(historyEvents2, 0, 1).size());
}
/**
 * Fine-grained, task-level recovery over a SCATTER_GATHER edge. In vertex v1,
 * task 0 has finished and task 1 has not been started when the history is
 * flushed and the AM dies. Once the AM is recovered, task 0 is not re-run and
 * task 1 is run.
 *
 * @throws Exception
 */
@Test(timeout = 120000)
public void testVertexPartiallyFinished_ScatterGather() throws Exception {
  DAG dag =
      createDAG("VertexPartiallyFinished_ScatterGather", ControlledShuffleVertexManager.class,
          DataMovementType.SCATTER_GATHER, true);
  TezCounters counters = runDAGAndVerify(dag, DAGStatus.State.SUCCEEDED);
  assertEquals(4, counters.findCounter(DAGCounter.NUM_SUCCEEDED_TASKS).getValue());
  assertEquals(2, counters.findCounter(TestCounter.Counter_1).getValue());

  // readRecoveryLog(n) returns the events of AM attempts 1..n combined.
  List<HistoryEvent> historyEvents1 = readRecoveryLog(1);
  List<HistoryEvent> historyEvents2 = readRecoveryLog(2);
  printHistoryEvents(historyEvents1, 1);
  // Print the list read for attempt 2, not the attempt-1 list a second time.
  printHistoryEvents(historyEvents2, 2);

  // In AM attempt 1, task_0 of v1 is finished and task_1 of v1 is not.
  assertEquals(1, findTaskAttemptFinishedEvent(historyEvents1, 0, 0).size());
  assertEquals(0, findTaskAttemptFinishedEvent(historyEvents1, 0, 1).size());
  // Over AM attempts 1 and 2, task_0 of v1 still has a single finished event
  // (it was not re-run) and task_1 of v1 has finished in attempt 2.
  assertEquals(1, findTaskAttemptFinishedEvent(historyEvents2, 0, 0).size());
  assertEquals(1, findTaskAttemptFinishedEvent(historyEvents2, 0, 1).size());
}
/**
 * Fine-grained, task-level recovery over a SCATTER_GATHER edge. In vertex v1,
 * both task 0 and task 1 have finished when the history is flushed and the AM
 * dies. Once the AM is recovered, neither task 0 nor task 1 is re-run. Also
 * checks that the test input and output counters are reported.
 *
 * @throws Exception
 */
@Test(timeout = 120000)
public void testVertexCompletelyFinished_ScatterGather() throws Exception {
  DAG dag =
      createDAG("VertexCompletelyFinished_ScatterGather", ControlledShuffleVertexManager.class,
          DataMovementType.SCATTER_GATHER, false);
  TezCounters counters = runDAGAndVerify(dag, DAGStatus.State.SUCCEEDED);
  assertEquals(4, counters.findCounter(DAGCounter.NUM_SUCCEEDED_TASKS).getValue());
  assertEquals(2, counters.findCounter(TestCounter.Counter_1).getValue());

  TezCounter outputCounter = counters.findCounter(TestOutput.COUNTER_NAME, TestOutput.COUNTER_NAME);
  TezCounter inputCounter = counters.findCounter(TestInput.COUNTER_NAME, TestInput.COUNTER_NAME);
  // verify that processor, input and output counters are all being collected
  Assert.assertTrue(outputCounter.getValue() > 0);
  Assert.assertTrue(inputCounter.getValue() > 0);

  // readRecoveryLog(n) returns the events of AM attempts 1..n combined.
  List<HistoryEvent> historyEvents1 = readRecoveryLog(1);
  List<HistoryEvent> historyEvents2 = readRecoveryLog(2);
  printHistoryEvents(historyEvents1, 1);
  // Print the list read for attempt 2, not the attempt-1 list a second time.
  printHistoryEvents(historyEvents2, 2);

  // In AM attempt 1, both task_0 and task_1 of v1 are finished.
  assertEquals(1, findTaskAttemptFinishedEvent(historyEvents1, 0, 0).size());
  assertEquals(1, findTaskAttemptFinishedEvent(historyEvents1, 0, 1).size());
  // Over AM attempts 1 and 2, each task of v1 still has a single finished
  // event, i.e. neither of them was re-run after recovery.
  assertEquals(1, findTaskAttemptFinishedEvent(historyEvents2, 0, 0).size());
  assertEquals(1, findTaskAttemptFinishedEvent(historyEvents2, 0, 1).size());
}
/**
 * With a high AM max-attempt limit, kills a random number of AM attempts and
 * expects the DAG to still succeed on a later attempt using the latest AM
 * history data.
 *
 * @throws Exception
 */
@Test(timeout = 600000)
public void testHighMaxAttempt() throws Exception {
  // Drawn from [0, MAX_AM_ATTEMPT); FailOnAttemptVertexManager exits the JVM
  // while the current DAG attempt number is below this value.
  int failOnAttempt = new Random().nextInt(MAX_AM_ATTEMPT);
  tezConf.set(FAIL_ON_ATTEMPT, String.valueOf(failOnAttempt));
  LOG.info("Set FAIL_ON_ATTEMPT=" + tezConf.get(FAIL_ON_ATTEMPT));

  DAG dag = createDAG("HighMaxAttempt", FailOnAttemptVertexManager.class,
      DataMovementType.SCATTER_GATHER, false);
  runDAGAndVerify(dag, DAGStatus.State.SUCCEEDED);
}
/**
 * Submits the DAG to the current Tez session, waits for it to complete and
 * asserts its final state.
 *
 * @param dag the DAG to run
 * @param finalState the state the DAG is expected to end in
 * @return the DAG counters reported with the final status
 * @throws Exception if the session does not become ready or the DAG cannot be
 *           submitted or monitored
 */
TezCounters runDAGAndVerify(DAG dag, DAGStatus.State finalState) throws Exception {
  tezSession.waitTillReady();
  DAGClient client = tezSession.submitDAG(dag);
  // Ask for counters so that they are part of the returned status.
  EnumSet<StatusGetOpts> statusOptions = EnumSet.of(StatusGetOpts.GET_COUNTERS);
  DAGStatus completedStatus = client.waitForCompletionWithStatusUpdates(statusOptions);
  Assert.assertEquals(finalState, completedStatus.getState());
  return completedStatus.getDAGCounters();
}
/**
 * Builds the two-vertex DAG v1 --> v2 used by the tests. <br>
 * v1 (2 tasks, {@link MyProcessor}) uses {@link ScheduleControlledVertexManager},
 * which schedules only task 0 in the first AM attempt of a partially-finished
 * test case. <br>
 * v2 (2 tasks, {@link DoNothingProcessor}) uses the given vertex manager, which
 * decides when to kill the AM. <br>
 * Both vertex managers receive the current {@code tezConf} as their user payload.
 *
 * @param dagName name of the DAG
 * @param vertexManagerClass vertex manager plugin class to install on v2
 * @param dmType data movement type of the edge from v1 to v2
 * @param failOnParitialCompleted {@code true} for the partially-finished test
 *          case, {@code false} for the completely-finished one; stored in
 *          {@code tezConf} under {@code FAIL_ON_PARTIAL_FINISHED}
 * @return the assembled DAG
 * @throws IOException if the configuration cannot be turned into a user payload
 */
private DAG createDAG(String dagName, Class<?> vertexManagerClass, DataMovementType dmType,
    boolean failOnParitialCompleted) throws IOException {
  // Must be set before the payloads below are created from tezConf.
  tezConf.setBoolean(FAIL_ON_PARTIAL_FINISHED, failOnParitialCompleted);

  DAG dag = DAG.create(dagName);
  UserPayload payload = UserPayload.create(null);
  Vertex v1 = Vertex.create("v1", MyProcessor.getProcDesc(), 2);
  v1.setVertexManagerPlugin(VertexManagerPluginDescriptor.create(
      ScheduleControlledVertexManager.class.getName()).setUserPayload(
      TezUtils.createUserPayloadFromConf(tezConf)));
  Vertex v2 = Vertex.create("v2", DoNothingProcessor.getProcDesc(), 2);
  v2.setVertexManagerPlugin(VertexManagerPluginDescriptor.create(
      vertexManagerClass.getName()).setUserPayload(
      TezUtils.createUserPayloadFromConf(tezConf)));
  dag.addVertex(v1).addVertex(v2);
  dag.addEdge(Edge.create(v1, v2, EdgeProperty.create(dmType,
      DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL,
      TestOutput.getOutputDesc(payload), TestInput.getInputDesc(payload))));
  return dag;
}
/**
 * Collects the TASK_ATTEMPT_FINISHED events that belong to the given task.
 *
 * @param historyEvents events to search
 * @param vertexId id of the vertex the task belongs to
 * @param taskId id of the task within that vertex
 * @return the matching events in their original order; empty if there are none
 */
private List<TaskAttemptFinishedEvent> findTaskAttemptFinishedEvent(
    List<HistoryEvent> historyEvents, int vertexId, int taskId) {
  List<TaskAttemptFinishedEvent> matches = new ArrayList<TaskAttemptFinishedEvent>();
  for (HistoryEvent candidate : historyEvents) {
    if (candidate.getEventType() != HistoryEventType.TASK_ATTEMPT_FINISHED) {
      continue;
    }
    TaskAttemptFinishedEvent finished = (TaskAttemptFinishedEvent) candidate;
    if (finished.getTaskAttemptID().getTaskID().getVertexID().getId() != vertexId) {
      continue;
    }
    if (finished.getTaskAttemptID().getTaskID().getId() != taskId) {
      continue;
    }
    matches.add(finished);
  }
  return matches;
}
/**
 * Reads the recovery log of the first DAG of the current session for AM
 * attempts 1 through {@code attemptNum} and returns all parsed events in one
 * list. Attempts without a recovery file are skipped.
 *
 * @param attemptNum last AM attempt (inclusive, counted from 1) to read
 * @return the events of attempts 1..attemptNum, in attempt order
 * @throws IOException if a recovery file cannot be opened, parsed or closed
 */
private List<HistoryEvent> readRecoveryLog(int attemptNum) throws IOException {
  ApplicationId appId = tezSession.getAppMasterApplicationId();
  Path tezSystemStagingDir =
      TezCommonUtils.getTezSystemStagingPath(tezConf, appId.toString());
  Path recoveryDataDir =
      TezCommonUtils.getRecoveryPath(tezSystemStagingDir, tezConf);
  FileSystem fs = tezSystemStagingDir.getFileSystem(tezConf);
  List<HistoryEvent> historyEvents = new ArrayList<HistoryEvent>();
  for (int i = 1; i <= attemptNum; ++i) {
    Path currentAttemptRecoveryDataDir =
        TezCommonUtils.getAttemptRecoveryPath(recoveryDataDir, i);
    // The file is named after the DAG id, built here from the application id
    // with the "_1" suffix.
    Path recoveryFilePath =
        new Path(currentAttemptRecoveryDataDir, appId.toString().replace(
            "application", "dag")
            + "_1" + TezConstants.DAG_RECOVERY_RECOVER_FILE_SUFFIX);
    if (fs.exists(recoveryFilePath)) {
      LOG.info("Read recovery file:" + recoveryFilePath);
      // Close the stream here; the parser is not relied upon to do so.
      FSDataInputStream recoveryStream = fs.open(recoveryFilePath);
      try {
        historyEvents.addAll(RecoveryParser.parseDAGRecoveryFile(recoveryStream));
      } finally {
        recoveryStream.close();
      }
    }
  }
  return historyEvents;
}
/**
 * Logs every given history event at INFO level, preceded by a header naming
 * the attempt and followed by an empty log line.
 *
 * @param historyEvents events to log
 * @param attemptId attempt number shown in the header line
 */
private void printHistoryEvents(List<HistoryEvent> historyEvents, int attemptId) {
  LOG.info("RecoveryLogs from attempt:" + attemptId);
  for (HistoryEvent parsedEvent : historyEvents) {
    String logLine = "Parsed event from recovery stream"
        + ", eventType=" + parsedEvent.getEventType()
        + ", event=" + parsedEvent;
    LOG.info(logLine);
  }
  LOG.info("");
}
/**
 * {@link InputReadyVertexManager} that exits the JVM during the first DAG
 * attempt: after the first source task completion when FAIL_ON_PARTIAL_FINISHED
 * is true (the default), otherwise once as many completions have been seen as
 * the source vertex has tasks.
 */
public static class ControlledInputReadyVertexManager extends
    InputReadyVertexManager {
  private Configuration conf;
  private int completedTaskNum = 0;

  public ControlledInputReadyVertexManager(VertexManagerPluginContext context) {
    super(context);
  }

  /** Initializes the parent manager, then loads the test settings from the user payload. */
  @Override
  public void initialize() {
    super.initialize();
    try {
      conf = TezUtils.createConfFromUserPayload(getContext().getUserPayload());
    } catch (IOException payloadError) {
      payloadError.printStackTrace();
    }
  }

  /** Counts the completion and exits the JVM when the configured kill point is reached. */
  @Override
  public void onSourceTaskCompleted(TaskAttemptIdentifier attempt) {
    super.onSourceTaskCompleted(attempt);
    completedTaskNum += 1;
    if (getContext().getDAGAttemptNumber() != 1) {
      // Only the first attempt is ever killed.
      return;
    }
    // Number of completions at which to exit: 1 for the partially-finished
    // case, the source vertex's task count otherwise.
    int killAtCompletion = conf.getBoolean(FAIL_ON_PARTIAL_FINISHED, true)
        ? 1
        : getContext().getVertexNumTasks(
            attempt.getTaskIdentifier().getVertexIdentifier().getName());
    if (completedTaskNum == killAtCompletion) {
      System.exit(-1);
    }
  }
}
/**
 * {@link ShuffleVertexManager} that exits the JVM during the first DAG attempt:
 * after the first source task completion when FAIL_ON_PARTIAL_FINISHED is true
 * (the default), otherwise once as many completions have been seen as the
 * source vertex has tasks.
 */
public static class ControlledShuffleVertexManager extends
    ShuffleVertexManager {
  private Configuration conf;
  private int completedTaskNum = 0;

  public ControlledShuffleVertexManager(VertexManagerPluginContext context) {
    super(context);
  }

  /** Initializes the parent manager, then loads the test settings from the user payload. */
  @Override
  public void initialize() {
    super.initialize();
    try {
      conf = TezUtils.createConfFromUserPayload(getContext().getUserPayload());
    } catch (IOException payloadError) {
      payloadError.printStackTrace();
    }
  }

  /** Counts the completion and exits the JVM when the configured kill point is reached. */
  @Override
  public void onSourceTaskCompleted(TaskAttemptIdentifier attempt) {
    super.onSourceTaskCompleted(attempt);
    completedTaskNum++;
    if (getContext().getDAGAttemptNumber() == 1 && atTestKillPoint(attempt)) {
      System.exit(-1);
    }
  }

  /** Tells whether the completions counted so far match the configured kill point. */
  private boolean atTestKillPoint(TaskAttemptIdentifier attempt) {
    if (conf.getBoolean(FAIL_ON_PARTIAL_FINISHED, true)) {
      return completedTaskNum == 1;
    }
    return completedTaskNum == getContext().getVertexNumTasks(
        attempt.getTaskIdentifier().getVertexIdentifier().getName());
  }
}
/**
 * {@link ImmediateStartVertexManager} that exits the JVM during the first DAG
 * attempt: after the first source task completion when FAIL_ON_PARTIAL_FINISHED
 * is true (the default), otherwise once as many completions have been seen as
 * the source vertex has tasks.
 */
public static class ControlledImmediateStartVertexManager extends
    ImmediateStartVertexManager {
  private Configuration conf;
  private int completedTaskNum = 0;

  public ControlledImmediateStartVertexManager(
      VertexManagerPluginContext context) {
    super(context);
  }

  /** Initializes the parent manager, then loads the test settings from the user payload. */
  @Override
  public void initialize() {
    super.initialize();
    try {
      conf = TezUtils.createConfFromUserPayload(getContext().getUserPayload());
    } catch (IOException payloadError) {
      payloadError.printStackTrace();
    }
  }

  /** Counts the completion and exits the JVM when the configured kill point is reached. */
  @Override
  public void onSourceTaskCompleted(TaskAttemptIdentifier attempt) {
    super.onSourceTaskCompleted(attempt);
    ++completedTaskNum;
    if (getContext().getDAGAttemptNumber() == 1) {
      final boolean killNow;
      if (conf.getBoolean(FAIL_ON_PARTIAL_FINISHED, true)) {
        // Partially-finished case: die right after the first completion.
        killNow = completedTaskNum == 1;
      } else {
        // Completely-finished case: die once every source task has completed.
        killNow = completedTaskNum == getContext().getVertexNumTasks(
            attempt.getTaskIdentifier().getVertexIdentifier().getName());
      }
      if (killNow) {
        System.exit(-1);
      }
    }
  }
}
/**
 * Vertex manager that schedules only task 0 in the first DAG attempt of a
 * partially-finished test case, and all tasks of its vertex in every other
 * situation.
 */
public static class ScheduleControlledVertexManager extends VertexManagerPlugin {
  private Configuration conf;

  public ScheduleControlledVertexManager(VertexManagerPluginContext context) {
    super(context);
  }

  /** Loads the test settings from the user payload. */
  @Override
  public void initialize() {
    try {
      conf = TezUtils.createConfFromUserPayload(getContext().getUserPayload());
    } catch (IOException payloadError) {
      payloadError.printStackTrace();
    }
  }

  /** Schedules either task 0 only or every task of this vertex, without location hints. */
  @Override
  public void onVertexStarted(List<TaskAttemptIdentifier> completions)
      throws Exception {
    // Partially-finished case: first attempt and FAIL_ON_PARTIAL_FINISHED set.
    boolean firstTaskOnly = getContext().getDAGAttemptNumber() == 1
        && conf.getBoolean(FAIL_ON_PARTIAL_FINISHED, true);
    int tasksToSchedule = firstTaskOnly
        ? 1
        : getContext().getVertexNumTasks(getContext().getVertexName());

    List<ScheduleTaskRequest> requests = new ArrayList<ScheduleTaskRequest>();
    for (int taskIndex = 0; taskIndex < tasksToSchedule; taskIndex++) {
      requests.add(ScheduleTaskRequest.create(taskIndex, null));
    }
    getContext().scheduleTasks(requests);
  }

  /** Intentionally does nothing. */
  @Override
  public void onSourceTaskCompleted(TaskAttemptIdentifier attempt)
      throws Exception {
  }

  /** Intentionally does nothing. */
  @Override
  public void onVertexManagerEventReceived(VertexManagerEvent vmEvent)
      throws Exception {
  }

  /** Intentionally does nothing. */
  @Override
  public void onRootVertexInitialized(String inputName,
      InputDescriptor inputDescriptor, List<Event> events) throws Exception {
  }
}
/**
 * {@link ShuffleVertexManager} that exits the JVM on a source task completion
 * whenever the current DAG attempt number is lower than the configured
 * FAIL_ON_ATTEMPT value (default 1).
 */
public static class FailOnAttemptVertexManager extends ShuffleVertexManager {
  private Configuration conf;

  public FailOnAttemptVertexManager(VertexManagerPluginContext context) {
    super(context);
  }

  /** Initializes the parent manager, then loads the test settings from the user payload. */
  @Override
  public void initialize() {
    super.initialize();
    try {
      conf = TezUtils.createConfFromUserPayload(getContext().getUserPayload());
    } catch (IOException payloadError) {
      payloadError.printStackTrace();
    }
  }

  /** Forwards the completion to the parent, logs both attempt numbers and exits if required. */
  @Override
  public void onSourceTaskCompleted(TaskAttemptIdentifier attempt) {
    final int currentAttempt = getContext().getDAGAttemptNumber();
    super.onSourceTaskCompleted(attempt);
    final int configuredAttempt = conf.getInt(FAIL_ON_ATTEMPT, 1);
    LOG.info("failOnAttempt:" + configuredAttempt);
    LOG.info("curAttempt:" + currentAttempt);
    if (currentAttempt >= configuredAttempt) {
      // This attempt is allowed to carry on.
      return;
    }
    System.exit(-1);
  }
}
/** Counters defined by this test. */
public static enum TestCounter {
// Incremented by one each time MyProcessor.run() is executed.
Counter_1,
}
/** Processor of vertex v1: increments {@code TestCounter.Counter_1} once per run. */
public static class MyProcessor extends SimpleProcessor {

  public MyProcessor(ProcessorContext context) {
    super(context);
  }

  /** Bumps the test counter by one. */
  @Override
  public void run() throws Exception {
    TezCounter runCounter =
        getContext().getCounters().findCounter(TestCounter.Counter_1);
    runCounter.increment(1);
  }

  /** @return a descriptor naming this processor class */
  public static ProcessorDescriptor getProcDesc() {
    String processorClassName = MyProcessor.class.getName();
    return ProcessorDescriptor.create(processorClassName);
  }
}
/** Processor of vertex v2: does no work other than sleeping for three seconds. */
public static class DoNothingProcessor extends SimpleProcessor {

  /** How long a v2 task sleeps, in milliseconds. */
  private static final long SLEEP_MILLIS = 3000L;

  public DoNothingProcessor(ProcessorContext context) {
    super(context);
  }

  @Override
  public void run() throws Exception {
    // Sleep 3 seconds in vertex2 so that vertex2 does not complete before it
    // gets the SourceVertexTaskAttemptCompletedEvent. That event is ignored
    // once the vertex is SUCCEEDED, in which case the VM of vertex2 would
    // never kill the AM.
    Thread.sleep(SLEEP_MILLIS);
  }

  /** @return a descriptor naming this processor class */
  public static ProcessorDescriptor getProcDesc() {
    String processorClassName = DoNothingProcessor.class.getName();
    return ProcessorDescriptor.create(processorClassName);
  }
}
}