/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.tez.dag.app; import java.io.IOException; import java.util.concurrent.atomic.AtomicBoolean; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.tez.common.counters.TaskCounter; import org.apache.tez.dag.api.DAG; import org.apache.tez.dag.api.Edge; import org.apache.tez.dag.api.EdgeProperty; import org.apache.tez.dag.api.EdgeProperty.DataMovementType; import org.apache.tez.dag.api.EdgeProperty.DataSourceType; import org.apache.tez.dag.api.EdgeProperty.SchedulingType; import org.apache.tez.dag.api.InputDescriptor; import org.apache.tez.dag.api.OutputDescriptor; import org.apache.tez.dag.api.ProcessorDescriptor; import org.apache.tez.dag.api.TezConfiguration; import org.apache.tez.dag.api.Vertex; import org.apache.tez.dag.api.client.DAGClient; import org.apache.tez.dag.api.client.DAGStatus; import org.apache.tez.dag.app.MockDAGAppMaster.MockContainerLauncher; import org.apache.tez.dag.app.dag.Task; import org.apache.tez.dag.app.dag.TaskAttempt; import org.apache.tez.dag.app.dag.impl.DAGImpl; import org.apache.tez.dag.library.vertexmanager.ShuffleVertexManager; import org.apache.tez.dag.records.TaskAttemptTerminationCause; import org.apache.tez.dag.records.TezTaskAttemptID; import org.apache.tez.dag.records.TezTaskID; import org.apache.tez.dag.records.TezVertexID; import org.junit.Assert; import org.junit.Test; import com.google.common.base.Joiner; public class TestSpeculation { static Configuration defaultConf; static FileSystem localFs; MockDAGAppMaster mockApp; MockContainerLauncher mockLauncher; static { try { defaultConf = new Configuration(false); defaultConf.set("fs.defaultFS", "file:///"); defaultConf.setBoolean(TezConfiguration.TEZ_LOCAL_MODE, true); defaultConf.setBoolean(TezConfiguration.TEZ_AM_SPECULATION_ENABLED, true); defaultConf.setFloat(ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_MIN_SRC_FRACTION, 1); defaultConf.setFloat(ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_MAX_SRC_FRACTION, 1); localFs = FileSystem.getLocal(defaultConf); String stagingDir = "target" + Path.SEPARATOR + TestSpeculation.class.getName() + "-tmpDir"; defaultConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDir); } catch (IOException e) { throw new RuntimeException("init failure", e); } } MockTezClient createTezSession() throws Exception { TezConfiguration tezconf = new TezConfiguration(defaultConf); AtomicBoolean mockAppLauncherGoFlag = new AtomicBoolean(false); MockTezClient tezClient = new MockTezClient("testspeculation", tezconf, true, null, null, new MockClock(), mockAppLauncherGoFlag, false, false, 1, 2); tezClient.start(); syncWithMockAppLauncher(false, mockAppLauncherGoFlag, tezClient); return tezClient; } void syncWithMockAppLauncher(boolean allowScheduling, AtomicBoolean mockAppLauncherGoFlag, MockTezClient tezClient) throws Exception { synchronized (mockAppLauncherGoFlag) { while (!mockAppLauncherGoFlag.get()) { mockAppLauncherGoFlag.wait(); } mockApp = tezClient.getLocalClient().getMockApp(); mockLauncher = mockApp.getContainerLauncher(); mockLauncher.startScheduling(allowScheduling); mockAppLauncherGoFlag.notify(); } } public void testBasicSpeculation(boolean withProgress) throws Exception { DAG dag = DAG.create("test"); Vertex vA = Vertex.create("A", ProcessorDescriptor.create("Proc.class"), 5); dag.addVertex(vA); MockTezClient tezClient = createTezSession(); DAGClient dagClient = tezClient.submitDAG(dag); DAGImpl dagImpl = (DAGImpl) mockApp.getContext().getCurrentDAG(); TezVertexID vertexId = TezVertexID.getInstance(dagImpl.getID(), 0); // original attempt is killed and speculative one is successful TezTaskAttemptID killedTaId = TezTaskAttemptID.getInstance(TezTaskID.getInstance(vertexId, 0), 0); TezTaskAttemptID successTaId = TezTaskAttemptID.getInstance(TezTaskID.getInstance(vertexId, 0), 1); mockLauncher.updateProgress(withProgress); // cause speculation trigger mockLauncher.setStatusUpdatesForTask(killedTaId, 100); mockLauncher.startScheduling(true); dagClient.waitForCompletion(); Assert.assertEquals(DAGStatus.State.SUCCEEDED, dagClient.getDAGStatus(null).getState()); Task task = dagImpl.getTask(killedTaId.getTaskID()); Assert.assertEquals(2, task.getAttempts().size()); Assert.assertEquals(successTaId, task.getSuccessfulAttempt().getID()); TaskAttempt killedAttempt = task.getAttempt(killedTaId); Joiner.on(",").join(killedAttempt.getDiagnostics()).contains("Killed as speculative attempt"); Assert.assertEquals(TaskAttemptTerminationCause.TERMINATED_EFFECTIVE_SPECULATION, killedAttempt.getTerminationCause()); if (withProgress) { // without progress updates occasionally more than 1 task speculates Assert.assertEquals(1, task.getCounters().findCounter(TaskCounter.NUM_SPECULATIONS) .getValue()); Assert.assertEquals(1, dagImpl.getAllCounters().findCounter(TaskCounter.NUM_SPECULATIONS) .getValue()); org.apache.tez.dag.app.dag.Vertex v = dagImpl.getVertex(killedTaId.getTaskID().getVertexID()); Assert.assertEquals(1, v.getAllCounters().findCounter(TaskCounter.NUM_SPECULATIONS) .getValue()); } tezClient.stop(); } @Test (timeout=10000) public void testBasicSpeculationWithProgress() throws Exception { testBasicSpeculation(true); } @Test (timeout=10000) public void testBasicSpeculationWithoutProgress() throws Exception { testBasicSpeculation(false); } @Test (timeout=10000) public void testBasicSpeculationPerVertexConf() throws Exception { DAG dag = DAG.create("test"); String vNameNoSpec = "A"; String vNameSpec = "B"; Vertex vA = Vertex.create(vNameNoSpec, ProcessorDescriptor.create("Proc.class"), 5); Vertex vB = Vertex.create(vNameSpec, ProcessorDescriptor.create("Proc.class"), 5); vA.setConf(TezConfiguration.TEZ_AM_SPECULATION_ENABLED, "false"); dag.addVertex(vA); dag.addVertex(vB); // min/max src fraction is set to 1. So vertices will run sequentially dag.addEdge( Edge.create(vA, vB, EdgeProperty.create(DataMovementType.SCATTER_GATHER, DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL, OutputDescriptor.create("O"), InputDescriptor.create("I")))); MockTezClient tezClient = createTezSession(); DAGClient dagClient = tezClient.submitDAG(dag); DAGImpl dagImpl = (DAGImpl) mockApp.getContext().getCurrentDAG(); TezVertexID vertexId = dagImpl.getVertex(vNameSpec).getVertexId(); TezVertexID vertexIdNoSpec = dagImpl.getVertex(vNameNoSpec).getVertexId(); // original attempt is killed and speculative one is successful TezTaskAttemptID killedTaId = TezTaskAttemptID.getInstance(TezTaskID.getInstance(vertexId, 0), 0); TezTaskAttemptID noSpecTaId = TezTaskAttemptID .getInstance(TezTaskID.getInstance(vertexIdNoSpec, 0), 0); // cause speculation trigger for both mockLauncher.setStatusUpdatesForTask(killedTaId, 100); mockLauncher.setStatusUpdatesForTask(noSpecTaId, 100); mockLauncher.startScheduling(true); dagClient.waitForCompletion(); Assert.assertEquals(DAGStatus.State.SUCCEEDED, dagClient.getDAGStatus(null).getState()); org.apache.tez.dag.app.dag.Vertex vSpec = dagImpl.getVertex(vertexId); org.apache.tez.dag.app.dag.Vertex vNoSpec = dagImpl.getVertex(vertexIdNoSpec); // speculation for vA but not for vB Assert.assertTrue(vSpec.getAllCounters().findCounter(TaskCounter.NUM_SPECULATIONS) .getValue() > 0); Assert.assertEquals(0, vNoSpec.getAllCounters().findCounter(TaskCounter.NUM_SPECULATIONS) .getValue()); tezClient.stop(); } @Test (timeout=10000) public void testBasicSpeculationNotUseful() throws Exception { DAG dag = DAG.create("test"); Vertex vA = Vertex.create("A", ProcessorDescriptor.create("Proc.class"), 5); dag.addVertex(vA); MockTezClient tezClient = createTezSession(); DAGClient dagClient = tezClient.submitDAG(dag); DAGImpl dagImpl = (DAGImpl) mockApp.getContext().getCurrentDAG(); TezVertexID vertexId = TezVertexID.getInstance(dagImpl.getID(), 0); // original attempt is successful and speculative one is killed TezTaskAttemptID successTaId = TezTaskAttemptID.getInstance(TezTaskID.getInstance(vertexId, 0), 0); TezTaskAttemptID killedTaId = TezTaskAttemptID.getInstance(TezTaskID.getInstance(vertexId, 0), 1); mockLauncher.setStatusUpdatesForTask(successTaId, 100); mockLauncher.setStatusUpdatesForTask(killedTaId, 100); mockLauncher.startScheduling(true); dagClient.waitForCompletion(); Assert.assertEquals(DAGStatus.State.SUCCEEDED, dagClient.getDAGStatus(null).getState()); Task task = dagImpl.getTask(killedTaId.getTaskID()); Assert.assertEquals(2, task.getAttempts().size()); Assert.assertEquals(successTaId, task.getSuccessfulAttempt().getID()); TaskAttempt killedAttempt = task.getAttempt(killedTaId); Joiner.on(",").join(killedAttempt.getDiagnostics()).contains("Killed speculative attempt as"); Assert.assertEquals(TaskAttemptTerminationCause.TERMINATED_INEFFECTIVE_SPECULATION, killedAttempt.getTerminationCause()); Assert.assertEquals(1, task.getCounters().findCounter(TaskCounter.NUM_SPECULATIONS) .getValue()); Assert.assertEquals(1, dagImpl.getAllCounters().findCounter(TaskCounter.NUM_SPECULATIONS) .getValue()); org.apache.tez.dag.app.dag.Vertex v = dagImpl.getVertex(killedTaId.getTaskID().getVertexID()); Assert.assertEquals(1, v.getAllCounters().findCounter(TaskCounter.NUM_SPECULATIONS) .getValue()); tezClient.stop(); } }