/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.streaming;

import java.io.DataOutputStream;
import java.io.File;
import java.io.IOException;

import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import static org.junit.Assert.*;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.Counters;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MiniMRCluster;
import org.apache.hadoop.mapred.TaskAttemptID;
import org.apache.hadoop.mapred.TaskID;
import org.apache.hadoop.mapred.TaskLog;
import org.apache.hadoop.mapred.TaskReport;
import org.apache.hadoop.mapreduce.MRJobConfig;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.mapreduce.MapReduceTestUtil;
import org.apache.hadoop.mapreduce.server.jobtracker.JTConfig;

/**
 * Tests that a streaming mapper/reducer with empty or nonempty input works
 * properly when reporting is done using lines like "reporter:status:" and
 * "reporter:counter:" before map()/reduce() is called.
 * Validates the task's stderr log when messages are written to stderr before
 * map()/reduce() is called.
 * Also validates the job output.
 * Uses MiniMR since the local jobtracker doesn't track task status.
 */
public class TestStreamingStatus {
protected static String TEST_ROOT_DIR =
new File(System.getProperty("test.build.data","/tmp"),
TestStreamingStatus.class.getSimpleName())
.toURI().toString().replace(' ', '+');
protected String INPUT_FILE = TEST_ROOT_DIR + "/input.txt";
protected String OUTPUT_DIR = TEST_ROOT_DIR + "/out";
protected String input = "roses.are.red\nviolets.are.blue\nbunnies.are.pink\n";
protected String map = null;
protected String reduce = null;
protected String scriptFile = TEST_ROOT_DIR + "/perlScript.pl";
protected String scriptFileName = new Path(scriptFile).toUri().getPath();
String expectedStderr = "my error msg before consuming input\n" +
"my error msg after consuming input\n";
  // initialized in buildExpectedJobOutput(), which is called from setUp()
  String expectedOutput = null;
String expectedStatus = "before consuming input";
  // This script does the following:
  // (a) sets the task status before reading input
  // (b) writes to stderr before reading input and after reading input
  // (c) writes to stdout before reading input
  // (d) increments a user counter before reading input and after reading input
  // Writing lines to stdout before reading input ((c) above) validates the fix
  // for the hanging-task issue seen when the input to a task is empty (caused
  // by the output thread not being started).
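  // For reference, the script below expands to the following Perl (with
  // expectedStatus substituted; the for loop and the while loop share a line):
  //   #!/usr/bin/perl
  //   print STDERR "reporter:status:before consuming input\n";
  //   print STDERR "reporter:counter:myOwnCounterGroup,myOwnCounter,1\n";
  //   print STDERR "my error msg before consuming input\n";
  //   for($count = 1500; $count >= 1; $count--) {print STDOUT "$count ";}while(<STDIN>) {chomp;}
  //   print STDERR "my error msg after consuming input\n";
  //   print STDERR "reporter:counter:myOwnCounterGroup,myOwnCounter,1\n";
  // Streaming treats stderr lines of the form "reporter:status:<msg>" and
  // "reporter:counter:<group>,<counter>,<amount>" as status/counter updates
  // (they are parsed by MRErrorThread, mentioned in runStreamJob below);
  // every other stderr line goes to the task's stderr log.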
protected String script =
"#!/usr/bin/perl\n" +
"print STDERR \"reporter:status:" + expectedStatus + "\\n\";\n" +
"print STDERR \"reporter:counter:myOwnCounterGroup,myOwnCounter,1\\n\";\n" +
"print STDERR \"my error msg before consuming input\\n\";\n" +
"for($count = 1500; $count >= 1; $count--) {print STDOUT \"$count \";}" +
"while(<STDIN>) {chomp;}\n" +
"print STDERR \"my error msg after consuming input\\n\";\n" +
"print STDERR \"reporter:counter:myOwnCounterGroup,myOwnCounter,1\\n\";\n";
MiniMRCluster mr = null;
FileSystem fs = null;
JobConf conf = null;
/**
* Start the cluster and create input file before running the actual test.
*
* @throws IOException
*/
@Before
public void setUp() throws IOException {
conf = new JobConf();
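    // Keep completed jobs un-retired and skip persisting job status so that
    // counters and task reports remain queryable after the job completes.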
conf.setBoolean(JTConfig.JT_RETIREJOBS, false);
conf.setBoolean(JTConfig.JT_PERSIST_JOBSTATUS, false);
    mr = new MiniMRCluster(1, "file:///", 3, null, null, conf);
Path inFile = new Path(INPUT_FILE);
fs = inFile.getFileSystem(mr.createJobConf());
clean(fs);
buildExpectedJobOutput();
}
/**
* Kill the cluster after the test is done.
*/
@After
public void tearDown() {
if (fs != null) { clean(fs); }
if (mr != null) { mr.shutdown(); }
}
  // Updates expectedOutput to hold the expected job output as a string
  void buildExpectedJobOutput() {
    if (expectedOutput == null) {
      StringBuilder sb = new StringBuilder();
      for (int i = 1500; i >= 1; i--) {
        sb.append(i).append(' ');
      }
      expectedOutput = sb.toString().trim();
    }
  }
// Create empty/nonempty input file.
// Create script file with the specified content.
protected void createInputAndScript(boolean isEmptyInput,
String script) throws IOException {
makeInput(fs, isEmptyInput ? "" : input);
// create script file
DataOutputStream file = fs.create(new Path(scriptFileName));
file.writeBytes(script);
file.close();
}
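  // Builds the streaming job arguments. A single map task and a single reduce
  // task are requested so that exactly one task of each type runs, matching
  // the single TaskReport and the attempt-0 stderr log checked in the
  // validation methods below.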
protected String[] genArgs(String jobtracker, String mapper, String reducer)
{
return new String[] {
"-input", INPUT_FILE,
"-output", OUTPUT_DIR,
"-mapper", mapper,
"-reducer", reducer,
"-jobconf", MRJobConfig.NUM_MAPS + "=1",
"-jobconf", MRJobConfig.NUM_REDUCES + "=1",
"-jobconf", MRJobConfig.PRESERVE_FAILED_TASK_FILES + "=true",
"-jobconf", "stream.tmpdir=" + new Path(TEST_ROOT_DIR).toUri().getPath(),
"-jobconf", JTConfig.JT_IPC_ADDRESS + "="+jobtracker,
"-jobconf", "fs.default.name=file:///",
"-jobconf", "mapred.jar=" + TestStreaming.STREAMING_JAR,
"-jobconf", "mapreduce.framework.name=yarn"
};
}
// create input file with the given content
public void makeInput(FileSystem fs, String input) throws IOException {
Path inFile = new Path(INPUT_FILE);
DataOutputStream file = fs.create(inFile);
file.writeBytes(input);
file.close();
}
// Delete output directory
protected void deleteOutDir(FileSystem fs) {
try {
Path outDir = new Path(OUTPUT_DIR);
fs.delete(outDir, true);
    } catch (Exception e) {
      // ignore cleanup failures; the output directory may not exist
    }
}
// Delete input file, script file and output directory
public void clean(FileSystem fs) {
deleteOutDir(fs);
try {
Path file = new Path(INPUT_FILE);
if (fs.exists(file)) {
fs.delete(file, false);
}
file = new Path(scriptFile);
if (fs.exists(file)) {
fs.delete(file, false);
}
} catch (Exception e) {
e.printStackTrace();
}
}
  /**
   * Checks that a streaming mapper/reducer with empty or nonempty input works
   * properly when reporting is done using lines like "reporter:status:" and
   * "reporter:counter:" before map()/reduce() is called.
   * Validates the task's stderr log when messages are written to stderr
   * before map()/reduce() is called.
   * Also validates the job output.
   *
   * @throws Exception
   */
@Test
public void testReporting() throws Exception {
testStreamJob(false);// nonempty input
testStreamJob(true);// empty input
}
  /**
   * Runs a streaming job with the script field as mapper and validates it,
   * then runs another streaming job with the script field as reducer and
   * validates it.
   *
   * @param isEmptyInput whether the input to the script should be empty
   */
private void testStreamJob(boolean isEmptyInput)
throws IOException {
createInputAndScript(isEmptyInput, script);
// Check if streaming mapper works as expected
map = scriptFileName;
reduce = "/bin/cat";
runStreamJob(TaskType.MAP, isEmptyInput);
deleteOutDir(fs);
// Check if streaming reducer works as expected.
map = "/bin/cat";
reduce = scriptFileName;
runStreamJob(TaskType.REDUCE, isEmptyInput);
clean(fs);
}
  // Runs a streaming job with the specified input file, mapper and reducer, and
  // (1) validates that the job succeeds;
  // (2) validates that the user counter is incremented properly for the cases of
  //     (a) nonempty input to map,
  //     (b) empty input to map, and
  //     (c) nonempty input to reduce;
  // (3) validates the task status for cases (2)(a), (2)(b) and (2)(c)
  //     (for empty input to a reduce task, the reporter is a dummy that
  //     ignores all "reporter:status" and "reporter:counter" lines);
  // (4) validates the stderr of the task of the given task type;
  // (5) validates the job output.
void runStreamJob(TaskType type, boolean isEmptyInput) throws IOException {
boolean mayExit = false;
StreamJob job = new StreamJob(genArgs(
mr.createJobConf().get(JTConfig.JT_IPC_ADDRESS), map, reduce), mayExit);
int returnValue = job.go();
assertEquals(0, returnValue);
    // If the input to the reducer is empty, a dummy reporter (which ignores
    // all reporting lines) is set for MRErrorThread in waitOutputThreads().
    // So expectedCounterValue is 0 for the empty-input-to-reducer case, and
    // the output of the reducer is also empty in that case.
int expectedCounterValue = 0;
if (type == TaskType.MAP || !isEmptyInput) {
validateTaskStatus(job, type);
// output is from "print STDOUT" statements in perl script
validateJobOutput(job.getConf());
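      // the script increments myOwnCounter once before and once after
      // consuming input, hence the expected counter value of 2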
expectedCounterValue = 2;
}
validateUserCounter(job, expectedCounterValue);
validateTaskStderr(job, type);
deleteOutDir(fs);
}
  // Validates the task status of the first task of the given type.
void validateTaskStatus(StreamJob job, TaskType type) throws IOException {
// Map Task has 2 phases: map, sort
// Reduce Task has 3 phases: copy, sort, reduce
String finalPhaseInTask;
TaskReport[] reports;
if (type == TaskType.MAP) {
reports = job.jc_.getMapTaskReports(job.jobId_);
finalPhaseInTask = "sort";
    } else { // reduce task
reports = job.jc_.getReduceTaskReports(job.jobId_);
finalPhaseInTask = "reduce";
}
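    // Task state is reported as "<user-set status> > <current phase>"; once
    // the job completes, the phase shown is the final phase of the task.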
assertEquals(1, reports.length);
assertEquals(expectedStatus + " > " + finalPhaseInTask,
reports[0].getState());
}
// Validate the job output
void validateJobOutput(Configuration conf)
throws IOException {
String output = MapReduceTestUtil.readOutput(
new Path(OUTPUT_DIR), conf).trim();
    assertEquals(expectedOutput, output);
}
  // Validates the stderr task log of the first task of the given type.
void validateTaskStderr(StreamJob job, TaskType type)
throws IOException {
TaskAttemptID attemptId =
new TaskAttemptID(new TaskID(job.jobId_, type, 0), 0);
String log = MapReduceTestUtil.readTaskLog(TaskLog.LogName.STDERR,
attemptId, false);
// trim() is called on expectedStderr here because the method
// MapReduceTestUtil.readTaskLog() returns trimmed String.
    assertEquals(expectedStderr.trim(), log);
}
  // Validates that the user counter has the expected value
void validateUserCounter(StreamJob job, int expectedCounterValue)
throws IOException {
Counters counters = job.running_.getCounters();
assertEquals(expectedCounterValue, counters.findCounter(
"myOwnCounterGroup", "myOwnCounter").getValue());
}
}