/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.tez.mapreduce.output; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter; import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat; import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.tez.common.counters.TezCounters; import org.apache.tez.dag.api.DataSinkDescriptor; import org.apache.tez.dag.api.UserPayload; import org.apache.tez.mapreduce.hadoop.MRConfig; import org.apache.tez.runtime.api.OutputContext; import org.apache.tez.runtime.api.OutputStatisticsReporter; import org.junit.Assert; import org.junit.Test; import java.io.IOException; public class TestMultiMROutput { @Test(timeout = 5000) public void testNewAPI_TextOutputFormat() throws Exception { validate(true, TextOutputFormat.class, true, FileOutputCommitter.class, false); } @Test(timeout = 5000) public void testOldAPI_TextOutputFormat() throws Exception { validate(false, org.apache.hadoop.mapred.TextOutputFormat.class, false, org.apache.hadoop.mapred.FileOutputCommitter.class, false); } @Test(timeout = 5000) public void testNewAPI_SequenceFileOutputFormat() throws Exception { validate(true, SequenceFileOutputFormat.class, false, FileOutputCommitter.class, false); } @Test(timeout = 5000) public void testOldAPI_SequenceFileOutputFormat() throws Exception { validate(false, org.apache.hadoop.mapred.SequenceFileOutputFormat.class, false, org.apache.hadoop.mapred.FileOutputCommitter.class, false); } @Test(timeout = 5000) public void testNewAPI_LazySequenceFileOutputFormat() throws Exception { validate(true, SequenceFileOutputFormat.class, false, FileOutputCommitter.class, true); } @Test(timeout = 5000) public void testOldAPI_LazySequenceFileOutputFormat() throws Exception { validate(false, org.apache.hadoop.mapred.SequenceFileOutputFormat.class, false, org.apache.hadoop.mapred.FileOutputCommitter.class, true); } @Test(timeout = 5000) public void testNewAPI_LazyTextOutputFormat() throws Exception { validate(true, TextOutputFormat.class, false, FileOutputCommitter.class, true); } @Test(timeout = 5000) public void testOldAPI_LazyTextOutputFormat() throws Exception { validate(false, org.apache.hadoop.mapred.TextOutputFormat.class, false, org.apache.hadoop.mapred.FileOutputCommitter.class, true); } @Test(timeout = 5000) public void testInvalidBasePath() throws Exception { MultiMROutput outputs = createMROutputs(SequenceFileOutputFormat.class, false, true); try { outputs.getWriter().write(new Text(Integer.toString(0)), new Text("foo"), "/tmp"); Assert.assertTrue(false); // should not come here } catch (UnsupportedOperationException uoe) { } } private OutputContext createMockOutputContext(UserPayload payload) { OutputContext outputContext = mock(OutputContext.class); ApplicationId appId = ApplicationId.newInstance(System.currentTimeMillis(), 1); when(outputContext.getUserPayload()).thenReturn(payload); when(outputContext.getApplicationId()).thenReturn(appId); when(outputContext.getTaskVertexIndex()).thenReturn(1); when(outputContext.getTaskAttemptNumber()).thenReturn(1); when(outputContext.getCounters()).thenReturn(new TezCounters()); when(outputContext.getStatisticsReporter()).thenReturn( mock(OutputStatisticsReporter.class)); return outputContext; } private void validate(boolean expectedUseNewAPIValue, Class outputFormat, boolean isMapper, Class committerClass, boolean useLazyOutputFormat) throws InterruptedException, IOException { MultiMROutput output = createMROutputs(outputFormat, isMapper, useLazyOutputFormat); assertEquals(isMapper, output.isMapperOutput); assertEquals(expectedUseNewAPIValue, output.useNewApi); if (expectedUseNewAPIValue) { if (useLazyOutputFormat) { assertEquals(LazyOutputFormat.class, output.newOutputFormat.getClass()); } else { assertEquals(outputFormat, output.newOutputFormat.getClass()); } assertNotNull(output.newApiTaskAttemptContext); assertNull(output.oldOutputFormat); assertEquals(Text.class, output.newApiTaskAttemptContext.getOutputValueClass()); assertEquals(Text.class, output.newApiTaskAttemptContext.getOutputKeyClass()); assertNull(output.oldApiTaskAttemptContext); assertNotNull(output.newRecordWriters); assertNull(output.oldRecordWriters); } else { if (!useLazyOutputFormat) { assertEquals(outputFormat, output.oldOutputFormat.getClass()); } else { assertEquals(org.apache.hadoop.mapred.lib.LazyOutputFormat.class, output.oldOutputFormat.getClass()); } assertNull(output.newOutputFormat); assertNotNull(output.oldApiTaskAttemptContext); assertNull(output.newApiTaskAttemptContext); assertEquals(Text.class, output.oldApiTaskAttemptContext.getOutputValueClass()); assertEquals(Text.class, output.oldApiTaskAttemptContext.getOutputKeyClass()); assertNotNull(output.oldRecordWriters); assertNull(output.newRecordWriters); } assertEquals(committerClass, output.committer.getClass()); int numOfUniqueKeys = 3; for (int i=0; i<numOfUniqueKeys; i++) { output.getWriter().write(new Text(Integer.toString(i)), new Text("foo"), Integer.toString(i)); } output.close(); if (expectedUseNewAPIValue) { assertEquals(numOfUniqueKeys, output.newRecordWriters.size()); } else { assertEquals(numOfUniqueKeys, output.oldRecordWriters.size()); } } private MultiMROutput createMROutputs(Class outputFormat, boolean isMapper, boolean useLazyOutputFormat) throws InterruptedException, IOException { String outputPath = "/tmp/output"; JobConf conf = new JobConf(); conf.setBoolean(MRConfig.IS_MAP_PROCESSOR, isMapper); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); DataSinkDescriptor dataSink = MultiMROutput.createConfigBuilder( conf, outputFormat, outputPath, useLazyOutputFormat).build(); OutputContext outputContext = createMockOutputContext( dataSink.getOutputDescriptor().getUserPayload()); MultiMROutput output = new MultiMROutput(outputContext, 2); output.initialize(); return output; } }