/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.tez.mapreduce.output; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter; import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.tez.common.TezUtils; import org.apache.tez.common.counters.TezCounters; import org.apache.tez.dag.api.DataSinkDescriptor; import org.apache.tez.dag.api.OutputCommitterDescriptor; import org.apache.tez.dag.api.OutputDescriptor; import org.apache.tez.dag.api.UserPayload; import org.apache.tez.mapreduce.committer.MROutputCommitter; import org.apache.tez.mapreduce.hadoop.MRConfig; import org.apache.tez.runtime.api.OutputContext; import org.junit.Test; public class TestMROutputLegacy { // simulate the behavior of translating MR to DAG using MR old API @Test (timeout = 5000) public void testOldAPI_MR() throws Exception { String outputPath = "/tmp/output"; JobConf conf = new JobConf(); conf.setOutputKeyClass(NullWritable.class); conf.setOutputValueClass(Text.class); conf.setOutputFormat(org.apache.hadoop.mapred.SequenceFileOutputFormat.class); org.apache.hadoop.mapred.SequenceFileOutputFormat.setOutputPath(conf, new Path(outputPath)); // the output is attached to reducer conf.setBoolean(MRConfig.IS_MAP_PROCESSOR, false); UserPayload vertexPayload = TezUtils.createUserPayloadFromConf(conf); OutputDescriptor od = OutputDescriptor.create(MROutputLegacy.class.getName()) .setUserPayload(vertexPayload); DataSinkDescriptor sink = DataSinkDescriptor.create(od, OutputCommitterDescriptor.create(MROutputCommitter.class.getName()), null); OutputContext outputContext = createMockOutputContext(sink.getOutputDescriptor().getUserPayload()); MROutputLegacy output = new MROutputLegacy(outputContext, 2); output.initialize(); assertEquals(false, output.useNewApi); assertEquals(org.apache.hadoop.mapred.SequenceFileOutputFormat.class, output.oldOutputFormat.getClass()); assertNull(output.newOutputFormat); assertEquals(NullWritable.class, output.oldApiTaskAttemptContext.getOutputKeyClass()); assertEquals(Text.class, output.oldApiTaskAttemptContext.getOutputValueClass()); assertNull(output.newApiTaskAttemptContext); assertNotNull(output.oldRecordWriter); assertNull(output.newRecordWriter); assertEquals(org.apache.hadoop.mapred.FileOutputCommitter.class, output.committer.getClass()); } // simulate the behavior of translating MR to DAG using MR new API @Test (timeout = 5000) public void testNewAPI_MR() throws Exception { String outputPath = "/tmp/output"; Job job = Job.getInstance(); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(Text.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); SequenceFileOutputFormat.setOutputPath(job, new Path(outputPath)); job.getConfiguration().setBoolean("mapred.reducer.new-api", true); // the output is attached to reducer job.getConfiguration().setBoolean(MRConfig.IS_MAP_PROCESSOR, false); UserPayload vertexPayload = TezUtils.createUserPayloadFromConf(job.getConfiguration()); OutputDescriptor od = OutputDescriptor.create(MROutputLegacy.class.getName()) .setUserPayload(vertexPayload); DataSinkDescriptor sink = DataSinkDescriptor.create(od, OutputCommitterDescriptor.create(MROutputCommitter.class.getName()), null); OutputContext outputContext = createMockOutputContext(sink.getOutputDescriptor().getUserPayload()); MROutputLegacy output = new MROutputLegacy(outputContext, 2); output.initialize(); assertEquals(true, output.useNewApi); assertEquals(SequenceFileOutputFormat.class, output.newOutputFormat.getClass()); assertNull(output.oldOutputFormat); assertEquals(NullWritable.class, output.newApiTaskAttemptContext.getOutputKeyClass()); assertEquals(Text.class, output.newApiTaskAttemptContext.getOutputValueClass()); assertNull(output.oldApiTaskAttemptContext); assertNotNull(output.newRecordWriter); assertNull(output.oldRecordWriter); assertEquals(FileOutputCommitter.class, output.committer.getClass()); } // simulate the behavior of translating Mapper-only job to DAG using MR old API @Test (timeout = 5000) public void testOldAPI_MapperOnly() throws Exception { String outputPath = "/tmp/output"; JobConf conf = new JobConf(); conf.setOutputKeyClass(NullWritable.class); conf.setOutputValueClass(Text.class); conf.setOutputFormat(org.apache.hadoop.mapred.SequenceFileOutputFormat.class); org.apache.hadoop.mapred.SequenceFileOutputFormat.setOutputPath(conf, new Path(outputPath)); // the output is attached to mapper conf.setBoolean(MRConfig.IS_MAP_PROCESSOR, true); UserPayload vertexPayload = TezUtils.createUserPayloadFromConf(conf); OutputDescriptor od = OutputDescriptor.create(MROutputLegacy.class.getName()) .setUserPayload(vertexPayload); DataSinkDescriptor sink = DataSinkDescriptor.create(od, OutputCommitterDescriptor.create(MROutputCommitter.class.getName()), null); OutputContext outputContext = createMockOutputContext(sink.getOutputDescriptor().getUserPayload()); MROutputLegacy output = new MROutputLegacy(outputContext, 2); output.initialize(); assertEquals(false, output.useNewApi); assertEquals(org.apache.hadoop.mapred.SequenceFileOutputFormat.class, output.oldOutputFormat.getClass()); assertNull(output.newOutputFormat); assertEquals(NullWritable.class, output.oldApiTaskAttemptContext.getOutputKeyClass()); assertEquals(Text.class, output.oldApiTaskAttemptContext.getOutputValueClass()); assertNull(output.newApiTaskAttemptContext); assertNotNull(output.oldRecordWriter); assertNull(output.newRecordWriter); assertEquals(org.apache.hadoop.mapred.FileOutputCommitter.class, output.committer.getClass()); } //simulate the behavior of translating mapper-only job to DAG using MR new API @Test (timeout = 5000) public void testNewAPI_MapperOnly() throws Exception { String outputPath = "/tmp/output"; Job job = Job.getInstance(); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(Text.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); SequenceFileOutputFormat.setOutputPath(job, new Path(outputPath)); job.getConfiguration().setBoolean("mapred.mapper.new-api", true); // the output is attached to mapper job.getConfiguration().setBoolean(MRConfig.IS_MAP_PROCESSOR, true); UserPayload vertexPayload = TezUtils.createUserPayloadFromConf(job.getConfiguration()); OutputDescriptor od = OutputDescriptor.create(MROutputLegacy.class.getName()) .setUserPayload(vertexPayload); DataSinkDescriptor sink = DataSinkDescriptor.create(od, OutputCommitterDescriptor.create(MROutputCommitter.class.getName()), null); OutputContext outputContext = createMockOutputContext(sink.getOutputDescriptor().getUserPayload()); MROutputLegacy output = new MROutputLegacy(outputContext, 2); output.initialize(); assertEquals(true, output.useNewApi); assertEquals(SequenceFileOutputFormat.class, output.newOutputFormat.getClass()); assertNull(output.oldOutputFormat); assertEquals(NullWritable.class, output.newApiTaskAttemptContext.getOutputKeyClass()); assertEquals(Text.class, output.newApiTaskAttemptContext.getOutputValueClass()); assertNull(output.oldApiTaskAttemptContext); assertNotNull(output.newRecordWriter); assertNull(output.oldRecordWriter); assertEquals(FileOutputCommitter.class, output.committer.getClass()); } private OutputContext createMockOutputContext(UserPayload payload) { OutputContext outputContext = mock(OutputContext.class); ApplicationId appId = ApplicationId.newInstance(System.currentTimeMillis(), 1); when(outputContext.getUserPayload()).thenReturn(payload); when(outputContext.getApplicationId()).thenReturn(appId); when(outputContext.getTaskVertexIndex()).thenReturn(1); when(outputContext.getTaskAttemptNumber()).thenReturn(1); when(outputContext.getCounters()).thenReturn(new TezCounters()); return outputContext; } }