/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.tools.mapred;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;
import org.apache.hadoop.mapreduce.task.JobContextImpl;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
import org.apache.hadoop.tools.CopyListing;
import org.apache.hadoop.tools.DistCpConstants;
import org.apache.hadoop.tools.DistCpOptions;
import org.apache.hadoop.tools.DistCpOptions.FileAttribute;
import org.apache.hadoop.tools.GlobbedCopyListing;
import org.apache.hadoop.tools.util.TestDistCpUtils;
import org.apache.hadoop.security.Credentials;
import org.junit.*;
import java.io.IOException;
import java.util.*;
public class TestCopyCommitter {
private static final Log LOG = LogFactory.getLog(TestCopyCommitter.class);
private static final Random rand = new Random();
private static final Credentials CREDENTIALS = new Credentials();
public static final int PORT = 39737;
private static Configuration config;
private static MiniDFSCluster cluster;
private static Job getJobForClient() throws IOException {
Job job = Job.getInstance(new Configuration());
job.getConfiguration().set("mapred.job.tracker", "localhost:" + PORT);
job.setInputFormatClass(NullInputFormat.class);
job.setOutputFormatClass(NullOutputFormat.class);
job.setNumReduceTasks(0);
return job;
}
@BeforeClass
public static void create() throws IOException {
config = getJobForClient().getConfiguration();
config.setLong(DistCpConstants.CONF_LABEL_TOTAL_BYTES_TO_BE_COPIED, 0);
cluster = new MiniDFSCluster.Builder(config).numDataNodes(1).format(true)
.build();
}
@AfterClass
public static void destroy() {
if (cluster != null) {
cluster.shutdown();
}
}
@Before
public void createMetaFolder() {
config.set(DistCpConstants.CONF_LABEL_META_FOLDER, "/meta");
Path meta = new Path("/meta");
try {
cluster.getFileSystem().mkdirs(meta);
} catch (IOException e) {
LOG.error("Exception encountered while creating meta folder", e);
Assert.fail("Unable to create meta folder");
}
}
@After
public void cleanupMetaFolder() {
Path meta = new Path("/meta");
try {
if (cluster.getFileSystem().exists(meta)) {
cluster.getFileSystem().delete(meta, true);
Assert.fail("Expected meta folder to be deleted");
}
} catch (IOException e) {
LOG.error("Exception encountered while cleaning up folder", e);
Assert.fail("Unable to clean up meta folder");
}
}
@Test
public void testNoCommitAction() {
TaskAttemptContext taskAttemptContext = getTaskAttemptContext(config);
JobContext jobContext = new JobContextImpl(taskAttemptContext.getConfiguration(),
taskAttemptContext.getTaskAttemptID().getJobID());
try {
OutputCommitter committer = new CopyCommitter(null, taskAttemptContext);
committer.commitJob(jobContext);
Assert.assertEquals(taskAttemptContext.getStatus(), "Commit Successful");
//Test for idempotent commit
committer.commitJob(jobContext);
Assert.assertEquals(taskAttemptContext.getStatus(), "Commit Successful");
} catch (IOException e) {
LOG.error("Exception encountered ", e);
Assert.fail("Commit failed");
}
}
@Test
public void testPreserveStatus() {
TaskAttemptContext taskAttemptContext = getTaskAttemptContext(config);
JobContext jobContext = new JobContextImpl(taskAttemptContext.getConfiguration(),
taskAttemptContext.getTaskAttemptID().getJobID());
Configuration conf = jobContext.getConfiguration();
String sourceBase;
String targetBase;
FileSystem fs = null;
try {
OutputCommitter committer = new CopyCommitter(null, taskAttemptContext);
fs = FileSystem.get(conf);
FsPermission sourcePerm = new FsPermission((short) 511);
FsPermission initialPerm = new FsPermission((short) 448);
sourceBase = TestDistCpUtils.createTestSetup(fs, sourcePerm);
targetBase = TestDistCpUtils.createTestSetup(fs, initialPerm);
DistCpOptions options = new DistCpOptions(Arrays.asList(new Path(sourceBase)),
new Path("/out"));
options.preserve(FileAttribute.PERMISSION);
options.appendToConf(conf);
options.setTargetPathExists(false);
CopyListing listing = new GlobbedCopyListing(conf, CREDENTIALS);
Path listingFile = new Path("/tmp1/" + String.valueOf(rand.nextLong()));
listing.buildListing(listingFile, options);
conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, targetBase);
committer.commitJob(jobContext);
if (!checkDirectoryPermissions(fs, targetBase, sourcePerm)) {
Assert.fail("Permission don't match");
}
//Test for idempotent commit
committer.commitJob(jobContext);
if (!checkDirectoryPermissions(fs, targetBase, sourcePerm)) {
Assert.fail("Permission don't match");
}
} catch (IOException e) {
LOG.error("Exception encountered while testing for preserve status", e);
Assert.fail("Preserve status failure");
} finally {
TestDistCpUtils.delete(fs, "/tmp1");
conf.unset(DistCpConstants.CONF_LABEL_PRESERVE_STATUS);
}
}
@Test
public void testDeleteMissing() {
TaskAttemptContext taskAttemptContext = getTaskAttemptContext(config);
JobContext jobContext = new JobContextImpl(taskAttemptContext.getConfiguration(),
taskAttemptContext.getTaskAttemptID().getJobID());
Configuration conf = jobContext.getConfiguration();
String sourceBase;
String targetBase;
FileSystem fs = null;
try {
OutputCommitter committer = new CopyCommitter(null, taskAttemptContext);
fs = FileSystem.get(conf);
sourceBase = TestDistCpUtils.createTestSetup(fs, FsPermission.getDefault());
targetBase = TestDistCpUtils.createTestSetup(fs, FsPermission.getDefault());
String targetBaseAdd = TestDistCpUtils.createTestSetup(fs, FsPermission.getDefault());
fs.rename(new Path(targetBaseAdd), new Path(targetBase));
DistCpOptions options = new DistCpOptions(Arrays.asList(new Path(sourceBase)),
new Path("/out"));
options.setSyncFolder(true);
options.setDeleteMissing(true);
options.appendToConf(conf);
CopyListing listing = new GlobbedCopyListing(conf, CREDENTIALS);
Path listingFile = new Path("/tmp1/" + String.valueOf(rand.nextLong()));
listing.buildListing(listingFile, options);
conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, targetBase);
conf.set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH, targetBase);
committer.commitJob(jobContext);
if (!TestDistCpUtils.checkIfFoldersAreInSync(fs, targetBase, sourceBase)) {
Assert.fail("Source and target folders are not in sync");
}
if (!TestDistCpUtils.checkIfFoldersAreInSync(fs, sourceBase, targetBase)) {
Assert.fail("Source and target folders are not in sync");
}
//Test for idempotent commit
committer.commitJob(jobContext);
if (!TestDistCpUtils.checkIfFoldersAreInSync(fs, targetBase, sourceBase)) {
Assert.fail("Source and target folders are not in sync");
}
if (!TestDistCpUtils.checkIfFoldersAreInSync(fs, sourceBase, targetBase)) {
Assert.fail("Source and target folders are not in sync");
}
} catch (Throwable e) {
LOG.error("Exception encountered while testing for delete missing", e);
Assert.fail("Delete missing failure");
} finally {
TestDistCpUtils.delete(fs, "/tmp1");
conf.set(DistCpConstants.CONF_LABEL_DELETE_MISSING, "false");
}
}
@Test
public void testDeleteMissingFlatInterleavedFiles() {
TaskAttemptContext taskAttemptContext = getTaskAttemptContext(config);
JobContext jobContext = new JobContextImpl(taskAttemptContext.getConfiguration(),
taskAttemptContext.getTaskAttemptID().getJobID());
Configuration conf = jobContext.getConfiguration();
String sourceBase;
String targetBase;
FileSystem fs = null;
try {
OutputCommitter committer = new CopyCommitter(null, taskAttemptContext);
fs = FileSystem.get(conf);
sourceBase = "/tmp1/" + String.valueOf(rand.nextLong());
targetBase = "/tmp1/" + String.valueOf(rand.nextLong());
TestDistCpUtils.createFile(fs, sourceBase + "/1");
TestDistCpUtils.createFile(fs, sourceBase + "/3");
TestDistCpUtils.createFile(fs, sourceBase + "/4");
TestDistCpUtils.createFile(fs, sourceBase + "/5");
TestDistCpUtils.createFile(fs, sourceBase + "/7");
TestDistCpUtils.createFile(fs, sourceBase + "/8");
TestDistCpUtils.createFile(fs, sourceBase + "/9");
TestDistCpUtils.createFile(fs, targetBase + "/2");
TestDistCpUtils.createFile(fs, targetBase + "/4");
TestDistCpUtils.createFile(fs, targetBase + "/5");
TestDistCpUtils.createFile(fs, targetBase + "/7");
TestDistCpUtils.createFile(fs, targetBase + "/9");
TestDistCpUtils.createFile(fs, targetBase + "/A");
DistCpOptions options = new DistCpOptions(Arrays.asList(new Path(sourceBase)),
new Path("/out"));
options.setSyncFolder(true);
options.setDeleteMissing(true);
options.appendToConf(conf);
CopyListing listing = new GlobbedCopyListing(conf, CREDENTIALS);
Path listingFile = new Path("/tmp1/" + String.valueOf(rand.nextLong()));
listing.buildListing(listingFile, options);
conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, targetBase);
conf.set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH, targetBase);
committer.commitJob(jobContext);
if (!TestDistCpUtils.checkIfFoldersAreInSync(fs, targetBase, sourceBase)) {
Assert.fail("Source and target folders are not in sync");
}
Assert.assertEquals(fs.listStatus(new Path(targetBase)).length, 4);
//Test for idempotent commit
committer.commitJob(jobContext);
if (!TestDistCpUtils.checkIfFoldersAreInSync(fs, targetBase, sourceBase)) {
Assert.fail("Source and target folders are not in sync");
}
Assert.assertEquals(fs.listStatus(new Path(targetBase)).length, 4);
} catch (IOException e) {
LOG.error("Exception encountered while testing for delete missing", e);
Assert.fail("Delete missing failure");
} finally {
TestDistCpUtils.delete(fs, "/tmp1");
conf.set(DistCpConstants.CONF_LABEL_DELETE_MISSING, "false");
}
}
@Test
public void testAtomicCommitMissingFinal() {
TaskAttemptContext taskAttemptContext = getTaskAttemptContext(config);
JobContext jobContext = new JobContextImpl(taskAttemptContext.getConfiguration(),
taskAttemptContext.getTaskAttemptID().getJobID());
Configuration conf = jobContext.getConfiguration();
String workPath = "/tmp1/" + String.valueOf(rand.nextLong());
String finalPath = "/tmp1/" + String.valueOf(rand.nextLong());
FileSystem fs = null;
try {
OutputCommitter committer = new CopyCommitter(null, taskAttemptContext);
fs = FileSystem.get(conf);
fs.mkdirs(new Path(workPath));
conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, workPath);
conf.set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH, finalPath);
conf.setBoolean(DistCpConstants.CONF_LABEL_ATOMIC_COPY, true);
Assert.assertTrue(fs.exists(new Path(workPath)));
Assert.assertFalse(fs.exists(new Path(finalPath)));
committer.commitJob(jobContext);
Assert.assertFalse(fs.exists(new Path(workPath)));
Assert.assertTrue(fs.exists(new Path(finalPath)));
//Test for idempotent commit
committer.commitJob(jobContext);
Assert.assertFalse(fs.exists(new Path(workPath)));
Assert.assertTrue(fs.exists(new Path(finalPath)));
} catch (IOException e) {
LOG.error("Exception encountered while testing for preserve status", e);
Assert.fail("Atomic commit failure");
} finally {
TestDistCpUtils.delete(fs, workPath);
TestDistCpUtils.delete(fs, finalPath);
conf.setBoolean(DistCpConstants.CONF_LABEL_ATOMIC_COPY, false);
}
}
@Test
public void testAtomicCommitExistingFinal() {
TaskAttemptContext taskAttemptContext = getTaskAttemptContext(config);
JobContext jobContext = new JobContextImpl(taskAttemptContext.getConfiguration(),
taskAttemptContext.getTaskAttemptID().getJobID());
Configuration conf = jobContext.getConfiguration();
String workPath = "/tmp1/" + String.valueOf(rand.nextLong());
String finalPath = "/tmp1/" + String.valueOf(rand.nextLong());
FileSystem fs = null;
try {
OutputCommitter committer = new CopyCommitter(null, taskAttemptContext);
fs = FileSystem.get(conf);
fs.mkdirs(new Path(workPath));
fs.mkdirs(new Path(finalPath));
conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, workPath);
conf.set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH, finalPath);
conf.setBoolean(DistCpConstants.CONF_LABEL_ATOMIC_COPY, true);
Assert.assertTrue(fs.exists(new Path(workPath)));
Assert.assertTrue(fs.exists(new Path(finalPath)));
try {
committer.commitJob(jobContext);
Assert.fail("Should not be able to atomic-commit to pre-existing path.");
} catch(Exception exception) {
Assert.assertTrue(fs.exists(new Path(workPath)));
Assert.assertTrue(fs.exists(new Path(finalPath)));
LOG.info("Atomic-commit Test pass.");
}
} catch (IOException e) {
LOG.error("Exception encountered while testing for atomic commit.", e);
Assert.fail("Atomic commit failure");
} finally {
TestDistCpUtils.delete(fs, workPath);
TestDistCpUtils.delete(fs, finalPath);
conf.setBoolean(DistCpConstants.CONF_LABEL_ATOMIC_COPY, false);
}
}
private TaskAttemptContext getTaskAttemptContext(Configuration conf) {
return new TaskAttemptContextImpl(conf,
new TaskAttemptID("200707121733", 1, TaskType.MAP, 1, 1));
}
private boolean checkDirectoryPermissions(FileSystem fs, String targetBase,
FsPermission sourcePerm) throws IOException {
Path base = new Path(targetBase);
Stack<Path> stack = new Stack<Path>();
stack.push(base);
while (!stack.isEmpty()) {
Path file = stack.pop();
if (!fs.exists(file)) continue;
FileStatus[] fStatus = fs.listStatus(file);
if (fStatus == null || fStatus.length == 0) continue;
for (FileStatus status : fStatus) {
if (status.isDirectory()) {
stack.push(status.getPath());
Assert.assertEquals(status.getPermission(), sourcePerm);
}
}
}
return true;
}
private static class NullInputFormat extends InputFormat {
@Override
public List getSplits(JobContext context)
throws IOException, InterruptedException {
return Collections.EMPTY_LIST;
}
@Override
public RecordReader createRecordReader(InputSplit split,
TaskAttemptContext context)
throws IOException, InterruptedException {
return null;
}
}
}