/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package gobblin.data.management.copy.publisher;
import gobblin.configuration.ConfigurationKeys;
import gobblin.configuration.State;
import gobblin.configuration.WorkUnitState;
import gobblin.data.management.copy.CopyConfiguration;
import gobblin.data.management.copy.CopySource;
import gobblin.data.management.copy.CopyableDataset;
import gobblin.data.management.copy.CopyableDatasetMetadata;
import gobblin.data.management.copy.CopyEntity;
import gobblin.data.management.copy.CopyableFile;
import gobblin.data.management.copy.PreserveAttributes;
import gobblin.data.management.copy.TestCopyableDataset;
import gobblin.util.PathUtils;
import java.io.IOException;
import java.util.Collection;
import java.util.List;
import lombok.extern.slf4j.Slf4j;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.testng.Assert;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import com.google.common.io.Closer;
import com.google.common.io.Files;
/*
*
* Test cases covered
* - Single dataset multiple files/workunits
* - Single dataset multiple files/workunits few workunits failed
* - Two datasets multiple files
* - Two datasets one of them failed to publish
* - datasets with overlapping dataset roots
*
*/
@Slf4j
public class CopyDataPublisherTest {
private static final Closer closer = Closer.create();
private FileSystem fs;
private Path testClassTempPath;
@Test
public void testPublishSingleDataset() throws Exception {
State state = getTestState("testPublishSingleDataset");
state.setProp(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, "/");
Path testMethodTempPath = new Path(testClassTempPath, "testPublishSingleDataset");
CopyDataPublisher copyDataPublisher = closer.register(new CopyDataPublisher(state));
TestDatasetManager datasetManager =
new TestDatasetManager(testMethodTempPath, state, "datasetTargetPath",
ImmutableList.of("a/b", "a/c", "d/e"));
datasetManager.createDatasetFiles();
datasetManager.verifyDoesntExist();
copyDataPublisher.publishData(datasetManager.getWorkUnitStates());
datasetManager.verifyExists();
}
@Test
@SuppressWarnings("unchecked")
public void testPublishMultipleDatasets() throws Exception {
State state = getTestState("testPublishMultipleDatasets");
state.setProp(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, "/");
Path testMethodTempPath = new Path(testClassTempPath, "testPublishMultipleDatasets");
CopyDataPublisher copyDataPublisher = closer.register(new CopyDataPublisher(state));
TestDatasetManager dataset1Manager =
new TestDatasetManager(testMethodTempPath, state, "dataset1TargetPath",
ImmutableList.of("a/b", "a/c", "d/e"));
dataset1Manager.createDatasetFiles();
TestDatasetManager dataset2Manager =
new TestDatasetManager(testMethodTempPath, state, "dataset2TargetPath",
ImmutableList.of("a/b", "a/c", "d/e"));
dataset2Manager.createDatasetFiles();
dataset1Manager.verifyDoesntExist();
dataset2Manager.verifyDoesntExist();
copyDataPublisher.publishData(combine(dataset1Manager.getWorkUnitStates(), dataset2Manager.getWorkUnitStates()));
dataset1Manager.verifyExists();
dataset2Manager.verifyExists();
}
@Test
@SuppressWarnings("unchecked")
public void testPublishOverlappingDatasets() throws Exception {
State state = getTestState("testPublishOverlappingDatasets");
state.setProp(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, "/");
Path testMethodTempPath = new Path(testClassTempPath, "testPublishOverlappingDatasets");
CopyDataPublisher copyDataPublisher = closer.register(new CopyDataPublisher(state));
TestDatasetManager dataset1Manager =
new TestDatasetManager(testMethodTempPath, state, "datasetTargetPath", ImmutableList.of("a/b"));
dataset1Manager.createDatasetFiles();
TestDatasetManager dataset2Manager =
new TestDatasetManager(testMethodTempPath, state, "datasetTargetPath/subDir",
ImmutableList.of("a/c", "d/e"));
dataset2Manager.createDatasetFiles();
dataset1Manager.verifyDoesntExist();
dataset2Manager.verifyDoesntExist();
copyDataPublisher.publishData(combine(dataset1Manager.getWorkUnitStates(), dataset2Manager.getWorkUnitStates()));
dataset1Manager.verifyExists();
dataset2Manager.verifyExists();
}
@Test
@SuppressWarnings("unchecked")
public void testPublishDatasetFailure() throws Exception {
State state = getTestState("testPublishDatasetFailure");
state.setProp(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, "/");
Path testMethodTempPath = new Path(testClassTempPath, "testPublishDatasetFailure");
CopyDataPublisher copyDataPublisher = closer.register(new CopyDataPublisher(state));
TestDatasetManager successDatasetManager =
new TestDatasetManager(testMethodTempPath, state, "successTargetPath", ImmutableList.of("a/b"));
successDatasetManager.createDatasetFiles();
TestDatasetManager failedDatasetManager =
new TestDatasetManager(testMethodTempPath, state, "failedTargetPath", ImmutableList.of("c/d"));
successDatasetManager.verifyDoesntExist();
failedDatasetManager.verifyDoesntExist();
copyDataPublisher.publishData(combine(successDatasetManager.getWorkUnitStates(),
failedDatasetManager.getWorkUnitStates()));
successDatasetManager.verifyExists();
failedDatasetManager.verifyDoesntExist();
}
@BeforeClass
public void setup() throws Exception {
fs = FileSystem.getLocal(new Configuration());
testClassTempPath = new Path(Files.createTempDir().getAbsolutePath(), "CopyDataPublisherTest");
fs.delete(testClassTempPath, true);
log.info("Created a temp directory for CopyDataPublisherTest at " + testClassTempPath);
fs.mkdirs(testClassTempPath);
}
private static Collection<? extends WorkUnitState> combine(List<WorkUnitState>... workUnitStateLists) {
List<WorkUnitState> wus = Lists.newArrayList();
for (List<WorkUnitState> workUnitStates : workUnitStateLists) {
wus.addAll(workUnitStates);
}
return wus;
}
private State getTestState(String testMethodName) {
return getTestState(testMethodName, testClassTempPath);
}
public static State getTestState(String testMethodName, Path testClassTempPath) {
Path testMethodPath = new Path(testClassTempPath, testMethodName);
State state = new State();
state.setProp(ConfigurationKeys.WRITER_OUTPUT_DIR, new Path(testMethodPath, "task-output"));
state.setProp(ConfigurationKeys.WRITER_STAGING_DIR, new Path(testMethodPath, "task-staging"));
state.setProp(ConfigurationKeys.JOB_ID_KEY, "jobid");
return state;
}
public static class TestDatasetManager {
private CopyableDataset copyableDataset;
private CopyableDatasetMetadata metadata;
private List<String> relativeFilePaths;
private Path writerOutputPath;
private Path targetPath;
private FileSystem fs;
private CopyEntity copyEntity;
private void createDatasetFiles() throws IOException {
// Create writer output files
Path datasetWriterOutputPath =
new Path(writerOutputPath, copyEntity.getDatasetAndPartition(this.metadata).identifier());
Path outputPathWithCurrentDirectory = new Path(datasetWriterOutputPath,
PathUtils.withoutLeadingSeparator(this.targetPath));
for (String path : relativeFilePaths) {
Path pathToCreate = new Path(outputPathWithCurrentDirectory, path);
fs.mkdirs(pathToCreate.getParent());
fs.create(pathToCreate);
}
}
public TestDatasetManager(Path testMethodTempPath, State state, String datasetTargetPath, List<String> relativeFilePaths)
throws IOException {
this.fs = FileSystem.getLocal(new Configuration());
this.copyableDataset =
new TestCopyableDataset(new Path("origin"));
this.metadata = new CopyableDatasetMetadata(this.copyableDataset);
this.relativeFilePaths = relativeFilePaths;
this.writerOutputPath = new Path(state.getProp(ConfigurationKeys.WRITER_OUTPUT_DIR));
this.targetPath = new Path(testMethodTempPath, datasetTargetPath);
FileStatus file = new FileStatus(0, false, 0, 0, 0, new Path("/file"));
FileSystem fs = FileSystem.getLocal(new Configuration());
this.copyEntity = CopyableFile.fromOriginAndDestination(fs, file, new Path("/destination"),
CopyConfiguration.builder(fs, state.getProperties()).preserve(PreserveAttributes.fromMnemonicString(""))
.build()).build();
fs.mkdirs(testMethodTempPath);
log.info("Created a temp directory for test at " + testMethodTempPath);
}
List<WorkUnitState> getWorkUnitStates() throws IOException {
List<WorkUnitState> workUnitStates =
Lists.newArrayList(new WorkUnitState(), new WorkUnitState(), new WorkUnitState());
for (WorkUnitState wus : workUnitStates) {
CopySource.serializeCopyableDataset(wus, metadata);
CopySource.serializeCopyEntity(wus, this.copyEntity);
}
return workUnitStates;
}
void verifyExists() throws IOException {
for (String fileRelativePath : relativeFilePaths) {
Path filePublishPath = new Path(this.targetPath, fileRelativePath);
Assert.assertEquals(fs.exists(filePublishPath), true);
}
}
void verifyDoesntExist() throws IOException {
for (String fileRelativePath : relativeFilePaths) {
Path filePublishPath = new Path(this.targetPath, fileRelativePath);
Assert.assertEquals(fs.exists(filePublishPath), false);
}
}
}
@AfterClass
public void cleanup() {
try {
closer.close();
fs.delete(testClassTempPath, true);
} catch (IOException e) {
log.warn(e.getMessage());
}
}
}