package gobblin.compaction.mapreduce; import gobblin.compaction.dataset.Dataset; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; import org.testng.Assert; import java.io.File; import java.io.IOException; import java.util.HashSet; import java.util.Set; /** * Test for directory renaming strategy * {@link MRCompactor#getDeepestLevelRenamedDirsWithFileExistence(FileSystem, Set)} * {@link MRCompactor#getDeepestLevelUnrenamedDirsWithFileExistence(FileSystem, Set)} * {@link MRCompactor#renameSourceDirAsCompactionComplete(FileSystem, Dataset)} */ @Test(groups = { "gobblin.compaction.mapreduce" }) public class RenameSourceDirectoryTest { private FileSystem fs; private static final String RENAME_SRC_DIR = "/tmp/renaming-source-dir"; private static final String RENAME_SRC_DIR_RUN1_DIR = RENAME_SRC_DIR + "/00_10/run1"; private static final String RENAME_SRC_DIR_RUN2_DIR = RENAME_SRC_DIR + "/00_10/run2"; private static final String RENAME_SRC_DIR_RUN3_DIR = RENAME_SRC_DIR + "/10_20/run1"; private static final String RENAME_SRC_DIR_RUN4_DIR = RENAME_SRC_DIR + "/20_30/run1"; private static final String RENAME_SRC_DIR_RUN5_DIR = RENAME_SRC_DIR + "/20_30/run2"; private static final String RENAME_SRC_DIR_RUN4_DIR_COMPLETE = RENAME_SRC_DIR + "/20_30/run2_COMPLETE"; private static final String RENAME_SRC_DIR_RUN5_DIR_COMPLETE = RENAME_SRC_DIR + "/20_30/run3_COMPLETE"; private static final String RENAME_SRC_DIR_RUN1_FILE = RENAME_SRC_DIR_RUN1_DIR + "/dummy"; private static final String RENAME_SRC_DIR_RUN2_FILE = RENAME_SRC_DIR_RUN2_DIR + "/dummy"; private static final String RENAME_SRC_DIR_RUN3_FILE = RENAME_SRC_DIR_RUN3_DIR + "/dummy"; private static final String RENAME_SRC_DIR_RUN4_FILE = RENAME_SRC_DIR_RUN4_DIR + "/dummy"; private static final String RENAME_SRC_DIR_RUN5_FILE = RENAME_SRC_DIR_RUN5_DIR + "/dummy"; private static final String RENAME_SRC_DIR_RUN4_COMPLETE_FILE = RENAME_SRC_DIR_RUN4_DIR_COMPLETE + "/dummy"; private static final String RENAME_SRC_DIR_RUN5_COMPLETE_FILE = RENAME_SRC_DIR_RUN5_DIR_COMPLETE + "/dummy"; @BeforeClass public void setUp() throws Exception { Configuration conf = new Configuration(); fs = FileSystem.get(conf); } private void createFile (String path) throws IOException { File f = new File(path); f.getParentFile().mkdirs(); f.createNewFile(); } @Test public void testUnrenamedDirs() throws Exception { fs.delete(new Path(RENAME_SRC_DIR), true); createFile(RENAME_SRC_DIR_RUN1_FILE); createFile(RENAME_SRC_DIR_RUN2_FILE); createFile(RENAME_SRC_DIR_RUN3_FILE); createFile(RENAME_SRC_DIR_RUN4_FILE); createFile(RENAME_SRC_DIR_RUN5_FILE); Set<Path> inputPaths = new HashSet<>(); inputPaths.add(new Path(RENAME_SRC_DIR_RUN1_DIR)); inputPaths.add(new Path(RENAME_SRC_DIR_RUN2_DIR)); inputPaths.add(new Path(RENAME_SRC_DIR_RUN3_DIR)); inputPaths.add(new Path(RENAME_SRC_DIR_RUN4_DIR)); inputPaths.add(new Path(RENAME_SRC_DIR_RUN5_DIR)); Set<Path> unRenamed = MRCompactor.getDeepestLevelUnrenamedDirsWithFileExistence(fs, inputPaths); Assert.assertEquals(unRenamed.size(), 5); fs.delete(new Path(RENAME_SRC_DIR_RUN1_FILE), false); unRenamed = MRCompactor.getDeepestLevelUnrenamedDirsWithFileExistence(fs, inputPaths); Assert.assertEquals(unRenamed.size(), 4); fs.delete(new Path(RENAME_SRC_DIR), true); } @Test public void testRenamedDirs() throws Exception { fs.delete(new Path(RENAME_SRC_DIR), true); createFile(RENAME_SRC_DIR_RUN1_FILE); createFile(RENAME_SRC_DIR_RUN2_FILE); createFile(RENAME_SRC_DIR_RUN3_FILE); createFile(RENAME_SRC_DIR_RUN4_COMPLETE_FILE); createFile(RENAME_SRC_DIR_RUN5_COMPLETE_FILE); Set<Path> inputPaths = new HashSet<>(); inputPaths.add(new Path(RENAME_SRC_DIR_RUN1_DIR)); inputPaths.add(new Path(RENAME_SRC_DIR_RUN2_DIR)); inputPaths.add(new Path(RENAME_SRC_DIR_RUN3_DIR)); inputPaths.add(new Path(RENAME_SRC_DIR_RUN4_DIR_COMPLETE)); inputPaths.add(new Path(RENAME_SRC_DIR_RUN5_DIR_COMPLETE)); Set<Path> renamed = MRCompactor.getDeepestLevelRenamedDirsWithFileExistence(fs, inputPaths); Assert.assertEquals(renamed.size(), 2); fs.delete(new Path(RENAME_SRC_DIR_RUN1_FILE), false); renamed = MRCompactor.getDeepestLevelRenamedDirsWithFileExistence(fs, inputPaths); Assert.assertEquals(renamed.size(), 2); fs.delete(new Path(RENAME_SRC_DIR), true); } @Test public void testRenamingProcedure() throws Exception { fs.delete(new Path(RENAME_SRC_DIR), true); createFile(RENAME_SRC_DIR_RUN1_FILE); createFile(RENAME_SRC_DIR_RUN2_FILE); createFile(RENAME_SRC_DIR_RUN3_FILE); createFile(RENAME_SRC_DIR_RUN4_COMPLETE_FILE); createFile(RENAME_SRC_DIR_RUN5_COMPLETE_FILE); Set<Path> inputPaths = new HashSet<>(); inputPaths.add(new Path(RENAME_SRC_DIR_RUN1_DIR)); inputPaths.add(new Path(RENAME_SRC_DIR_RUN2_DIR)); inputPaths.add(new Path(RENAME_SRC_DIR_RUN3_DIR)); inputPaths.add(new Path(RENAME_SRC_DIR_RUN4_DIR_COMPLETE)); inputPaths.add(new Path(RENAME_SRC_DIR_RUN5_DIR_COMPLETE)); Dataset dataset = mock(Dataset.class); Set<Path> unrenamed = MRCompactor.getDeepestLevelUnrenamedDirsWithFileExistence(fs, inputPaths); Assert.assertEquals(unrenamed.size(), 3); when(dataset.getRenamePaths()).thenReturn(unrenamed); MRCompactor.renameSourceDirAsCompactionComplete(fs, dataset); Assert.assertEquals(fs.exists(new Path(RENAME_SRC_DIR_RUN1_DIR + "_COMPLETE/dummy")), true); Assert.assertEquals(fs.exists(new Path(RENAME_SRC_DIR_RUN2_DIR + "_COMPLETE/dummy")), true); Assert.assertEquals(fs.exists(new Path(RENAME_SRC_DIR_RUN3_DIR + "_COMPLETE/dummy")), true); fs.delete(new Path(RENAME_SRC_DIR), true); } }