/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.pinterest.terrapin.hadoop;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.mockito.Matchers.any;
import static org.mockito.Matchers.anyBoolean;
import static org.mockito.Matchers.anyString;
import static org.mockito.Matchers.eq;
import static org.mockito.Mockito.doNothing;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;

import com.pinterest.terrapin.Constants;
import com.pinterest.terrapin.TerrapinUtil;
import com.pinterest.terrapin.thrift.generated.Options;
import com.pinterest.terrapin.thrift.generated.PartitionerType;
import com.pinterest.terrapin.tools.HFileGenerator;
import com.pinterest.terrapin.zookeeper.ClusterInfo;
import com.pinterest.terrapin.zookeeper.FileSetInfo;
import com.pinterest.terrapin.zookeeper.ZooKeeperManager;

import com.google.common.base.Function;
import com.google.common.collect.Lists;
import com.google.common.io.Files;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.tools.DistCp;
import org.apache.hadoop.tools.DistCpOptions;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.mockito.invocation.InvocationOnMock;
import org.mockito.stubbing.Answer;

import java.io.File;
import java.io.IOException;
import java.net.UnknownHostException;
import java.util.List;

/**
 * Unit tests for {@link BaseUploader}, exercising partition validation and the
 * DistCp-based upload flow against mocked ZooKeeper and MapReduce dependencies.
 */
public class BaseUploaderTest {
  private static final String CLUSTER = "test_cluster";
  private static final String NAME_NODE = "test_node";
  private static final int REPLICA_FACTOR = 2;
  private static final String FILE_SET = "test_fileset";
  private static final String HDFS_DIR = Constants.HDFS_DATA_DIR + "/" + FILE_SET;

  /**
   * Test double that generates local HFiles as input and replaces the
   * ZooKeeper manager, the DistCp job and the fileset loading step with
   * mocks and in-place assertions.
   */
  public class TestUploader extends BaseUploader {
    private List<Path> sourceFiles;
    private long blockSize;
    private ZooKeeperManager zkManager;
    private DistCp distCp;
    private Job job;
    private int numPartition;

    public TestUploader(TerrapinUploaderOptions options) {
      super(options);
    }

    public void init(int numPartition) throws Exception {
      this.numPartition = numPartition;
      sourceFiles = HFileGenerator.generateHFiles(fs, conf, tempFolder,
          options.loadOptions.getPartitioner(), numPartition, numPartition * 1000);
      // Track the largest source file; the uploader derives the HDFS block
      // size from it.
      blockSize = 0;
      for (Path path : sourceFiles) {
        long fileSize = new File(path.toString()).length();
        if (fileSize > blockSize) {
          blockSize = fileSize;
        }
      }
      // Mock out ZooKeeper, DistCp and the MapReduce job so that upload()
      // can run without a live cluster.
      zkManager = mock(ZooKeeperManager.class);
      distCp = mock(DistCp.class);
      job = mock(Job.class);
      when(zkManager.getClusterInfo()).thenReturn(
          new ClusterInfo(NAME_NODE, REPLICA_FACTOR));
      when(distCp.execute()).thenReturn(job);
      when(job.waitForCompletion(anyBoolean())).then(new Answer<Object>() {
        @Override
        public Object answer(InvocationOnMock invocationOnMock) throws Throwable {
          // Simulate a short-running MapReduce job that completes successfully.
          Thread.sleep(1000);
          return true;
        }
      });
      doNothing().when(zkManager).lockFileSet(anyString(), any(FileSetInfo.class));
      doNothing().when(zkManager).unlockFileSet(anyString());
    }

    public void verifyTest() throws Exception {
      // The upload must lock the fileset, run DistCp to completion and then
      // release the lock.
      verify(zkManager).getClusterInfo();
      verify(zkManager).lockFileSet(eq(FILE_SET), any(FileSetInfo.class));
      verify(zkManager).unlockFileSet(eq(FILE_SET));
      verify(distCp).execute();
      verify(job).waitForCompletion(eq(true));
    }

    @Override
    List<Pair<Path, Long>> getFileList() {
      return Lists.transform(sourceFiles, new Function<Path, Pair<Path, Long>>() {
        @Override
        public Pair<Path, Long> apply(Path path) {
          return ImmutablePair.of(path, new File(path.toString()).length());
        }
      });
    }

    @Override
    protected ZooKeeperManager getZKManager(String clusterName) throws UnknownHostException {
      assertEquals(CLUSTER, clusterName);
      return zkManager;
    }

    @Override
    protected DistCp getDistCp(Configuration conf, DistCpOptions options) {
      assertEquals(Constants.MAPRED_MAP_MAX_ATTEMPTS,
          Integer.parseInt(conf.get("mapred.map.max.attempts")));
      assertEquals(Constants.CHECKSUM_BYTES,
          Integer.parseInt(conf.get("io.bytes.per.checksum")));
      // The block size must be rounded up to a multiple of the checksum size.
      long blockSizeExpected = blockSize;
      if (blockSizeExpected % Constants.CHECKSUM_BYTES != 0) {
        blockSizeExpected = (blockSize / Constants.CHECKSUM_BYTES + 1) * Constants.CHECKSUM_BYTES;
      }
      assertEquals(blockSizeExpected, Long.parseLong(conf.get("dfs.block.size")));
      assertEquals(REPLICA_FACTOR, Integer.parseInt(conf.get("dfs.replication")));
      assertEquals(sourceFiles, options.getSourcePaths());
      assertTrue(options.shouldSkipCRC());
      assertTrue(options.shouldSyncFolder());
      assertTrue(options.getTargetPath().toString().startsWith(
          "hdfs://" + NAME_NODE + HDFS_DIR));
      if (numPartition == 1) {
        assertTrue(options.getTargetPath().toString()
            .endsWith(TerrapinUtil.formatPartitionName(0)));
      }
      return distCp;
    }

    @Override
    protected void loadFileSetData(ZooKeeperManager zkManager, FileSetInfo fileSetInfo,
                                   Options options) {
      assertEquals(FILE_SET, fileSetInfo.fileSetName);
      assertEquals(numPartition, fileSetInfo.servingInfo.numPartitions);
    }
  }

  private Configuration conf;
  private FileSystem fs;
  private TestUploader uploader;
  private TerrapinUploaderOptions options;
  private File tempFolder;

  @Before
  public void setUp() throws Exception {
    conf = new Configuration();
    fs = FileSystem.get(conf);
    tempFolder = Files.createTempDir();
    options = new TerrapinUploaderOptions();
    options.terrapinZkQuorum = "terrapinzk";
    uploader = new TestUploader(options);
  }

  @After
  public void cleanUp() throws IOException {
    FileUtils.deleteDirectory(tempFolder);
  }

  @Test
  public void testValidateCascadingPartition() throws IOException {
    List<Path> parts = HFileGenerator.generateHFiles(fs, conf, tempFolder,
        PartitionerType.CASCADING, 10, 1000);
    uploader.validate(parts, PartitionerType.CASCADING, 10);
  }

  @Test(expected = IllegalArgumentException.class)
  public void testValidateCascadingPartitionWithException() throws IOException {
    List<Path> parts = HFileGenerator.generateHFiles(fs, conf, tempFolder,
        PartitionerType.MODULUS, 10, 1000);
    uploader.validate(parts, PartitionerType.CASCADING, 10);
  }

  @Test
  public void testValidateModulusPartition() throws IOException {
    List<Path> parts = HFileGenerator.generateHFiles(fs, conf, tempFolder,
        PartitionerType.MODULUS, 10, 1000);
    uploader.validate(parts,
        PartitionerType.MODULUS, 10);
  }

  @Test(expected = IllegalArgumentException.class)
  public void testValidateModulusPartitionWithException() throws IOException {
    List<Path> parts = HFileGenerator.generateHFiles(fs, conf, tempFolder,
        PartitionerType.CASCADING, 10, 1000);
    uploader.validate(parts, PartitionerType.MODULUS, 10);
  }

  @Test(expected = IllegalArgumentException.class)
  public void testAllEmptyPartitions() throws IOException {
    // Zero keys per partition yields all-empty HFiles, which validation
    // must reject.
    List<Path> parts = HFileGenerator.generateHFiles(fs, conf, tempFolder,
        PartitionerType.CASCADING, 10, 0);
    uploader.validate(parts, PartitionerType.MODULUS, 10);
  }

  @Test
  public void testUpload() throws Exception {
    uploader.init(100);
    uploader.upload(CLUSTER, FILE_SET, new Options());
    uploader.verifyTest();
  }

  @Test
  public void testUploadOneFile() throws Exception {
    uploader.init(1);
    uploader.upload(CLUSTER, FILE_SET, new Options());
    uploader.verifyTest();
  }
}