/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.pinterest.terrapin;

import com.amazonaws.auth.AWSCredentials;
import com.amazonaws.auth.BasicAWSCredentials;
import com.amazonaws.services.s3.AmazonS3Client;
import com.amazonaws.services.s3.model.ListObjectsRequest;
import com.amazonaws.services.s3.model.ObjectListing;
import com.amazonaws.services.s3.model.S3ObjectSummary;
import com.google.common.base.Joiner;
import com.google.common.collect.Lists;
import com.pinterest.terrapin.base.BytesUtil;
import com.pinterest.terrapin.thrift.generated.*;
import com.pinterest.terrapin.zookeeper.FileSetInfo;
import com.pinterest.terrapin.zookeeper.ZooKeeperManager;
import com.twitter.common.quantity.Amount;
import com.twitter.common.quantity.Time;
import com.twitter.common.zookeeper.ZooKeeperClient;
import com.twitter.finagle.Service;
import com.twitter.finagle.builder.ClientBuilder;
import com.twitter.finagle.thrift.ClientId;
import com.twitter.finagle.thrift.ThriftClientFramedCodecFactory;
import com.twitter.finagle.thrift.ThriftClientRequest;
import com.twitter.ostrich.stats.Stats;
import com.twitter.util.Duration;
import org.apache.commons.configuration.ConfigurationException;
import org.apache.commons.configuration.PropertiesConfiguration;
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DFSClient;
import org.apache.hadoop.hdfs.protocol.DirectoryListing;
import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner;
import org.apache.helix.model.IdealState;
import org.apache.thrift.protocol.TBinaryProtocol;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.Option;

import java.io.IOException;
import java.net.InetAddress;
import java.net.InetSocketAddress;
import java.net.UnknownHostException;
import java.nio.ByteBuffer;
import java.util.List;

/**
 * General utility functions.
 */
public class TerrapinUtil {
  private static final Logger LOG = LoggerFactory.getLogger(TerrapinUtil.class);

  private static final Partitioner<BytesWritable, BytesWritable> HASH_PARTITIONER =
      new HashPartitioner<BytesWritable, BytesWritable>();

  /**
   * Get the helix instance name from the HDFS hostname.
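   * The instance name is the short hostname, i.e. everything before the first '.'.
   * For example, a (hypothetical) host "hdfs-datanode-001.example.com" maps to the
   * helix instance "hdfs-datanode-001".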
   */
  public static String getHelixInstanceFromHDFSHost(String hdfsHostName) {
    int index = hdfsHostName.indexOf(".");
    if (index == -1) {
      return hdfsHostName;
    }
    return hdfsHostName.substring(0, index);
  }

  public static PropertiesConfiguration readPropertiesExitOnFailure(String configFile) {
    PropertiesConfiguration configuration = null;
    if (configFile.isEmpty()) {
      LOG.error("Empty configuration file name. Please specify using -Dterrapin.config.");
      System.exit(1);
    }
    try {
      configuration = new PropertiesConfiguration(configFile);
    } catch (ConfigurationException e) {
      LOG.error("Invalid configuration file " + configFile);
      System.exit(1);
    }
    return configuration;
  }

  /**
   * Extracts the partition number for a file. It expects file names with the prefix part-00000
   * etc. Currently only modulus sharding is supported.
   *
   * @param fileName name of the file, e.g. "part-00000-<hash>"
   * @param partitioner the partitioner type used to shard the file set
   * @return the extracted partition number - null if the file name does not match the expected
   *         prefix.
   */
  public static Integer extractPartitionName(String fileName, PartitionerType partitioner) {
    if (partitioner == PartitionerType.MODULUS || partitioner == PartitionerType.CASCADING) {
      // Modulus sharded files are of the format "part-00000-<hash>"
      // Retrieve 5 characters and strip leading 0's.
      if (!fileName.startsWith(Constants.FILE_PREFIX)) {
        return null;
      }
      try {
        return Integer.parseInt(fileName.substring(5, 10));
      } catch (NumberFormatException e) {
        return null;
      }
    }
    return null;
  }

  public static String formatPartitionName(int partitionNumber) {
    return String.format("%s%05d", Constants.FILE_PREFIX, partitionNumber);
  }

  public static String getPartitionName(ByteBuffer key,
                                        PartitionerType partitionerType,
                                        int numPartitions) {
    Partitioner partitioner = PartitionerFactory.getPartitioner(partitionerType);
    return Integer.toString(partitioner.getPartition(
        new BytesWritable(BytesUtil.readBytesFromByteBufferWithoutConsume(key)),
        null,
        numPartitions));
  }

  public static String hdfsDirToHelixResource(String hdfsDir) {
    return hdfsDir.replace('/', '$');
  }

  public static String helixResourceToHdfsDir(String helixResource) {
    return helixResource.replace('$', '/');
  }

  private static List<InetSocketAddress> getSocketAddressList(String hostPortList)
      throws UnknownHostException {
    List<InetSocketAddress> socketAddrList = Lists.newArrayListWithCapacity(7);
    String[] hostPortPairList = hostPortList.split(",");
    for (String hostPortPair : hostPortPairList) {
      String[] hostPort = hostPortPair.split(":");
      socketAddrList.add(new InetSocketAddress(InetAddress.getByName(hostPort[0]),
          Integer.parseInt(hostPort[1])));
    }
    return socketAddrList;
  }

  public static ZooKeeperClient getZooKeeperClient(String zkQuorum, int sessionTimeoutSeconds)
      throws UnknownHostException {
    return new ZooKeeperClient(Amount.of(sessionTimeoutSeconds, Time.SECONDS),
        getSocketAddressList(zkQuorum));
  }

  /**
   * IMPORTANT: Changing the logic in this function can have weird side effects since
   * the bucket size may change for an already existing resource. This is not an issue for
   * new resources but would create problems when old resources are rebalanced. Before
   * we change the logic here, we must make sure that the bucket size of pre-existing
   * resources is not changed during rebalance operations.
   */
  public static int getBucketSize(int numPartitions, boolean enableZkCompression) {
    // If compression is enabled, there is no need for bucketing of resources.
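    // When compression is disabled, partitions are grouped into buckets of roughly equal
    // size, targeting at most ~1000 partitions per bucket. For example (illustrative
    // numbers only): 800 partitions fit in one bucket, so no bucketing is needed (return 0),
    // while 2500 partitions are split into ceil(2500 / 1000) = 3 buckets of
    // ceil(2500 / 3) = 834 partitions each.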
    if (enableZkCompression) {
      return 0;
    }
    int numBuckets = (int) Math.ceil((double) numPartitions / 1000);
    if (numBuckets <= 1) {
      return 0;
    }
    return (int) Math.ceil((double) numPartitions / numBuckets);
  }

  /**
   * Return the fileset corresponding to a file on HDFS. If the file path is not valid,
   * then return null.
   */
  public static String extractFileSetFromPath(String resource) {
    String[] splits = resource.split("[/]");
    if (splits.length <= 3) {
      // This should really never happen.
      Stats.incr("invalid-resource");
      return null;
    }
    return splits[splits.length - 3];
  }

  public static Pair<String, Integer> getBucketizedResourceAndPartitionNum(
      String helixPartition) {
    int index = helixPartition.lastIndexOf("_");
    if (index == -1) {
      return null;
    }
    try {
      int partitionNum = Integer.parseInt(helixPartition.substring(index + 1));
      return new ImmutablePair(helixPartition.substring(0, index), partitionNum);
    } catch (NumberFormatException e) {
      return null;
    }
  }

  public static Pair<String, Integer> getNonBucketizedResourceAndPartitionNum(
      String helixPartition) {
    int index = helixPartition.lastIndexOf("$");
    if (index == -1) {
      return null;
    }
    try {
      int partitionNum = Integer.parseInt(helixPartition.substring(index + 1));
      return new ImmutablePair(helixPartition.substring(0, index), partitionNum);
    } catch (NumberFormatException e) {
      return null;
    }
  }

  /**
   * Extracts the resource name and partition number from a helix partition. Returns null
   * if the helix partition format is bad.
   */
  public static Pair<String, Integer> getResourceAndPartitionNum(String helixPartition) {
    Pair<String, Integer> nonBucketizedResourceAndPartitionNum =
        getNonBucketizedResourceAndPartitionNum(helixPartition);
    if (nonBucketizedResourceAndPartitionNum != null) {
      return nonBucketizedResourceAndPartitionNum;
    }
    return getBucketizedResourceAndPartitionNum(helixPartition);
  }

  /**
   * Get full partition name with resource prefix
   *
   * @param resource resource name
   * @param partition partition number
   * @return full partition name
   */
  public static String getViewPartitionName(String resource, int partition) {
    return String.format("%s$%d", resource, partition);
  }

  /**
   * Parse partition number from full partition name
   *
   * @param viewPartitionName full partition name
   * @return partition number
   */
  public static int getViewPartitionNumber(String viewPartitionName) {
    int index = viewPartitionName.lastIndexOf('$');
    if (index == -1) {
      return 0;
    }
    return Integer.parseInt(viewPartitionName.substring(index + 1));
  }

  /**
   * Sets the zk compression flag in the Helix ideal state. Compresses both the ideal
   * state and the external view.
   */
  public static void compressIdealState(IdealState is) {
    is.getRecord().setBooleanField("enableCompression", true);
  }

  /**
   * Get ZooKeeper quorum string from configuration
   *
   * @param configuration configuration instance
   * @return quorum string
   */
  public static String getZKQuorumFromConf(PropertiesConfiguration configuration) {
    String[] quorums = configuration.getStringArray(Constants.ZOOKEEPER_QUORUM);
    return Joiner.on(Constants.ZOOKEEPER_QUORUM_DELIMITER).join(quorums);
  }

  /**
   * Retrieve list of files under @hdfsDir for @hdfsClient.
   */
  public static List<HdfsFileStatus> getHdfsFileList(DFSClient hdfsClient, String hdfsDir)
      throws IOException {
    List<HdfsFileStatus> fileList = Lists.newArrayList();
    // Build a list of files.
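    // DFSClient.listPaths returns a partial listing capped by the namenode, so keep
    // fetching pages, passing the last name from the previous partial listing as the
    // start-after cursor, until the listing reports that there are no more entries.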
    DirectoryListing listing = null;
    String continuation = "";
    while (true) {
      listing = hdfsClient.listPaths(hdfsDir, continuation.getBytes());
      for (HdfsFileStatus fileStatus : listing.getPartialListing()) {
        fileList.add(fileStatus);
      }
      // Go through the listing and paginate.
      if (!listing.hasMore()) {
        break;
      } else {
        continuation = new String(listing.getLastName());
      }
    }
    return fileList;
  }

  /**
   * Attempt to load data (already in HDFS on a correct directory) into an already locked fileset.
   * The data is assumed to already have been placed in the correct directory on the terrapin
   * cluster. This is being called by the Terrapin loader jobs. The @fsInfo object is the same
   * as the locked fsInfo object.
   */
  public static void loadFileSetData(ZooKeeperManager zkManager, FileSetInfo fsInfo,
                                     Options options) throws Exception {
    InetSocketAddress controllerSockAddress = zkManager.getControllerLeader();
    LOG.info("Connecting to controller at " + controllerSockAddress.getHostName() + ":" +
        controllerSockAddress.getPort());
    LOG.info("Load timeout " + Constants.LOAD_TIMEOUT_SECONDS + " seconds.");
    Service<ThriftClientRequest, byte[]> service = ClientBuilder.safeBuild(ClientBuilder.get()
        .hosts(controllerSockAddress)
        .codec(new ThriftClientFramedCodecFactory(Option.<ClientId>empty()))
        .retries(1)
        .connectTimeout(Duration.fromMilliseconds(1000))
        .requestTimeout(Duration.fromSeconds(Constants.LOAD_TIMEOUT_SECONDS))
        .hostConnectionLimit(100)
        .failFast(false));
    TerrapinController.ServiceIface iface = new TerrapinController.ServiceToClient(
        service, new TBinaryProtocol.Factory());

    TerrapinLoadRequest request = new TerrapinLoadRequest();
    request.setHdfsDirectory(fsInfo.servingInfo.hdfsPath);
    request.setOptions(options);
    request.setFileSet(fsInfo.fileSetName);
    request.setExpectedNumPartitions(fsInfo.servingInfo.numPartitions);

    LOG.info("Loading file set " + fsInfo.fileSetName + " at " + fsInfo.servingInfo.hdfsPath);
    long startTimeSeconds = System.currentTimeMillis() / 1000;
    int numTriesLeft = 5;
    boolean done = false;
    Exception e = null;
    while (numTriesLeft > 0) {
      try {
        iface.loadFileSet(request).get();
        done = true;
        break;
      } catch (Throwable t) {
        LOG.error("Swap failed with exception.", t);
        e = new Exception(t);
        numTriesLeft--;
      }
      LOG.info("Retrying in 10 seconds.");
      try {
        Thread.sleep(10000);
      } catch (InterruptedException ie) {
        LOG.error("Interrupted.");
        break;
      }
    }
    if (done) {
      LOG.info("Load successful. Swap took " +
          ((System.currentTimeMillis() / 1000) - startTimeSeconds) + " seconds.");
    } else {
      LOG.error("Load failed.");
      throw new Exception(e);
    }
  }

  public static List<Pair<Path, Long>> getS3FileList(AWSCredentials credentials,
      String s3Bucket, String s3KeyPrefix) {
    List<Pair<Path, Long>> fileSizePairList = Lists.newArrayListWithCapacity(
        Constants.MAX_ALLOWED_SHARDS);

    AmazonS3Client s3Client = new AmazonS3Client(credentials);

    // List files and build the path using the s3n: prefix.
    // Note that keys > marker are retrieved where the > is by lexicographic order.
    String prefix = s3KeyPrefix;
    String marker = prefix;
    while (true) {
      boolean reachedEnd = false;
      ObjectListing listing = s3Client.listObjects(new ListObjectsRequest().
          withBucketName(s3Bucket).
          withMarker(marker));
      List<S3ObjectSummary> summaries = listing.getObjectSummaries();
      if (summaries.isEmpty()) {
        break;
      }
      for (S3ObjectSummary summary : summaries) {
        if (summary.getKey().startsWith(prefix)) {
          fileSizePairList.add(new ImmutablePair(new Path("s3n", s3Bucket,
              "/" + summary.getKey()), summary.getSize()));
          if (fileSizePairList.size() > Constants.MAX_ALLOWED_SHARDS) {
            throw new RuntimeException("Too many files " + fileSizePairList.size());
          }
        } else {
          // We found a key which does not match the prefix, stop.
          reachedEnd = true;
          break;
        }
      }
      if (reachedEnd) {
        break;
      }
      marker = summaries.get(summaries.size() - 1).getKey();
    }
    return fileSizePairList;
  }

  public static void setupConfiguration(Configuration conf, long dfsBlockSize,
                                        int dfsReplication) {
    conf.setInt("mapred.map.max.attempts", Constants.MAPRED_MAP_MAX_ATTEMPTS);
    conf.setInt("io.bytes.per.checksum", Constants.CHECKSUM_BYTES);
    long dfsBlockSizeAdjusted = dfsBlockSize;
    if (dfsBlockSize % Constants.CHECKSUM_BYTES != 0) {
      dfsBlockSizeAdjusted = (dfsBlockSize / Constants.CHECKSUM_BYTES + 1) *
          Constants.CHECKSUM_BYTES;
    }
    conf.setLong("dfs.block.size", dfsBlockSizeAdjusted);
    conf.setInt("dfs.replication", dfsReplication);
    conf.set(Constants.HFILE_COMPRESSION, System.getProperty(
        Constants.HFILE_COMPRESSION, Constants.HFILE_COMPRESSION_DEFAULT));
    conf.setInt(Constants.HFILE_BLOCKSIZE, Integer.parseInt(
        System.getProperty(Constants.HFILE_BLOCKSIZE,
            String.valueOf(Constants.HFILE_BLOCKSIZE_DEFAULT))));
  }
}