/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.pinterest.terrapin.hadoop;

import com.pinterest.terrapin.Constants;
import com.pinterest.terrapin.TerrapinUtil;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.Compression;
import org.apache.hadoop.hbase.regionserver.StoreFile;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

/**
 * HFileOutputFormat for outputting fingerprint mod sharded HFiles using a mapreduce
 * job.
 */
public class HFileOutputFormat extends FileOutputFormat<BytesWritable, BytesWritable> {

  /**
   * Parses a compression algorithm name, defaulting to SNAPPY.
   *
   * @param compressionString One of SNAPPY, GZ, LZO, LZ4 or NONE; may be null.
   * @return The corresponding Compression.Algorithm enum value, or SNAPPY when the
   *         input is null or not a recognized algorithm name.
   */
  public static Compression.Algorithm getAlgorithm(String compressionString) {
    if (compressionString == null) {
      return Compression.Algorithm.SNAPPY;
    }
    try {
      return Compression.Algorithm.valueOf(compressionString);
    } catch (IllegalArgumentException e) {
      // Unrecognized algorithm name: fall back to the default rather than
      // failing the job. (Narrowed from `catch (Throwable)`, which would also
      // have swallowed Errors.)
      return Compression.Algorithm.SNAPPY;
    }
  }

  /**
   * Returns the full path of the HFile for a partition.
   *
   * @param outputPath output directory for saving this HFile
   * @param partitionIndex index of partition
   * @return Full HFile path
   */
  public static Path hfilePath(Path outputPath, int partitionIndex) {
    return new Path(outputPath, TerrapinUtil.formatPartitionName(partitionIndex));
  }

  /**
   * Creates a RecordWriter that writes key/value pairs into a single HFile named
   * after this task's partition index under the job output directory.
   *
   * <p>Block size and compression are read from the job configuration:
   * {@code Constants.HFILE_BLOCKSIZE} (default 16384) and
   * {@code Constants.HFILE_COMPRESSION} (default SNAPPY).
   *
   * @param context task attempt context supplying the configuration and task id
   * @return a RecordWriter backed by an HFile StoreFile.Writer
   * @throws IOException if the filesystem or writer cannot be created
   */
  @Override
  public RecordWriter<BytesWritable, BytesWritable> getRecordWriter(
      TaskAttemptContext context) throws IOException {
    // Get the path of the temporary output file.
    final Path outputPath = FileOutputFormat.getOutputPath(context);
    final Path outputDir = new FileOutputCommitter(outputPath, context).getWorkPath();
    final Configuration conf = context.getConfiguration();
    final FileSystem fs = outputDir.getFileSystem(conf);
    int blockSize = conf.getInt(Constants.HFILE_BLOCKSIZE, 16384);
    // Default to snappy.
    Compression.Algorithm compressionAlgorithm = getAlgorithm(
        conf.get(Constants.HFILE_COMPRESSION));
    // NOTE(review): the HFile is written directly under outputPath rather than
    // the committer's per-attempt work path (outputDir), so retried/speculative
    // task attempts would target the same file — confirm this is intentional
    // (Terrapin may manage its own commit protocol).
    final StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, new CacheConfig(conf),
        fs, blockSize)
        .withFilePath(hfilePath(outputPath, context.getTaskAttemptID().getTaskID().getId()))
        .withCompression(compressionAlgorithm)
        .build();
    return new HFileRecordWriter(writer);
  }
}