/**
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.io.hfile;
import java.io.ByteArrayInputStream;
import java.io.Closeable;
import java.io.DataInput;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.SequenceInputStream;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValue.KeyComparator;
import org.apache.hadoop.hbase.fs.HFileSystem;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos;
import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.BytesBytesPair;
import org.apache.hadoop.hbase.protobuf.generated.HFileProtos;
import org.apache.hadoop.hbase.util.BloomFilterWriter;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ChecksumType;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.io.Writable;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.protobuf.ByteString;
/**
* File format for hbase.
* A file of sorted key/value pairs. Both keys and values are byte arrays.
* <p>
* The memory footprint of a HFile includes the following (below is taken from the
* <a
* href=https://issues.apache.org/jira/browse/HADOOP-3315>TFile</a> documentation
* but applies also to HFile):
* <ul>
* <li>Some constant overhead of reading or writing a compressed block.
* <ul>
* <li>Each compressed block requires one compression/decompression codec for
* I/O.
* <li>Temporary space to buffer the key.
* <li>Temporary space to buffer the value.
* </ul>
* <li>HFile index, which is proportional to the total number of Data Blocks.
* The total amount of memory needed to hold the index can be estimated as
* (56+AvgKeySize)*NumBlocks.
* </ul>
* Suggestions on performance optimization.
* <ul>
* <li>Minimum block size. We recommend a setting of minimum block size between
* 8KB to 1MB for general usage. Larger block size is preferred if files are
* primarily for sequential access. However, it would lead to inefficient random
* access (because there are more data to decompress). Smaller blocks are good
* for random access, but require more memory to hold the block index, and may
* be slower to create (because we must flush the compressor stream at the
* conclusion of each data block, which leads to an FS I/O flush). Further, due
* to the internal caching in Compression codec, the smallest possible block
* size would be around 20KB-30KB.
* <li>The current implementation does not offer true multi-threading for
* reading. The implementation uses FSDataInputStream seek()+read(), which is
* shown to be much faster than positioned-read call in single thread mode.
* However, it also means that if multiple threads attempt to access the same
* HFile (using multiple scanners) simultaneously, the actual I/O is carried out
* sequentially even if they access different DFS blocks (Reexamine! pread seems
* to be 10% faster than seek+read in my testing -- stack).
* <li>Compression codec. Use "none" if the data is not very compressable (by
* compressable, I mean a compression ratio at least 2:1). Generally, use "lzo"
* as the starting point for experimenting. "gz" overs slightly better
* compression ratio over "lzo" but requires 4x CPU to compress and 2x CPU to
* decompress, comparing to "lzo".
* </ul>
*
* For more on the background behind HFile, see <a
* href=https://issues.apache.org/jira/browse/HBASE-61>HBASE-61</a>.
* <p>
* File is made of data blocks followed by meta data blocks (if any), a fileinfo
* block, data block index, meta data block index, and a fixed size trailer
* which records the offsets at which file changes content type.
* <pre><data blocks><meta blocks><fileinfo><data index><meta index><trailer></pre>
* Each block has a bit of magic at its start. Block are comprised of
* key/values. In data blocks, they are both byte arrays. Metadata blocks are
* a String key and a byte array value. An empty file looks like this:
* <pre><fileinfo><trailer></pre>. That is, there are not data nor meta
* blocks present.
* <p>
* TODO: Do scanners need to be able to take a start and end row?
* TODO: Should BlockIndex know the name of its file? Should it have a Path
* that points at its file say for the case where an index lives apart from
* an HFile instance?
*/
@InterfaceAudience.Private
public class HFile {
static final Log LOG = LogFactory.getLog(HFile.class);
/**
* Maximum length of key in HFile.
*/
public final static int MAXIMUM_KEY_LENGTH = Integer.MAX_VALUE;
/**
* Default block size for an HFile.
*/
public final static int DEFAULT_BLOCKSIZE = 64 * 1024;
/**
* Default compression: none.
*/
public final static Compression.Algorithm DEFAULT_COMPRESSION_ALGORITHM =
Compression.Algorithm.NONE;
/** Minimum supported HFile format version */
public static final int MIN_FORMAT_VERSION = 1;
/** Maximum supported HFile format version
*/
public static final int MAX_FORMAT_VERSION = 2;
/** Default compression name: none. */
public final static String DEFAULT_COMPRESSION =
DEFAULT_COMPRESSION_ALGORITHM.getName();
/**
* We assume that HFile path ends with
* ROOT_DIR/TABLE_NAME/REGION_NAME/CF_NAME/HFILE, so it has at least this
* many levels of nesting. This is needed for identifying table and CF name
* from an HFile path.
*/
public final static int MIN_NUM_HFILE_PATH_LEVELS = 5;
/**
* The number of bytes per checksum.
*/
public static final int DEFAULT_BYTES_PER_CHECKSUM = 16 * 1024;
public static final ChecksumType DEFAULT_CHECKSUM_TYPE = ChecksumType.CRC32;
// For measuring latency of "sequential" reads and writes
private static final AtomicInteger readOps = new AtomicInteger();
private static final AtomicLong readTimeNano = new AtomicLong();
private static final AtomicInteger writeOps = new AtomicInteger();
private static final AtomicLong writeTimeNano = new AtomicLong();
// For measuring latency of pread
private static final AtomicInteger preadOps = new AtomicInteger();
private static final AtomicLong preadTimeNano = new AtomicLong();
// For measuring number of checksum failures
static final AtomicLong checksumFailures = new AtomicLong();
// For getting more detailed stats on FS latencies
// If, for some reason, the metrics subsystem stops polling for latencies,
// I don't want data to pile up in a memory leak
// so, after LATENCY_BUFFER_SIZE items have been enqueued for processing,
// fs latency stats will be dropped (and this behavior will be logged)
private static final int LATENCY_BUFFER_SIZE = 5000;
private static final BlockingQueue<Long> fsReadLatenciesNanos =
new ArrayBlockingQueue<Long>(LATENCY_BUFFER_SIZE);
private static final BlockingQueue<Long> fsWriteLatenciesNanos =
new ArrayBlockingQueue<Long>(LATENCY_BUFFER_SIZE);
private static final BlockingQueue<Long> fsPreadLatenciesNanos =
new ArrayBlockingQueue<Long>(LATENCY_BUFFER_SIZE);
public static final void offerReadLatency(long latencyNanos, boolean pread) {
if (pread) {
fsPreadLatenciesNanos.offer(latencyNanos); // might be silently dropped, if the queue is full
preadOps.incrementAndGet();
preadTimeNano.addAndGet(latencyNanos);
} else {
fsReadLatenciesNanos.offer(latencyNanos); // might be silently dropped, if the queue is full
readTimeNano.addAndGet(latencyNanos);
readOps.incrementAndGet();
}
}
public static final void offerWriteLatency(long latencyNanos) {
fsWriteLatenciesNanos.offer(latencyNanos); // might be silently dropped, if the queue is full
writeTimeNano.addAndGet(latencyNanos);
writeOps.incrementAndGet();
}
public static final Collection<Long> getReadLatenciesNanos() {
final List<Long> latencies =
Lists.newArrayListWithCapacity(fsReadLatenciesNanos.size());
fsReadLatenciesNanos.drainTo(latencies);
return latencies;
}
public static final Collection<Long> getPreadLatenciesNanos() {
final List<Long> latencies =
Lists.newArrayListWithCapacity(fsPreadLatenciesNanos.size());
fsPreadLatenciesNanos.drainTo(latencies);
return latencies;
}
public static final Collection<Long> getWriteLatenciesNanos() {
final List<Long> latencies =
Lists.newArrayListWithCapacity(fsWriteLatenciesNanos.size());
fsWriteLatenciesNanos.drainTo(latencies);
return latencies;
}
// for test purpose
public static volatile AtomicLong dataBlockReadCnt = new AtomicLong(0);
// number of sequential reads
public static final int getReadOps() {
return readOps.getAndSet(0);
}
public static final long getReadTimeMs() {
return readTimeNano.getAndSet(0) / 1000000;
}
// number of positional reads
public static final int getPreadOps() {
return preadOps.getAndSet(0);
}
public static final long getPreadTimeMs() {
return preadTimeNano.getAndSet(0) / 1000000;
}
public static final int getWriteOps() {
return writeOps.getAndSet(0);
}
public static final long getWriteTimeMs() {
return writeTimeNano.getAndSet(0) / 1000000;
}
/**
* Number of checksum verification failures. It also
* clears the counter.
*/
public static final long getChecksumFailuresCount() {
return checksumFailures.getAndSet(0);
}
/** API required to write an {@link HFile} */
public interface Writer extends Closeable {
/** Add an element to the file info map. */
void appendFileInfo(byte[] key, byte[] value) throws IOException;
void append(KeyValue kv) throws IOException;
void append(byte[] key, byte[] value) throws IOException;
/** @return the path to this {@link HFile} */
Path getPath();
/**
* Adds an inline block writer such as a multi-level block index writer or
* a compound Bloom filter writer.
*/
void addInlineBlockWriter(InlineBlockWriter bloomWriter);
// The below three methods take Writables. We'd like to undo Writables but undoing the below would be pretty
// painful. Could take a byte [] or a Message but we want to be backward compatible around hfiles so would need
// to map between Message and Writable or byte [] and current Writable serialization. This would be a bit of work
// to little gain. Thats my thinking at moment. St.Ack 20121129
void appendMetaBlock(String bloomFilterMetaKey, Writable metaWriter);
/**
* Store general Bloom filter in the file. This does not deal with Bloom filter
* internals but is necessary, since Bloom filters are stored differently
* in HFile version 1 and version 2.
*/
void addGeneralBloomFilter(BloomFilterWriter bfw);
/**
* Store delete family Bloom filter in the file, which is only supported in
* HFile V2.
*/
void addDeleteFamilyBloomFilter(BloomFilterWriter bfw) throws IOException;
}
/**
* This variety of ways to construct writers is used throughout the code, and
* we want to be able to swap writer implementations.
*/
public static abstract class WriterFactory {
protected final Configuration conf;
protected final CacheConfig cacheConf;
protected FileSystem fs;
protected Path path;
protected FSDataOutputStream ostream;
protected int blockSize = HColumnDescriptor.DEFAULT_BLOCKSIZE;
protected Compression.Algorithm compression =
HFile.DEFAULT_COMPRESSION_ALGORITHM;
protected HFileDataBlockEncoder encoder = NoOpDataBlockEncoder.INSTANCE;
protected KeyComparator comparator;
protected ChecksumType checksumType = HFile.DEFAULT_CHECKSUM_TYPE;
protected int bytesPerChecksum = DEFAULT_BYTES_PER_CHECKSUM;
WriterFactory(Configuration conf, CacheConfig cacheConf) {
this.conf = conf;
this.cacheConf = cacheConf;
}
public WriterFactory withPath(FileSystem fs, Path path) {
Preconditions.checkNotNull(fs);
Preconditions.checkNotNull(path);
this.fs = fs;
this.path = path;
return this;
}
public WriterFactory withOutputStream(FSDataOutputStream ostream) {
Preconditions.checkNotNull(ostream);
this.ostream = ostream;
return this;
}
public WriterFactory withBlockSize(int blockSize) {
this.blockSize = blockSize;
return this;
}
public WriterFactory withCompression(Compression.Algorithm compression) {
Preconditions.checkNotNull(compression);
this.compression = compression;
return this;
}
public WriterFactory withCompression(String compressAlgo) {
Preconditions.checkNotNull(compression);
this.compression = AbstractHFileWriter.compressionByName(compressAlgo);
return this;
}
public WriterFactory withDataBlockEncoder(HFileDataBlockEncoder encoder) {
Preconditions.checkNotNull(encoder);
this.encoder = encoder;
return this;
}
public WriterFactory withComparator(KeyComparator comparator) {
Preconditions.checkNotNull(comparator);
this.comparator = comparator;
return this;
}
public WriterFactory withChecksumType(ChecksumType checksumType) {
Preconditions.checkNotNull(checksumType);
this.checksumType = checksumType;
return this;
}
public WriterFactory withBytesPerChecksum(int bytesPerChecksum) {
this.bytesPerChecksum = bytesPerChecksum;
return this;
}
public Writer create() throws IOException {
if ((path != null ? 1 : 0) + (ostream != null ? 1 : 0) != 1) {
throw new AssertionError("Please specify exactly one of " +
"filesystem/path or path");
}
if (path != null) {
ostream = AbstractHFileWriter.createOutputStream(conf, fs, path);
}
return createWriter(fs, path, ostream, blockSize,
compression, encoder, comparator, checksumType, bytesPerChecksum);
}
protected abstract Writer createWriter(FileSystem fs, Path path,
FSDataOutputStream ostream, int blockSize,
Compression.Algorithm compress,
HFileDataBlockEncoder dataBlockEncoder,
KeyComparator comparator, ChecksumType checksumType,
int bytesPerChecksum) throws IOException;
}
/** The configuration key for HFile version to use for new files */
public static final String FORMAT_VERSION_KEY = "hfile.format.version";
public static int getFormatVersion(Configuration conf) {
int version = conf.getInt(FORMAT_VERSION_KEY, MAX_FORMAT_VERSION);
checkFormatVersion(version);
return version;
}
/**
* Returns the factory to be used to create {@link HFile} writers.
* Disables block cache access for all writers created through the
* returned factory.
*/
public static final WriterFactory getWriterFactoryNoCache(Configuration
conf) {
Configuration tempConf = new Configuration(conf);
tempConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
return HFile.getWriterFactory(conf, new CacheConfig(tempConf));
}
/**
* Returns the factory to be used to create {@link HFile} writers
*/
public static final WriterFactory getWriterFactory(Configuration conf,
CacheConfig cacheConf) {
int version = getFormatVersion(conf);
switch (version) {
case 1:
return new HFileWriterV1.WriterFactoryV1(conf, cacheConf);
case 2:
return new HFileWriterV2.WriterFactoryV2(conf, cacheConf);
default:
throw new IllegalArgumentException("Cannot create writer for HFile " +
"format version " + version);
}
}
/** An abstraction used by the block index */
public interface CachingBlockReader {
HFileBlock readBlock(long offset, long onDiskBlockSize,
boolean cacheBlock, final boolean pread, final boolean isCompaction,
BlockType expectedBlockType, int customId)
throws IOException;
HFileBlock readBlock(long offset, long onDiskBlockSize,
boolean cacheBlock, final boolean pread, final boolean isCompaction,
BlockType expectedBlockType)
throws IOException;
}
/** An interface used by clients to open and iterate an {@link HFile}. */
public interface Reader extends Closeable, CachingBlockReader {
/**
* Returns this reader's "name". Usually the last component of the path.
* Needs to be constant as the file is being moved to support caching on
* write.
*/
String getName();
RawComparator<byte []> getComparator();
HFileScanner getScanner(boolean cacheBlocks,
final boolean pread, final boolean isCompaction);
ByteBuffer getMetaBlock(String metaBlockName,
boolean cacheBlock, int customId) throws IOException;
Map<byte[], byte[]> loadFileInfo() throws IOException;
byte[] getLastKey();
byte[] midkey() throws IOException;
long length();
long getEntries();
byte[] getFirstKey();
long indexSize();
byte[] getFirstRowKey();
byte[] getLastRowKey();
FixedFileTrailer getTrailer();
HFileBlockIndex.BlockIndexReader getDataBlockIndexReader();
HFileScanner getScanner(boolean cacheBlocks, boolean pread);
Compression.Algorithm getCompressionAlgorithm();
/**
* Retrieves general Bloom filter metadata as appropriate for each
* {@link HFile} version.
* Knows nothing about how that metadata is structured.
*/
DataInput getGeneralBloomFilterMetadata() throws IOException;
/**
* Retrieves delete family Bloom filter metadata as appropriate for each
* {@link HFile} version.
* Knows nothing about how that metadata is structured.
*/
DataInput getDeleteBloomFilterMetadata() throws IOException;
Path getPath();
/** Close method with optional evictOnClose */
void close(boolean evictOnClose) throws IOException;
DataBlockEncoding getEncodingOnDisk();
}
/**
* Method returns the reader given the specified arguments.
* TODO This is a bad abstraction. See HBASE-6635.
*
* @param path hfile's path
* @param fsdis an open checksummed stream of path's file
* @param fsdisNoFsChecksum an open unchecksummed stream of path's file
* @param size max size of the trailer.
* @param closeIStream boolean for closing file after the getting the reader version.
* @param cacheConf Cache configuation values, cannot be null.
* @param preferredEncodingInCache
* @param hfs
* @return an appropriate instance of HFileReader
* @throws IOException If file is invalid, will throw CorruptHFileException flavored IOException
*/
private static Reader pickReaderVersion(Path path, FSDataInputStream fsdis,
FSDataInputStream fsdisNoFsChecksum,
long size, boolean closeIStream, CacheConfig cacheConf,
DataBlockEncoding preferredEncodingInCache, HFileSystem hfs)
throws IOException {
FixedFileTrailer trailer = null;
try {
trailer = FixedFileTrailer.readFromStream(fsdis, size);
} catch (IllegalArgumentException iae) {
throw new CorruptHFileException("Problem reading HFile Trailer from file " + path, iae);
}
switch (trailer.getMajorVersion()) {
case 1:
return new HFileReaderV1(path, trailer, fsdis, size, closeIStream,
cacheConf);
case 2:
return new HFileReaderV2(path, trailer, fsdis, fsdisNoFsChecksum,
size, closeIStream,
cacheConf, preferredEncodingInCache, hfs);
default:
throw new CorruptHFileException("Invalid HFile version " + trailer.getMajorVersion());
}
}
/**
* @param fs A file system
* @param path Path to HFile
* @param cacheConf Cache configuration for hfile's contents
* @param preferredEncodingInCache Preferred in-cache data encoding algorithm.
* @return A version specific Hfile Reader
* @throws IOException If file is invalid, will throw CorruptHFileException flavored IOException
*/
public static Reader createReaderWithEncoding(
FileSystem fs, Path path, CacheConfig cacheConf,
DataBlockEncoding preferredEncodingInCache) throws IOException {
final boolean closeIStream = true;
HFileSystem hfs = null;
FSDataInputStream fsdis = fs.open(path);
FSDataInputStream fsdisNoFsChecksum = fsdis;
// If the fs is not an instance of HFileSystem, then create an
// instance of HFileSystem that wraps over the specified fs.
// In this case, we will not be able to avoid checksumming inside
// the filesystem.
if (!(fs instanceof HFileSystem)) {
hfs = new HFileSystem(fs);
} else {
hfs = (HFileSystem)fs;
// open a stream to read data without checksum verification in
// the filesystem
fsdisNoFsChecksum = hfs.getNoChecksumFs().open(path);
}
return pickReaderVersion(path, fsdis, fsdisNoFsChecksum,
fs.getFileStatus(path).getLen(), closeIStream, cacheConf,
preferredEncodingInCache, hfs);
}
/**
* @param fs A file system
* @param path Path to HFile
* @param fsdis an open checksummed stream of path's file
* @param fsdisNoFsChecksum an open unchecksummed stream of path's file
* @param size max size of the trailer.
* @param cacheConf Cache configuration for hfile's contents
* @param preferredEncodingInCache Preferred in-cache data encoding algorithm.
* @param closeIStream boolean for closing file after the getting the reader version.
* @return A version specific Hfile Reader
* @throws IOException If file is invalid, will throw CorruptHFileException flavored IOException
*/
public static Reader createReaderWithEncoding(
FileSystem fs, Path path, FSDataInputStream fsdis,
FSDataInputStream fsdisNoFsChecksum, long size, CacheConfig cacheConf,
DataBlockEncoding preferredEncodingInCache, boolean closeIStream)
throws IOException {
HFileSystem hfs = null;
// If the fs is not an instance of HFileSystem, then create an
// instance of HFileSystem that wraps over the specified fs.
// In this case, we will not be able to avoid checksumming inside
// the filesystem.
if (!(fs instanceof HFileSystem)) {
hfs = new HFileSystem(fs);
} else {
hfs = (HFileSystem)fs;
}
return pickReaderVersion(path, fsdis, fsdisNoFsChecksum, size,
closeIStream, cacheConf,
preferredEncodingInCache, hfs);
}
/**
*
* @param fs filesystem
* @param path Path to file to read
* @param cacheConf This must not be null. @see {@link org.apache.hadoop.hbase.io.hfile.CacheConfig#CacheConfig(Configuration)}
* @return an active Reader instance
* @throws IOException Will throw a CorruptHFileException (DoNotRetryIOException subtype) if hfile is corrupt/invalid.
*/
public static Reader createReader(
FileSystem fs, Path path, CacheConfig cacheConf) throws IOException {
Preconditions.checkNotNull(cacheConf, "Cannot create Reader with null CacheConf");
return createReaderWithEncoding(fs, path, cacheConf,
DataBlockEncoding.NONE);
}
/**
* This factory method is used only by unit tests
*/
static Reader createReaderFromStream(Path path,
FSDataInputStream fsdis, long size, CacheConfig cacheConf)
throws IOException {
final boolean closeIStream = false;
return pickReaderVersion(path, fsdis, fsdis, size, closeIStream, cacheConf,
DataBlockEncoding.NONE, null);
}
/**
* Metadata for this file. Conjured by the writer. Read in by the reader.
*/
static class FileInfo implements SortedMap<byte [], byte []> {
static final String RESERVED_PREFIX = "hfile.";
static final byte[] RESERVED_PREFIX_BYTES = Bytes.toBytes(RESERVED_PREFIX);
static final byte [] LASTKEY = Bytes.toBytes(RESERVED_PREFIX + "LASTKEY");
static final byte [] AVG_KEY_LEN = Bytes.toBytes(RESERVED_PREFIX + "AVG_KEY_LEN");
static final byte [] AVG_VALUE_LEN = Bytes.toBytes(RESERVED_PREFIX + "AVG_VALUE_LEN");
static final byte [] COMPARATOR = Bytes.toBytes(RESERVED_PREFIX + "COMPARATOR");
private final SortedMap<byte [], byte []> map = new TreeMap<byte [], byte []>(Bytes.BYTES_COMPARATOR);
public FileInfo() {
super();
}
/**
* Append the given key/value pair to the file info, optionally checking the
* key prefix.
*
* @param k key to add
* @param v value to add
* @param checkPrefix whether to check that the provided key does not start
* with the reserved prefix
* @return this file info object
* @throws IOException if the key or value is invalid
*/
public FileInfo append(final byte[] k, final byte[] v,
final boolean checkPrefix) throws IOException {
if (k == null || v == null) {
throw new NullPointerException("Key nor value may be null");
}
if (checkPrefix && isReservedFileInfoKey(k)) {
throw new IOException("Keys with a " + FileInfo.RESERVED_PREFIX
+ " are reserved");
}
put(k, v);
return this;
}
public void clear() {
this.map.clear();
}
public Comparator<? super byte[]> comparator() {
return map.comparator();
}
public boolean containsKey(Object key) {
return map.containsKey(key);
}
public boolean containsValue(Object value) {
return map.containsValue(value);
}
public Set<java.util.Map.Entry<byte[], byte[]>> entrySet() {
return map.entrySet();
}
public boolean equals(Object o) {
return map.equals(o);
}
public byte[] firstKey() {
return map.firstKey();
}
public byte[] get(Object key) {
return map.get(key);
}
public int hashCode() {
return map.hashCode();
}
public SortedMap<byte[], byte[]> headMap(byte[] toKey) {
return this.map.headMap(toKey);
}
public boolean isEmpty() {
return map.isEmpty();
}
public Set<byte[]> keySet() {
return map.keySet();
}
public byte[] lastKey() {
return map.lastKey();
}
public byte[] put(byte[] key, byte[] value) {
return this.map.put(key, value);
}
public void putAll(Map<? extends byte[], ? extends byte[]> m) {
this.map.putAll(m);
}
public byte[] remove(Object key) {
return this.map.remove(key);
}
public int size() {
return map.size();
}
public SortedMap<byte[], byte[]> subMap(byte[] fromKey, byte[] toKey) {
return this.map.subMap(fromKey, toKey);
}
public SortedMap<byte[], byte[]> tailMap(byte[] fromKey) {
return this.map.tailMap(fromKey);
}
public Collection<byte[]> values() {
return map.values();
}
/**
* Write out this instance on the passed in <code>out</code> stream.
* We write it as a protobuf.
* @param out
* @throws IOException
* @see {@link #read(DataInputStream)}
*/
void write(final DataOutputStream out) throws IOException {
HFileProtos.FileInfoProto.Builder builder = HFileProtos.FileInfoProto.newBuilder();
for (Map.Entry<byte [], byte[]> e: this.map.entrySet()) {
HBaseProtos.BytesBytesPair.Builder bbpBuilder = HBaseProtos.BytesBytesPair.newBuilder();
bbpBuilder.setFirst(ByteString.copyFrom(e.getKey()));
bbpBuilder.setSecond(ByteString.copyFrom(e.getValue()));
builder.addMapEntry(bbpBuilder.build());
}
out.write(ProtobufUtil.PB_MAGIC);
builder.build().writeDelimitedTo(out);
}
/**
* Populate this instance with what we find on the passed in <code>in</code> stream.
* Can deserialize protobuf of old Writables format.
* @param in
* @throws IOException
* @see {@link #write(DataOutputStream)}
*/
void read(final DataInputStream in) throws IOException {
// This code is tested over in TestHFileReaderV1 where we read an old hfile w/ this new code.
int pblen = ProtobufUtil.lengthOfPBMagic();
byte [] pbuf = new byte[pblen];
if (in.markSupported()) in.mark(pblen);
int read = in.read(pbuf);
if (read != pblen) throw new IOException("read=" + read + ", wanted=" + pblen);
if (ProtobufUtil.isPBMagicPrefix(pbuf)) {
parsePB(HFileProtos.FileInfoProto.parseDelimitedFrom(in));
} else {
if (in.markSupported()) {
in.reset();
parseWritable(in);
} else {
// We cannot use BufferedInputStream, it consumes more than we read from the underlying IS
ByteArrayInputStream bais = new ByteArrayInputStream(pbuf);
SequenceInputStream sis = new SequenceInputStream(bais, in); // Concatenate input streams
// TODO: Am I leaking anything here wrapping the passed in stream? We are not calling close on the wrapped
// streams but they should be let go after we leave this context? I see that we keep a reference to the
// passed in inputstream but since we no longer have a reference to this after we leave, we should be ok.
parseWritable(new DataInputStream(sis));
}
}
}
/** Now parse the old Writable format. It was a list of Map entries. Each map entry was a key and a value of
* a byte []. The old map format had a byte before each entry that held a code which was short for the key or
* value type. We know it was a byte [] so in below we just read and dump it.
* @throws IOException
*/
void parseWritable(final DataInputStream in) throws IOException {
// First clear the map. Otherwise we will just accumulate entries every time this method is called.
this.map.clear();
// Read the number of entries in the map
int entries = in.readInt();
// Then read each key/value pair
for (int i = 0; i < entries; i++) {
byte [] key = Bytes.readByteArray(in);
// We used to read a byte that encoded the class type. Read and ignore it because it is always byte [] in hfile
in.readByte();
byte [] value = Bytes.readByteArray(in);
this.map.put(key, value);
}
}
/**
* Fill our map with content of the pb we read off disk
* @param fip protobuf message to read
*/
void parsePB(final HFileProtos.FileInfoProto fip) {
this.map.clear();
for (BytesBytesPair pair: fip.getMapEntryList()) {
this.map.put(pair.getFirst().toByteArray(), pair.getSecond().toByteArray());
}
}
}
/** Return true if the given file info key is reserved for internal use. */
public static boolean isReservedFileInfoKey(byte[] key) {
return Bytes.startsWith(key, FileInfo.RESERVED_PREFIX_BYTES);
}
/**
* Get names of supported compression algorithms. The names are acceptable by
* HFile.Writer.
*
* @return Array of strings, each represents a supported compression
* algorithm. Currently, the following compression algorithms are
* supported.
* <ul>
* <li>"none" - No compression.
* <li>"gz" - GZIP compression.
* </ul>
*/
public static String[] getSupportedCompressionAlgorithms() {
return Compression.getSupportedAlgorithms();
}
// Utility methods.
/*
* @param l Long to convert to an int.
* @return <code>l</code> cast as an int.
*/
static int longToInt(final long l) {
// Expecting the size() of a block not exceeding 4GB. Assuming the
// size() will wrap to negative integer if it exceeds 2GB (From tfile).
return (int)(l & 0x00000000ffffffffL);
}
/**
* Returns all files belonging to the given region directory. Could return an
* empty list.
*
* @param fs The file system reference.
* @param regionDir The region directory to scan.
* @return The list of files found.
* @throws IOException When scanning the files fails.
*/
static List<Path> getStoreFiles(FileSystem fs, Path regionDir)
throws IOException {
List<Path> res = new ArrayList<Path>();
PathFilter dirFilter = new FSUtils.DirFilter(fs);
FileStatus[] familyDirs = fs.listStatus(regionDir, dirFilter);
for(FileStatus dir : familyDirs) {
FileStatus[] files = fs.listStatus(dir.getPath());
for (FileStatus file : files) {
if (!file.isDir()) {
res.add(file.getPath());
}
}
}
return res;
}
public static void main(String[] args) throws IOException {
HFilePrettyPrinter prettyPrinter = new HFilePrettyPrinter();
System.exit(prettyPrinter.run(args));
}
/**
* Checks the given {@link HFile} format version, and throws an exception if
* invalid. Note that if the version number comes from an input file and has
* not been verified, the caller needs to re-throw an {@link IOException} to
* indicate that this is not a software error, but corrupted input.
*
* @param version an HFile version
* @throws IllegalArgumentException if the version is invalid
*/
public static void checkFormatVersion(int version)
throws IllegalArgumentException {
if (version < MIN_FORMAT_VERSION || version > MAX_FORMAT_VERSION) {
throw new IllegalArgumentException("Invalid HFile version: " + version
+ " (expected to be " + "between " + MIN_FORMAT_VERSION + " and "
+ MAX_FORMAT_VERSION + ")");
}
}
}