/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with this
* work for additional information regarding copyright ownership. The ASF
* licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
package org.apache.hadoop.hbase.io.hfile;
import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.hfile.HFileBlock;
import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Pair;
import com.google.common.base.Preconditions;
/**
* Do different kinds of data block encoding according to column family
* options.
*/
public class HFileDataBlockEncoderImpl implements HFileDataBlockEncoder {
private final DataBlockEncoding onDisk;
private final DataBlockEncoding inCache;
public HFileDataBlockEncoderImpl(DataBlockEncoding encoding) {
this(encoding, encoding);
}
/**
* Do data block encoding with specified options.
* @param onDisk What kind of data block encoding will be used before writing
* HFileBlock to disk. This must be either the same as inCache or
* {@link DataBlockEncoding#NONE}.
* @param inCache What kind of data block encoding will be used in block
* cache.
*/
public HFileDataBlockEncoderImpl(DataBlockEncoding onDisk,
DataBlockEncoding inCache) {
this.onDisk = onDisk != null ?
onDisk : DataBlockEncoding.NONE;
this.inCache = inCache != null ?
inCache : DataBlockEncoding.NONE;
Preconditions.checkArgument(onDisk == DataBlockEncoding.NONE ||
onDisk == inCache, "on-disk encoding (" + onDisk + ") must be " +
"either the same as in-cache encoding (" + inCache + ") or " +
DataBlockEncoding.NONE);
}
public static HFileDataBlockEncoder createFromFileInfo(
FileInfo fileInfo, DataBlockEncoding preferredEncodingInCache)
throws IOException {
boolean hasPreferredCacheEncoding = preferredEncodingInCache != null
&& preferredEncodingInCache != DataBlockEncoding.NONE;
byte[] dataBlockEncodingType = fileInfo.get(DATA_BLOCK_ENCODING);
if (dataBlockEncodingType == null && !hasPreferredCacheEncoding) {
return NoOpDataBlockEncoder.INSTANCE;
}
DataBlockEncoding onDisk;
if (dataBlockEncodingType == null) {
onDisk = DataBlockEncoding.NONE;
}else {
String dataBlockEncodingStr = Bytes.toString(dataBlockEncodingType);
try {
onDisk = DataBlockEncoding.valueOf(dataBlockEncodingStr);
} catch (IllegalArgumentException ex) {
throw new IOException("Invalid data block encoding type in file info: "
+ dataBlockEncodingStr, ex);
}
}
DataBlockEncoding inCache;
if (onDisk == DataBlockEncoding.NONE) {
// This is an "in-cache-only" encoding or fully-unencoded scenario.
// Either way, we use the given encoding (possibly NONE) specified by
// the column family in cache.
inCache = preferredEncodingInCache;
} else {
// Leave blocks in cache encoded the same way as they are on disk.
// If we switch encoding type for the CF or the in-cache-only encoding
// flag, old files will keep their encoding both on disk and in cache,
// but new files will be generated with the new encoding.
inCache = onDisk;
}
return new HFileDataBlockEncoderImpl(onDisk, inCache);
}
@Override
public void saveMetadata(HFile.Writer writer) throws IOException {
writer.appendFileInfo(DATA_BLOCK_ENCODING, onDisk.getNameInBytes());
}
@Override
public DataBlockEncoding getEncodingOnDisk() {
return onDisk;
}
@Override
public DataBlockEncoding getEncodingInCache() {
return inCache;
}
@Override
public DataBlockEncoding getEffectiveEncodingInCache(boolean isCompaction) {
if (!useEncodedScanner(isCompaction)) {
return DataBlockEncoding.NONE;
}
return inCache;
}
@Override
public HFileBlock diskToCacheFormat(HFileBlock block, boolean isCompaction) {
if (block.getBlockType() == BlockType.DATA) {
if (!useEncodedScanner(isCompaction)) {
// Unencoded block, and we don't want to encode in cache.
return block;
}
// Encode the unencoded block with the in-cache encoding.
return encodeDataBlock(block, inCache, block.doesIncludeMemstoreTS());
}
if (block.getBlockType() == BlockType.ENCODED_DATA) {
if (block.getDataBlockEncodingId() == onDisk.getId()) {
// The block is already in the desired in-cache encoding.
return block;
}
// We don't want to re-encode a block in a different encoding. The HFile
// reader should have been instantiated in such a way that we would not
// have to do this.
throw new AssertionError("Expected on-disk data block encoding " +
onDisk + ", got " + block.getDataBlockEncoding());
}
return block;
}
/**
* Precondition: a non-encoded buffer.
* Postcondition: on-disk encoding.
*/
@Override
public Pair<ByteBuffer, BlockType> beforeWriteToDisk(ByteBuffer in,
boolean includesMemstoreTS, byte[] dummyHeader) {
if (onDisk == DataBlockEncoding.NONE) {
// there is no need to encode the block before writing it to disk
return new Pair<ByteBuffer, BlockType>(in, BlockType.DATA);
}
ByteBuffer encodedBuffer = encodeBufferToHFileBlockBuffer(in,
onDisk, includesMemstoreTS, dummyHeader);
return new Pair<ByteBuffer, BlockType>(encodedBuffer,
BlockType.ENCODED_DATA);
}
@Override
public boolean useEncodedScanner(boolean isCompaction) {
if (isCompaction && onDisk == DataBlockEncoding.NONE) {
return false;
}
return inCache != DataBlockEncoding.NONE;
}
private ByteBuffer encodeBufferToHFileBlockBuffer(ByteBuffer in,
DataBlockEncoding algo, boolean includesMemstoreTS,
byte[] dummyHeader) {
ByteArrayOutputStream encodedStream = new ByteArrayOutputStream();
DataOutputStream dataOut = new DataOutputStream(encodedStream);
DataBlockEncoder encoder = algo.getEncoder();
try {
encodedStream.write(dummyHeader);
algo.writeIdInBytes(dataOut);
encoder.compressKeyValues(dataOut, in,
includesMemstoreTS);
} catch (IOException e) {
throw new RuntimeException(String.format("Bug in data block encoder " +
"'%s', it probably requested too much data", algo.toString()), e);
}
return ByteBuffer.wrap(encodedStream.toByteArray());
}
private HFileBlock encodeDataBlock(HFileBlock block,
DataBlockEncoding algo, boolean includesMemstoreTS) {
ByteBuffer compressedBuffer = encodeBufferToHFileBlockBuffer(
block.getBufferWithoutHeader(), algo, includesMemstoreTS,
block.getDummyHeaderForVersion());
int sizeWithoutHeader = compressedBuffer.limit() - block.headerSize();
HFileBlock encodedBlock = new HFileBlock(BlockType.ENCODED_DATA,
block.getOnDiskSizeWithoutHeader(),
sizeWithoutHeader, block.getPrevBlockOffset(),
compressedBuffer, HFileBlock.FILL_HEADER, block.getOffset(),
includesMemstoreTS, block.getMinorVersion(),
block.getBytesPerChecksum(), block.getChecksumType(),
block.getOnDiskDataSizeWithHeader());
block.passSchemaMetricsTo(encodedBlock);
return encodedBlock;
}
@Override
public String toString() {
return getClass().getSimpleName() + "(onDisk=" + onDisk + ", inCache=" +
inCache + ")";
}
}