/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with this
* work for additional information regarding copyright ownership. The ASF
* licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
package org.apache.hadoop.hbase.io.encoding;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.util.Iterator;
import java.util.NoSuchElementException;
import org.apache.commons.lang.NotImplementedException;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.io.hfile.HFileBlock;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.compress.Compressor;
import com.google.common.base.Preconditions;
import com.google.common.io.NullOutputStream;
/**
 * Encapsulates a data block compressed using a particular encoding algorithm.
 * Useful for testing and benchmarking.
 * <p>
 * The raw key/values are encoded lazily on first use and the encoded bytes are
 * cached, so repeated calls to {@link #getSize()}, {@link #getIterator()} and
 * {@link #getEncodedCompressedSize(Algorithm, Compressor)} reuse one encoding.
 * The lazy cache is not synchronized.
 */
@InterfaceAudience.Private
public class EncodedDataBlock {
  /** Unencoded key/values supplied by the caller; not copied. */
  private final byte[] rawKVs;
  /** Lazily created {@link ByteBuffer} view over {@link #rawKVs}. */
  private ByteBuffer rawBuffer;
  private final DataBlockEncoder dataBlockEncoder;
  /** Result of the first {@link #encodeData()} call, or null if not encoded yet. */
  private byte[] cachedEncodedData;
  /** Passed through to the encoder on both encode and decode. */
  private final boolean includesMemstoreTS;
  private final HFileBlockEncodingContext encodingCtx;

  /**
   * Create a buffer which will be encoded using dataBlockEncoder.
   * @param dataBlockEncoder Algorithm used for compression.
   * @param includesMemstoreTS flag handed to the encoder when encoding and
   *          decoding the key/values
   * @param encoding encoding type used
   * @param rawKVs raw, unencoded key/values; the array is used directly, not
   *          copied
   * @throws NullPointerException if dataBlockEncoder or encoding is null
   */
  public EncodedDataBlock(DataBlockEncoder dataBlockEncoder,
      boolean includesMemstoreTS, DataBlockEncoding encoding, byte[] rawKVs) {
    Preconditions.checkNotNull(dataBlockEncoder,
        "Cannot create encoded data block with null encoder");
    Preconditions.checkNotNull(encoding,
        "Cannot create encoded data block with null encoding");
    this.dataBlockEncoder = dataBlockEncoder;
    // Previously this argument was silently dropped, leaving the field at its
    // default of false regardless of what the caller asked for.
    this.includesMemstoreTS = includesMemstoreTS;
    this.encodingCtx = dataBlockEncoder.newDataBlockEncodingContext(
        Compression.Algorithm.NONE, encoding, HFileBlock.DUMMY_HEADER);
    this.rawKVs = rawKVs;
  }

  /**
   * Provides access to compressed value.
   * <p>
   * Decoding is deferred until the first call to {@code next()}; the whole
   * block is decoded at once and then walked one KeyValue at a time.
   * @return Forwards sequential iterator.
   */
  public Iterator<KeyValue> getIterator() {
    final int rawSize = rawKVs.length;
    byte[] encodedDataWithHeader = getEncodedData();
    // Skip the block header plus one short before handing bytes to the decoder.
    // NOTE(review): the short is presumably the encoding id written after the
    // header -- confirm against the encoding context.
    int bytesToSkip = encodingCtx.getHeaderSize() + Bytes.SIZEOF_SHORT;
    ByteArrayInputStream bais = new ByteArrayInputStream(encodedDataWithHeader,
        bytesToSkip, encodedDataWithHeader.length - bytesToSkip);
    final DataInputStream dis = new DataInputStream(bais);

    return new Iterator<KeyValue>() {
      /** Decoded block; null until the first call to {@link #next()}. */
      private ByteBuffer decompressedData = null;

      @Override
      public boolean hasNext() {
        if (decompressedData == null) {
          // Nothing decoded yet: there is something to return only if there
          // was raw input to begin with.
          return rawSize > 0;
        }
        return decompressedData.hasRemaining();
      }

      @Override
      public KeyValue next() {
        if (!hasNext()) {
          // Honor the Iterator contract instead of reading past the limit.
          throw new NoSuchElementException("No more KeyValues in this block");
        }
        if (decompressedData == null) {
          try {
            decompressedData = dataBlockEncoder.decodeKeyValues(
                dis, includesMemstoreTS);
          } catch (IOException e) {
            throw new RuntimeException("Problem with data block encoder, " +
                "most likely it requested more bytes than are available.", e);
          }
          decompressedData.rewind();
        }

        int offset = decompressedData.position();
        // position() is relative to the buffer, array() is the whole backing
        // array; arrayOffset() bridges the two for sliced/offset buffers.
        KeyValue kv = new KeyValue(decompressedData.array(),
            decompressedData.arrayOffset() + offset);
        decompressedData.position(offset + kv.getLength());
        return kv;
      }

      @Override
      public void remove() {
        throw new NotImplementedException("remove() is not supported!");
      }

      @Override
      public String toString() {
        return "Iterator of: " + dataBlockEncoder.getClass().getName();
      }
    };
  }

  /**
   * Find the size of minimal buffer that could store compressed data.
   * @return Size in bytes of compressed data.
   */
  public int getSize() {
    return getEncodedData().length;
  }

  /**
   * Find the size of compressed data assuming that buffer will be compressed
   * using given algorithm. The compressed bytes are discarded; only their
   * count is kept.
   * @param algo compression algorithm
   * @param compressor compressor already requested from codec; reset before
   *          use when non-null
   * @param inputBuffer Array to be compressed.
   * @param offset Offset to beginning of the data.
   * @param length Length to be compressed.
   * @return Size of compressed data in bytes.
   * @throws IOException if the compression stream fails while writing,
   *           flushing or closing
   */
  public static int getCompressedSize(Algorithm algo, Compressor compressor,
      byte[] inputBuffer, int offset, int length) throws IOException {
    // DataOutputStream is used purely as a byte counter over a null sink.
    DataOutputStream compressedStream = new DataOutputStream(
        new NullOutputStream());
    if (compressor != null) {
      compressor.reset();
    }
    OutputStream compressingStream = algo.createCompressionStream(
        compressedStream, compressor, 0);
    try {
      compressingStream.write(inputBuffer, offset, length);
      compressingStream.flush();
    } finally {
      // Close even when write/flush fails so the stream is not leaked.
      compressingStream.close();
    }
    return compressedStream.size();
  }

  /**
   * Estimate size after second stage of compression (e.g. LZO).
   * @param comprAlgo compression algorithm to be used for compression
   * @param compressor compressor corresponding to the given compression
   *          algorithm
   * @return Size after second stage of compression.
   * @throws IOException if compressing the encoded data fails
   */
  public int getEncodedCompressedSize(Algorithm comprAlgo,
      Compressor compressor) throws IOException {
    byte[] compressedBytes = getEncodedData();
    return getCompressedSize(comprAlgo, compressor, compressedBytes, 0,
        compressedBytes.length);
  }

  /** @return encoded data with header, encoding on first call and caching the result */
  private byte[] getEncodedData() {
    if (cachedEncodedData == null) {
      cachedEncodedData = encodeData();
    }
    return cachedEncodedData;
  }

  /** @return a buffer wrapping the raw key/values, created on first use */
  private ByteBuffer getUncompressedBuffer() {
    if (rawBuffer == null || rawBuffer.limit() < rawKVs.length) {
      rawBuffer = ByteBuffer.wrap(rawKVs);
    }
    return rawBuffer;
  }

  /**
   * Do the encoding, but do not cache the encoded data.
   * @return the bytes reported by the encoding context's
   *         {@code getUncompressedBytesWithHeader()} after encoding
   * @throws RuntimeException wrapping any {@link IOException} raised by the
   *           encoder
   */
  public byte[] encodeData() {
    try {
      this.dataBlockEncoder.encodeKeyValues(
          getUncompressedBuffer(), includesMemstoreTS, encodingCtx);
    } catch (IOException e) {
      throw new RuntimeException(String.format(
          "Bug in encoding part of algorithm %s. " +
          "Probably it requested more bytes than are available.",
          toString()), e);
    }
    return encodingCtx.getUncompressedBytesWithHeader();
  }

  /** @return the string form of the underlying data block encoder */
  @Override
  public String toString() {
    return dataBlockEncoder.toString();
  }
}