/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with this
* work for additional information regarding copyright ownership. The ASF
* licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
package org.apache.hadoop.hbase.io.encoding;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Iterator;
import org.apache.commons.lang.NotImplementedException;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.io.compress.Compressor;
/**
* Encapsulates a data block compressed using a particular encoding algorithm.
* Useful for testing and benchmarking.
*/
public class EncodedDataBlock {
private static final int BUFFER_SIZE = 4 * 1024;
protected DataBlockEncoder dataBlockEncoder;
ByteArrayOutputStream uncompressedOutputStream;
ByteBuffer uncompressedBuffer;
private byte[] cacheCompressData;
private ByteArrayOutputStream compressedStream = new ByteArrayOutputStream();
private boolean includesMemstoreTS;
/**
* Create a buffer which will be encoded using dataBlockEncoder.
* @param dataBlockEncoder Algorithm used for compression.
*/
public EncodedDataBlock(DataBlockEncoder dataBlockEncoder,
boolean includesMemstoreTS) {
this.dataBlockEncoder = dataBlockEncoder;
uncompressedOutputStream = new ByteArrayOutputStream(BUFFER_SIZE);
}
/**
* Add KeyValue and compress it.
* @param kv Item to be added and compressed.
*/
public void addKv(KeyValue kv) {
cacheCompressData = null;
uncompressedOutputStream.write(
kv.getBuffer(), kv.getOffset(), kv.getLength());
}
/**
* Provides access to compressed value.
* @return Forwards sequential iterator.
*/
public Iterator<KeyValue> getIterator() {
final int uncompressedSize = uncompressedOutputStream.size();
final ByteArrayInputStream bais = new ByteArrayInputStream(
getCompressedData());
final DataInputStream dis = new DataInputStream(bais);
return new Iterator<KeyValue>() {
private ByteBuffer decompressedData = null;
@Override
public boolean hasNext() {
if (decompressedData == null) {
return uncompressedSize > 0;
}
return decompressedData.hasRemaining();
}
@Override
public KeyValue next() {
if (decompressedData == null) {
try {
decompressedData = dataBlockEncoder.uncompressKeyValues(
dis, includesMemstoreTS);
} catch (IOException e) {
throw new RuntimeException("Problem with data block encoder, " +
"most likely it requested more bytes than are available.", e);
}
decompressedData.rewind();
}
int offset = decompressedData.position();
KeyValue kv = new KeyValue(decompressedData.array(), offset);
decompressedData.position(offset + kv.getLength());
return kv;
}
@Override
public void remove() {
throw new NotImplementedException("remove() is not supported!");
}
@Override
public String toString() {
return "Iterator of: " + dataBlockEncoder.getClass().getName();
}
};
}
/**
* Find the size of minimal buffer that could store compressed data.
* @return Size in bytes of compressed data.
*/
public int getSize() {
return getCompressedData().length;
}
/**
* Find the size of compressed data assuming that buffer will be compressed
* using given algorithm.
* @param compressor Algorithm used for compression.
* @param buffer Array to be compressed.
* @param offset Offset to beginning of the data.
* @param length Length to be compressed.
* @return Size of compressed data in bytes.
*/
public static int checkCompressedSize(Compressor compressor, byte[] buffer,
int offset, int length) {
byte[] compressedBuffer = new byte[buffer.length];
// in fact the buffer could be of any positive size
compressor.setInput(buffer, offset, length);
compressor.finish();
int currentPos = 0;
while (!compressor.finished()) {
try {
// we don't care about compressed data,
// we just want to callculate number of bytes
currentPos += compressor.compress(compressedBuffer, 0,
compressedBuffer.length);
} catch (IOException e) {
throw new RuntimeException(
"For some reason compressor couldn't read data. " +
"It is likely a problem with " +
compressor.getClass().getName(), e);
}
}
return currentPos;
}
/**
* Estimate size after second stage of compression (e.g. LZO).
* @param compressor Algorithm which will be used for compressions.
* @return Size after second stage of compression.
*/
public int checkCompressedSize(Compressor compressor) {
// compress
byte[] compressedBytes = getCompressedData();
return checkCompressedSize(compressor, compressedBytes, 0,
compressedBytes.length);
}
private byte[] getCompressedData() {
// is cached
if (cacheCompressData != null) {
return cacheCompressData;
}
cacheCompressData = doCompressData();
return cacheCompressData;
}
private ByteBuffer getUncompressedBuffer() {
if (uncompressedBuffer == null ||
uncompressedBuffer.limit() < uncompressedOutputStream.size()) {
uncompressedBuffer = ByteBuffer.wrap(
uncompressedOutputStream.toByteArray());
}
return uncompressedBuffer;
}
/**
* Do the compression.
* @return Compressed byte buffer.
*/
public byte[] doCompressData() {
compressedStream.reset();
DataOutputStream dataOut = new DataOutputStream(compressedStream);
try {
this.dataBlockEncoder.compressKeyValues(
dataOut, getUncompressedBuffer(), includesMemstoreTS);
} catch (IOException e) {
throw new RuntimeException(String.format(
"Bug in decoding part of algorithm %s. " +
"Probably it requested more bytes than are available.",
toString()), e);
}
return compressedStream.toByteArray();
}
@Override
public String toString() {
return dataBlockEncoder.toString();
}
/**
* Get uncompressed buffer.
* @return The buffer.
*/
public byte[] getRawKeyValues() {
return uncompressedOutputStream.toByteArray();
}
}