/*
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.util;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.Arrays;
import java.util.LinkedList;
import java.util.Queue;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.hbase.io.hfile.BlockType;
import org.apache.hadoop.hbase.io.hfile.HFileBlockIndex;
import org.apache.hadoop.hbase.io.hfile.InlineBlockWriter;
import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.io.Writable;
/**
* Adds methods required for writing a compound Bloom filter to the data
* section of an {@link org.apache.hadoop.hbase.io.hfile.HFile} to the
* {@link CompoundBloomFilter} class.
*/
@InterfaceAudience.Private
public class CompoundBloomFilterWriter extends CompoundBloomFilterBase
implements BloomFilterWriter, InlineBlockWriter {
protected static final Log LOG =
LogFactory.getLog(CompoundBloomFilterWriter.class);
/** The current chunk being written to */
private ByteBloomFilter chunk;
/** Previous chunk, so that we can create another similar chunk */
private ByteBloomFilter prevChunk;
/** Maximum fold factor */
private int maxFold;
/** The size of individual Bloom filter chunks to create */
private int chunkByteSize;
/** A Bloom filter chunk enqueued for writing */
private static class ReadyChunk {
int chunkId;
byte[] firstKey;
ByteBloomFilter chunk;
}
private Queue<ReadyChunk> readyChunks = new LinkedList<ReadyChunk>();
/** The first key in the current Bloom filter chunk. */
private byte[] firstKeyInChunk = null;
private HFileBlockIndex.BlockIndexWriter bloomBlockIndexWriter =
new HFileBlockIndex.BlockIndexWriter();
/** Whether to cache-on-write compound Bloom filter chunks */
private boolean cacheOnWrite;
/**
* @param chunkByteSizeHint
* each chunk's size in bytes. The real chunk size might be different
* as required by the fold factor.
* @param errorRate
* target false positive rate
* @param hashType
* hash function type to use
* @param maxFold
* maximum degree of folding allowed
*/
public CompoundBloomFilterWriter(int chunkByteSizeHint, float errorRate,
int hashType, int maxFold, boolean cacheOnWrite,
RawComparator<byte[]> comparator) {
chunkByteSize = ByteBloomFilter.computeFoldableByteSize(
chunkByteSizeHint * 8L, maxFold);
this.errorRate = errorRate;
this.hashType = hashType;
this.maxFold = maxFold;
this.cacheOnWrite = cacheOnWrite;
this.comparator = comparator;
}
@Override
public boolean shouldWriteBlock(boolean closing) {
enqueueReadyChunk(closing);
return !readyChunks.isEmpty();
}
/**
* Enqueue the current chunk if it is ready to be written out.
*
* @param closing true if we are closing the file, so we do not expect new
* keys to show up
*/
private void enqueueReadyChunk(boolean closing) {
if (chunk == null ||
(chunk.getKeyCount() < chunk.getMaxKeys() && !closing)) {
return;
}
if (firstKeyInChunk == null) {
throw new NullPointerException("Trying to enqueue a chunk, " +
"but first key is null: closing=" + closing + ", keyCount=" +
chunk.getKeyCount() + ", maxKeys=" + chunk.getMaxKeys());
}
ReadyChunk readyChunk = new ReadyChunk();
readyChunk.chunkId = numChunks - 1;
readyChunk.chunk = chunk;
readyChunk.firstKey = firstKeyInChunk;
readyChunks.add(readyChunk);
long prevMaxKeys = chunk.getMaxKeys();
long prevByteSize = chunk.getByteSize();
chunk.compactBloom();
if (LOG.isDebugEnabled() && prevByteSize != chunk.getByteSize()) {
LOG.debug("Compacted Bloom chunk #" + readyChunk.chunkId + " from ["
+ prevMaxKeys + " max keys, " + prevByteSize + " bytes] to ["
+ chunk.getMaxKeys() + " max keys, " + chunk.getByteSize()
+ " bytes]");
}
totalMaxKeys += chunk.getMaxKeys();
totalByteSize += chunk.getByteSize();
firstKeyInChunk = null;
prevChunk = chunk;
chunk = null;
}
/**
* Adds a Bloom filter key. This key must be greater than the previous key,
* as defined by the comparator this compound Bloom filter is configured
* with. For efficiency, key monotonicity is not checked here. See
* {@link org.apache.hadoop.hbase.regionserver.StoreFile.Writer#append(
* org.apache.hadoop.hbase.KeyValue)} for the details of deduplication.
*/
@Override
public void add(byte[] bloomKey, int keyOffset, int keyLength) {
if (bloomKey == null)
throw new NullPointerException();
enqueueReadyChunk(false);
if (chunk == null) {
if (firstKeyInChunk != null) {
throw new IllegalStateException("First key in chunk already set: "
+ Bytes.toStringBinary(firstKeyInChunk));
}
firstKeyInChunk = Arrays.copyOfRange(bloomKey, keyOffset, keyOffset
+ keyLength);
if (prevChunk == null) {
// First chunk
chunk = ByteBloomFilter.createBySize(chunkByteSize, errorRate,
hashType, maxFold);
} else {
// Use the same parameters as the last chunk, but a new array and
// a zero key count.
chunk = prevChunk.createAnother();
}
if (chunk.getKeyCount() != 0) {
throw new IllegalStateException("keyCount=" + chunk.getKeyCount()
+ " > 0");
}
chunk.allocBloom();
++numChunks;
}
chunk.add(bloomKey, keyOffset, keyLength);
++totalKeyCount;
}
@Override
public void writeInlineBlock(DataOutput out) throws IOException {
// We don't remove the chunk from the queue here, because we might need it
// again for cache-on-write.
ReadyChunk readyChunk = readyChunks.peek();
ByteBloomFilter readyChunkBloom = readyChunk.chunk;
readyChunkBloom.getDataWriter().write(out);
}
@Override
public void blockWritten(long offset, int onDiskSize, int uncompressedSize) {
ReadyChunk readyChunk = readyChunks.remove();
bloomBlockIndexWriter.addEntry(readyChunk.firstKey, offset, onDiskSize);
}
@Override
public BlockType getInlineBlockType() {
return BlockType.BLOOM_CHUNK;
}
private class MetaWriter implements Writable {
protected MetaWriter() {}
@Override
public void readFields(DataInput in) throws IOException {
throw new IOException("Cant read with this class.");
}
/**
* This is modeled after {@link ByteBloomFilter.MetaWriter} for simplicity,
* although the two metadata formats do not have to be consistent. This
* does have to be consistent with how {@link
* CompoundBloomFilter#CompoundBloomFilter(DataInput,
* org.apache.hadoop.hbase.io.hfile.HFile.Reader)} reads fields.
*/
@Override
public void write(DataOutput out) throws IOException {
out.writeInt(VERSION);
out.writeLong(getByteSize());
out.writeInt(prevChunk.getHashCount());
out.writeInt(prevChunk.getHashType());
out.writeLong(getKeyCount());
out.writeLong(getMaxKeys());
// Fields that don't have equivalents in ByteBloomFilter.
out.writeInt(numChunks);
Bytes.writeByteArray(out,
Bytes.toBytes(comparator.getClass().getName()));
// Write a single-level index without compression or block header.
bloomBlockIndexWriter.writeSingleLevelIndex(out, "Bloom filter");
}
}
@Override
public Writable getMetaWriter() {
return new MetaWriter();
}
@Override
public void compactBloom() {
}
@Override
public void allocBloom() {
// Nothing happens here. All allocation happens on demand.
}
@Override
public Writable getDataWriter() {
return null;
}
@Override
public boolean getCacheOnWrite() {
return cacheOnWrite;
}
}