CompoundBloomFilterWriter.java example

Explorer
hbase-cache-master
- trunk
/*
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.util;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.Arrays;
import java.util.LinkedList;
import java.util.Queue;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.hbase.io.hfile.BlockType;
import org.apache.hadoop.hbase.io.hfile.HFileBlockIndex;
import org.apache.hadoop.hbase.io.hfile.InlineBlockWriter;
import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.io.Writable;

/**
 * Adds methods required for writing a compound Bloom filter to the data
 * section of an {@link org.apache.hadoop.hbase.io.hfile.HFile} to the
 * {@link CompoundBloomFilter} class.
 */
@InterfaceAudience.Private
public class CompoundBloomFilterWriter extends CompoundBloomFilterBase
    implements BloomFilterWriter, InlineBlockWriter {

  protected static final Log LOG =
    LogFactory.getLog(CompoundBloomFilterWriter.class);

  /** The current chunk being written to */
  private ByteBloomFilter chunk;

  /** Previous chunk, so that we can create another similar chunk */
  private ByteBloomFilter prevChunk;

  /** Maximum fold factor */
  private int maxFold;

  /** The size of individual Bloom filter chunks to create */
  private int chunkByteSize;

  /** A Bloom filter chunk enqueued for writing */
  private static class ReadyChunk {
    int chunkId;
    byte[] firstKey;
    ByteBloomFilter chunk;
  }

  private Queue<ReadyChunk> readyChunks = new LinkedList<ReadyChunk>();

  /** The first key in the current Bloom filter chunk. */
  private byte[] firstKeyInChunk = null;

  private HFileBlockIndex.BlockIndexWriter bloomBlockIndexWriter =
      new HFileBlockIndex.BlockIndexWriter();

  /** Whether to cache-on-write compound Bloom filter chunks */
  private boolean cacheOnWrite;

  /**
   * @param chunkByteSizeHint
   *          each chunk's size in bytes. The real chunk size might be different
   *          as required by the fold factor.
   * @param errorRate
   *          target false positive rate
   * @param hashType
   *          hash function type to use
   * @param maxFold
   *          maximum degree of folding allowed
   */
  public CompoundBloomFilterWriter(int chunkByteSizeHint, float errorRate,
      int hashType, int maxFold, boolean cacheOnWrite,
      RawComparator<byte[]> comparator) {
    chunkByteSize = ByteBloomFilter.computeFoldableByteSize(
        chunkByteSizeHint * 8L, maxFold);

    this.errorRate = errorRate;
    this.hashType = hashType;
    this.maxFold = maxFold;
    this.cacheOnWrite = cacheOnWrite;
    this.comparator = comparator;
  }

  @Override
  public boolean shouldWriteBlock(boolean closing) {
    enqueueReadyChunk(closing);
    return !readyChunks.isEmpty();
  }

  /**
   * Enqueue the current chunk if it is ready to be written out.
   *
   * @param closing true if we are closing the file, so we do not expect new
   *        keys to show up
   */
  private void enqueueReadyChunk(boolean closing) {
    if (chunk == null ||
        (chunk.getKeyCount() < chunk.getMaxKeys() && !closing)) {
      return;
    }

    if (firstKeyInChunk == null) {
      throw new NullPointerException("Trying to enqueue a chunk, " +
          "but first key is null: closing=" + closing + ", keyCount=" +
          chunk.getKeyCount() + ", maxKeys=" + chunk.getMaxKeys());
    }

    ReadyChunk readyChunk = new ReadyChunk();
    readyChunk.chunkId = numChunks - 1;
    readyChunk.chunk = chunk;
    readyChunk.firstKey = firstKeyInChunk;
    readyChunks.add(readyChunk);

    long prevMaxKeys = chunk.getMaxKeys();
    long prevByteSize = chunk.getByteSize();

    chunk.compactBloom();

    if (LOG.isDebugEnabled() && prevByteSize != chunk.getByteSize()) {
      LOG.debug("Compacted Bloom chunk #" + readyChunk.chunkId + " from ["
          + prevMaxKeys + " max keys, " + prevByteSize + " bytes] to ["
          + chunk.getMaxKeys() + " max keys, " + chunk.getByteSize()
          + " bytes]");
    }

    totalMaxKeys += chunk.getMaxKeys();
    totalByteSize += chunk.getByteSize();

    firstKeyInChunk = null;
    prevChunk = chunk;
    chunk = null;
  }

  /**
   * Adds a Bloom filter key. This key must be greater than the previous key,
   * as defined by the comparator this compound Bloom filter is configured
   * with. For efficiency, key monotonicity is not checked here. See
   * {@link org.apache.hadoop.hbase.regionserver.StoreFile.Writer#append(
   * org.apache.hadoop.hbase.KeyValue)} for the details of deduplication.
   */
  @Override
  public void add(byte[] bloomKey, int keyOffset, int keyLength) {
    if (bloomKey == null)
      throw new NullPointerException();

    enqueueReadyChunk(false);

    if (chunk == null) {
      if (firstKeyInChunk != null) {
        throw new IllegalStateException("First key in chunk already set: "
            + Bytes.toStringBinary(firstKeyInChunk));
      }
      firstKeyInChunk = Arrays.copyOfRange(bloomKey, keyOffset, keyOffset
          + keyLength);

      if (prevChunk == null) {
        // First chunk
        chunk = ByteBloomFilter.createBySize(chunkByteSize, errorRate,
            hashType, maxFold);
      } else {
        // Use the same parameters as the last chunk, but a new array and
        // a zero key count.
        chunk = prevChunk.createAnother();
      }

      if (chunk.getKeyCount() != 0) {
        throw new IllegalStateException("keyCount=" + chunk.getKeyCount()
            + " > 0");
      }

      chunk.allocBloom();
      ++numChunks;
    }

    chunk.add(bloomKey, keyOffset, keyLength);
    ++totalKeyCount;
  }

  @Override
  public void writeInlineBlock(DataOutput out) throws IOException {
    // We don't remove the chunk from the queue here, because we might need it
    // again for cache-on-write.
    ReadyChunk readyChunk = readyChunks.peek();

    ByteBloomFilter readyChunkBloom = readyChunk.chunk;
    readyChunkBloom.getDataWriter().write(out);
  }

  @Override
  public void blockWritten(long offset, int onDiskSize, int uncompressedSize) {
    ReadyChunk readyChunk = readyChunks.remove();
    bloomBlockIndexWriter.addEntry(readyChunk.firstKey, offset, onDiskSize);
  }

  @Override
  public BlockType getInlineBlockType() {
    return BlockType.BLOOM_CHUNK;
  }

  private class MetaWriter implements Writable {
    protected MetaWriter() {}

    @Override
    public void readFields(DataInput in) throws IOException {
      throw new IOException("Cant read with this class.");
    }

    /**
     * This is modeled after {@link ByteBloomFilter.MetaWriter} for simplicity,
     * although the two metadata formats do not have to be consistent. This
     * does have to be consistent with how {@link
     * CompoundBloomFilter#CompoundBloomFilter(DataInput,
     * org.apache.hadoop.hbase.io.hfile.HFile.Reader)} reads fields.
     */
    @Override
    public void write(DataOutput out) throws IOException {
      out.writeInt(VERSION);

      out.writeLong(getByteSize());
      out.writeInt(prevChunk.getHashCount());
      out.writeInt(prevChunk.getHashType());
      out.writeLong(getKeyCount());
      out.writeLong(getMaxKeys());

      // Fields that don't have equivalents in ByteBloomFilter.
      out.writeInt(numChunks);
      Bytes.writeByteArray(out,
          Bytes.toBytes(comparator.getClass().getName()));

      // Write a single-level index without compression or block header.
      bloomBlockIndexWriter.writeSingleLevelIndex(out, "Bloom filter");
    }
  }

  @Override
  public Writable getMetaWriter() {
    return new MetaWriter();
  }

  @Override
  public void compactBloom() {
  }

  @Override
  public void allocBloom() {
    // Nothing happens here. All allocation happens on demand.
  }

  @Override
  public Writable getDataWriter() {
    return null;
  }

  @Override
  public boolean getCacheOnWrite() {
    return cacheOnWrite;
  }
}