/**
* Copyright The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.io.hfile.bucket;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.hbase.io.hfile.BlockCacheKey;
import org.apache.hadoop.hbase.io.hfile.bucket.BucketCache.BucketEntry;
/**
* This class is used to allocate a block of a specified size and to free that
* block when it is evicted. It manages an array of buckets; each bucket is
* associated with an item size and caches elements up to that size. The item
* size of a completely empty bucket can be re-specified dynamically.
*
* This class is not thread safe as a whole: only {@link #allocateBlock(int)}
* and {@link #freeBlock(long)} are synchronized.
*/
@InterfaceAudience.Private
public final class BucketAllocator {
static final Log LOG = LogFactory.getLog(BucketAllocator.class);
/**
 * A range of {@code BUCKET_CAPACITY} bytes starting at {@code baseOffset},
 * divided into equally sized slots. The slot size is chosen through
 * {@link #reconfigure(int)}. The indexes of the unused slots live in the
 * first {@code freeCount} entries of {@code freeList}, which is used as a
 * stack.
 */
private static final class Bucket {
  private long baseOffset;
  private int itemAllocationSize;
  private int sizeIndex;
  private int itemCount;
  // Stack of free slot indexes; only entries [0, freeCount) are meaningful.
  private int[] freeList;
  private int freeCount;
  private int usedCount;

  /**
   * Creates a bucket that has no slot size yet.
   * @param offset position of the first byte of this bucket
   */
  public Bucket(long offset) {
    this.baseOffset = offset;
    this.sizeIndex = -1; // marks the bucket as uninstantiated
  }

  /**
   * Switches this bucket to the slot size stored at {@code sizeIndex} in
   * {@code BUCKET_SIZES} and marks every slot as free.
   * @param sizeIndex index into {@code BUCKET_SIZES}
   */
  void reconfigure(int sizeIndex) {
    this.sizeIndex = sizeIndex;
    assert sizeIndex < BUCKET_SIZES.length;
    itemAllocationSize = BUCKET_SIZES[sizeIndex];
    int slots = (int) (BUCKET_CAPACITY / itemAllocationSize);
    itemCount = slots;
    freeCount = slots;
    usedCount = 0;
    int[] slotIndexes = new int[slots];
    for (int slot = 0; slot < slots; slot++) {
      slotIndexes[slot] = slot;
    }
    freeList = slotIndexes;
  }

  /** @return true while no slot size has been assigned to this bucket */
  public boolean isUninstantiated() {
    return -1 == sizeIndex;
  }

  /** @return index of the current slot size in {@code BUCKET_SIZES}, or -1 */
  public int sizeIndex() {
    return sizeIndex;
  }

  /** @return size in bytes of one slot of this bucket */
  public int itemAllocationSize() {
    return itemAllocationSize;
  }

  /** @return true if at least one slot is unused */
  public boolean hasFreeSpace() {
    return freeCount > 0;
  }

  /** @return true if no slot is in use */
  public boolean isCompletelyFree() {
    return usedCount == 0;
  }

  /** @return number of unused slots */
  public int freeCount() {
    return freeCount;
  }

  /** @return number of slots in use */
  public int usedCount() {
    return usedCount;
  }

  /** @return bytes covered by the unused slots */
  public int freeBytes() {
    return freeCount * itemAllocationSize;
  }

  /** @return bytes covered by the slots in use */
  public int usedBytes() {
    return usedCount * itemAllocationSize;
  }

  /** @return position of the first byte of this bucket */
  public long baseOffset() {
    return baseOffset;
  }

  /**
   * Takes the slot on top of the free stack and reports where it starts.
   * Must only be called while the bucket has free space.
   * @return the offset in the IOEngine
   */
  public long allocate() {
    assert freeCount > 0; // Else should not have been called
    assert sizeIndex != -1;
    usedCount++;
    freeCount--;
    int slot = freeList[freeCount];
    long position = baseOffset + (slot * itemAllocationSize);
    assert position >= 0;
    return position;
  }

  /**
   * Marks the slot starting at the given absolute offset as used, removing
   * it from the free stack while keeping the order of the other entries.
   * @param offset absolute offset of the slot to claim
   * @throws BucketAllocatorException if the offset lies before this bucket,
   *           is not slot aligned, or the slot is not currently free
   */
  public void addAllocation(long offset) throws BucketAllocatorException {
    long relative = offset - baseOffset;
    boolean aligned = relative >= 0 && relative % itemAllocationSize == 0;
    if (!aligned) {
      throw new BucketAllocatorException(
          "Attempt to add allocation for bad offset: " + relative + " base="
              + baseOffset + ", bucket size=" + itemAllocationSize);
    }
    int slot = (int) (relative / itemAllocationSize);
    int position = -1;
    for (int i = 0; i < freeCount; i++) {
      if (freeList[i] == slot) {
        position = i;
        break;
      }
    }
    if (position < 0) {
      throw new BucketAllocatorException("Couldn't find match for index "
          + slot + " in free list");
    }
    // Close the gap left by the claimed slot.
    System.arraycopy(freeList, position + 1, freeList, position,
        freeCount - position - 1);
    usedCount++;
    freeCount--;
  }

  /**
   * Returns the slot starting at the given absolute offset to the free stack.
   * Validity of the offset is only checked through assertions.
   * @param offset absolute offset of the slot to release
   */
  private void free(long offset) {
    long relative = offset - baseOffset;
    assert relative >= 0;
    assert relative < itemCount * itemAllocationSize;
    assert relative % itemAllocationSize == 0;
    assert usedCount > 0;
    assert freeCount < itemCount; // Else duplicate free
    int slot = (int) (relative / (long) itemAllocationSize);
    assert !freeListContains(slot);
    usedCount--;
    freeList[freeCount++] = slot;
  }

  /** Linear scan of the live part of the free stack for the given slot. */
  private boolean freeListContains(int blockNo) {
    int i = 0;
    while (i < freeCount) {
      if (freeList[i] == blockNo) {
        return true;
      }
      i++;
    }
    return false;
  }
}
/**
 * Bookkeeping for all buckets currently configured with one slot size.
 * A "free" bucket still has room for at least one block; a "completely
 * free" bucket holds no block at all.
 */
final class BucketSizeInfo {
  private List<Bucket> bucketList;
  private List<Bucket> freeBuckets;
  private List<Bucket> completelyFreeBuckets;
  private int sizeIndex;

  /**
   * @param sizeIndex index into {@code BUCKET_SIZES} served by this instance
   */
  BucketSizeInfo(int sizeIndex) {
    this.sizeIndex = sizeIndex;
    this.bucketList = new ArrayList<Bucket>();
    this.freeBuckets = new ArrayList<Bucket>();
    this.completelyFreeBuckets = new ArrayList<Bucket>();
  }

  /**
   * Reconfigures an unused bucket to this slot size and registers it in all
   * three lists.
   * @param b bucket that is uninstantiated or holds no block
   */
  public void instantiateBucket(Bucket b) {
    assert b.isUninstantiated() || b.isCompletelyFree();
    b.reconfigure(sizeIndex);
    completelyFreeBuckets.add(b);
    freeBuckets.add(b);
    bucketList.add(b);
  }

  /** @return index into {@code BUCKET_SIZES} served by this instance */
  public int sizeIndex() {
    return sizeIndex;
  }

  /**
   * Find a bucket to allocate a block. The most recently added bucket with
   * free space is preferred; otherwise a completely free bucket is taken
   * from any size class and reconfigured to this size.
   * @return the offset in the IOEngine, or -1 if no bucket is available
   */
  public long allocateBlock() {
    Bucket target;
    if (!freeBuckets.isEmpty()) {
      // Use up an existing one first...
      target = freeBuckets.get(freeBuckets.size() - 1);
    } else {
      target = grabGlobalCompletelyFreeBucket();
      if (target == null) {
        return -1;
      }
      instantiateBucket(target);
    }
    long offset = target.allocate();
    blockAllocated(target);
    return offset;
  }

  /**
   * Updates the free lists after a slot of {@code b} has been taken.
   * @param b bucket in which a block was just allocated
   */
  void blockAllocated(Bucket b) {
    if (!b.isCompletelyFree()) {
      completelyFreeBuckets.remove(b);
    }
    if (!b.hasFreeSpace()) {
      freeBuckets.remove(b);
    }
  }

  /**
   * Gives up one completely free bucket, unless it is the only bucket of
   * this size.
   * @return the removed bucket, or null if none can be given up
   */
  public Bucket findAndRemoveCompletelyFreeBucket() {
    assert bucketList.size() > 0;
    // Keeping the last bucket means we never get complete starvation of a
    // bucket for a size.
    if (bucketList.size() == 1 || completelyFreeBuckets.isEmpty()) {
      return null;
    }
    Bucket candidate = completelyFreeBuckets.get(0);
    removeBucket(candidate);
    return candidate;
  }

  /** Drops a completely free bucket from every list of this size class. */
  private void removeBucket(Bucket b) {
    assert b.isCompletelyFree();
    completelyFreeBuckets.remove(b);
    freeBuckets.remove(b);
    bucketList.remove(b);
  }

  /**
   * Releases the slot at {@code offset} in bucket {@code b} and puts the
   * bucket back on the free lists it now qualifies for.
   * @param b bucket owning the slot
   * @param offset absolute offset of the slot
   */
  public void freeBlock(Bucket b, long offset) {
    assert bucketList.contains(b);
    // else we shouldn't have anything to free...
    assert (!completelyFreeBuckets.contains(b));
    b.free(offset);
    if (!freeBuckets.contains(b)) {
      freeBuckets.add(b);
    }
    if (b.isCompletelyFree()) {
      completelyFreeBuckets.add(b);
    }
  }

  /**
   * @return free and used slot counts summed over all buckets of this size
   */
  public IndexStatistics statistics() {
    long freeSlots = 0;
    long usedSlots = 0;
    for (Bucket bucket : bucketList) {
      freeSlots += bucket.freeCount();
      usedSlots += bucket.usedCount();
    }
    return new IndexStatistics(freeSlots, usedSlots, BUCKET_SIZES[sizeIndex]);
  }
}
// Slot sizes in bytes, one per size class, listed in ascending order.
// The default block size is 64K, so more sizes are provided near 64K; adjust
// the list according to your cluster's block size distribution.
// TODO Make these sizes configurable
// TODO Support the view of block size distribution statistics
private static final int BUCKET_SIZES[] = { 4 * 1024 + 1024, 8 * 1024 + 1024,
16 * 1024 + 1024, 32 * 1024 + 1024, 40 * 1024 + 1024, 48 * 1024 + 1024,
56 * 1024 + 1024, 64 * 1024 + 1024, 96 * 1024 + 1024, 128 * 1024 + 1024,
192 * 1024 + 1024, 256 * 1024 + 1024, 384 * 1024 + 1024,
512 * 1024 + 1024 };
/**
 * Looks up the size class whose slot size is the smallest one that can hold
 * a block of the given size.
 * @param blockSize size of the block in bytes
 * @return the matching BucketSizeInfo, or null if the block is larger than
 *         the largest configured slot size
 */
public BucketSizeInfo roundUpToBucketSizeInfo(int blockSize) {
  int index = 0;
  // Skip every size class that is too small for the block.
  while (index < BUCKET_SIZES.length && BUCKET_SIZES[index] < blockSize) {
    index++;
  }
  return index < BUCKET_SIZES.length ? bucketSizeInfos[index] : null;
}
// 512K plus 1K, i.e. 513K in total; equal to the largest BUCKET_SIZES entry
static final int BIG_ITEM_SIZE = (512 * 1024) + 1024; // 513K plus overhead
// Number of BIG_ITEM_SIZE items that fit into one bucket
static public final int FEWEST_ITEMS_IN_BUCKET = 4;
// The capacity size for each bucket
static final long BUCKET_CAPACITY = FEWEST_ITEMS_IN_BUCKET * BIG_ITEM_SIZE;
// All buckets, indexed by (offset / BUCKET_CAPACITY)
private Bucket[] buckets;
// One entry per BUCKET_SIZES element, at the same index
private BucketSizeInfo[] bucketSizeInfos;
// buckets.length * BUCKET_CAPACITY
private final long totalSize;
// Sum of the slot sizes (not the block lengths) of the allocated blocks
private long usedSize = 0;
/**
 * Creates an allocator over {@code availableSpace} bytes. The space is split
 * into buckets of {@code BUCKET_CAPACITY} bytes; the first buckets get one
 * slot size each, in {@code BUCKET_SIZES} order, and all remaining buckets
 * start out with the largest slot size.
 * @param availableSpace capacity of the cache in bytes
 * @throws BucketAllocatorException if the space holds fewer buckets than
 *           there are slot sizes, or more buckets than an array can index
 */
BucketAllocator(long availableSpace) throws BucketAllocatorException {
  // Validate the count as a long: narrowing first would turn a negative
  // capacity into a NegativeArraySizeException and let a huge one wrap.
  long bucketCount = availableSpace / BUCKET_CAPACITY;
  if (bucketCount < BUCKET_SIZES.length)
    throw new BucketAllocatorException(
        "Bucket allocator size too small - must have room for at least "
            + BUCKET_SIZES.length + " buckets");
  if (bucketCount > Integer.MAX_VALUE)
    throw new BucketAllocatorException(
        "Bucket allocator size too big - " + bucketCount
            + " buckets requested, at most " + Integer.MAX_VALUE
            + " are supported");
  buckets = new Bucket[(int) bucketCount];
  bucketSizeInfos = new BucketSizeInfo[BUCKET_SIZES.length];
  for (int i = 0; i < BUCKET_SIZES.length; ++i) {
    bucketSizeInfos[i] = new BucketSizeInfo(i);
  }
  for (int i = 0; i < buckets.length; ++i) {
    buckets[i] = new Bucket(BUCKET_CAPACITY * i);
    // Buckets beyond the number of slot sizes all take the largest size.
    bucketSizeInfos[i < BUCKET_SIZES.length ? i : BUCKET_SIZES.length - 1]
        .instantiateBucket(buckets[i]);
  }
  this.totalSize = ((long) buckets.length) * BUCKET_CAPACITY;
}
/**
 * Rebuild the allocator's data structures from a persisted map.
 * @param availableSpace capacity of cache
 * @param map A map stores the block key and BucketEntry(block's meta data
 *          like offset, length)
 * @param realCacheSize cached data size statistics for bucket cache; the
 *          total length of the replayed blocks is added to it once the whole
 *          map has been replayed successfully
 * @throws BucketAllocatorException if the capacity is unsuitable or an entry
 *           of the map does not fit the bucket layout
 */
BucketAllocator(long availableSpace, Map<BlockCacheKey, BucketEntry> map,
    AtomicLong realCacheSize) throws BucketAllocatorException {
  this(availableSpace);
  // each bucket has an offset, sizeindex. probably the buckets are too big
  // in our default state. so what we do is reconfigure them according to what
  // we've found. we can only reconfigure each bucket once; if more than once,
  // we know there's a bug, so we just log the info, throw, and start again...
  boolean[] reconfigured = new boolean[buckets.length];
  // Accumulated locally so that a failed rebuild leaves the caller's
  // counter untouched.
  long restoredBytes = 0;
  for (Map.Entry<BlockCacheKey, BucketEntry> entry : map.entrySet()) {
    long foundOffset = entry.getValue().offset();
    int foundLen = entry.getValue().getLength();
    int bucketSizeIndex = -1;
    for (int i = 0; i < BUCKET_SIZES.length; ++i) {
      if (foundLen <= BUCKET_SIZES[i]) {
        bucketSizeIndex = i;
        break;
      }
    }
    if (bucketSizeIndex == -1) {
      throw new BucketAllocatorException(
          "Can't match bucket size for the block with size " + foundLen);
    }
    // Range-check as a long: narrowing to int first could map a corrupt,
    // very large offset onto a valid bucket number.
    long bucketIndex = foundOffset / BUCKET_CAPACITY;
    if (bucketIndex < 0 || bucketIndex >= buckets.length)
      throw new BucketAllocatorException("Can't find bucket " + bucketIndex
          + ", total buckets=" + buckets.length
          + "; did you shrink the cache?");
    int bucketNo = (int) bucketIndex;
    Bucket b = buckets[bucketNo];
    if (reconfigured[bucketNo]) {
      if (b.sizeIndex() != bucketSizeIndex)
        throw new BucketAllocatorException(
            "Inconsistent allocation in bucket map;");
    } else {
      if (!b.isCompletelyFree())
        throw new BucketAllocatorException("Reconfiguring bucket "
            + bucketNo + " but it's already allocated; corrupt data");
      // Need to remove the bucket from whichever list it's currently in at
      // the moment...
      BucketSizeInfo bsi = bucketSizeInfos[bucketSizeIndex];
      BucketSizeInfo oldbsi = bucketSizeInfos[b.sizeIndex()];
      oldbsi.removeBucket(b);
      bsi.instantiateBucket(b);
      reconfigured[bucketNo] = true;
    }
    b.addAllocation(foundOffset);
    usedSize += b.itemAllocationSize();
    bucketSizeInfos[bucketSizeIndex].blockAllocated(b);
    restoredBytes += foundLen;
  }
  realCacheSize.addAndGet(restoredBytes);
}
/**
 * Builds a one-line-per-bucket summary with slot size, free count and used
 * count.
 * @return the summary text
 */
public String getInfo() {
  StringBuilder info = new StringBuilder(1024);
  int bucketNo = 0;
  for (Bucket bucket : buckets) {
    info.append(" Bucket ").append(bucketNo).append(": ")
        .append(bucket.itemAllocationSize())
        .append(" freeCount=").append(bucket.freeCount())
        .append(" used=").append(bucket.usedCount())
        .append('\n');
    bucketNo++;
  }
  return info.toString();
}
/** @return bytes currently accounted as used, counted in whole slots */
public long getUsedSize() {
  return usedSize;
}
/** @return total size minus the used size, in bytes */
public long getFreeSize() {
  return totalSize - usedSize;
}
/** @return number of bytes covered by all buckets */
public long getTotalSize() {
  return totalSize;
}
/**
 * Allocate a block with specified size. Return the offset
 * @param blockSize size of block
 * @throws BucketAllocatorException if the block is larger than the largest
 *           slot size
 * @throws CacheFullException if no slot of the matching size is available;
 *           the caller is expected to free up space and try again
 * @return the offset in the IOEngine
 */
public synchronized long allocateBlock(int blockSize) throws CacheFullException,
    BucketAllocatorException {
  assert blockSize > 0;
  BucketSizeInfo sizeInfo = roundUpToBucketSizeInfo(blockSize);
  if (sizeInfo == null) {
    throw new BucketAllocatorException("Allocation too big size=" + blockSize);
  }
  long offset = sizeInfo.allocateBlock();
  if (offset >= 0) {
    // Account for the whole slot, not just the block length.
    usedSize += BUCKET_SIZES[sizeInfo.sizeIndex()];
    return offset;
  }
  // Ask caller to free up space and try again!
  throw new CacheFullException(blockSize, sizeInfo.sizeIndex());
}
/**
 * Asks each size class in turn, starting with the smallest slot size, to
 * give up a completely free bucket.
 * @return the first bucket handed over, or null if no size class has one
 */
private Bucket grabGlobalCompletelyFreeBucket() {
  for (int i = 0; i < bucketSizeInfos.length; i++) {
    Bucket candidate = bucketSizeInfos[i].findAndRemoveCompletelyFreeBucket();
    if (candidate != null) {
      return candidate;
    }
  }
  return null;
}
/**
 * Free a block with the offset
 * @param offset block's offset
 * @return size freed, which is the slot size of the owning bucket
 */
public synchronized int freeBlock(long offset) {
  int bucketNo = (int) (offset / BUCKET_CAPACITY);
  assert bucketNo >= 0 && bucketNo < buckets.length;
  Bucket owner = buckets[bucketNo];
  BucketSizeInfo sizeInfo = bucketSizeInfos[owner.sizeIndex()];
  sizeInfo.freeBlock(owner, offset);
  int freed = owner.itemAllocationSize();
  usedSize -= freed;
  return freed;
}
/**
 * @param offset offset of an allocation
 * @return size index of the bucket that contains the offset
 */
public int sizeIndexOfAllocation(long offset) {
  int bucketNo = (int) (offset / BUCKET_CAPACITY);
  assert bucketNo >= 0 && bucketNo < buckets.length;
  return buckets[bucketNo].sizeIndex();
}
/**
 * @param offset offset of an allocation
 * @return slot size of the bucket that contains the offset
 */
public int sizeOfAllocation(long offset) {
  int bucketNo = (int) (offset / BUCKET_CAPACITY);
  assert bucketNo >= 0 && bucketNo < buckets.length;
  return buckets[bucketNo].itemAllocationSize();
}
/**
 * @return the number of configured slot sizes, i.e. one more than the
 *         largest valid size index
 */
public static int getMaximumAllocationIndex() {
  return BUCKET_SIZES.length;
}
/**
 * Free/used counters for items of one size. The total count is derived
 * from the other two whenever the values are set.
 */
static class IndexStatistics {
  private long freeCount;
  private long usedCount;
  private long itemSize;
  private long totalCount;

  /** Creates statistics with placeholder counts of -1 and item size 0. */
  public IndexStatistics() {
    setTo(-1, -1, 0);
  }

  /**
   * @param free number of free items
   * @param used number of used items
   * @param itemSize size of one item
   */
  public IndexStatistics(long free, long used, long itemSize) {
    setTo(free, used, itemSize);
  }

  /**
   * Overwrites all values; the total count becomes {@code free + used}.
   * @param free number of free items
   * @param used number of used items
   * @param itemSize size of one item
   */
  public void setTo(long free, long used, long itemSize) {
    this.freeCount = free;
    this.usedCount = used;
    this.totalCount = free + used;
    this.itemSize = itemSize;
  }

  /** @return number of free items */
  public long freeCount() {
    return freeCount;
  }

  /** @return number of used items */
  public long usedCount() {
    return usedCount;
  }

  /** @return free plus used items, as of the last update */
  public long totalCount() {
    return totalCount;
  }

  /** @return size of one item */
  public long itemSize() {
    return itemSize;
  }

  /** @return free count multiplied by the item size */
  public long freeBytes() {
    return itemSize * freeCount;
  }

  /** @return used count multiplied by the item size */
  public long usedBytes() {
    return itemSize * usedCount;
  }

  /** @return total count multiplied by the item size */
  public long totalBytes() {
    return itemSize * totalCount;
  }
}
/**
 * Logs the allocator statistics, followed by one entry per bucket listing
 * its base offset, size index, counters and the free slot indexes.
 */
public void dumpToLog() {
  logStatistics();
  StringBuilder dump = new StringBuilder();
  for (Bucket bucket : buckets) {
    dump.append("Bucket:").append(bucket.baseOffset).append('\n');
    dump.append(" Size index: ").append(bucket.sizeIndex())
        .append("; Free:").append(bucket.freeCount)
        .append("; used:").append(bucket.usedCount)
        .append("; freelist\n");
    int free = bucket.freeCount();
    for (int slot = 0; slot < free; slot++) {
      dump.append(bucket.freeList[slot]).append(',');
    }
    dump.append('\n');
  }
  LOG.info(dump);
}
/**
 * Logs the overall free/used/total byte counts, then one line per slot size
 * with its used, free and total item counts.
 */
public void logStatistics() {
  IndexStatistics grandTotal = new IndexStatistics();
  IndexStatistics[] perSize = getIndexStatistics(grandTotal);
  LOG.info("Bucket allocator statistics follow:\n");
  String summary = " Free bytes=" + grandTotal.freeBytes() + "+; used bytes="
      + grandTotal.usedBytes() + "; total bytes=" + grandTotal.totalBytes();
  LOG.info(summary);
  for (int i = 0; i < perSize.length; i++) {
    IndexStatistics stat = perSize[i];
    LOG.info(" Object size " + stat.itemSize() + " used=" + stat.usedCount()
        + "; free=" + stat.freeCount() + "; total=" + stat.totalCount());
  }
}
/**
 * Collects the per-size statistics and stores the byte totals over all
 * sizes in {@code grandTotal}, using an item size of 1 so that its counts
 * are byte counts.
 * @param grandTotal receives the summed free and used bytes
 * @return the statistics of each slot size
 */
public IndexStatistics[] getIndexStatistics(IndexStatistics grandTotal) {
  IndexStatistics[] perSize = getIndexStatistics();
  long freeBytes = 0;
  long usedBytes = 0;
  for (int i = 0; i < perSize.length; i++) {
    freeBytes += perSize[i].freeBytes();
    usedBytes += perSize[i].usedBytes();
  }
  grandTotal.setTo(freeBytes, usedBytes, 1);
  return perSize;
}
/**
 * @return one statistics object per slot size, in {@code BUCKET_SIZES} order
 */
public IndexStatistics[] getIndexStatistics() {
  int sizeCount = BUCKET_SIZES.length;
  IndexStatistics[] perSize = new IndexStatistics[sizeCount];
  int index = 0;
  while (index < sizeCount) {
    perSize[index] = bucketSizeInfos[index].statistics();
    index++;
  }
  return perSize;
}
/**
 * Frees every block whose offset is listed.
 * @param freeList offsets of the blocks to free
 * @return sum of the sizes freed
 */
public long freeBlock(long freeList[]) {
  long freedBytes = 0;
  for (long offset : freeList) {
    freedBytes += freeBlock(offset);
  }
  return freedBytes;
}
}