/**
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.io.hfile;
import java.io.IOException;
import java.lang.ref.WeakReference;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.locks.ReentrantLock;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.io.HeapSize;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.hfile.CachedBlock.BlockPriority;
import org.apache.hadoop.hbase.io.hfile.bucket.BucketCache;
import org.apache.hadoop.hbase.util.*;
import org.apache.hadoop.util.StringUtils;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
/**
* A block cache implementation that is memory-aware using {@link HeapSize},
* memory-bound using an LRU eviction algorithm, and concurrent: backed by a
* {@link ConcurrentHashMap} and with a non-blocking eviction thread giving
* constant-time {@link #cacheBlock} and {@link #getBlock} operations.<p>
*
* Contains three levels of block priority to allow for
* scan-resistance and in-memory families. A block is added with an inMemory
* flag if necessary, otherwise a block becomes a single access priority. Once
* a block is accessed again, it changes to multiple access. This is used
* to prevent scans from thrashing the cache, adding a least-frequently-used
* element to the eviction algorithm.<p>
*
* Each priority is given its own chunk of the total cache to ensure
* fairness during eviction. Each priority will retain close to its maximum
* size, however, if any priority is not using its entire chunk the others
* are able to grow beyond their chunk size.<p>
*
* Instantiated at a minimum with the total size and average block size.
* All sizes are in bytes. The block size is not especially important as this
* cache is fully dynamic in its sizing of blocks. It is only used for
* pre-allocating data structures and in initial heap estimation of the map.<p>
*
* The detailed constructor defines the sizes for the three priorities (they
* should total to the maximum size defined). It also sets the levels that
* trigger and control the eviction thread.<p>
*
* The acceptable size is the cache size level which triggers the eviction
* process to start. It evicts enough blocks to get the size below the
* minimum size specified.<p>
*
* Eviction happens in a separate thread and involves a single full-scan
* of the map. It determines how many bytes must be freed to reach the minimum
* size, and then while scanning determines the fewest least-recently-used
* blocks necessary from each of the three priorities (would be 3 times bytes
* to free). It then uses the priority chunk sizes to evict fairly according
* to the relative sizes and usage.
*/
@InterfaceAudience.Private
public class LruBlockCache implements BlockCache, HeapSize {
static final Log LOG = LogFactory.getLog(LruBlockCache.class);

/** Configuration key from which the Configuration-based constructor reads the min factor. */
static final String LRU_MIN_FACTOR_CONFIG_NAME = "hbase.lru.blockcache.min.factor";
/** Configuration key from which the Configuration-based constructor reads the acceptable factor. */
static final String LRU_ACCEPTABLE_FACTOR_CONFIG_NAME = "hbase.lru.blockcache.acceptable.factor";

// ---- Default configuration parameters ----

/** Backing concurrent map configuration */
static final float DEFAULT_LOAD_FACTOR = 0.75f;
static final int DEFAULT_CONCURRENCY_LEVEL = 16;

/** Eviction thresholds */
static final float DEFAULT_MIN_FACTOR = 0.95f;
static final float DEFAULT_ACCEPTABLE_FACTOR = 0.99f;

/** Priority buckets */
static final float DEFAULT_SINGLE_FACTOR = 0.25f;
static final float DEFAULT_MULTI_FACTOR = 0.50f;
static final float DEFAULT_MEMORY_FACTOR = 0.25f;

/** Statistics thread period, in seconds */
static final int statThreadPeriod = 60 * 1;

// ---- Per-workload (customId) statistics ----
// These maps are static, so they are shared by every LruBlockCache instance. They are
// mutated by the threads calling cacheBlock/getBlock/evictBlock and iterated by the
// periodic statistics task, so they must be concurrent maps: a plain HashMap can be
// corrupted by concurrent puts and throws ConcurrentModificationException when iterated
// while another thread inserts.

/** Map of workload ID to its cache occupancy, revised by the statistics task */
private static final Map<Integer, Float> occupancyMap =
    new ConcurrentHashMap<Integer, Float>();
/** Map of workload ID to its cumulative cache hit ratio, revised by the statistics task */
private static final Map<Integer, Float> hitRatioMap =
    new ConcurrentHashMap<Integer, Float>();
/** Map of workload ID to its write-throttle threshold, revised by the statistics task */
private static final Map<Integer, Float> thresholdMap =
    new ConcurrentHashMap<Integer, Float>();
/** Cumulative data-request hits per workload ID */
private static final Map<Integer, AtomicLong> hitsCount =
    new ConcurrentHashMap<Integer, AtomicLong>();
/** Cumulative data-request misses per workload ID */
private static final Map<Integer, AtomicLong> missCount =
    new ConcurrentHashMap<Integer, AtomicLong>();
/** Data-request hits per workload ID since the last statistics run (cleared by logStats) */
private static final Map<Integer, AtomicLong> periodichitsCount =
    new ConcurrentHashMap<Integer, AtomicLong>();
/** Data-request misses per workload ID since the last statistics run (cleared by logStats) */
private static final Map<Integer, AtomicLong> periodicmissCount =
    new ConcurrentHashMap<Integer, AtomicLong>();
/** Number of blocks each workload ID has placed into the cache */
private static final Map<Integer, AtomicLong> cachePlacementCount =
    new ConcurrentHashMap<Integer, AtomicLong>();
/** Number of placed blocks that were later read back by the same workload ID */
private static final Map<Integer, AtomicLong> cacheUseCount =
    new ConcurrentHashMap<Integer, AtomicLong>();
/** Ratio cacheUseCount / cachePlacementCount per workload ID */
private static final Map<Integer, Float> cacheReusePercentage =
    new ConcurrentHashMap<Integer, Float>();
/** Random source used to throttle cache writes probabilistically */
private static final Random rng = new Random();

/** Concurrent map (the cache) */
private final ConcurrentHashMap<BlockCacheKey,CachedBlock> map;

/** Eviction lock (locked when eviction in process) */
private final ReentrantLock evictionLock = new ReentrantLock(true);

/** Volatile boolean to track if we are in an eviction process or not */
private volatile boolean evictionInProgress = false;

/** Eviction thread */
private final EvictionThread evictionThread;

/** Statistics thread schedule pool (for heavy debugging, could remove) */
private final ScheduledExecutorService scheduleThreadPool =
    Executors.newScheduledThreadPool(1,
        new ThreadFactoryBuilder()
            .setNameFormat("LRU Statistics #%d")
            .setDaemon(true)
            .build());

/** Current size of cache */
private final AtomicLong size;

/** Current number of cached elements */
private final AtomicLong elements;

/** Cache access count (sequential ID) */
private final AtomicLong count;

/** Cache statistics */
private final CacheStats stats;

/** Maximum allowable size of cache (block put if size > max, evict) */
private long maxSize;

/** Approximate block size */
private long blockSize;

/** Acceptable size of cache (no evictions if size < acceptable) */
private float acceptableFactor;

/** Minimum threshold of cache (when evicting, evict until size < min) */
private float minFactor;

/** Single access bucket size */
private float singleFactor;

/** Multiple access bucket size */
private float multiFactor;

/** In-memory bucket size */
private float memoryFactor;

/** Overhead of the structure itself */
private long overhead;

/** Where to send victims (blocks evicted from the cache) */
private BucketCache victimHandler = null;

/** Number of blocks currently cached per workload ID (shared by all instances) */
private static final Map<Integer, AtomicLong> occupancy =
    new ConcurrentHashMap<Integer, AtomicLong>();

/** Total number of accesses to this cache, reset every log interval */
private static long accesses = 0;

/** Max of accesses to any one block this cache, reset every log interval */
private static long maxaccesses = 0;
/**
 * Default constructor. Specify maximum size and expected average block
 * size (approximation is fine).
 *
 * <p>Delegates to the three-argument constructor with the eviction thread
 * enabled; all other factors are taken from the defaults specified in this
 * class.
 * @param maxSize maximum size of cache, in bytes
 * @param blockSize approximate size of each block, in bytes
 */
public LruBlockCache(long maxSize, long blockSize) {
this(maxSize, blockSize, true);
}
/**
 * Constructor used for testing. Allows disabling of the eviction thread.
 *
 * <p>The backing map's initial size is {@code ceil(1.2 * maxSize / blockSize)};
 * load factor, concurrency level, eviction thresholds and priority factors
 * all come from the {@code DEFAULT_*} constants.
 * @param maxSize maximum size of cache, in bytes
 * @param blockSize approximate size of each block, in bytes
 * @param evictionThread whether to run evictions in a background thread
 */
public LruBlockCache(long maxSize, long blockSize, boolean evictionThread) {
this(maxSize, blockSize, evictionThread,
(int)Math.ceil(1.2*maxSize/blockSize),
DEFAULT_LOAD_FACTOR, DEFAULT_CONCURRENCY_LEVEL,
DEFAULT_MIN_FACTOR, DEFAULT_ACCEPTABLE_FACTOR,
DEFAULT_SINGLE_FACTOR, DEFAULT_MULTI_FACTOR,
DEFAULT_MEMORY_FACTOR);
}
/**
 * Constructor that reads the eviction thresholds from configuration.
 *
 * <p>The min factor is read from {@link #LRU_MIN_FACTOR_CONFIG_NAME} and the
 * acceptable factor from {@link #LRU_ACCEPTABLE_FACTOR_CONFIG_NAME}, each
 * falling back to its {@code DEFAULT_*} constant. Every other tunable uses
 * the class defaults.
 * @param maxSize maximum size of cache, in bytes
 * @param blockSize approximate size of each block, in bytes
 * @param evictionThread whether to run evictions in a background thread
 * @param conf configuration supplying the two eviction factors
 */
public LruBlockCache(long maxSize, long blockSize, boolean evictionThread, Configuration conf) {
this(maxSize, blockSize, evictionThread,
(int)Math.ceil(1.2*maxSize/blockSize),
DEFAULT_LOAD_FACTOR,
DEFAULT_CONCURRENCY_LEVEL,
conf.getFloat(LRU_MIN_FACTOR_CONFIG_NAME, DEFAULT_MIN_FACTOR),
conf.getFloat(LRU_ACCEPTABLE_FACTOR_CONFIG_NAME, DEFAULT_ACCEPTABLE_FACTOR),
DEFAULT_SINGLE_FACTOR,
DEFAULT_MULTI_FACTOR,
DEFAULT_MEMORY_FACTOR);
}
/**
 * Configuration-based constructor with the eviction thread enabled.
 * @param maxSize maximum size of cache, in bytes
 * @param blockSize approximate size of each block, in bytes
 * @param conf configuration supplying the two eviction factors
 */
public LruBlockCache(long maxSize, long blockSize, Configuration conf) {
this(maxSize, blockSize, true, conf);
}
/**
* Configurable constructor. Use this constructor if not using defaults.
* @param maxSize maximum size of this cache, in bytes
* @param blockSize expected average size of blocks, in bytes
* @param evictionThread whether to run evictions in a bg thread or not
* @param mapInitialSize initial size of backing ConcurrentHashMap
* @param mapLoadFactor initial load factor of backing ConcurrentHashMap
* @param mapConcurrencyLevel initial concurrency factor for backing CHM
* @param minFactor percentage of total size that eviction will evict until
* @param acceptableFactor percentage of total size that triggers eviction
* @param singleFactor percentage of total size for single-access blocks
* @param multiFactor percentage of total size for multiple-access blocks
* @param memoryFactor percentage of total size for in-memory blocks
*/
public LruBlockCache(long maxSize, long blockSize, boolean evictionThread,
int mapInitialSize, float mapLoadFactor, int mapConcurrencyLevel,
float minFactor, float acceptableFactor,
float singleFactor, float multiFactor, float memoryFactor) {
if(singleFactor + multiFactor + memoryFactor != 1) {
throw new IllegalArgumentException("Single, multi, and memory factors " +
" should total 1.0");
}
if(minFactor >= acceptableFactor) {
throw new IllegalArgumentException("minFactor must be smaller than acceptableFactor");
}
if(minFactor >= 1.0f || acceptableFactor >= 1.0f) {
throw new IllegalArgumentException("all factors must be < 1");
}
this.maxSize = maxSize;
this.blockSize = blockSize;
map = new ConcurrentHashMap<BlockCacheKey,CachedBlock>(mapInitialSize,
mapLoadFactor, mapConcurrencyLevel);
//map = new HashMap<BlockCacheKey,CachedBlock>(mapInitialSize,
// mapLoadFactor);
this.minFactor = minFactor;
this.acceptableFactor = acceptableFactor;
this.singleFactor = singleFactor;
this.multiFactor = multiFactor;
this.memoryFactor = memoryFactor;
this.stats = new CacheStats();
this.count = new AtomicLong(0);
this.elements = new AtomicLong(0);
this.overhead = calculateOverhead(maxSize, blockSize, mapConcurrencyLevel);
this.size = new AtomicLong(this.overhead);
if(evictionThread) {
this.evictionThread = new EvictionThread(this);
this.evictionThread.start(); // FindBugs SC_START_IN_CTOR
} else {
this.evictionThread = null;
}
this.scheduleThreadPool.scheduleAtFixedRate(new StatisticsThread(this),
statThreadPeriod, statThreadPeriod, TimeUnit.SECONDS);
}
/**
 * Changes the maximum cache size and, if the cache is now above its
 * acceptable size and no eviction is already running, triggers one.
 * @param maxSize new maximum size of the cache, in bytes
 */
public void setMaxSize(long maxSize) {
  this.maxSize = maxSize;
  if (size.get() <= acceptableSize() || evictionInProgress) {
    return;
  }
  LOG.info("FOr RUN EVICTION FROM SET MAX");
  runEviction();
}
// BlockCache implementation
/**
 * Cache the block with the specified name and buffer.
 * <p>
 * Delegates to the ID-tagging overload with a custom (workload) ID of 0.
 * It is assumed this will NEVER be called on an already cached block. If
 * that is done, the overload throws a RuntimeException.
 * @param cacheKey block's cache key
 * @param buf block buffer
 * @param inMemory if block is in-memory
 */
public void cacheBlock(BlockCacheKey cacheKey, Cacheable buf, boolean inMemory) {
cacheBlock(cacheKey, buf, inMemory, 0);
}
/**
 * Caches a block tagged with the ID of the workload that placed it.
 * <p>
 * Writes are throttled probabilistically: a random integer in [0, 100) is
 * drawn and the block is silently dropped when that draw is not below the
 * workload's threshold. Workloads without a threshold entry use 100 (always
 * admitted); workload 150 uses 0 (never admitted).
 * <p>
 * After insertion the per-workload occupancy and placement counters are
 * incremented, and an eviction is triggered if the cache grew past its
 * acceptable size and none is in progress.
 * @param cacheKey block's cache key
 * @param buf block buffer
 * @param inMemory if block is in-memory
 * @param customId ID of the workload placing the block
 * @throws RuntimeException if the key is already cached (and the write was
 *         not throttled away first)
 */
public void cacheBlock(BlockCacheKey cacheKey, Cacheable buf, boolean inMemory, int customId) {
  // Throttling writes
  final float admitBelow;
  if (customId == 150) {
    admitBelow = 0;
  } else {
    Float configured = thresholdMap.get(customId);
    admitBelow = (configured == null) ? 100 : configured.floatValue();
  }
  if (rng.nextInt(100) >= admitBelow) {
    return;
  }

  if (map.get(cacheKey) != null) {
    throw new RuntimeException("Cached an already cached block");
  }

  CachedBlock fresh = new CachedBlock(cacheKey, buf, count.incrementAndGet(), inMemory);
  fresh.setCustomId(customId);
  fresh.setUsed(false);
  long sizeAfterInsert = updateSizeMetrics(fresh, false);
  map.put(cacheKey, fresh);
  elements.incrementAndGet();
  if (sizeAfterInsert > acceptableSize() && !evictionInProgress) {
    runEviction();
  }

  // Per-workload bookkeeping: blocks currently resident ...
  AtomicLong resident = occupancy.get(customId);
  if (resident == null) {
    occupancy.put(customId, new AtomicLong(1));
  } else {
    resident.incrementAndGet();
  }
  // ... and blocks ever placed.
  AtomicLong placed = cachePlacementCount.get(customId);
  if (placed == null) {
    cachePlacementCount.put(customId, new AtomicLong(1));
  } else {
    placed.incrementAndGet();
  }
}
/**
 * Cache the block with the specified name and buffer, not in-memory.
 * <p>
 * Delegates to {@link #cacheBlock(BlockCacheKey, Cacheable, boolean)} with
 * {@code inMemory = false}. It is assumed this will NEVER be called on an
 * already cached block; in this implementation the delegate chain throws a
 * RuntimeException in that case rather than replacing the buffer.
 * @param cacheKey block's cache key
 * @param buf block buffer
 */
public void cacheBlock(BlockCacheKey cacheKey, Cacheable buf) {
cacheBlock(cacheKey, buf, false);
}
/**
 * Adjusts the cache's running size counter by the heap size of the given
 * block: added when the block is being cached, subtracted when evicted.
 *
 * @param cb the block whose heap size is applied
 * @param evict true to subtract the block's size, false to add it
 * @return the cache size after the adjustment
 */
protected long updateSizeMetrics(CachedBlock cb, boolean evict) {
  long blockBytes = cb.heapSize();
  long delta = evict ? -blockBytes : blockBytes;
  return size.addAndGet(delta);
}
/**
 * Get the buffer of the block with the specified name.
 * <p>
 * Delegates to the workload-aware overload with workload ID 0,
 * {@code updateaccess = false} and {@code isdatarequest = false}, and
 * returns only the block part of the resulting pair.
 * @param cacheKey block's cache key
 * @param caching true if the caller caches blocks on cache misses
 * @param repeat Whether this is a repeat lookup for the same block
 * (used to avoid double counting cache misses when doing double-check locking)
 * @return buffer of specified cache key, or null if not in cache
 * @see HFileReaderV2#readBlock(long, long, boolean, boolean, boolean, BlockType)
 */
@Override
public Cacheable getBlock(BlockCacheKey cacheKey, boolean caching, boolean repeat) {
return getBlock(cacheKey, caching, repeat, false, 0, false).getFirst();
}
// NOTE(review): not referenced anywhere in this class; package-private, so it may be
// used from elsewhere in the package -- confirm before removing.
static int check = 0;
/**
 * Workload-aware lookup.
 * <p>
 * Returns a pair whose first element is the cached block (or null on a
 * miss) and whose second element is true only if the workload was actually
 * blocked from cache access. Workload 140 is always blocked. Lookups by
 * workload 70 that hit do not update the block's access bookkeeping.
 * <p>
 * On a miss the victim handler, if set, is consulted and its answer is
 * returned without touching the per-workload miss counters.
 * @param cacheKey block's cache key
 * @param caching true if the caller caches blocks on cache misses
 * @param repeat whether this is a repeat lookup for the same block
 * @param updateaccess forwarded to the victim handler on a miss
 * @param customId ID of the workload performing the lookup
 * @param isdatarequest whether per-workload hit/miss counters are updated
 * @return pair of (block or null, blocked flag)
 */
public Pair<Cacheable, Boolean> getBlock(BlockCacheKey cacheKey, boolean caching, boolean repeat,
    boolean updateaccess, int customId, boolean isdatarequest) {
  Pair<Cacheable, Boolean> result = new Pair<Cacheable, Boolean>();
  result.setSecond(false);
  if (customId == 140) {
    result.setFirst(null);
    result.setSecond(true);
    return result;
  }

  CachedBlock cached = map.get(cacheKey);
  if (cached == null) {
    if (!repeat) {
      stats.miss(caching);
    }
    if (victimHandler != null) {
      result.setFirst(victimHandler.getBlock(cacheKey, caching, repeat, updateaccess, customId));
      return result;
    }
    if (!repeat && isdatarequest) {
      AtomicLong totalMisses = missCount.get(customId);
      if (totalMisses == null) {
        missCount.put(customId, new AtomicLong(1));
      } else {
        totalMisses.incrementAndGet();
      }
      AtomicLong recentMisses = periodicmissCount.get(customId);
      if (recentMisses == null) {
        periodicmissCount.put(customId, new AtomicLong(1));
      } else {
        recentMisses.incrementAndGet();
      }
    }
    result.setFirst(null);
    return result;
  }

  stats.hit(caching);
  if (isdatarequest) {
    AtomicLong totalHits = hitsCount.get(customId);
    if (totalHits == null) {
      hitsCount.put(customId, new AtomicLong(1));
    } else {
      totalHits.incrementAndGet();
    }
    AtomicLong recentHits = periodichitsCount.get(customId);
    if (recentHits == null) {
      periodichitsCount.put(customId, new AtomicLong(1));
    } else {
      recentHits.incrementAndGet();
    }
  }

  // TODO: Lot of changes to do here.
  // 1. Should this be tracked for all blocks, or only blocks that *I* placed into cache?
  // 2. How often do I reset "used" to false? I.e, if it hasn't been used in a while, it's
  //    unused right? How does above tie in with LRU eviction? Should we bump down priority
  //    if unused?
  // First read-back of a block by the workload that placed it counts as one "use".
  if (!cached.isUsed() && cached.getCustomId() == customId) {
    cached.setUsed(true);
    AtomicLong uses = cacheUseCount.get(customId);
    if (uses == null) {
      cacheUseCount.put(customId, new AtomicLong(1));
    } else {
      uses.incrementAndGet();
    }
  }

  if (customId != 70) {
    cached.incrementNumAccesses();
    cached.access(count.incrementAndGet(), 0, customId);
  }
  result.setFirst(cached.getBuffer());
  return result;
}
/**
 * Whether the cache contains block with specified cacheKey.
 * Only the backing map is consulted.
 * @param cacheKey key of the block to look for
 * @return true if contains the block
 */
public boolean containsBlock(BlockCacheKey cacheKey) {
return map.containsKey(cacheKey);
}
/**
 * Evicts the block stored under the given key, if any. The eviction is not
 * attributed to the eviction process, so the block is not handed to the
 * victim handler.
 * @param cacheKey key of the block to evict
 * @return true if a block was found under the key, false otherwise
 */
@Override
public boolean evictBlock(BlockCacheKey cacheKey) {
  CachedBlock cached = map.get(cacheKey);
  if (cached != null) {
    evictBlock(cached, false);
    return true;
  }
  return false;
}
/**
 * Evicts every block belonging to one HFile, here and in the victim handler
 * when one is set. This walks all keys of the cache, so it is a linear-time
 * operation; ideally it would be a lookup in a map keyed by file.
 *
 * <p>
 * Used for evict-on-close to drop all blocks of a specific HFile.
 *
 * @param hfileName name of the HFile whose blocks are evicted
 * @return the number of blocks evicted
 */
@Override
public int evictBlocksByHfileName(String hfileName) {
  int evictedCount = 0;
  for (BlockCacheKey key : map.keySet()) {
    boolean belongsToFile = key.getHfileName().equals(hfileName);
    if (belongsToFile && evictBlock(key)) {
      evictedCount++;
    }
  }
  if (victimHandler != null) {
    evictedCount += victimHandler.evictBlocksByHfileName(hfileName);
  }
  return evictedCount;
}
/**
 * Evict the block, and it will be cached by the victim handler if exists &&
 * block may be read again later.
 * <p>
 * All accounting (size, element count, per-workload occupancy, eviction
 * statistics) is applied only if this call actually removed an entry from
 * the map. Two threads can race to evict the same block (for example the
 * eviction thread and an explicit evict-by-key); without this guard the
 * loser would subtract the block's size a second time and drive the
 * counters out of step with the map's contents.
 * @param block the block to evict
 * @param evictedByEvictionProcess true if the given block is evicted by
 *          EvictionThread
 * @return the heap size of evicted block, or 0 if the block's key was no
 *         longer present in the cache
 */
protected long evictBlock(CachedBlock block, boolean evictedByEvictionProcess) {
  boolean removed = map.remove(block.getCacheKey()) != null;
  if (!removed) {
    return 0;
  }
  updateSizeMetrics(block, true);
  elements.decrementAndGet();
  AtomicLong resident = occupancy.get(block.getCustomId());
  if (resident != null) {
    resident.decrementAndGet();
  }
  stats.evicted();
  if (evictedByEvictionProcess && victimHandler != null) {
    // Ask the victim cache to wait for room only while this cache is below its
    // acceptable size.
    boolean wait = getCurrentSize() < acceptableSize();
    boolean inMemory = block.getPriority() == BlockPriority.MEMORY;
    victimHandler.cacheBlockWithWait(block.getCacheKey(), block.getBuffer(),
        inMemory, wait);
  }
  return block.heapSize();
}
/**
 * Starts an eviction: wakes the background eviction thread when there is
 * one, otherwise runs the eviction synchronously on the calling thread.
 */
private void runEviction() {
  if (evictionThread != null) {
    evictionThread.evict();
    return;
  }
  evict();
}
/**
 * Eviction method.
 * <p>
 * Runs at most one eviction at a time: if the eviction lock is already
 * held the call returns immediately. Otherwise it computes how many bytes
 * must go to bring the cache down to its minimum size, sorts every cached
 * block into one bucket per priority, and frees from the buckets in order
 * of increasing overflow (bucket contents minus the bucket's share).
 */
void evict() {
// Ensure only one eviction at a time
if(!evictionLock.tryLock()) return;
try {
evictionInProgress = true;
long currentSize = this.size.get();
long bytesToFree = currentSize - minSize();
if (LOG.isDebugEnabled()) {
LOG.debug("Block cache LRU eviction started; Attempting to free " +
StringUtils.byteDesc(bytesToFree) + " of total=" +
StringUtils.byteDesc(currentSize));
}
// Already at or below the minimum size: nothing to do. The finally block below
// still runs, so this early exit is also counted by stats.evict().
if(bytesToFree <= 0) return;
// Instantiate priority buckets
BlockBucket bucketSingle = new BlockBucket(bytesToFree, blockSize,
singleSize());
BlockBucket bucketMulti = new BlockBucket(bytesToFree, blockSize,
multiSize());
BlockBucket bucketMemory = new BlockBucket(bytesToFree, blockSize,
memorySize());
// Scan entire map putting into appropriate buckets
for(CachedBlock cachedBlock : map.values()) {
switch(cachedBlock.getPriority()) {
case SINGLE: {
bucketSingle.add(cachedBlock);
break;
}
case MULTI: {
bucketMulti.add(cachedBlock);
break;
}
case MEMORY: {
bucketMemory.add(cachedBlock);
break;
}
}
}
// The queue orders buckets by BlockBucket.compareTo, i.e. by overflow, so poll()
// hands back the bucket with the smallest overflow first.
PriorityQueue<BlockBucket> bucketQueue =
new PriorityQueue<BlockBucket>(3);
bucketQueue.add(bucketSingle);
bucketQueue.add(bucketMulti);
bucketQueue.add(bucketMemory);
int remainingBuckets = 3;
long bytesFreed = 0;
BlockBucket bucket;
while((bucket = bucketQueue.poll()) != null) {
long overflow = bucket.overflow();
if(overflow > 0) {
// Free at most this bucket's overflow, and at most an equal share of what is
// still outstanding across the buckets not yet processed.
long bucketBytesToFree = Math.min(overflow,
(bytesToFree - bytesFreed) / remainingBuckets);
bytesFreed += bucket.free(bucketBytesToFree);
}
remainingBuckets--;
}
if (LOG.isDebugEnabled()) {
long single = bucketSingle.totalSize();
long multi = bucketMulti.totalSize();
long memory = bucketMemory.totalSize();
LOG.debug("Block cache LRU eviction completed; " +
"freed=" + StringUtils.byteDesc(bytesFreed) + ", " +
"total=" + StringUtils.byteDesc(this.size.get()) + ", " +
"single=" + StringUtils.byteDesc(single) + ", " +
"multi=" + StringUtils.byteDesc(multi) + ", " +
"memory=" + StringUtils.byteDesc(memory));
}
} finally {
// Always record the run, clear the in-progress flag and release the lock.
stats.evict();
evictionInProgress = false;
evictionLock.unlock();
}
}
/**
 * Used to group blocks into priority buckets. There will be a BlockBucket
 * for each priority (single, multi, memory). Once bucketed, the eviction
 * algorithm takes the appropriate number of elements out of each according
 * to configuration parameters and their relatives sizes.
 * <p>
 * Buckets order and compare by their overflow: the total heap size of the
 * blocks added so far minus the bucket's configured size.
 */
private class BlockBucket implements Comparable<BlockBucket> {
  /** Eviction candidates collected for this bucket. */
  private final CachedBlockQueue queue;
  /** Sum of the heap sizes of all blocks added to this bucket. */
  private long totalSize = 0;
  /** Share of the cache configured for this bucket's priority, in bytes. */
  private final long bucketSize;

  public BlockBucket(long bytesToFree, long blockSize, long bucketSize) {
    this.bucketSize = bucketSize;
    queue = new CachedBlockQueue(bytesToFree, blockSize);
  }

  /** Adds a block to this bucket and accounts for its heap size. */
  public void add(CachedBlock block) {
    totalSize += block.heapSize();
    queue.add(block);
  }

  /**
   * Evicts blocks taken from the queue until at least {@code toFree} bytes
   * have been freed or the queue is empty.
   * @param toFree number of bytes to free
   * @return number of bytes actually freed
   */
  public long free(long toFree) {
    CachedBlock cb;
    long freedBytes = 0;
    while ((cb = queue.pollLast()) != null) {
      freedBytes += evictBlock(cb, true);
      if (freedBytes >= toFree) {
        return freedBytes;
      }
    }
    return freedBytes;
  }

  /** @return bytes by which this bucket exceeds its share (negative if under) */
  public long overflow() {
    return totalSize - bucketSize;
  }

  /** @return total heap size of the blocks added to this bucket */
  public long totalSize() {
    return totalSize;
  }

  public int compareTo(BlockBucket that) {
    if (this.overflow() == that.overflow()) return 0;
    return this.overflow() > that.overflow() ? 1 : -1;
  }

  @Override
  public boolean equals(Object that) {
    if (!(that instanceof BlockBucket)) {
      return false;
    }
    return compareTo((BlockBucket) that) == 0;
  }

  /**
   * Hash derived from the overflow, the only state {@link #equals} looks
   * at, so that equal buckets always report equal hash codes.
   */
  @Override
  public int hashCode() {
    long overflow = overflow();
    return (int) (overflow ^ (overflow >>> 32));
  }
}
/**
 * Returns the configured upper bound on the cache size.
 * @return max size in bytes
 */
public long getMaxSize() {
  return maxSize;
}
/**
 * Returns the value of the running size counter, which starts at the
 * cache's own overhead and grows or shrinks as blocks are cached or evicted.
 * @return current size in bytes
 */
public long getCurrentSize() {
  return size.get();
}
/**
 * Returns the remaining headroom of this cache: maximum size minus current
 * size.
 * @return free size in bytes
 */
public long getFreeSize() {
  long capacity = getMaxSize();
  long used = getCurrentSize();
  return capacity - used;
}
/**
 * Returns how many blocks are currently held, according to the element
 * counter.
 * @return number of cached blocks
 */
public long size() {
  return elements.get();
}
/**
 * Returns the number of cached blocks, as tracked by the element counter.
 * @return number of cached blocks
 */
@Override
public long getBlockCount() {
  return elements.get();
}
/**
 * Returns the eviction-run count reported by the cache statistics.
 * @return number of eviction runs that have occurred
 */
public long getEvictionCount() {
  return stats.getEvictionCount();
}
/**
 * Returns the evicted-block count reported by the cache statistics.
 * @return number of blocks evicted during the lifetime of this cache
 */
public long getEvictedCount() {
  return stats.getEvictedCount();
}
/**
 * @return the background eviction thread, or null when the cache was
 *         constructed without one
 */
EvictionThread getEvictionThread() {
  return evictionThread;
}
/**
 * Eviction thread. Sits in waiting state until an eviction is triggered
 * when the cache size grows above the acceptable level.<p>
 *
 * Thread is triggered into action by {@link LruBlockCache#runEviction()}.
 * The cache is held through a weak reference; the thread exits once the
 * reference has been cleared.
 */
static class EvictionThread extends HasThread {
  private final WeakReference<LruBlockCache> cache;
  // Written by shutdown() and read by the run loop on another thread.
  private volatile boolean go = true;
  // Flag set after entering the run method, used for test. It is polled from a different
  // thread than the one that sets it, hence volatile.
  private volatile boolean enteringRun = false;

  public EvictionThread(LruBlockCache cache) {
    super(Thread.currentThread().getName() + ".LruBlockCache.EvictionThread");
    setDaemon(true);
    this.cache = new WeakReference<LruBlockCache>(cache);
  }

  @Override
  public void run() {
    enteringRun = true;
    while (this.go) {
      synchronized (this) {
        try {
          this.wait();
        } catch (InterruptedException ignored) {
          // Deliberately ignored: an interrupt is treated like a wake-up and simply
          // leads to one eviction pass below.
        }
      }
      LruBlockCache cache = this.cache.get();
      if (cache == null) break;
      cache.evict();
    }
  }

  /** Wakes the thread so that it performs one eviction pass. */
  public void evict() {
    synchronized (this) {
      this.notifyAll(); // FindBugs NN_NAKED_NOTIFY
    }
  }

  /** Asks the run loop to stop and wakes the thread so it can notice. */
  synchronized void shutdown() {
    this.go = false;
    this.notifyAll();
  }

  /**
   * Used for the test.
   */
  boolean isEnteringRun() {
    return this.enteringRun;
  }
}
/**
 * Statistics task. Each run asks the cache to log (and recompute) its
 * statistics. It is handed to a scheduled executor as a Runnable.
 */
static class StatisticsThread extends Thread {
  LruBlockCache lru;

  public StatisticsThread(LruBlockCache lru) {
    super("LruBlockCache.StatisticsThread");
    setDaemon(true);
    this.lru = lru;
  }

  @Override
  public void run() {
    try {
      lru.logStats();
    } catch (RuntimeException e) {
      // A task scheduled with scheduleAtFixedRate is never run again once one execution
      // throws, so a single failure here would silently stop all future statistics
      // runs. Log it and let the next period try again.
      LOG.warn("LruBlockCache statistics run failed; will retry on the next period", e);
    }
  }
}
/**
 * Logs the cache statistics and then recomputes the per-workload hit
 * ratios, occupancy, reuse ratios and throttle thresholds, finally
 * resetting the periodic counters.
 * <p>
 * NOTE(review): everything in this method, including the threshold
 * recalculation that drives write throttling, is skipped unless DEBUG
 * logging is enabled for this class. That coupling looks unintended;
 * confirm before relying on throttling in a non-debug deployment.
 */
public void logStats() {
  if (!LOG.isDebugEnabled()) return;
  // Log size
  long totalSize = heapSize();
  long freeSize = maxSize - totalSize;
  // Each field is followed by exactly one ", " separator; the ratio fields previously
  // appended a second one whenever the ratio was non-zero.
  LruBlockCache.LOG.debug("Stats: " +
      "total=" + StringUtils.byteDesc(totalSize) + ", " +
      "free=" + StringUtils.byteDesc(freeSize) + ", " +
      "max=" + StringUtils.byteDesc(this.maxSize) + ", " +
      "blocks=" + size() +", " +
      "accesses=" + stats.getRequestCount() + ", " +
      "hits=" + stats.getHitCount() + ", " +
      "hitRatio=" +
      (stats.getHitCount() == 0 ? "0" : StringUtils.formatPercent(stats.getHitRatio(), 2)) + ", " +
      "cachingAccesses=" + stats.getRequestCachingCount() + ", " +
      "cachingHits=" + stats.getHitCachingCount() + ", " +
      "cachingHitsRatio=" +
      (stats.getHitCachingCount() == 0 ? "0" : StringUtils.formatPercent(stats.getHitCachingRatio(), 2)) + ", " +
      "evictions=" + stats.getEvictionCount() + ", " +
      "evicted=" + stats.getEvictedCount() + ", " +
      "evictedPerRun=" + stats.evictedPerEviction());

  // Order matters: thresholds are derived from the occupancy and reuse figures.
  calculateHitRatios();
  calculateOccupancy();
  calculateReuse();
  calculateThresholds();

  LOG.info("FOr total accesses: " + accesses);
  LOG.info("FOr max accesses: " + maxaccesses);
  if (elements.floatValue() > 0) {
    LOG.info("FOr avg accesses: " + (float)accesses/elements.floatValue());
  }
  accesses = 0;
  maxaccesses = 0;

  LOG.info("FOr Unique blocs:" + HFileReaderV2.uniqueBlocks.size());
  for (int id : HFileReaderV2.uniqueBlockCounts.keySet()) {
    LOG.info("FOr Uniq blocks ID: " + id + " " + HFileReaderV2.uniqueBlockCounts.get(id).size());
  }

  // Start a fresh period.
  HFileReaderV2.idMissCounts.clear();
  HFileReaderV2.idHitCounts.clear();
  periodichitsCount.clear();
  periodicmissCount.clear();
}
/**
 * For every workload that has placed blocks, logs and records the ratio of
 * placed blocks that the workload later read back to blocks it placed.
 */
private void calculateReuse() {
  for (int id : cachePlacementCount.keySet()) {
    float placed = cachePlacementCount.get(id).floatValue();
    AtomicLong useCounter = cacheUseCount.get(id);
    float reused = (useCounter == null) ? 0 : useCounter.floatValue();
    float ratio = reused / placed;
    LOG.info("FOr cache Reuse: " + id + " is " + reused + "/" + placed + "=" + ratio);
    cacheReusePercentage.put(id, ratio);
  }
}
/**
 * Logs and records, for every workload, the fraction of this cache's blocks
 * that the workload currently occupies.
 * <p>
 * When the cache holds no blocks the occupancy is recorded as 0 rather than
 * dividing by zero, which would store NaN or Infinity and feed it into the
 * threshold calculation.
 */
private void calculateOccupancy() {
  LOG.info("FOr Occupancy metrics");
  // Read once so every workload is measured against the same denominator.
  float totalBlocks = elements.floatValue();
  for (int id : occupancy.keySet()) {
    float idoccupancy = 0f;
    if (totalBlocks > 0) {
      idoccupancy = occupancy.get(id).floatValue() / totalBlocks;
    }
    occupancyMap.put(id, idoccupancy);
    LOG.info("FOr IDOccupancy: " + id + " is: " + idoccupancy);
  }
}
/**
 * Logs per-workload hit ratios, in ascending workload ID order: first the
 * cumulative figures (also stored in the hit-ratio map), then the figures
 * for the current period (logged only). Workloads with neither hits nor
 * misses are skipped.
 */
private void calculateHitRatios() {
  // Cumulative counters.
  Set<Integer> cumulativeIds = new TreeSet<Integer>(hitsCount.keySet());
  cumulativeIds.addAll(missCount.keySet());
  for (int id : cumulativeIds) {
    AtomicLong hitCounter = hitsCount.get(id);
    AtomicLong missCounter = missCount.get(id);
    float hits = (hitCounter == null) ? 0 : hitCounter.floatValue();
    float misses = (missCounter == null) ? 0 : missCounter.floatValue();
    if (hits <= 0 && misses <= 0) {
      continue;
    }
    float ratio = hits / (misses + hits);
    hitRatioMap.put(id, ratio);
    LOG.info("FOr IDCumulative: " + id + " hits: " + hits
        + " misses: " + misses + " ratio: " + ratio);
  }

  // Counters for the current period.
  Set<Integer> periodicIds = new TreeSet<Integer>(periodichitsCount.keySet());
  periodicIds.addAll(periodicmissCount.keySet());
  for (int id : periodicIds) {
    AtomicLong hitCounter = periodichitsCount.get(id);
    AtomicLong missCounter = periodicmissCount.get(id);
    float hits = (hitCounter == null) ? 0 : hitCounter.floatValue();
    float misses = (missCounter == null) ? 0 : missCounter.floatValue();
    if (hits <= 0 && misses <= 0) {
      continue;
    }
    float ratio = hits / (misses + hits);
    LOG.info("FOr IDPeriodic: " + id + " hits: " + hits
        + " misses: " + misses + " ratio: " + ratio);
  }
}
/**
 * Recomputes the write-throttle threshold of every workload that has a
 * reuse ratio.
 * <ul>
 * <li>A workload seen for the first time starts at 99.</li>
 * <li>A threshold above 100 is lowered by 1 per run.</li>
 * <li>Otherwise the threshold is derived from the reuse ratio, unless the
 * cache is less than 30% full or the workload occupies less than 10% of
 * it, in which case it is set to 99.</li>
 * </ul>
 * Workloads 0 and 267 are always reset to 100 at the end.
 * <p>
 * A workload that has a reuse ratio but no recorded occupancy yet is
 * treated as occupying nothing; unboxing the missing entry would otherwise
 * throw a NullPointerException out of the statistics task.
 */
private void calculateThresholds() {
  for (int workload : cacheReusePercentage.keySet()) {
    Float current = thresholdMap.get(workload);
    if (current == null) {
      thresholdMap.put(workload, 99f);
      LOG.info("FOr workload: " + workload + " set MAX threshold " + 99);
      continue;
    }
    if (current > 100) {
      // If a workload has climbed above 100, start reducing it.
      LOG.info("FOr DEC thresh: " + workload + " " + (current - 1));
      thresholdMap.put(workload, current - 1);
      continue;
    }

    float cacheFillPercentage = (float) getCurrentSize()/(float) getMaxSize();
    Float recordedOccupancy = occupancyMap.get(workload);
    float myOccupancy = (recordedOccupancy == null) ? 0f : recordedOccupancy.floatValue();
    float reuse = cacheReusePercentage.get(workload);
    float threshold = 100 * reuse;
    LOG.info("FOr " + workload +
        " cachefill: " + cacheFillPercentage + " cacheReuse: " + reuse
        + " occupancy: " + recordedOccupancy + " initthresh:" + threshold);
    if (cacheFillPercentage < 0.3) {
      // If cache is not filled yet, let it run longer.
      threshold = 99;
    } else if (myOccupancy < 0.1) {
      // If this is not occupying more than 10% of the cache, leave it alone.
      threshold = 99;
    } else {
      // This exaggerates the effect of threshold: if a workload has a threshold below
      // 50 it is decreased even further, and if it is above 50 it is raised further.
      // The penalty/reward grows with the distance from 50.
      float diff = (threshold - 50)/25;
      threshold = threshold + threshold * diff;
    }
    if (threshold < 0) {
      threshold = 5;
    }
    thresholdMap.put(workload, threshold);
    LOG.info("FOr Calc new thresh for ID: " + workload + " " + threshold);
  }
  thresholdMap.put(0, Float.valueOf(100.0f));
  thresholdMap.put(267, Float.valueOf(100.0f));
}
/**
 * Get counter statistics for this cache.
 *
 * <p>Includes: total accesses, hits, misses, evicted blocks, and runs
 * of the eviction processes.
 * @return the statistics object owned by this cache
 */
public CacheStats getStats() {
  return stats;
}
/**
 * Fixed heap overhead of one cache instance, excluding the backing map:
 * the object header plus three longs, nine references, five floats and one
 * boolean, aligned via ClassSize.align.
 * NOTE(review): these counts match the instance fields currently declared
 * in this class (3 long, 9 reference, 5 float, 1 boolean) -- keep them in
 * step when instance fields are added or removed.
 */
public final static long CACHE_FIXED_OVERHEAD = ClassSize.align(
(3 * Bytes.SIZEOF_LONG) + (9 * ClassSize.REFERENCE) +
(5 * Bytes.SIZEOF_FLOAT) + Bytes.SIZEOF_BOOLEAN
+ ClassSize.OBJECT);
// HeapSize implementation
/**
 * Reports the cache's heap usage, which is simply its current size counter
 * (structure overhead plus the heap sizes of the cached blocks).
 * @return current size in bytes
 */
public long heapSize() {
return getCurrentSize();
}
/**
 * Estimates the heap consumed by the cache structure itself: the fixed
 * per-instance overhead, the concurrent hash map, one map entry for each of
 * the {@code ceil(maxSize * 1.2 / blockSize)} expected blocks, and one map
 * segment per concurrency level.
 * @param maxSize maximum size of the cache, in bytes
 * @param blockSize expected average block size, in bytes
 * @param concurrency concurrency level of the backing map
 * @return estimated overhead in bytes
 */
public static long calculateOverhead(long maxSize, long blockSize, int concurrency){
  // FindBugs ICAST_INTEGER_MULTIPLY_CAST_TO_LONG
  long expectedEntries = (long) Math.ceil(maxSize * 1.2 / blockSize);
  long entriesOverhead = expectedEntries * ClassSize.CONCURRENT_HASHMAP_ENTRY;
  long segmentsOverhead = (long) concurrency * ClassSize.CONCURRENT_HASHMAP_SEGMENT;
  return CACHE_FIXED_OVERHEAD + ClassSize.CONCURRENT_HASHMAP
      + entriesOverhead + segmentsOverhead;
}
/**
 * Summarises the cached blocks per column family: for every cached block
 * whose HFile name resolves to a store file path, the block count and heap
 * size of the matching summary are incremented. Blocks whose file cannot be
 * resolved are ignored.
 * @param conf configuration used to locate the file system and root dir
 * @return the summaries, sorted by their natural order
 * @throws IOException if the store file map cannot be obtained
 */
@Override
public List<BlockCacheColumnFamilySummary> getBlockCacheColumnFamilySummaries(Configuration conf) throws IOException {
  FileSystem fs = FileSystem.get(conf);
  Map<String, Path> storeFilePaths =
      FSUtils.getTableStoreFilePathMap(fs, FSUtils.getRootDir(conf));

  // quirky, but it's a compound key and this is a shortcut taken instead of
  // creating a class that would represent only a key.
  Map<BlockCacheColumnFamilySummary, BlockCacheColumnFamilySummary> summaries =
      new HashMap<BlockCacheColumnFamilySummary, BlockCacheColumnFamilySummary>();

  for (CachedBlock cached : map.values()) {
    Path path = storeFilePaths.get(cached.getCacheKey().getHfileName());
    if (path == null) {
      continue;
    }
    BlockCacheColumnFamilySummary lookup =
        BlockCacheColumnFamilySummary.createFromStoreFilePath(path);
    BlockCacheColumnFamilySummary summary = summaries.get(lookup);
    if (summary == null) {
      summary = BlockCacheColumnFamilySummary.create(lookup);
      summaries.put(lookup, summary);
    }
    summary.incrementBlocks();
    summary.incrementHeapSize(cached.heapSize());
  }

  List<BlockCacheColumnFamilySummary> sorted =
      new ArrayList<BlockCacheColumnFamilySummary>(summaries.values());
  Collections.sort(sorted);
  return sorted;
}
// Simple calculators of sizes given factors and maxSize

/** @return cache size, in bytes, above which an eviction is triggered */
private long acceptableSize() {
  return (long) Math.floor(maxSize * acceptableFactor);
}

/** @return cache size, in bytes, that an eviction tries to get down to */
private long minSize() {
  return (long) Math.floor(maxSize * minFactor);
}

/** @return share of the minimum size given to single-access blocks, in bytes */
private long singleSize() {
  return (long) Math.floor(maxSize * singleFactor * minFactor);
}

/** @return share of the minimum size given to multi-access blocks, in bytes */
private long multiSize() {
  return (long) Math.floor(maxSize * multiFactor * minFactor);
}

/** @return share of the minimum size given to in-memory blocks, in bytes */
private long memorySize() {
  return (long) Math.floor(maxSize * memoryFactor * minFactor);
}
/**
 * Shuts down the victim handler (if any), the statistics scheduler and the
 * eviction thread (if any).
 * <p>
 * The scheduler is given up to ten 10 ms sleeps to terminate before it is
 * stopped forcibly. Termination is checked with {@code isTerminated()}:
 * {@code isShutdown()} is true as soon as {@code shutdown()} has been
 * called, so testing it would never wait and never reach the forced stop.
 * <p>
 * A cache constructed without an eviction thread has none to stop, so that
 * step is skipped and no longer throws a NullPointerException.
 */
public void shutdown() {
  if (victimHandler != null) {
    victimHandler.shutdown();
  }
  this.scheduleThreadPool.shutdown();
  for (int i = 0; i < 10; i++) {
    if (this.scheduleThreadPool.isTerminated()) {
      break;
    }
    Threads.sleep(10);
  }
  if (!this.scheduleThreadPool.isTerminated()) {
    List<Runnable> runnables = this.scheduleThreadPool.shutdownNow();
    LOG.debug("Still running " + runnables);
  }
  if (this.evictionThread != null) {
    this.evictionThread.shutdown();
  }
}
/**
 * Clears the cache. Used in tests.
 * <p>
 * Besides emptying the map this resets the element counter to zero and the
 * size counter to the bare structure overhead, so that the reported block
 * count and size match the now-empty map and a stale size cannot trigger
 * evictions that have nothing to evict. Not intended to be called while
 * other threads are using the cache.
 */
public void clearCache() {
  map.clear();
  elements.set(0);
  size.set(overhead);
}
/**
 * Collects the HFile names of all cached blocks by walking every key.
 * Used in testing. May be very inefficient.
 * @return the sorted set of cached file names
 */
SortedSet<String> getCachedFileNamesForTest() {
  SortedSet<String> names = new TreeSet<String>();
  for (BlockCacheKey key : map.keySet()) {
    String hfileName = key.getHfileName();
    names.add(hfileName);
  }
  return names;
}
/**
 * Counts the cached blocks per block type. Every cached buffer is cast to
 * HFileBlock to read its type. Used in testing.
 * @return number of cached blocks for each block type present in the cache
 */
Map<BlockType, Integer> getBlockTypeCountsForTest() {
  Map<BlockType, Integer> tally = new EnumMap<BlockType, Integer>(BlockType.class);
  for (CachedBlock cached : map.values()) {
    HFileBlock hfileBlock = (HFileBlock) cached.getBuffer();
    BlockType type = hfileBlock.getBlockType();
    Integer seen = tally.get(type);
    tally.put(type, seen == null ? 1 : seen + 1);
  }
  return tally;
}
/**
 * Counts the cached blocks per data block encoding, as reported by each
 * cache key. Used in testing.
 * @return number of cached blocks for each encoding present in the cache
 */
public Map<DataBlockEncoding, Integer> getEncodingCountsForTest() {
  Map<DataBlockEncoding, Integer> tally =
      new EnumMap<DataBlockEncoding, Integer>(DataBlockEncoding.class);
  for (BlockCacheKey key : map.keySet()) {
    DataBlockEncoding encoding = key.getDataBlockEncoding();
    Integer seen = tally.get(encoding);
    tally.put(encoding, seen == null ? 1 : seen + 1);
  }
  return tally;
}
/**
 * Installs the bucket cache that receives blocks evicted by the eviction
 * process and is consulted on lookups that miss here. Asserts that no
 * victim cache has been installed before.
 * @param handler the victim cache
 */
public void setVictimCache(BucketCache handler) {
  assert this.victimHandler == null;
  this.victimHandler = handler;
}
}