/**
* Copyright The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.client;
import java.io.IOException;
import java.util.AbstractMap.SimpleEntry;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HRegionLocation;
import org.apache.hadoop.hbase.ZooKeeperConnectionException;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
/**
* HTableMultiplexer provides a thread-safe non blocking PUT API across all the tables.
* Each put will be sharded into different buffer queues based on its destination region server.
* So each region server buffer queue will only have the puts which share the same destination.
* And each queue will have a flush worker thread to flush the puts request to the region server.
* If any queue is full, the HTableMultiplexer starts to drop the Put requests for that
* particular queue.
*
* Also all the puts will be retried as a configuration number before dropping.
* And the HTableMultiplexer can report the number of buffered requests and the number of the
* failed (dropped) requests in total or on per region server basis.
*
* This class is thread safe.
*/
@InterfaceAudience.Public
@InterfaceStability.Evolving
public class HTableMultiplexer {
private static final Log LOG = LogFactory.getLog(HTableMultiplexer.class.getName());
private static int poolID = 0;
static final String TABLE_MULTIPLEXER_FLUSH_FREQ_MS = "hbase.tablemultiplexer.flush.frequency.ms";
private Map<byte[], HTable> tableNameToHTableMap;
/** The map between each region server to its corresponding buffer queue */
private Map<HRegionLocation, LinkedBlockingQueue<PutStatus>>
serverToBufferQueueMap;
/** The map between each region server to its flush worker */
private Map<HRegionLocation, HTableFlushWorker> serverToFlushWorkerMap;
private Configuration conf;
private int retryNum;
private int perRegionServerBufferQueueSize;
/**
*
* @param conf The HBaseConfiguration
* @param perRegionServerBufferQueueSize determines the max number of the buffered Put ops
* for each region server before dropping the request.
*/
public HTableMultiplexer(Configuration conf,
int perRegionServerBufferQueueSize) throws ZooKeeperConnectionException {
this.conf = conf;
this.serverToBufferQueueMap = new ConcurrentHashMap<HRegionLocation,
LinkedBlockingQueue<PutStatus>>();
this.serverToFlushWorkerMap = new ConcurrentHashMap<HRegionLocation, HTableFlushWorker>();
this.tableNameToHTableMap = new ConcurrentHashMap<byte[], HTable>();
this.retryNum = conf.getInt("hbase.client.retries.number", 10);
this.perRegionServerBufferQueueSize = perRegionServerBufferQueueSize;
}
/**
* The put request will be buffered by its corresponding buffer queue. Return false if the queue
* is already full.
* @param table
* @param put
* @return true if the request can be accepted by its corresponding buffer queue.
* @throws IOException
*/
public boolean put(final byte[] table, final Put put) throws IOException {
return put(table, put, this.retryNum);
}
/**
* The puts request will be buffered by their corresponding buffer queue.
* Return the list of puts which could not be queued.
* @param table
* @param puts
* @return the list of puts which could not be queued
* @throws IOException
*/
public List<Put> put(final byte[] table, final List<Put> puts)
throws IOException {
if (puts == null)
return null;
List <Put> failedPuts = null;
boolean result;
for (Put put : puts) {
result = put(table, put, this.retryNum);
if (result == false) {
// Create the failed puts list if necessary
if (failedPuts == null) {
failedPuts = new ArrayList<Put>();
}
// Add the put to the failed puts list
failedPuts.add(put);
}
}
return failedPuts;
}
/**
* The put request will be buffered by its corresponding buffer queue. And the put request will be
* retried before dropping the request.
* Return false if the queue is already full.
* @param table
* @param put
* @param retry
* @return true if the request can be accepted by its corresponding buffer queue.
* @throws IOException
*/
public boolean put(final byte[] table, final Put put, int retry)
throws IOException {
if (retry <= 0) {
return false;
}
LinkedBlockingQueue<PutStatus> queue;
HTable htable = getHTable(table);
try {
htable.validatePut(put);
HRegionLocation loc = htable.getRegionLocation(put.getRow(), false);
if (loc != null) {
// Add the put pair into its corresponding queue.
queue = addNewRegionServer(loc, htable);
// Generate a MultiPutStatus obj and offer it into the queue
PutStatus s = new PutStatus(loc.getRegionInfo(), put, retry);
return queue.offer(s);
}
} catch (Exception e) {
LOG.debug("Cannot process the put " + put + " because of " + e);
}
return false;
}
/**
* @return the current HTableMultiplexerStatus
*/
public HTableMultiplexerStatus getHTableMultiplexerStatus() {
return new HTableMultiplexerStatus(serverToFlushWorkerMap);
}
private HTable getHTable(final byte[] table) throws IOException {
HTable htable = this.tableNameToHTableMap.get(table);
if (htable == null) {
synchronized (this.tableNameToHTableMap) {
htable = this.tableNameToHTableMap.get(table);
if (htable == null) {
htable = new HTable(conf, table);
this.tableNameToHTableMap.put(table, htable);
}
}
}
return htable;
}
private synchronized LinkedBlockingQueue<PutStatus> addNewRegionServer(
HRegionLocation addr, HTable htable) {
LinkedBlockingQueue<PutStatus> queue =
serverToBufferQueueMap.get(addr);
if (queue == null) {
// Create a queue for the new region server
queue = new LinkedBlockingQueue<PutStatus>(perRegionServerBufferQueueSize);
serverToBufferQueueMap.put(addr, queue);
// Create the flush worker
HTableFlushWorker worker = new HTableFlushWorker(conf, addr,
this, queue, htable);
this.serverToFlushWorkerMap.put(addr, worker);
// Launch a daemon thread to flush the puts
// from the queue to its corresponding region server.
String name = "HTableFlushWorker-" + addr.getHostnamePort() + "-"
+ (poolID++);
Thread t = new Thread(worker, name);
t.setDaemon(true);
t.start();
}
return queue;
}
/**
* HTableMultiplexerStatus keeps track of the current status of the HTableMultiplexer.
* report the number of buffered requests and the number of the failed (dropped) requests
* in total or on per region server basis.
*/
static class HTableMultiplexerStatus {
private long totalFailedPutCounter;
private long totalBufferedPutCounter;
private long maxLatency;
private long overallAverageLatency;
private Map<String, Long> serverToFailedCounterMap;
private Map<String, Long> serverToBufferedCounterMap;
private Map<String, Long> serverToAverageLatencyMap;
private Map<String, Long> serverToMaxLatencyMap;
public HTableMultiplexerStatus(
Map<HRegionLocation, HTableFlushWorker> serverToFlushWorkerMap) {
this.totalBufferedPutCounter = 0;
this.totalFailedPutCounter = 0;
this.maxLatency = 0;
this.overallAverageLatency = 0;
this.serverToBufferedCounterMap = new HashMap<String, Long>();
this.serverToFailedCounterMap = new HashMap<String, Long>();
this.serverToAverageLatencyMap = new HashMap<String, Long>();
this.serverToMaxLatencyMap = new HashMap<String, Long>();
this.initialize(serverToFlushWorkerMap);
}
private void initialize(
Map<HRegionLocation, HTableFlushWorker> serverToFlushWorkerMap) {
if (serverToFlushWorkerMap == null) {
return;
}
long averageCalcSum = 0;
int averageCalcCount = 0;
for (Map.Entry<HRegionLocation, HTableFlushWorker> entry : serverToFlushWorkerMap
.entrySet()) {
HRegionLocation addr = entry.getKey();
HTableFlushWorker worker = entry.getValue();
long bufferedCounter = worker.getTotalBufferedCount();
long failedCounter = worker.getTotalFailedCount();
long serverMaxLatency = worker.getMaxLatency();
AtomicAverageCounter averageCounter = worker.getAverageLatencyCounter();
// Get sum and count pieces separately to compute overall average
SimpleEntry<Long, Integer> averageComponents = averageCounter
.getComponents();
long serverAvgLatency = averageCounter.getAndReset();
this.totalBufferedPutCounter += bufferedCounter;
this.totalFailedPutCounter += failedCounter;
if (serverMaxLatency > this.maxLatency) {
this.maxLatency = serverMaxLatency;
}
averageCalcSum += averageComponents.getKey();
averageCalcCount += averageComponents.getValue();
this.serverToBufferedCounterMap.put(addr.getHostnamePort(),
bufferedCounter);
this.serverToFailedCounterMap
.put(addr.getHostnamePort(),
failedCounter);
this.serverToAverageLatencyMap.put(addr.getHostnamePort(),
serverAvgLatency);
this.serverToMaxLatencyMap
.put(addr.getHostnamePort(),
serverMaxLatency);
}
this.overallAverageLatency = averageCalcCount != 0 ? averageCalcSum
/ averageCalcCount : 0;
}
public long getTotalBufferedCounter() {
return this.totalBufferedPutCounter;
}
public long getTotalFailedCounter() {
return this.totalFailedPutCounter;
}
public long getMaxLatency() {
return this.maxLatency;
}
public long getOverallAverageLatency() {
return this.overallAverageLatency;
}
public Map<String, Long> getBufferedCounterForEachRegionServer() {
return this.serverToBufferedCounterMap;
}
public Map<String, Long> getFailedCounterForEachRegionServer() {
return this.serverToFailedCounterMap;
}
public Map<String, Long> getMaxLatencyForEachRegionServer() {
return this.serverToMaxLatencyMap;
}
public Map<String, Long> getAverageLatencyForEachRegionServer() {
return this.serverToAverageLatencyMap;
}
}
private static class PutStatus {
private final HRegionInfo regionInfo;
private final Put put;
private final int retryCount;
public PutStatus(final HRegionInfo regionInfo, final Put put,
final int retryCount) {
this.regionInfo = regionInfo;
this.put = put;
this.retryCount = retryCount;
}
public HRegionInfo getRegionInfo() {
return regionInfo;
}
public Put getPut() {
return put;
}
public int getRetryCount() {
return retryCount;
}
}
/**
* Helper to count the average over an interval until reset.
*/
private static class AtomicAverageCounter {
private long sum;
private int count;
public AtomicAverageCounter() {
this.sum = 0L;
this.count = 0;
}
public synchronized long getAndReset() {
long result = this.get();
this.reset();
return result;
}
public synchronized long get() {
if (this.count == 0) {
return 0;
}
return this.sum / this.count;
}
public synchronized SimpleEntry<Long, Integer> getComponents() {
return new SimpleEntry<Long, Integer>(sum, count);
}
public synchronized void reset() {
this.sum = 0l;
this.count = 0;
}
public synchronized void add(long value) {
this.sum += value;
this.count++;
}
}
private static class HTableFlushWorker implements Runnable {
private HRegionLocation addr;
private Configuration conf;
private LinkedBlockingQueue<PutStatus> queue;
private HTableMultiplexer htableMultiplexer;
private AtomicLong totalFailedPutCount;
private AtomicInteger currentProcessingPutCount;
private AtomicAverageCounter averageLatency;
private AtomicLong maxLatency;
private HTable htable; // For Multi
public HTableFlushWorker(Configuration conf, HRegionLocation addr,
HTableMultiplexer htableMultiplexer,
LinkedBlockingQueue<PutStatus> queue, HTable htable) {
this.addr = addr;
this.conf = conf;
this.htableMultiplexer = htableMultiplexer;
this.queue = queue;
this.totalFailedPutCount = new AtomicLong(0);
this.currentProcessingPutCount = new AtomicInteger(0);
this.averageLatency = new AtomicAverageCounter();
this.maxLatency = new AtomicLong(0);
this.htable = htable;
}
public long getTotalFailedCount() {
return totalFailedPutCount.get();
}
public long getTotalBufferedCount() {
return queue.size() + currentProcessingPutCount.get();
}
public AtomicAverageCounter getAverageLatencyCounter() {
return this.averageLatency;
}
public long getMaxLatency() {
return this.maxLatency.getAndSet(0);
}
private boolean resubmitFailedPut(PutStatus failedPutStatus,
HRegionLocation oldLoc) throws IOException {
Put failedPut = failedPutStatus.getPut();
// The currentPut is failed. So get the table name for the currentPut.
byte[] tableName = failedPutStatus.getRegionInfo().getTableName();
// Decrease the retry count
int retryCount = failedPutStatus.getRetryCount() - 1;
if (retryCount <= 0) {
// Update the failed counter and no retry any more.
return false;
} else {
// Retry one more time
return this.htableMultiplexer.put(tableName, failedPut, retryCount);
}
}
@Override
public void run() {
List<PutStatus> processingList = new ArrayList<PutStatus>();
/**
* The frequency in milliseconds for the current thread to process the corresponding
* buffer queue.
**/
long frequency = conf.getLong(TABLE_MULTIPLEXER_FLUSH_FREQ_MS, 100);
// initial delay
try {
Thread.sleep(frequency);
} catch (InterruptedException e) {
} // Ignore
long start, elapsed;
int failedCount = 0;
while (true) {
try {
start = elapsed = EnvironmentEdgeManager.currentTimeMillis();
// Clear the processingList, putToStatusMap and failedCount
processingList.clear();
failedCount = 0;
// drain all the queued puts into the tmp list
queue.drainTo(processingList);
currentProcessingPutCount.set(processingList.size());
if (processingList.size() > 0) {
ArrayList<Put> list = new ArrayList<Put>(processingList.size());
for (PutStatus putStatus: processingList) {
list.add(putStatus.getPut());
}
// Process this multiput request
List<Put> failed = null;
Object[] results = new Object[list.size()];
try {
htable.batch(list, results);
} catch (IOException e) {
LOG.debug("Caught some exceptions " + e
+ " when flushing puts to region server " + addr.getHostnamePort());
} finally {
// mutate list so that it is empty for complete success, or
// contains only failed records
// results are returned in the same order as the requests in list
// walk the list backwards, so we can remove from list without
// impacting the indexes of earlier members
for (int i = results.length - 1; i >= 0; i--) {
if (results[i] instanceof Result) {
// successful Puts are removed from the list here.
list.remove(i);
}
}
failed = list;
}
if (failed != null) {
if (failed.size() == processingList.size()) {
// All the puts for this region server are failed. Going to retry it later
for (PutStatus putStatus: processingList) {
if (!resubmitFailedPut(putStatus, this.addr)) {
failedCount++;
}
}
} else {
Set<Put> failedPutSet = new HashSet<Put>(failed);
for (PutStatus putStatus: processingList) {
if (failedPutSet.contains(putStatus.getPut())
&& !resubmitFailedPut(putStatus, this.addr)) {
failedCount++;
}
}
}
}
// Update the totalFailedCount
this.totalFailedPutCount.addAndGet(failedCount);
elapsed = EnvironmentEdgeManager.currentTimeMillis() - start;
// Update latency counters
averageLatency.add(elapsed);
if (elapsed > maxLatency.get()) {
maxLatency.set(elapsed);
}
// Log some basic info
if (LOG.isDebugEnabled()) {
LOG.debug("Processed " + currentProcessingPutCount
+ " put requests for " + addr.getHostnamePort() + " and "
+ failedCount + " failed" + ", latency for this send: "
+ elapsed);
}
// Reset the current processing put count
currentProcessingPutCount.set(0);
}
// Sleep for a while
if (elapsed == start) {
elapsed = EnvironmentEdgeManager.currentTimeMillis() - start;
}
if (elapsed < frequency) {
Thread.sleep(frequency - elapsed);
}
} catch (Exception e) {
// Log all the exceptions and move on
LOG.debug("Caught some exceptions " + e
+ " when flushing puts to region server "
+ addr.getHostnamePort());
}
}
}
}
}