/**
* Copyright 2011 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.index;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Random;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.ClusterStatus;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.index.util.IndexUtils;
import org.apache.hadoop.hbase.master.LoadBalancer;
import org.apache.hadoop.hbase.master.MasterServices;
import org.apache.hadoop.hbase.master.RegionPlan;
import org.apache.hadoop.hbase.util.Bytes;
/**
 * A {@link LoadBalancer} that wraps another balancer (the delegator) and tries to colocate the
 * regions of a user table with the regions of its index table, i.e. the table whose name is the
 * user table name followed by {@link Constants#INDEX_TABLE_SUFFIX}.
 * <ul>
 * <li>{@link #roundRobinAssignment(List, List)} and {@link #retainAssignment(Map, List)}: user
 * regions are placed by the delegator and index regions follow the user region that has the same
 * start key.</li>
 * <li>{@link #randomAssignment(HRegionInfo, List)}: either the index region or the user region
 * follows the other one, depending on which of them was assigned first. In case of master failover
 * there is a chance that the znodes of the index table and the actual table are left behind; in
 * that scenario randomAssignment may be requested for either the actual table region first or the
 * index table region first.</li>
 * </ul>
 * The chosen locations are remembered per table in {@link #getRegionLocation()}. Every access to
 * that map made by this class happens while holding the map's monitor.
 * <p>
 * {@link #setDelegator(LoadBalancer)} has to be called before any other method: all operations
 * dereference the delegator without a null check.
 */
public class SecIndexLoadBalancer implements LoadBalancer {

  private static final Log LOG = LogFactory.getLog(SecIndexLoadBalancer.class);

  /** Used to pick a server for user regions that come without a previous location. */
  private static final Random RANDOM = new Random(System.currentTimeMillis());

  /** Balancer that decides the placement of user (non-index) regions. */
  private LoadBalancer delegator;

  private MasterServices master;

  /** Table name -> (region -> server the region is planned to be, or is, hosted on). */
  private final Map<String, Map<HRegionInfo, ServerName>> regionLocation =
      new ConcurrentHashMap<String, Map<HRegionInfo, ServerName>>();

  /** @return the configuration of the delegator */
  @Override
  public Configuration getConf() {
    return this.delegator.getConf();
  }

  /** Passes the configuration on to the delegator. */
  @Override
  public void setConf(Configuration configuration) {
    this.delegator.setConf(configuration);
  }

  /** Passes the cluster status on to the delegator. */
  @Override
  public void setClusterStatus(ClusterStatus st) {
    this.delegator.setClusterStatus(st);
  }

  /**
   * @return the live (not copied) map of table name to region locations maintained by this
   *         balancer
   */
  public Map<String, Map<HRegionInfo, ServerName>> getRegionLocation() {
    return regionLocation;
  }

  /** Remembers the master services and passes them on to the delegator. */
  @Override
  public void setMasterServices(MasterServices masterServices) {
    this.master = masterServices;
    this.delegator.setMasterServices(masterServices);
  }

  /**
   * Sets the balancer that is used to place user regions.
   * @param defaultLoadBalancer the balancer to delegate to
   */
  public void setDelegator(LoadBalancer defaultLoadBalancer) {
    this.delegator = defaultLoadBalancer;
  }

  /**
   * Computes region plans for the given cluster state. Depending on
   * <code>hbase.master.loadbalance.bytable</code> (default <code>true</code>) the cluster state is
   * treated as the state of a single table or of the whole cluster.
   * @param clusterState regions currently hosted by each server
   * @return the region plans, or <code>null</code> when balancing is skipped
   */
  @Override
  public List<RegionPlan> balanceCluster(Map<ServerName, List<HRegionInfo>> clusterState) {
    synchronized (this.regionLocation) {
      boolean balanceByTable =
          this.master.getConfiguration().getBoolean("hbase.master.loadbalance.bytable", true);
      if (LOG.isDebugEnabled()) {
        LOG.debug("Seperating user and index regions of each region server in the cluster.");
      }
      if (balanceByTable) {
        return balanceSingleTable(clusterState);
      }
      return balanceWholeCluster(clusterState);
    }
  }

  /**
   * Table wise balancing. A user table is balanced by the delegator and the resulting plans are
   * saved. For an index table the delegator is skipped and the plans are derived from the saved
   * locations of the corresponding user table. Must be called with the regionLocation monitor
   * held.
   */
  private List<RegionPlan> balanceSingleTable(Map<ServerName, List<HRegionInfo>> clusterState) {
    // Check and modify the regionLocation map based on values of cluster state because we will
    // call balancer only when the cluster is in stable state and reliable.
    String tableName = null;
    Map<HRegionInfo, ServerName> regionMap = null;
    for (Entry<ServerName, List<HRegionInfo>> serverVsRegionList : clusterState.entrySet()) {
      List<HRegionInfo> regionInfos = serverVsRegionList.getValue();
      if (regionInfos.isEmpty()) {
        continue;
      }
      // Just get the table name from any one of the values in the regioninfo list
      if (null == tableName) {
        tableName = regionInfos.get(0).getTableNameAsString();
        regionMap = this.regionLocation.get(tableName);
      }
      if (regionMap != null) {
        for (HRegionInfo hri : regionInfos) {
          updateServer(regionMap, serverVsRegionList.getKey(), hri);
        }
      }
    }
    if (null == tableName) {
      // No server hosts any region, so the table cannot be determined and there is nothing to
      // move. Returning null means skipping balancing.
      if (LOG.isDebugEnabled()) {
        LOG.debug("No regions found in the cluster state. Skipping balancing.");
      }
      return null;
    }

    if (!tableName.endsWith(Constants.INDEX_TABLE_SUFFIX)) {
      List<RegionPlan> regionPlanList = this.delegator.balanceCluster(clusterState);
      // regionPlanList is null means skipping balancing.
      if (null == regionPlanList) {
        if (LOG.isDebugEnabled()) {
          LOG.debug("User region plan is null.");
        }
        return null;
      }
      saveRegionPlanList(regionPlanList);
      return regionPlanList;
    }

    // balanceCluster called for an index table: no user regions are available in clusterState, so
    // build the plans from the saved locations of the user table, if there are any.
    Map<HRegionInfo, ServerName> userRegionMap =
        regionLocation.get(extractActualTableName(tableName));
    if (null == userRegionMap) {
      // no previous region plan for user table.
      if (LOG.isDebugEnabled()) {
        LOG.debug("No user table region plans present for index table " + tableName + '.');
      }
      return null;
    }
    List<RegionPlan> userRegionPlans = new ArrayList<RegionPlan>(1);
    for (Entry<HRegionInfo, ServerName> e : userRegionMap.entrySet()) {
      userRegionPlans.add(new RegionPlan(e.getKey(), null, e.getValue()));
    }
    if (LOG.isDebugEnabled()) {
      LOG.debug("Preparing index region plans from user region plans for table " + tableName
          + ".");
    }
    return prepareIndexPlan(clusterState, new ArrayList<RegionPlan>(1), userRegionPlans);
  }

  /**
   * Whole cluster balancing. The cluster state is split into user regions and index regions, the
   * user regions are balanced by the delegator and index region plans are appended based on all
   * saved locations. Must be called with the regionLocation monitor held.
   */
  private List<RegionPlan> balanceWholeCluster(Map<ServerName, List<HRegionInfo>> clusterState) {
    Map<ServerName, List<HRegionInfo>> userClusterState =
        new HashMap<ServerName, List<HRegionInfo>>(1);
    Map<ServerName, List<HRegionInfo>> indexClusterState =
        new HashMap<ServerName, List<HRegionInfo>>(1);
    for (Entry<ServerName, List<HRegionInfo>> serverVsRegionList : clusterState.entrySet()) {
      ServerName sn = serverVsRegionList.getKey();
      List<HRegionInfo> idxRegionsToBeMoved = new ArrayList<HRegionInfo>();
      List<HRegionInfo> uRegionsToBeMoved = new ArrayList<HRegionInfo>();
      for (HRegionInfo hri : serverVsRegionList.getValue()) {
        if (hri.isMetaRegion() || hri.isRootRegion()) {
          continue;
        }
        // table name may change every time thats why always need to get table entries.
        String tableName = hri.getTableNameAsString();
        Map<HRegionInfo, ServerName> regionMap = this.regionLocation.get(tableName);
        if (regionMap != null) {
          updateServer(regionMap, sn, hri);
        }
        if (tableName.endsWith(Constants.INDEX_TABLE_SUFFIX)) {
          idxRegionsToBeMoved.add(hri);
        } else {
          uRegionsToBeMoved.add(hri);
        }
      }
      // there may be dummy entries here if assignments by table is set
      userClusterState.put(sn, uRegionsToBeMoved);
      indexClusterState.put(sn, idxRegionsToBeMoved);
    }

    List<RegionPlan> regionPlanList = this.delegator.balanceCluster(userClusterState);
    if (null == regionPlanList) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("User region plan is null.");
      }
      regionPlanList = new ArrayList<RegionPlan>(1);
    } else {
      saveRegionPlanList(regionPlanList);
    }

    // Turn every saved location into a plan; prepareIndexPlan only uses the ones whose table and
    // start key match an index region.
    List<RegionPlan> savedLocationPlans = new ArrayList<RegionPlan>(1);
    for (Map<HRegionInfo, ServerName> regionMap : this.regionLocation.values()) {
      for (Entry<HRegionInfo, ServerName> e : regionMap.entrySet()) {
        savedLocationPlans.add(new RegionPlan(e.getKey(), null, e.getValue()));
      }
    }
    if (LOG.isDebugEnabled()) {
      LOG.debug("Preparing index region plans from user region plans for whole cluster.");
    }
    return prepareIndexPlan(indexClusterState, regionPlanList, savedLocationPlans);
  }

  /** Replaces the saved server of the region when it differs from the given server. */
  private void updateServer(Map<HRegionInfo, ServerName> regionMap, ServerName sn, HRegionInfo hri) {
    ServerName existingServer = regionMap.get(hri);
    if (!sn.equals(existingServer)) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("There is a mismatch in the existing server name for the region " + hri
            + ". Replacing the server " + existingServer + " with " + sn + ".");
      }
      regionMap.put(hri, sn);
    }
  }

  /**
   * Creates the index region plans based on the corresponding user region plans.
   * @param indexClusterState index regions currently hosted by each server
   * @param regionPlanList list the index region plans are appended to; it is also the result
   * @param regionPlanListCopy user region plans to follow; cleared before returning
   * @return <code>regionPlanList</code>
   */
  private List<RegionPlan> prepareIndexPlan(Map<ServerName, List<HRegionInfo>> indexClusterState,
      List<RegionPlan> regionPlanList, List<RegionPlan> regionPlanListCopy) {
    if (LOG.isDebugEnabled()) {
      LOG.debug("Entered prepareIndexPlan");
    }
    for (RegionPlan userPlan : regionPlanListCopy) {
      RegionPlan indexPlan = findIndexRegionPlan(userPlan, indexClusterState);
      if (null == indexPlan) {
        continue;
      }
      HRegionInfo indexRegionInfo = indexPlan.getRegionInfo();
      ServerName destination = userPlan.getDestination();
      if (LOG.isDebugEnabled()) {
        LOG.debug("Selected server " + destination
            + " as destination for region " + indexRegionInfo.getRegionNameAsString()
            + "from user region plan.");
      }
      putRegionPlan(indexRegionInfo, destination);
      regionPlanList.add(indexPlan);
    }
    regionPlanListCopy.clear();
    // if no user regions to balance then return newly formed index region plan.
    if (LOG.isDebugEnabled()) {
      LOG.debug("Exited prepareIndexPlan");
    }
    return regionPlanList;
  }

  /**
   * Looks for the first index region that belongs to the table of the given user region plan, has
   * the same start key and is hosted on a server other than the destination of the plan.
   * @return a plan moving that index region to the destination of the user region plan, or
   *         <code>null</code> if there is no such index region
   */
  private RegionPlan findIndexRegionPlan(RegionPlan userPlan,
      Map<ServerName, List<HRegionInfo>> indexClusterState) {
    HRegionInfo userRegion = userPlan.getRegionInfo();
    ServerName destination = userPlan.getDestination();
    for (Entry<ServerName, List<HRegionInfo>> serverVsRegionList : indexClusterState.entrySet()) {
      ServerName server = serverVsRegionList.getKey();
      if (destination.equals(server)) {
        // index regions that are already on the destination server do not have to move.
        continue;
      }
      for (HRegionInfo indexRegionInfo : serverVsRegionList.getValue()) {
        String actualTableName = extractActualTableName(indexRegionInfo.getTableNameAsString());
        if (!userRegion.getTableNameAsString().equals(actualTableName)) {
          continue;
        }
        if (0 != Bytes.compareTo(userRegion.getStartKey(), indexRegionInfo.getStartKey())) {
          continue;
        }
        return new RegionPlan(indexRegionInfo, server, destination);
      }
    }
    return null;
  }

  /** Saves the destination of every plan in the region location map. */
  private void saveRegionPlanList(List<RegionPlan> regionPlanList) {
    for (RegionPlan regionPlan : regionPlanList) {
      HRegionInfo hri = regionPlan.getRegionInfo();
      if (LOG.isDebugEnabled()) {
        LOG.debug("Saving region plan of region " + hri.getRegionNameAsString() + '.');
      }
      putRegionPlan(hri, regionPlan.getDestination());
    }
  }

  /**
   * Lets the delegator assign the user regions round robin, saves that plan and then adds the
   * index regions to the servers of their user regions.
   * @return the assignment plan; <code>null</code> if the delegator returned <code>null</code>
   *         for the user regions or if <code>regions</code> is empty
   */
  @Override
  public Map<ServerName, List<HRegionInfo>> roundRobinAssignment(List<HRegionInfo> regions,
      List<ServerName> servers) {
    List<HRegionInfo> userRegions = new ArrayList<HRegionInfo>(1);
    List<HRegionInfo> indexRegions = new ArrayList<HRegionInfo>(1);
    for (HRegionInfo hri : regions) {
      seperateUserAndIndexRegion(hri, userRegions, indexRegions);
    }
    Map<ServerName, List<HRegionInfo>> bulkPlan = null;
    if (!userRegions.isEmpty()) {
      bulkPlan = this.delegator.roundRobinAssignment(userRegions, servers);
      if (null == bulkPlan) {
        if (LOG.isDebugEnabled()) {
          LOG.debug("No region plan for user regions.");
        }
        return null;
      }
      synchronized (this.regionLocation) {
        savePlan(bulkPlan);
      }
    }
    return prepareIndexRegionPlan(indexRegions, bulkPlan, servers);
  }

  /** Adds the region to the index regions or to the user regions based on its table name. */
  private void seperateUserAndIndexRegion(HRegionInfo hri, List<HRegionInfo> userRegions,
      List<HRegionInfo> indexRegions) {
    if (hri.getTableNameAsString().endsWith(Constants.INDEX_TABLE_SUFFIX)) {
      indexRegions.add(hri);
    } else {
      userRegions.add(hri);
    }
  }

  /**
   * Strips {@link Constants#INDEX_TABLE_SUFFIX} from the end of an index table name. The name is
   * not checked; callers pass only names that end with the suffix.
   */
  private String extractActualTableName(String indexTableName) {
    int endIndex = indexTableName.length() - Constants.INDEX_TABLE_SUFFIX.length();
    return indexTableName.substring(0, endIndex);
  }

  /**
   * Adds every index region to the plan, on the server saved for the user region with the same
   * start key or, if there is none, on the server chosen by
   * {@link #randomAssignment(HRegionInfo, List)}. Index regions for which no server is found are
   * left out of the plan.
   * @param bulkPlan plan of the user regions, may be <code>null</code>
   * @return <code>bulkPlan</code>, or a new map if it was <code>null</code> and there are index
   *         regions
   */
  private Map<ServerName, List<HRegionInfo>> prepareIndexRegionPlan(List<HRegionInfo> indexRegions,
      Map<ServerName, List<HRegionInfo>> bulkPlan, List<ServerName> servers) {
    if (null == indexRegions || indexRegions.isEmpty()) {
      return bulkPlan;
    }
    if (null == bulkPlan) {
      bulkPlan = new ConcurrentHashMap<ServerName, List<HRegionInfo>>(1);
    }
    for (HRegionInfo hri : indexRegions) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("Preparing region plan for index region " + hri.getRegionNameAsString() + '.');
      }
      ServerName destServer = getDestServerForIdxRegion(hri);
      if (null == destServer) {
        destServer = this.randomAssignment(hri, servers);
      }
      if (null == destServer) {
        continue;
      }
      List<HRegionInfo> destServerRegions = bulkPlan.get(destServer);
      if (null == destServerRegions) {
        destServerRegions = new ArrayList<HRegionInfo>(1);
        bulkPlan.put(destServer, destServerRegions);
      }
      if (LOG.isDebugEnabled()) {
        LOG.debug("Server " + destServer + " selected for region "
            + hri.getRegionNameAsString() + '.');
      }
      destServerRegions.add(hri);
    }
    return bulkPlan;
  }

  /**
   * Finds the saved server of the user region that has the same start key as the given index
   * region and, when found, saves it as the location of the index region as well.
   * @return the server, or <code>null</code> if no location is saved for such a user region
   */
  private ServerName getDestServerForIdxRegion(HRegionInfo hri) {
    // Every time we calculate the table name because in case of master restart the index regions
    // may be coming for different index tables.
    String actualTableName = extractActualTableName(hri.getTableNameAsString());
    synchronized (this.regionLocation) {
      Map<HRegionInfo, ServerName> regionMap = regionLocation.get(actualTableName);
      if (null == regionMap) {
        // Can this case come
        return null;
      }
      for (Entry<HRegionInfo, ServerName> e : regionMap.entrySet()) {
        if (0 == Bytes.compareTo(e.getKey().getStartKey(), hri.getStartKey())) {
          // put index region location if corresponding user region found in regionLocation map.
          putRegionPlan(hri, e.getValue());
          return e.getValue();
        }
      }
    }
    return null;
  }

  /** Saves the server of every region of the bulk plan in the region location map. */
  private void savePlan(Map<ServerName, List<HRegionInfo>> bulkPlan) {
    for (Entry<ServerName, List<HRegionInfo>> e : bulkPlan.entrySet()) {
      ServerName server = e.getKey();
      if (LOG.isDebugEnabled()) {
        LOG.debug("Saving user regions' plans for server " + server + '.');
      }
      for (HRegionInfo hri : e.getValue()) {
        putRegionPlan(hri, server);
      }
      if (LOG.isDebugEnabled()) {
        LOG.debug("Saved user regions' plans for server " + server + '.');
      }
    }
  }

  /**
   * Lets the delegator assign the user regions retaining their previous servers, saves that plan
   * and then adds the index regions to the servers of their user regions.
   * @param regions regions with their previous server; the server may be <code>null</code>
   * @return the assignment plan; <code>null</code> if the delegator returned <code>null</code>
   *         for the user regions or if <code>regions</code> is empty
   */
  @Override
  public Map<ServerName, List<HRegionInfo>> retainAssignment(Map<HRegionInfo, ServerName> regions,
      List<ServerName> servers) {
    Map<HRegionInfo, ServerName> userRegionsMap = new ConcurrentHashMap<HRegionInfo, ServerName>(1);
    List<HRegionInfo> indexRegions = new ArrayList<HRegionInfo>(1);
    for (Entry<HRegionInfo, ServerName> e : regions.entrySet()) {
      seperateUserAndIndexRegion(e, userRegionsMap, indexRegions, servers);
    }
    Map<ServerName, List<HRegionInfo>> bulkPlan = null;
    if (!userRegionsMap.isEmpty()) {
      bulkPlan = this.delegator.retainAssignment(userRegionsMap, servers);
      if (null == bulkPlan) {
        if (LOG.isDebugEnabled()) {
          LOG.debug("Empty region plan for user regions.");
        }
        return null;
      }
      synchronized (this.regionLocation) {
        savePlan(bulkPlan);
      }
    }
    return prepareIndexRegionPlan(indexRegions, bulkPlan, servers);
  }

  /**
   * Adds the region of the entry to the index regions or, together with its server, to the user
   * regions map. A user region without a server gets a randomly picked one because the map is a
   * ConcurrentHashMap, which does not accept null values; <code>servers</code> must not be empty
   * in that case ({@link Random#nextInt(int)} rejects a bound of 0).
   */
  private void seperateUserAndIndexRegion(Entry<HRegionInfo, ServerName> e,
      Map<HRegionInfo, ServerName> userRegionsMap, List<HRegionInfo> indexRegions,
      List<ServerName> servers) {
    HRegionInfo hri = e.getKey();
    if (hri.getTableNameAsString().endsWith(Constants.INDEX_TABLE_SUFFIX)) {
      indexRegions.add(hri);
      return;
    }
    ServerName previousServer = e.getValue();
    if (null == previousServer) {
      previousServer = servers.get(RANDOM.nextInt(servers.size()));
    }
    userRegionsMap.put(hri, previousServer);
  }

  /** Delegates to the delegator; index regions get no special treatment here. */
  @Override
  public Map<HRegionInfo, ServerName> immediateAssignment(List<HRegionInfo> regions,
      List<ServerName> servers) {
    return this.delegator.immediateAssignment(regions, servers);
  }

  /**
   * Chooses a server for a single region. If a location is saved for the corresponding region of
   * the other table (index region for a user region and vice versa) and that server is online, it
   * is used; otherwise the delegator chooses the server. The result is saved in the region
   * location map.
   * @return the chosen server, or <code>null</code> if the delegator did not return one
   */
  @Override
  public ServerName randomAssignment(HRegionInfo regionInfo, List<ServerName> servers) {
    if (regionInfo.isMetaTable()) {
      // if the region is root or meta table region no need to check for any region plan.
      return this.delegator.randomAssignment(regionInfo, servers);
    }
    ServerName sn = getServerNameFromMap(regionInfo, servers);
    if (sn == null) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("No server found for region " + regionInfo.getRegionNameAsString() + '.');
      }
      sn = getRandomServer(regionInfo, servers);
    }
    if (LOG.isDebugEnabled()) {
      LOG.debug("Destination server for region " + regionInfo.getRegionNameAsString() + " is "
          + ((sn == null) ? "null" : sn.toString()) + '.');
    }
    return sn;
  }

  /**
   * Asks the delegator for a random server and saves it as the location of the region. For an
   * index region the delegator is asked with a region of the actual table that has the same start
   * and end key.
   * @return the server chosen by the delegator, may be <code>null</code>
   */
  private ServerName getRandomServer(HRegionInfo regionInfo, List<ServerName> servers) {
    String tableName = regionInfo.getTableNameAsString();
    HRegionInfo regionToAssign = regionInfo;
    if (IndexUtils.isIndexTable(tableName)) {
      regionToAssign =
          new HRegionInfo(Bytes.toBytes(extractActualTableName(tableName)),
              regionInfo.getStartKey(), regionInfo.getEndKey());
    }
    ServerName sn = this.delegator.randomAssignment(regionToAssign, servers);
    if (sn == null) {
      return null;
    }
    synchronized (this.regionLocation) {
      putRegionPlan(regionInfo, sn);
    }
    return sn;
  }

  /**
   * Looks up the saved server of the region that corresponds to the given region in the other
   * table (same start key). If that server is online it is saved as the location of the given
   * region and returned.
   * @return the server to follow, or <code>null</code> if nothing is saved for the corresponding
   *         region, its server is not online, or both regions were already saved on the same
   *         server (in which case both saved locations are removed)
   */
  private ServerName getServerNameFromMap(HRegionInfo regionInfo, List<ServerName> onlineServers) {
    String tableNameOfCurrentRegion = regionInfo.getTableNameAsString();
    String correspondingTableName;
    if (tableNameOfCurrentRegion.endsWith(Constants.INDEX_TABLE_SUFFIX)) {
      // if the region is index region need to check whether user region plan available or not.
      correspondingTableName = extractActualTableName(tableNameOfCurrentRegion);
    } else {
      // if the region is user region need to check whether index region plan available or not.
      correspondingTableName = tableNameOfCurrentRegion + Constants.INDEX_TABLE_SUFFIX;
    }
    synchronized (this.regionLocation) {
      // Only on the first time both the map of the corresponding table and the map of the table of
      // the current region may be null.
      Map<HRegionInfo, ServerName> correspondingRegionMap =
          this.regionLocation.get(correspondingTableName);
      if (null == correspondingRegionMap) {
        if (LOG.isDebugEnabled()) {
          LOG.debug("No region plans in regionLocation for table " + correspondingTableName);
        }
        return null;
      }
      Map<HRegionInfo, ServerName> currentRegionMap =
          this.regionLocation.get(tableNameOfCurrentRegion);
      for (Entry<HRegionInfo, ServerName> candidate : correspondingRegionMap.entrySet()) {
        if (0 != Bytes.compareTo(candidate.getKey().getStartKey(), regionInfo.getStartKey())) {
          continue;
        }
        ServerName previousServer =
            (null == currentRegionMap) ? null : currentRegionMap.get(regionInfo);
        ServerName sn = candidate.getValue();
        // if servername of index region and user region are same in regionLocation clean
        // previous plans and return null
        if (null != previousServer && previousServer.equals(sn)) {
          correspondingRegionMap.remove(candidate.getKey());
          currentRegionMap.remove(regionInfo);
          if (LOG.isDebugEnabled()) {
            LOG.debug("Both user region plan and index region plan "
                + "in regionLocation are same for the region."
                + regionInfo.getRegionNameAsString() + " The location is " + sn
                + ". Hence clearing from regionLocation.");
          }
          return null;
        }
        if (null == sn) {
          continue;
        }
        if (onlineServers.contains(sn)) {
          if (LOG.isDebugEnabled()) {
            LOG.debug("Updating the region " + regionInfo.getRegionNameAsString()
                + " with server " + sn);
          }
          putRegionPlan(regionInfo, sn);
          return sn;
        }
        if (LOG.isDebugEnabled()) {
          LOG.debug("The location " + sn + " of region "
              + candidate.getKey().getRegionNameAsString()
              + " is not in online. Selecting other region server.");
        }
        return null;
      }
      return null;
    }
  }

  /**
   * Saves the server as the location of the region, creating the map of the region's table when
   * it does not exist yet.
   * @param regionInfo the region
   * @param sn the server of the region
   */
  public void putRegionPlan(HRegionInfo regionInfo, ServerName sn) {
    String tableName = regionInfo.getTableNameAsString();
    synchronized (this.regionLocation) {
      Map<HRegionInfo, ServerName> regionMap = this.regionLocation.get(tableName);
      if (null == regionMap) {
        if (LOG.isDebugEnabled()) {
          LOG.debug("No regions of table " + tableName + " in the region plan.");
        }
        regionMap = new ConcurrentHashMap<HRegionInfo, ServerName>(1);
        this.regionLocation.put(tableName, regionMap);
      }
      regionMap.put(regionInfo, sn);
    }
  }

  /**
   * Removes all saved region locations of the table.
   * @param tableName name of the table
   */
  public void clearTableRegionPlans(String tableName) {
    if (LOG.isDebugEnabled()) {
      LOG.debug("Clearing regions plans from regionLocation for table " + tableName);
    }
    synchronized (this.regionLocation) {
      this.regionLocation.remove(tableName);
    }
  }

  /**
   * Removes the saved location of the region, if there is one.
   * @param regionInfo the region
   */
  public void clearRegionInfoFromRegionPlan(HRegionInfo regionInfo) {
    String tableName = regionInfo.getTableNameAsString();
    synchronized (this.regionLocation) {
      Map<HRegionInfo, ServerName> regionMap = this.regionLocation.get(tableName);
      if (null == regionMap) {
        if (LOG.isDebugEnabled()) {
          LOG.debug("No regions of table " + tableName + " in the region plan.");
        }
        return;
      }
      regionMap.remove(regionInfo);
      if (LOG.isDebugEnabled()) {
        LOG.debug("The regioninfo " + regionInfo + " removed from the region plan");
      }
    }
  }
}