/** * Copyright 2011 The Apache Software Foundation * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.hbase.index.util; import java.io.IOException; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Set; import java.util.TreeMap; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.ClusterStatus; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.MasterNotRunningException; import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.UnknownRegionException; import org.apache.hadoop.hbase.ZooKeeperConnectionException; import org.apache.hadoop.hbase.catalog.MetaReader; import org.apache.hadoop.hbase.client.HBaseAdmin; import org.apache.hadoop.hbase.client.HConnection; import org.apache.hadoop.hbase.client.HConnectionManager; import org.apache.hadoop.hbase.client.HConnectionManager.HConnectable; import org.apache.hadoop.hbase.client.MetaScanner; import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor; import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitorBase; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.ipc.HRegionInterface; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.HBaseFsckRepair; import org.apache.hadoop.hbase.util.Pair; import org.apache.hadoop.hbase.zookeeper.ZKTable.TableState; import org.apache.hadoop.hbase.zookeeper.ZKTableReadOnly; import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher; import org.apache.zookeeper.KeeperException; public class SecondaryIndexColocator { public static boolean testingEnabled = false; public static final Log LOG = LogFactory.getLog(SecondaryIndexColocator.class); public Configuration conf = null; private Map<String, List<MetaInfo>> tableMap = new HashMap<String, List<MetaInfo>>(); private List<HRegionInfo> regionsToMove = new ArrayList<HRegionInfo>(); private Map<ServerName, Set<HRegionInfo>> rsToRegionMap = new HashMap<ServerName, Set<HRegionInfo>>(); private Map<byte[], TableState> disabledandDisablingTables = new TreeMap<byte[], TableState>( Bytes.BYTES_COMPARATOR); private Map<byte[], TableState> enabledOrEnablingTables = new TreeMap<byte[], TableState>( Bytes.BYTES_COMPARATOR); // private Set<HRegionInfo> staleMetaEntries = new HashSet<HRegionInfo>(); private List<Pair<String, TableState>> tablesToBeSetInZK = new ArrayList<Pair<String, TableState>>(); // List<String> rit = new ArrayList<String>(); private HBaseAdmin admin; private ClusterStatus status; private HConnection connection; public SecondaryIndexColocator(Configuration conf) { this.conf = conf; } public void setUp() throws IOException, ZooKeeperConnectionException { admin = new HBaseAdmin(conf); status = admin.getClusterStatus(); connection = admin.getConnection(); } public static void main(String args[]) throws Exception { Configuration config = HBaseConfiguration.create(); SecondaryIndexColocator secHbck = new SecondaryIndexColocator(config); secHbck.setUp(); secHbck.admin.setBalancerRunning(false, true); boolean inconsistent = secHbck.checkForCoLocationInconsistency(); if (inconsistent) { secHbck.fixCoLocationInconsistency(); } secHbck.admin.setBalancerRunning(true, true); } public boolean checkForCoLocationInconsistency() throws IOException, KeeperException, InterruptedException { getMetaInfo(); // Do we need to complete the partial disable table loadDisabledTables(); // handleRIT(); checkMetaInfoCosistency(); setTablesInZK(); // in the former steps there may have been movement of regions. So need to update // in memory map of tables. getMetaInfo(); checkCoLocationAndGetRegionsToBeMoved(); if (regionsToMove == null || regionsToMove.isEmpty()) { return false; } return true; } /* * private void handleRIT() { if(rit != null && !rit.isEmpty()){ for(String s : rit){ * RegionTransitionData data = ZKAssign.getDataNoWatch(zkw, pathOrRegionName, stat) } } } */ private void checkMetaInfoCosistency() throws IOException, KeeperException, InterruptedException { if (status == null) { throw new IOException("Cluster status is not available."); } Collection<ServerName> regionServers = status.getServers(); for (ServerName serverName : regionServers) { HRegionInterface server = connection.getHRegionConnection(serverName.getHostname(), serverName.getPort()); Set<HRegionInfo> onlineRegions = new HashSet<HRegionInfo>(); List<HRegionInfo> regions = server.getOnlineRegions(); if (regions == null) continue; onlineRegions.addAll(regions); if (rsToRegionMap == null) { rsToRegionMap = new HashMap<ServerName, Set<HRegionInfo>>(); } rsToRegionMap.put(serverName, onlineRegions); } if (tableMap != null && !tableMap.isEmpty()) { for (Map.Entry<String, List<MetaInfo>> e : tableMap.entrySet()) { if (isDisabledOrDisablingTable(Bytes.toBytes(e.getKey()))) { // It should be disabled...But we can check this if (disabledandDisablingTables.get(Bytes.toBytes(e.getKey())) == TableState.DISABLED) { continue; } else { if (LOG.isDebugEnabled()) { LOG.debug("Table " + e.getKey() + " is in DISABLING state. Trying to close all the regions for this table."); } // If the table is in DISABLING state , then there might be some regions which are // still online. Close the regions and set the table as DISABLED. for (MetaInfo metaInfo : e.getValue()) { List<ServerName> sn = new ArrayList<ServerName>(); for (Map.Entry<ServerName, Set<HRegionInfo>> entry : rsToRegionMap.entrySet()) { if (entry.getValue().contains(metaInfo.getRegionInfo())) { sn.add(entry.getKey()); } } if (sn.isEmpty()) { // region is not assigned anywhere ,so continue. continue; } else { HBaseFsckRepair.fixMultiAssignment(this.admin, metaInfo.getRegionInfo(), sn); } } Pair<String, TableState> p = new Pair<String, TableState>(); p.setFirst(e.getKey()); p.setSecond(TableState.DISABLED); tablesToBeSetInZK.add(p); } } else { // first we are checking here for the tables to be enabled which // we left in disabled stage in the previous step. if (!disabledandDisablingTables.containsKey(Bytes.toBytes(e.getKey())) && !enabledOrEnablingTables.containsKey(Bytes.toBytes(e.getKey()))) { // if reached here then this table, which is disabled // and still not present in our in-memory map of disabledTables , // should // be enabled. this.admin.enableTable(e.getKey()); this.enabledOrEnablingTables.put(Bytes.toBytes(e.getKey()), TableState.ENABLED); continue; } boolean movedRegions = false; for (MetaInfo metaInfo : e.getValue()) { List<ServerName> sn = new ArrayList<ServerName>(); for (Map.Entry<ServerName, Set<HRegionInfo>> entry : rsToRegionMap.entrySet()) { if (entry.getValue().contains(metaInfo.getRegionInfo())) { sn.add(entry.getKey()); } } if (sn.size() == 1 && sn.get(0).equals(metaInfo.getServerName())) { // this means region is deployed on correct rs according to META. if (LOG.isDebugEnabled()) { LOG.debug("Info in META for region " + metaInfo.getRegionInfo().getRegionNameAsString() + " is correct."); } continue; } // if it reaches here , it means that the region is deployed and // in some other rs. Need to find it and call unassign. if (sn.isEmpty()) { if (LOG.isDebugEnabled()) { LOG.debug("Region " + metaInfo.getRegionInfo().getRegionNameAsString() + " not deployed on any rs.Trying to assign"); } HBaseFsckRepair.fixUnassigned(this.admin, metaInfo.getRegionInfo()); HBaseFsckRepair.waitUntilAssigned(this.admin, metaInfo.getRegionInfo()); movedRegions = true; } else { if (LOG.isDebugEnabled()) { LOG.debug("Region " + metaInfo.getRegionInfo().getRegionNameAsString() + " is not deployed on the rs as mentioned in META. Re-assigning."); } HBaseFsckRepair.fixMultiAssignment(this.admin, metaInfo.getRegionInfo(), sn); HBaseFsckRepair.waitUntilAssigned(this.admin, metaInfo.getRegionInfo()); movedRegions = true; } } if (movedRegions) { Pair<String, TableState> p = new Pair<String, TableState>(); p.setFirst(e.getKey()); p.setSecond(TableState.ENABLED); tablesToBeSetInZK.add(p); } } } } } private void setTablesInZK() throws IOException { if (tablesToBeSetInZK != null && !tablesToBeSetInZK.isEmpty()) { for (Pair<String, TableState> p : tablesToBeSetInZK) { setStateInZK(p.getFirst(), p.getSecond()); } } } private void setStateInZK(String tableName, TableState state) throws IOException { if (state == TableState.ENABLED) { admin.setEnableTable(tableName); } if (state == TableState.DISABLED) { admin.setDisableTable(tableName); } } private boolean isDisabledOrDisablingTable(byte[] tableName) { if (disabledandDisablingTables != null && !disabledandDisablingTables.isEmpty()) { if (disabledandDisablingTables.containsKey(tableName)) return true; } return false; } public void fixCoLocationInconsistency() { if (regionsToMove != null && !regionsToMove.isEmpty()) { Iterator<HRegionInfo> itr = regionsToMove.iterator(); while (itr.hasNext()) { HRegionInfo hri = itr.next(); try { if (LOG.isDebugEnabled()) { LOG.debug("Moving region " + hri.getRegionNameAsString() + " to server "); } admin.move(hri.getEncodedNameAsBytes(), null); itr.remove(); } catch (UnknownRegionException e) { LOG.error("Unnkown region exception.", e); } catch (MasterNotRunningException e) { LOG.error("Master not running.", e); } catch (ZooKeeperConnectionException e) { LOG.error("Zookeeper connection exception.", e); } } } } private void checkCoLocationAndGetRegionsToBeMoved() { if (tableMap != null && !tableMap.isEmpty()) { Iterator<Map.Entry<String, List<MetaInfo>>> itr = tableMap.entrySet().iterator(); while (itr.hasNext()) { Map.Entry<String, List<MetaInfo>> e = itr.next(); if (!IndexUtils.isIndexTable(e.getKey()) && !IndexUtils.isCatalogTable(Bytes.toBytes(e.getKey()))) { if (isDisabledOrDisablingTable(Bytes.toBytes(e.getKey()))) continue; String indexTableName = IndexUtils.getIndexTableName(e.getKey()); List<MetaInfo> idxRegionList = tableMap.get(indexTableName); if (idxRegionList == null || idxRegionList.isEmpty()) { itr.remove(); continue; } else { getRegionsToMove(e.getValue(), idxRegionList); itr.remove(); } } } } } private void getRegionsToMove(List<MetaInfo> userRegions, List<MetaInfo> idxRegionList) { Iterator<MetaInfo> userRegionItr = userRegions.iterator(); while (userRegionItr.hasNext()) { MetaInfo userRegionMetaInfo = userRegionItr.next(); for (MetaInfo indexRegionMetaInfo : idxRegionList) { if (Bytes.equals(userRegionMetaInfo.getRegionInfo().getStartKey(), indexRegionMetaInfo .getRegionInfo().getStartKey())) { if (!userRegionMetaInfo.getServerName().equals(indexRegionMetaInfo.getServerName())) { if (regionsToMove == null) { regionsToMove = new ArrayList<HRegionInfo>(); } regionsToMove.add(userRegionMetaInfo.getRegionInfo()); if (LOG.isDebugEnabled()) { LOG.debug("Adding region " + userRegionMetaInfo.getRegionInfo().getRegionNameAsString() + " to regions to be moved list."); } } break; } } } } private void getMetaInfo() throws IOException { MetaScannerVisitor visitor = new MetaScannerVisitorBase() { // comparator to sort KeyValues with latest timestamp final Comparator<KeyValue> comp = new Comparator<KeyValue>() { public int compare(KeyValue k1, KeyValue k2) { return (int) (k1.getTimestamp() - k2.getTimestamp()); } }; public boolean processRow(Result result) throws IOException { try { long ts = Collections.max(result.list(), comp).getTimestamp(); // record the latest modification of this META record Pair<HRegionInfo, ServerName> pair = MetaReader.parseCatalogResult(result); if (pair != null) { String tableName = pair.getFirst().getTableNameAsString(); if (tableMap == null) { tableMap = new HashMap<String, List<MetaInfo>>(); } List<MetaInfo> regionsOfTable = tableMap.get(tableName); if (regionsOfTable == null) { regionsOfTable = new ArrayList<MetaInfo>(); tableMap.put(tableName, regionsOfTable); } Iterator<MetaInfo> itr = regionsOfTable.iterator(); while (itr.hasNext()) { MetaInfo m = itr.next(); if (m.getRegionInfo().equals(pair.getFirst())) { itr.remove(); break; } } MetaInfo m = new MetaInfo(pair.getFirst(), pair.getSecond(), ts); regionsOfTable.add(m); } return true; } catch (RuntimeException e) { LOG.error("Result=" + result); throw e; } } }; // Scan -ROOT- to pick up META regions MetaScanner.metaScan(conf, visitor, null, null, Integer.MAX_VALUE, HConstants.ROOT_TABLE_NAME); // Scan .META. to pick up user regions MetaScanner.metaScan(conf, visitor); } /** * Stores the regioninfo entries scanned from META */ static class MetaInfo { private HRegionInfo hri; private ServerName regionServer; private long timeStamp; public MetaInfo(HRegionInfo hri, ServerName regionServer, long modTime) { this.hri = hri; this.regionServer = regionServer; this.timeStamp = modTime; } public HRegionInfo getRegionInfo() { return this.hri; } public ServerName getServerName() { return this.regionServer; } public long getTimeStamp() { return this.timeStamp; } } public void setAdmin(HBaseAdmin admin, HConnection conn, ClusterStatus status) { if (testingEnabled) { this.admin = admin; this.connection = conn; this.status = status; } } private void loadDisabledTables() throws ZooKeeperConnectionException, IOException, KeeperException { HConnectionManager.execute(new HConnectable<Void>(conf) { @Override public Void connect(HConnection connection) throws IOException { ZooKeeperWatcher zkw = connection.getZooKeeperWatcher(); try { for (Entry<TableState, Set<String>> e : ZKTableReadOnly.getDisabledOrDisablingTables(zkw) .entrySet()) { for (String tableName : e.getValue()) { disabledandDisablingTables.put(Bytes.toBytes(tableName), e.getKey()); } } for (Entry<TableState, Set<String>> e : ZKTableReadOnly.getEnabledOrEnablingTables(zkw) .entrySet()) { for (String tableName : e.getValue()) { enabledOrEnablingTables.put(Bytes.toBytes(tableName), e.getKey()); } } // rit = ZKUtil.listChildrenNoWatch(zkw, zkw.assignmentZNode); } catch (KeeperException ke) { throw new IOException(ke); } return null; } }); checkDisabledAndEnabledTables(); } private void checkDisabledAndEnabledTables() throws IOException, KeeperException { if (disabledandDisablingTables != null && !disabledandDisablingTables.isEmpty()) { Map<byte[], TableState> disabledHere = new TreeMap<byte[], TableState>(Bytes.BYTES_COMPARATOR); Iterator<Entry<byte[], TableState>> itr = disabledandDisablingTables.entrySet().iterator(); while (itr.hasNext()) { Entry<byte[], TableState> tableEntry = itr.next(); if (!IndexUtils.isIndexTable(tableEntry.getKey())) { byte[] indexTableName = Bytes.toBytes(IndexUtils.getIndexTableName(tableEntry.getKey())); if (null == tableMap.get(Bytes.toString(indexTableName))) { continue; } boolean present = disabledandDisablingTables.containsKey(indexTableName); if (!present && (enabledOrEnablingTables.get(indexTableName) == TableState.ENABLED)) { // TODO How to handle ENABLING state(if it could happen). If try to disable ENABLING // table // it throws. if (LOG.isDebugEnabled()) { LOG.debug("Table " + Bytes.toString(tableEntry.getKey()) + " is disabled but corresponding index table is " + "enabled. So disabling " + Bytes.toString(indexTableName)); } this.admin.disableTable(indexTableName); disabledHere.put(indexTableName, TableState.DISABLED); } } else { if (tableEntry.getValue() != TableState.DISABLED) { continue; } byte[] userTableName = Bytes.toBytes(IndexUtils.getActualTableNameFromIndexTableName(Bytes .toString(tableEntry.getKey()))); if (!disabledandDisablingTables.containsKey(userTableName)) { if (LOG.isDebugEnabled()) { LOG.debug("Index Table " + Bytes.toString(tableEntry.getKey()) + " is disabled but corresponding user table is enabled. So Enabling " + Bytes.toString(tableEntry.getKey())); } // Here we are not enabling the table. We will do it in the next step // checkMetaInfoCosistency(). // Because if we do here, META will be updated and our in-memory map will have old // entries. // So it will surely cause unnecessary unassignments and assignments in the next step. // In the next // step anyway we are moving regions. So no problem doing it there. // this.admin.enableTable(tableName); itr.remove(); } } } disabledandDisablingTables.putAll(disabledHere); } } }