/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.client.crosssite;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.HTableInterface;
import org.apache.hadoop.hbase.client.HTableInterfaceFactory;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.crosssite.ClusterInfo;
import org.apache.hadoop.hbase.crosssite.CrossSiteConstants;
import org.apache.hadoop.hbase.crosssite.CrossSiteUtil;
import org.apache.hadoop.hbase.crosssite.CrossSiteZNodes;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.MergeSortIterator;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
import org.apache.zookeeper.KeeperException;

/**
 * Implements the scanner interface for the cross site table. This scanner iterates over the
 * per-cluster tables in parallel and merges their results into a single, globally row-ordered
 * stream.
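 * <p>
 * Illustrative usage sketch only (not part of the original source): it assumes a
 * {@code CrossSiteHTable} whose constructor takes the configuration and table name, and whose
 * {@code getScanner(Scan)} returns an instance of this class.
 *
 * <pre>
 * // Scan the cross site table "t1" and iterate the merged, row-ordered results.
 * CrossSiteHTable crossSiteTable = new CrossSiteHTable(conf, Bytes.toBytes("t1"));
 * ResultScanner scanner = crossSiteTable.getScanner(new Scan());
 * try {
 *   for (Result result : scanner) {
 *     // each Result comes from one of the underlying cluster tables
 *   }
 * } finally {
 *   scanner.close();
 * }
 * </pre>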
 */
class CrossSiteClientScanner implements ResultScanner {
  private static final Log LOG = LogFactory.getLog(CrossSiteClientScanner.class);

  private final Configuration configuration;
  private final Scan scan;
  private final String tableName;
  private final ExecutorService pool;
  private boolean ignoreUnavailableClusters;
  private List<Pair<ClusterInfo, Pair<byte[], byte[]>>> clusterStartStopKeyPairs;
  private List<ScannerIterator> clusterScannerIterators;
  private Iterator<Result> resultIterator;
  private boolean closed = false;
  private boolean failover;
  private CrossSiteZNodes znodes;
  private final HTableInterfaceFactory hTableFactory;

  protected CrossSiteClientScanner(final Configuration conf, final Scan scan,
      final byte[] tableName,
      List<Pair<ClusterInfo, Pair<byte[], byte[]>>> clusterStartStopKeyPairs, boolean failover,
      ExecutorService pool, CrossSiteZNodes znodes, HTableInterfaceFactory hTableFactory)
      throws IOException {
    this.configuration = conf;
    this.scan = scan;
    this.tableName = Bytes.toString(tableName);
    this.pool = pool;
    this.ignoreUnavailableClusters = configuration.getBoolean(
        CrossSiteConstants.CROSS_SITE_TABLE_SCAN_IGNORE_UNAVAILABLE_CLUSTERS, false);
    this.failover = failover;
    this.clusterStartStopKeyPairs = clusterStartStopKeyPairs;
    this.znodes = znodes;
    clusterScannerIterators = new ArrayList<ScannerIterator>();
    this.hTableFactory = hTableFactory;
    initialize();
  }

  /**
   * Opens a scanner per cluster in parallel and wraps them in a merge-sort iterator so that
   * results are returned in row order across all clusters.
   */
  private void initialize() throws IOException {
    int count = this.clusterStartStopKeyPairs.size();
    List<Future<ScannerIterator>> futures = new ArrayList<Future<ScannerIterator>>();
    for (int i = count - 1; i >= 0; i--) {
      Callable<ScannerIterator> callable = createCallable(clusterStartStopKeyPairs.get(i),
          this.tableName, ignoreUnavailableClusters);
      if (callable != null) {
        futures.add(pool.submit(callable));
      }
    }
    IOException exception = null;
    for (Future<ScannerIterator> future : futures) {
      try {
        ScannerIterator iter = future.get();
        if (iter != null) {
          clusterScannerIterators.add(iter);
        }
      } catch (InterruptedException e) {
        exception = new IOException("Interrupted", e);
      } catch (ExecutionException e) {
        exception = new IOException(e.getCause());
      }
    }
    if (exception != null) {
      close();
      // just throw the last exception
      throw exception;
    }
    if (clusterScannerIterators.size() == 0) {
      // add an empty scanner iterator
      LOG.debug("The ScannerIterator is empty, the EmptyScannerIterator is used instead");
      clusterScannerIterators.add(new EmptyScannerIterator());
    }
    this.resultIterator = new MergeSortIterator<Result>(clusterScannerIterators,
        new ResultComparator());
  }

  private Callable<ScannerIterator> createCallable(
      final Pair<ClusterInfo, Pair<byte[], byte[]>> clusterStartStopKeyPair,
      final String tableName, final boolean ignore) {
    return new Callable<ScannerIterator>() {
      @Override
      public ScannerIterator call() throws Exception {
        if (LOG.isDebugEnabled()) {
          LOG.debug("Start initialization of scanner" + " for the cluster "
              + clusterStartStopKeyPair.getFirst());
        }
        String clusterTableName = CrossSiteUtil.getClusterTableName(tableName,
            clusterStartStopKeyPair.getFirst().getName());
        ScannerIterator scanIterator = null;
        try {
          try {
            HTableInterface table = hTableFactory.createHTableInterface(
                getClusterConf(configuration, clusterStartStopKeyPair.getFirst().getAddress()),
                Bytes.toBytes(clusterTableName));
            Scan s = new Scan(scan);
            s.setStartRow(clusterStartStopKeyPair.getSecond().getFirst());
            s.setStopRow(clusterStartStopKeyPair.getSecond().getSecond());
            scanIterator = new ScannerIterator(clusterStartStopKeyPair.getFirst(),
                clusterStartStopKeyPair.getFirst(), table, s);
          } catch (RuntimeException e) {
            if (e.getCause() instanceof IOException) {
              throw (IOException) e.getCause();
            } else {
              throw new IOException(e);
            }
          }
        } catch (IOException e) {
          LOG.info("Fail to connect to the CSBTable " + tableName + " in cluster "
              + clusterStartStopKeyPair.getFirst(), e);
          if (failover && CrossSiteUtil.isFailoverException(e)) {
            LOG.warn("Start to failover to the peers for cluster "
                + clusterStartStopKeyPair.getFirst() + ". Please notice, the data may be stale.");
            // Get the peers once from the zk.. this would be helpful when a new peer is added
            // after a CrossSiteHTable instance has been created
            List<ClusterInfo> peerClusters = null;
            try {
              peerClusters = znodes.getPeerClusters(clusterStartStopKeyPair.getFirst().getName());
            } catch (KeeperException ke) {
              LOG.warn("Fail to connect the global zookeeper", ke);
            }
            if (peerClusters != null) {
              for (ClusterInfo peerCluster : peerClusters) {
                LOG.info("Starting to failover to the peer cluster" + peerCluster
                    + " for the cluster " + clusterStartStopKeyPair.getFirst());
                Configuration conf = getClusterConf(configuration, peerCluster.getAddress());
                try {
                  String peerClusterTableName = CrossSiteUtil.getPeerClusterTableName(tableName,
                      clusterStartStopKeyPair.getFirst().getName(), peerCluster.getName());
                  try {
                    HTableInterface table = hTableFactory.createHTableInterface(conf,
                        Bytes.toBytes(peerClusterTableName));
                    Scan s = new Scan(scan);
                    s.setStartRow(clusterStartStopKeyPair.getSecond().getFirst());
                    s.setStopRow(clusterStartStopKeyPair.getSecond().getSecond());
                    scanIterator = new ScannerIterator(clusterStartStopKeyPair.getFirst(),
                        peerCluster, table, s);
                  } catch (RuntimeException re) {
                    if (re.getCause() instanceof IOException) {
                      throw (IOException) re.getCause();
                    } else {
                      throw new IOException(re);
                    }
                  }
                  LOG.info("Failover to the cluster " + peerCluster
                      + ". Please notice, the data may be stale.");
                  break;
                } catch (IOException ioe) {
                  LOG.warn("Fail to connect to peer cluster '" + peerCluster
Will try other peers", ioe); } } } } else { if (ignore) { LOG.warn("The scanner for the cluster " + clusterStartStopKeyPair.getFirst() + " will be ignored"); return null; } else { throw new IOException("Failed to initialize CSBTable '" + tableName + "' in cluster " + clusterStartStopKeyPair.getFirst()); } } } if (scanIterator == null) { if (!ignore) { throw new IOException("Failed to initialize CSBTable '" + tableName + "' in main and peer clusters"); } else { LOG.warn("The scanner for the cluster " + clusterStartStopKeyPair.getFirst().getName() + " will be ignored"); return null; } } return scanIterator; } }; } @Override public void close() { if (closed) return; for (ScannerIterator it : clusterScannerIterators) { it.close(); } this.closed = true; } @Override public Result next() throws IOException { if (this.closed) return null; try { return resultIterator.next(); } catch (Throwable t) { throw new IOException(t); } } @Override public Result[] next(int nbRows) throws IOException { // Collect values to be returned here ArrayList<Result> resultSets = new ArrayList<Result>(nbRows); for (int i = 0; i < nbRows; i++) { Result next = next(); if (next != null) { resultSets.add(next); } else { break; } } return resultSets.toArray(new Result[resultSets.size()]); } @Override public Iterator<Result> iterator() { return resultIterator; } private static class ResultComparator implements Comparator<Result> { @Override public int compare(Result r1, Result r2) { if (r1 == null && r2 != null) { return 1; } else if (r1 == null && r2 == null) { return 0; } else if (r1 != null && r2 == null) { return -1; } return Bytes.compareTo(r1.getRow(), r2.getRow()); } } private class ScannerIterator implements Iterator<Result> { private HTableInterface table; private ResultScanner scanner; private Result next = null; boolean closed = false; private Future<Result> future; private Result lastNotNullResult; private Scan internalScan; private ClusterInfo currentCluster; private ClusterInfo masterCluster; protected ScannerIterator() { } protected ScannerIterator(ClusterInfo masterCluster, ClusterInfo currentCluster, HTableInterface table, Scan internalScan) throws IOException { this.masterCluster = masterCluster; this.currentCluster = currentCluster; this.table = table; this.internalScan = internalScan; this.scanner = table.getScanner(internalScan); nextInternal(); } public void close() { if (closed) return; this.scanner.close(); try { this.table.close(); } catch (IOException e) { LOG.error("Exception while closing table '" + Bytes.toString(table.getTableName()), e); } this.closed = true; } @Override public boolean hasNext() { if (!closed && next == null) { try { next = future == null ? null : future.get(); if (next != null) { lastNotNullResult = next; } nextInternal(); return next != null; } catch (Throwable t) { LOG.info( "Fail to connect to the CSBTable " + tableName + " in cluster " + masterCluster.getName(), t); if (failover && CrossSiteUtil.isFailoverException(t)) { LOG.warn("Start to failover to the peers for cluster " + masterCluster.getName() + ". Please notice, the data may be stale."); this.scanner.close(); try { this.table.close(); } catch (IOException e) { LOG.error("Exception while closing table '" + Bytes.toString(table.getTableName()), e); } // Get the peers once from the zk.. 
            // peer is added after a CrossSiteHTable instance has been created
            List<ClusterInfo> peerClusters = null;
            try {
              peerClusters = znodes.getPeerClusters(masterCluster.getName());
            } catch (KeeperException ke) {
              LOG.warn("Fail to connect the global zookeeper", ke);
            }
            if (peerClusters != null && !peerClusters.isEmpty()) {
              List<ClusterInfo> allClusters = new ArrayList<ClusterInfo>();
              allClusters.add(masterCluster);
              allClusters.addAll(peerClusters);
              int index = allClusters.indexOf(currentCluster);
              if (index < 0) {
                index = 0;
              }
              for (int i = index + 1; i < allClusters.size() + index; i++) {
                ClusterInfo failoverClusterInfo = allClusters.get(i % allClusters.size());
                LOG.info("Starting to failover to the peer cluster " + failoverClusterInfo
                    + " for the cluster " + masterCluster.getName());
                try {
                  Configuration conf = getClusterConf(configuration,
                      failoverClusterInfo.getAddress());
                  String failoverClusterTableName = CrossSiteUtil.getPeerClusterTableName(
                      tableName, masterCluster.getName(), failoverClusterInfo.getName());
                  try {
                    table = hTableFactory.createHTableInterface(conf,
                        Bytes.toBytes(failoverClusterTableName));
                    Scan s = new Scan(internalScan);
                    if (lastNotNullResult != null) {
                      s.setStartRow(lastNotNullResult.getRow());
                    }
                    scanner = table.getScanner(s);
                    if (lastNotNullResult != null) {
                      next = scanner.next();
                    }
                    if (next != null && Bytes.equals(next.getRow(), lastNotNullResult.getRow())) {
                      next = scanner.next();
                    }
                    if (next != null) {
                      lastNotNullResult = next;
                      nextInternal();
                    }
                    LOG.info("Failover to the cluster " + failoverClusterInfo
                        + ". Please notice, the data may be stale.");
                    currentCluster = failoverClusterInfo;
                    return next != null;
                  } catch (Throwable re) {
                    if (re.getCause() instanceof IOException) {
                      throw (IOException) re.getCause();
                    } else {
                      throw new IOException(re);
                    }
                  }
                } catch (IOException ioe) {
                  LOG.warn("Fail to connect to peer cluster '" + failoverClusterInfo
                      + "'. Will try other peers", ioe);
                }
              }
            }
          }
          if (ignoreUnavailableClusters) {
            LOG.warn("The scanner for the cluster " + masterCluster.getName()
                + " will be ignored");
            close();
            return false;
          }
          CrossSiteClientScanner.this.close();
          throw new RuntimeException(t instanceof ExecutionException ? t.getCause() : t);
        }
      }
      return !closed;
    }

    private void nextInternal() {
      if (closed) {
        future = null;
      } else {
        future = pool.submit(new Callable<Result>() {
          @Override
          public Result call() throws Exception {
            Result ret = scanner.next();
            if (ret == null) {
              close();
            }
            return ret;
          }
        });
      }
    }

    @Override
    public Result next() {
      // since hasNext() does the real advancing, we call this to determine
      // if there is a next before proceeding.
      if (!hasNext()) {
        return null;
      }
      // if we get to here, then hasNext() has given us an item to return.
      // we want to return the item and then null out the next pointer, so
      // we use a temporary variable.
      Result temp = next;
      next = null;
      return temp;
    }

    @Override
    public void remove() {
      throw new UnsupportedOperationException();
    }
  }

  /**
   * An empty implementation of the ScannerIterator.
   */
  private class EmptyScannerIterator extends ScannerIterator {
    public EmptyScannerIterator() throws IOException {
      super();
    }

    @Override
    public boolean hasNext() {
      return false;
    }

    @Override
    public Result next() {
      return null;
    }

    @Override
    public void close() {
    }

    @Override
    public void remove() {
      throw new UnsupportedOperationException();
    }
  }

  private Configuration getClusterConf(Configuration conf, String address) throws IOException {
    Configuration otherConf = new Configuration(conf);
    ZKUtil.applyClusterKeyToConf(otherConf, address);
    return otherConf;
  }
}

// TODO: handle the scan metrics which are passed via Scan op attrs. Follow the work happening
// around this in HBASE-9272.
// Why do we need a client side parallel scanner with merge sort always? At least in the Prefix
// locator?