/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.regionserver;
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.NavigableMap;
import java.util.Random;
import java.util.Set;
import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Chore;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.LargeTests;
import org.apache.hadoop.hbase.NotServingRegionException;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.Stoppable;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HConnection;
import org.apache.hadoop.hbase.client.HConnectionManager;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.MetaScanner;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.RequestConverter;
import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.ScanRequest;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.util.PairOfSameType;
import org.apache.hadoop.hbase.util.StoppableImplementation;
import org.apache.hadoop.hbase.util.Threads;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import com.google.common.collect.Iterators;
import com.google.common.collect.Sets;
import com.google.protobuf.ServiceException;
@Category(LargeTests.class)
public class TestEndToEndSplitTransaction {
/** Logger for this test class; {@link #log(String)} writes to it at INFO level. */
private static final Log LOG = LogFactory.getLog(TestEndToEndSplitTransaction.class);
/** Testing utility used to start/stop the mini cluster and to create and load tables. */
private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
/** Configuration obtained from {@link #TEST_UTIL}; handed to MetaScanner and HTable below. */
private static final Configuration conf = TEST_UTIL.getConfiguration();
/**
 * Sets the client retry count to 5 and then starts the mini cluster used by every test in
 * this class.
 */
@BeforeClass
public static void beforeAllTests() throws Exception {
  // conf is the very object returned by TEST_UTIL.getConfiguration() (see its declaration)
  final int clientRetries = 5;
  conf.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, clientRetries);
  TEST_UTIL.startMiniCluster();
}
/** Shuts down the mini cluster started in {@link #beforeAllTests()}. */
@AfterClass
public static void afterAllTests() throws Exception {
TEST_UTIL.shutdownMiniCluster();
}
/**
 * Walks a {@link SplitTransaction} through its phases by hand and, after each step, uses
 * {@link #test} to check whether a get and a scan succeed for a row before the split row
 * ("aaa") and for a row after it ("zzz").
 */
@Test
public void testMasterOpsWhileSplitting() throws Exception {
byte[] tableName = Bytes.toBytes("TestSplit");
byte[] familyName = Bytes.toBytes("fam");
HTable ht = TEST_UTIL.createTable(tableName, familyName);
TEST_UTIL.loadTable(ht, familyName);
ht.close();
// the split is driven directly against the first region server of the mini cluster
HRegionServer server = TEST_UTIL.getHBaseCluster().getRegionServer(0);
byte []firstRow = Bytes.toBytes("aaa");
byte []splitRow = Bytes.toBytes("lll");
byte []lastRow = Bytes.toBytes("zzz");
// NOTE(review): this connection is never closed in this method -- confirm whether that is
// intentional (e.g. because the instance is shared) or a leak.
HConnection con = HConnectionManager
.getConnection(TEST_UTIL.getConfiguration());
// this will also cache the region
byte[] regionName = con.locateRegion(tableName, splitRow).getRegionInfo()
.getRegionName();
HRegion region = server.getRegion(regionName);
SplitTransaction split = new SplitTransaction(region, splitRow);
split.prepare();
// 1. phase I: create the daughters; neither side of the split row is expected to be readable
PairOfSameType<HRegion> regions = split.createDaughters(server, server);
assertFalse(test(con, tableName, firstRow, server));
assertFalse(test(con, tableName, lastRow, server));
// passing null as services prevents final step
// 2. most of phase II
split.openDaughters(server, null, regions.getFirst(), regions.getSecond());
assertFalse(test(con, tableName, firstRow, server));
assertFalse(test(con, tableName, lastRow, server));
// 3. finish phase II
// note that this replicates some code from SplitTransaction
// 2nd daughter first
server.postOpenDeployTasks(regions.getSecond(), server.getCatalogTracker(), true);
// Add to online regions
server.addToOnlineRegions(regions.getSecond());
// THIS is the crucial point:
// only the 2nd daughter was added, so querying before the split key should still fail.
assertFalse(test(con, tableName, firstRow, server));
// past the split key is ok.
assertTrue(test(con, tableName, lastRow, server));
// first daughter second
server.postOpenDeployTasks(regions.getFirst(), server.getCatalogTracker(), true);
// Add to online regions
server.addToOnlineRegions(regions.getFirst());
// both daughters are online now, so both rows are expected to be readable
assertTrue(test(con, tableName, firstRow, server));
assertTrue(test(con, tableName, lastRow, server));
// 4. phase III
split.transitionZKNode(server, server, regions.getFirst(),
regions.getSecond());
assertTrue(test(con, tableName, firstRow, server));
assertTrue(test(con, tableName, lastRow, server));
}
/**
 * Re-locates the region holding {@code row} through {@code con}, then issues one get and one
 * scan for that row directly against {@code server}.
 *
 * @param con connection used to (re)locate the region
 * @param tableName table the row belongs to
 * @param row row to look up, get and start the scan at
 * @param server region server the get and scan are sent to
 * @return true if locating, getting and scanning all completed without an IOException,
 *         false as soon as any of them fails with one
 */
private boolean test(HConnection con, byte[] tableName, byte[] row,
    HRegionServer server) {
  // HTable is deliberately not used here, to avoid its timeouts and retries
  try {
    final byte[] locatedRegion =
        con.relocateRegion(tableName, row).getRegionInfo().getRegionName();
    // from here on both the get and the scan are expected to succeed without exception
    ProtobufUtil.get(server, locatedRegion, new Get(row));
    final ScanRequest request =
        RequestConverter.buildScanRequest(locatedRegion, new Scan(row), 1, true);
    try {
      server.scan(null, request);
      return true;
    } catch (ServiceException se) {
      // unwrap into an IOException so the outer handler reports the failure as false
      throw ProtobufUtil.getRemoteException(se);
    }
  } catch (IOException x) {
    return false;
  }
}
/**
 * Tests that the client sees meta table changes as atomic during splits.
 *
 * <p>A {@link RegionSplitter} thread keeps splitting regions of a fresh table while a
 * {@link RegionChecker} chore concurrently verifies the region boundaries. Any failure
 * recorded by either helper is rethrown here, and one last verification runs after the
 * chore has been told to stop.
 *
 * @throws Throwable whatever the splitter, the checker or the final verification raised
 */
@Test
public void testFromClientSideWhileSplitting() throws Throwable {
  LOG.info("Starting testFromClientSideWhileSplitting");
  final byte[] TABLENAME = Bytes.toBytes("testFromClientSideWhileSplitting");
  final byte[] FAMILY = Bytes.toBytes("family");
  // SplitTransaction will update the meta table by offlining the parent region, and adding
  // info for daughters.
  HTable table = TEST_UTIL.createTable(TABLENAME, FAMILY);
  Stoppable stopper = new StoppableImplementation();
  try {
    RegionSplitter regionSplitter = new RegionSplitter(table);
    RegionChecker regionChecker = new RegionChecker(conf, stopper, TABLENAME);
    regionChecker.start();
    regionSplitter.start();
    // wait until the splitter is finished
    regionSplitter.join();
    stopper.stop(null);
    if (regionChecker.ex != null) {
      throw regionChecker.ex;
    }
    if (regionSplitter.ex != null) {
      throw regionSplitter.ex;
    }
    // one final check
    regionChecker.verify();
  } finally {
    // make sure the checker chore is told to stop even when we leave early (e.g. join()
    // was interrupted), and release the table handle created above
    if (!stopper.isStopped()) {
      stopper.stop("test finished");
    }
    IOUtils.closeQuietly(table);
  }
}
/**
 * Thread that, five times over, picks a random region of the table, writes rows into it,
 * flushes and major-compacts it, and then requests a split at the midpoint of its key range.
 * Row keys and split points are 4-byte ints (see {@link #addData(int)}). A failure is stored
 * in {@link #ex} rather than propagated.
 */
static class RegionSplitter extends Thread {
  /** Failure caught by {@link #run()}, or null if it completed normally. */
  Throwable ex;
  HTable table;
  byte[] tableName, family;
  HBaseAdmin admin;
  /** Handle on the meta table; opened in the constructor and closed when run() ends. */
  HTable metaTable;
  /** First region server of the mini cluster; its flush/compaction queues are polled. */
  HRegionServer rs;

  /**
   * @param table table whose regions are to be split; its first column family is used for
   *        the rows written by {@link #addData(int)}
   * @throws IOException if the table descriptor cannot be read or the meta table cannot be
   *         opened
   */
  RegionSplitter(HTable table) throws IOException {
    this.table = table;
    this.tableName = table.getTableName();
    this.family = table.getTableDescriptor().getFamiliesKeys().iterator().next();
    admin = TEST_UTIL.getHBaseAdmin();
    rs = TEST_UTIL.getMiniHBaseCluster().getRegionServer(0);
    metaTable = new HTable(conf, HConstants.META_TABLE_NAME);
  }

  @Override
  public void run() {
    try {
      Random random = new Random();
      for (int i = 0; i < 5; i++) {
        NavigableMap<HRegionInfo, ServerName> regions =
            MetaScanner.allTableRegions(conf, tableName, false);
        if (regions.size() == 0) {
          continue;
        }
        int regionIndex = random.nextInt(regions.size());
        // pick a random region and split it into two
        HRegionInfo region = Iterators.get(regions.keySet().iterator(), regionIndex);
        // pick the mid split point; an empty start/end key stands for 0 / Integer.MAX_VALUE
        int start = 0, end = Integer.MAX_VALUE;
        if (region.getStartKey().length > 0) {
          start = Bytes.toInt(region.getStartKey());
        }
        if (region.getEndKey().length > 0) {
          end = Bytes.toInt(region.getEndKey());
        }
        int mid = start + ((end - start) / 2);
        byte[] splitPoint = Bytes.toBytes(mid);
        // put some rows on both sides of the split point
        addData(start);
        addData(mid);
        flushAndBlockUntilDone(region.getRegionName());
        compactAndBlockUntilDone(region.getRegionName());
        log("Initiating region split for:" + region.getRegionNameAsString());
        try {
          admin.split(region.getRegionName(), splitPoint);
          // wait until the split is complete
          blockUntilRegionSplit(50000, region.getRegionName(), true);
        } catch (NotServingRegionException ignored) {
          // deliberately ignored; presumably the region is no longer served by the time
          // the split is requested -- the next iteration simply picks another region
        }
      }
    } catch (Throwable t) {
      this.ex = t;
    } finally {
      if (metaTable != null) {
        IOUtils.closeQuietly(metaTable);
      }
    }
  }

  /** Writes 100 consecutive int-keyed rows starting at {@code start} and flushes commits. */
  void addData(int start) throws IOException {
    for (int i = start; i < start + 100; i++) {
      Put put = new Put(Bytes.toBytes(i));
      put.add(family, family, Bytes.toBytes(i));
      table.put(put);
    }
    table.flushCommits();
  }

  /** Requests a flush of the region, then polls until the server's flush queue is empty. */
  void flushAndBlockUntilDone(byte[] regionName) throws IOException, InterruptedException {
    log("flushing region: " + Bytes.toStringBinary(regionName));
    admin.flush(regionName);
    log("blocking until flush is complete: " + Bytes.toStringBinary(regionName));
    // give the request time to show up in the queue before polling it
    Threads.sleepWithoutInterrupt(500);
    while (rs.cacheFlusher.getFlushQueueSize() > 0) {
      Threads.sleep(50);
    }
  }

  /**
   * Requests a major compaction of the region, then polls until the server's compaction
   * queue is empty.
   */
  void compactAndBlockUntilDone(byte[] regionName) throws IOException,
      InterruptedException {
    log("Compacting region: " + Bytes.toStringBinary(regionName));
    admin.majorCompact(regionName);
    log("blocking until compaction is complete: " + Bytes.toStringBinary(regionName));
    // give the request time to show up in the queue before polling it
    Threads.sleepWithoutInterrupt(500);
    while (rs.compactSplitThread.getCompactionQueueSize() > 0) {
      Threads.sleep(50);
    }
  }

  /**
   * Blocks until the region is marked as a split parent in META and, optionally, until both
   * daughters are in META and opened by the region server.
   *
   * @param timeout overall budget in milliseconds, shared by all the waits below
   * @param regionName name of the region being split
   * @param waitForDaughters whether to also wait for the two daughter regions
   * @throws IOException if {@code waitForDaughters} is set but the daughters could not be
   *         determined (timeout, or the META row carried no usable region info)
   */
  void blockUntilRegionSplit(long timeout, final byte[] regionName, boolean waitForDaughters)
      throws IOException, InterruptedException {
    long start = System.currentTimeMillis();
    log("blocking until region is split:" + Bytes.toStringBinary(regionName));
    HRegionInfo daughterA = null, daughterB = null;
    while (System.currentTimeMillis() - start < timeout) {
      Result result = getRegionRow(regionName);
      if (result == null) {
        break;
      }
      HRegionInfo region = HRegionInfo.getHRegionInfo(result);
      if (region == null) {
        // the row carries no region info; nothing more can be learned by polling it
        break;
      }
      if (region.isSplitParent()) {
        log("found parent region: " + region.toString());
        PairOfSameType<HRegionInfo> pair = HRegionInfo.getDaughterRegions(result);
        daughterA = pair.getFirst();
        daughterB = pair.getSecond();
        break;
      }
      sleep(100);
    }
    // if we are here, this means the region split is complete or timed out
    if (!waitForDaughters) {
      return;
    }
    if (daughterA == null || daughterB == null) {
      // fail with a descriptive error instead of a NullPointerException further down
      throw new IOException("Failed to get daughters of region "
          + Bytes.toStringBinary(regionName) + " within " + timeout + " ms, daughterA="
          + daughterA + ", daughterB=" + daughterB);
    }
    long rem = timeout - (System.currentTimeMillis() - start);
    blockUntilRegionIsInMeta(rem, daughterA.getRegionName());
    rem = timeout - (System.currentTimeMillis() - start);
    blockUntilRegionIsInMeta(rem, daughterB.getRegionName());
    rem = timeout - (System.currentTimeMillis() - start);
    blockUntilRegionIsOpenedByRS(rem, daughterA.getRegionName());
    rem = timeout - (System.currentTimeMillis() - start);
    blockUntilRegionIsOpenedByRS(rem, daughterB.getRegionName());
  }

  /** Fetches the META row keyed by the given region name. */
  Result getRegionRow(byte[] regionName) throws IOException {
    Get get = new Get(regionName);
    return metaTable.get(get);
  }

  /**
   * Polls META until the region's row holds a region info that is not offline, or until
   * {@code timeout} milliseconds have elapsed (in which case it returns silently).
   */
  void blockUntilRegionIsInMeta(long timeout, byte[] regionName)
      throws IOException, InterruptedException {
    log("blocking until region is in META: " + Bytes.toStringBinary(regionName));
    long start = System.currentTimeMillis();
    while (System.currentTimeMillis() - start < timeout) {
      Result result = getRegionRow(regionName);
      if (result != null) {
        HRegionInfo info = HRegionInfo.getHRegionInfo(result);
        if (info != null && !info.isOffline()) {
          log("found region in META: " + Bytes.toStringBinary(regionName));
          break;
        }
      }
      sleep(10);
    }
  }

  /**
   * Polls the region server's online regions of the table until one has the given name, or
   * until {@code timeout} milliseconds have elapsed (in which case it returns silently).
   */
  void blockUntilRegionIsOpenedByRS(long timeout, byte[] regionName)
      throws IOException, InterruptedException {
    log("blocking until region is opened by region server: "
        + Bytes.toStringBinary(regionName));
    long start = System.currentTimeMillis();
    while (System.currentTimeMillis() - start < timeout) {
      List<HRegion> regions = rs.getOnlineRegions(tableName);
      for (HRegion region : regions) {
        if (Bytes.compareTo(region.getRegionName(), regionName) == 0) {
          log("found region open in RS: " + Bytes.toStringBinary(regionName));
          return;
        }
      }
      sleep(10);
    }
  }
}
/**
 * Chore that repeatedly checks, using MetaScanner and HTable methods, that the region
 * boundaries form a gap-free chain from the empty start row to the empty end row. The first
 * failure is stored in {@link #ex} and the chore's stopper is stopped.
 */
static class RegionChecker extends Chore {
  Configuration conf;
  byte[] tableName;
  /**
   * Failure caught by {@link #chore()}, or null. Volatile because it is written on the chore
   * thread and read by the test thread without joining the chore.
   */
  volatile Throwable ex;

  /**
   * @param conf configuration handed to MetaScanner and HTable
   * @param stopper stops this chore; also stopped by the chore itself on failure
   * @param tableName table whose regions are verified
   */
  RegionChecker(Configuration conf, Stoppable stopper, byte[] tableName) {
    super("RegionChecker", 10, stopper);
    this.conf = conf;
    this.tableName = tableName;
    this.setDaemon(true);
  }

  /** verify region boundaries obtained from MetaScanner */
  void verifyRegionsUsingMetaScanner() throws Exception {
    // MetaScanner.allTableRegions()
    NavigableMap<HRegionInfo, ServerName> regions = MetaScanner.allTableRegions(conf, tableName,
        false);
    verifyTableRegions(regions.keySet());
    // MetaScanner.listAllRegions()
    // NOTE(review): this call takes no table name, so the list presumably spans every
    // table in META -- confirm the boundary check is meant to cover all of them.
    List<HRegionInfo> regionList = MetaScanner.listAllRegions(conf, false);
    verifyTableRegions(Sets.newTreeSet(regionList));
  }

  /** verify region boundaries obtained from HTable */
  void verifyRegionsUsingHTable() throws IOException {
    HTable table = null;
    try {
      // HTable.getStartEndKeys()
      table = new HTable(conf, tableName);
      Pair<byte[][], byte[][]> keys = table.getStartEndKeys();
      verifyStartEndKeys(keys);
      // HTable.getRegionLocations()
      Map<HRegionInfo, ServerName> regions = table.getRegionLocations();
      verifyTableRegions(regions.keySet());
    } finally {
      IOUtils.closeQuietly(table);
    }
  }

  /** Runs every verification once; throws on the first one that fails. */
  void verify() throws Exception {
    verifyRegionsUsingMetaScanner();
    verifyRegionsUsingHTable();
  }

  /**
   * Extracts start and end keys from the regions, in the set's iteration order, and checks
   * them with {@link #verifyStartEndKeys(Pair)}.
   */
  void verifyTableRegions(Set<HRegionInfo> regions) {
    log("Verifying " + regions.size() + " regions");
    byte[][] startKeys = new byte[regions.size()][];
    byte[][] endKeys = new byte[regions.size()][];
    int i = 0;
    for (HRegionInfo region : regions) {
      startKeys[i] = region.getStartKey();
      endKeys[i] = region.getEndKey();
      i++;
    }
    Pair<byte[][], byte[][]> keys = new Pair<byte[][], byte[][]>(startKeys, endKeys);
    verifyStartEndKeys(keys);
  }

  /**
   * Asserts that there is at least one region, that the first start key and the last end key
   * are empty, and that every region starts exactly where the previous one ended.
   *
   * @param keys start keys (first) and end keys (second), index-aligned per region
   */
  void verifyStartEndKeys(Pair<byte[][], byte[][]> keys) {
    byte[][] startKeys = keys.getFirst();
    byte[][] endKeys = keys.getSecond();
    assertEquals(startKeys.length, endKeys.length);
    assertTrue("Found 0 regions for the table", startKeys.length > 0);
    assertArrayEquals("Start key for the first region is not byte[0]",
        HConstants.EMPTY_START_ROW, startKeys[0]);
    byte[] prevEndKey = HConstants.EMPTY_START_ROW;
    // ensure that we do not have any gaps
    for (int i = 0; i < startKeys.length; i++) {
      assertArrayEquals(
          "Hole in .META. is detected. prevEndKey=" + Bytes.toStringBinary(prevEndKey)
              + " ,regionStartKey=" + Bytes.toStringBinary(startKeys[i]), prevEndKey,
          startKeys[i]);
      prevEndKey = endKeys[i];
    }
    assertArrayEquals("End key for the last region is not byte[0]", HConstants.EMPTY_END_ROW,
        endKeys[endKeys.length - 1]);
  }

  @Override
  protected void chore() {
    try {
      verify();
    } catch (Throwable t) {
      // publish the failure before stopping so the test thread can rethrow it
      this.ex = t;
      stopper.stop("caught exception");
    }
  }
}
/**
 * Logs the message at INFO level through this class's logger; used by the nested helper
 * classes.
 *
 * @param msg message to log
 */
public static void log(String msg) {
LOG.info(msg);
}
}