/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.snapshot;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import java.io.IOException;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.CountDownLatch;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.LargeTests;
import org.apache.hadoop.hbase.TableNotFoundException;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.master.HMaster;
import org.apache.hadoop.hbase.master.snapshot.SnapshotManager;
import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
import org.apache.hadoop.hbase.regionserver.ConstantSizeRegionSplitPolicy;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.FSTableDescriptors;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.experimental.categories.Category;
/**
* Test creating/using/deleting snapshots from the client
* <p>
* This is an end-to-end test for the snapshot utility
*
* TODO This is essentially a clone of TestSnapshotFromClient. It is worth refactoring
* because there will be a few more flavors of snapshots that need to run these tests.
*/
@Category(LargeTests.class)
public class TestFlushSnapshotFromClient {
  private static final Log LOG = LogFactory.getLog(TestFlushSnapshotFromClient.class);
  private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
  /** Number of region servers requested when starting the mini cluster. */
  private static final int NUM_RS = 2;
  /** Name of the table that is (re)created for every test. */
  private static final String STRING_TABLE_NAME = "test";
  private static final byte[] TEST_FAM = Bytes.toBytes("fam");
  private static final byte[] TABLE_NAME = Bytes.toBytes(STRING_TABLE_NAME);

  /**
   * Setup the config for the cluster and start it
   * @throws Exception on failure
   */
  @BeforeClass
  public static void setupCluster() throws Exception {
    setupConf(UTIL.getConfiguration());
    UTIL.startMiniCluster(NUM_RS);
  }

  /**
   * Apply the test-specific settings (small flushes, limited compactions, snapshots enabled)
   * to the passed configuration.
   * @param conf configuration to update in place
   */
  private static void setupConf(Configuration conf) {
    // disable the ui
    conf.setInt("hbase.regionserver.info.port", -1);
    // change the flush size to a small amount, regulating number of store files
    conf.setInt("hbase.hregion.memstore.flush.size", 25000);
    // so make sure we get a compaction when doing a load, but keep around some
    // files in the store
    conf.setInt("hbase.hstore.compaction.min", 10);
    conf.setInt("hbase.hstore.compactionThreshold", 10);
    // block writes if we get to 12 store files
    conf.setInt("hbase.hstore.blockingStoreFiles", 12);
    // drop the number of attempts for the hbase admin
    conf.setInt("hbase.client.retries.number", 1);
    // Enable snapshot
    conf.setBoolean(SnapshotManager.HBASE_SNAPSHOT_ENABLED, true);
    // prevent aggressive region split
    conf.set(HConstants.HBASE_REGION_SPLIT_POLICY_KEY,
      ConstantSizeRegionSplitPolicy.class.getName());
  }

  /**
   * Create the test table before each test.
   * @throws Exception on failure
   */
  @Before
  public void setup() throws Exception {
    UTIL.createTable(TABLE_NAME, TEST_FAM);
  }

  /**
   * Delete the test table and the archive directory after each test.
   * @throws Exception if the table cannot be deleted
   */
  @After
  public void tearDown() throws Exception {
    UTIL.deleteTable(TABLE_NAME);
    // and cleanup the archive directory; a failure here is only logged (best effort)
    try {
      UTIL.getTestFileSystem().delete(new Path(UTIL.getDefaultRootDirPath(), ".archive"), true);
    } catch (IOException e) {
      LOG.warn("Failure to delete archive directory", e);
    }
  }

  /**
   * Shut down the mini cluster; a failure to shut down is logged rather than rethrown.
   * @throws Exception declared for the JUnit lifecycle signature
   */
  @AfterClass
  public static void cleanupTest() throws Exception {
    try {
      UTIL.shutdownMiniCluster();
    } catch (Exception e) {
      LOG.warn("failure shutting down cluster", e);
    }
  }

  /**
   * Test simple flush snapshotting a table that is online
   * @throws Exception on failure
   */
  @Test
  public void testFlushTableSnapshot() throws Exception {
    HBaseAdmin admin = UTIL.getHBaseAdmin();
    // make sure we don't fail on listing snapshots
    SnapshotTestingUtils.assertNoSnapshots(admin);

    // put some stuff in the table
    loadTable(TABLE_NAME);

    // get the name of all the regionservers hosting the snapshotted table
    Set<String> snapshotServers = new HashSet<String>();
    List<RegionServerThread> servers = UTIL.getMiniHBaseCluster().getLiveRegionServerThreads();
    for (RegionServerThread server : servers) {
      if (server.getRegionServer().getOnlineRegions(TABLE_NAME).size() > 0) {
        snapshotServers.add(server.getRegionServer().getServerName().toString());
      }
    }

    LOG.debug("FS state before snapshot:");
    FSUtils.logFileSystemState(UTIL.getTestFileSystem(),
      FSUtils.getRootDir(UTIL.getConfiguration()), LOG);

    // take a snapshot of the enabled table
    String snapshotString = "offlineTableSnapshot";
    byte[] snapshot = Bytes.toBytes(snapshotString);
    admin.snapshot(snapshotString, STRING_TABLE_NAME, SnapshotDescription.Type.FLUSH);
    LOG.debug("Snapshot completed.");

    // make sure we have the snapshot
    List<SnapshotDescription> snapshots = SnapshotTestingUtils.assertOneSnapshotThatMatches(admin,
      snapshot, TABLE_NAME);

    // make sure its a valid snapshot
    FileSystem fs = UTIL.getHBaseCluster().getMaster().getMasterFileSystem().getFileSystem();
    Path rootDir = UTIL.getHBaseCluster().getMaster().getMasterFileSystem().getRootDir();
    LOG.debug("FS state after snapshot:");
    FSUtils.logFileSystemState(UTIL.getTestFileSystem(),
      FSUtils.getRootDir(UTIL.getConfiguration()), LOG);

    SnapshotTestingUtils.confirmSnapshotValid(snapshots.get(0), TABLE_NAME, TEST_FAM, rootDir,
      admin, fs, false, new Path(rootDir, HConstants.HREGION_LOGDIR_NAME), snapshotServers);

    // the snapshot must be gone once it has been deleted
    admin.deleteSnapshot(snapshot);
    SnapshotTestingUtils.assertNoSnapshots(admin);
  }

  /**
   * Test that requesting a snapshot of a table that does not exist fails with a
   * {@link SnapshotCreationException}.
   * @throws Exception on failure
   */
  @Test
  public void testSnapshotFailsOnNonExistantTable() throws Exception {
    HBaseAdmin admin = UTIL.getHBaseAdmin();
    // make sure we don't fail on listing snapshots
    SnapshotTestingUtils.assertNoSnapshots(admin);
    String tableName = "_not_a_table";

    // make sure the table doesn't exist; keep appending '!' until we find an unused name
    boolean tableExists = false;
    do {
      try {
        admin.getTableDescriptor(Bytes.toBytes(tableName));
        tableExists = true;
        LOG.error("Table:" + tableName + " already exists, checking a new name");
        tableName = tableName + "!";
      } catch (TableNotFoundException e) {
        tableExists = false;
      }
    } while (tableExists);

    // snapshot the non-existant table
    try {
      admin.snapshot("fail", tableName, SnapshotDescription.Type.FLUSH);
      fail("Snapshot succeeded even though there is not table.");
    } catch (SnapshotCreationException e) {
      LOG.info("Correctly failed to snapshot a non-existant table:" + e.getMessage());
    }
  }

  /**
   * Test that a flush snapshot requested asynchronously completes, is listed and can be deleted.
   * @throws Exception on failure
   */
  @Test(timeout = 60000)
  public void testAsyncFlushSnapshot() throws Exception {
    HBaseAdmin admin = UTIL.getHBaseAdmin();
    SnapshotDescription snapshot = SnapshotDescription.newBuilder().setName("asyncSnapshot")
        .setTable(STRING_TABLE_NAME).setType(SnapshotDescription.Type.FLUSH).build();

    // take the snapshot async
    admin.takeSnapshotAsync(snapshot);

    // constantly loop, looking for the snapshot to complete
    HMaster master = UTIL.getMiniHBaseCluster().getMaster();
    SnapshotTestingUtils.waitForSnapshotToComplete(master, new HSnapshotDescription(snapshot), 200);
    LOG.info(" === Async Snapshot Completed ===");
    FSUtils.logFileSystemState(UTIL.getTestFileSystem(),
      FSUtils.getRootDir(UTIL.getConfiguration()), LOG);
    // make sure we get the snapshot
    SnapshotTestingUtils.assertOneSnapshotThatMatches(admin, snapshot);

    // test that we can delete the snapshot
    admin.deleteSnapshot(snapshot.getName());
    LOG.info(" === Async Snapshot Deleted ===");
    FSUtils.logFileSystemState(UTIL.getTestFileSystem(),
      FSUtils.getRootDir(UTIL.getConfiguration()), LOG);
    // make sure we don't have any snapshots
    SnapshotTestingUtils.assertNoSnapshots(admin);
    LOG.info(" === Async Snapshot Test Completed ===");
  }

  /**
   * Basic end-to-end test of simple-flush-based snapshots
   * @throws Exception on failure
   */
  @Test
  public void testFlushCreateListDestroy() throws Exception {
    LOG.debug("------- Starting Snapshot test -------------");
    HBaseAdmin admin = UTIL.getHBaseAdmin();
    // make sure we don't fail on listing snapshots
    SnapshotTestingUtils.assertNoSnapshots(admin);
    // load the table so we have some data
    loadTable(TABLE_NAME);
    // and wait until everything stabilizes
    waitForTableToBeOnline(TABLE_NAME);

    String snapshotName = "flushSnapshotCreateListDestroy";
    // test creating the snapshot
    admin.snapshot(snapshotName, STRING_TABLE_NAME, SnapshotDescription.Type.FLUSH);
    logFSTree(new Path(UTIL.getConfiguration().get(HConstants.HBASE_DIR)));

    // make sure we only have 1 matching snapshot
    List<SnapshotDescription> snapshots = SnapshotTestingUtils.assertOneSnapshotThatMatches(admin,
      snapshotName, STRING_TABLE_NAME);

    // check the directory structure
    FileSystem fs = UTIL.getHBaseCluster().getMaster().getMasterFileSystem().getFileSystem();
    Path rootDir = UTIL.getHBaseCluster().getMaster().getMasterFileSystem().getRootDir();
    Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshots.get(0), rootDir);
    assertTrue(fs.exists(snapshotDir));
    FSUtils.logFileSystemState(UTIL.getTestFileSystem(), snapshotDir, LOG);
    Path snapshotinfo = new Path(snapshotDir, SnapshotDescriptionUtils.SNAPSHOTINFO_FILE);
    assertTrue(fs.exists(snapshotinfo));

    // check the table info
    HTableDescriptor desc = FSTableDescriptors.getTableDescriptor(fs, rootDir, TABLE_NAME);
    HTableDescriptor snapshotDesc = FSTableDescriptors.getTableDescriptor(fs,
      SnapshotDescriptionUtils.getSnapshotsDir(rootDir), Bytes.toBytes(snapshotName));
    assertEquals(desc, snapshotDesc);

    // check the region snapshot for all the regions
    List<HRegionInfo> regions = admin.getTableRegions(TABLE_NAME);
    for (HRegionInfo info : regions) {
      String regionName = info.getEncodedName();
      Path regionDir = new Path(snapshotDir, regionName);
      HRegionInfo snapshotRegionInfo = HRegion.loadDotRegionInfoFileContent(fs, regionDir);
      assertEquals(info, snapshotRegionInfo);
      // check to make sure we have the family
      Path familyDir = new Path(regionDir, Bytes.toString(TEST_FAM));
      assertTrue(fs.exists(familyDir));
      // make sure we have some file references
      assertTrue(fs.listStatus(familyDir).length > 0);
    }

    // test that we can delete the snapshot
    admin.deleteSnapshot(snapshotName);
    FSUtils.logFileSystemState(UTIL.getTestFileSystem(),
      FSUtils.getRootDir(UTIL.getConfiguration()), LOG);

    // make sure we don't have any snapshots
    SnapshotTestingUtils.assertNoSnapshots(admin);
    LOG.debug("------- Flush-Snapshot Create List Destroy-------------");
  }

  /**
   * Demonstrate that we reject snapshot requests if there is a snapshot already running on the
   * same table, and that concurrent snapshots on different tables can both succeed
   * concurrently.
   * @throws IOException on failure talking to the cluster
   * @throws InterruptedException if interrupted while waiting for the snapshots
   */
  @Test(timeout=60000)
  public void testConcurrentSnapshottingAttempts() throws IOException, InterruptedException {
    final String STRING_TABLE2_NAME = STRING_TABLE_NAME + "2";
    final byte[] TABLE2_NAME = Bytes.toBytes(STRING_TABLE2_NAME);
    final int ssNum = 20;
    HBaseAdmin admin = UTIL.getHBaseAdmin();
    // make sure we don't fail on listing snapshots
    SnapshotTestingUtils.assertNoSnapshots(admin);
    // create second testing table
    UTIL.createTable(TABLE2_NAME, TEST_FAM);
    try {
      // load the tables so we have some data
      loadTable(TABLE_NAME);
      loadTable(TABLE2_NAME);
      // and wait until everything stabilizes
      waitForTableToBeOnline(TABLE_NAME);
      waitForTableToBeOnline(TABLE2_NAME);

      final CountDownLatch toBeSubmitted = new CountDownLatch(ssNum);
      // We'll have one of these per thread
      class SSRunnable implements Runnable {
        private final SnapshotDescription ss;

        SSRunnable(SnapshotDescription ss) {
          this.ss = ss;
        }

        @Override
        public void run() {
          try {
            HBaseAdmin admin = UTIL.getHBaseAdmin();
            LOG.info("Submitting snapshot request: " + SnapshotDescriptionUtils.toString(ss));
            admin.takeSnapshotAsync(ss);
          } catch (Exception e) {
            LOG.info("Exception during snapshot request: " + SnapshotDescriptionUtils.toString(ss)
                + ". This is ok, we expect some", e);
          } finally {
            // always release the latch so the main thread cannot hang on an unexpected error
            toBeSubmitted.countDown();
          }
          LOG.info("Submitted snapshot request: " + SnapshotDescriptionUtils.toString(ss));
        }
      }

      // build descriptions, alternating between the two tables
      SnapshotDescription[] descs = new SnapshotDescription[ssNum];
      for (int i = 0; i < ssNum; i++) {
        SnapshotDescription.Builder builder = SnapshotDescription.newBuilder();
        builder.setTable((i % 2) == 0 ? STRING_TABLE_NAME : STRING_TABLE2_NAME);
        builder.setName("ss" + i);
        builder.setType(SnapshotDescription.Type.FLUSH);
        descs[i] = builder.build();
      }

      // kick each off its own thread
      for (int i = 0; i < ssNum; i++) {
        new Thread(new SSRunnable(descs[i])).start();
      }

      // wait until all have been submitted
      toBeSubmitted.await();

      // loop until all are done.
      while (true) {
        int doneCount = 0;
        for (SnapshotDescription ss : descs) {
          try {
            if (admin.isSnapshotFinished(ss)) {
              doneCount++;
            }
          } catch (Exception e) {
            // an exception while checking is counted as "done" as well
            LOG.warn("Got an exception when checking for snapshot " + ss.getName(), e);
            doneCount++;
          }
        }
        if (doneCount == descs.length) {
          break;
        }
        Thread.sleep(100);
      }

      // dump for debugging
      logFSTree(new Path(UTIL.getConfiguration().get(HConstants.HBASE_DIR)));

      List<SnapshotDescription> taken = admin.listSnapshots();
      int takenSize = taken.size();
      LOG.info("Taken " + takenSize + " snapshots: " + taken);
      assertTrue("We expect at least 1 request to be rejected because of we concurrently" +
          " issued many requests", takenSize < ssNum && takenSize > 0);

      // Verify that there's at least one snapshot per table
      int t1SnapshotsCount = 0;
      int t2SnapshotsCount = 0;
      for (SnapshotDescription ss : taken) {
        if (ss.getTable().equals(STRING_TABLE_NAME)) {
          t1SnapshotsCount++;
        } else if (ss.getTable().equals(STRING_TABLE2_NAME)) {
          t2SnapshotsCount++;
        }
      }
      assertTrue("We expect at least 1 snapshot of table1 ", t1SnapshotsCount > 0);
      assertTrue("We expect at least 1 snapshot of table2 ", t2SnapshotsCount > 0);
    } finally {
      // clean up even when an assertion failed, so subsequent tests start clean
      try {
        for (SnapshotDescription ss : admin.listSnapshots()) {
          admin.deleteSnapshot(ss.getName());
        }
      } finally {
        UTIL.deleteTable(TABLE2_NAME);
      }
    }
  }

  /**
   * Log the state of the DFS cluster's file system below the passed directory.
   * @param root directory to start logging from
   * @throws IOException if the file system cannot be read
   */
  private void logFSTree(Path root) throws IOException {
    FSUtils.logFileSystemState(UTIL.getDFSCluster().getFileSystem(), root, LOG);
  }

  /**
   * Load test data into the {@link #TEST_FAM} family of the given table, closing the client
   * table handle afterwards.
   * @param tableName name of the table to load
   * @throws IOException if the table cannot be opened, loaded or closed
   */
  private void loadTable(final byte[] tableName) throws IOException {
    HTable table = new HTable(UTIL.getConfiguration(), tableName);
    try {
      UTIL.loadTable(table, TEST_FAM);
    } finally {
      table.close();
    }
  }

  /**
   * Wait for flushes and compactions to finish on the regions of the table.
   * <p>
   * NOTE(review): only the regions hosted by the region server holding the table's first region
   * are waited on; regions on other servers are not checked.
   * @param tableName name of the table to wait on
   * @throws IOException if the region server cannot be looked up
   */
  private void waitForTableToBeOnline(final byte[] tableName) throws IOException {
    HRegionServer rs = UTIL.getRSForFirstRegionInTable(tableName);
    List<HRegion> onlineRegions = rs.getOnlineRegions(tableName);
    for (HRegion region : onlineRegions) {
      region.waitForFlushesAndCompactions();
    }
  }
}