package org.apache.hadoop.hbase.replication;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.LargeTests;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.mapreduce.replication.VerifyReplication;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.JVMClusterUtil;
import org.apache.hadoop.mapreduce.Job;
import org.junit.Before;
import org.junit.Test;
import org.junit.experimental.categories.Category;

import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.fail;

@Category(LargeTests.class)
public class TestReplicationSmallTests extends TestReplicationBase {

  private static final Log LOG = LogFactory.getLog(TestReplicationSmallTests.class);

  /**
   * @throws java.lang.Exception
   */
  @Before
  public void setUp() throws Exception {
    htable1.setAutoFlush(true);
    // Starting and stopping replication can make us miss new logs,
    // rolling like this makes sure the most recent one gets added to the queue
    for (JVMClusterUtil.RegionServerThread r :
        utility1.getHBaseCluster().getRegionServerThreads()) {
      r.getRegionServer().getWAL().rollWriter();
    }
    utility1.truncateTable(tableName);
    // truncating the table will send one Delete per row to the slave cluster
    // in an async fashion, which is why we cannot just call truncateTable on
    // utility2 since late writes could make it to the slave in some way.
    // Instead, we truncate the first table and wait for all the Deletes to
    // make it to the slave.
    Scan scan = new Scan();
    int lastCount = 0;
    for (int i = 0; i < NB_RETRIES; i++) {
      if (i == NB_RETRIES - 1) {
        fail("Waited too much time for truncate");
      }
      ResultScanner scanner = htable2.getScanner(scan);
      Result[] res = scanner.next(NB_ROWS_IN_BIG_BATCH);
      scanner.close();
      if (res.length != 0) {
        if (res.length < lastCount) {
          i--; // Don't increment timeout if we make progress
        }
        lastCount = res.length;
        LOG.info("Still got " + res.length + " rows");
        Thread.sleep(SLEEP_TIME);
      } else {
        break;
      }
    }
  }
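  // The retry-poll pattern used throughout this class (try up to NB_RETRIES
  // times, sleep SLEEP_TIME between attempts, fail once retries are spent) is
  // sketched below as a hypothetical helper, purely for reference. The name
  // waitForReplication and the Callable-based condition are assumptions, not
  // part of the original test; the tests below inline the pattern instead of
  // calling this.
  private void waitForReplication(String failMsg,
      java.util.concurrent.Callable<Boolean> condition) throws Exception {
    for (int i = 0; i < NB_RETRIES; i++) {
      if (condition.call()) {
        return; // the slave cluster caught up
      }
      Thread.sleep(SLEEP_TIME);
    }
    fail(failMsg);
  }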
  /**
   * Verify that version and column delete marker types are replicated
   * correctly.
   * @throws Exception
   */
  @Test(timeout = 300000)
  public void testDeleteTypes() throws Exception {
    LOG.info("testDeleteTypes");
    final byte[] v1 = Bytes.toBytes("v1");
    final byte[] v2 = Bytes.toBytes("v2");
    final byte[] v3 = Bytes.toBytes("v3");
    htable1 = new HTable(conf1, tableName);

    long t = EnvironmentEdgeManager.currentTimeMillis();
    // create three versions for "row"
    Put put = new Put(row);
    put.add(famName, row, t, v1);
    htable1.put(put);

    put = new Put(row);
    put.add(famName, row, t + 1, v2);
    htable1.put(put);

    put = new Put(row);
    put.add(famName, row, t + 2, v3);
    htable1.put(put);

    Get get = new Get(row);
    get.setMaxVersions();
    for (int i = 0; i < NB_RETRIES; i++) {
      if (i == NB_RETRIES - 1) {
        fail("Waited too much time for put replication");
      }
      Result res = htable2.get(get);
      if (res.size() < 3) {
        LOG.info("Rows not available");
        Thread.sleep(SLEEP_TIME);
      } else {
        assertArrayEquals(res.raw()[0].getValue(), v3);
        assertArrayEquals(res.raw()[1].getValue(), v2);
        assertArrayEquals(res.raw()[2].getValue(), v1);
        break;
      }
    }

    // place a version delete marker (delete last version)
    Delete d = new Delete(row);
    d.deleteColumn(famName, row, t);
    htable1.delete(d);

    get = new Get(row);
    get.setMaxVersions();
    for (int i = 0; i < NB_RETRIES; i++) {
      if (i == NB_RETRIES - 1) {
        fail("Waited too much time for del replication");
      }
      Result res = htable2.get(get);
      if (res.size() > 2) {
        LOG.info("Version not deleted");
        Thread.sleep(SLEEP_TIME);
      } else {
        assertArrayEquals(res.raw()[0].getValue(), v3);
        assertArrayEquals(res.raw()[1].getValue(), v2);
        break;
      }
    }

    // place a column delete marker
    d = new Delete(row);
    d.deleteColumns(famName, row, t + 2);
    htable1.delete(d);

    // now *both* of the remaining versions should be deleted
    // at the replica
    get = new Get(row);
    for (int i = 0; i < NB_RETRIES; i++) {
      if (i == NB_RETRIES - 1) {
        fail("Waited too much time for del replication");
      }
      Result res = htable2.get(get);
      if (res.size() >= 1) {
        LOG.info("Rows not deleted");
        Thread.sleep(SLEEP_TIME);
      } else {
        break;
      }
    }
  }

  /**
   * Add a row, check it's replicated, delete it, check it's gone
   * @throws Exception
   */
  @Test(timeout = 300000)
  public void testSimplePutDelete() throws Exception {
    LOG.info("testSimplePutDelete");
    Put put = new Put(row);
    put.add(famName, row, row);

    htable1 = new HTable(conf1, tableName);
    htable1.put(put);

    Get get = new Get(row);
    for (int i = 0; i < NB_RETRIES; i++) {
      if (i == NB_RETRIES - 1) {
        fail("Waited too much time for put replication");
      }
      Result res = htable2.get(get);
      if (res.size() == 0) {
        LOG.info("Row not available");
        Thread.sleep(SLEEP_TIME);
      } else {
        assertArrayEquals(res.value(), row);
        break;
      }
    }

    Delete del = new Delete(row);
    htable1.delete(del);

    get = new Get(row);
    for (int i = 0; i < NB_RETRIES; i++) {
      if (i == NB_RETRIES - 1) {
        fail("Waited too much time for del replication");
      }
      Result res = htable2.get(get);
      if (res.size() >= 1) {
        LOG.info("Row not deleted");
        Thread.sleep(SLEEP_TIME);
      } else {
        break;
      }
    }
  }
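  // Note on the batching in the next test: with setAutoFlush(false) the old
  // HTable client buffers Puts locally and only ships them when flushCommits()
  // is called (or the write buffer fills), so the edits reach the WAL, and
  // therefore the slave cluster, as one batch rather than one RPC per row.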
  /**
   * Try a small batch upload using the write buffer, check it's replicated
   * @throws Exception
   */
  @Test(timeout = 300000)
  public void testSmallBatch() throws Exception {
    LOG.info("testSmallBatch");
    Put put;
    // normal Batch tests
    htable1.setAutoFlush(false);
    for (int i = 0; i < NB_ROWS_IN_BATCH; i++) {
      put = new Put(Bytes.toBytes(i));
      put.add(famName, row, row);
      htable1.put(put);
    }
    htable1.flushCommits();

    Scan scan = new Scan();

    ResultScanner scanner1 = htable1.getScanner(scan);
    Result[] res1 = scanner1.next(NB_ROWS_IN_BATCH);
    scanner1.close();
    assertEquals(NB_ROWS_IN_BATCH, res1.length);

    for (int i = 0; i < NB_RETRIES; i++) {
      if (i == NB_RETRIES - 1) {
        fail("Waited too much time for normal batch replication");
      }
      ResultScanner scanner = htable2.getScanner(scan);
      Result[] res = scanner.next(NB_ROWS_IN_BATCH);
      scanner.close();
      if (res.length != NB_ROWS_IN_BATCH) {
        LOG.info("Only got " + res.length + " rows");
        Thread.sleep(SLEEP_TIME);
      } else {
        break;
      }
    }
  }

  /**
   * Test stopping replication: insert some data, make sure nothing gets
   * replicated, restart replication and check that the same edit now makes
   * it to the slave.
   * @throws Exception
   */
  @Test(timeout = 300000)
  public void testStartStop() throws Exception {
    // Test stopping replication
    setIsReplication(false);

    Put put = new Put(Bytes.toBytes("stop start"));
    put.add(famName, row, row);
    htable1.put(put);

    Get get = new Get(Bytes.toBytes("stop start"));
    for (int i = 0; i < NB_RETRIES; i++) {
      if (i == NB_RETRIES - 1) {
        break;
      }
      Result res = htable2.get(get);
      if (res.size() >= 1) {
        fail("Replication wasn't stopped");
      } else {
        LOG.info("Row not replicated, let's wait a bit more...");
        Thread.sleep(SLEEP_TIME);
      }
    }

    // Test restart replication
    setIsReplication(true);

    htable1.put(put);

    for (int i = 0; i < NB_RETRIES; i++) {
      if (i == NB_RETRIES - 1) {
        fail("Waited too much time for put replication");
      }
      Result res = htable2.get(get);
      if (res.size() == 0) {
        LOG.info("Row not available");
        Thread.sleep(SLEEP_TIME);
      } else {
        assertArrayEquals(res.value(), row);
        break;
      }
    }

    put = new Put(Bytes.toBytes("do not rep"));
    put.add(noRepfamName, row, row);
    htable1.put(put);

    get = new Get(Bytes.toBytes("do not rep"));
    for (int i = 0; i < NB_RETRIES; i++) {
      if (i == NB_RETRIES - 1) {
        break;
      }
      Result res = htable2.get(get);
      if (res.size() >= 1) {
        fail("Not supposed to be replicated");
      } else {
        LOG.info("Row not replicated, let's wait a bit more...");
        Thread.sleep(SLEEP_TIME);
      }
    }
  }

  /**
   * Test disabling and re-enabling a peer: insert while the peer is disabled,
   * make sure nothing gets replicated, enable the peer and check that the
   * insert is then replicated.
   * @throws Exception
   */
  @Test(timeout = 300000)
  public void testDisableEnable() throws Exception {
    // Test disabling replication
    admin.disablePeer("2");

    byte[] rowkey = Bytes.toBytes("disable enable");
    Put put = new Put(rowkey);
    put.add(famName, row, row);
    htable1.put(put);

    Get get = new Get(rowkey);
    for (int i = 0; i < NB_RETRIES; i++) {
      Result res = htable2.get(get);
      if (res.size() >= 1) {
        fail("Replication wasn't disabled");
      } else {
        LOG.info("Row not replicated, let's wait a bit more...");
        Thread.sleep(SLEEP_TIME);
      }
    }

    // Test enable replication
    admin.enablePeer("2");

    for (int i = 0; i < NB_RETRIES; i++) {
      Result res = htable2.get(get);
      if (res.size() == 0) {
        LOG.info("Row not available");
        Thread.sleep(SLEEP_TIME);
      } else {
        assertArrayEquals(res.value(), row);
        return;
      }
    }
    fail("Waited too much time for put replication");
  }
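  // Note on the peer management in the next test: addPeer takes a "cluster
  // key" identifying the slave's ZooKeeper ensemble, in the form
  // hbase.zookeeper.quorum:hbase.zookeeper.property.clientPort:zookeeper.znode.parent
  // (e.g. "zk1,zk2,zk3:2181:/hbase"); the test utility's getClusterKey()
  // builds that string from the mini-cluster's configuration.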
  /**
   * Integration test for TestReplicationAdmin: removes and re-adds a peer
   * cluster
   * @throws Exception
   */
  @Test(timeout = 300000)
  public void testAddAndRemoveClusters() throws Exception {
    LOG.info("testAddAndRemoveClusters");
    admin.removePeer("2");
    Thread.sleep(SLEEP_TIME);
    byte[] rowKey = Bytes.toBytes("Won't be replicated");
    Put put = new Put(rowKey);
    put.add(famName, row, row);
    htable1.put(put);

    Get get = new Get(rowKey);
    for (int i = 0; i < NB_RETRIES; i++) {
      if (i == NB_RETRIES - 1) {
        break;
      }
      Result res = htable2.get(get);
      if (res.size() >= 1) {
        fail("Not supposed to be replicated");
      } else {
        LOG.info("Row not replicated, let's wait a bit more...");
        Thread.sleep(SLEEP_TIME);
      }
    }

    admin.addPeer("2", utility2.getClusterKey());
    Thread.sleep(SLEEP_TIME);
    rowKey = Bytes.toBytes("do rep");
    put = new Put(rowKey);
    put.add(famName, row, row);
    LOG.info("Adding new row");
    htable1.put(put);

    get = new Get(rowKey);
    for (int i = 0; i < NB_RETRIES; i++) {
      if (i == NB_RETRIES - 1) {
        fail("Waited too much time for put replication");
      }
      Result res = htable2.get(get);
      if (res.size() == 0) {
        LOG.info("Row not available");
        Thread.sleep(SLEEP_TIME * i);
      } else {
        assertArrayEquals(res.value(), row);
        break;
      }
    }
  }

  /**
   * Do a more intense version of testSmallBatch, one that will trigger
   * hlog rolling and other non-trivial code paths
   * @throws Exception
   */
  @Test(timeout = 300000)
  public void loadTesting() throws Exception {
    htable1.setWriteBufferSize(1024);
    htable1.setAutoFlush(false);
    for (int i = 0; i < NB_ROWS_IN_BIG_BATCH; i++) {
      Put put = new Put(Bytes.toBytes(i));
      put.add(famName, row, row);
      htable1.put(put);
    }
    htable1.flushCommits();

    Scan scan = new Scan();

    ResultScanner scanner = htable1.getScanner(scan);
    Result[] res = scanner.next(NB_ROWS_IN_BIG_BATCH);
    scanner.close();

    assertEquals(NB_ROWS_IN_BATCH * 10, res.length);

    scan = new Scan();

    for (int i = 0; i < NB_RETRIES; i++) {
      scanner = htable2.getScanner(scan);
      res = scanner.next(NB_ROWS_IN_BIG_BATCH);
      scanner.close();
      if (res.length != NB_ROWS_IN_BIG_BATCH) {
        if (i == NB_RETRIES - 1) {
          int lastRow = -1;
          for (Result result : res) {
            int currentRow = Bytes.toInt(result.getRow());
            for (int row = lastRow + 1; row < currentRow; row++) {
              LOG.error("Row missing: " + row);
            }
            lastRow = currentRow;
          }
          LOG.error("Last row: " + lastRow);
          fail("Waited too much time for normal batch replication, "
              + res.length + " instead of " + NB_ROWS_IN_BIG_BATCH);
        } else {
          LOG.info("Only got " + res.length + " rows");
          Thread.sleep(SLEEP_TIME);
        }
      } else {
        break;
      }
    }
  }

  /**
   * Do a small loading into a table, make sure the data is really the same,
   * then run the VerifyReplication job to check the results. Do a second
   * comparison where all the cells are different.
   * @throws Exception
   */
  @Test(timeout = 300000)
  public void testVerifyRepJob() throws Exception {
    // Populate the tables, at the same time it guarantees that the tables are
    // identical since it does the check
    testSmallBatch();

    String[] args = new String[] { "2", Bytes.toString(tableName) };
    Job job = VerifyReplication.createSubmittableJob(CONF_WITH_LOCALFS, args);
    if (job == null) {
      fail("Job wasn't created, see the log");
    }
    if (!job.waitForCompletion(true)) {
      fail("Job failed, see the log");
    }
    assertEquals(NB_ROWS_IN_BATCH, job.getCounters()
        .findCounter(VerifyReplication.Verifier.Counters.GOODROWS).getValue());
    assertEquals(0, job.getCounters()
        .findCounter(VerifyReplication.Verifier.Counters.BADROWS).getValue());

    Scan scan = new Scan();
    ResultScanner rs = htable2.getScanner(scan);
    Put put = null;
    for (Result result : rs) {
      put = new Put(result.getRow());
      KeyValue firstVal = result.raw()[0];
      put.add(firstVal.getFamily(), firstVal.getQualifier(),
          Bytes.toBytes("diff data"));
      htable2.put(put);
    }
    Delete delete = new Delete(put.getRow());
    htable2.delete(delete);

    job = VerifyReplication.createSubmittableJob(CONF_WITH_LOCALFS, args);
    if (job == null) {
      fail("Job wasn't created, see the log");
    }
    if (!job.waitForCompletion(true)) {
      fail("Job failed, see the log");
    }
    assertEquals(0, job.getCounters()
        .findCounter(VerifyReplication.Verifier.Counters.GOODROWS).getValue());
    assertEquals(NB_ROWS_IN_BATCH, job.getCounters()
        .findCounter(VerifyReplication.Verifier.Counters.BADROWS).getValue());
  }
}
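// For reference, the same comparison testVerifyRepJob drives through
// createSubmittableJob can be run against live clusters from the command
// line, roughly:
//
//   hbase org.apache.hadoop.hbase.mapreduce.replication.VerifyReplication \
//       [--starttime=X] [--stoptime=Y] [--families=A] <peerId> <tableName>
//
// where peerId matches the id used in addPeer ("2" in these tests) and the
// optional flags bound the time range and column families compared.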