/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase;

import java.security.InvalidParameterException;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.CompareFilter;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.MultiThreadedWriter;
import org.apache.hadoop.hbase.util.RegionSplitter;
import org.apache.hadoop.hbase.util.test.LoadTestDataGenerator;
import org.apache.hadoop.hbase.util.test.LoadTestKVGenerator;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import org.junit.experimental.categories.Category;

/**
* Integration test that verifies lazy column family loading during scans: it runs
* repeated filtered scans with lazy CF loading enabled while multiple threads
* continuously write values, and verifies the scan results.
*/
@Category(IntegrationTests.class)
public class IntegrationTestLazyCfLoading {
private static final String TABLE_NAME = IntegrationTestLazyCfLoading.class.getSimpleName();
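/** Configuration key for the test timeout, in minutes; "%s" is replaced with the test class's simple name. */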
private static final String TIMEOUT_KEY = "hbase.%s.timeout";
/** Soft test timeout, in minutes; actual test duration depends on the number of keys to write. */
private static final int DEFAULT_TIMEOUT_MINUTES = 10;
private static final int NUM_SERVERS = 1;
/** Set regions per server low to ensure splits happen during test */
private static final int REGIONS_PER_SERVER = 3;
private static final int KEYS_TO_WRITE_PER_SERVER = 20000;
private static final int WRITER_THREADS = 10;
private static final int WAIT_BETWEEN_SCANS_MS = 1000;
private static final Log LOG = LogFactory.getLog(IntegrationTestLazyCfLoading.class);
private IntegrationTestingUtility util = new IntegrationTestingUtility();
private final DataGenerator dataGen = new DataGenerator();
/** Custom LoadTestDataGenerator. Uses key generation and verification from
* LoadTestKVGenerator. Creates three column families: the first ("essential") holds an
* integer column to filter on; the second holds a matching integer column (for
* test-specific verification) plus a byte[] value column used for general verification;
* the third holds only the value column.
*/
private static class DataGenerator extends LoadTestDataGenerator {
private static final int MIN_DATA_SIZE = 4096;
private static final int MAX_DATA_SIZE = 65536;
public static final byte[] ESSENTIAL_CF = Bytes.toBytes("essential");
public static final byte[] JOINED_CF1 = Bytes.toBytes("joined");
public static final byte[] JOINED_CF2 = Bytes.toBytes("joined2");
public static final byte[] FILTER_COLUMN = Bytes.toBytes("filter");
public static final byte[] VALUE_COLUMN = Bytes.toBytes("val");
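// Filter-column values are derived from the row key and masked with this constant,
// so roughly half of all rows end up "accepted" by the scan filter.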
public static final long ACCEPTED_VALUE = 1L;
private static final Map<byte[], byte[][]> columnMap = new TreeMap<byte[], byte[][]>(
Bytes.BYTES_COMPARATOR);
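// Updated by the writer threads as values are generated: the total number of keys,
// and the number of keys whose filter column will match ACCEPTED_VALUE.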
private final AtomicLong expectedNumberOfKeys = new AtomicLong(0);
private final AtomicLong totalNumberOfKeys = new AtomicLong(0);
public DataGenerator() {
super(MIN_DATA_SIZE, MAX_DATA_SIZE);
columnMap.put(ESSENTIAL_CF, new byte[][] { FILTER_COLUMN });
columnMap.put(JOINED_CF1, new byte[][] { FILTER_COLUMN, VALUE_COLUMN });
columnMap.put(JOINED_CF2, new byte[][] { VALUE_COLUMN });
}
public long getExpectedNumberOfKeys() {
return expectedNumberOfKeys.get();
}
public long getTotalNumberOfKeys() {
return totalNumberOfKeys.get();
}
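// Keys carry an MD5-based hex prefix, so they distribute evenly across the
// hex-split regions (see createTable()).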
@Override
public byte[] getDeterministicUniqueKey(long keyBase) {
// Bytes.toBytes avoids the platform-default charset that String.getBytes() would use.
return Bytes.toBytes(LoadTestKVGenerator.md5PrefixedKey(keyBase));
}
@Override
public byte[][] getColumnFamilies() {
return columnMap.keySet().toArray(new byte[columnMap.size()][]);
}
@Override
public byte[][] generateColumnsForCf(byte[] rowKey, byte[] cf) {
return columnMap.get(cf);
}
@Override
public byte[] generateValue(byte[] rowKey, byte[] cf, byte[] column) {
if (Bytes.BYTES_COMPARATOR.compare(column, FILTER_COLUMN) == 0) {
// Deterministic way to make some rows pass the filter and others not: parse the
// first four hex chars of the MD5-based key prefix and mask with ACCEPTED_VALUE.
long value = Long.parseLong(Bytes.toString(rowKey, 0, 4), 16) & ACCEPTED_VALUE;
if (Bytes.BYTES_COMPARATOR.compare(cf, ESSENTIAL_CF) == 0) {
totalNumberOfKeys.incrementAndGet();
if (value == ACCEPTED_VALUE) {
expectedNumberOfKeys.incrementAndGet();
}
}
return Bytes.toBytes(value);
} else if (Bytes.BYTES_COMPARATOR.compare(column, VALUE_COLUMN) == 0) {
return kvGenerator.generateRandomSizeValue(rowKey, cf, column);
}
String error = "Unknown column " + Bytes.toString(column);
assert false : error;
throw new InvalidParameterException(error);
}
@Override
public boolean verify(byte[] rowKey, byte[] cf, byte[] column, byte[] value) {
if (Bytes.BYTES_COMPARATOR.compare(column, FILTER_COLUMN) == 0) {
// Relies on the filter from getScanFilter being used.
return Bytes.toLong(value) == ACCEPTED_VALUE;
} else if (Bytes.BYTES_COMPARATOR.compare(column, VALUE_COLUMN) == 0) {
return LoadTestKVGenerator.verify(value, rowKey, cf, column);
}
return false; // Unknown column in the read result; we don't expect any such thing.
}
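// Even with lazy CF loading, an accepted row must come back with the complete
// column set for every family.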
@Override
public boolean verify(byte[] rowKey, byte[] cf, Set<byte[]> columnSet) {
return columnMap.get(cf).length == columnSet.size();
}
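/** @return a scan filter matching only rows whose essential-CF filter column equals ACCEPTED_VALUE. */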
public Filter getScanFilter() {
SingleColumnValueFilter scf = new SingleColumnValueFilter(ESSENTIAL_CF, FILTER_COLUMN,
CompareFilter.CompareOp.EQUAL, Bytes.toBytes(ACCEPTED_VALUE));
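// By default, SingleColumnValueFilter passes rows that lack the tested column;
// require the column, so only rows with an accepted filter value are returned.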
scf.setFilterIfMissing(true);
return scf;
}
}

@Before
public void setUp() throws Exception {
LOG.info("Initializing cluster with " + NUM_SERVERS + " servers");
util.initializeCluster(NUM_SERVERS);
LOG.info("Done initializing cluster");
createTable();
}
private void createTable() throws Exception {
deleteTable();
LOG.info("Creating table");
HTableDescriptor htd = new HTableDescriptor(Bytes.toBytes(TABLE_NAME));
for (byte[] cf : dataGen.getColumnFamilies()) {
htd.addFamily(new HColumnDescriptor(cf));
}
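// Pre-split on hex boundaries; row keys carry a hex MD5 prefix, so the splits
// spread writes across all regions from the start.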
int serverCount = util.getHBaseClusterInterface().getClusterStatus().getServersSize();
byte[][] splits = new RegionSplitter.HexStringSplit().split(serverCount * REGIONS_PER_SERVER);
util.getHBaseAdmin().createTable(htd, splits);
LOG.info("Created table");
}
private void deleteTable() throws Exception {
if (util.getHBaseAdmin().tableExists(TABLE_NAME)) {
LOG.info("Deleting table");
if (!util.getHBaseAdmin().isTableDisabled(TABLE_NAME)) {
util.getHBaseAdmin().disableTable(TABLE_NAME);
}
util.getHBaseAdmin().deleteTable(TABLE_NAME);
LOG.info("Deleted table");
}
}
@After
public void tearDown() throws Exception {
deleteTable();
LOG.info("Restoring the cluster");
util.restoreCluster();
LOG.info("Done restoring the cluster");
}
@Test
public void testReadersAndWriters() throws Exception {
Configuration conf = util.getConfiguration();
String timeoutKey = String.format(TIMEOUT_KEY, this.getClass().getSimpleName());
long maxRuntime = conf.getLong(timeoutKey, DEFAULT_TIMEOUT_MINUTES);
long serverCount = util.getHBaseClusterInterface().getClusterStatus().getServersSize();
long keysToWrite = serverCount * KEYS_TO_WRITE_PER_SERVER;
HTable table = new HTable(conf, Bytes.toBytes(TABLE_NAME));
// Create multi-threaded writer and start it. We write multiple columns/CFs and verify
// their integrity, therefore multi-put is necessary.
MultiThreadedWriter writer =
new MultiThreadedWriter(dataGen, conf, Bytes.toBytes(TABLE_NAME));
writer.setMultiPut(true);
LOG.info("Starting writer; the number of keys to write is " + keysToWrite);
writer.start(1, keysToWrite, WRITER_THREADS);
// Now, do scans.
long now = EnvironmentEdgeManager.currentTimeMillis();
long timeLimit = now + (maxRuntime * 60000);
boolean isWriterDone = false;
while (now < timeLimit && !isWriterDone) {
LOG.info("Starting the scan; wrote approximately "
+ dataGen.getTotalNumberOfKeys() + " keys");
isWriterDone = writer.isDone();
if (isWriterDone) {
LOG.info("Scanning full result, writer is done");
}
Scan scan = new Scan();
for (byte[] cf : dataGen.getColumnFamilies()) {
scan.addFamily(cf);
}
scan.setFilter(dataGen.getScanFilter());
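// The feature under test: load only the "essential" CF (the one the filter needs)
// up front, and fetch the joined CFs on demand, only for rows the filter accepts.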
scan.setLoadColumnFamiliesOnDemand(true);
// Approximate lower bound on the number of keys the scan should return, captured
// before the scan. Not strict: the writer knows nothing about filters, so we take
// the count from the generator, and a value may be generated but not yet put.
long onesGennedBeforeScan = dataGen.getExpectedNumberOfKeys();
long startTs = EnvironmentEdgeManager.currentTimeMillis();
ResultScanner results = table.getScanner(scan);
long resultCount = 0;
Result result = null;
// Verify and count the results.
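// Both verification flags are on: check cell values and the per-CF column sets
// against the data generator (the column-set check is what catches a lazily
// loaded CF that came back incomplete).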
while ((result = results.next()) != null) {
boolean isOk = writer.verifyResultAgainstDataGenerator(result, true, true);
Assert.assertTrue("Failed to verify [" + Bytes.toString(result.getRow())+ "]", isOk);
++resultCount;
}
results.close();
long timeTaken = EnvironmentEdgeManager.currentTimeMillis() - startTs;
// Verify the result count.
long onesGennedAfterScan = dataGen.getExpectedNumberOfKeys();
Assert.assertTrue("Read " + resultCount + " keys when at most " + onesGennedAfterScan
+ " were generated ", onesGennedAfterScan >= resultCount);
if (isWriterDone) {
Assert.assertEquals("Read " + resultCount + " keys; the writer is done and "
+ onesGennedAfterScan + " keys were generated", onesGennedAfterScan, resultCount);
} else if (onesGennedBeforeScan * 0.9 > resultCount) {
LOG.warn("Read way too few keys (" + resultCount + "/" + onesGennedBeforeScan
+ ") - there might be a problem, or the writer might just be slow");
}
LOG.info("Scan took " + timeTaken + "ms");
if (!isWriterDone) {
Thread.sleep(WAIT_BETWEEN_SCANS_MS);
now = EnvironmentEdgeManager.currentTimeMillis();
}
}
Assert.assertEquals("There are write failures", 0, writer.getNumWriteFailures());
Assert.assertTrue("Writer is not done", isWriterDone);
// Assert.fail("Boom!");
}
}