/*
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.regionserver;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HBaseTestCase;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.MediumTests;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.hfile.BlockCache;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManagerTestHelper;
import org.junit.Test;
import org.junit.experimental.categories.Category;

@Category(MediumTests.class)
public class TestBlocksRead extends HBaseTestCase {
static final Log LOG = LogFactory.getLog(TestBlocksRead.class);
static final BloomType[] BLOOM_TYPE = new BloomType[] { BloomType.ROWCOL,
BloomType.ROW, BloomType.NONE };
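// Block cache shared across the test; captured from the region's CacheConfig in initHRegion().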
private static BlockCache blockCache;
private HBaseConfiguration getConf() {
HBaseConfiguration conf = new HBaseConfiguration();
// Disable compactions in this test so flushed store files stay separate.
conf.setInt("hbase.hstore.compactionThreshold", 10000);
return conf;
}
HRegion region = null;
private HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
private final String DIR = TEST_UTIL.getDataTestDir("TestBlocksRead").toString();
/**
* @see org.apache.hadoop.hbase.HBaseTestCase#setUp()
*/
@SuppressWarnings("deprecation")
@Override
protected void setUp() throws Exception {
super.setUp();
}
@SuppressWarnings("deprecation")
@Override
protected void tearDown() throws Exception {
super.tearDown();
EnvironmentEdgeManagerTestHelper.reset();
}
/**
* Creates and initializes a test region. Callers must afterward call
* {@link HRegion#closeHRegion(HRegion)}.
* @param tableName the table to create
* @param callingMethod name of the calling method, used to build the region path
* @param conf the configuration to use
* @param family base column family name; one family per bloom type is created
* @return created and initialized region.
* @throws IOException
*/
private HRegion initHRegion(byte[] tableName, String callingMethod,
HBaseConfiguration conf, String family) throws IOException {
HTableDescriptor htd = new HTableDescriptor(tableName);
HColumnDescriptor familyDesc;
for (int i = 0; i < BLOOM_TYPE.length; i++) {
BloomType bloomType = BLOOM_TYPE[i];
familyDesc = new HColumnDescriptor(family + "_" + bloomType)
.setBlocksize(1)
.setBloomFilterType(bloomType);
htd.addFamily(familyDesc);
}
HRegionInfo info = new HRegionInfo(htd.getName(), null, null, false);
Path path = new Path(DIR + callingMethod);
HRegion r = HRegion.createHRegion(info, path, conf, htd);
blockCache = new CacheConfig(conf).getBlockCache();
return r;
}
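
/**
* Puts a single version of a value into each of the three bloom-type
* column families (ROWCOL, ROW, NONE), so every test exercises all
* bloom filter configurations.
*/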
private void putData(String family, String row, String col, long version)
throws IOException {
for (int i = 0; i < BLOOM_TYPE.length; i++) {
putData(Bytes.toBytes(family + "_" + BLOOM_TYPE[i]), row, col, version,
version);
}
}
// Generates a deterministic value to put for a row/col/version.
private static byte[] genValue(String row, String col, long version) {
return Bytes.toBytes("Value:" + row + "#" + col + "#" + version);
}
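
/**
* Puts one KeyValue per version in [versionStart, versionEnd] for the
* given row/column, with WAL writes disabled to keep the test fast.
*/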
private void putData(byte[] cf, String row, String col, long versionStart,
long versionEnd) throws IOException {
byte[] columnBytes = Bytes.toBytes(col);
Put put = new Put(Bytes.toBytes(row));
put.setWriteToWAL(false);
for (long version = versionStart; version <= versionEnd; version++) {
put.add(cf, columnBytes, version, genValue(row, col, version));
}
region.put(put);
}
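
/**
* Fetches the given columns for a row from each bloom-type family and
* asserts that the number of data blocks read matches the expected
* count for that bloom type. An expected count of -1 skips the check.
*/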
private KeyValue[] getData(String family, String row, List<String> columns,
int expBlocks) throws IOException {
return getData(family, row, columns, expBlocks, expBlocks, expBlocks);
}
private KeyValue[] getData(String family, String row, List<String> columns,
int expBlocksRowCol, int expBlocksRow, int expBlocksNone)
throws IOException {
int[] expBlocks = new int[] { expBlocksRowCol, expBlocksRow, expBlocksNone };
KeyValue[] kvs = null;
for (int i = 0; i < BLOOM_TYPE.length; i++) {
BloomType bloomType = BLOOM_TYPE[i];
byte[] cf = Bytes.toBytes(family + "_" + bloomType);
long blocksStart = getBlkAccessCount(cf);
Get get = new Get(Bytes.toBytes(row));
for (String column : columns) {
get.addColumn(cf, Bytes.toBytes(column));
}
kvs = region.get(get).raw();
long blocksEnd = getBlkAccessCount(cf);
if (expBlocks[i] != -1) {
assertEquals("Blocks Read Check for Bloom: " + bloomType, expBlocks[i],
blocksEnd - blocksStart);
}
LOG.info("Blocks Read for Bloom: " + bloomType + " = "
+ (blocksEnd - blocksStart) + ", Expected = " + expBlocks[i]);
}
return kvs;
}
private KeyValue[] getData(String family, String row, String column,
int expBlocks) throws IOException {
return getData(family, row, Arrays.asList(column), expBlocks, expBlocks,
expBlocks);
}
private KeyValue[] getData(String family, String row, String column,
int expBlocksRowCol, int expBlocksRow, int expBlocksNone)
throws IOException {
return getData(family, row, Arrays.asList(column), expBlocksRowCol,
expBlocksRow, expBlocksNone);
}
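
/**
* Deletes the entire row at the given version by placing a family
* delete marker in each of the three bloom-type families.
*/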
private void deleteFamily(String family, String row, long version)
throws IOException {
Delete del = new Delete(Bytes.toBytes(row));
del.deleteFamily(Bytes.toBytes(family + "_ROWCOL"), version);
del.deleteFamily(Bytes.toBytes(family + "_ROW"), version);
del.deleteFamily(Bytes.toBytes(family + "_NONE"), version);
region.delete(del, true);
}
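
/** Asserts that a KeyValue's row, qualifier, timestamp, and value match expectations. */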
private static void verifyData(KeyValue kv, String expectedRow,
String expectedCol, long expectedVersion) {
assertEquals("RowCheck", expectedRow, Bytes.toString(kv.getRow()));
assertEquals("ColumnCheck", expectedCol, Bytes.toString(kv.getQualifier()));
assertEquals("TSCheck", expectedVersion, kv.getTimestamp());
assertEquals("ValueCheck",
Bytes.toString(genValue(expectedRow, expectedCol, expectedVersion)),
Bytes.toString(kv.getValue()));
}
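
/**
* Returns the number of data blocks read so far. Note that
* HFile.dataBlockReadCnt is a process-wide counter, so the cf argument
* is unused; it is kept only for symmetry with the call sites.
*/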
private static long getBlkAccessCount(byte[] cf) {
return HFile.dataBlockReadCnt.get();
}
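
/** Returns the total number of blocks currently held in the block cache. */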
private static long getBlkCount() {
return blockCache.getBlockCount();
}
/**
* Test # of blocks read for some simple seek cases.
*
* @throws Exception
*/
@Test
public void testBlocksRead() throws Exception {
byte[] TABLE = Bytes.toBytes("testBlocksRead");
String FAMILY = "cf1";
KeyValue[] kvs;
HBaseConfiguration conf = getConf();
this.region = initHRegion(TABLE, getName(), conf, FAMILY);
try {
putData(FAMILY, "row", "col1", 1);
putData(FAMILY, "row", "col2", 2);
putData(FAMILY, "row", "col3", 3);
putData(FAMILY, "row", "col4", 4);
putData(FAMILY, "row", "col5", 5);
putData(FAMILY, "row", "col6", 6);
putData(FAMILY, "row", "col7", 7);
region.flushcache();
// Expected block reads: 1.
// The top block contains the KV we are
// interested in, so only 1 seek is needed.
kvs = getData(FAMILY, "row", "col1", 1);
assertEquals(1, kvs.length);
verifyData(kvs[0], "row", "col1", 1);
// Expected block reads: 2.
// The top block and the next block contain the KVs we are
// interested in, so only 2 seeks are needed.
kvs = getData(FAMILY, "row", Arrays.asList("col1", "col2"), 2);
assertEquals(2, kvs.length);
verifyData(kvs[0], "row", "col1", 1);
verifyData(kvs[1], "row", "col2", 2);
// Expected block reads: 3.
// The first 2 seeks are needed to find col2 [HBASE-4443],
// plus one additional seek for col3,
// so 3 seeks are needed in total.
kvs = getData(FAMILY, "row", Arrays.asList("col2", "col3"), 3);
assertEquals(2, kvs.length);
verifyData(kvs[0], "row", "col2", 2);
verifyData(kvs[1], "row", "col3", 3);
// Expected block reads: 2. [HBASE-4443]
kvs = getData(FAMILY, "row", Arrays.asList("col5"), 2);
assertEquals(1, kvs.length);
verifyData(kvs[0], "row", "col5", 5);
} finally {
HRegion.closeHRegion(this.region);
this.region = null;
}
}
/**
* Test # of blocks read (targeted at some of the cases Lazy Seek optimizes).
*
* @throws Exception
*/
@Test
public void testLazySeekBlocksRead() throws Exception {
byte[] TABLE = Bytes.toBytes("testLazySeekBlocksRead");
String FAMILY = "cf1";
KeyValue[] kvs;
HBaseConfiguration conf = getConf();
this.region = initHRegion(TABLE, getName(), conf, FAMILY);
try {
// File 1
putData(FAMILY, "row", "col1", 1);
putData(FAMILY, "row", "col2", 2);
region.flushcache();
// File 2
putData(FAMILY, "row", "col1", 3);
putData(FAMILY, "row", "col2", 4);
region.flushcache();
// Expected blocks read: 1.
// File 2's top block contains the KV we are
// interested in, so only 1 seek is needed.
kvs = getData(FAMILY, "row", Arrays.asList("col1"), 1);
assertEquals(1, kvs.length);
verifyData(kvs[0], "row", "col1", 3);
// Expected blocks read: 2.
// File 2's top block has the "col1" KV we are
// interested in. We also need "col2", which is in a block
// of its own, so we need that block as well.
kvs = getData(FAMILY, "row", Arrays.asList("col1", "col2"), 2);
assertEquals(2, kvs.length);
verifyData(kvs[0], "row", "col1", 3);
verifyData(kvs[1], "row", "col2", 4);
// File 3: Add another column
putData(FAMILY, "row", "col3", 5);
region.flushcache();
// Expected blocks read: 1.
// File 3's top block has the "col3" KV we are
// interested in, so only 1 seek is needed.
kvs = getData(FAMILY, "row", "col3", 1);
assertEquals(1, kvs.length);
verifyData(kvs[0], "row", "col3", 5);
// Get a column from older file.
// For ROWCOL Bloom filter: Expected blocks read: 1.
// For ROW Bloom filter: Expected blocks read: 2.
// For NONE Bloom filter: Expected blocks read: 2.
kvs = getData(FAMILY, "row", Arrays.asList("col1"), 1, 2, 2);
assertEquals(1, kvs.length);
verifyData(kvs[0], "row", "col1", 3);
// File 4: Delete the entire row.
deleteFamily(FAMILY, "row", 6);
region.flushcache();
// For ROWCOL Bloom filter: Expected blocks read: 2.
// For ROW Bloom filter: Expected blocks read: 3.
// For NONE Bloom filter: Expected blocks read: 3.
kvs = getData(FAMILY, "row", "col1", 2, 3, 3);
assertEquals(0, kvs.length);
kvs = getData(FAMILY, "row", "col2", 3, 4, 4);
assertEquals(0, kvs.length);
kvs = getData(FAMILY, "row", "col3", 2);
assertEquals(0, kvs.length);
kvs = getData(FAMILY, "row", Arrays.asList("col1", "col2", "col3"), 4);
assertEquals(0, kvs.length);
// File 5: Delete the entire row again, at a newer timestamp.
deleteFamily(FAMILY, "row", 10);
region.flushcache();
// File 6: some more puts, but with timestamps older than the
// previous delete.
putData(FAMILY, "row", "col1", 7);
putData(FAMILY, "row", "col2", 8);
putData(FAMILY, "row", "col3", 9);
region.flushcache();
// Baseline expected blocks read: 8. [HBASE-4532]
kvs = getData(FAMILY, "row", Arrays.asList("col1", "col2", "col3"), 5);
assertEquals(0, kvs.length);
// File 7: Put back new data
putData(FAMILY, "row", "col1", 11);
putData(FAMILY, "row", "col2", 12);
putData(FAMILY, "row", "col3", 13);
region.flushcache();
// Expected blocks read: 5. [HBASE-4585]
kvs = getData(FAMILY, "row", Arrays.asList("col1", "col2", "col3"), 5);
assertEquals(3, kvs.length);
verifyData(kvs[0], "row", "col1", 11);
verifyData(kvs[1], "row", "col2", 12);
verifyData(kvs[2], "row", "col3", 13);
} finally {
HRegion.closeHRegion(this.region);
this.region = null;
}
}
/**
* Test # of blocks stored in the block cache to ensure that disabling
* cache-fill on Scan works.
* @throws Exception
*/
@Test
public void testBlocksStoredWhenCachingDisabled() throws Exception {
byte[] TABLE = Bytes.toBytes("testBlocksReadWhenCachingDisabled");
String FAMILY = "cf1";
HBaseConfiguration conf = getConf();
this.region = initHRegion(TABLE, getName(), conf, FAMILY);
try {
putData(FAMILY, "row", "col1", 1);
putData(FAMILY, "row", "col2", 2);
region.flushcache();
// Execute a scan with caching turned off
// Expected blocks stored: 0
long blocksStart = getBlkCount();
Scan scan = new Scan();
scan.setCacheBlocks(false);
RegionScanner rs = region.getScanner(scan);
List<KeyValue> result = new ArrayList<KeyValue>(2);
rs.next(result);
assertEquals(2 * BLOOM_TYPE.length, result.size());
rs.close();
long blocksEnd = getBlkCount();
assertEquals(blocksStart, blocksEnd);
// Execute with caching turned on
// Expected blocks stored: 2
blocksStart = blocksEnd;
scan.setCacheBlocks(true);
rs = region.getScanner(scan);
result = new ArrayList<KeyValue>(2);
rs.next(result);
assertEquals(2 * BLOOM_TYPE.length, result.size());
rs.close();
blocksEnd = getBlkCount();
assertEquals(2 * BLOOM_TYPE.length, blocksEnd - blocksStart);
} finally {
HRegion.closeHRegion(this.region);
this.region = null;
}
}
@Test
public void testLazySeekBlocksReadWithDelete() throws Exception {
byte[] TABLE = Bytes.toBytes("testLazySeekBlocksReadWithDelete");
String FAMILY = "cf1";
KeyValue[] kvs;
HBaseConfiguration conf = getConf();
this.region = initHRegion(TABLE, getName(), conf, FAMILY);
try {
deleteFamily(FAMILY, "row", 200);
for (int i = 0; i < 100; i++) {
putData(FAMILY, "row", "col" + i, i);
}
putData(FAMILY, "row", "col99", 201);
region.flushcache();
kvs = getData(FAMILY, "row", Arrays.asList("col0"), 2);
assertEquals(0, kvs.length);
kvs = getData(FAMILY, "row", Arrays.asList("col99"), 2);
assertEquals(1, kvs.length);
verifyData(kvs[0], "row", "col99", 201);
} finally {
HRegion.closeHRegion(this.region);
this.region = null;
}
}
}