/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.LargeTests;
import org.apache.hadoop.hbase.MiniHBaseCluster;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.CompareFilter;
import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Test;
import org.junit.experimental.categories.Category;

/**
 * Test performance improvement of joined scanners optimization:
 * https://issues.apache.org/jira/browse/HBASE-5416
 */
@Category(LargeTests.class)
public class TestJoinedScanners {
  static final Log LOG = LogFactory.getLog(TestJoinedScanners.class);

  private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
  private static final String DIR = TEST_UTIL.getDataTestDir("TestJoinedScanners").toString();

  private static final byte[] tableName = Bytes.toBytes("testTable");
  private static final byte[] cf_essential = Bytes.toBytes("essential");
  private static final byte[] cf_joined = Bytes.toBytes("joined");
  private static final byte[] col_name = Bytes.toBytes("a");
  private static final byte[] flag_yes = Bytes.toBytes("Y");
  private static final byte[] flag_no = Bytes.toBytes("N");

  @Test
  public void testJoinedScanners() throws Exception {
    String[] dataNodeHosts = new String[] { "host1", "host2", "host3" };
    int regionServersCount = 3;

    HBaseTestingUtility htu = new HBaseTestingUtility();

    final int DEFAULT_BLOCK_SIZE = 1024 * 1024;
    htu.getConfiguration().setLong("dfs.block.size", DEFAULT_BLOCK_SIZE);
    htu.getConfiguration().setInt("dfs.replication", 1);
    htu.getConfiguration().setLong("hbase.hregion.max.filesize", 322122547200L);
    MiniHBaseCluster cluster = null;

    try {
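      // Bring up a one-master mini cluster with three region servers, then load a
      // table whose "joined" family carries large values and whose "essential"
      // family carries a small Y/N flag consumed by the scan filter below.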
      cluster = htu.startMiniCluster(1, regionServersCount, dataNodeHosts);
      byte[][] families = { cf_essential, cf_joined };
      HTable ht = htu.createTable(Bytes.toBytes(this.getClass().getSimpleName()), families);

      long rows_to_insert = 10000;
      int insert_batch = 20;
      int flag_percent = 1;
      int large_bytes = 128 * 1024;
      long time = System.nanoTime();

      LOG.info("Make " + Long.toString(rows_to_insert) + " rows, total size = "
        + Float.toString(rows_to_insert * large_bytes / 1024 / 1024) + " MB");

      byte[] val_large = new byte[large_bytes];

      List<Put> puts = new ArrayList<Put>();

      for (long i = 0; i < rows_to_insert; i++) {
        Put put = new Put(Bytes.toBytes(Long.toString(i)));
        // Rows where i % 100 <= flag_percent (two in every hundred) get the "Y" flag.
        if (i % 100 <= flag_percent) {
          put.add(cf_essential, col_name, flag_yes);
        } else {
          put.add(cf_essential, col_name, flag_no);
        }
        put.add(cf_joined, col_name, val_large);
        puts.add(put);
        if (puts.size() >= insert_batch) {
          ht.put(puts);
          puts.clear();
        }
      }
      // Flush any remaining puts; the original "puts.size() >= 0" was always true
      // and re-sent an empty batch.
      if (puts.size() > 0) {
        ht.put(puts);
        puts.clear();
      }

      LOG.info("Data generated in "
        + Double.toString((System.nanoTime() - time) / 1000000000.0) + " seconds");

      // Alternate between the slow (full-row) scan and the joined scan so the
      // timings logged by runScanner can be compared directly.
      boolean slow = true;
      for (int i = 0; i < 20; ++i) {
        runScanner(ht, slow);
        slow = !slow;
      }

      ht.close();
    } finally {
      if (cluster != null) {
        htu.shutdownMiniCluster();
      }
    }
  }

  private void runScanner(HTable table, boolean slow) throws Exception {
    long time = System.nanoTime();
    Scan scan = new Scan();
    scan.addColumn(cf_essential, col_name);
    scan.addColumn(cf_joined, col_name);

    // Keep only rows flagged "Y" in the essential family; rows lacking the
    // column entirely are filtered out as well.
    SingleColumnValueFilter filter = new SingleColumnValueFilter(
      cf_essential, col_name, CompareFilter.CompareOp.EQUAL, flag_yes);
    filter.setFilterIfMissing(true);
    scan.setFilter(filter);
    // The optimization under test: when enabled, the large "joined" family is
    // loaded on demand, only for rows that pass the essential filter.
    scan.setLoadColumnFamiliesOnDemand(!slow);

    ResultScanner result_scanner = table.getScanner(scan);
    Result res;
    long rows_count = 0;
    while ((res = result_scanner.next()) != null) {
      rows_count++;
    }
    double timeSec = (System.nanoTime() - time) / 1000000000.0;
    result_scanner.close();
    // Each Result is one row, so log rows_count directly (the original divided by 2,
    // under-reporting the count).
    LOG.info((slow ? "Slow" : "Joined") + " scanner finished in " + Double.toString(timeSec)
      + " seconds, got " + Long.toString(rows_count) + " rows");
  }

  private static HRegion initHRegion(byte[] tableName, byte[] startKey, byte[] stopKey,
      String callingMethod, Configuration conf, byte[]... families) throws IOException {
    HTableDescriptor htd = new HTableDescriptor(tableName);
    for (byte[] family : families) {
      htd.addFamily(new HColumnDescriptor(family));
    }
    HRegionInfo info = new HRegionInfo(htd.getName(), startKey, stopKey, false);
    Path path = new Path(DIR + callingMethod);
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(path)) {
      if (!fs.delete(path, true)) {
        throw new IOException("Failed delete of " + path);
      }
    }
    return HRegion.createHRegion(info, path, conf, htd);
  }
}