/** * Copyright 2011 The Apache Software Foundation * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.hbase.index.util; import java.io.EOFException; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Comparator; import java.util.List; import java.util.Map; import java.util.Map.Entry; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathFilter; import org.apache.hadoop.hbase.HColumnDescriptor; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.TableNotFoundException; import org.apache.hadoop.hbase.client.Delete; import org.apache.hadoop.hbase.client.HBaseAdmin; import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.index.ColumnQualifier; import org.apache.hadoop.hbase.index.ColumnQualifier.ValueType; import org.apache.hadoop.hbase.index.Constants; import org.apache.hadoop.hbase.index.IndexSpecification; import org.apache.hadoop.hbase.index.IndexedHTableDescriptor; import org.apache.hadoop.hbase.index.ValuePartition; import org.apache.hadoop.hbase.index.coprocessor.master.IndexMasterObserver; import org.apache.hadoop.hbase.regionserver.HRegion; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.FSUtils; public class IndexUtils { private static final Log LOG = LogFactory.getLog(IndexUtils.class); private static final String DOT_TABLEINFO = ".tableinfo"; public static final String TABLE_INPUT_COLS = "table.columns.index"; /** * Utility method to get the name of the index table when given the name of the actual table. * @param tableName * @return index table name */ public static String getIndexTableName(String tableName) { // TODO The suffix for the index table is fixed now. Do we allow to make this configurable? // We can handle things in byte[] way? return tableName + Constants.INDEX_TABLE_SUFFIX; } /** * Utility method to get the name of the index table when given the name of the actual table. * @param tableName * @return index table name */ public static String getIndexTableName(byte[] tableName) { return getIndexTableName(Bytes.toString(tableName)); } /** * Tells whether the passed table is a secondary index table or a normal table. * @param tableName * @return */ public static boolean isIndexTable(String tableName) { return tableName.endsWith(Constants.INDEX_TABLE_SUFFIX); } /** * Tells whether the passed table is a secondary index table or a normal table. * @param tableName * @return */ public static boolean isIndexTable(byte[] tableName) { return isIndexTable(Bytes.toString(tableName)); } /** * Checks whether the passed table is a catalog table or not * @param tableName * @return true when the passed table is a catalog table. */ public static boolean isCatalogTable(byte[] tableName) { if (Bytes.equals(tableName, HConstants.ROOT_TABLE_NAME) || Bytes.equals(tableName, HConstants.META_TABLE_NAME)) { return true; } return false; } /** * Returns the max length allowed for the index name. * @return */ public static int getMaxIndexNameLength() { // TODO we need to allow customers to configure this value. return Constants.DEF_MAX_INDEX_NAME_LENGTH; } /** * Returns the main table name. * @param index table name * @return */ public static String extractActualTableName(String indexTableName) { int endIndex = indexTableName.length() - Constants.INDEX_TABLE_SUFFIX.length(); return indexTableName.substring(0, endIndex); } public static byte[] changeValueAccToDataType(byte[] value, ValueType valueType) { byte[] valueArr = new byte[value.length]; System.arraycopy(value, 0, valueArr, 0, value.length); if (valueArr.length == 0) return valueArr; switch (valueType) { case String: case Char: break; case Float: float f = Bytes.toFloat(valueArr); if (f > 0) { valueArr[0] ^= (1 << 7); } else { valueArr[0] ^= 0xff; valueArr[1] ^= 0xff; valueArr[2] ^= 0xff; valueArr[3] ^= 0xff; } break; case Double: double d = Bytes.toDouble(valueArr); if (d > 0) { valueArr[0] ^= (1 << 7); } else { for (int i = 0; i < 8; i++) { valueArr[i] ^= 0xff; } } break; case Int: case Long: case Short: case Byte: valueArr[0] ^= (1 << 7); break; } return valueArr; } // TODO check this... Is this ok with all cases? // No.. for -ve issues... Will see later.. public static byte[] incrementValue(byte[] value, boolean copy) { byte[] newValue = new byte[value.length]; if (copy) { System.arraycopy(value, 0, newValue, 0, newValue.length); } else { newValue = value; } for (int i = newValue.length - 1; i >= 0; i--) { byte b = newValue[i]; b = (byte) (b + 1); if (b == 0) { newValue[i] = 0; } else { newValue[i] = b; break; } } return newValue; } public static String getActualTableNameFromIndexTableName(String indexTableName) { String split[] = indexTableName.split(Constants.INDEX_TABLE_SUFFIX); return split[0]; } public static IndexedHTableDescriptor getIndexedHTableDescriptor(byte[] tableName, Configuration conf) throws IOException { IndexedHTableDescriptor tableDescriptor = null; FSDataInputStream fsDataInputStream = null; try { FileSystem fs = FSUtils.getCurrentFileSystem(conf); Path path = getTableInfoFilePath(conf, tableName, fs); if (null != path) { fsDataInputStream = fs.open(path); tableDescriptor = new IndexedHTableDescriptor(); tableDescriptor.readFields(fsDataInputStream); } } catch (IOException e) { if (e instanceof EOFException) { if (LOG.isDebugEnabled()) { LOG.debug("Error reading data from the table descriptor . Got " + e + " exception"); } tableDescriptor = null; } else { throw e; } } finally { if (fsDataInputStream != null) { try { fsDataInputStream.close(); } catch (IOException e) { LOG.error("IOException closing the input stream "); } } } return tableDescriptor; } private static Path getTableInfoFilePath(Configuration conf, byte[] tableName, FileSystem fs) throws IOException { Path path = FSUtils.getTablePath(FSUtils.getRootDir(conf), tableName); FileStatus[] status = FSUtils.listStatus(fs, path, new PathFilter() { @Override public boolean accept(Path p) { // Accept any file that starts with TABLEINFO_NAME return p.getName().startsWith(DOT_TABLEINFO); } }); if (status == null || status.length < 1) { return null; } Arrays.sort(status, new FileStatusFileNameComparator()); if (status.length > 1) { // Clean away old versions of .tableinfo for (int i = 1; i < status.length; i++) { Path p = status[i].getPath(); // Clean up old versions if (!fs.delete(p, false)) { LOG.warn("Failed cleanup of " + p); } else { if (LOG.isDebugEnabled()) { LOG.debug("Cleaned up old tableinfo file " + p); } } } } if (null != status[0]) { path = status[0].getPath(); } return path; } /** * Compare {@link FileStatus} instances by {@link Path#getName()}. Returns in reverse order. */ static class FileStatusFileNameComparator implements Comparator<FileStatus> { @Override public int compare(FileStatus left, FileStatus right) { return -left.compareTo(right); } } public static Put prepareIndexPut(Put userPut, IndexSpecification index, HRegion indexRegion) throws IOException { byte[] indexRegionStartKey = indexRegion.getStartKey(); return prepareIndexPut(userPut, index, indexRegionStartKey); } public static Delete prepareIndexDelete(Delete userDelete, IndexSpecification index, byte[] indexRegionStartKey) throws IOException { ByteArrayBuilder indexRow = IndexUtils.getIndexRowKeyHeader(index, indexRegionStartKey, userDelete.getRow()); boolean update = false; for (ColumnQualifier cq : index.getIndexColumns()) { KeyValue kvFound = null; for (Entry<byte[], List<KeyValue>> entry : userDelete.getFamilyMap().entrySet()) { for (KeyValue kv : entry.getValue()) { if (Bytes.equals(cq.getColumnFamily(), kv.getFamily()) && Bytes.equals(cq.getQualifier(), kv.getQualifier())) { kvFound = kv; update = true; break; } } } if (kvFound == null) { indexRow.position(indexRow.position() + cq.getMaxValueLength()); } else { IndexUtils.updateRowKeyForKV(cq, kvFound, indexRow); } } if (update) { // Append the actual row key at the end of the index row key. indexRow.put(userDelete.getRow()); Delete idxDelete = new Delete(indexRow.array()); idxDelete.deleteColumn(Constants.IDX_COL_FAMILY, Constants.IDX_COL_QUAL, userDelete.getTimeStamp()); idxDelete.setWriteToWAL(false); return idxDelete; } return null; } // Default access specifier for the UT public static Put prepareIndexPut(Put userPut, IndexSpecification index, byte[] indexRegionStartKey) throws IOException { long tsForIndexTabPut = 0; boolean bypass = true; for (ColumnQualifier c : index.getIndexColumns()) { List<KeyValue> values = userPut.get(c.getColumnFamily(), c.getQualifier()); if (null != values && values.size() > 0) { bypass = false; break; } } if (bypass) { // When this Put having no values for all the column in this index just skip this Put // from adding corresponding entry in the index table. return null; } byte[] primaryRowKey = userPut.getRow(); ByteArrayBuilder indexRowKey = getIndexRowKeyHeader(index, indexRegionStartKey, primaryRowKey); // STEP 3 : Adding the column value + padding for each of the columns in // the index. for (ColumnQualifier indexCQ : index.getIndexColumns()) { List<KeyValue> values = userPut.get(indexCQ.getColumnFamily(), indexCQ.getQualifier()); if (values == null || values.isEmpty()) { // There is no value provided for the column. Going with the padding // All the bytes in the byte[] 'indexRowKey' will be 0s already. // No need to put a 0 padding bytes. Just need to advance the position by col max value // length. indexRowKey.position(indexRowKey.position() + indexCQ.getMaxValueLength()); } else { // A put can contains diff version values for the same column. // We can consider the latest value only for the indexing. This needs to be documented. // TODO KeyValue kv = selectKVForIndexing(values); updateRowKeyForKV(indexCQ, kv, indexRowKey); if (tsForIndexTabPut < kv.getTimestamp()) { tsForIndexTabPut = kv.getTimestamp(); } } } // Remember the offset of rowkey and store it as value short rowKeyOffset = indexRowKey.position(); // STEP 4 : Adding the user table rowkey. indexRowKey.put(primaryRowKey); // Creating the value to be put into the index column // Last portion of index row key = [region start key length (2 bytes), offset of primary rowkey // in index rowkey (2 bytes)] ByteArrayBuilder indexColVal = ByteArrayBuilder.allocate(4); indexColVal.put(Bytes.toBytes((short) indexRegionStartKey.length)); indexColVal.put(Bytes.toBytes(rowKeyOffset)); Put idxPut = new Put(indexRowKey.array()); idxPut.add(Constants.IDX_COL_FAMILY, Constants.IDX_COL_QUAL, tsForIndexTabPut, indexColVal.array()); idxPut.setWriteToWAL(false); return idxPut; } private static KeyValue selectKVForIndexing(List<KeyValue> values) { KeyValue kv = null; long ts = HConstants.OLDEST_TIMESTAMP; for (KeyValue value : values) { // When the TS is same, then we need to consider the last KV // appearing in the KVList // as this will be added to the memstore with highest memstore TS. if (value.getTimestamp() >= ts) { kv = value; ts = value.getTimestamp(); } } return kv; } public static ByteArrayBuilder getIndexRowKeyHeader(IndexSpecification index, byte[] indexRegionStartKey, byte[] primaryRowKey) { /* * Format for the rowkey for index table [Startkey for the index region] + [one 0 byte] + [Index * name] + [Padding for the max index name] + [[index col value]+[padding for the max col value] * for each of the index col] + [user table row key] To know the reason for adding empty byte * array refert to HDP-1666 */ byte[] indexName = Bytes.toBytes(index.getName()); int totalValueLength = index.getTotalValueLength(); int maxIndexNameLength = IndexUtils.getMaxIndexNameLength(); int rowLength = indexRegionStartKey.length + maxIndexNameLength + totalValueLength + primaryRowKey.length + 1; ByteArrayBuilder row = ByteArrayBuilder.allocate(rowLength); // STEP 1 : Adding the startkey for the index region and single empty Byte. row.put(indexRegionStartKey); // one byte [0] to be added after the index region startkey. This is for the case of // entries added to the 1st region.Here the startkey of the region will be empty byte[] // So the 1st byte(s) which comes will be the index name and it might not fit into the // 1st region [As per the end key of that region] // Well all the bytes in the byte[] 'row' will be 0s already. No need to put a 0 byte // Just need to advance the position by 1 row.position(row.position() + 1); // STEP 2 : Adding the index name and the padding needed row.put(indexName); int padLength = maxIndexNameLength - indexName.length; // Well all the bytes in the byte[] 'row' will be 0s already. No need to put a 0 padding bytes // Just need to advance the position by padLength row.position(row.position() + padLength); return row; } public static void updateRowKeyForKV(ColumnQualifier indexCQ, KeyValue kv, ByteArrayBuilder indexRowKey) throws IOException { byte[] value = getValueFromKV(kv, indexCQ); int valuePadLength = indexCQ.getMaxValueLength() - value.length; if (valuePadLength < 0) { String errMsg = "The value length for the column " + indexCQ.getColumnFamilyString() + ":" + indexCQ.getQualifierString() + " is greater than the cofigured max value length : " + indexCQ.getMaxValueLength(); LOG.warn(errMsg); throw new IOException(errMsg); } indexRowKey.put(value); indexRowKey.position(indexRowKey.position() + valuePadLength); } private static byte[] getValueFromKV(KeyValue kv, ColumnQualifier indexCQ) { ValuePartition vp = indexCQ.getValuePartition(); byte value[] = null; if (vp != null) { value = vp.getPartOfValue(kv.getValue()); if (value != null) { value = IndexUtils.changeValueAccToDataType(value, indexCQ.getType()); } } else { LOG.trace("No offset or separator is mentioned. So just returning the value fetched from kv"); value = kv.getValue(); value = IndexUtils.changeValueAccToDataType(value, indexCQ.getType()); } return value; } public static byte[] getRowKeyFromKV(KeyValue kv) { byte[] row = kv.getRow(); // Row key of the index table entry = region startkey + index name + column value(s) // + actual table rowkey. // Every row in the index table will have exactly one KV in that. The value will be // 4 bytes. First 2 bytes specify length of the region start key bytes part in the // rowkey. Last 2 bytes specify the offset to the actual table rowkey part within the // index table rowkey. byte[] value = kv.getValue(); short actualRowKeyOffset = Bytes.toShort(value, 2); byte[] actualTableRowKey = new byte[row.length - actualRowKeyOffset]; System.arraycopy(row, actualRowKeyOffset, actualTableRowKey, 0, actualTableRowKey.length); return actualTableRowKey; } public static void createIndexTable(String userTable, Configuration conf, Map<String, List<String>> indexColumnFamily) throws IOException, InterruptedException, ClassNotFoundException { HBaseAdmin hbaseAdmin = new HBaseAdmin(conf); try { HTableDescriptor tableDescriptor = hbaseAdmin.getTableDescriptor(Bytes.toBytes(userTable)); Collection<HColumnDescriptor> existingColumnFamilies = tableDescriptor.getFamilies(); String input = conf.get(TABLE_INPUT_COLS); IndexedHTableDescriptor ihtd = parse(userTable, existingColumnFamilies, input, indexColumnFamily); // disable the table hbaseAdmin.disableTable(userTable); // This will create the index table. Also modifies the existing table htable descriptor. hbaseAdmin.modifyTable(Bytes.toBytes(userTable), ihtd); boolean found = false; while (!found) { try { hbaseAdmin.getTableDescriptor(Bytes.toBytes(IndexUtils.getIndexTableName(userTable))); } catch (TableNotFoundException tnfe) { Thread.sleep(1000); continue; } found = true; } hbaseAdmin.enableTable(Bytes.toBytes(userTable)); } finally { if (hbaseAdmin != null) { hbaseAdmin.close(); } } } // This can be a comma seperated list // We can pass like // IDX1=>cf1:[q1->datatype& // length],[q2],[q3];cf2:[q1->datatype&length],[q2->datatype&length],[q3->datatype& // lenght]#IDX2=>cf1:q5,q5 private static IndexedHTableDescriptor parse(String tableNameToIndex, Collection<HColumnDescriptor> existingColumnFamilies, String input, Map<String, List<String>> cfs) { IndexedHTableDescriptor indexHTableDesc = new IndexedHTableDescriptor(tableNameToIndex); List<String> colFamilyList = new ArrayList<String>(); for (HColumnDescriptor hColumnDescriptor : existingColumnFamilies) { indexHTableDesc.addFamily(hColumnDescriptor); colFamilyList.add(hColumnDescriptor.getNameAsString()); } if (input != null) { String[] indexSplits = input.split("#"); for (String index : indexSplits) { String[] indexName = index.split("=>"); if (indexName.length < 2) { System.out.println("Invalid entry."); System.exit(-1); } IndexSpecification iSpec = new IndexSpecification(indexName[0]); String[] cfSplits = indexName[1].split(";"); if (cfSplits.length < 1) { System.exit(-1); } else { for (String cf : cfSplits) { String[] qualSplits = cf.split(":"); if (qualSplits.length < 2) { System.out.println("The qualifiers are not given"); System.exit(-1); } if (!colFamilyList.contains(qualSplits[0])) { System.out.println("Valid CF not found"); System.exit(-1); } String[] qualDetails = qualSplits[1].split(","); for (String details : qualDetails) { String substring = details.substring(1, details.lastIndexOf("]")); if (substring != null) { String[] splitQual = substring.split("->"); if (splitQual.length < 2) { System.out.println("Default value length and data type will be take"); iSpec.addIndexColumn(new HColumnDescriptor(qualSplits[0]), splitQual[0], ValueType.String, Constants.DEF_MAX_INDEX_NAME_LENGTH); } else { String[] valueType = splitQual[1].split("&"); iSpec.addIndexColumn(new HColumnDescriptor(qualSplits[0]), splitQual[0], ValueType.valueOf(valueType[0]), Integer.parseInt(valueType[1])); } if (cfs != null) { addToMap(cfs, qualSplits, splitQual); } } } } } indexHTableDesc.addIndex(iSpec); } } return indexHTableDesc; } private static void addToMap(Map<String, List<String>> cfs, String[] qualSplits, String[] splitQual) { if (cfs.get(qualSplits[0]) == null) { List<String> qual = new ArrayList<String>(); qual.add(splitQual[0]); cfs.put(qualSplits[0], qual); } else { List<String> list = cfs.get(qualSplits[0]); list.add(splitQual[0]); } } /** * Reads the indexed table description directly from the file. * @param tableName Table name * @param conf HBase Configuration * @return HTableDescriptor * @throws IOException */ public static HTableDescriptor readIndexedHTableDescriptor(String tableName, Configuration conf) throws IOException { IndexedHTableDescriptor indexedHTabDescriptor = new IndexedHTableDescriptor(); FSDataInputStream fsDataInputStream = null; try { FileSystem fs = FSUtils.getCurrentFileSystem(conf); Path rootPath = FSUtils.getRootDir(conf); Path path = FSUtils.getTablePath(rootPath, tableName); FileStatus status = IndexMasterObserver.getTableInfoPath(fs, path); if (null == status) { throw new IOException(tableName + " status is null"); } fsDataInputStream = fs.open(status.getPath()); indexedHTabDescriptor.readFields(fsDataInputStream); return indexedHTabDescriptor; } catch (EOFException e) { return new HTableDescriptor(indexedHTabDescriptor); } catch (IOException i) { throw i; } finally { if (fsDataInputStream != null) { fsDataInputStream.close(); } } } public static byte[][] getSplitKeys(HRegionInfo[] regions) { byte[][] splitKeys = null; if (null != regions && regions.length > 1) { // for the 1st region always the start key will be empty. We no need to // pass this as a start key item for the index table because this will // be added by HBase any way. So if we pass empty, HBase will create one // extra region with start and end key as empty byte[]. splitKeys = new byte[regions.length - 1][]; int i = 0; for (HRegionInfo region : regions) { byte[] startKey = region.getStartKey(); if (startKey.length > 0) { splitKeys[i++] = startKey; } } } return splitKeys; } }