/**
* Copyright 2011 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.hbase.index.mapreduce;

import java.io.IOException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.index.util.IndexUtils;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat;
import org.apache.hadoop.hbase.mapreduce.KeyValueSortReducer;
import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
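
/**
 * A MapReduce driver that builds index data for an existing table. A sketch of
 * a typical invocation (the table, index, and path names here are hypothetical):
 *
 * <pre>
 * hbase org.apache.hadoop.hbase.index.mapreduce.TableIndexer
 *     -Dtablename.to.index='table1'
 *     -Dtable.columns.index='IDX1=>cf1:q1'
 *     -Dimport.bulk.output=/tmp/index_output 500 1
 * </pre>
 *
 * The two trailing arguments are the scanner caching and the max versions to
 * scan; defaults are used when they cannot be parsed.
 */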
public class TableIndexer {
  private static final String TABLE_NAME_TO_INDEX = "tablename.to.index";
  static final String BULK_OUTPUT_CONF_KEY = "import.bulk.output";
  private static final int DEFAULT_CACHING = 500;
  private static final int DEFAULT_VERSIONS = 1;

  // This can be a comma separated list, passed like:
  // IDX1=>cf1:[q1->datatype&length],[q2],[q3];cf2:[q1->datatype&length],
  // [q2->datatype&length],[q3->datatype&length]#IDX2=>cf1:q5,q5
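  // For example (hypothetical names), a single index IDX1 on cf1:q1 stored as
  // a 10-byte String could be passed as:
  //   -Dtable.columns.index='IDX1=>cf1:[q1->String&10]'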
  private static Map<String, List<String>> cfs = new HashMap<String, List<String>>();

  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
      System.out.println("Scanner caching and max versions not specified");
      System.exit(-1);
    }
    int caching = -1;
    int versions = -1;
    try {
      caching = Integer.parseInt(otherArgs[0]);
    } catch (NumberFormatException nfe) {
      // Fall back to the default when the argument is not a valid integer.
      caching = DEFAULT_CACHING;
    }
    try {
      versions = Integer.parseInt(otherArgs[1]);
    } catch (NumberFormatException nfe) {
      versions = DEFAULT_VERSIONS;
    }
    String[] tableName = conf.getStrings(TABLE_NAME_TO_INDEX);
    if (tableName == null) {
      System.out
          .println("Wrong usage. Pass the table as -Dtablename.to.index='table1' "
              + "-Dtable.columns.index='IDX1=>cf1:[q1->datatype&length],[q2],"
              + "[q3];cf2:[q1->datatype&length],[q2->datatype&length],[q3->datatype&length]#IDX2=>cf1:q5,q5'");
      System.out.println("The format used here is: ");
      System.out.println("IDX1 - Index name");
      System.out.println("cf1 - Column family name");
      System.out.println("q1 - Qualifier name");
      System.out.println("datatype - Datatype (Int, String, Double, Float)");
      System.out.println("length - Length of the value");
      System.out.println("Column families should be separated by ';'");
      System.out
          .println("The qualifier and its datatype and length should be enclosed in '[]'."
              + " The qualifier details follow the qualifier name after '->' and are separated by '&'");
      System.out.println("If the qualifier details are not specified, default values are used.");
      System.out.println("'#' is used to separate the details of two indices");
      System.out.println("Pass the scanner caching and max versions as arguments.");
      System.exit(-1);
    }
    String tableNameToIndex = tableName[0];
    // createIndexTable is also expected to populate the cfs map from the
    // -Dtable.columns.index spec; the map is used when building the Scan.
    IndexUtils.createIndexTable(tableNameToIndex, conf, cfs);
    createMapReduceJob(tableNameToIndex, conf, caching, versions);
  }
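
  /**
   * Sets up and runs the index creation job: scans the table being indexed
   * with the configured caching and max versions, maps each row to index
   * Puts, and either writes them directly to the index table or, when
   * import.bulk.output is set, writes HFiles for a later bulk load.
   */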
  private static void createMapReduceJob(String tableNameToIndex, Configuration conf, int caching,
      int versions) throws IOException, InterruptedException, ClassNotFoundException {
    // Set the scan details for TableInputFormat.
    Scan s = new Scan();
    s.setCaching(caching);
    s.setMaxVersions(versions);
    conf.set(TableInputFormat.INPUT_TABLE, tableNameToIndex);
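    // Restrict the scan to the column families and qualifiers that are indexed.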
    Set<Entry<String, List<String>>> entrySet = cfs.entrySet();
    for (Entry<String, List<String>> entry : entrySet) {
      List<String> quals = entry.getValue();
      addColumn(quals, Bytes.toBytes(entry.getKey()), s);
    }
    Job job = new Job(conf, "CreateIndex");
    String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);
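    // When import.bulk.output is set, the job writes HFiles to that path for a
    // later bulk load instead of writing directly to the index table.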
    TableMapReduceUtil.initTableMapperJob(tableNameToIndex, // input table
        s, // Scan instance to control CF and attribute selection
        IndexCreationMapper.class, // mapper class
        ImmutableBytesWritable.class, // mapper output key
        Put.class, // mapper output value
        job);
    TableMapReduceUtil.initTableReducerJob(
        IndexUtils.getIndexTableName(tableNameToIndex), // output table
        null, // reducer class
        job);
    if (hfileOutPath != null) {
      HTable table = new HTable(conf, tableNameToIndex);
      job.setReducerClass(KeyValueSortReducer.class);
      Path outputDir = new Path(hfileOutPath);
      FileOutputFormat.setOutputPath(job, outputDir);
      HFileOutputFormat.configureIncrementalLoad(job, table);
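      // Note: the HFiles written under import.bulk.output still have to be
      // loaded into the index table afterwards, e.g. with the HBase
      // completebulkload tool (LoadIncrementalHFiles).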
    } else {
      // Without a bulk output path, the mappers write Puts directly to the
      // index table.
      job.setNumReduceTasks(0);
    }
    TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
        com.google.common.base.Preconditions.class);
    if (!job.waitForCompletion(true)) {
      System.exit(1);
    }
  }

  private static void addColumn(List<String> quals, byte[] cf, Scan s) {
    for (String q : quals) {
      s.addColumn(cf, Bytes.toBytes(q));
    }
  }
}