package com.cloudera.sa.spark.hbase.example;

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.hbase.JavaHBaseContext;

public class JavaHBaseBulkPutExample {

  public static void main(String[] args) {
    if (args.length < 3) {
      System.out
          .println("JavaHBaseBulkPutExample {master} {tableName} {columnFamily}");
      return;
    }

    String master = args[0];
    String tableName = args[1];
    String columnFamily = args[2];

    JavaSparkContext jsc = new JavaSparkContext(master,
        "JavaHBaseBulkPutExample");
    jsc.addJar("SparkHBase.jar");

    try {
      // Build a small in-memory dataset; each record encodes
      // "rowKey,columnFamily,qualifier,value".
      List<String> list = new ArrayList<String>();
      list.add("1," + columnFamily + ",a,1");
      list.add("2," + columnFamily + ",a,2");
      list.add("3," + columnFamily + ",a,3");
      list.add("4," + columnFamily + ",a,4");
      list.add("5," + columnFamily + ",a,5");

      JavaRDD<String> rdd = jsc.parallelize(list);

      // Load the cluster's Hadoop/HBase configuration.
      Configuration conf = HBaseConfiguration.create();
      conf.addResource(new Path("/etc/hbase/conf/core-site.xml"));
      conf.addResource(new Path("/etc/hbase/conf/hbase-site.xml"));

      JavaHBaseContext hbaseContext = new JavaHBaseContext(jsc, conf);

      // Write every record in the RDD to HBase as a Put; the final
      // argument is the autoFlush flag passed to the underlying table.
      hbaseContext.bulkPut(rdd, tableName, new PutFunction(), true);
    } finally {
      jsc.stop();
    }
  }

  /** Converts a "rowKey,family,qualifier,value" record into an HBase Put. */
  public static class PutFunction implements Function<String, Put> {

    private static final long serialVersionUID = 1L;

    public Put call(String v) throws Exception {
      String[] cells = v.split(",");
      Put put = new Put(Bytes.toBytes(cells[0]));
      put.add(Bytes.toBytes(cells[1]), Bytes.toBytes(cells[2]),
          Bytes.toBytes(cells[3]));
      return put;
    }
  }
}