package com.cloudera.sa.spark.hbase.example;

import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.hbase.JavaHBaseContext;

import scala.Tuple2;
import scala.Tuple3;

public class JavaHBaseDistributedScan {

  public static void main(String[] args) {
    if (args.length < 2) {
      System.out.println("JavaHBaseDistributedScan {master} {tableName}");
      return; // exit early instead of failing on the missing arguments
    }

    String master = args[0];
    String tableName = args[1];

    // Set up the Spark context and ship the application jar to the executors.
    JavaSparkContext jsc = new JavaSparkContext(master,
        "JavaHBaseDistributedScan");
    jsc.addJar("SparkHBase.jar");

    // Build an HBase configuration from the cluster's client config files.
    Configuration conf = HBaseConfiguration.create();
    conf.addResource(new Path("/etc/hbase/conf/core-site.xml"));
    conf.addResource(new Path("/etc/hbase/conf/hbase-site.xml"));

    JavaHBaseContext hbaseContext = new JavaHBaseContext(jsc, conf);

    // A plain Scan over the whole table; caching controls how many rows
    // each RPC to a region server fetches at a time.
    Scan scan = new Scan();
    scan.setCaching(100);

    // hbaseRDD runs the scan as a distributed Spark job: each partition
    // scans a slice of the table and yields (rowKey, list of cells).
    JavaRDD<Tuple2<byte[], List<Tuple3<byte[], byte[], byte[]>>>> javaRdd =
        hbaseContext.hbaseRDD(tableName, scan);

    // Pull the results back to the driver and report how many rows came back.
    List<Tuple2<byte[], List<Tuple3<byte[], byte[], byte[]>>>> results =
        javaRdd.collect();
    System.out.println("Scan returned " + results.size() + " rows");

    jsc.stop();
  }
}
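To see the scanned data rather than just the row count, the collected tuples can be decoded with HBase's Bytes utility. The helper below is a minimal sketch, not part of the original example: it assumes the Tuple3 elements are ordered (column family, qualifier, value) as SparkOnHBase's hbaseRDD produces them, and could be called from main() right after collect().

  // Hypothetical helper for illustration only. Assumes each Tuple3 holds
  // (family, qualifier, value); adjust if your SparkOnHBase version differs.
  // Requires: import org.apache.hadoop.hbase.util.Bytes;
  static void printRows(
      List<Tuple2<byte[], List<Tuple3<byte[], byte[], byte[]>>>> rows) {
    for (Tuple2<byte[], List<Tuple3<byte[], byte[], byte[]>>> row : rows) {
      // Row keys may contain non-printable bytes, so use toStringBinary.
      System.out.println("row: " + Bytes.toStringBinary(row._1()));
      for (Tuple3<byte[], byte[], byte[]> cell : row._2()) {
        System.out.println("  " + Bytes.toString(cell._1()) + ":"
            + Bytes.toString(cell._2()) + " = "
            + Bytes.toStringBinary(cell._3()));
      }
    }
  }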