package com.cloudera.sa.spark.hbase.example;

import java.io.File;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HConnection;
import org.apache.hadoop.hbase.client.HTableInterface;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.VoidFunction;
import org.apache.spark.hbase.JavaHBaseContext;

import scala.Tuple2;

import com.google.common.io.Files;
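
/**
 * Runnable local example: spins up a ZooKeeper + HBase mini-cluster, creates
 * a test table, and demonstrates the JavaHBaseContext foreachPartition and
 * foreach operations against it.
 */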
public class TestJavaLocalMainExample {

  private static transient JavaSparkContext jsc;
  private static transient File tempDir;

  static HBaseTestingUtility htu;
  static String tableName = "t1";
  static String columnFamily = "c";

  public static void main(String[] args) {
    setUp();

    Configuration conf = htu.getConfiguration();
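    // The JavaHBaseContext ties the Spark context to the HBase configuration
    // of the mini-cluster and hands the worker functions an HConnection to use.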
    JavaHBaseContext hbaseContext = new JavaHBaseContext(jsc, conf);

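    // Build a small RDD of row keys to drive the gets and puts below.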
    List<byte[]> list = new ArrayList<byte[]>();
    list.add(Bytes.toBytes("1"));
    list.add(Bytes.toBytes("2"));
    list.add(Bytes.toBytes("3"));
    list.add(Bytes.toBytes("4"));
    list.add(Bytes.toBytes("5"));
    JavaRDD<byte[]> rdd = jsc.parallelize(list);

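    // foreachPartition: the function is called once per partition and is given
    // the partition's iterator plus an HConnection, so the table is opened once
    // per partition rather than once per record.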
    hbaseContext.foreachPartition(rdd,
        new VoidFunction<Tuple2<Iterator<byte[]>, HConnection>>() {
          public void call(Tuple2<Iterator<byte[]>, HConnection> t)
              throws Exception {
            HTableInterface table1 = t._2().getTable(Bytes.toBytes(tableName));
            Iterator<byte[]> it = t._1();
            while (it.hasNext()) {
              byte[] b = it.next();
              Result r = table1.get(new Get(b));
              if (!r.isEmpty()) {
                // Row exists: write a cell back to it (the qualifier "q" is
                // arbitrary, chosen only for this example).
                Put put = new Put(b);
                put.add(Bytes.toBytes(columnFamily), Bytes.toBytes("q"), b);
                table1.put(put);
              }
            }
            table1.close();
          }
        });

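    // foreach: same idea, but the function is invoked once per record, each
    // call receiving a single row key together with an HConnection.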
    hbaseContext.foreach(rdd, new VoidFunction<Tuple2<byte[], HConnection>>() {
      public void call(Tuple2<byte[], HConnection> t) throws Exception {
        HTableInterface table1 = t._2().getTable(Bytes.toBytes(tableName));
        byte[] b = t._1();
        Result r = table1.get(new Get(b));
        if (!r.isEmpty()) {
          Put put = new Put(b);
          put.add(Bytes.toBytes(columnFamily), Bytes.toBytes("q"), b);
          table1.put(put);
        }
        table1.close();
      }
    });

    tearDown();
  }
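
  // Starts a local Spark context, brings up a ZooKeeper + HBase mini-cluster,
  // and (re)creates the test table used by the example above.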
  public static void setUp() {
    jsc = new JavaSparkContext("local", "JavaHBaseContextSuite");
    jsc.addJar("SparkHBase.jar");
    tempDir = Files.createTempDir();
    tempDir.deleteOnExit();
    htu = HBaseTestingUtility.createLocalHTU();
    try {
      System.out.println("cleaning up test dir");
      htu.cleanupTestDir();
      System.out.println("starting minicluster");
      htu.startMiniZKCluster();
      htu.startMiniHBaseCluster(1, 1);
      System.out.println(" - minicluster started");
      try {
        htu.deleteTable(Bytes.toBytes(tableName));
      } catch (Exception e) {
        System.out.println(" - no table " + tableName + " found");
      }
      System.out.println(" - creating table " + tableName);
      htu.createTable(Bytes.toBytes(tableName), Bytes.toBytes(columnFamily));
      System.out.println(" - created table");
    } catch (Exception e1) {
      throw new RuntimeException(e1);
    }
  }
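
  // Drops the test table, shuts the mini-cluster down, and stops the
  // Spark context.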
  public static void tearDown() {
    try {
      htu.deleteTable(Bytes.toBytes(tableName));
      System.out.println("shutting down minicluster");
      htu.shutdownMiniHBaseCluster();
      htu.shutdownMiniZKCluster();
      System.out.println(" - minicluster shut down");
      htu.cleanupTestDir();
    } catch (Exception e) {
      throw new RuntimeException(e);
    }
    jsc.stop();
    jsc = null;
  }
}