/*
* Copyright (c) 2014 Oculus Info Inc.
* http://www.oculusinfo.com/
*
* Released under the MIT License.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy of
* this software and associated documentation files (the "Software"), to deal in
* the Software without restriction, including without limitation the rights to
* use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is furnished to do
* so, subject to the following conditions:
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
package com.oculusinfo.annotation.io.impl;
import com.oculusinfo.annotation.AnnotationData;
import com.oculusinfo.annotation.io.AnnotationIO;
import com.oculusinfo.annotation.io.serialization.AnnotationSerializer;
import com.oculusinfo.factory.util.Pair;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Row;
import org.apache.hadoop.hbase.client.Table;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.*;
public class HBaseAnnotationIO implements AnnotationIO {
	private static final byte[] EMPTY_BYTES = new byte[0];
	// Use an explicit charset so the family names do not depend on the platform default
	private static final byte[] ANNOTATION_FAMILY_NAME = "annotationData".getBytes(StandardCharsets.UTF_8);
	public static final HBaseColumn ANNOTATION_COLUMN = new HBaseColumn(ANNOTATION_FAMILY_NAME, EMPTY_BYTES);
	private static final byte[] METADATA_FAMILY_NAME = "metaData".getBytes(StandardCharsets.UTF_8);
	public static final HBaseColumn METADATA_COLUMN = new HBaseColumn(METADATA_FAMILY_NAME, EMPTY_BYTES);
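	// Both columns use an empty qualifier, so each family effectively holds one value per row.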
	public static class HBaseColumn {
		byte[] family;
		byte[] qualifier;
		HBaseColumn (byte[] family, byte[] qualifier) {
			this.family = family;
			this.qualifier = qualifier;
		}
		public byte[] getFamily () { return family; }
		public byte[] getQualifier() { return qualifier; }
		// Result maps are keyed by HBaseColumn (see decodeRawResult), so equality
		// compares the wrapped byte arrays by content rather than by reference.
		@Override
		public boolean equals (Object other) {
			if (!(other instanceof HBaseColumn)) return false;
			HBaseColumn that = (HBaseColumn) other;
			return Arrays.equals(family, that.family) && Arrays.equals(qualifier, that.qualifier);
		}
		@Override
		public int hashCode () { return 31 * Arrays.hashCode(family) + Arrays.hashCode(qualifier); }
	}
	private static final Logger LOGGER = Logger.getLogger(HBaseAnnotationIO.class);
	private Configuration _config;
	private Admin _admin;
	private Connection _connection;
public HBaseAnnotationIO (String zookeeperQuorum,
String zookeeperPort,
String hbaseMaster) throws IOException {
Logger.getLogger("org.apache.zookeeper").setLevel(Level.WARN);
Logger.getLogger("org.apache.hadoop").setLevel(Level.WARN);
_config = HBaseConfiguration.create();
_config.set("hbase.zookeeper.quorum", zookeeperQuorum);
_config.set("hbase.zookeeper.property.clientPort", zookeeperPort);
_config.set("hbase.master", hbaseMaster);
_connection = ConnectionFactory.createConnection(_config);
_admin = _connection.getAdmin();
}
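	// Example usage (a minimal sketch; the quorum, port, and master values below
	// are placeholders for a local test setup, and "serializer"/"annotations"
	// stand in for a real AnnotationSerializer and data set):
	//
	//   HBaseAnnotationIO io = new HBaseAnnotationIO("localhost", "2181", "localhost:60000");
	//   io.initializeForWrite("myLayer");
	//   io.writeData("myLayer", serializer, annotations);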
	/**
	 * Determine the row ID we use in HBase for the given annotation data.
	 */
public static byte[] rowIdFromData (UUID uuid) {
ByteBuffer bb = ByteBuffer.wrap(new byte[16]);
bb.putLong(uuid.getMostSignificantBits());
bb.putLong(uuid.getLeastSignificantBits());
return bb.array();
}
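	// Note: rowIdFromData writes the most-significant long first, in ByteBuffer's
	// default big-endian order, so UUID "00000000-0000-0001-0000-000000000002"
	// becomes the bytes {0,0,0,0,0,0,0,1, 0,0,0,0,0,0,0,2}.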
@Override
public void initializeForWrite (String tableName) throws IOException {
// convert to separate data table name
String dataTableName = getTableName(tableName);
if ( !_admin.tableExists(TableName.valueOf(dataTableName))) {
createTable( dataTableName );
}
}
@Override
public void writeData (String tableName,
AnnotationSerializer serializer,
Iterable<AnnotationData<?>> data ) throws IOException {
List<Row> rows = new ArrayList<>();
for (AnnotationData<?> d : data) {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
serializer.serialize( d, baos );
rows.add( addToPut(null,
rowIdFromData( d.getUUID() ),
ANNOTATION_COLUMN,
baos.toByteArray() ) );
}
try {
writeRows( tableName, rows);
} catch (InterruptedException e) {
throw new IOException("Error writing annotations to HBase", e);
}
}
@Override
	public void initializeForRead (String tableName) {
		try {
			initializeForWrite( tableName );
		} catch (Exception e) {
			// This override declares no checked exceptions, so log the failure
			// rather than dumping a raw stack trace.
			LOGGER.warn("Failed to initialize table " + tableName + " for reading", e);
		}
	}
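	// Only the first element of each certificate (the annotation's UUID string)
	// is used to locate rows; see rowIdFromData.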
@Override
public List<AnnotationData<?>> readData (String tableName,
AnnotationSerializer serializer,
Iterable<Pair<String,Long>> certificates) throws IOException {
List<byte[]> rowIds = new ArrayList<>();
for (Pair<String,Long> certificate: certificates) {
if (certificate != null) {
rowIds.add( rowIdFromData( UUID.fromString( certificate.getFirst() ) ) );
}
}
List<Map<HBaseColumn, byte[]>> rawResults = readRows(tableName, rowIds, ANNOTATION_COLUMN);
return convertResults( rawResults, serializer );
}
@Override
	public void removeData (String tableName, Iterable<Pair<String,Long>> certificates) throws IOException {
		List<byte[]> rowIds = new ArrayList<>();
		for (Pair<String,Long> certificate: certificates) {
			// Guard against null certificates, as readData does
			if (certificate != null) {
				rowIds.add( rowIdFromData( UUID.fromString( certificate.getFirst() ) ) );
			}
		}
		deleteRows( tableName, rowIds );
	}
public Admin getAdmin() {
return _admin;
}
	public void createTable( String tableName ) {
		HTableDescriptor tableDesc = new HTableDescriptor(TableName.valueOf(tableName));
		HColumnDescriptor metadataFamily = new HColumnDescriptor(METADATA_FAMILY_NAME);
		tableDesc.addFamily(metadataFamily);
		HColumnDescriptor annotationFamily = new HColumnDescriptor(ANNOTATION_FAMILY_NAME);
		tableDesc.addFamily(annotationFamily);
		try {
			_admin.createTable(tableDesc);
		} catch (Exception e) {
			LOGGER.error("Failed to create table " + tableName, e);
		}
	}
	public void dropTable( String tableName ) {
		// convert to separate data table name
		String dataTableName = getTableName( tableName );
		try {
			_admin.disableTable(TableName.valueOf(dataTableName));
			_admin.deleteTable(TableName.valueOf(dataTableName));
		} catch (Exception e) {
			// A missing table is expected here; record the failure rather than
			// swallowing it silently.
			LOGGER.info("Could not drop table " + dataTableName, e);
		}
	}
/**
* Get the configuration used to connect to HBase.
*/
public Configuration getConfiguration () {
return _config;
}
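	/*
	 * Convert a layer name into the name of its separate data table; for
	 * example, layer "myLayer" is stored in table "myLayer-data".
	 */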
private String getTableName( String layer ) {
return layer + "-data";
}
/*
* Gets an existing table (without creating it)
*/
private Table getTable (String tableName) throws IOException {
// convert to separate data table name
String dataTableName = getTableName(tableName);
return _connection.getTable(TableName.valueOf(dataTableName));
}
/*
* Given a put request (a request to put data into a table), add a single
* entry into the request
*
* @param existingPut
* The existing request. If null, a request will be created for
* the given row. If non-null, no check will be performed to make
* sure the put request is for the right row - this is the
* responsibility of the caller.
* @param rowId
* The id of the row to put. This is only used if the existingPut
* is null.
* @param column
* The column defining the entry in this row into which to put
* the data
* @param data
* the data to put into the described entry.
* @return The put request - the same as is passed in, or a new request if
* none was passed in.
*/
private Put addToPut (Put existingPut, byte[] rowId, HBaseColumn column, byte[] data) {
if (null == existingPut) {
existingPut = new Put(rowId);
}
existingPut.addColumn(column.family, column.qualifier, data);
return existingPut;
}
/*
* Write a series of rows out to the given table
*
	 * @param tableName
	 *            The name of the table to which to write
* @param rows
* The rows to write
*/
	private void writeRows (String tableName, List<Row> rows) throws InterruptedException, IOException {
		// try-with-resources releases the table even if the batch call fails
		try (Table table = getTable(tableName)) {
			Object[] results = new Object[rows.size()];
			table.batch(rows, results);
		}
	}
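	/*
	 * Pull the requested columns out of a single result row.
	 *
	 * @return A map from column to value containing only the columns present
	 *         in the row, or null if the row contained none of them.
	 */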
private Map<HBaseColumn, byte[]> decodeRawResult (Result row, HBaseColumn[] columns) {
Map<HBaseColumn, byte[]> results = null;
for (HBaseColumn column: columns) {
if (row.containsColumn(column.family, column.qualifier)) {
if (null == results) results = new HashMap<>();
results.put(column, row.getValue(column.family, column.qualifier));
}
}
return results;
}
/*
* Read several rows of data.
*
	 * @param tableName
	 *            The name of the table to read
	 * @param rows
	 *            The rows to read
	 * @param columns
	 *            The columns to read
	 * @return A list, in the same order as the input rows, of maps from
	 *         column to value. Columns missing from the data are also
	 *         missing from the map. Rows which returned no data have a
	 *         null instead of a map.
*/
	private List<Map<HBaseColumn, byte[]>> readRows (String tableName, List<byte[]> rows, HBaseColumn... columns) throws IOException {
		// try-with-resources releases the table even if a get fails
		try (Table table = getTable(tableName)) {
			List<Get> gets = new ArrayList<>(rows.size());
			for (byte[] rowId: rows) {
				Get get = new Get(rowId);
				for (HBaseColumn column: columns) {
					get.addColumn(column.family, column.qualifier);
				}
				gets.add(get);
			}
			Result[] results = table.get(gets);
			List<Map<HBaseColumn, byte[]>> allResults = new LinkedList<>();
			for (Result result: results) {
				allResults.add(decodeRawResult(result, columns));
			}
			return allResults;
		}
	}
	private void deleteRows (String tableName, List<byte[]> rows) throws IOException {
		try (Table table = getTable(tableName)) {
			List<Delete> deletes = new LinkedList<>();
			for (byte[] rowId: rows) {
				// A Delete with no columns specified removes the entire row
				deletes.add(new Delete(rowId));
			}
			table.delete(deletes);
		}
	}
	private List<AnnotationData<?>> convertResults( List<Map<HBaseColumn, byte[]>> rawResults,
	                                                AnnotationSerializer serializer )
		throws IOException {
		List<AnnotationData<?>> results = new LinkedList<>();
		for (Map<HBaseColumn, byte[]> rawResult: rawResults) {
			if (null != rawResult) {
				byte[] rawData = rawResult.get(ANNOTATION_COLUMN);
				ByteArrayInputStream bais = new ByteArrayInputStream(rawData);
				results.add( serializer.deserialize( bais ) );
			}
		}
		return results;
	}
}