/*
* Copyright (c) 2015 Uncharted Software Inc. http://www.uncharted.software/
*
* Released under the MIT License.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy of
* this software and associated documentation files (the "Software"), to deal in
* the Software without restriction, including without limitation the rights to
* use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is furnished to do
* so, subject to the following conditions:
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
package com.oculusinfo.binning.io.impl;
import com.oculusinfo.binning.TileData;
import com.oculusinfo.binning.TileIndex;
import com.oculusinfo.binning.impl.DenseTileMultiSliceView;
import com.oculusinfo.binning.impl.MultiSliceTileView;
import com.oculusinfo.binning.io.serialization.TileSerializer;
import com.oculusinfo.binning.util.TypeDescriptor;
import com.oculusinfo.factory.util.Pair;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Row;
import org.json.JSONObject;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* This version of the HBasePyramidIO is specialized for bucketted tiles; it will take a tile whose bins are
* lists (of buckets) of something - doesn't matter what - and instead of storing the tile as a monolithic tile
* in the tileData column, as HBasePyramidIO does, it will store the contents in separate columns.
*/
public class HBaseSlicedPyramidIO extends HBasePyramidIO {
private static final Pattern SLICE_PATTERN = Pattern.compile("(?<table>.*)\\[(?<min>[0-9]+)(?>-(?<max>[0-9]+))?\\]");
private boolean _doPyramidding;
private HBaseTilePutter _putter;
public HBaseSlicedPyramidIO (String zookeeperQuorum, String zookeeperPort, String hbaseMaster)
throws IOException {
super(zookeeperQuorum, zookeeperPort, hbaseMaster);
setPyramidding(true);
}
@Override public HBaseTilePutter getPutter () {
return _putter;
}
public void setPyramidding (boolean doPyramidding) {
_doPyramidding = doPyramidding;
_putter = new SlicedHBaseTilePutter(_doPyramidding);
}
public static HBaseColumn getSliceColumn (int minSlice, int maxSlice) {
String qualifier;
if (minSlice == maxSlice) {
qualifier = ""+minSlice;
} else {
qualifier = ""+minSlice+"-"+maxSlice;
}
return new HBaseColumn(TILE_FAMILY_NAME, qualifier.getBytes());
}
private <T> TileData<List<T>> compose (List<TileData<List<T>>> candidates,
int startIndex, int numComponents, int increment) {
List<TileData<List<T>>> components = new ArrayList<>();
for (int c=0; c<numComponents; ++c) {
components.add((TileData) candidates.get(startIndex + c * increment));
}
return new MultiSliceTileView<T>(components);
}
@Override
public <T> List<TileData<T>> readTiles (String tableName,
TileSerializer<T> serializer,
Iterable<TileIndex> tiles) throws IOException {
Matcher m = SLICE_PATTERN.matcher(tableName);
TypeDescriptor binType = serializer.getBinTypeDescription();
if (List.class == binType.getMainType() && m.matches()) {
String realName = m.group("table");
HBaseColumn[] columns;
int min = Integer.parseInt(m.group("min"));
if (null == m.group("max")) {
columns = new HBaseColumn[] { getSliceColumn(min, min) };
} else {
int max = Integer.parseInt(m.group("max"));
List<Pair<Integer, Integer>> sliceRanges;
if (_doPyramidding) {
sliceRanges = decomposeRange(min, max);
} else {
sliceRanges = new ArrayList<>();
for (int n=min; n<=max; ++n) {
sliceRanges.add(new Pair<>(n, n));
}
}
columns = new HBaseColumn[sliceRanges.size()];
for (int i = 0; i < sliceRanges.size(); ++i) {
Pair<Integer, Integer> sliceRange = sliceRanges.get(i);
columns[i] = getSliceColumn(sliceRange.getFirst(), sliceRange.getSecond());
}
}
List<TileData<T>> rawResults = super.readTiles(realName, serializer, tiles, columns);
if (1 == columns.length) return rawResults;
else {
// Consolidate the columns from each tile.
int numRaw = rawResults.size();
int numReal = numRaw/columns.length;
List<TileData<T>> realResults = new ArrayList<>(numReal);
for (int i=0; i<numReal; ++i) {
// We know this cast is correct because of our guard condition up top, that
// List is the main type of T.
realResults.add(compose((List) rawResults, i, columns.length, numReal));
}
return realResults;
}
} else {
return super.readTiles(tableName, serializer, tiles);
}
}
// Convert a number to its base 2 representation, as an array of 0's and 1's
private static int[] numberToBits (int number) {
// Count how many bits there are
int numBits = 0;
for (int n = number; n > 0; n = n >> 1) ++numBits;
int[] bits = new int[numBits];
for (int n=0; n<numBits; ++n) {
bits[n] = 1 & (number >> n);
}
return bits;
}
/**
* Take a range of buckets, and convert it into the best set of requests for retrieving that set of buckets from a
* fully pyramidded bucket tile set.
*
* @param start The first bucket needed
* @param end The last bucket needed
* @return A series of pairs, each of which indicates a single stored bucket range to be retrieved, with its
* start end end bucket number.
*/
public static List<Pair<Integer, Integer>> decomposeRange (int start, int end) {
if (start > end) return decomposeRange(end, start);
// Get the most significant bit at which the endpoints differ.
int differences = start ^ (end+1); // xor
int msb = 1;
while ((msb << 1) <= differences) msb = msb << 1;
int midPoint = start - (start % msb) + msb;
int[] startRanges = numberToBits(midPoint - start);;
int[] endRanges = numberToBits(end + 1 - midPoint);
// Construct our ranges.
List<Pair<Integer, Integer>> ranges = new ArrayList<>();
int curRangeMin = start;
// Ranges before our midpoint go from smallest to largest
for (int i=0; i<startRanges.length; ++i) {
if (1 == startRanges[i]) {
int curRangeSize = 1 << i;
ranges.add(new Pair<Integer, Integer>(curRangeMin, curRangeMin+curRangeSize-1));
curRangeMin += curRangeSize;
}
}
// Ranges after our midpoint go from largest to smallest
for (int i=endRanges.length-1; i>=0; --i) {
if (1 == endRanges[i]) {
int curRangeSize = 1 << i;
ranges.add(new Pair<Integer, Integer>(curRangeMin, curRangeMin+curRangeSize-1));
curRangeMin += curRangeSize;
}
}
return ranges;
}
public static class SlicedHBaseTilePutter extends StandardHBaseTilePutter {
private boolean _doPyramidding;
public SlicedHBaseTilePutter (boolean doPyramidding) {
_doPyramidding = doPyramidding;
}
@Override
public <T> Put getPutForTile(TileData<T> tile, TileSerializer<T> serializer) throws IOException {
TypeDescriptor binType = serializer.getBinTypeDescription();
Put put = super.getPutForTile(tile, serializer);
if (List.class == binType.getMainType()) {
put = addSlices(put, (TileSerializer) serializer, (TileData) tile);
}
return put;
}
private <T> Put addSlices (Put existingPut,
TileSerializer<List<T>> serializer,
TileData<List<T>> tile) throws IOException {
// Figure out into how many slices to divide the data
int slices = numSlices(tile);
// Store the whole thing pyramidded.
int slicesPerWrite = 1;
while (slicesPerWrite < slices) {
// Divide the tile into slices, storing each of them individually in their own column
for (int startSlice = 0; startSlice < slices; startSlice = startSlice + slicesPerWrite) {
int endSlice = startSlice + slicesPerWrite - 1;
TileData<List<T>> slice = new DenseTileMultiSliceView<T>(tile, startSlice, endSlice).harden();
ByteArrayOutputStream baos = new ByteArrayOutputStream();
serializer.serialize(slice, baos);
existingPut = addToPut(existingPut, rowIdFromTileIndex(tile.getDefinition()),
getSliceColumn(startSlice, endSlice), baos.toByteArray());
}
// If not pyramidding, bail out after our first time through.
if (_doPyramidding) slicesPerWrite = slicesPerWrite * 2;
else slicesPerWrite = slices;
}
return existingPut;
}
private int numSlices (TileData<?> tile) {
int slices = 0;
TileIndex index = tile.getDefinition();
for (int x=0; x < index.getXBins(); ++x) {
for (int y = 0; y < index.getYBins(); ++y) {
try {
List<?> bin = (List<?>) tile.getBin(x, y);
int size = bin.size();
if (size > slices) slices = size;
} catch (ClassCastException|NullPointerException e) {
// Swallow it, we don't care here.
}
}
}
return slices;
}
}
}