package xxl.core.spatial.spatialBPlusTree;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import xxl.core.collections.MapEntry;
import xxl.core.collections.containers.Container;
import xxl.core.collections.containers.io.BufferedContainer;
import xxl.core.collections.containers.io.ConverterContainer;
import xxl.core.cursors.Cursor;
import xxl.core.cursors.Cursors;
import xxl.core.cursors.mappers.Mapper;
import xxl.core.cursors.sources.io.FileInputCursor;
import xxl.core.functions.AbstractFunction;
import xxl.core.functions.Constant;
import xxl.core.functions.Function;
import xxl.core.functions.Functional.UnaryFunction;
import xxl.core.functions.Functions;
import xxl.core.indexStructures.BPlusTree;
import xxl.core.indexStructures.BPlusTree.IndexEntry;
import xxl.core.indexStructures.BPlusTree.Node;
import xxl.core.indexStructures.rtrees.RtreeIterativeBulkloader;
import xxl.core.io.Buffer;
import xxl.core.io.converters.ConvertableConverter;
import xxl.core.io.converters.Converter;
import xxl.core.io.converters.LongConverter;
import xxl.core.io.converters.MeasuredConverter;
import xxl.core.spatial.rectangles.DoublePointRectangle;
import xxl.core.spatial.spatialBPlusTree.separators.LongKeyRange;
import xxl.core.spatial.spatialBPlusTree.separators.LongSeparator;
/**
* This is a bulk loading class for a BPlusTree. Here we use a similar optimization as in {@link RtreeIterativeBulkloader}, since we use z-strings our optimization function is a
* sum of prefixes of z-strings.
*
*
* @author achakeev
*
*/
public class SingleLevelwiseOptimizedBulkloader<T> {
/**
*
* @author d
*
*/
public static enum DistributionType{
DISTRIBUTION_GOPT,
DISTRIBUTION_OPT,
}
/**
* for writing data
*/
public MeasuredConverter<T> dataConverter;
/**
* for writing temp files
*/
public Converter<MapEntry<Long, Long>> mapEntryConverter = new Converter<MapEntry<Long, Long>>(){
@Override
public MapEntry<Long, Long> read(DataInput arg0,
MapEntry<Long, Long> arg1) throws IOException {
long key = LongConverter.DEFAULT_INSTANCE.readLong(arg0);
long value = LongConverter.DEFAULT_INSTANCE.readLong(arg0);
return new MapEntry<Long, Long>(key, value) ;
}
@Override
public void write(DataOutput arg0,
MapEntry<Long, Long> arg1) throws IOException {
LongConverter.DEFAULT_INSTANCE.write(arg0, arg1.getKey());
LongConverter.DEFAULT_INSTANCE.write(arg0, arg1.getValue());
}
};
/**
*
*/
public BPlusTree tree;
/**
*
*/
public int partitionSize;
/**
*
*/
public int dimension;
/**
*
*/
public int blockSize;
/**
*
*/
public double ratio;
/**
*
*/
public double ratioIndex;
/**
*
*/
public int b_Leaf;
/**
*
*/
public int B_Leaf;
/**
*
*/
public int b_Index;
/**
*
*/
public int B_Index;
/**
*
*/
public int numberOfRectangles;
/**
*
*/
public double minMemory;
/**
*
*/
public double maxMemory;
/**
*
*/
public Container treeContainer;
/**
*
*/
public DistributionType distributionType;
/**
*
*/
public double[] a;
/**
*
*/
String path;
/**
*
*/
final Function<T, Long> getKey;
/**
*
*/
LongKeyRange rootDescriptor;
/**
*
* @param partitionSize
* @param dimension
* @param blockSize
* @param ratio
* @param memory
* @param fileContainer
* @param distributionType
* @param a
* @param path
*/
public SingleLevelwiseOptimizedBulkloader(MeasuredConverter<T> dataConverter, int partitionSize,
int dimension,
int blockSize,
double ratio,
double minMemory,
double maxMemory,
Container fileContainer,
DistributionType distributionType,
String path,
UnaryFunction<T,Long> getKey,
Buffer buffer) {
super();
this.dataConverter = dataConverter;
this.getKey = Functions.toFunction(getKey);
// duplicates enabled
tree = new BPlusTree(blockSize, true);
this.partitionSize = partitionSize;
this.dimension = dimension;
this.blockSize = blockSize;
this.ratio = ratio;
this.treeContainer = new ConverterContainer(fileContainer, tree.nodeConverter());
if(buffer!=null){
this.treeContainer = new BufferedContainer(this.treeContainer, buffer);
}
this.distributionType = distributionType;
this.path = path;
this.minMemory = minMemory;
this.maxMemory = maxMemory;
// level 2 , number 4 , link 8
int payload = blockSize-2-4-8;
b_Leaf = (int)( (payload * ratio)/ (dimension * 16));
B_Leaf = payload / (dimension * 16);
b_Index = (int)( (payload * ratio)/ (8+8));
B_Index = payload / (8+8);
// hack to initialize detremineCpontainer Function;
tree.initialize(null, null,
this.getKey,
this.treeContainer,
ZBPlusTreeIndexFactrory.longKeyMeasuredConverter,
this.dataConverter,
LongSeparator.FACTORY_FUNCTION,
LongKeyRange.FACTORY_FUNCTION);
//
}
/**
*
* @param rectangles
* @throws IOException
*/
public void buildBPlusTRee(Iterator data) throws IOException{
Iterator tempIterator = data;
int level = 0;
while(tempIterator.hasNext()){
File file = File.createTempFile("levelKeys_", "dat");
DataOutputStream out = new DataOutputStream(new FileOutputStream(file));
int written = writeLevel(tempIterator, level, partitionSize, out);
level++;
Cursor levelIterator = new FileInputCursor<MapEntry<Long,Long>>(mapEntryConverter, file);
tempIterator = levelIterator;
if (written <= 1){
break;
}
file.deleteOnExit();
}
// create rtree
MapEntry<Long, Long> entry = (MapEntry<Long, Long>)(tempIterator.next());
Long key = entry.getValue();
IndexEntry indexEntry = (IndexEntry) tree.createIndexEntry(level);
LongSeparator rootSep = new LongSeparator(entry.getValue());
IndexEntry rootEntry = (IndexEntry) ((BPlusTree.IndexEntry)indexEntry).initialize( entry.getKey(), rootSep);
// hack to initialize detremineCpontainer Function;
tree.initialize(rootEntry, rootDescriptor,
getKey,
this.treeContainer,
ZBPlusTreeIndexFactrory.longKeyMeasuredConverter,
dataConverter,
LongSeparator.FACTORY_FUNCTION,
LongKeyRange.FACTORY_FUNCTION );
}
/**
*
* @param data
* @param level
* @param partitionSize
* @param out
* @return
* @throws IOException
*/
public int writeLevel(Iterator data, final int level, int partitionSize,final DataOutput out) throws IOException{
// read partitions size to a list
int counter = 0;
int k = 0;
List partition = new LinkedList();
while(data.hasNext()){
for(int i = 0; data.hasNext() && i < partitionSize; i++ ){
if (level > 0 ){
MapEntry<Long, Long> mapEntry = (MapEntry<Long, Long>) data.next();
Long key = mapEntry.getValue();
// create index entry
IndexEntry indexEntry = (IndexEntry) tree.createIndexEntry(level);
//FIXME change to factory function!!!
indexEntry.initialize(mapEntry.getKey(), new LongSeparator(key));
partition.add(indexEntry);
}else{
DoublePointRectangle rec = (DoublePointRectangle) data.next();
partition.add(rec);
}
}
if (partition.size() > ((level > 0 ) ? B_Index : B_Leaf) ){
Function mapping = new AbstractFunction() {
public Object invoke(Object obj ){
return (level == 0 )? getKey.invoke((T)obj) : ((IndexEntry)obj).separator.sepValue();
}
};
final int[] distribution = computeDistribution((Iterator<Long>)new Mapper(mapping, partition.iterator() ),
partition.size(), level);
counter += writePartition(distribution, partition.iterator(),level, out);;
}else{
// just allocate one node
MapEntry<Long, Long> entry = writeNode(partition, level) ;
mapEntryConverter.write(out, entry );
counter++;
}
partition = new LinkedList();
}
return counter;
}
public int writePartition(int[] distribution, Iterator data, int level, DataOutput out) throws IOException{
for(int i : distribution){
List entries = new ArrayList(i);
for(int k = 0; data.hasNext() && k < i ; k++){
DoublePointRectangle rec = null;
if(level != 0 ){
IndexEntry indexEntry = (IndexEntry) data.next();
entries.add(indexEntry);
}else{
rec = (DoublePointRectangle) data.next();
entries.add(rec);
}
}
MapEntry<Long, Long> entry = writeNode(entries, level);
mapEntryConverter.write(out, entry );
}
return distribution.length;
}
public MapEntry<Long, Long> writeNode(final List entries, int level) {
Long descriptor = null;
if (level == 0){
descriptor = (Long) getKey.invoke((T)entries.get(entries.size()-1));
}else{
descriptor = (Long) ((IndexEntry) entries.get(entries.size()-1)).separator().sepValue();
}
if(rootDescriptor == null){
Long smallestVal = (Long) getKey.invoke((T)entries.get(0));
rootDescriptor = new LongKeyRange(smallestVal, smallestVal);
}else{
rootDescriptor.union(descriptor);
}
final Node node = (Node) tree.createNode(level);
Long nodeId = (Long) treeContainer.reserve(new Constant<Node>(node));
node.initialize(level, entries);
treeContainer.update(nodeId, node);
return new MapEntry<Long, Long>(nodeId, descriptor);
}
/**
* creates optimal distribution for space and function
* @return
*/
protected int[] computeDistribution(Iterator<Long> iterator, int size, int level){
int b = (level > 0) ? b_Index : b_Leaf;
int B = (level > 0) ? B_Index : B_Leaf;
int maxBlocks = (int) (Math.ceil(size/(minMemory * B)));
int minBlocks = (int) (size/(maxMemory * B));
switch(distributionType){
case DISTRIBUTION_OPT : {
return ZValueDistributionGenerator.computeZKeysDistributionApprox(
Cursors.toList(iterator,new ArrayList<Long>()),
b, B);
}
default: {
return ZValueDistributionGenerator.computeZKeysDistribution(
Cursors.toList(iterator,new ArrayList<Long>()),
b, B, maxBlocks);
}
}
}
}