package com.ontotext.kim.model;
import java.io.IOException;
import java.io.Serializable;
import java.util.Arrays;
import java.util.Comparator;
import org.apache.commons.collections.comparators.NullComparator;
import com.ontotext.kim.util.KimLogs;
/**
* This class implements a hash-code indexed storage for objects. It resembles
* the <code>HashSet</code> class with these substantial differences:<br>
* - The hash-code value is supplied externally
* - Multiple objects can be stored with equal hash-codes. They are grouped
* and stored in packages. These packages are implemented as the internal
* class <code>HashElement</code>.<br>
* - Stored objects are retrieved by packages through their hash-code value.<br>
* - If stored objects implement <code>Comparable</code> they are sorted in the
* packages and are returned as a sorted array. This allows binary search within
* the result.<br>
* <br>
* The implementation of the storage structure, as well as the resize logic, is
* intentionally simplified. By design it supports only the operations
* addition, search and retrieval (not removal).
*
* @author danko
*/
public class HashRegister implements Serializable {
/** Serialization version identifier of this class. */
private static final long serialVersionUID = 2442L;
/**
 * Selects what <code>HashElement</code> does when an added element is equal
 * to an element already stored under the same hash-code:<br>
 * 0 - add the duplicating object;<br>
 * 1 - replace the old object and put the new one;<br>
 * 2 - keep the old one and ignore the new one;<br>
 * The field is static, so the setting is shared by all registers.
 */
protected static int todoIfExists = 0;
/**
 * Determines the step for extension of the array field
 * <code>HashElement.elementHolder</code>. The array is first created with
 * <code>1 + subRegIncrement</code> slots and grows by
 * <code>subRegIncrement</code> slots whenever it is full.
 */
protected static int subRegIncrement = 2;
/**
 * Determines the initial size of the main-register created by the
 * no-argument constructor. A better distribution is achieved if it is a
 * prime number.
 */
protected static int initialSize = 1021;
/**
 * Determines the maximal percent of filling of the main-register
 * after which the HashRegister is expanded by doubling its size. The
 * percentage is applied to the number of distinct hash-code values
 * (<code>hashesCount</code>), not to the number of stored elements.
 */
protected int maxFillPerc = 75;
/**
 * A container class that holds all of the stored objects with the
 * same hash-code value. If the objects implement
 * <code>Comparable</code> they are kept sorted in the container and
 * are returned as a sorted array. This allows binary search in
 * the result.<br>
 * If <code>Comparable</code> and non-<code>Comparable</code> objects meet
 * in one container, the container falls back to unsorted storage and
 * linear (<code>equals</code> based) lookups.
 *
 * @author danko
 */
public static class HashElement implements Serializable {
    private static final long serialVersionUID = 2442L;

    /**
     * Comparator used for sorting and binary search. It is a
     * <code>NullComparator</code> created with <code>nullsAreHigh = true</code>,
     * so the unused (<code>null</code>) tail of a storage array sorts after
     * every stored element.
     */
    private static final Comparator<Comparable> nsc = new NullComparator(true);

    /** The hash-code value shared by all elements of this container. */
    public final int subRegHash;

    /**
     * The storage of the container:<br>
     * - <code>null</code> when nothing is stored;<br>
     * - the element itself when exactly one element is stored;<br>
     * - a <code>Comparable[]</code> (sorted) or a plain <code>Object[]</code>
     * (unsorted) when two or more elements are stored. Unused slots at the
     * end of the array are <code>null</code>.
     */
    public Serializable elementHolder = null;

    /**
     * Creates an empty container.
     *
     * @param hashCode - the hash-code value of the elements to be stored
     */
    public HashElement (int hashCode) {
        subRegHash = hashCode;
    }

    /**
     * Adds new element to the container. The added element must have
     * the same hash-code value as the container. This must be verified
     * outside this class.
     *
     * @param element - the added element (<code>null</code> is ignored)
     * @return the change in the number of stored elements (0 or 1)
     */
    public byte add(Serializable element) {
        byte elemCountChange = add12(element);
        if (elemCountChange < 0) {
            // Sorted insertion is only possible into a sorted (Comparable[])
            // storage; everything else is handled as unsorted.
            if (element instanceof Comparable
                    && elementHolder instanceof Comparable[]) {
                elemCountChange = addSorted((Comparable) element);
            } else {
                elemCountChange = addUnsorted(element);
            }
        }
        return elemCountChange;
    }

    /**
     * Adds new element to the container in unsorted manner.
     * The method is called only if the container already has 2 or more
     * elements stored in an array.
     *
     * @param element - the element to store
     * @return number of elements stored: 1 - if stored successfully;
     * 0 - element already existed and no duplicates are allowed
     */
    private byte addUnsorted(Object element) {
        Object[] oldSubReg = (Object[]) elementHolder;
        if (oldSubReg instanceof Comparable[]) {
            // A non-comparable element joins a sorted container: switch to a
            // plain Object[] so that the element can be stored at all.
            Object[] plain = new Object[oldSubReg.length];
            System.arraycopy(oldSubReg, 0, plain, 0, oldSubReg.length);
            oldSubReg = plain;
            elementHolder = plain;
        }
        if (todoIfExists > 0) {
            // The array must be passed as Object[]: passing the field itself
            // (static type Serializable) would wrap the whole array into a
            // single-element list and the search would never succeed.
            int i = Arrays.asList(oldSubReg).indexOf(element);
            if (i >= 0) {
                switch (todoIfExists) {
                case 1: // replace the old object and put the new one
                    oldSubReg[i] = element;
                    return 0;
                case 2: // drop the new one and retain the old one
                    return 0;
                default: // unknown policy - add the duplicate
                    break;
                }
            }
        }
        // Find the first free slot (slots are filled without gaps)
        int addPosition = oldSubReg.length;
        while (addPosition > 0 && oldSubReg[addPosition - 1] == null) {
            addPosition--;
        }
        Object[] newSubReg;
        if (addPosition == oldSubReg.length) {
            // The array is full - extend it
            newSubReg = new Object[oldSubReg.length + subRegIncrement];
            System.arraycopy(oldSubReg, 0, newSubReg, 0, oldSubReg.length);
        } else {
            newSubReg = oldSubReg;
        }
        newSubReg[addPosition] = element;
        elementHolder = newSubReg;
        return 1;
    }

    /**
     * Adds new element to the container in sorted manner.
     * The method is called only if the container already holds a sorted
     * (<code>Comparable[]</code>) array.
     *
     * @param element - the element to store
     * @return number of elements stored: 1 - if stored successfully;
     * 0 - element already existed and no duplicates are allowed
     */
    private byte addSorted(Comparable element) {
        Comparable[] oldSubReg = (Comparable[]) elementHolder;
        int addPosition = Arrays.binarySearch(oldSubReg, element, nsc);
        if (todoIfExists > 0 && addPosition >= 0) {
            switch (todoIfExists) {
            case 1: // replace the old object and put the new one
                oldSubReg[addPosition] = element;
                return 0;
            case 2: // drop the new one and retain the old one
                return 0;
            default: // unknown policy - add the duplicate
                break;
            }
        }
        if (addPosition < 0) {
            // Not found: convert the binarySearch result to insertion point
            addPosition = - addPosition - 1;
        }
        Comparable[] newSubReg;
        if (oldSubReg[oldSubReg.length - 1] != null) {
            // The array is full - extend it, leaving a gap at addPosition
            newSubReg = new Comparable[oldSubReg.length + subRegIncrement];
            System.arraycopy(oldSubReg, 0, newSubReg, 0, addPosition);
            System.arraycopy(oldSubReg, addPosition, newSubReg,
                    addPosition + 1, oldSubReg.length - addPosition);
        } else {
            // There is a free slot at the end - shift the tail by one
            System.arraycopy(oldSubReg, addPosition, oldSubReg,
                    addPosition + 1, oldSubReg.length - 1 - addPosition);
            newSubReg = oldSubReg;
        }
        newSubReg[addPosition] = element;
        elementHolder = newSubReg;
        return 1;
    }

    /**
     * Adds new element to the container while it is empty or holds a
     * single element.
     *
     * @param element - element to be stored
     * @return - returns the change in the number of stored elements
     * (0 or 1) or returns -1 if element is not handled (the container
     * already holds an array of elements)
     */
    private byte add12(Serializable element) {
        if (element == null) {
            return 0;
        }
        if (elementHolder == null) {
            elementHolder = element;
            return 1;
        }
        if (elementHolder instanceof Object[]) {
            return -1;
        }
        // Exactly one element is stored so far
        if (todoIfExists > 0 && element.equals(elementHolder)) {
            switch (todoIfExists) {
            case 1: // replace the old object and put the new one
                elementHolder = element;
                return 0;
            case 2: // drop the new one and retain the old one
                return 0;
            default: // unknown policy - add the duplicate
                break;
            }
        }
        Object[] newSubReg;
        if (element instanceof Comparable
                && elementHolder instanceof Comparable) {
            // Both elements can be ordered - start a sorted array
            newSubReg = new Comparable[1 + subRegIncrement];
            if (nsc.compare((Comparable) elementHolder,
                    (Comparable) element) < 0) {
                newSubReg[0] = elementHolder;
                newSubReg[1] = element;
            } else {
                newSubReg[0] = element;
                newSubReg[1] = elementHolder;
            }
        } else {
            newSubReg = new Object[1 + subRegIncrement];
            newSubReg[0] = elementHolder;
            newSubReg[1] = element;
        }
        elementHolder = newSubReg;
        return 1;
    }

    /**
     * A shortcut method which calculates the storage index in given
     * container. The index is calculated based on the hash-code of
     * the current HashElement.
     *
     * @param container - the container where the HashElement will be stored
     * @return - the value of the index
     */
    public int index(Object[] container) {
        return mainIx(subRegHash, container);
    }

    /**
     * Checks if given element is already stored in the container.
     * Sorted containers are searched with binary search (match by
     * comparison), unsorted ones linearly (match by <code>equals</code>).
     *
     * @param element - the element to be checked. For <code>null</code>
     * the result tells whether the container is empty.
     * @return <b>true</b> - if found
     */
    public boolean contains(Object element) {
        Object[] elements = getElements();
        if (element == null) {
            return (elements == null);
        }
        if (elements == null) {
            // Empty container (e.g. only a null element was ever added)
            return false;
        }
        if (element instanceof Comparable
                && elements instanceof Comparable[]) {
            return Arrays.binarySearch((Comparable[]) elements,
                    (Comparable) element, nsc) >= 0;
        }
        return Arrays.asList(elements).contains(element);
    }

    /**
     * Gets the stored instance that equals the given instance.
     *
     * @param element - the element to be checked.
     * @return the equal stored instance or <code>null</code> if there is
     * no such instance
     */
    public Object get(Object element) {
        Object[] elements = getElements();
        if (element == null || elements == null) {
            return null;
        }
        int i;
        if (element instanceof Comparable
                && elements instanceof Comparable[]) {
            i = Arrays.binarySearch((Comparable[]) elements,
                    (Comparable) element, nsc);
        } else {
            i = Arrays.asList(elements).indexOf(element);
        }
        return ((i >= 0) ? elements[i] : null);
    }

    /**
     * Returns the stored elements as an object array. The result is a copy
     * without the unused slots of the internal storage.
     *
     * @return array with all stored elements (a <code>Comparable[]</code>
     * for sorted content) or <code>null</code> if the container is empty
     */
    public Object[] getElements() {
        if (elementHolder == null) {
            return null;
        }
        if (elementHolder instanceof Object[]) {
            Object[] eList = (Object[]) elementHolder;
            int used = eList.length;
            while (used > 0 && eList[used - 1] == null) {
                used--;
            }
            // The type of the storage array tells whether content is sorted
            Object[] result = (eList instanceof Comparable[])
                    ? new Comparable[used] : new Object[used];
            System.arraycopy(eList, 0, result, 0, used);
            return result;
        }
        // elementHolder contains a single object
        if (elementHolder instanceof Comparable) {
            return new Comparable[]{(Comparable) elementHolder};
        }
        return new Object[]{elementHolder};
    }
}
/**
 * Comparator for sorting <code>HashElement</code> instances by the index
 * they would occupy in a given container.
 */
public static class HashElementComp implements Comparator<HashElement> {
    /** The container against which the indexes are calculated. */
    Object[] container;

    /**
     * @param container - the container used for the index calculation
     */
    public HashElementComp(Object[] container) {
        this.container = container;
    }

    /**
     * Compares two hash elements by their index in the container.
     *
     * @return -1, 0 or 1 when the index of <code>o1</code> is lower than,
     * equal to or greater than the index of <code>o2</code>
     */
    public int compare(HashElement o1, HashElement o2) {
        final int left = o1.index(container);
        final int right = o2.index(container);
        return (left == right) ? 0 : ((left < right) ? -1 : 1);
    }
}
/** The main storage structure of the Hash Register. The index under
 * which an object will be stored is calculated through the method
 * <code>mainIx</code>. Calculation is based on the externally
 * provided hash-code of the object and the current size of the array.
 * A slot holds <code>null</code> (nothing stored), a single
 * <code>HashElement</code>, or a <code>HashElement[]</code> when several
 * hash-code values map to the same index. */
protected Object[] mainReg;
/** Access synchronization locking object. It is transient and is
 * recreated in <code>readObject</code> after deserialization. */
protected transient Object mainRegLock = new Object();
/** Number of different hash-code values provided with stored objects */
protected int hashesCount = 0;
/** Current limit of the number of distinct hash-code values that
 * could be stored with the current size of <code>mainReg</code>.
 * It is recalculated from <code>maxFillPerc</code> whenever
 * <code>mainReg</code> is created or resized. */
protected int hashesCountLimit = 0;
/** The number of objects stored in the register */
protected int elementsCount = 0;
/** This flag determines if the auto-resizing is turned on. When set,
 * <code>add</code> calls <code>resizeMainReg</code> after each addition. */
protected boolean autoResize = true;
/** This public constructor starts the hash register with the predefined
 * size (the static field <code>initialSize</code>) and automatic resizing
 * turned on. */
public HashRegister() {
this(initialSize, true);
}
/** This protected constructor allows starting the hash register with
 * different than default size and automatic resizing option.
 * @param newSize - the initial <code>mainReg</code> size; must be positive
 * because the storage index is calculated modulo this size.
 * @param autoRes - the auto-resize flag.
 * @throws IllegalArgumentException if <code>newSize</code> is not positive
 */
protected HashRegister(int newSize, boolean autoRes) {
    if (newSize <= 0) {
        // A zero-sized register would fail with a division by zero on
        // the first index calculation - reject it right away.
        throw new IllegalArgumentException(
                "HashRegister size must be positive: " + newSize);
    }
    mainReg = new Object[newSize];
    // The product is calculated as long: for large registers
    // length * percent does not fit into an int.
    hashesCountLimit = (int) Math.min((long) Integer.MAX_VALUE,
            (long) mainReg.length * maxFillPerc / 100);
    autoResize = autoRes;
}
/** The method resizes the <code>mainReg</code> array if the content
 * volume limit was reached. The register is doubled, therefore every
 * <code>HashElement</code> either keeps its index or moves to
 * <code>index + old length</code>.
 * @return <b>true</b> if resize was performed; <b>false</b> if the limit
 * is not reached yet or the new register could not be allocated
 */
public boolean resizeMainReg() {
    // If limit is not reached -> quit resize
    if (hashesCountLimit > hashesCount) return false;
    long start = System.currentTimeMillis();
    Object[] newMainReg;
    try {
        newMainReg = new Object[mainReg.length*2];
    } catch (Exception e) {
        KimLogs.logNERC_GAZETTEER.warn("Unsuccessful attempt to resize" +
                " [HashRegister] due to:\n" + e.getMessage());
        return false;
    }
    synchronized (mainRegLock) {
        // Cycle through the old register and move the [HashElement] objects
        // to the new main-register
        final int oldLength = mainReg.length;
        for (int main_i = 0; main_i < oldLength; main_i++) {
            Object slot = mainReg[main_i];
            if (slot instanceof HashElement) {
                // Just one [HashElement] in the slot: determine its new place
                HashElement single = (HashElement) slot;
                newMainReg[single.index(newMainReg)] = single;
            }
            else if (slot != null) {
                // Two or more [HashElement] in the slot: split them between
                // the old index and the new one
                splitSubRegister((HashElement[]) slot,
                        main_i, main_i + oldLength, newMainReg);
            }
            // Release the old sub-register to allow for early GC.
            mainReg[main_i] = null;
        }
        // Release the old main-register to allow for GC.
        mainReg = newMainReg;
        // The product is calculated as long: for large registers
        // length * percent does not fit into an int and a negative limit
        // would trigger a resize on every subsequent addition.
        hashesCountLimit = (int) Math.min((long) Integer.MAX_VALUE,
                (long) mainReg.length * maxFillPerc / 100);
    }
    long duration = System.currentTimeMillis() - start;
    KimLogs.logNERC_GAZETTEER.debug("Main Register resized to " +
            mainReg.length + " for " + duration + "ms.");
    return true;
}

/** Distributes the hash elements of one old slot between the two slots
 * of the doubled register they can map to.
 * @param oldSubReg - the hash elements of the old slot (gets re-sorted)
 * @param stayIx - the index of the old slot
 * @param moveIx - the alternative index in the doubled register
 * @param newMainReg - the doubled register that receives the elements
 */
private static void splitSubRegister(HashElement[] oldSubReg,
        int stayIx, int moveIx, Object[] newMainReg) {
    // Sorting by the next main-register index puts the elements which
    // retain their index in front of those which move.
    Arrays.sort(oldSubReg, new HashElementComp(newMainReg));
    int stayCount = 0;
    while (stayCount < oldSubReg.length
            && oldSubReg[stayCount].index(newMainReg) == stayIx) {
        stayCount++;
    }
    int moveCount = oldSubReg.length - stayCount;
    if (stayCount > 0) {
        newMainReg[stayIx] = packSubRegister(oldSubReg, 0, stayCount);
    }
    if (moveCount > 0) {
        newMainReg[moveIx] = packSubRegister(oldSubReg, stayCount, moveCount);
    }
}

/** Builds the content of a main-register slot from a part of a sorted
 * sub-register: the element itself for a single element, the source array
 * when all of it is taken, a trimmed copy otherwise.
 * @param source - the sub-register to take the elements from
 * @param from - index of the first element to take
 * @param count - number of elements to take (at least 1)
 * @return a <code>HashElement</code> or a <code>HashElement[]</code>
 */
private static Object packSubRegister(HashElement[] source, int from, int count) {
    if (count == 1) {
        return source[from];
    }
    if (count == source.length) {
        return source;
    }
    HashElement[] part = new HashElement[count];
    System.arraycopy(source, from, part, 0, count);
    return part;
}
/** Stores an element in the hash register under the given hash-code
 * value. The container for the hash-code is created when missing. When
 * auto-resizing is turned on, a resize check follows the addition.
 * @param hashValue - the hash-code value related with the stored element
 * @param element - the stored element
 */
public void add(int hashValue, Serializable element) {
    HashElement target = getHashElement(hashValue, true);
    elementsCount += target.add(element);
    if (autoResize) {
        resizeMainReg();
    }
}
/** Looks up all elements stored under a hash-code value.
 * @param hashValue - the hash-code value
 * @return the related elements (<b>null</b> if none found)
 */
public Object[] get(int hashValue) {
    HashElement found = getHashElement(hashValue, false);
    return (found == null) ? null : found.getElements();
}
/** Looks up the stored instance that is kept under the given hash-code
 * value and is equal to the given element.
 * @param hashValue - the hash-code value
 * @param element - the element for comparison
 * @return the corresponding stored element (<b>null</b> if there is no
 * container for the hash-code value)
 */
public Object get(int hashValue, Object element) {
    HashElement found = getHashElement(hashValue, false);
    if (found == null) {
        return null;
    }
    return found.get(element);
}
/** Checks if the register knows the given hash-code value.
 * @param hashValue - the hash-code value
 * @return <b>true</b> if a <code>HashElement</code> instance with the
 * given hash-code value is found. No check for existence of stored
 * elements is done, so the test is positive even if only a null element
 * has been added with such hash-code.
 */
public boolean exists(int hashValue) {
    HashElement found = getHashElement(hashValue, false);
    return found != null;
}
/** Checks if an element equal to the given one is stored under the
 * given hash-code value.
 * @param hashValue - the hash-code value
 * @param element - the element for comparison
 * @return <b>true</b> if matching element is found; <b>false</b> if
 * there is no container for the hash-code value or no match in it.
 */
public boolean exists(int hashValue, Object element) {
    HashElement found = getHashElement(hashValue, false);
    return (found == null) ? false : found.contains(element);
}
/** Retrieves the number of stored elements, i.e. the sum of the values
 * returned by <code>HashElement.add</code> for all additions so far.
 * @return the elements count
 */
public int getElementsCount() {
return elementsCount;
}
/** Finds the <code>HashElement</code> which corresponds to the given
 * hash-code value. Depending on the flag <code>createIfMissing</code>
 * a missing hash element is created and registered. The whole operation
 * runs under the <code>mainRegLock</code> lock.
 * @param hashValue - the hash-code value
 * @param createIfMissing - flag to force creation if not found.
 * @return the found/created <code>HashElement</code>; <b>null</b> if
 * nothing is found and creation was not requested
 */
private HashElement getHashElement(
        int hashValue, boolean createIfMissing) {
    synchronized (mainRegLock) {
        final int slotIx = mainIx(hashValue, mainReg);
        final Object slot = mainReg[slotIx];

        // Step 1: search the slot for a container with the same hash-code
        HashElement lone = null;      // the slot holds a single container
        HashElement[] chain = null;   // the slot holds several containers
        if (slot instanceof HashElement) {
            lone = (HashElement) slot;
            if (lone.subRegHash == hashValue) {
                return lone;
            }
        }
        else if (slot != null) {
            chain = (HashElement[]) slot;
            for (HashElement candidate : chain) {
                if (candidate.subRegHash == hashValue) {
                    return candidate;
                }
            }
        }

        // Step 2: nothing found - create a container if requested
        if (!createIfMissing) {
            return null;
        }
        HashElement created = new HashElement(hashValue);
        if (lone != null) {
            // The slot turns from a single container into an array
            mainReg[slotIx] = new HashElement[] { lone, created };
        }
        else if (chain != null) {
            // The array of containers is extended by one
            HashElement[] longer = new HashElement[chain.length + 1];
            System.arraycopy(chain, 0, longer, 0, chain.length);
            longer[chain.length] = created;
            mainReg[slotIx] = longer;
        }
        else {
            // The slot was free
            mainReg[slotIx] = created;
        }
        hashesCount++;
        return created;
    }
}
//======================================================
// An interface and a method for bulk operation over the
// whole content of the hash-register by external logic
//======================================================
/** This is a listener interface which provides means to perform bulk
 * operations over all of the stored elements in the register. This
 * interface must be used together with the method
 * <code>processContent</code>, which calls <code>process</code> once
 * per hash-code container. */
public interface ContentProcessor {
/** Receives the elements stored under one hash-code value.
 * @param elements - the result of <code>HashElement.getElements()</code>
 * for one container; it is <code>null</code> for an empty container. */
public void process(Object[] elements);
}
/** Walks through the whole main-register and hands the content of every
 * hash-code container to the <code>process</code> method of the given
 * <code>ContentProcessor</code> implementation - one call per container.
 * A container without stored elements is passed as <code>null</code>.
 * @param cProc - <code>ContentProcessor</code> implementation;
 */
public void processContent(ContentProcessor cProc) {
    for (int slotIx = 0; slotIx < mainReg.length; slotIx++) {
        Object slot = mainReg[slotIx];
        if (slot instanceof HashElement) {
            // A single container occupies the slot
            HashElement single = (HashElement) slot;
            cProc.process(single.getElements());
        }
        else if (slot != null) {
            // Several containers share the slot
            for (HashElement member : (HashElement[]) slot) {
                cProc.process(member.getElements());
            }
        }
    }
}
//==================================================
// Content Inspection methods for profiling purposes
//==================================================
/** Prints the content of the occupied main-register slots to the
 * standard output, one line per slot.
 * @param mainRegLim - limit for the number of printed slots; it is
 * checked after each visited slot
 * @param subRegLim - limit for the number of hash-code containers
 * printed for a slot which holds several of them
 */
public void printContent(int mainRegLim, int subRegLim) {
    int remaining = Math.min(mainReg.length, mainRegLim);
    for (int i = 0; i < mainReg.length; i++) {
        Object slot = mainReg[i];
        if (slot instanceof HashElement) {
            System.out.println("<"+i+">"+getHEContent((HashElement) slot));
            remaining--;
        }
        else if (slot != null) {
            HashElement[] shared = (HashElement[]) slot;
            int shown = Math.min(shared.length, subRegLim);
            System.out.print("<"+i+">");
            int j = 0;
            while (j < shown) {
                System.out.print(getHEContent(shared[j])+",");
                j++;
            }
            System.out.println();
            remaining--;
        }
        if (remaining <= 0) {
            break;
        }
    }
}
/** Builds a printable representation of a hash-code container in the
 * form <code>hash[element|element|]</code>.
 * @param he - the container to be presented
 * @return the text representation; an empty container gives
 * <code>hash[]</code>
 */
private String getHEContent(HashElement he) {
    StringBuilder cont = new StringBuilder();
    cont.append(he.subRegHash).append("[");
    Object[] eList = he.getElements();
    // getElements() returns null for a container without stored elements
    if (eList != null) {
        for (int i = 0; i < eList.length; i++) {
            cont.append(eList[i]).append("|");
        }
    }
    cont.append("]");
    return cont.toString();
}
/** Prints to the standard output how many main-register slots hold
 * 0, 1, 2, ... hash-code containers. Slots with 10 or more containers are
 * counted together in the last line. The value printed as
 * <code>Elements</code> is the number of distinct hash-code values
 * (<code>hashesCount</code>).
 */
public void printDistribution() {
    // A new int array is zero-filled, no explicit initialization is needed
    int[] bucketSize = new int[11];
    for (int i = 0; i < mainReg.length; i++) {
        Object slot = mainReg[i];
        if (slot == null) {
            bucketSize[0]++;
        }
        else if (slot instanceof HashElement) {
            bucketSize[1]++;
        }
        else {
            // Oversized slots are all counted in the last bucket
            int size = Math.min(((HashElement[]) slot).length,
                    bucketSize.length - 1);
            bucketSize[size]++;
        }
    }
    System.out.println("Distribution of buckets by sizes for HashRegister with " +
            "MRSize="+mainReg.length+" Elements="+hashesCount);
    System.out.println("ForSize BucketCount");
    for (int i = 0; i < bucketSize.length; i++) {
        System.out.println(i + " " + bucketSize[i]);
    }
}
//==================================================
// Serialization / Deserialization handling
//==================================================
/** Deserialization hook: restores the serialized fields through the
 * default mechanism and then creates a new lock object, because the
 * field <code>mainRegLock</code> is transient and is not part of the
 * serialized form.
 * @param in - the stream the instance is read from
 * @throws IOException - propagated from <code>defaultReadObject</code>
 * @throws ClassNotFoundException - propagated from <code>defaultReadObject</code>
 */
private void readObject(java.io.ObjectInputStream in)
throws IOException, ClassNotFoundException {
in.defaultReadObject();
// Recreate the lock object of the instance
mainRegLock = new Object();
}
/** Calculates the storage index for a hash-code value in given container:
 * the absolute value of the remainder of the hash-code divided by the
 * container length.
 * @param hashValue - the hash-code value
 * @param container - the container where the HashElement will be stored
 * @return - the value of the index (between 0 and container length - 1)
 */
private static int mainIx(int hashValue, Object[] container) {
    final int remainder = hashValue % container.length;
    // The magnitude of the remainder is below the container length,
    // so the negation cannot overflow.
    return (remainder < 0) ? -remainder : remainder;
}
}