package com.ontotext.kim.model; import java.io.IOException; import java.io.Serializable; import java.util.Arrays; import java.util.Comparator; import org.apache.commons.collections.comparators.NullComparator; import com.ontotext.kim.util.KimLogs; /** * This class implements a hash-code indexed storage for objects. It resembles * the <code>HashSet</code> class with these substantial differences:<br> * - The hash-code value is supplied externally * - Multiple objects can be stored with equal hash-codes. They are grouped * and stored in packages. These packages are implemented as the internal * class <code>HashElement</code>.<br> * - Stored objects are retrieved by packages through their hash-code value.<br> * - If stored objects implement <code>Comparable</code> they are sorted in the * packages and are returned as a sorted array. This allows binary search within * the result.<br> * <br> * The implementation of the storage structure as also the resize logic is * simplified intentionally. By design it is intended to support only operations * addition, search and retrieval (not removal). * * @author danko */ public class HashRegister implements Serializable { private static final long serialVersionUID = 2442L; /** * This flag could take the following values:<br> * 0 - add the duplicating object;<br> * 1 - replace the old object and put the new one;<br> * 2 - keep the old one and ignore the new one;<br> */ protected static int todoIfExists = 0; /** * Determines the step for extension of the array field * <code>HashElement.elementHolder</code> */ protected static int subRegIncrement = 2; /** * Determines the initial size of the main-register. A better distribution * is achieved if it is a prime number. */ protected static int initialSize = 1021; /** * Determines the maximal percent of filling of the main-register * after which the HashRegister is expanded by doubling its size. */ protected int maxFillPerc = 75; /** * A container class that holds all of the stored objects with * same hash-code value. If the Objects implement * <code>Comparable</code> they are sorted in the packages and * are returned as a sorted array. This allows binary search in * the result. * * @author danko * */ public static class HashElement implements Serializable { private static final long serialVersionUID = 2442L; public final int subRegHash; public Serializable elementHolder = null; public HashElement (int hashCode) { subRegHash = hashCode; } /** * Adds new element to the container. The added element must have * the same hash-code value as the container. This must be verified * outside this class. * * @param element - the added element */ public byte add(Serializable element) { byte elemCountChange = add12(element); if (elemCountChange < 0) { if (element instanceof Comparable) elemCountChange = addSorted((Comparable) element); else elemCountChange = addUnsorted(element); } return elemCountChange; } /** Method adds new element to the container in unsorted manner. * The method is called only if the container already have 2 * elements stored in. * @param element - the element to store * @return number of elements stored: 1 - if stored successfully; * 0 - element already existed and no duplicates are allowed; */ private byte addUnsorted(Object element) { if (todoIfExists > 0) { int i = Arrays.asList(elementHolder).indexOf(element); if (i >= 0) switch (todoIfExists) { case 1: // replace the old object and put the new one ((Object[])elementHolder)[i] = element; return 0; case 2: // drop the new one and retain the old one return 0; } } Object[] newSubReg; Object[] oldSubReg = ((Object[])elementHolder); int addPosition = oldSubReg.length; while (oldSubReg[addPosition - 1] == null) addPosition--; if (addPosition == oldSubReg.length) { newSubReg = new Object[oldSubReg.length + subRegIncrement]; for (int i=0; i < oldSubReg.length; i++) newSubReg[i] = oldSubReg[i]; } else newSubReg = oldSubReg; newSubReg[addPosition] = element; elementHolder = newSubReg; return 1; } private static final Comparator<Comparable> nsc = new NullComparator(true); /** Method adds new element to the container in sorted manner. * The method is called only if the container already have 2 * elements stored in. * @param element - the element to store * @return number of elements stored: 1 - if stored successfully; * 0 - element already existed and no duplicates are allowed; */ private byte addSorted(Comparable element) { Comparable[] newSubReg; Comparable[] oldSubReg = ((Comparable[])elementHolder); int addPosition = Arrays.binarySearch( oldSubReg, element, nsc); if (todoIfExists > 0 && addPosition >= 0) { switch (todoIfExists) { case 1: // replace the old object and put the new one ((Object[])elementHolder)[addPosition] = element; return 0; case 2: // drop the new one and retain the old one return 0; } } if (addPosition < 0) addPosition = - addPosition - 1; if (oldSubReg[oldSubReg.length-1] != null) { newSubReg = new Comparable[oldSubReg.length + subRegIncrement]; for (int i = 0; i < oldSubReg.length; i++) { if (i < addPosition) newSubReg[i] = oldSubReg[i]; else newSubReg[i+1] = oldSubReg[i]; } } else { for (int i = oldSubReg.length-1; i > addPosition; i--) oldSubReg[i] = oldSubReg[i-1]; newSubReg = oldSubReg; } newSubReg[addPosition] = element; elementHolder = newSubReg; return 1; } /** Method adds new element to the container in sorted manner. * The method is called only if the container is empty or has 1 * element stored in. * @param element - element to be stored * @return - returns the change in the number of stored elements * (0 or 1) or returns -1 if element is not handled */ private byte add12(Serializable element) { if (element == null) return 0; if (elementHolder == null) { elementHolder = element; return 1; } else if ( !(elementHolder instanceof Object[])){ if (todoIfExists > 0 && element.equals(elementHolder)) { switch (todoIfExists) { case 1: // replace the old object and put the new one elementHolder = element; return 0; case 2: // drop the new one and retain the old one return 0; } } Object[] newSubReg; if (element instanceof Comparable) { newSubReg = new Comparable[1 + subRegIncrement]; if (nsc.compare((Comparable) elementHolder, (Comparable) element) < 0) { newSubReg[0] = elementHolder; newSubReg[1] = element; } else { newSubReg[0] = element; newSubReg[1] = elementHolder; } } else { newSubReg = new Object[1 + subRegIncrement]; newSubReg[0] = elementHolder; newSubReg[1] = element; } elementHolder = newSubReg; return 1; } return -1; } /** A shortcut method which calculates the storage index in given * container. The index is calculated based on the hash-code of * the current HashElement. * @param container - the container where the HashElement will be stored * @return - the value of the index */ public int index(Object[] container) { return mainIx(subRegHash, container); } /** Checks if given element is already stored in the container. * @param element - the element to be checked. * @return <b>true</b> - if found */ public boolean contains(Object element) { Object[] elements = getElements(); if (element == null) return (elements == null); else if (element instanceof Comparable) return Arrays.binarySearch( (Comparable[])elements, (Comparable) element, nsc) >= 0; else return Arrays.asList(elements).contains(element); } /** Gets the stored instance that equals the given instance. * @param element - the element to be checked. * @return the equal stored instance */ public Object get(Object element) { Object[] elements = getElements(); int i = -1; if (element != null) { if (element instanceof Comparable) i = Arrays.binarySearch( (Comparable[])elements, (Comparable) element, nsc); else i = Arrays.asList(elements).indexOf(element); } return ((i >= 0)? elements[i]: null); } /** Returns the stored elements as an object array * @return array with all stored elements */ public Object[] getElements() { if (elementHolder == null) return null; else if (elementHolder instanceof Object[]) { Object[] eList = (Object[]) elementHolder; int i = eList.length; while (i>0 && eList[i-1] == null) i--; if (eList[0] instanceof Comparable) return Arrays.asList( eList).subList(0, i).toArray(new Comparable[0]); else return Arrays.asList( eList).subList(0, i).toArray(); } else { // getPoint.elementHolder contains single object if (elementHolder instanceof Comparable) return new Comparable[]{(Comparable) elementHolder}; else return new Object[]{elementHolder}; } } } /** Comparator for sorting <code>HashElement</code> instances. */ public static class HashElementComp implements Comparator<HashElement> { Object[] container; public HashElementComp(Object[] container) { this.container = container; } public int compare(HashElement o1, HashElement o2) { int k1 = o1.index(container); int k2 = o2.index(container); if (k1 < k2) return -1; if (k1 > k2) return 1; return 0; } } /** The main storage structure of the Hash Register. The index under * which an object will be stored is calculated through the method * <code>mainIx</code>. Calculation is based on the externally * provided hash-code of the object and the current size of the array */ protected Object[] mainReg; /** Access synchronization locking object */ protected transient Object mainRegLock = new Object(); /** Number of different hash-code values provided with stored objects */ protected int hashesCount = 0; /** Current limit of the number of distinct hash-code values that * could be stored with the current size of <code>mainReg</code> */ protected int hashesCountLimit = 0; /** The number of objects stored in the register */ protected int elementsCount = 0; /** This flag determines if the auto-resizing is turned on */ protected boolean autoResize = true; /** This public constructor starts the hash register with predefined size * and automatic resizing turned on */ public HashRegister() { this(initialSize, true); } /** This protected constructor allows starting the hash register with * different than default size and automatic resizing option. * @param newSize - the initial <code>mainReg</code> size. * @param autoRes - the auto-resize flag. */ protected HashRegister(int newSize, boolean autoRes) { mainReg = new Object[newSize]; hashesCountLimit = mainReg.length * maxFillPerc / 100; autoResize = autoRes; } /** The method resizes the <code>mainReg</code> array if the content * volume limit was reached. * @return <b>true</b> if resize was performed */ public boolean resizeMainReg() { // If limit is not reached -> quit resize if (hashesCountLimit > hashesCount) return false; long start = System.currentTimeMillis(); Object[] newMainReg; try { newMainReg = new Object[mainReg.length*2]; } catch (Exception e) { KimLogs.logNERC_GAZETTEER.warn("Unsuccessful attempt to resize" + " [HashRegister] due to:\n" + e.getMessage()); return false; } synchronized (mainRegLock) { // Cycle through the old register and move the [HashElement] objects // to the new main-register for (int main_i=0; main_i < mainReg.length; main_i++) { int new_main_i = main_i + mainReg.length; if (mainReg[main_i] == null) { // If there was no sub-register: // Do nothing } else if (mainReg[main_i] instanceof HashElement) { // If there was just one [HashElement] for sub-register: // Determine it's new place newMainReg[((HashElement)mainReg[main_i]).index(newMainReg)] = mainReg[main_i]; } else { // If there were two or more [HashElement] as sub-register: // Try to split the sub-register in two parts. HashElement[] oldSubReg = (HashElement[]) mainReg[main_i]; // The comparator [HashElementComp] allows [HashElement] // instances to be sorted with respect to the value of their // next main-register index (after the expansion). Arrays.sort(oldSubReg, new HashElementComp(newMainReg)); // Scan where is the splitting point between the elements // that retain their old main-register index and those // that will move to a new main-register index int j1 = 0; while (j1 < oldSubReg.length && oldSubReg[j1].index(newMainReg) == main_i) j1++; // Handle the elements retaining the old main-register index if (j1 == 0) { } else if (j1 == 1) { newMainReg[main_i] = oldSubReg[0]; } else if (j1 < oldSubReg.length) { HashElement[] newSubReg = new HashElement[j1]; for (int k=0; k<j1; k++) newSubReg[k] = oldSubReg[k]; newMainReg[main_i] = newSubReg; } else { newMainReg[main_i] = oldSubReg; } // Handle the elements moving to the new main-register index int j2 = oldSubReg.length - j1; if (j2 == 0) { } else if (j2 == 1) { newMainReg[new_main_i] = oldSubReg[oldSubReg.length - 1]; } else if (j2 < oldSubReg.length) { HashElement[] newSubReg = new HashElement[j2]; for (int k=0; k<j2; k++) newSubReg[k] = oldSubReg[j1 + k]; newMainReg[new_main_i] = newSubReg; } else { newMainReg[new_main_i] = oldSubReg; } } // Release the old sub-register to allow for early GC. mainReg[main_i] = null; } // Release the old main-register to allow for GC. mainReg = newMainReg; hashesCountLimit = mainReg.length * maxFillPerc / 100; } long duration = System.currentTimeMillis() - start; KimLogs.logNERC_GAZETTEER.debug("Main Register resized to " + mainReg.length + " for " + duration + "ms."); return true; } /** Adds a new element to the hash register. * @param hashValue - the hash-code value related with the stored element * @param element - the stored element */ public void add(int hashValue, Serializable element) { elementsCount += getHashElement(hashValue, true).add(element); if (autoResize) resizeMainReg(); } /** Retrieves an array of stored elements corresponding to a hash-code * @param hashValue - the hash-code value * @return the related elements (<b>null</b> if none found) */ public Object[] get(int hashValue) { HashElement getPoint = getHashElement(hashValue, false); if (getPoint != null) return getPoint.getElements(); else return null; } /** Retrieves an element instance that is relevant to the given * hash-code and is equal to the given element. * @param hashValue - the hash-code value * @param element - the element for comparison * @return the corresponding stored element */ public Object get(int hashValue, Object element) { HashElement getPoint = getHashElement(hashValue, false); if ( getPoint != null ) return getPoint.get(element); return null; } /** Checks if there are any stored elements related to the given * hash-code value. * @param hashValue - the hash-code value * @return <b>true</b> if an <code>HashElement</code> instance is found * which has the same as given hash-code value. No check for existence * of stored elements is done, so the test is positive even is null element * with such hash-code has been added. */ public boolean exists(int hashValue) { return (!(getHashElement(hashValue, false) == null)); } /** Checks if there is a stored element related to the given * hash-code value and equal to the given element. * @param hashValue - the hash-code value * @param element - the element for comparison * @return <b>true</b> if matching element is found. */ public boolean exists(int hashValue, Object element) { HashElement getPoint = getHashElement(hashValue, false); if ( getPoint != null ) return getPoint.contains(element); return false; } /** Retrieves the number of stored elements. * @return the elements count */ public int getElementsCount() { return elementsCount; } /** This method searches a <code>HashElement</code> corresponding to the * given hash-code value. Depending on the flag <code>createIfMissing</code> * if no such hash element is found it can be created. * @param hashValue - the hash-code value * @param createIfMissing - flag to force creation if not found. * @return the found/created <code>HashElement</code> */ private HashElement getHashElement( int hashValue, boolean createIfMissing) { HashElement getPoint; synchronized (mainRegLock) { int main_i = mainIx(hashValue, mainReg); Object subReg = mainReg[main_i]; if (subReg == null) { if (createIfMissing) { getPoint = new HashElement(hashValue); mainReg[main_i] = getPoint; hashesCount++; } else getPoint = null; } else if (subReg instanceof HashElement) { getPoint = (HashElement) subReg; if (getPoint.subRegHash != hashValue) if (createIfMissing) { HashElement[] newSubReg = new HashElement[2]; newSubReg[0] = getPoint; getPoint = new HashElement(hashValue); newSubReg[1] = getPoint; mainReg[main_i] = newSubReg; hashesCount++; } else { getPoint = null; } } else { // subReg is instance of HashElement[] HashElement[] oldSubReg = (HashElement[]) subReg; getPoint = null; for (int i=0; i < oldSubReg.length; i++) { if (oldSubReg[i].subRegHash == hashValue) { getPoint = oldSubReg[i]; break; } } if (createIfMissing && getPoint == null) { HashElement[] newSubReg = new HashElement[oldSubReg.length+1]; for (int i=0; i<oldSubReg.length; i++) newSubReg[i] = oldSubReg[i]; getPoint = new HashElement(hashValue); newSubReg[newSubReg.length-1] = getPoint; mainReg[main_i] = newSubReg; hashesCount++; } } } return getPoint; } //====================================================== // An interface and a method for bulk operation over the // whole content of the hash-register by external logic //====================================================== /** This is a listener interface which provides means to perform bulk * operations over all of the stored elements in the register. This * interface must be used together with the method * <code>processContent</code>. */ public interface ContentProcessor { public void process(Object[] elements); } /** This method accepts an implementation of the * <code>ContentProcessor</code> interface. Then it cycles all * of the stored elements in the register and passes them to the * <code>process</code> method of the <code>ContentProcessor</code> * implementation. * @param cProc - <code>ContentProcessor</code> implementation; */ public void processContent(ContentProcessor cProc) { for (int main_i=0; main_i<mainReg.length; main_i++) { if (mainReg[main_i] == null) continue; if (mainReg[main_i] instanceof HashElement) cProc.process(((HashElement)mainReg[main_i]).getElements()); else { HashElement[] subReg = (HashElement[]) mainReg[main_i]; for (int i=0; i<subReg.length; i++) cProc.process(subReg[i].getElements()); } } } //================================================== // Content Inspection methods for profiling purposes //================================================== public void printContent(int mainRegLim, int subRegLim) { int repLen = Math.min(mainReg.length, mainRegLim); for (int i=0; i<mainReg.length; i++) { if ( mainReg[i] == null) ; else if (mainReg[i] instanceof HashElement) { System.out.println("<"+i+">"+getHEContent((HashElement) mainReg[i])); repLen--; } else { int subRepLen = Math.min(((HashElement[])mainReg[i]).length, subRegLim); System.out.print("<"+i+">"); for (int j=0; j<subRepLen; j++) System.out.print(getHEContent(((HashElement[])mainReg[i])[j])+","); System.out.println(); repLen--; } if (repLen <= 0) break; } } private String getHEContent(HashElement he) { String cont = he.subRegHash + "["; Object[] eList = he.getElements(); for (int i=0; i<eList.length; i++) cont += eList[i].toString() + "|"; cont += "]"; return cont; } public void printDistribution() { int[] bucketSize = new int[11]; for (int i=0; i<bucketSize.length; i++) bucketSize[i] = 0; for (int i=0; i<mainReg.length; i++) { if ( mainReg[i] == null) bucketSize[0]++; else if (mainReg[i] instanceof HashElement) bucketSize[1]++; else { int size = ((HashElement[])mainReg[i]).length; if (size >= bucketSize.length) size = bucketSize.length-1; bucketSize[size]++; } } System.out.println("Distribution of buckets by sizes for HashRegister with" + "MRSize="+mainReg.length+" Elements="+hashesCount); System.out.println("ForSize BucketCount"); for (int i=0; i<bucketSize.length; i++) System.out.println(i + " " + bucketSize[i]); } //================================================== // Serialization / Deserialization handling //================================================== private void readObject(java.io.ObjectInputStream in) throws IOException, ClassNotFoundException { in.defaultReadObject(); // Recreate the lock object of the instance mainRegLock = new Object(); } /** Method which calculates the storage index in given container. * The index is calculated based on a hash-code value. * @param hashValue - the hash-code value * @param container - the container where the HashElement will be stored * @return - the value of the index */ private static int mainIx(int hashValue, Object[] container) { return Math.abs(hashValue % container.length); } }