/* XXL: The eXtensible and fleXible Library for data processing

Copyright (C) 2000-2011 Prof. Dr. Bernhard Seeger
                        Head of the Database Research Group
                        Department of Mathematics and Computer Science
                        University of Marburg
                        Germany

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 3 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library; If not, see <http://www.gnu.org/licenses/>.

    http://code.google.com/p/xxl/

*/

package xxl.core.collections.sweepAreas;

import static xxl.core.util.metaData.CostModelMetaDataIdentifiers.COST_MEASUREMENT;

import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;

import xxl.core.cursors.AbstractCursor;
import xxl.core.cursors.sources.EmptyCursor;
import xxl.core.functions.AbstractFunction;
import xxl.core.functions.Function;
import xxl.core.functions.Functions;

/**
 * A hash-based implementation of the interface
 * {@link SweepAreaImplementor}. The hash table
 * references {@link java.util.List lists} modelling
 * the buckets.
 *
 * @param <E> The type of the elements stored in this implementor.
 *
 * @see SweepAreaImplementor
 * @see xxl.core.functions.Function
 * @see java.util.List
 */
public class HashSAImplementor<E> extends AbstractSAImplementor<E> {

	/**
	 * Flag enabling the per-ID counting of hash function invocations.
	 * Switched on and off by the metadata management when
	 * {@link HashSAImplementorMetaDataManagement#HASH_FUNCTION_CALLS} is
	 * added or removed.
	 */
	protected volatile boolean countHashFunctionCalls = false;

	/**
	 * Flag enabling the counting of bucket operations. Switched on and off
	 * by the metadata management when
	 * {@link HashSAImplementorMetaDataManagement#HASH_BUCKET_OPERATIONS} is
	 * added or removed.
	 */
	protected volatile boolean countHashBucketOperations = false;

	/**
	 * Metadata management of this implementor. It offers counters for hash
	 * function calls and bucket operations as well as cost values derived
	 * from these counters and user-supplied cost factors.
	 */
	public class HashSAImplementorMetaDataManagement extends AbstractSAImplementorMetaDataManagement {

		/** Identifier: number of hash function calls per ID in the last period. */
		public final static String HASH_FUNCTION_CALLS = "HASH_FUNCTION_CALLS";

		/** Identifier: number of bucket operations in the last period. */
		public final static String HASH_BUCKET_OPERATIONS = "HASH_BUCKET_OPERATIONS";

		/** Running per-ID counter of hash function calls in the current period. */
		protected long[] hashFunctionCallCounter;

		/** Per-ID hash function calls of the last completed period. */
		protected long[] hashFunctionCalls;

		/** Identifier: user-supplied costs of a single call of each hash function. */
		public final static String SINGLE_HASH_COSTS = "SINGLE_HASH_COSTS";

		/** Costs of a single call, one entry per hash function; may be <code>null</code>. */
		protected double[] singleHashCosts;

		/** Identifier: hash costs per time unit of the last period. */
		public static final String HASH_COSTS = "HASH_COSTS";

		/** Flag enabling the computation of {@link #hashCosts}. */
		protected volatile boolean measureHashCosts = false;

		/** Hash costs computed by the last periodic update. */
		protected double hashCosts = 0.0;

		/** Running counter of bucket operations in the current period. */
		protected long hashBucketOperationsCounter;

		/** Bucket operations of the last completed period. */
		protected long hashBucketOperations;

		/** Identifier: user-supplied costs of a single bucket operation. */
		public final static String SINGLE_OPERATION_COSTS = "SINGLE_OPERATION_COSTS";

		/** Costs of a single bucket operation. */
		protected double singleOperationCosts;

		/** Identifier: bucket operation costs per time unit of the last period. */
		public static final String OPERATIONS_COSTS = "OPERATIONS_COSTS";

		/** Flag enabling the computation of {@link #operationsCosts}. */
		protected volatile boolean measureOperationsCosts = false;

		/** Operation costs computed by the last periodic update. */
		protected double operationsCosts = 0.0;

		/** Flag enabling the computation of {@link #costs}. */
		protected volatile boolean measureCosts = false;

		/** Sum of {@link #hashCosts} and {@link #operationsCosts} of the last periodic update. */
		protected double costs = 0.0;

		/**
		 * Publishes the counters of the finished period, resets the running
		 * counters and recomputes the derived cost values.
		 *
		 * @param period The length of the finished period; used as divisor
		 *        for the cost values.
		 */
		@Override
		public void updatePeriodicMetaData(long period) {
			if (countHashFunctionCalls) {
				hashFunctionCalls = hashFunctionCallCounter;
				hashFunctionCallCounter = new long[hashFunctions.length];
			}
			if (countHashBucketOperations) {
				hashBucketOperations = hashBucketOperationsCounter;
				hashBucketOperationsCounter = 0;
			}
			if (measureHashCosts) {
				hashCosts = 0.0;
				// Without cost factors or published call counts the hash costs stay 0.
				if (singleHashCosts != null && hashFunctionCalls != null) {
					for (int i = 0; i < hashFunctions.length; i++) {
						hashCosts += (hashFunctionCalls[i] * singleHashCosts[i]) / period;
					}
				}
			}
			if (measureOperationsCosts) {
				operationsCosts = (hashBucketOperations * singleOperationCosts) / period;
			}
			if (measureCosts) {
				costs = hashCosts + operationsCosts;
			}
		}

		/**
		 * Tells whether the given metadata item is refreshed by
		 * {@link #updatePeriodicMetaData(long)}.
		 *
		 * @param metaDataIdentifier The identifier of the metadata item.
		 * @return <tt>True</tt> for the counters and the derived cost
		 *         values, otherwise <tt>false</tt>.
		 */
		public boolean needsPeriodicUpdate(Object metaDataIdentifier) {
			return metaDataIdentifier.equals(HASH_FUNCTION_CALLS)
				|| metaDataIdentifier.equals(HASH_BUCKET_OPERATIONS)
				|| metaDataIdentifier.equals(HASH_COSTS)
				|| metaDataIdentifier.equals(OPERATIONS_COSTS)
				|| metaDataIdentifier.equals(COST_MEASUREMENT);
		}

		/**
		 * Registers the metadata item with the given identifier. Derived
		 * items include the items they are computed from. Identifiers not
		 * handled here are passed to the super class.
		 *
		 * @param metaDataIdentifier The identifier of the metadata item.
		 * @return <tt>True</tt> if the item has been added here or by the
		 *         super class, otherwise <tt>false</tt>.
		 */
		@Override
		protected boolean addMetaData(Object metaDataIdentifier) {
			if (metaDataIdentifier.equals(HASH_FUNCTION_CALLS)) {
				countHashFunctionCalls = true;
				hashFunctionCallCounter = new long[hashFunctions.length];
				metaData.add(metaDataIdentifier, new AbstractFunction<Object,long[]>() {
					@Override
					public long[] invoke() {
						return hashFunctionCalls;
					}
				});
				return true;
			}
			if (metaDataIdentifier.equals(HASH_BUCKET_OPERATIONS)) {
				countHashBucketOperations = true;
				hashBucketOperationsCounter = 0;
				metaData.add(metaDataIdentifier, new AbstractFunction<Object,Long>() {
					@Override
					public Long invoke() {
						return hashBucketOperations;
					}
				});
				return true;
			}
			if (metaDataIdentifier.equals(SINGLE_HASH_COSTS)) {
				metaData.add(metaDataIdentifier, new AbstractFunction<Object,double[]>() {
					@Override
					public double[] invoke() {
						return singleHashCosts;
					}
				});
				return true;
			}
			if (metaDataIdentifier.equals(HASH_COSTS)) {
				include(HASH_FUNCTION_CALLS);
				measureHashCosts = true;
				hashCosts = 0.0;
				metaData.add(metaDataIdentifier, new AbstractFunction<Object,Double>() {
					@Override
					public Double invoke() {
						return hashCosts;
					}
				});
				return true;
			}
			if (metaDataIdentifier.equals(SINGLE_OPERATION_COSTS)) {
				metaData.add(metaDataIdentifier, new AbstractFunction<Object,Double>() {
					@Override
					public Double invoke() {
						return singleOperationCosts;
					}
				});
				return true;
			}
			if (metaDataIdentifier.equals(OPERATIONS_COSTS)) {
				include(HASH_BUCKET_OPERATIONS);
				measureOperationsCosts = true;
				operationsCosts = 0.0;
				metaData.add(metaDataIdentifier, new AbstractFunction<Object,Double>() {
					@Override
					public Double invoke() {
						return operationsCosts;
					}
				});
				return true;
			}
			if (metaDataIdentifier.equals(COST_MEASUREMENT)) {
				include(HASH_COSTS);
				include(OPERATIONS_COSTS);
				costs = 0.0;
				measureCosts = true;
				metaData.add(metaDataIdentifier, new AbstractFunction<Object,Double>() {
					@Override
					public Double invoke() {
						return costs;
					}
				});
				return true;
			}
			return super.addMetaData(metaDataIdentifier);
		}

		/**
		 * Sets the costs of a single call for each hash function.
		 *
		 * @param costs The costs, one entry per hash function.
		 */
		public void setSingleHashCosts(double[] costs) {
			singleHashCosts = costs;
		}

		/**
		 * Sets the costs of a single bucket operation.
		 *
		 * @param costs The costs of one bucket operation.
		 */
		public void setSingleOperationCosts(double costs) {
			singleOperationCosts = costs;
		}

		/**
		 * Sets both kinds of cost factors at once.
		 *
		 * @param hashCosts The costs of a single call, one entry per hash function.
		 * @param operationCosts The costs of one bucket operation.
		 */
		public void setCostFactors(double[] hashCosts, double operationCosts) {
			setSingleHashCosts(hashCosts);
			setSingleOperationCosts(operationCosts);
		}

		/**
		 * Unregisters the metadata item with the given identifier. Derived
		 * items exclude the items they have included when being added.
		 *
		 * @param metaDataIdentifier The identifier of the metadata item.
		 * @return <tt>True</tt> if the item is handled by this class,
		 *         otherwise <tt>false</tt>.
		 */
		@Override
		protected boolean removeMetaData(Object metaDataIdentifier) {
			if (metaDataIdentifier.equals(HASH_FUNCTION_CALLS)) {
				countHashFunctionCalls = false;
				hashFunctionCalls = null;
				hashFunctionCallCounter = null;
				metaData.remove(metaDataIdentifier);
				return true;
			}
			if (metaDataIdentifier.equals(HASH_BUCKET_OPERATIONS)) {
				countHashBucketOperations = false;
				hashBucketOperations = 0;
				metaData.remove(metaDataIdentifier);
				return true;
			}
			if (metaDataIdentifier.equals(HASH_COSTS)) {
				exclude(HASH_FUNCTION_CALLS);
				measureHashCosts = false;
				hashCosts = 0.0;
				metaData.remove(metaDataIdentifier);
				return true;
			}
			if (metaDataIdentifier.equals(OPERATIONS_COSTS)) {
				exclude(HASH_BUCKET_OPERATIONS);
				measureOperationsCosts = false;
				operationsCosts = 0.0;
				metaData.remove(metaDataIdentifier);
				return true;
			}
			if (metaDataIdentifier.equals(COST_MEASUREMENT)) {
				exclude(HASH_COSTS);
				exclude(OPERATIONS_COSTS);
				costs = 0.0;
				measureCosts = false;
				metaData.remove(metaDataIdentifier);
				return true;
			}
			if (metaDataIdentifier.equals(SINGLE_HASH_COSTS) || metaDataIdentifier.equals(SINGLE_OPERATION_COSTS)) {
				metaData.remove(metaDataIdentifier);
				return true;
			}
			// NOTE(review): addMetaData falls back to super.addMetaData, but
			// unknown identifiers are not passed to super.removeMetaData here.
			// Looks asymmetric - confirm against the super class before changing.
			return false;
		}
	}

	/**
	 * The hash table mapping bucket numbers to buckets.
	 */
	protected HashMap<Integer, List<E>> hashTable;

	/**
	 * An array of hash functions offering insertion,
	 * retrieval and reorganization depending on the
	 * ID passed to the method calls. Each hash function
	 * is a map from Object -> Integer.
	 */
	protected Function<? super E,Integer>[] hashFunctions;

	/**
	 * A parameterless function that delivers
	 * an empty {@link java.util.List List} representing
	 * a new bucket.
	 */
	protected Function<?,List<E>> newList;

	/**
	 * The number of elements currently stored. Maintained by
	 * {@link #insert(Object)}, {@link #remove(Object)} and {@link #clear()}.
	 */
	protected int size;

	/**
	 * Constructs a new HashSAImplementor.
	 *
	 * @param hashFunctions The array of hash functions. Each is a
	 *        map from Object -> Integer.
	 * @param newList A parameterless function that returns a new
	 *        list at each invocation. These lists represent the
	 *        buckets of the hash table.
	 */
	public HashSAImplementor(Function<? super E,Integer>[] hashFunctions, Function<?,List<E>> newList) {
		this.hashFunctions = hashFunctions;
		this.hashTable = new HashMap<Integer, List<E>>();
		this.newList = newList;
		this.size = 0;
	}

	/**
	 * Constructs a new HashSAImplementor. The function creating the
	 * buckets delivers instances of the class
	 * {@link java.util.LinkedList LinkedList}.
	 *
	 * @param hashFunctions The array of hash functions. Each is a
	 *        map from Object -> Integer.
	 */
	public HashSAImplementor(Function<? super E,Integer>[] hashFunctions) {
		this(hashFunctions, HashSAImplementor.<E>linkedListFactory());
	}

	/**
	 * Constructs a new HashSAImplementor which uses
	 * the specified hash function independently from the ID
	 * passed to query, expire and reorganize calls.
	 *
	 * @param hashFunction The hash function, which is a
	 *        map from Object -> Integer.
	 * @param newList A parameterless function that returns a new
	 *        list at each invocation. These lists represent the
	 *        buckets of the hash table.
	 * @param dim The number of possible inputs or in other words,
	 *        the number of different IDs that can be passed to
	 *        method calls of this implementor.
	 */
	// Safe: the raw array is filled exclusively with the given, correctly typed hash function.
	@SuppressWarnings("unchecked")
	public HashSAImplementor(Function<? super E,Integer> hashFunction, Function<?,List<E>> newList, int dim) {
		this.hashTable = new HashMap<Integer, List<E>>();
		this.newList = newList;
		this.hashFunctions = new Function[dim];
		Arrays.fill(this.hashFunctions, hashFunction);
		this.size = 0;
	}

	/**
	 * Constructs a new HashSAImplementor which uses
	 * the specified hash function independently from the ID
	 * passed to query, expire and reorganize calls.
	 * The function creating the buckets delivers
	 * instances of the class {@link java.util.LinkedList LinkedList}.
	 *
	 * @param hashFunction The hash function, which is a
	 *        map from Object -> Integer.
	 * @param dim The number of possible inputs or in other words,
	 *        the number of different IDs that can be passed to
	 *        method calls of this implementor.
	 */
	public HashSAImplementor(Function<? super E,Integer> hashFunction, int dim) {
		this(hashFunction, HashSAImplementor.<E>linkedListFactory(), dim);
	}

	/**
	 * Constructs a new HashSAImplementor which uses
	 * the function delivered by {@link Functions#hash()} to determine
	 * the hash value of an object. The function creating the buckets delivers
	 * instances of the class {@link java.util.LinkedList LinkedList}.
	 *
	 * @param dim The number of possible inputs or in other words,
	 *        the number of different IDs that can be passed to
	 *        method calls of this implementor.
	 */
	public HashSAImplementor(int dim) {
		this(Functions.hash(), dim);
	}

	/**
	 * Returns a parameterless function creating an empty
	 * {@link java.util.LinkedList LinkedList} at each invocation.
	 */
	private static <T> Function<Object,List<T>> linkedListFactory() {
		return new AbstractFunction<Object,List<T>>() {
			@Override
			public List<T> invoke() {
				return new LinkedList<T>();
			}
		};
	}

	/**
	 * Adds the given number of calls to the counter of the hash function
	 * with the given ID, provided that counting is enabled.
	 */
	private void recordHashFunctionCalls(int id, long calls) {
		if (countHashFunctionCalls) {
			((HashSAImplementorMetaDataManagement)metaDataManagement).hashFunctionCallCounter[id] += calls;
		}
	}

	/**
	 * Increments the bucket operation counter, provided that counting is enabled.
	 */
	private void recordBucketOperation() {
		if (countHashBucketOperations) {
			((HashSAImplementorMetaDataManagement)metaDataManagement).hashBucketOperationsCounter++;
		}
	}

	/**
	 * Inserts the given element into the corresponding
	 * bucket of the hash table. The bucket number is
	 * determined by <code>hashFunctions[ID].invoke(o)</code>.
	 * If the hash table does not contain a bucket with this
	 * number, a new bucket is created by invoking
	 * the function <code>newList</code>.
	 *
	 * @param o The object to be inserted.
	 * @throws IllegalArgumentException Throws an IllegalArgumentException
	 *         if something goes wrong with the insertion due to the passed argument.
	 */
	@Override
	public void insert(E o) throws IllegalArgumentException {
		int bucketNo = hashFunctions[ID].invoke(o);
		recordHashFunctionCalls(ID, 1);
		List<E> bucket = hashTable.get(bucketNo);
		if (bucket == null) {
			bucket = newList.invoke();
			hashTable.put(bucketNo, bucket);
			// only the creation of a bucket is counted as a bucket operation
			recordBucketOperation();
		}
		bucket.add(o);
		size++;
	}

	/**
	 * Removes the specified element from the hash table.
	 * Tries to access the corresponding bucket and to remove
	 * the first element being equal to <code>o</code> with regard
	 * to the predicate <code>equals</code>. A bucket that becomes
	 * empty remains in the hash table.
	 *
	 * @param o The object to be removed.
	 * @return <tt>True</tt> if the removal has been successful, otherwise <tt>false</tt>.
	 * @throws IllegalArgumentException Throws an IllegalArgumentException
	 *         if something goes wrong with the removal due to the passed argument.
	 */
	@Override
	public boolean remove(E o) throws IllegalArgumentException {
		int bucketNo = hashFunctions[ID].invoke(o);
		recordHashFunctionCalls(ID, 1);
		List<E> bucket = hashTable.get(bucketNo);
		if (bucket == null) {
			return false;
		}
		for (Iterator<E> it = bucket.iterator(); it.hasNext(); ) {
			if (equals.invoke(o, it.next())) {
				it.remove();
				recordBucketOperation();
				size--;
				return true;
			}
		}
		return false;
	}

	/**
	 * Checks if element <tt>o1</tt> is contained and
	 * if <tt>true</tt> replaces it with <tt>o2</tt>. Both elements
	 * have to be mapped to the same bucket.
	 *
	 * @param o1 The object to be replaced.
	 * @param o2 The new object.
	 * @return The element previously stored at the position of
	 *         <tt>o1</tt>, as delivered by {@link List#set(int, Object)}.
	 * @throws IllegalArgumentException Throws an IllegalArgumentException
	 *         if the hash values of <tt>o1</tt> and <tt>o2</tt> differ or
	 *         if <tt>o1</tt> is not contained.
	 */
	@Override
	public E update(E o1, E o2) throws IllegalArgumentException {
		int hashValue1 = hashFunctions[ID].invoke(o1);
		int hashValue2 = hashFunctions[ID].invoke(o2);
		// Both calls have taken place, so they are counted even if the check below fails.
		recordHashFunctionCalls(ID, 2);
		if (hashValue1 != hashValue2) {
			throw new IllegalArgumentException("Incompatible hash values!");
		}
		List<E> bucket = hashTable.get(hashValue1);
		if (bucket != null) {
			for (int i = 0, n = bucket.size(); i < n; i++) {
				if (equals.invoke(o1, bucket.get(i))) {
					recordBucketOperation();
					return bucket.set(i, o2);
				}
			}
		}
		throw new IllegalArgumentException("Object o1 is not contained.");
	}

	/**
	 * Clears this implementor by clearing all buckets
	 * as well as the hash table.
	 */
	@Override
	public void clear() {
		for (List<E> bucket : hashTable.values()) {
			bucket.clear();
		}
		hashTable.clear();
		size = 0;
	}

	/**
	 * Closes this implementor. In this case,
	 * only {@link #clear()} is executed.
	 */
	@Override
	public void close() {
		clear();
	}

	/**
	 * Returns the size of this implementor which
	 * corresponds to the sum of the bucket sizes.
	 *
	 * @return The size.
	 */
	@Override
	public int size() {
		return size;
	}

	/**
	 * Returns an iterator over the elements of this
	 * implementor. Consequently, this iterator is
	 * a concatenation of the bucket iterators.
	 *
	 * @return An iterator over the elements of this HashSAImplementor.
	 * @throws UnsupportedOperationException If this operation is not supported.
	 */
	@Override
	public Iterator<E> iterator() {
		return new AbstractCursor<E>() {

			/** Iterator of the bucket currently traversed; <code>null</code> if there is none. */
			protected Iterator<E> it = null;

			/** Iterator over all buckets of the hash table. */
			protected Iterator<List<E>> buckets = hashTable.values().iterator();

			@Override
			public boolean hasNextObject() {
				// Skip exhausted and empty buckets until an element is found.
				while (it == null || !it.hasNext()) {
					if (!buckets.hasNext()) {
						it = null;
						return false;
					}
					it = buckets.next().iterator();
				}
				return true;
			}

			@Override
			public E nextObject() {
				return it.next();
			}
		};
	}

	/**
	 * Queries this implementor with the help of the
	 * specified query object <code>o</code> and the query-predicates
	 * set during initialization, see method
	 * {@link #initialize(int, xxl.core.predicates.Predicate[])}.
	 * At first, the corresponding bucket for retrieval
	 * is determined by applying the hash function on <code>o</code>.
	 * Then this bucket is filtered for matching elements which
	 * are returned as a cursor. <br>
	 * <i>Note:</i>
	 * This iterator should not be used to remove any elements from this
	 * implementor SweepArea!
	 *
	 * @param o The query object. This object is typically probed against
	 *        the elements contained in this implementor.
	 * @param ID An ID determining from which input this method
	 *        is triggered. It selects the hash function applied to <code>o</code>.
	 * @return All matching elements of this implementor are returned as an iterator.
	 * @throws IllegalArgumentException Throws an IllegalArgumentException
	 *         if something goes wrong due to the passed arguments during retrieval.
	 * @see #filter(Iterator, Object, int)
	 */
	@Override
	public Iterator<E> query(E o, int ID) throws IllegalArgumentException {
		if (size == 0) {
			return new EmptyCursor<E>();
		}
		int bucketNo = hashFunctions[ID].invoke(o);
		recordHashFunctionCalls(ID, 1);
		List<E> bucket = hashTable.get(bucketNo);
		if (bucket == null) {
			return new EmptyCursor<E>();
		}
		return filter(bucket.iterator(), o, ID);
	}

	/**
	 * Queries this implementor with several query objects at once.
	 * The first <code>valid</code> query objects have to be mapped to the
	 * same bucket; this bucket is then filtered for matching elements. <br>
	 * <i>Note:</i>
	 * This iterator should not be used to remove any elements from this
	 * implementor SweepArea!
	 *
	 * @param os The query objects.
	 * @param IDs The IDs belonging to the query objects; passed on to the filter.
	 * @param valid The number of leading entries of <code>os</code> taken into account.
	 * @return All matching elements of this implementor are returned as an iterator.
	 * @throws IllegalArgumentException Throws an IllegalArgumentException
	 *         if the query objects are hashed to different buckets.
	 */
	@Override
	public Iterator<E> query(E[] os, int [] IDs, int valid) throws IllegalArgumentException {
		if (size == 0) {
			return new EmptyCursor<E>();
		}
		// NOTE(review): all query objects are hashed with hashFunctions[ID], i.e. with the
		// ID field of this implementor, whereas the single-object query uses the passed ID.
		// Presumably hashFunctions[IDs[i]] is intended - confirm before changing.
		int bucketNo = hashFunctions[ID].invoke(os[0]);
		for (int i = 1; i < valid; i++) {
			if (hashFunctions[ID].invoke(os[i]) != bucketNo) {
				throw new IllegalArgumentException("Query elements are hashed to different buckets");
			}
		}
		recordHashFunctionCalls(ID, valid);
		List<E> bucket = hashTable.get(bucketNo);
		if (bucket == null) {
			return new EmptyCursor<E>();
		}
		return filter(bucket.iterator(), os, IDs, valid);
	}

	/**
	 * Creates the metadata management of this implementor, which is an
	 * instance of {@link HashSAImplementorMetaDataManagement}.
	 *
	 * @throws IllegalStateException If a metadata management already exists.
	 */
	@Override
	public void createMetaDataManagement() {
		if (metaDataManagement != null) {
			throw new IllegalStateException("An instance of MetaDataManagement already exists.");
		}
		metaDataManagement = new HashSAImplementorMetaDataManagement();
	}
}