// SimilarityJoinDemo.java example
//
// Explorer
// xxl-master
/* XXL: The eXtensible and fleXible Library for data processing

Copyright (C) 2000-2011 Prof. Dr. Bernhard Seeger
                        Head of the Database Research Group
                        Department of Mathematics and Computer Science
                        University of Marburg
                        Germany

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 3 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library;  If not, see <http://www.gnu.org/licenses/>. 

    http://code.google.com/p/xxl/

*/

package xxl.core.relational;

import java.io.File;
import java.io.IOException;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.Iterator;

import xxl.core.collections.queues.ListQueue;
import xxl.core.collections.queues.Queue;
import xxl.core.collections.queues.io.RandomAccessFileQueue;
import xxl.core.cursors.MetaDataCursor;
import xxl.core.functions.AbstractFunction;
import xxl.core.functions.Function;
import xxl.core.io.IOCounter;
import xxl.core.io.converters.ConvertableConverter;
import xxl.core.io.converters.Converters;
import xxl.core.predicates.FeaturePredicate;
import xxl.core.predicates.Predicate;
import xxl.core.relational.cursors.MergeSorter;
import xxl.core.relational.cursors.Orenstein;
import xxl.core.relational.cursors.ResultSetMetaDataCursor;
import xxl.core.relational.cursors.SortMergeJoin.Type;
import xxl.core.relational.resultSets.MetaDataCursorResultSet;
import xxl.core.relational.resultSets.VirtualTable;
import xxl.core.relational.tuples.ArrayTuple;
import xxl.core.relational.tuples.Tuple;
import xxl.core.relational.tuples.TupleConverter;
import xxl.core.relational.tuples.Tuples;
import xxl.core.spatial.KPEzCode;
import xxl.core.spatial.cursors.PointInputCursor;
import xxl.core.spatial.points.FloatPoint;
import xxl.core.spatial.predicates.DistanceWithinMaximum;
import xxl.core.util.BitSet;
import xxl.core.util.WrappingRuntimeException;
import xxl.core.util.metaData.CompositeMetaData;

/**
 * This class demonstrates the usage of the library XXL with a special
 * focus on the integration of similarity join algorithms into
 * the database management system <b>Cloudscape</b>.<br>
 * <tt>Cloudscape</tt> offers the possibility of accessing so called
 * virtual tables by providing a special interface, named VTI
 * (Virtual Table Interface). <br>
 * XXL provides with its package {@link xxl.core.relational} wrapper classes
 * which are able to wrap a {@link java.sql.ResultSet} to a
 * {@link xxl.core.cursors.MetaDataCursor} and vice versa. The class
 * {@link xxl.core.relational.resultSets.VirtualTable} wraps a ResultSet and
 * therefore it can be used in SQL queries like a usual table.
 * <p>
 * The main method represents a minimal JDBC application showing
 * JDBC access to Cloudscape.
 * It provides methods to get a connection to Cloudscape,
 * to execute queries, to close the open connection and
 * to shutdown Cloudscape. <br>
 * Cloudscape applications can be run in three different modes:
 * "Cloudscape applications can run against Cloudscape running in an embedded
 * or a client/server framework. When Cloudscape runs in an embedded framework,
 * the Cloudscape application and Cloudscape run in the same JVM. The application
 * starts up the Cloudscape engine. When Cloudscape runs in a client/server framework,
 * the application runs in a different JVM from Cloudscape. The application only needs
 * to start the client driver, and the connectivity framework provides network connections.
 * (The server must already be running.)"
 *
 * <p>When you run this application, give one of the following arguments:
 * <ul>
 * <li> embedded (default, if none specified)</li>
 * <li> rmijdbcclient (if Cloudscape is running embedded in the RmiJdbc Server framework)</li>
 * <li> sysconnectclient (if Cloudscape is running embedded in the Cloudconnector framework)</li>
 * </ul>
 * <p>
 * A complete similarity join is performed, i.e. point data can be inserted
 * into database tables, it is retrieved using ResultSetMetaDataCursors, the
 * contained elements are mapped (Tuple --> FloatPoint) and sorted, after that a similarity join
 * based on Jack Orenstein's algorithm is executed and the elements
 * are mapped back (FloatPoint --> Tuple). <br>
 * The resulting elements are inserted into the table 'JoinResults'. They can
 * be viewed, e.g. with <tt>Cloudview</tt>, if the argument <tt>clean</tt>
 * has not been specified. Another feature of this use case is given by
 * the possibility of using only a fraction of all elements stored
 * in the database table. Therefore a {@link xxl.core.relational.cursors.Sampler} wraps
 * the ResultSetMetaDataCursors delivering the input tuples. <br>
 * The whole join process can be steered by the arguments given to
 * the main method when calling this application. <br>
 * Note, all arguments have been set to default values for an easier usage.
 *
 * <p>
 * The following arguments can be passed to the main method: <br><br>
 *
 * <tt>rmijdbcclient</tt> - particular Cloudscape mode <br>
 * <tt>sysconnectclient</tt> - particular Cloudscape mode <br>
 * <tt>input</tt> - insert data into Cloudscape tables <br>
 * <tt>createDatabase</tt> - a new database will be created <br>
 * <tt>clean</tt> - used database will be cleaned (removal of all tables) <br>
 * <tt>external</tt> - external file queue will be placed on the location given by the next argument<br>
 * <tt>DBName</tt> - name of the database will be set to the next given argument <br>
 * <tt>DBLocation</tt> - path to the database will be set to the next given argument <br>
 * <tt>file1</tt> - location of first input file will be set to the next given argument <br>
 * <tt>file2</tt> - location of second input file will be set to the next given argument <br>
 * <tt>dim</tt> - dimension of the float points will be set to the next given argument <br>
 * <tt>mem</tt> - memory size in bytes will be set to the next given argument <br>
 * <tt>initialCapacity</tt> - initial capacity of the sweep line areas will be set to the next given argument <br>
 * <tt>p</tt> - fraction of elements to be used from the input will be set to the next given argument <br>
 * <tt>seed</tt> - seed used for generating a sample of the input will be set to the next given argument <br>
 * <tt>epsilon</tt> - epsilon distance will be set to the next given argument <br>
 * <tt>maxLevel</tt> - maximum level to be considered will be set to the next given argument <br>
 * <p>
 * Example usage: <br>
 * <pre>
 * java xxl.core.relational.SimilarityJoinDemo input file1 C:\\st.ll.bin file2 C:\\rr.ll.bin createDatabase clean external C:\\temp
 * </pre>
 * The resulting output shows the number of join results and the runtime in
 * seconds, the number of element comparisons and,
 * if the sorting algorithm has been set to 'external',
 * the number of performed read/write operations.
 *
 *
 * @see java.sql.DriverManager
 * @see java.sql.Connection
 * @see java.sql.Statement
 * @see java.sql.PreparedStatement
 * @see java.sql.ResultSet
 * @see java.sql.ResultSetMetaData
 * @see java.sql.SQLException
 * @see java.io.File
 * @see xxl.core.comparators.ComparableComparator
 * @see xxl.core.collections.queues.ListQueue
 * @see xxl.core.cursors.wrappers.IteratorCursor
 * @see xxl.core.cursors.MetaDataCursor
 * @see xxl.core.functions.Function
 * @see xxl.core.io.IOCounter
 * @see xxl.core.collections.queues.io.RandomAccessFileQueue
 * @see xxl.core.predicates.MetaDataPredicate
 * @see xxl.core.predicates.Predicate
 * @see xxl.core.relational.tuples.ArrayTuple
 * @see xxl.core.relational.JoinUtils
 * @see xxl.core.relational.cursors.Mapper
 * @see xxl.core.relational.cursors.MergeSorter
 * @see xxl.core.relational.resultSets.MetaDataCursorResultSet
 * @see xxl.core.relational.cursors.Orenstein
 * @see xxl.core.relational.cursors.ResultSetMetaDataCursor
 * @see xxl.core.cursors.filters.Sampler
 * @see xxl.core.relational.resultSets.VirtualTable
 * @see xxl.core.spatial.predicates.DistanceWithinMaximum
 * @see xxl.core.predicates.FeaturePredicate
 * @see xxl.core.spatial.points.FloatPoint
 * @see xxl.core.spatial.KPEzCode
 * @see xxl.core.util.BitSet
 * @see xxl.core.util.WrappingRuntimeException
 *
 */
public class SimilarityJoinDemo {

	/**
	 * The mode the Cloudscape application is run in.
	 * Switched to "rmijdbc" or "sysconnect" by the command line arguments
	 * <tt>rmijdbcclient</tt> and <tt>sysconnectclient</tt>.
	 * default value: "embedded"
	 */
	protected static String framework = "embedded";

	/**
	 * The class name of the JDBC driver the Cloudscape application uses.
	 * It is loaded reflectively when a connection is requested.
	 * default value: "COM.cloudscape.core.JDBCDriver"
	 */
	protected static String driver = "COM.cloudscape.core.JDBCDriver";

	/**
	 * The protocol prefix of the JDBC URL the Cloudscape application
	 * uses to communicate.
	 * default value: "jdbc:cloudscape:"
	 */
	protected static String protocol = "jdbc:cloudscape:";

	/**
	 * A flag determining if the tables should be created and new data
	 * should be inserted (argument <tt>input</tt>).
	 * default value: false
	 */
	protected static boolean input = false;

	/**
	 * A flag determining if the tables needed for this application
	 * should be removed at the end (argument <tt>clean</tt>).
	 * default value: false
	 */
	protected static boolean drop = false;

	/**
	 * The location of the database (argument <tt>DBLocation</tt>).
	 * It is concatenated with the database name without any separator,
	 * hence it has to end with one.
	 * default value: "D:\\Datenbanken\\"
	 */
	public static String databaseLocation = "D:\\Datenbanken\\";

	/**
	 * The name of the database (argument <tt>DBName</tt>).
	 * default value: "TigerData"
	 */
	public static String databaseName = "TigerData";

	/**
	 * A flag determining if a new database should be created
	 * (argument <tt>createDatabase</tt>).
	 * default value: false
	 */
	public static boolean createDataBase = false;

	/**
	 * Location of the first file with spatial data; points in unit-cube [0, 1)^2
	 * (argument <tt>file1</tt>).
	 * default value: "D:\\user\\kraemerj\\uni\\lokal\\paper\\st.ll.bin"
	 */
	public static String file1 = "D:\\user\\kraemerj\\uni\\lokal\\paper\\st.ll.bin";

	/**
	 * Location of the second file with spatial data; points in unit-cube [0, 1)^2
	 * (argument <tt>file2</tt>).
	 * default value: "D:\\user\\kraemerj\\uni\\lokal\\paper\\rr.ll.bin"
	 */
	public static String file2 = "D:\\user\\kraemerj\\uni\\lokal\\paper\\rr.ll.bin";

	/**
	 * Dimension of the data (argument <tt>dim</tt>). It determines the
	 * number of columns of the point tables.
	 * default value: 2
	 */
	public static int dim = 2;

	/**
	 * Main memory available (in bytes) (argument <tt>mem</tt>).
	 * default value: 1000000
	 */
	public static int mem = 1000000;

	/**
	 * The initial capacity of the sweep areas
	 * (argument <tt>initialCapacity</tt>).
	 * default value: 30000
	 */
	public static int initialCapacity = 30000;

	/**
	 * Fraction of elements to be used from the input (argument <tt>p</tt>).
	 * default value: 0.01
	 */
	public static double p = 0.01;

	/**
	 * The seed to be used for the Sampler (argument <tt>seed</tt>).
	 * Note: same seed as in NestedLoopsJoin use-case in xxl.core.spatial!!
	 * default value: 42
	 */
	public static long seed = 42;

	/**
	 * Epsilon-distance used by the join predicate (argument <tt>epsilon</tt>).
	 * default value: 0.01
	 */
	public static float epsilon = 0.01f;

	/**
	 * Maximum level of the partitioning (argument <tt>maxLevel</tt>).
	 * default value: 12
	 */
	public static int maxLevel = 12;

	/** A factory method generating the desired tuples contained in the cursors. */
	public static Function<Object, ? extends Tuple> createTuple = ArrayTuple.FACTORY_METHOD;

	/**
	 * The join type.
	 * NOTE(review): the main method passes Type.THETA_JOIN directly instead
	 * of reading this field - confirm whether the field is meant to be used.
	 */
	public static Type type = Type.THETA_JOIN;

	/**
	 * Use a temporary external file queue for sorting
	 * (argument <tt>external</tt>).
	 * default value: false
	 */
	public static boolean external = false;

	/**
	 * The path of the directory the temporary external file queue is
	 * created in (the argument following <tt>external</tt>).
	 * default value: ""
	 */
	public static String tmpPath = "";

	/** The start time of the algorithm (milliseconds), set right before the join operator is created. */
	protected static long start;

	/** Counter for result tuples; incremented by the result set handed to the virtual table. */
	protected static int res = 0;

	/** An IO-counter; counts the objects written to and read from the external file queues. */
	protected static final IOCounter counter = new IOCounter();

	/**
	 * Parses the command line arguments: determines the mode a
	 * Cloudscape application should be run in and sets the parameters
	 * for the external sorting algorithm, the similarity join algorithm
	 * and the mapping functions. <br>
	 * Keywords are matched case-insensitively, unknown arguments are
	 * ignored. A keyword expecting a value consumes the argument
	 * following it, i.e. the value itself is never interpreted as a
	 * keyword (a database named "input" does not switch on the
	 * <tt>input</tt> flag). <br>
	 * If the last keyword lacks its value, an error message is printed
	 * and the remaining settings keep their defaults. The keyword
	 * <tt>external</tt> still switches on external sorting in that case.
	 *
	 * @param args the arguments given by a call to main.
	 * @throws NumberFormatException if the value of a numeric argument
	 * 		(dim, mem, initialCapacity, p, seed, epsilon, maxLevel)
	 * 		cannot be parsed.
	 */
	private static void determineMode(String[] args) {
		for (int index = 0; index < args.length; index++) {
			String option = args[index];

			// keywords without a value
			if (option.equalsIgnoreCase("rmijdbcclient")) {
				framework = "rmijdbc";
				driver = "COM.cloudscape.core.RmiJdbcDriver";
				protocol = "jdbc:cloudscape:rmi:";
				continue;
			}
			if (option.equalsIgnoreCase("sysconnectclient")) {
				framework = "sysconnect";
				driver = "COM.cloudscape.core.WebLogicDriver";
				protocol = "jdbc:cloudscape:weblogic:";
				continue;
			}
			if (option.equalsIgnoreCase("input")) {
				input = true;
				continue;
			}
			if (option.equalsIgnoreCase("createDatabase")) {
				createDataBase = true;
				continue;
			}
			if (option.equalsIgnoreCase("clean")) {
				drop = true;
				continue;
			}

			// unknown arguments are ignored
			if (!expectsValue(option))
				continue;

			// external sorting is activated even if the path is missing,
			// so a trailing 'external' keeps its effect
			if (option.equalsIgnoreCase("external"))
				external = true;

			// explicit bounds check instead of catching an
			// ArrayIndexOutOfBoundsException
			if (index + 1 >= args.length) {
				System.err.println("Wrong argument usage. Please specify all needed parameters.");
				System.err.println("Missing value for argument: " + option);
				return;
			}

			// consume the value, the loop continues behind it
			String value = args[++index];

			if (option.equalsIgnoreCase("external"))
				tmpPath = value;
			else if (option.equalsIgnoreCase("DBName"))
				databaseName = value;
			else if (option.equalsIgnoreCase("DBLocation"))
				databaseLocation = value;
			else if (option.equalsIgnoreCase("file1"))
				file1 = value;
			else if (option.equalsIgnoreCase("file2"))
				file2 = value;
			else if (option.equalsIgnoreCase("dim"))
				dim = Integer.parseInt(value);
			else if (option.equalsIgnoreCase("mem"))
				mem = Integer.parseInt(value);
			else if (option.equalsIgnoreCase("initialCapacity"))
				initialCapacity = Integer.parseInt(value);
			else if (option.equalsIgnoreCase("p"))
				p = Double.parseDouble(value);
			else if (option.equalsIgnoreCase("seed"))
				seed = Long.parseLong(value);
			else if (option.equalsIgnoreCase("epsilon"))
				epsilon = Float.parseFloat(value);
			else if (option.equalsIgnoreCase("maxLevel"))
				maxLevel = Integer.parseInt(value);
		}
	}

	/**
	 * Checks whether the given command line keyword is followed by a value.
	 *
	 * @param option the keyword to be checked.
	 * @return <tt>true</tt> if the keyword expects a value as next argument.
	 */
	private static boolean expectsValue(String option) {
		String[] valueOptions = {
			"external", "DBName", "DBLocation", "file1", "file2", "dim",
			"mem", "initialCapacity", "p", "seed", "epsilon", "maxLevel"
		};
		for (int i = 0; i < valueOptions.length; i++)
			if (valueOptions[i].equalsIgnoreCase(option))
				return true;
		return false;
	}

	/**
	 * Opens a JDBC-connection to a Cloudscape database. <br>
	 * The driver named by the static class attribute <tt>driver</tt> is
	 * loaded first. The connection URL consists of the protocol, the
	 * location and the name of the database. Depending on the static
	 * class attribute <tt>createDataBase</tt> the URL asks Cloudscape to
	 * create the database (<tt>create=true</tt>) or to use an existing
	 * one (<tt>create=false</tt>).
	 *
	 * @param databaseLocation the location of the database (path).
	 * @param dataBaseName the name of the database.
	 * @param autoCommit the autoCommit value the returned connection
	 * 		is set to.
	 * @return a JDBC-connection to the specified database.
	 * @throws WrappingRuntimeException wrapping any exception raised
	 * 		while loading the driver or connecting.
	 */
	protected static Connection getConnection(String databaseLocation, String dataBaseName, boolean autoCommit) {
		try {
			Class.forName(driver).newInstance();
			System.out.println("Loaded the appropriate driver.");

			// the boolean flag is appended as "true" or "false"
			String url = protocol + databaseLocation + dataBaseName + ";create=" + createDataBase;
			Connection connection = DriverManager.getConnection(url);
			System.out.println("Connected to database.");

			connection.setAutoCommit(autoCommit);
			System.out.println("Autocommit activated: " + autoCommit);
			return connection;
		}
		catch (Exception exception) {
			throw new WrappingRuntimeException(exception);
		}
	}

	/**
	 * Commits the transaction of the given connection and closes the
	 * connection. The connection is closed even if the commit fails,
	 * so a failing commit does not leak the connection.
	 *
	 * @param conn the connection to be closed.
	 * @throws WrappingRuntimeException wrapping any exception raised
	 * 		while committing or closing.
	 * @see #getConnection(String, String, boolean)
	 */
	protected static void closeConnection(Connection conn) {
		try {
			try {
				conn.commit();
			}
			finally {
				// release the connection regardless of the commit's outcome
				conn.close();
			}
			System.out.println("Committed transaction and closed connection.");
		}
		catch (Exception e) {
			throw new WrappingRuntimeException(e);
		}
	}

	/**
	 * Shuts down Cloudscape if the application runs in embedded mode;
	 * in any other mode this method does nothing. <br>
	 * "In embedded mode, an application should shut down Cloudscape.
	 * If the application fails to shut down Cloudscape explicitly,
	 * the Cloudscape does not perform a checkpoint when the JVM shuts down,
	 * which means that the next connection will be slower.
	 * Explicitly shutting down Cloudscape with the URL is preferred."
	 * <p>
	 * This style of shutdown will always throw an "exception", therefore
	 * a raised SQLException is reported as a normal shutdown.
	 */
	protected static void shutDownCloudscape() {
		if (!framework.equals("embedded"))
			return;

		boolean shutdownSignalled;
		try {
			DriverManager.getConnection("jdbc:cloudscape:;shutdown=true");
			shutdownSignalled = false;
		}
		catch (SQLException expected) {
			// the expected outcome of a successful shutdown
			shutdownSignalled = true;
		}

		System.out.println(shutdownSignalled ?
			"Database shut down normally." :
			"Database did not shut down normally.");
	}

	/**
	 * Builds the comma separated list of the <tt>dim</tt> column names
	 * of a point table: <tt>prefix0, prefix1, ..., prefix(dim-1)</tt>.
	 * Every name is followed by the given suffix. <br>
	 * The table definitions and the insert statement share this method,
	 * so the column names always agree and are unique for every dimension.
	 *
	 * @param prefix the prefix of the column names ("x" or "y").
	 * @param suffix text appended to every column name, e.g. the SQL type;
	 * 		may be empty.
	 * @return the comma separated column list.
	 * @throws IllegalArgumentException if <tt>dim</tt> is less than 1.
	 */
	private static String columnList(String prefix, String suffix) {
		if (dim < 1)
			throw new IllegalArgumentException("dim must be at least 1, but is " + dim);
		StringBuilder columns = new StringBuilder();
		for (int i = 0; i < dim; i++) {
			if (i > 0)
				columns.append(",");
			columns.append(prefix).append(i).append(suffix);
		}
		return columns.toString();
	}

	/**
	 * Creating all tables needed for the similarity join, i.e.
	 * two tables storing <tt>dim</tt>-dimensional points and one table that
	 * will contain the join results at the end of this application. <br>
	 * The columns of table Spatial1 are named x0, ..., x(dim-1), those of
	 * table Spatial2 y0, ..., y(dim-1). Table JoinResults contains all
	 * of them.
	 *
	 * @param s Statement used for the execution of the SQL statements.
	 * @throws java.sql.SQLException if the query cannot be fulfilled correctly.
	 */
	public static void createTables(Statement s) throws SQLException {
		// Creating tables for spatial data
		String type = " DOUBLE PRECISION";

		String attributes1 = columnList("x", type);
		s.execute("CREATE TABLE Spatial1 ("+attributes1+")");
		System.out.println("Created table Spatial1.");

		String attributes2 = columnList("y", type);
		s.execute("CREATE TABLE Spatial2 ("+attributes2+")");
		System.out.println("Created table Spatial2.");

		s.execute("CREATE TABLE JoinResults ("+attributes1+", "+attributes2+")");
		System.out.println("Created table JoinResults.");
	}

	/**
	 * Dropping all tables needed for the test, i.e. Spatial1, Spatial2
	 * and JoinResults (in this order).
	 *
	 * @param s Statement used for the execution of the SQL statements.
	 * @throws java.sql.SQLException if the query cannot be fulfilled correctly.
	 */
	public static void dropTables(Statement s) throws SQLException {
		String[] tables = {"Spatial1", "Spatial2", "JoinResults"};
		for (int i = 0; i < tables.length; i++) {
			s.execute("DROP TABLE " + tables[i]);
			System.out.println("Dropped table " + tables[i] + ".");
		}
	}

	/**
	 * Inserts the <tt>dim</tt>-dimensional FloatPoints contained in the
	 * given file into the defined table using the specified connection.
	 * Every point is inserted with a single execution of a prepared
	 * statement, which is closed even if an insertion fails. <br>
	 * The column names start with "x" for table Spatial1 and with "y" for
	 * every other table.
	 *
	 * @param file The file containing the FloatPoints.
	 * @param bufferSize The buffer size used for the PointInputCursor.
	 * @param conn The connection the insertion should be fulfilled with.
	 * @param tableName The name of the table the FloatPoints should be inserted in.
	 * @throws java.sql.SQLException if the query cannot be fulfilled correctly.
	 */
	public static void insertPoints(File file, int bufferSize, Connection conn, String tableName) throws SQLException {
		System.out.println("Inserting data into table: "+tableName);
		String prefix = tableName.equalsIgnoreCase("Spatial1") ? "x" : "y";
		Iterator<?> it = new PointInputCursor(file, PointInputCursor.FLOAT_POINT, dim, bufferSize);

		// INSERT INTO <table>(c0,...,c(dim-1)) values (?, ..., ?)
		StringBuilder sql = new StringBuilder("INSERT INTO ");
		sql.append(tableName).append("(").append(columnList(prefix, "")).append(") values (");
		for (int i = 0; i < dim-1; i++)
			sql.append("?, ");
		sql.append("?)");

		PreparedStatement insert = conn.prepareStatement(sql.toString());
		try {
			while(it.hasNext()) {
				FloatPoint point = (FloatPoint)it.next();
				// JDBC parameters are 1-based
				for (int i = 0; i < dim; i++)
					insert.setDouble(i+1, point.getValue(i));
				insert.executeUpdate();
			}
		}
		finally {
			insert.close();
		}
	}

	/**
	 * Queries all rows of the specified table and wraps the resulting
	 * {@link java.sql.ResultSet} to a
	 * {@link xxl.core.relational.cursors.ResultSetMetaDataCursor}.
	 *
	 * @param s Statement the query is executed with.
	 * @param createTuple factory method used to create the tuples in the ResultSetMetaDataCursor.
	 * @param tableName The name of the table the FloatPoints are contained in.
	 *		It is inserted into the query text as it is.
	 * @return the ResultSetMetaDataCursor.
	 * @throws java.sql.SQLException if the query cannot be fulfilled correctly.
	 */
	public static ResultSetMetaDataCursor initializeInput(Statement s, Function<Object, ? extends Tuple> createTuple, String tableName) throws SQLException {
		String query = "SELECT * FROM " + tableName;
		return new ResultSetMetaDataCursor(s.executeQuery(query), createTuple);
	}

	/**
	 * Creates the temporary file backing an external file queue. <br>
	 * The file is created in the directory given by the static class
	 * attribute <tt>tmpPath</tt>. If no path has been specified
	 * (<tt>null</tt> or empty string), the default temporary-file
	 * directory of the system is used.
	 *
	 * @return the newly created temporary file.
	 * @throws WrappingRuntimeException wrapping the IOException raised
	 * 		if the file cannot be created.
	 */
	private static File createQueueFile() {
		File directory = (tmpPath == null || tmpPath.length() == 0) ? null : new File(tmpPath);
		try {
			return File.createTempFile("RAF", ".queue", directory);
		}
		catch (IOException ioe) {
			// fail here instead of handing a null file to the queue
			throw new WrappingRuntimeException(ioe);
		}
	}

	/**
	 * The spatial join algorithm based on space-filling curves proposed by Jack Orenstein.
	 * See: [Ore 91] Jack A. Orenstein: An Algorithm for Computing the Overlay of k-Dimensional Spaces. SSD 1991:
	 * 381-400 for a detailed explanation. See: [DS 01]: Jens-Peter Dittrich, Bernhard Seeger: GESS: a Scalable Similarity-Join Algorithm for Mining Large Data Sets in High Dimensional
	 * Spaces. ACM SIGKDD-2001. for a review on Orensteins algorithm.
	 * <p>
	 * Orensteins algorithm is based on a binary recursive partitioning, where the binary code
	 * represents the so-called Z-ordering (z-codes).
	 * <p>
	 * Information concerning the implementation of {@link xxl.core.spatial.cursors.Orenstein}: <br>
	 * Orensteins algorithm (ORE) assigns each hypercube of the input relations to disjoint
	 * subspaces of the recursive partitioning whose union entirely
	 * covers the hypercube. ORE sorts the two sets of
	 * hypercubes derived from the input relations (including the
	 * possible replicates) w.r.t. the lexicographical ordering of its
	 * binary code. After that, the relations are merged using two
	 * main-memory stacks Stack_R and Stack_S. It is guaranteed that for two adjacent
	 * hypercubes in the stack, the prefix property is satisfied for
	 * their associated codes. Only those hypercubes are joined
	 * that have the same prefix code.
	 * <p>
	 * A deficiency of ORE is that the different assignment strategies
	 * examined in [Ore91] cause substantial replication rates. This
	 * results in an increase of the problem space and hence, sorting
	 * will be very expensive. Furthermore, ORE has not addressed the
	 * problem of eliminating duplicates in the result set.
	 * <p>
	 * Note that the method <code>reorganize(final Object
	 * currentStatus)</code> could actually be implemented with only 1 LOC. For efficiency
	 * reasons we use a somewhat longer version of the method here.
	 * <p>
	 * This method assembles the parts the join operator needs: a factory
	 * for the queues used while sorting (main memory list queues or, if the
	 * static class attribute <tt>external</tt> is set, file based queues
	 * that count their read and write operations), a factory for the
	 * merge sorters, and the join predicate based on the epsilon distance.
	 * The static class attribute <tt>start</tt> is set to the current time
	 * right before the join operator is created.
	 *
	 * @param input0 the first input cursor.
	 * @param input1 the second input cursor.
	 * @param mem main memory available (in bytes).
	 * @param initialCapacity the initial capacity of the sweep areas.
	 * @param p fraction of elements to be used from the input.
	 * @param seed the seed to be used for the Sampler.
	 * @param epsilon epsilon-distance.
	 * @param maxLevel maximum level of the partitioning.
	 * @param createTuple a factory method generating the desired tuples contained in the cursors.
	 * @param type the join type.
	 * @throws IllegalAccessException failed to determine the object size.
	 *
	 * @return the created MetaDataCursor.
	 * @see xxl.core.spatial.cursors.Orenstein
	 * @see xxl.core.relational.cursors.Orenstein
	 */
	public static MetaDataCursor<Tuple, CompositeMetaData<Object, Object>> performOrenstein(
		final ResultSetMetaDataCursor input0,
		final ResultSetMetaDataCursor input1,
		final int mem,
		final int initialCapacity,
		final double p,
		final long seed,
		final float epsilon,
		final int maxLevel,
		Function<Object, ? extends Tuple> createTuple,
		final Type type
	) throws IllegalAccessException {

		if(external)
			System.out.print("EXTERNAL_ALG\t"+tmpPath);

		// determining the object size
		final int objectSize = xxl.core.util.XXLSystem.getObjectSize(new KPEzCode(new FloatPoint(dim), new BitSet(32)));

		// function delivering a ListQueue in main memory
		// or a RandomAccessFileQueue on external memory
		// depending on the static class attribute 'external'
		final Function<Function<?, Integer>, Queue<Tuple>> newQueue = new AbstractFunction<Function<?, Integer>, Queue<Tuple>>() {
			public Queue<Tuple> invoke(Function<?, Integer> inputBufferSize, Function<?, Integer> outputBufferSize) {
				if (external) {
					// fails with a WrappingRuntimeException if the
					// temporary file cannot be created
					File file = createQueueFile();

					return new RandomAccessFileQueue<Tuple>(
						file,
						new TupleConverter(
							true,
							Converters.getObjectConverter(
								ConvertableConverter.DEFAULT_INSTANCE
							)
						),
						new AbstractFunction<Object, ArrayTuple>() {
							public ArrayTuple invoke() {
								return new ArrayTuple(
									new KPEzCode(
										new FloatPoint(
											dim
										)
									)
								);
							}
						},
						inputBufferSize,
						outputBufferSize
					) {
						// every object written to the file is counted
						public void enqueueObject(Tuple tuple) {
							counter.incWrite();
							super.enqueueObject(tuple);
						}

						// every object read from the file is counted
						public Tuple dequeueObject() {
							counter.incRead();
							return super.dequeueObject();
						}
					};
				}
				else
					return new ListQueue<Tuple>();
			}
		};

		// function delivering a MergeSorter
		// with the intention to sort the input cursors
		// uses the above defined function 'newQueue'
		Function<MetaDataCursor<? extends Tuple, CompositeMetaData<Object, Object>>, MetaDataCursor<? extends Tuple, CompositeMetaData<Object, Object>>> newSorter = new AbstractFunction<MetaDataCursor<? extends Tuple, CompositeMetaData<Object, Object>>, MetaDataCursor<? extends Tuple, CompositeMetaData<Object, Object>>>() {
			public MetaDataCursor<? extends Tuple, CompositeMetaData<Object, Object>> invoke(MetaDataCursor<? extends Tuple, CompositeMetaData<Object, Object>> cursor) {
				return new MergeSorter(
					cursor,
					Tuples.getTupleComparator(1),
					objectSize,
					mem,
					(int)(mem*0.4),
					newQueue,
					false
				);
			}
		};

		// the join predicate based on an epsilon distance;
		// the point is taken from the KPEzCode stored in the first column
		Predicate<Tuple> joinPredicate = new FeaturePredicate<Tuple, FloatPoint>(
			new DistanceWithinMaximum<FloatPoint>(epsilon),
			new AbstractFunction<Tuple, FloatPoint> () {
				public FloatPoint invoke(Tuple tuple) {
					return (FloatPoint)((KPEzCode)tuple.getObject(1)).getData();
				}
			}
		);

		// setting the start time
		start = System.currentTimeMillis();

		/* calling the constructor of the Orenstein join algorithm
		 *
		 * parameter explanation:
		 *
		 * input0: the first input cursor
		 * input1: the second input cursor
		 * joinPredicate: the join predicate to use
		 * newSorter: function for sorting input cursors
		 * createTuple: a factory method generating the desired tuples contained in the cursors
		 * initialCapacity: the initial capacity of the sweep areas
		 * p: fraction of elements to be used from the input
		 * seed: the seed to be used for the Sampler
		 * epsilon: epsilon-distance
		 * maxLevel: maximum level of the partitioning
		 * type: the join type
		 */
		Orenstein orenstein = new Orenstein(
			input0,
			input1,
			joinPredicate,
			newSorter,
			createTuple,
			initialCapacity,
			p,
			seed,
			epsilon,
			maxLevel,
			type
		);

		return orenstein;
	}


	/**
	 * The main method contains the method calls to integrate data
	 * into a Cloudscape table, performing a similarity join on them
	 * and writing the results back to an other Cloudscape table. <br>
	 * Every exception is printed to the standard error stream. If the
	 * run fails after the connection has been opened, the open
	 * transaction is rolled back, the connection is closed and
	 * Cloudscape is shut down, so a failing run leaks no database
	 * resources.
	 *
	 * @param args array of <tt>String</tt> arguments. It can be used to
	 * 		submit parameters when the main method is called.
	 */
	public static void main(String[] args) {

		// determining the mode Cloudscape should run in
		// and parse further arguments (input, clean, ...)
		determineMode(args);

		System.out.println("SimilarityJoinDemo starting in " + framework + " mode.");

		// stays non-null as long as the connection has to be cleaned up
		Connection conn = null;
		try {

			// retrieving a valid JDBC connection
			conn = getConnection(databaseLocation, databaseName, false);

			// creating some statements based on the current connection
			Statement s = conn.createStatement();
			Statement t = conn.createStatement();

			// defining the input cursors
			ResultSetMetaDataCursor input1, input2;

			// defining the output cursor
			final MetaDataCursor<Tuple, CompositeMetaData<Object, Object>> results;

			// if the argument 'input' has been specified
			if (input) {

				// Creating tables
				createTables(s);

				// Inserting spatial data using buffer size: 1024*1024 byte
				insertPoints(new File(file1), 1024*1024, conn, "Spatial1");
				insertPoints(new File(file2), 1024*1024, conn, "Spatial2");
				System.out.println("Inserted spatial data.");
			}

			/* initializing input cursors
			 *
			 * CLOUDSCAPE ==> XXL
			 *
			 */
			input1 = initializeInput(s, createTuple, "Spatial1");
			input2 = initializeInput(t, createTuple, "Spatial2");

			System.out.println("Performing similarity join based on Orenstein algorithm.");

			/* performing join using Orenstein algorithm
			 *
			 * parameter explanation:
			 * input1: the first input cursor.
			 * input2: the second input cursor.
			 * mem: main memory available (in bytes).
			 * initialCapacity: the initial capacity of the sweep areas.
			 * p: fraction of elements to be used from the input.
			 * seed: the seed to be used for the Sampler.
			 * epsilon: epsilon-distance.
			 * maxLevel: maximum level of the partitioning.
			 * createTuple: a factory method generating the desired tuples contained in the cursors.
			 * type: the join type.
			 */
			results = performOrenstein(input1, input2, mem, initialCapacity,
				p, seed, epsilon, maxLevel, createTuple, Type.THETA_JOIN);

			/* setting virtual table
			 *
			 * XXL ==> CLOUDSCAPE
			 *
			 */
			VirtualTable.SET_RESULTSET = new AbstractFunction<Object, MetaDataCursorResultSet>() {
				public MetaDataCursorResultSet invoke() {
					return new MetaDataCursorResultSet(results) {

						// NOTE(review): this counts every getObject call; if
						// Cloudscape calls it once per column, the counter is
						// the number of results times the number of columns
						// - confirm.
						public Object getObject(int columnIndex) throws SQLException {
							res++;
							return super.getObject(columnIndex);
						}
					};
				}
			};

			// The query has to name the class whose SET_RESULTSET has been
			// assigned above. Taking the name from the class itself keeps
			// both in sync.
			String insertJoinResults = "INSERT INTO JoinResults SELECT * FROM NEW "
				+ VirtualTable.class.getName() + "() AS VT";

			System.out.println(insertJoinResults);

			// executing query on a virtual table
			s.execute(insertJoinResults);

			System.out.println("\n==============================");
			System.out.println("No. of results:\t"+res+"\t");
			System.out.println("runtime (sec):\t"+(System.currentTimeMillis()-start)/1000.0+"\t");
			System.out.println("element-comparisons:\t"+xxl.core.spatial.cursors.Orenstein.comparisons.counter+"\t");
			if(external)
				System.out.println("IOs(object-count)\tRead:\t"+counter.getReadIO()+"\tWrite:\t"+counter.getWriteIO());

			// closing input-Cursors
			input1.close();
			input2.close();

			// dropping tables if argument 'clean' has been specified
			if (drop) {
				dropTables(s);
			}

			// closing remaining resources and shutting down Cloudscape
			s.close();
			t.close();
			closeConnection(conn);
			// committed and closed: nothing left for the clean up below
			conn = null;
			shutDownCloudscape();

			System.out.println("SimilarityJoinDemo finished.");

		}
		catch (Exception e) {
			e.printStackTrace(System.err);
		}
		finally {
			// failure path: the connection is still open (or its commit failed)
			if (conn != null) {
				try {
					conn.rollback();
				}
				catch (SQLException se) {
					se.printStackTrace(System.err);
				}
				try {
					// closing the connection also releases its statements
					conn.close();
				}
				catch (SQLException se) {
					se.printStackTrace(System.err);
				}
				shutDownCloudscape();
			}
		}
	}
}