/* XXL: The eXtensible and fleXible Library for data processing Copyright (C) 2000-2011 Prof. Dr. Bernhard Seeger Head of the Database Research Group Department of Mathematics and Computer Science University of Marburg Germany This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; If not, see <http://www.gnu.org/licenses/>. http://code.google.com/p/xxl/ */ package xxl.core.relational; import java.io.File; import java.io.IOException; import java.sql.Connection; import java.sql.DriverManager; import java.sql.PreparedStatement; import java.sql.SQLException; import java.sql.Statement; import java.util.Iterator; import xxl.core.collections.queues.ListQueue; import xxl.core.collections.queues.Queue; import xxl.core.collections.queues.io.RandomAccessFileQueue; import xxl.core.cursors.MetaDataCursor; import xxl.core.functions.AbstractFunction; import xxl.core.functions.Function; import xxl.core.io.IOCounter; import xxl.core.io.converters.ConvertableConverter; import xxl.core.io.converters.Converters; import xxl.core.predicates.FeaturePredicate; import xxl.core.predicates.Predicate; import xxl.core.relational.cursors.MergeSorter; import xxl.core.relational.cursors.Orenstein; import xxl.core.relational.cursors.ResultSetMetaDataCursor; import xxl.core.relational.cursors.SortMergeJoin.Type; import xxl.core.relational.resultSets.MetaDataCursorResultSet; import xxl.core.relational.resultSets.VirtualTable; import xxl.core.relational.tuples.ArrayTuple; import xxl.core.relational.tuples.Tuple; import xxl.core.relational.tuples.TupleConverter; import xxl.core.relational.tuples.Tuples; import xxl.core.spatial.KPEzCode; import xxl.core.spatial.cursors.PointInputCursor; import xxl.core.spatial.points.FloatPoint; import xxl.core.spatial.predicates.DistanceWithinMaximum; import xxl.core.util.BitSet; import xxl.core.util.WrappingRuntimeException; import xxl.core.util.metaData.CompositeMetaData; /** * This class demonstrates the usage of the library XXL with a special * focus on the integration of similarity join algorithms into * the database management system <b>Cloudscape</b>.<br> * <tt>Cloudscape</tt> offers the possibility of accessing so called * virtual tables by providing a special interface, named VTI * (Virtual Table Interface). <br> * XXL provides with its package {@link xxl.core.relational} wrapper classes * which are able to wrap a {@link java.sql.ResultSet} to a * {@link xxl.core.cursors.MetaDataCursor} and vice versa. The class * {@link xxl.core.relational.resultSets.VirtualTable} wraps a ResultSet and * therefore it can be used in SQL queries like a usual table. * <p> * The main method represents a minimal JDBC application showing * JDBC access to Cloudscape. * It provides methods to get a connection to Cloudscape, * to execute querys, to close the open connection and * to shutdown Cloudscape. <br> * Cloudscape applications can be run in three different modes: * "Cloudscape applications can run against Cloudscape running in an embedded * or a client/server framework. When Cloudscape runs in an embedded framework, * the Cloudscape application and Cloudscape run in the same JVM. The application * starts up the Cloudscape engine. When Cloudscape runs in a client/server framework, * the application runs in a different JVM from Cloudscape. The application only needs * to start the client driver, and the connectivity framework provides network connections. * (The server must already be running.)" * * <p>When you run this application, give one of the following arguments: * <ul> * <li> embedded (default, if none specified)</li> * <li> rmijdbcclient (if Cloudscape is running embedded in the RmiJdbc Server framework)</li> * <li> sysconnectclient (if Cloudscape is running embedded in the Cloudconnector framework)</li> * </ul> * <p> * A complete similartiy join is performed, i.e. point data can be inserted * into database tables, it is retrieved using ResultSetMetaDataCursors, the * contained elements are mapped (Tuple --> FloatPoint) and sorted, after that a similarity join * based on Jack Orenstein's algorithm is executed and the elements * are mapped back (FloatPoint --> Tuple). <br> * The resulting elements are inserted into the table 'JoinResults'. With can * be viewed with e.g. with <tt>Cloudview</tt> if the argument <tt>clean</tt> * has not been specified. An other feature of this use case is given by * the possibility of using only a fraction of all elements stored * in the database table. Therefore a {@link xxl.core.relational.cursors.Sampler} wraps * the ResultSetMetaDataCursors delivering the input tuples. <br> * The whole join process can be steered by the arguments given to * the main method when calling this application. <br> * Note, all arguments have been set to default values for an easier usage. * * <p> * The following arguments can be passed to the main method: <br><br> * * <tt>rmijdbcclient</tt> - particular Cloudscape mode <br> * <tt>sysconnectclient</tt> - particular Cloudscape mode <br> * <tt>input</tt> - insert data into Cloudscape tables <br> * <tt>createDatabase</tt> - a new database will be created <br> * <tt>clean</tt> - used database will be cleaned (removal of all tables) <br> * <tt>external</tt> - external file queue will be placed on the location given by the next argument<br> * <tt>DBName</tt> - name of the database will be set to the next given argument <br> * <tt>DBLocation</tt> - path to the database will be set to the next given argument <br> * <tt>file1</tt> - location of first input file will be set to the next given argument <br> * <tt>file2</tt> - location of second input file will be set to the next given argument <br> * <tt>dim</tt> - dimension of the float points will be set to the next given argument <br> * <tt>mem</tt> - memory size in bytes will be set to the next given argument <br> * <tt>initialCapacity</tt> - initial capacity of the sweep line areas will be set to the next given argument <br> * <tt>p</tt> - fraction of elements to be used from the input will be set to the next given argument <br> * <tt>seed</tt> - seed used for generating a sample of the input will be set to the next given argument <br> * <tt>epsilon</tt> - epsilon distance will be set to the next given argument <br> * <tt>maxLevel</tt> - maximum level to be considered will be set to the next given argument <br> * <p> * Example usage: <br> * <pre> * java xxl.applications.relational.SimilarityJoinDemo input file1 C:\\st.ll.bin file2 C:\\rr.ll.bin createDatabase clean external * </pre> * The resulting output shows the number of join results and the runtime in * seconds, the number of element comparisons and, * if the sorting algorithm has been set to 'external', * the number of performed read/write operations. * * * @see java.sql.DriverManager * @see java.sql.Connection * @see java.sql.Statement * @see java.sql.PreparedStatement * @see java.sql.ResultSet * @see java.sql.ResultSetMetaData * @see java.sql.SQLException * @see java.io.File * @see xxl.core.comparators.ComparableComparator * @see xxl.core.collections.queues.ListQueue * @see xxl.core.cursors.wrappers.IteratorCursor * @see xxl.core.cursors.MetaDataCursor * @see xxl.core.functions.Function * @see xxl.core.io.IOCounter * @see xxl.core.collections.queues.io.RandomAccessFileQueue * @see xxl.core.predicates.MetaDataPredicate * @see xxl.core.predicates.Predicate * @see xxl.core.relational.tuples.ArrayTuple * @see xxl.core.relational.JoinUtils * @see xxl.core.relational.cursors.Mapper * @see xxl.core.relational.cursors.MergeSorter * @see xxl.core.relational.resultSets.MetaDataCursorResultSet * @see xxl.core.relational.cursors.Orenstein * @see xxl.core.relational.cursors.ResultSetMetaDataCursor * @see xxl.core.cursors.filters.Sampler * @see xxl.core.relational.resultSets.VirtualTable * @see xxl.core.spatial.predicates.DistanceWithinMaximum * @see xxl.core.predicates.FeaturePredicate * @see xxl.core.spatial.points.FloatPoint * @see xxl.core.spatial.KPEzCode * @see xxl.core.util.BitSet * @see xxl.core.util.WrappingRuntimeException * */ public class SimilarityJoinDemo { /** * Cloudscape application is run in this mode. * default value: "embedded" */ protected static String framework = "embedded"; /** * Cloudscape application uses this driver. * default value: "COM.cloudscape.core.JDBCDriver" */ protected static String driver = "COM.cloudscape.core.JDBCDriver"; /** * Cloudscape application uses this protocol to communicate. * default value: "jdbc:cloudscape:" */ protected static String protocol = "jdbc:cloudscape:"; /** * A flag determining if new data should be inserted. * default value: false */ protected static boolean input = false; /** * A flag determining if the data needed for this application * should be removed. * default value: false */ protected static boolean drop = false; /** * The location of the data base. * default value: "D:\\Datenbanken\\" */ public static String databaseLocation = "D:\\Datenbanken\\"; /** * The name of the data base. * default value: "TigerData" */ public static String databaseName = "TigerData"; /** * A flag determining if a new database should be created. * default value: false */ public static boolean createDataBase = false; /** * Location of spatial data; points in unit-cube [0, 1)^2 * default value: "D:\\user\\kraemerj\\uni\\lokal\\paper\\st.ll.bin" */ public static String file1 = "D:\\user\\kraemerj\\uni\\lokal\\paper\\st.ll.bin"; /** * Location of spatial data; points in unit-cube [0, 1)^2 * default value: "D:\\user\\kraemerj\\uni\\lokal\\paper\\rr.ll.bin" */ public static String file2 = "D:\\user\\kraemerj\\uni\\lokal\\paper\\rr.ll.bin"; /** * Dimension of data. * default value: 2 */ public static int dim = 2; /** * Main memory available (in bytes). * default value: 1000000 */ public static int mem = 1000000; /** * The initial capacity of the sweep areas. * default value: 30000 */ public static int initialCapacity = 30000; /** * Fraction of elements to be used from the input. * default value: 0.01 */ public static double p = 0.01; /** * The seed to be used for the Sampler. * Note: same seed as in NestedLoopsJoin use-case in xxl.core.spatial!! * default value: 42 */ public static long seed = 42; /** * Epsilon-distance. * default value: 0.01 */ public static float epsilon = 0.01f; /** * Maximum level of the partitioning. * default value: 12 */ public static int maxLevel = 12; /** A factory method generating the desired tuples contained in the cursors. */ public static Function<Object, ? extends Tuple> createTuple = ArrayTuple.FACTORY_METHOD; /** The join type. */ public static Type type = Type.THETA_JOIN; /** * Use a temporal external file queue. * default value: false */ public static boolean external = false; /** * The path of the temporal external file queue. * default value: "" */ public static String tmpPath = ""; /** The start time of the algorithm. */ protected static long start; /** Counter for result tuples. */ protected static int res = 0; /** An IO-counter. */ protected static final IOCounter counter = new IOCounter(); /** * Method to determine the mode a * cloudscape application should be run in. <br> * Futhermore arguments for the external sorting algorithm, * the similarity join algorithm and the mapping functions * can be specified. * * @param args the arguments given by a call to main. */ private static void determineMode(String[] args) { int length = args.length; try { for (int index = 0; index < length; index++) { if (args[index].equalsIgnoreCase("rmijdbcclient")) { framework = "rmijdbc"; driver = "COM.cloudscape.core.RmiJdbcDriver"; protocol = "jdbc:cloudscape:rmi:"; } if (args[index].equalsIgnoreCase("sysconnectclient")) { framework = "sysconnect"; driver = "COM.cloudscape.core.WebLogicDriver"; protocol = "jdbc:cloudscape:weblogic:"; } if (args[index].equalsIgnoreCase("input")) input = true; if (args[index].equalsIgnoreCase("createDatabase")) createDataBase = true; if (args[index].equalsIgnoreCase("clean")) drop = true; if (args[index].equalsIgnoreCase("external")) { external = true; tmpPath = args[index+1]; } if (args[index].equalsIgnoreCase("DBName")) { databaseName = args[index+1]; } if (args[index].equalsIgnoreCase("DBLocation")) { databaseLocation = args[index+1]; } if (args[index].equalsIgnoreCase("file1")) { file1 = args[index+1]; } if (args[index].equalsIgnoreCase("file2")) { file2 = args[index+1]; } if (args[index].equalsIgnoreCase("dim")) { dim = Integer.parseInt(args[index+1]); } if (args[index].equalsIgnoreCase("mem")) { mem = Integer.parseInt(args[index+1]); } if (args[index].equalsIgnoreCase("initialCapacity")) { initialCapacity = Integer.parseInt(args[index+1]); } if (args[index].equalsIgnoreCase("p")) { p = Double.parseDouble(args[index+1]); } if (args[index].equalsIgnoreCase("seed")) { seed = Long.parseLong(args[index+1]); } if (args[index].equalsIgnoreCase("epsilon")) { epsilon = Float.parseFloat(args[index+1]); } if (args[index].equalsIgnoreCase("maxLevel")) { maxLevel = Integer.parseInt(args[index+1]); } } } catch (ArrayIndexOutOfBoundsException ae) { System.err.println("Wrong argument usage. Please specify all needed parameters."); ae.printStackTrace(System.err); } } /** * Returns a JDBC-connection to a Cloudscape database. * If the static class attribute <tt>createDatabase</tt> is <tt>true</tt> * a new database with <tt>dataBaseName</tt> * is created. Otherwise an existing database is used. * * @param databaseLocation the location of the database (path). * @param dataBaseName the name of the database. * @param autoCommit Sets the autoCommit value of the returned connection * to this value. * @return a JDBC-connection to the specified database. */ protected static Connection getConnection(String databaseLocation, String dataBaseName, boolean autoCommit) { try { Class.forName(driver).newInstance(); System.out.println("Loaded the appropriate driver."); Connection conn = createDataBase ? DriverManager.getConnection(protocol + databaseLocation + dataBaseName +";create=true") : DriverManager.getConnection(protocol + databaseLocation + dataBaseName+";create=false"); System.out.println("Connected to database."); conn.setAutoCommit(autoCommit); System.out.println("Autocommit activated: " +autoCommit); return conn; } catch (Exception e) { throw new WrappingRuntimeException(e); } } /** * Closes the given connection after committing the transaction * of this connection. * * @param conn the connection to be closed. * @see #getConnection(String, String, boolean) */ protected static void closeConnection(Connection conn) { try { conn.commit(); conn.close(); System.out.println("Committed transaction and closed connection."); } catch (Exception e) { throw new WrappingRuntimeException(e); } } /** * "In embedded mode, an application should shut down Cloudscape. * If the application fails to shut down Cloudscape explicitly, * the Cloudscape does not perform a checkpoint when the JVM shuts down, * which means that the next connection will be slower. * Explicitly shutting down Cloudscape with the URL is preferred." * <p> * This style of shutdown will always throw an "exception". */ protected static void shutDownCloudscape() { boolean gotSQLExc = false; if (framework.equals("embedded")) { try { DriverManager.getConnection("jdbc:cloudscape:;shutdown=true"); } catch (SQLException se) { gotSQLExc = true; } if (!gotSQLExc) System.out.println("Database did not shut down normally."); else System.out.println("Database shut down normally."); } } /** * Creating all tables storing needed for the similarity join, i.e. * two tables storing two dimensional points and one table that * will contain the join results at the end of this application. * * @param s Statement used for the execution of the SQL statements. * @throws java.sql.SQLException if the query cannot be fulfilled correctly. */ public static void createTables(Statement s) throws SQLException { // Creating tables for spatial data String type = " DOUBLE PRECISION"; String attributes1 = new String(); for (int i = 0; i < dim-1; i++) attributes1 += "x"+dim+type+","; attributes1 += "x"+(dim-1)+type; s.execute("CREATE TABLE Spatial1 ("+attributes1+")"); System.out.println("Created table Spatial1."); String attributes2 = new String(); for (int i = 0; i < dim-1; i++) attributes2 += "y"+dim+type+","; attributes2 += "y"+(dim-1)+type; s.execute("CREATE TABLE Spatial2 ("+attributes2+")"); System.out.println("Created table Spatial2."); s.execute("CREATE TABLE JoinResults ("+attributes1+", "+attributes2+")"); System.out.println("Created table JoinResults."); } /** * Dropping all tables needed for the test. * * @param s Statement used for the execution of the SQL statements. * @throws java.sql.SQLException if the query cannot be fulfilled correctly. */ public static void dropTables(Statement s) throws SQLException { s.execute("DROP TABLE Spatial1"); System.out.println("Dropped table Spatial1."); s.execute("DROP TABLE Spatial2"); System.out.println("Dropped table Spatial2."); s.execute("DROP TABLE JoinResults"); System.out.println("Dropped table JoinResults."); } /** * Inserts the two dimensional FloatPoints contained in the given file * into the defined table using the specified connection. * * @param file The file containing the FloatPoints. * @param bufferSize The buffer size used for the FloatPointInputIterator. * @param conn The connection the insertion should be fulfilled with. * @param tableName The name of the table the FloatPoints should be inserted in. * @throws java.sql.SQLException if the query cannot be fulfilled correctly. */ public static void insertPoints(File file, int bufferSize, Connection conn, String tableName) throws SQLException { System.out.println("Inserting data into table: "+tableName); String prefix = tableName.equalsIgnoreCase("Spatial1") ? "x" : "y"; Iterator<?> it = new PointInputCursor(file, PointInputCursor.FLOAT_POINT, dim, bufferSize); String sql = "INSERT INTO " +tableName+"("; String attributes = new String(); for (int i = 0; i < dim-1; i++) attributes += prefix+dim+","; attributes += prefix+(dim-1); sql += attributes+") values ("; for (int i = 0; i < dim-1; i++) sql += "?, "; sql += "?)"; PreparedStatement insert = conn.prepareStatement(sql); while(it.hasNext()) { FloatPoint point = (FloatPoint)it.next(); for (int i = 0; i < dim; i++) insert.setDouble(i+1, point.getValue(i)); insert.executeUpdate(); } insert.close(); } /** * Creating a {@link xxl.core.relational.cursors.ResultSetMetaDataCursor} * based on the specified table. * * @param s Statement used for the execution of the SQL statements. * @param createTuple factory method used to create the tuples in the ResultSetMetaDataCursor. * @param tableName The name of the table the FloatPoints are contained in. * @return the ResultSetMetaDataCursor. * @throws java.sql.SQLException if the query cannot be fulfilled correctly. */ public static ResultSetMetaDataCursor initializeInput(Statement s, Function<Object, ? extends Tuple> createTuple, String tableName) throws SQLException { return new ResultSetMetaDataCursor(s.executeQuery("SELECT * FROM "+tableName), createTuple); } /** * The spatial join algorithm based on space-filling curves proposed by Jack Orenstein. * See: [Ore 91] Jack A. Orenstein: An Algorithm for Computing the Overlay of k-Dimensional Spaces. SSD 1991: * 381-400 for a detailed explanation. See: [DS 01]: Jens-Peter Dittrich, Bernhard Seeger: GESS: a Scalable Similarity-Join Algorithm for Mining Large Data Sets in High Dimensional * Spaces. ACM SIGKDD-2001. for a review on Orensteins algorithm. * <p> * Orensteins algorithm is based on a binary recursive partitioning, where the binary code * represents the so-called Z-ordering (z-codes). * <p> * Information concerning the implementation of {@link xxl.core.spatial.cursors.Orenstein}: <br> * Orensteins algorithm (ORE) assigns each hypercube of the input relations to disjoint * subspaces of the recursive partitioning whose union entirely * covers the hypercube. ORE sorts the two sets of * hypercubes derived from the input relations (including the * possible replicates) w.r.t. the lexicographical ordering of its * binary code. After that, the relations are merged using two * main-memory stacks Stack_R and Stack_S. It is guaranteed that for two adjacent * hypercubes in the stack, the prefix property is satisfied for * their associated codes. Only those hypercubes are joined * that have the same prefix code. * <p> * A deficiency of ORE is that the different assignment strategies * examined in [Ore91] cause substantial replication rates. This * results in an increase of the problem space and hence, sorting * will be very expensive. Furthermore, ORE has not addressed the * problem of eliminating duplicates in the result set. * <p> * Note that the method <code>reorganize(final Object * currentStatus)</code> could actually be implemented with only 1 LOC. For efficiency * reasons we use a somewhat longer version of the method here. * <p> * * @param input0 the first input cursor. * @param input1 the second input cursor. * @param mem main memory available (in bytes). * @param initialCapacity the initial capacity of the sweep areas. * @param p fraction of elements to be used from the input. * @param seed the seed to be used for the Sampler. * @param epsilon epsilon-distance. * @param maxLevel maximum level of the partitioning. * @param createTuple a factory method generating the desired tuples contained in the cursors. * @param type the join type. * @throws IllegalAccessException failed to determine the object size. * * @return the created MetaDataCursor. * @see xxl.core.spatial.cursors.Orenstein * @see xxl.core.relational.cursors.Orenstein */ public static MetaDataCursor<Tuple, CompositeMetaData<Object, Object>> performOrenstein( final ResultSetMetaDataCursor input0, final ResultSetMetaDataCursor input1, final int mem, final int initialCapacity, final double p, final long seed, final float epsilon, final int maxLevel, Function<Object, ? extends Tuple> createTuple, final Type type ) throws IllegalAccessException { if(external) System.out.print("EXTERNAL_ALG\t"+tmpPath); // determining the object size final int objectSize = xxl.core.util.XXLSystem.getObjectSize(new KPEzCode(new FloatPoint(dim), new BitSet(32))); // function delivering a ListQueue in main memory // or a RandomAccessFileQueue on external memory // depending on the static class attribute 'external' final Function<Function<?, Integer>, Queue<Tuple>> newQueue = new AbstractFunction<Function<?, Integer>, Queue<Tuple>>() { public Queue<Tuple> invoke(Function<?, Integer> inputBufferSize, Function<?, Integer> outputBufferSize) { if (external) { File file = null; try { file = File.createTempFile("RAF", ".queue", new File(tmpPath)); } catch (IOException ioe) { ioe.printStackTrace(System.err); } return new RandomAccessFileQueue<Tuple>( file, new TupleConverter( true, Converters.getObjectConverter( ConvertableConverter.DEFAULT_INSTANCE ) ), new AbstractFunction<Object, ArrayTuple>() { public ArrayTuple invoke() { return new ArrayTuple( new KPEzCode( new FloatPoint( dim ) ) ); } }, inputBufferSize, outputBufferSize ) { public void enqueueObject(Tuple tuple) { counter.incWrite(); super.enqueueObject(tuple); } public Tuple dequeueObject() { counter.incRead(); return super.dequeueObject(); } }; } else return new ListQueue<Tuple>(); } }; // function delivering a MergeSorter // with the intention to sort the input cursors // uses the above definded function 'newQueue' Function<MetaDataCursor<? extends Tuple, CompositeMetaData<Object, Object>>, MetaDataCursor<? extends Tuple, CompositeMetaData<Object, Object>>> newSorter = new AbstractFunction<MetaDataCursor<? extends Tuple, CompositeMetaData<Object, Object>>, MetaDataCursor<? extends Tuple, CompositeMetaData<Object, Object>>>() { public MetaDataCursor<? extends Tuple, CompositeMetaData<Object, Object>> invoke(MetaDataCursor<? extends Tuple, CompositeMetaData<Object, Object>> cursor) { return new MergeSorter( cursor, Tuples.getTupleComparator(1), objectSize, mem, (int)(mem*0.4), newQueue, false ); } }; // the join predicate based on an epsilon distance Predicate<Tuple> joinPredicate = new FeaturePredicate<Tuple, FloatPoint>( new DistanceWithinMaximum<FloatPoint>(epsilon), new AbstractFunction<Tuple, FloatPoint> () { public FloatPoint invoke(Tuple tuple) { return (FloatPoint)((KPEzCode)tuple.getObject(1)).getData(); } } ); // setting the start time start = System.currentTimeMillis(); /* calling the constructor of the Orenstein join algorithm * * parameter explanation: * * input0: the first input cursor * input1: the second input cursor * joinPredicate: the join predicate to use * newSorter: function for sorting input cursors * createTuple: a factory method generating the desired tuples contained in the cursors * initialCapacity: the initial capacity of the sweep areas * p: fraction of elements to be used from the input * seed: the seed to be used for the Sampler * epsilon: epsilon-distance * maxLevel: maximum level of the partitioning * type: the join type */ Orenstein orenstein = new Orenstein( input0, input1, joinPredicate, newSorter, createTuple, initialCapacity, p, seed, epsilon, maxLevel, type ); return orenstein; } /** * The main method contains the method calls to integrate data * into a Cloudscape table, performing a similarity join on them * and writing the results back to an other Cloudscape table. * * @param args array of <tt>String</tt> arguments. It can be used to * submit parameters when the main method is called. */ public static void main(String[] args) { // determining the mode Cloudscape should run in // and parse further arguments (input, clean, ...) determineMode(args); System.out.println("SimilarityJoinDemo starting in " + framework + " mode."); try { // retrieving a valid JDBC connection Connection conn = getConnection(databaseLocation, databaseName, false); // creating some statements based on the current connection Statement s = conn.createStatement(); Statement t = conn.createStatement(); // defining the input cursors ResultSetMetaDataCursor input1, input2; // defining the output cursor final MetaDataCursor<Tuple, CompositeMetaData<Object, Object>> results; // if the argument 'input' has been specified if (input) { // Creating tables createTables(s); // Inserting spatial data using buffer size: 1024*1024 byte insertPoints(new File(file1), 1024*1024, conn, "Spatial1"); insertPoints(new File(file2), 1024*1024, conn, "Spatial2"); System.out.println("Inserted spatial data."); } /* initializing input cursors * * CLOUDSCAPE ==> XXL * */ input1 = initializeInput(s, createTuple, "Spatial1"); input2 = initializeInput(t, createTuple, "Spatial2"); System.out.println("Performing similarity join based on Orenstein algorithm."); /* performing join using Orenstein algorithm * * parameter explanation: * input1: the first input cursor. * input2: the second input cursor. * mem: main memory available (in bytes). * initialCapacity: the initial capacity of the sweep areas. * p: fraction of elements to be used from the input. * seed: the seed to be used for the Sampler. * epsilon: epsilon-distance. * maxLevel: maximum level of the partitioning. * createTuple: a factory method generating the desired tuples contained in the cursors. * type: the join type. */ results = performOrenstein(input1, input2, mem, initialCapacity, p, seed, epsilon, maxLevel, createTuple, Type.THETA_JOIN); /* setting virtual table * * XXL ==> CLOUDSCAPE * */ VirtualTable.SET_RESULTSET = new AbstractFunction<Object, MetaDataCursorResultSet>() { public MetaDataCursorResultSet invoke() { return new MetaDataCursorResultSet(results) { public Object getObject(int columnIndex) throws SQLException { res++; return super.getObject(columnIndex); } }; } }; System.out.println("INSERT INTO JoinResults SELECT * FROM NEW xxl.core.relational.VirtualTable() AS VT"); // executing query on a virtual table s.execute("INSERT INTO JoinResults SELECT * FROM NEW xxl.core.relational.VirtualTable() AS VT"); System.out.println("\n=============================="); System.out.println("No. of results:\t"+res+"\t"); System.out.println("runtime (sec):\t"+(System.currentTimeMillis()-start)/1000.0+"\t"); System.out.println("element-comparisons:\t"+xxl.core.spatial.cursors.Orenstein.comparisons.counter+"\t"); if(external) System.out.println("IOs(object-count)\tRead:\t"+counter.getReadIO()+"\tWrite:\t"+counter.getWriteIO()); // closing input-Cursors input1.close(); input2.close(); // dropping tables if argument 'clean' has been specified if (drop) { dropTables(s); } // closing remaining resources and shutting down Cloudscape s.close(); t.close(); closeConnection(conn); shutDownCloudscape(); System.out.println("SimilarityJoinDemo finished."); } catch (Exception e) { e.printStackTrace(System.err); } } }