/* XXL: The eXtensible and fleXible Library for data processing Copyright (C) 2000-2011 Prof. Dr. Bernhard Seeger Head of the Database Research Group Department of Mathematics and Computer Science University of Marburg Germany This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; If not, see <http://www.gnu.org/licenses/>. http://code.google.com/p/xxl/ */ package xxl.core.xxql; import java.sql.ResultSetMetaData; import java.sql.SQLException; import java.util.Comparator; import java.util.HashMap; import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Map; import xxl.core.collections.bags.Bag; import xxl.core.cursors.AbstractCursor; import xxl.core.cursors.Cursor; import xxl.core.cursors.differences.NestedLoopsDifference; import xxl.core.cursors.filters.Filter; import xxl.core.cursors.joins.NestedLoopsJoin; import xxl.core.functions.AbstractFunction; import xxl.core.functions.Function; import xxl.core.relational.cursors.NestedLoopsDistinct; import xxl.core.relational.metaData.ColumnMetaData; import xxl.core.relational.metaData.ColumnMetaDatas; import xxl.core.relational.metaData.ResultSetMetaDatas; import xxl.core.relational.tuples.ArrayTuple; import xxl.core.relational.tuples.Tuple; import xxl.core.util.metaData.CompositeMetaData; import xxl.core.xxql.columns.Column; /** * This class implements all relational operators used by {@link AdvTupleCursor} as static * functions.<br> * You may extend (via inheritance) this class to provide your own implementation of the operators * and tell AdvTupleCursor to use it (via * {@link AdvTupleCursor#setOperatorImpl(OperatorImplementation)} or in it's constructors) */ public class OperatorImplementation { // HA! singleton! (see "gang of four" books if you don't know what this is and are seriously // interested - or wikipedia) private static OperatorImplementation stdOperatorImplSingleton = null; private OperatorImplementation(){} /** * Returns Object of {@link OperatorImplementation} - always the same Object, as it doesn't * make any sense to have multiple of them: it only contains static functions anyway.<br> * Yes, this is a Singleton. */ public static OperatorImplementation getOperatorImpl(){ if(stdOperatorImplSingleton == null) stdOperatorImplSingleton = new OperatorImplementation(); return stdOperatorImplSingleton; } public static AdvTupleCursor select(AdvTupleCursor src, String newTableAlias, final Column ...cols ){ // if no alias was given use the old one if(newTableAlias == null) newTableAlias = src.getResultSetMetaData().getAlias(); for(Column col : cols){ col.setMetaData(src.getResultSetMetaData(), newTableAlias); } Function<Object, Tuple> mapping = new AbstractFunction<Object,Tuple>(){ private static final long serialVersionUID = 1L; @Override public Tuple invoke(Object argument) { Tuple tuple = (Tuple)argument; Object[] elems = new Object[cols.length]; for(int i = 0; i < cols.length; i++){ elems[i] = cols[i].invoke(tuple); } return new ArrayTuple(elems); } }; ColumnMetaData[] cmds = new ColumnMetaData[cols.length]; for(int i=0;i<cmds.length;i++){ cmds[i] = cols[i].getColumnMetaData(); } AdvResultSetMetaData metadata = new AdvResultSetMetaData(newTableAlias, cmds); return new AdvTupleCursor(src, mapping, metadata, src); } /** * Adds additional column with a name "index", which can be accessed via ColumnUtils.col("index"). * This column holds actual position of the tuple in the provided iterator. * @param src * @param newTableAlias * @param cols * @return */ public static AdvTupleCursor selectIndex(AdvTupleCursor src, String newTableAlias, final Column ...cols ){ // if no alias was given use the old one if(newTableAlias == null) newTableAlias = src.getResultSetMetaData().getAlias(); for(Column col : cols){ col.setMetaData(src.getResultSetMetaData(), newTableAlias); } Function<Object, Tuple> mapping = new AbstractFunction<Object,Tuple>(){ private static final long serialVersionUID = 1L; private int counter = 0; @Override public Tuple invoke(Object argument) { Tuple tuple = (Tuple)argument; Object[] elems = new Object[cols.length+1]; for(int i = 0; i < cols.length; i++){ elems[i] = cols[i].invoke(tuple); } elems[elems.length-1] = new Integer(counter); counter++; return new ArrayTuple(elems); } }; ColumnMetaData[] cmds = new ColumnMetaData[cols.length+1]; for(int i=0;i<cols.length;i++){ cmds[i] = cols[i].getColumnMetaData(); } cmds[cmds.length-1] = AdvResultSetMetaData.createColumnMetaData(Integer.class, "index", newTableAlias); AdvResultSetMetaData metadata = new AdvResultSetMetaData(newTableAlias, cmds); return new AdvTupleCursor(src, mapping, metadata, src); } public static AdvTupleCursor where(AdvTupleCursor src, AdvPredicate predicate){ // supports reset // set metadata and correlated tuples in predicate so it can pass them on to "its" Columns predicate.setMetaData(src.getResultSetMetaData(), null); Filter<Tuple> filter = new Filter<Tuple>(src, predicate); AdvTupleCursor ret = AdvTupleCursor.factorFromCursorWithTuples(filter, src.getResultSetMetaData(), src); // add our predicate to the "Correlated Tuples Receivers" of the cursor ret.addCorrTuplesRec(predicate); return ret; } public static AdvTupleCursor join(AdvTupleCursor src1, AdvTupleCursor src2, String newTableAlias, AdvPredicate predicate){ return join_impl(src1, src2, newTableAlias, predicate, NestedLoopsJoin.Type.THETA_JOIN); } public static AdvTupleCursor join(AdvTupleCursor src1, AdvTupleCursor src2, String newTableAlias, AdvPredicate predicate, AdvTupleCursor.JOIN_TYPE type){ NestedLoopsJoin.Type jtype = NestedLoopsJoin.Type.THETA_JOIN; switch(type){ // we duplicated the types in AdvTupleCursor so NestedLoopsJoin.Type // needs not be imported by user case LEFT_OUTER_JOIN : jtype = NestedLoopsJoin.Type.LEFT_OUTER_JOIN; break; case RIGHT_OUTER_JOIN : jtype = NestedLoopsJoin.Type.RIGHT_OUTER_JOIN; break; case OUTER_JOIN : jtype = NestedLoopsJoin.Type.OUTER_JOIN; break; case THETA_JOIN : jtype = NestedLoopsJoin.Type.THETA_JOIN; break; } return join_impl(src1, src2, newTableAlias, predicate, jtype); } protected static AdvTupleCursor join_impl(AdvTupleCursor src1, AdvTupleCursor src2, String newTableAlias, AdvPredicate predicate, NestedLoopsJoin.Type type) { AdvResultSetMetaData metadata = AdvResultSetMetaData.concat(src1.getResultSetMetaData(), src2.getResultSetMetaData(), newTableAlias); final int size1; final int size2; try { size1 = src1.getResultSetMetaData().getColumnCount(); size2 = src2.getResultSetMetaData().getColumnCount(); } catch (SQLException e) { throw new RuntimeException(e); } Function<Tuple, Tuple> concat = new AbstractFunction<Tuple, Tuple>(){ private static final long serialVersionUID = 1L; @Override public Tuple invoke(Tuple t1, Tuple t2) { return AdvTupleCursor.concatTuples(t1, t2, size1, size2); } }; // set left and right metadata in predicate, so the containend columns can decide whether // to use the left or the right tuple in invoke() predicate.setMetaDatas(src1.getResultSetMetaData(), src2.getResultSetMetaData()); NestedLoopsJoin<Tuple, Tuple> join = new NestedLoopsJoin<Tuple, Tuple>(src1, src2, null, predicate, concat, type); //TODO sollte hier nicht ret.addCorrTuplesRec(predicate); rein ?!? return AdvTupleCursor.factorFromCursorWithTuples(join, metadata, src1, src2); } @SuppressWarnings("unchecked") public static AdvTupleCursor union(AdvTupleCursor src1, AdvTupleCursor src2, String newTableAlias) { // supports reset AdvResultSetMetaData metadata; if(newTableAlias != null && !newTableAlias.equals("")) metadata = src1.getResultSetMetaData().clone(newTableAlias); else metadata = src1.getResultSetMetaData(); // check whether metadatas are compatible if(!checkMetaDatasEquivalence(src1.getResultSetMetaData(), src2.getResultSetMetaData())){ throw new RuntimeException("Can't calculate union of "+src1.getResultSetMetaData().getAlias() +" and "+src2.getResultSetMetaData().getAlias()+" because their schemas are not compatible"); } // don't use relational.Union because its metadata-checks won't work with our metadata // (because we might have Objects that don't have a SQL equivalent in our tuples) ResettableSequentializer<Tuple> union = new ResettableSequentializer<Tuple>(src1, src2); return AdvTupleCursor.factorFromCursorWithTuples(union, metadata, src1, src2); } public static AdvTupleCursor distinct(AdvTupleCursor src) { // supports reset() AdvResultSetMetaData metadata = src.getResultSetMetaData(); int memSize = 32; int objectSize = 4; NestedLoopsDistinct distinct = new NestedLoopsDistinct(src, memSize, objectSize); return AdvTupleCursor.factorFromCursorWithTuples(distinct, metadata, src); } /** * Checks whether two (Adv)ResultSetMetaDatas are equivalent, e.g. you may perfrom UNION, * DIFFERENCE or INTERSECT on them.<br><br> * We don't use {@link ResultSetMetaDatas#RESULTSET_METADATA_COMPARATOR}. because it checks the * java.sql.types and precision and we don't support them because it's impossible for non-basic * types. We check by comparing the columnCount and the columnClassName (i.e. the full java * class name). * * @param rsmd1 first ResultSetMetaData * @param rsmd2 first ResultSetMetaData * @return true if rsmd1 and rsmd2 are equivalent */ public static boolean checkMetaDatasEquivalence(ResultSetMetaData rsmd1, ResultSetMetaData rsmd2){ try { // kind of stolen from ResultSetMetaDatas, but changed to compare the actual java types // and we just care if they're equal. if(rsmd1.getColumnCount() != rsmd2.getColumnCount()) return false; int compare; for (int column = 1; column <= rsmd1.getColumnCount(); column++) { // compare the column names compare = rsmd1.getColumnName(column).compareToIgnoreCase(rsmd2.getColumnName(column)); if (compare != 0) return false; // compare the (java!)-type if(!rsmd1.getColumnClassName(column).equals(rsmd2.getColumnClassName(column))) return false; } return true; } catch (SQLException sqle) { throw new RuntimeException("relational metadata information cannot be compared because of the following SQL exception : " +sqle.getMessage()); } } public static AdvTupleCursor difference(AdvTupleCursor src1, AdvTupleCursor src2) { return difference(src1, src2, true); } public static AdvTupleCursor difference(AdvTupleCursor src1, AdvTupleCursor src2, boolean all) { // supports reset AdvResultSetMetaData metadata = src1.getResultSetMetaData(); // check whether metadatas are compatible if(!checkMetaDatasEquivalence(src1.getResultSetMetaData(), src2.getResultSetMetaData())){ throw new RuntimeException("Can't calculate difference between "+src1.getResultSetMetaData().getAlias() +" and "+src2.getResultSetMetaData().getAlias()+" because their schemas are not compatible"); } int memSize = 32; int objectSize = 4; NestedLoopsDifference<Tuple> difference = new NestedLoopsDifference<Tuple>(src1, src2, memSize, objectSize, all); return AdvTupleCursor.factorFromCursorWithTuples(difference, metadata, src1, src2); } public static AdvTupleCursor groupBy(AdvTupleCursor src, String newTableAlias, final Column[] proj, AggregateColumn[] metaDataAggregationFunctions){ // (probably) supports reset // if no alias was given use the old one if(newTableAlias == null) newTableAlias = src.getResultSetMetaData().getAlias(); //MetaDaten weiterreichen for(Column col : proj){ col.setMetaData(src.getResultSetMetaData(), newTableAlias); } for(AggregateColumn col : metaDataAggregationFunctions){ col.setMetaData(src.getResultSetMetaData(), newTableAlias); } //Teil 1: Berechnen der Partitionen //berechnet den key um die tuples zu partitionieren Function<Tuple, Tuple> mapping = new Function<Tuple, Tuple>() { @Override public Tuple invoke(List<? extends Tuple> arguments) { throw new RuntimeException(); } @Override public Tuple invoke() { throw new RuntimeException(); } @Override public Tuple invoke(Tuple argument) { List<Object> objects = new LinkedList<Object>(); for(Column col: proj){ objects.add(col.invoke(argument)); } return ArrayTuple.FACTORY_METHOD.invoke(objects); } @Override public Tuple invoke(Tuple argument0, Tuple argument1) { throw new RuntimeException(); } }; //Zeug fuer den NestedLoopsGrouper Map<Object, Bag<Tuple>> map = new HashMap<Object, Bag<Tuple>>(); int memSize = 32; int objectSize = 4; int keySize = 8; //Partitionierung // FIXME: das ding ist kaputt: schmeisst nullpointer-exception bei reset() weil bagIterator == null! xxl.core.relational.cursors.NestedLoopsGrouper grouper = new xxl.core.relational.cursors.NestedLoopsGrouper( src, mapping, map, memSize, objectSize, keySize); //Teil 2 Auswertung der Partitionen Function<Object, ? extends Tuple> createOutputTuple = ArrayTuple.FACTORY_METHOD; GroupAggregator aggregator = new GroupAggregator( grouper, metaDataAggregationFunctions, proj, createOutputTuple); //Metadaten des neuen AdvTupleCursor ColumnMetaData[] cmds = new ColumnMetaData[proj.length + metaDataAggregationFunctions.length]; for(int i=0;i<proj.length;i++){ cmds[i] = proj[i].getColumnMetaData(); try { System.out.println(cmds[i].getColumnName()); } catch (SQLException e) { // TODO Auto-generated catch block e.printStackTrace(); } } CompositeMetaData<Object, Object> mtd; for(int i=proj.length;i<cmds.length;i++){ mtd = (CompositeMetaData<Object, Object>) metaDataAggregationFunctions[i-proj.length].getMetaData(); cmds[i] = (ColumnMetaData) mtd.get(ColumnMetaDatas.COLUMN_METADATA_TYPE); } AdvResultSetMetaData metadata = new AdvResultSetMetaData(newTableAlias, cmds); return AdvTupleCursor.factorFromCursorWithTuples(aggregator, metadata, src); } public static AdvTupleCursor intersect(AdvTupleCursor src1, AdvTupleCursor src2) { // supports reset AdvResultSetMetaData metadata = src1.getResultSetMetaData(); // check whether metadatas are compatible if(!checkMetaDatasEquivalence(src1.getResultSetMetaData(), src2.getResultSetMetaData())){ throw new RuntimeException("Can't calculate intersection of "+src1.getResultSetMetaData().getAlias() +" and "+src2.getResultSetMetaData().getAlias()+" because their schemas are not compatible"); } // we don't use the relational NestedLoopsIntersection because we don't like their MetaData // comparison, because it checks the java.sql.types and we don't support them because it's // impossible for non-basic types. we need our own type checking on the metadatas checking // the java type (see above) to be sure! // XXLs NestedLoopsIntersection is buggy! // NestedLoopsIntersection<Tuple> intersect = new NestedLoopsIntersection<Tuple>(src1, src2); Intersection<Tuple> intersect = new Intersection<Tuple>(src1, src2); return AdvTupleCursor.factorFromCursorWithTuples(intersect, metadata, src1, src2); } @SuppressWarnings("unchecked") public static AdvTupleCursor orderBy(AdvTupleCursor src, final boolean asc, final Column ... cols) { if(cols == null || cols.length<1){ throw new RuntimeException("You need to specify at least one Column to sort!"); } for(Column col : cols){ col.setMetaData(src.getResultSetMetaData(), null); } final Comparator comps[] = new Comparator[cols.length]; // comparators for each column // initialize comparators for(int i=0;i<cols.length;i++){ try { comps[i] = Column.createComp(cols[i], cols[i]); } catch (Exception e) { throw new RuntimeException("Can't sort by Column "+cols[i].columnAlias+" because: " +e.getMessage(),e); } } // the comparator for the whole tuple (iterates through the columns) final Comparator<Tuple> comp = new Comparator<Tuple>() { @Override public int compare(Tuple t1, Tuple t2) { for(int i=0;i<cols.length;i++){ int tmp = comps[i].compare(cols[i].invoke(t1), cols[i].invoke(t2)); if(tmp != 0) // if the colums aren't equal return (asc == true) ? tmp : -1*tmp; } // if we've come this far, the tuples are equal. return 0; } }; AdvTupleCursor ret = new AdvTupleCursor(src, id, src.getResultSetMetaData(), src){ { doNotCache = false; } @Override public void setCachingStrategy(CachingStrategy strat, boolean recursive) { super.setCachingStrategy(strat, recursive); this.doNotCache=false; // make sure this cursor does cache! } @Override protected boolean hasNextObject() { // when hasNext() or next() is called the first time, we cache everything into the // internal list and sort. if(firstRun){ // this will cache all elements in the list, sort them an reset the cursor internal_sort(comp); // TODO: als optimierung koennte man jetzt einen "ist sortiert" flag setzen und // bei joins, gruppierungen etc die sorted varianten nehmen } return super.hasNextObject(); } }; return ret; } public Iterable<Object> getIterableForColumn(final AdvTupleCursor cur, final Column col){ col.setMetaData(cur.getResultSetMetaData(), null); return new Iterable<Object>(){ Cursor<Object> internCursor = new AbstractCursor<Object>() { @Override protected boolean hasNextObject() { return cur.hasNext(); } @Override protected Object nextObject() { return col.invoke(cur.next()); } @Override public void reset() throws UnsupportedOperationException { cur.reset(); super.reset(); } @Override public boolean supportsReset() { return cur.supportsReset(); } }; @Override public Iterator<Object> iterator() { internCursor.reset(); return internCursor; } }; } public static AdvTupleCursor top(AdvTupleCursor src, final int limit) { if(limit<0){ throw new RuntimeException("top() must not be invoked with a negative count"); } AdvTupleCursor ret = new AdvTupleCursor(src, id, src.getResultSetMetaData(), src){ int count = 0; @Override protected boolean hasNextObject() { if(count == limit) return false; count++; return super.hasNextObject(); } @Override public void reset() throws UnsupportedOperationException { count=0; super.reset(); } }; return ret; } // brauchen wir mind. 2x, also lieber in die klasse packen static Function<Object, Tuple> id = new AbstractFunction<Object, Tuple>() { private static final long serialVersionUID = 1L; @Override public Tuple invoke(Object argument) { return (Tuple)argument; } }; }