/* * Copyright (C) INRIA, 2012-2013 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ package fr.inrialpes.tyrexmo.queryanalysis; import org.slf4j.Logger; import org.slf4j.LoggerFactory; //import org.apache.commons.cli.CommandLine; //import org.apache.commons.cli.CommandLineParser; //import org.apache.commons.cli.PosixParser; //import org.apache.commons.cli.Options; //import org.apache.commons.cli.OptionBuilder; //import org.apache.commons.cli.ParseException; //import org.apache.commons.cli.HelpFormatter; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.BufferedReader; import java.io.FileReader; import java.io.PrintStream; import java.io.IOException; import java.io.File; import java.net.URI; import com.hp.hpl.jena.sparql.core.Var; import com.hp.hpl.jena.query.Query; import com.hp.hpl.jena.query.QueryFactory; public class Analysis { final static Logger logger = LoggerFactory.getLogger( Analysis.class ); //protected Options options = null; protected String queryDir = null; protected String outputType = "html"; protected String outputFile = null; protected PrintStream stream = null; static final int NONE = 0; // means not even a and... (triple pattern) static final int AND = 1; // means only and (basic graph pattern) static final int UNION = 2; // means UNION and AND (UCQ) static final int OPT = 3; static final int FILTER = 4; static final int UNION_OPT = 5; static final int OPT_FILTER = 6; static final int FILTER_UNION = 7; static final int UNION_OPT_FILTER = 8; static final int CYCLE = 0; static final int DAG = 1; static final int TREE = 2; static final int PROJ = 0; static final int NOPROJ = 1; int totalNumber = 0; int correctNumber = 0; int failure = 0; int ndVarCycles = 0; int[][][] resultArray = null; // public Analysis() { // resultArray = new int[NOPROJ+1][TREE+1][UNION_OPT_FILTER+1]; // for( int i = 0; i <= NOPROJ; i++ ) { // for( int j = 0; j <= TREE; j++ ) { // for( int k = 0; k <= UNION_OPT_FILTER; k++ ) { // resultArray[i][j][k] = 0; // } // } // } // options = new Options(); // options.addOption( "h", "help", false, "Print this page" ); // options.addOption( OptionBuilder.withLongOpt( "output" ).hasArg().withDescription( "Result FILE" ).withArgName("FILE").create( 'o' ) ); // options.addOption( OptionBuilder.withLongOpt( "format" ).hasArg().withDescription( "Output format [NIY]" ).withArgName("TYPE (asc|plot)").create( 'f' ) ); // } // // public static void main( String[] args ) throws Exception { // new Analysis().run( args ); // } // // public void run ( String [] args ) throws Exception, IOException { // // Read parameters // String[] argList = null; // try { // CommandLineParser parser = new PosixParser(); // CommandLine line = parser.parse( options, args ); // if ( line.hasOption( 'h' ) ) { usage(); System.exit( 0 ); } // if ( line.hasOption( 'f' ) ) outputType = line.getOptionValue( 'f' ); // if ( line.hasOption( 'o' ) ) outputFile = line.getOptionValue( 'o' ); // argList = line.getArgs(); // if ( argList.length < 1 ) { // logger.error( "Usage: TestContain SolverClass Q1 Q2" ); // usage(); // System.exit( -1 ); // } // } catch( ParseException exp ) { // logger.error( exp.getMessage() ); // usage(); // System.exit(-1); // } // queryDir = argList[0]; // // // Set output file // if ( outputFile == null ) { // stream = System.out; // } else { // stream = new PrintStream( new FileOutputStream( outputFile ) ); // } // // File [] subdir = ( new File( queryDir ) ).listFiles(); // int size = subdir.length; // for ( int i=0 ; i < size; i++ ) { // File queryFile = subdir[i]; // if( queryFile.isFile() ) { // logger.trace( queryFile.toString() ); // totalNumber++; // try { // // ANALYSE // analyse( queryFile ); // correctNumber++; // } catch ( Exception ex ) { // //System.err.println( queryFile.toString() ); // //System.err.println( ex ); // failure++; // } // } // } // // RENDER // render(); // } // Classical, in particular dbpedia, prefix that are often forgotten String missingPrefixes = "PREFIX dbpedia: <http://dbpedia.org/resource/>\nPREFIX dbpprop: <http://dbpedia.org/property/>\nPREFIX dbpedia-owl: <http://dbpedia.org/ontology/>\nPREFIX geo: <http://www.example.com/>\nPREFIX foaf: <http://xmlns.com/foaf/0.1/>\n\n"; /** * Read a query from a file * @param fname a file containing a SPARQL query * @return returns the query as a string * @throws IOException */ public String read( String filename ) throws IOException { String qry = ""; try { BufferedReader in = new BufferedReader(new FileReader(filename)); String str; while ((str = in.readLine()) != null) { qry += " "+str; } in.close(); } catch ( IOException e ) {} return qry; } public void analyse( File qFile ) throws Exception { int projected; int cyclic; int constr; String queryString = read( qFile.toString() ); queryString = missingPrefixes + queryString; Query query = QueryFactory.create( queryString ); TransformAlgebra ta = new TransformAlgebra( queryString ); /* Would be better if prefixes could be added Query query = QueryFactory.read( qFile.toString() ); TransformAlgebra ta = new TransformAlgebra( query ); */ CycleAnalysis cq = new CycleAnalysis( ta.getTriples() ); // Projections if ( ta.getNonDistVars().size() != 0 ) projected = PROJ; else projected = NOPROJ; // Cycles if ( cq.isCyclic() ) cyclic = CYCLE; else if ( cq.isDAGATree() ) cyclic = TREE; else cyclic = DAG; // Constructors if ( ta.containsOpt() ) { if ( ta.hasUnion() ) { if ( ta.hasFilter() ) { constr = UNION_OPT_FILTER; } else { constr = UNION_OPT; } } else { if ( ta.hasFilter() ) { constr = OPT_FILTER; } else { constr = OPT; } } } else { if ( ta.hasUnion() ) { if ( ta.hasFilter() ) { constr = FILTER_UNION; } else { constr = UNION; } } else { if ( ta.hasFilter() ) { constr = FILTER; } else { constr = NONE; } } } /* System.err.println( queryString ); System.err.print( "\nDistVars: "); for( Var v : ta.getResultVars() ) System.err.print( v+" " ); System.err.println( "\nAllVars: "); for( Var v : ta.getAllVariables() ) System.err.print( v+" " ); System.err.print( "\nDiff: "); for( Var v : ta.getNonDistVars() ) System.err.print( v+" " ); System.err.println( "\nCyclic: "+cyclic+" Projection: "+projected+" Constructor: "+constr ); */ // Cycles among non distinguished variables cq.constantsAndDvars.addAll( cq.convertFromVarToString( ta.getResultVars() ) ); if ( cq.isThereAcycleAmongNDvars( ta.getNonDistVars() ) ) ndVarCycles++; (resultArray[projected][cyclic][constr])++; } public void renderOldStyle() { int number = resultArray[PROJ][TREE][UNION] + resultArray[PROJ][TREE][UNION_OPT] + resultArray[PROJ][TREE][FILTER_UNION] + resultArray[PROJ][TREE][UNION_OPT_FILTER] + resultArray[NOPROJ][TREE][UNION] + resultArray[NOPROJ][TREE][UNION_OPT] + resultArray[NOPROJ][TREE][FILTER_UNION] + resultArray[NOPROJ][TREE][UNION_OPT_FILTER]; int perc = number*100/correctNumber; int total = number; stream.println("Tree UCQ\t"+number+"\t"+number*100/correctNumber ); number = resultArray[PROJ][TREE][NONE] + resultArray[PROJ][TREE][OPT] + resultArray[PROJ][TREE][FILTER] + resultArray[PROJ][TREE][OPT_FILTER] + resultArray[NOPROJ][TREE][NONE] + resultArray[NOPROJ][TREE][OPT] + resultArray[NOPROJ][TREE][FILTER] + resultArray[NOPROJ][TREE][OPT_FILTER] ; perc += number*100/correctNumber; total += number; stream.println("Tree Others\t"+number+"\t"+number*100/correctNumber ); number = resultArray[PROJ][DAG][UNION] + resultArray[PROJ][DAG][UNION_OPT] + resultArray[PROJ][DAG][FILTER_UNION] + resultArray[PROJ][DAG][UNION_OPT_FILTER] + resultArray[NOPROJ][DAG][UNION] + resultArray[NOPROJ][DAG][UNION_OPT] + resultArray[NOPROJ][DAG][FILTER_UNION] + resultArray[NOPROJ][DAG][UNION_OPT_FILTER]; perc += number*100/correctNumber; total += number; stream.println("DAG UCQ\t\t"+number+"\t"+number*100/correctNumber ); number = resultArray[PROJ][DAG][NONE] + resultArray[PROJ][DAG][OPT] + resultArray[PROJ][DAG][FILTER] + resultArray[PROJ][DAG][OPT_FILTER] + resultArray[NOPROJ][DAG][NONE] + resultArray[NOPROJ][DAG][OPT] + resultArray[NOPROJ][DAG][FILTER] + resultArray[NOPROJ][DAG][OPT_FILTER] ; stream.println("DAG Others\t"+number+"\t"+number*100/correctNumber ); perc += number*100/correctNumber; total += number; number = resultArray[PROJ][CYCLE][NONE] + resultArray[PROJ][CYCLE][UNION] + resultArray[PROJ][CYCLE][OPT] + resultArray[PROJ][CYCLE][FILTER] + resultArray[PROJ][CYCLE][UNION_OPT] + resultArray[PROJ][CYCLE][OPT_FILTER] + resultArray[PROJ][CYCLE][FILTER_UNION] + resultArray[PROJ][CYCLE][UNION_OPT_FILTER] + resultArray[NOPROJ][CYCLE][NONE] + resultArray[NOPROJ][CYCLE][UNION] + resultArray[NOPROJ][CYCLE][OPT] + resultArray[NOPROJ][CYCLE][FILTER] + resultArray[NOPROJ][CYCLE][UNION_OPT] + resultArray[NOPROJ][CYCLE][OPT_FILTER] + resultArray[NOPROJ][CYCLE][FILTER_UNION] + resultArray[NOPROJ][CYCLE][UNION_OPT_FILTER]; perc += number*100/correctNumber; total += number; stream.println("Cyclic\t\t"+number+"\t"+number*100/correctNumber ); stream.println("TOTAL\t\t"+total+"\t"+perc+"\n" ); stream.println( "\nNumber of queries with cycles using only ndvariables: "+ndVarCycles+"\n" ); } public void render() { stream.println( "\n"+failure+" errors over "+totalNumber+" queries (residu: "+correctNumber+")\n" ); renderOldStyle(); int proj = resultArray[PROJ][CYCLE][NONE] + resultArray[PROJ][CYCLE][UNION] + resultArray[PROJ][CYCLE][OPT] + resultArray[PROJ][CYCLE][FILTER] + resultArray[PROJ][CYCLE][UNION_OPT] + resultArray[PROJ][CYCLE][OPT_FILTER] + resultArray[PROJ][CYCLE][FILTER_UNION] + resultArray[PROJ][CYCLE][UNION_OPT_FILTER] + resultArray[PROJ][DAG][NONE] + resultArray[PROJ][DAG][UNION] + resultArray[PROJ][DAG][OPT] + resultArray[PROJ][DAG][FILTER] + resultArray[PROJ][DAG][UNION_OPT] + resultArray[PROJ][DAG][OPT_FILTER] + resultArray[PROJ][DAG][FILTER_UNION] + resultArray[PROJ][DAG][UNION_OPT_FILTER] + resultArray[PROJ][TREE][NONE] + resultArray[PROJ][TREE][UNION] + resultArray[PROJ][TREE][OPT] + resultArray[PROJ][TREE][FILTER] + resultArray[PROJ][TREE][UNION_OPT] + resultArray[PROJ][TREE][OPT_FILTER] + resultArray[PROJ][TREE][FILTER_UNION] + resultArray[PROJ][TREE][UNION_OPT_FILTER]; int noproj = resultArray[NOPROJ][CYCLE][NONE] + resultArray[NOPROJ][CYCLE][UNION] + resultArray[NOPROJ][CYCLE][OPT] + resultArray[NOPROJ][CYCLE][FILTER] + resultArray[NOPROJ][CYCLE][UNION_OPT] + resultArray[NOPROJ][CYCLE][OPT_FILTER] + resultArray[NOPROJ][CYCLE][FILTER_UNION] + resultArray[NOPROJ][CYCLE][UNION_OPT_FILTER] + resultArray[NOPROJ][DAG][NONE] + resultArray[NOPROJ][DAG][UNION] + resultArray[NOPROJ][DAG][OPT] + resultArray[NOPROJ][DAG][FILTER] + resultArray[NOPROJ][DAG][UNION_OPT] + resultArray[NOPROJ][DAG][OPT_FILTER] + resultArray[NOPROJ][DAG][FILTER_UNION] + resultArray[NOPROJ][DAG][UNION_OPT_FILTER] + resultArray[NOPROJ][TREE][NONE] + resultArray[NOPROJ][TREE][UNION] + resultArray[NOPROJ][TREE][OPT] + resultArray[NOPROJ][TREE][FILTER] + resultArray[NOPROJ][TREE][UNION_OPT] + resultArray[NOPROJ][TREE][OPT_FILTER] + resultArray[NOPROJ][TREE][FILTER_UNION] + resultArray[NOPROJ][TREE][UNION_OPT_FILTER]; stream.println( "\t\tproj ("+proj+")\t\t\tnoproj ("+noproj+")" ); stream.println( "\t\ttree\tdag\tcycle\ttree\tdag\tcycle" ); boolean perc = false; printOneLine( "none\t", NONE, perc ); //printOneLine( "and\t", NONE, perc ); printOneLine( "union\t", UNION, perc ); printOneLine( "opt\t", OPT, perc ); printOneLine( "filter\t", FILTER, perc ); printOneLine( "un-opt\t", UNION_OPT, perc ); printOneLine( "opt-filt", OPT_FILTER, perc ); printOneLine( "filt-un\t", FILTER_UNION, perc ); printOneLine( "un-opt-filt", UNION_OPT_FILTER, perc ); perc = true; stream.println( "\t\tproj ("+proj*100/correctNumber+")\t\t\tnoproj ("+noproj*100/correctNumber+")" ); stream.println( "\t\ttree\tdag\tcycle\ttree\tdag\tcycle" ); printOneLine( "none\t", NONE, perc ); //printOneLine( "and\t", NONE, perc ); printOneLine( "union\t", UNION, perc ); printOneLine( "opt\t", OPT, perc ); printOneLine( "filter\t", FILTER, perc ); printOneLine( "un-opt\t", UNION_OPT, perc ); printOneLine( "opt-filt", OPT_FILTER, perc ); printOneLine( "filt-un\t", FILTER_UNION, perc ); printOneLine( "un-opt-filt", UNION_OPT_FILTER, perc ); } public void printOneLine ( String header, int line, boolean perc ) { if ( perc ) { stream.println( header+"\t"+resultArray[PROJ][TREE][line]*100/correctNumber+"\t"+resultArray[PROJ][DAG][line]*100/correctNumber+"\t"+resultArray[PROJ][CYCLE][line]*100/correctNumber+"\t"+resultArray[NOPROJ][TREE][line]*100/correctNumber+"\t"+resultArray[NOPROJ][DAG][line]*100/correctNumber+"\t"+resultArray[NOPROJ][CYCLE][line]*100/correctNumber ); } else { stream.println( header+"\t"+resultArray[PROJ][TREE][line]+"\t"+resultArray[PROJ][DAG][line]+"\t"+resultArray[PROJ][CYCLE][line]+"\t"+resultArray[NOPROJ][TREE][line]+"\t"+resultArray[NOPROJ][DAG][line]+"\t"+resultArray[NOPROJ][CYCLE][line] ); } } public void usage() { Package pkg = this.getClass().getPackage(); // new HelpFormatter().printHelp( 80, pkg+" [options] queryDir\nAnalyses the queries contained in queryDir", "\nOptions:", options, "" ); } }