/*
* Copyright (c) 2009-2012 Clark & Parsia, LLC. <http://www.clarkparsia.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.clarkparsia.empire.codegen;
import com.clarkparsia.empire.util.Repositories2;
import com.complexible.common.collect.Iterables2;
import com.complexible.common.collect.Iterators2;
import com.complexible.common.openrdf.model.Statements;
import com.complexible.common.openrdf.repository.Repositories;
import com.google.common.collect.Iterables;
import org.openrdf.model.Resource;
import org.openrdf.model.URI;
import org.openrdf.model.Statement;
import org.openrdf.model.Value;
import org.openrdf.model.Literal;
import org.openrdf.model.BNode;
import org.openrdf.model.vocabulary.OWL;
import org.openrdf.model.vocabulary.XMLSchema;
import org.openrdf.model.vocabulary.RDFS;
import org.openrdf.model.vocabulary.RDF;
import org.openrdf.model.impl.ValueFactoryImpl;
import org.openrdf.query.QueryLanguage;
import org.openrdf.repository.Repository;
import org.openrdf.rio.RDFFormat;
import org.openrdf.query.BindingSet;
import org.openrdf.query.TupleQueryResult;
import com.complexible.common.openrdf.util.AdunaIterations;
import com.complexible.common.collect.MultiIterator;
import com.complexible.common.net.NetUtils;
import com.complexible.common.base.Functions2;
import com.google.common.base.Predicate;
import com.google.common.base.Charsets;
import com.google.common.base.Function;
import com.google.common.base.Functions;
import com.google.common.collect.Collections2;
import com.google.common.collect.Sets;
import com.google.common.collect.Iterators;
import com.google.common.io.Files;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Collection;
import java.util.Map;
import java.util.List;
import java.util.Arrays;
import java.util.HashSet;
import java.util.HashMap;
import java.io.File;
import java.io.IOException;
import java.net.URL;
/**
* <p>Generate a set of Java beans which are compatible with Empire from a given RDF schema, OWL ontology, or blob
* of RDF data. The generated source code will map to the domain represented in the RDF.</p>
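*
* <p>A minimal usage sketch (the package name, ontology file, and output directory below are
* hypothetical):</p>
* <pre>
*   BeanGenerator.generateSourceFiles("com.example.domain",
*                                     new File("ontology.ttl").toURI().toURL(),
*                                     RDFFormat.TURTLE,
*                                     new File("src/com/example/domain"));
* </pre>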
*
* @author Michael Grove
* @since 0.5.1
* @version 0.7.3
*/
public final class BeanGenerator {
/**
* The logger
*/
private static final Logger LOGGER = LoggerFactory.getLogger(BeanGenerator.class);
/**
* URI constant for the owl:Thing concept
*/
private static final URI OWL_THING = ValueFactoryImpl.getInstance().createURI(OWL.NAMESPACE + "Thing");
/**
* The list of xsd datatypes which map to Integer
*/
private static final List<URI> integerTypes = Arrays.asList(XMLSchema.INT, XMLSchema.INTEGER, XMLSchema.POSITIVE_INTEGER,
XMLSchema.NEGATIVE_INTEGER, XMLSchema.NON_NEGATIVE_INTEGER,
XMLSchema.NON_POSITIVE_INTEGER, XMLSchema.UNSIGNED_INT);
/**
* The list of xsd datatypes which map to Long
*/
private static final List<URI> longTypes = Arrays.asList(XMLSchema.LONG, XMLSchema.UNSIGNED_LONG);
/**
* The list of xsd datatypes which map to Float
*/
private static final List<URI> floatTypes = Arrays.asList(XMLSchema.FLOAT, XMLSchema.DECIMAL);
/**
* The list of xsd datatypes which map to Short
*/
private static final List<URI> shortTypes = Arrays.asList(XMLSchema.SHORT, XMLSchema.UNSIGNED_SHORT);
/**
* The list of xsd datatypes which map to Byte
*/
private static final List<URI> byteTypes = Arrays.asList(XMLSchema.BYTE, XMLSchema.UNSIGNED_BYTE);
private static final Map<Resource, String> NAMES = new HashMap<Resource, String>();
private static final Map<String, Integer> NAMES_TO_COUNT = new HashMap<String, Integer>();
/**
* No instances; this is a static utility class.
*/
private BeanGenerator() {
}
/**
* Return the Java bean source code that represents the given RDF class
* @param thePackageName the name of the package the source will be in
* @param theGraph the repository containing information about the class
* @param theClass the class that is to be turned into Java source
* @param theMap the map of classes to the properties in their domain
* @return a string of the source code of the equivalent Java bean
* @throws Exception if there is an error while converting
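*
* <p>For illustration only, given a hypothetical class {@code http://example.org/Person} with a single
* {@code http://example.org/name} property whose range is {@code xsd:string}, the generated source
* looks roughly like:</p>
* <pre>
*   &#64;Entity
*   &#64;RdfsClass("http://example.org/Person")
*   public interface Person extends SupportsRdfId {
*       &#64;RdfProperty("http://example.org/name")
*       public String getName();
*       public void setName(String theValue);
*   }
* </pre>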
*/
private static String toSource(final String thePackageName, final Repository theGraph, final Resource theClass, final Map<Resource, Collection<URI>> theMap) throws Exception {
StringBuilder aSrc = new StringBuilder();
aSrc.append("package ").append(thePackageName).append(";\n\n");
aSrc.append("import java.util.*;\n");
aSrc.append("import javax.persistence.Entity;\n");
aSrc.append("import com.clarkparsia.empire.SupportsRdfId;\n");
aSrc.append("import com.clarkparsia.empire.annotation.*;\n\n");
// TODO: more imports? less?
Iterable<Resource> aSupers = Iterables2.present(Iterables.transform(AdunaIterations.iterable(Repositories.getStatements(theGraph, theClass, RDFS.SUBCLASSOF, null)),
Statements.objectAsResource()));
aSrc.append("@Entity\n");
aSrc.append("@RdfsClass(\"").append(theClass).append("\")\n");
aSrc.append("public interface ").append(className(theClass));
aSupers = Collections2.filter(Sets.newHashSet(aSupers), new Predicate<Resource>() {
public boolean apply(final Resource theValue) {
return theValue != null &&
!theValue.toString().startsWith(OWL.NAMESPACE)
&& !theValue.toString().startsWith(RDFS.NAMESPACE)
&& !theValue.toString().startsWith(RDF.NAMESPACE);
}
});
boolean aNeedsComma = false;
aSrc.append(" extends");
if (aSupers.iterator().hasNext()) {
for (Resource aSuper : aSupers) {
if (aNeedsComma) {
aSrc.append(",");
}
else {
aNeedsComma = true;
}
aSrc.append(" ").append(className(aSuper));
}
}
if (aNeedsComma) {
aSrc.append(",");
}
aSrc.append(" SupportsRdfId");
aSrc.append(" { \n\n");
Collection<URI> aProps = props(theClass, theMap);
for (URI aProp : aProps) {
// compute the type and name once per property; functionType issues repository queries
final String aType = functionType(theGraph, aProp);
final String aName = functionName(aProp);
aSrc.append("@RdfProperty(\"").append(aProp).append("\")\n");
aSrc.append("public ").append(aType).append(" get").append(aName).append("();\n");
aSrc.append("public void set").append(aName).append("(").append(aType).append(" theValue);\n\n");
}
aSrc.append("}");
return aSrc.toString();
}
/**
* Return the type of the function (getter and setter), i.e. the bean property type, for the given rdf:Property
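* <p>For example, a property whose declared or inferred range is {@code xsd:int} yields {@code Integer},
* {@code xsd:anyURI} yields {@code java.net.URI}, and a property detected as multi-valued is wrapped in
* {@code Collection<? extends T>} for the element type {@code T}.</p>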
* @param theRepo the graph of the ontology/data
* @param theProp the property
* @return the String representation of the property type
* @throws Exception if there is an error querying the data
*/
private static String functionType(final Repository theRepo, final URI theProp) throws Exception {
String aType;
Resource aRangeRes = Statements.objectAsResource().apply(AdunaIterations.singleResult(Repositories.getStatements(theRepo, theProp, RDFS.RANGE, null)).orNull()).orNull();
if (aRangeRes instanceof BNode) {
// we can't handle bnodes very well, so we're just going to assume Object
return "Object";
}
URI aRange = (URI) aRangeRes;
if (aRange == null) {
// no explicit range, try to infer it...
try {
TupleQueryResult aResults = Repositories.selectQuery(theRepo, QueryLanguage.SERQL, "select distinct r from {s} <"+theProp+"> {o}, {o} rdf:type {r}");
if (aResults.hasNext()) {
URI aTempRange = (URI) aResults.next().getValue("r");
if (!aResults.hasNext()) {
aRange = aTempRange;
}
else {
// TODO: leave the range as null; the property is used with values of multiple different types,
// so we should try to find the common superclass of all the values and use that as the range.
}
}
aResults.close();
if (aRange == null) {
// could not get it from type usage, so maybe it's a literal and we can guess it from the datatype
aResults = Repositories.selectQuery(theRepo, QueryLanguage.SERQL, "select distinct datatype(o) as dt from {s} <"+theProp+"> {o} where isLiteral(o)");
if (aResults.hasNext()) {
URI aTempRange = null;
while (aTempRange == null && aResults.hasNext()) {
// the SeRQL query above projects the datatype itself under the binding name 'dt'
Value aDatatype = aResults.next().getValue("dt");
if (aDatatype instanceof URI) {
aTempRange = (URI) aDatatype;
}
}
if (!aResults.hasNext()) {
aRange = aTempRange;
}
else {
// TODO: do something here, literals of multiple types used
}
}
aResults.close();
}
}
catch (Exception e) {
// range inference is best-effort; log the failure and fall back to Object
LOGGER.warn("Could not infer a range for property " + theProp, e);
}
}
if (XMLSchema.STRING.equals(aRange) || RDFS.LITERAL.equals(aRange)) {
aType = "String";
}
else if (XMLSchema.BOOLEAN.equals(aRange)) {
aType = "Boolean";
}
else if (integerTypes.contains(aRange)) {
aType = "Integer";
}
else if (longTypes.contains(aRange)) {
aType = "Long";
}
else if (XMLSchema.DOUBLE.equals(aRange)) {
aType = "Double";
}
else if (floatTypes.contains(aRange)) {
aType = "Float";
}
else if (shortTypes.contains(aRange)) {
aType = "Short";
}
else if (byteTypes.contains(aRange)) {
aType = "Byte";
}
else if (XMLSchema.ANYURI.equals(aRange)) {
aType = "java.net.URI";
}
else if (XMLSchema.DATE.equals(aRange) || XMLSchema.DATETIME.equals(aRange)) {
aType = "Date";
}
else if (XMLSchema.TIME.equals(aRange)) {
aType = "Date";
}
else if (aRange == null || aRange.equals(OWL_THING)) {
aType = "Object";
}
else {
aType = className(aRange);
}
if (isCollection(theRepo, theProp)) {
aType = "Collection<? extends " + aType + ">";
}
return aType;
}
/**
* Determine whether the property's range is a collection. This inspects the ontology for cardinality
* restrictions and, when none are available, falls back to the actual structure of the data.
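* <p>For example (hypothetical data), a restriction such as
* {@code _:r rdf:type owl:Restriction ; owl:onProperty ex:hasTag ; owl:maxCardinality 5} marks
* {@code ex:hasTag} as multi-valued, as does instance data in which some subject has more than one
* value for {@code ex:hasTag}.</p>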
* @param theRepo the graph of the ontology/data
* @param theProp the property
* @return true if the property has a collection as its value, false if it is a single-valued property
* @throws Exception if there is an error querying the data
*/
private static boolean isCollection(final Repository theRepo, final URI theProp) throws Exception {
// TODO: this is not foolproof.
String aCardQuery = "select distinct ?card where {\n" +
"?s rdf:type owl:Restriction.\n" +
"?s owl:onProperty <"+theProp+">.\n" +
"?s ?cardProp ?card.\n" +
"FILTER (?cardProp = owl:cardinality || ?cardProp = owl:minCardinality || ?cardProp = owl:maxCardinality)\n" +
"}";
TupleQueryResult aResults = Repositories.selectQuery(theRepo, QueryLanguage.SPARQL, aCardQuery);
try {
if (aResults.hasNext()) {
Literal aCard = (Literal) aResults.next().getValue("card");
try {
return Integer.parseInt(aCard.getLabel()) > 1;
}
catch (NumberFormatException e) {
LOGGER.error("Unparseable cardinality value for '" + theProp + "' of '" + aCard + "'", e);
}
}
}
finally {
aResults.close();
}
try {
aResults = Repositories.selectQuery(theRepo, QueryLanguage.SPARQL, "select distinct ?s where { ?s <"+theProp+"> ?o}");
for (BindingSet aBinding : AdunaIterations.iterable(aResults)) {
Collection<Value> aCollection = Sets.newHashSet(Iterators2.present(Iterators.transform(AdunaIterations.iterator(Repositories.getStatements(theRepo, (Resource) aBinding.getValue("s"), theProp, null)),
Statements.objectOptional())));
if (aCollection.size() > 1) {
return true;
}
}
return false;
}
finally {
aResults.close();
}
}
/**
* Return the name of the function (the bean property) for this rdf:Property
* @param theProp the rdf:Property
* @return the name of the Java property/function name
*/
private static String functionName(final URI theProp) {
return className(theProp);
}
/**
* Return all the properties for the given resource. This returns only the properties directly
* associated with the class, not properties inherited from its parents or otherwise inferred from the data.
* @param theRes the resource
* @param theMap the map of resources to properties
* @return a collection of the properties associated with the class
*/
private static Collection<URI> props(final Resource theRes, final Map<Resource, Collection<URI>> theMap) {
Collection<URI> aProps = new HashSet<URI>();
if (theMap.containsKey(theRes)) {
aProps.addAll(theMap.get(theRes));
}
return aProps;
}
/**
* Given a Resource, return the Java class name for that resource
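* <p>For example, {@code http://xmlns.com/foaf/0.1/Person} becomes {@code Person}; if a second,
* distinct class with the same local name is encountered later, a numeric suffix is appended so that
* generated class names stay unique.</p>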
* @param theClass the resource
* @return the name of the Java class
*/
private static String className(Resource theClass) {
if (NAMES.containsKey(theClass)) {
return NAMES.get(theClass);
}
String aLabel;
if (theClass instanceof URI) {
aLabel = ((URI) theClass).getLocalName();
}
else {
aLabel = theClass.stringValue();
}
aLabel = String.valueOf(aLabel.charAt(0)).toUpperCase() + aLabel.substring(1);
aLabel = aLabel.replaceAll(" ", "");
if (NAMES_TO_COUNT.containsKey(aLabel)) {
String aNewLabel = aLabel + NAMES_TO_COUNT.get(aLabel);
NAMES_TO_COUNT.put(aLabel, NAMES_TO_COUNT.get(aLabel)+1);
aLabel = aNewLabel;
}
else {
NAMES_TO_COUNT.put(aLabel, 0);
}
NAMES.put(theClass, aLabel);
return aLabel;
}
/**
* Given an ontology/schema, generate Empire compatible Java beans for each class in the ontology.
* @param thePackageName the name of the package the source should belong to
* @param theOntology the location of the ontology to load
* @param theFormat the RDF format the ontology is in
* @param theDirToSave where to save the generated source code
* @throws Exception if there is an error while generating the source
*/
public static void generateSourceFiles(String thePackageName, URL theOntology, RDFFormat theFormat, File theDirToSave) throws Exception {
// reset naming state so repeated invocations start from a clean slate
NAMES.clear();
NAMES_TO_COUNT.clear();
Repository aRepository = Repositories2.createInMemoryRepo();
Repositories.add(aRepository, theOntology.openStream(), theFormat);
Collection<Resource> aClasses = Sets.newHashSet(Iterators.transform(new MultiIterator<Statement>(AdunaIterations.iterator(Repositories.getStatements(aRepository, null, RDF.TYPE, RDFS.CLASS)),
AdunaIterations.iterator(Repositories.getStatements(aRepository, null, RDF.TYPE, OWL.CLASS))),
new StatementToSubject()));
aClasses = Collections2.filter(aClasses, new Predicate<Resource>() { public boolean apply(Resource theRes) { return theRes instanceof URI; } });
Collection<Resource> aIndClasses = Sets.newHashSet(Iterators.transform(AdunaIterations.iterator(Repositories.getStatements(aRepository, null, RDF.TYPE, null)),
Functions.compose(Functions2.<Value, Resource>cast(Resource.class),
new StatementToObject())));
aClasses.addAll(aIndClasses);
aClasses = Collections2.filter(aClasses, new Predicate<Resource>() {
public boolean apply(final Resource theValue) {
return !theValue.stringValue().startsWith(RDFS.NAMESPACE)
&& !theValue.stringValue().startsWith(RDF.NAMESPACE)
&& !theValue.stringValue().startsWith(OWL.NAMESPACE);
}
});
Map<Resource, Collection<URI>> aMap = new HashMap<Resource, Collection<URI>>();
for (Resource aClass : aClasses) {
if (aClass instanceof BNode) { continue; }
Collection<URI> aProps = Sets.newHashSet(Iterators.transform(AdunaIterations.iterator(Repositories.getStatements(aRepository, null, RDFS.DOMAIN, aClass)),
Functions.compose(Functions2.<Resource, URI>cast(URI.class),
new StatementToSubject())));
// infer properties based on usage in actual instance data
for (BindingSet aBinding : AdunaIterations.iterable(Repositories.selectQuery(aRepository, QueryLanguage.SPARQL, "select distinct ?p where { ?s rdf:type <" + aClass + ">. ?s ?p ?o }"))) {
aProps.add( (URI) aBinding.getValue("p"));
}
// don't include rdf:type as a property
aProps = Collections2.filter(aProps, new Predicate<URI>() {
public boolean apply(final URI theValue) {
return !RDF.TYPE.equals(theValue);
}
});
aMap.put(aClass, aProps);
}
if (!theDirToSave.exists()) {
if (!theDirToSave.mkdirs()) {
throw new IOException("Could not create output directory: " + theDirToSave);
}
}
for (Resource aClass : aMap.keySet()) {
String aSrc = toSource(thePackageName, aRepository, aClass, aMap);
if (aSrc == null) {
continue;
}
File aFile = new File(theDirToSave, className(aClass) + ".java");
System.out.println("Writing source to file: " + aFile.getName());
Files.write(aSrc, aFile, Charsets.UTF_8);
}
}
public static void main(String[] args) throws Exception {
if (args.length < 4) {
System.err.println("Must provide four arguments to the program: the package name, the ontology URL or file path, the RDF format of the ontology (rdf/xml|turtle|ntriples), and the output directory for the generated source code.\n");
System.err.println("For example:\n");
System.err.println("\tBeanGenerator my.package.domain /usr/local/files/myontology.ttl turtle /usr/local/code/src/my/package/domain");
return;
}
URL aURL;
if (NetUtils.isURL(args[1])) {
aURL = new URL(args[1]);
}
else {
aURL = new File(args[1]).toURI().toURL();
}
generateSourceFiles(args[0], aURL, RDFFormat.valueOf(args[2]), new File(args[3]));
}
private static class StatementToObject implements Function<Statement, Value> {
public Value apply(final Statement theIn) {
return theIn.getObject();
}
}
private static class StatementToSubject implements Function<Statement, Resource> {
public Resource apply(final Statement theIn) {
return theIn.getSubject();
}
}
}