/*
* Copyright 2012 Research Studios Austria Forschungsges.m.b.H.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package won.protocol.util.linkeddata;
import org.apache.jena.query.Dataset;
import org.apache.jena.query.DatasetFactory;
import org.apache.jena.rdf.model.Model;
import org.apache.jena.rdf.model.NodeIterator;
import org.apache.jena.rdf.model.Property;
import org.apache.jena.rdf.model.RDFNode;
import org.apache.jena.sparql.core.DatasetGraph;
import org.apache.jena.sparql.expr.nodevalue.NodeValueBoolean;
import org.apache.jena.sparql.path.Path;
import org.apache.jena.tdb.TDB;
import org.apache.jena.tdb.TDBFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import won.protocol.rest.LinkedDataRestClient;
import won.protocol.util.RdfUtils;
import java.net.URI;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
/**
 * LinkedDataSource implementation that delegates fetching linked data resources to the
 * provided LinkedDataRestClient.
 *
 * Besides single-resource fetches, this class offers breadth-first crawls that start at one
 * resource and follow either a list of properties or a list of property paths, bounded by a
 * maximum number of requests and a maximum depth. Failures to fetch a single resource are
 * logged at debug level and yield an empty dataset, so a crawl continues on a best-effort basis.
 */
public class LinkedDataSourceBase implements LinkedDataSource
{
  // Obtained via getClass() so that subclasses log under their own name.
  private final Logger logger = LoggerFactory.getLogger(getClass());
  protected LinkedDataRestClient linkedDataRestClient;

  /**
   * Fetches the linked data for the specified resource via the configured rest client.
   *
   * @param resource the URI to fetch; must not be null (checked by assertion only)
   * @return the data read by the rest client, or a newly created empty general dataset if
   *         fetching threw an exception
   */
  @Override
  public Dataset getDataForResource(URI resource){
    assert resource != null : "resource must not be null";
    logger.debug("fetching linked data for URI {}", resource);
    // fallback result used when the fetch fails
    Dataset dataset = DatasetFactory.createGeneral();
    try {
      dataset = linkedDataRestClient.readResourceData(resource);
    } catch (Exception e){
      // deliberate best-effort: the failure is only logged, callers get the empty dataset
      logger.debug(String.format("Couldn't fetch resource %s",resource),e);
    }
    return dataset;
  }

  /**
   * Fetches the linked data for the specified resource via the configured rest client,
   * passing the requester's WebID along.
   *
   * @param resource the URI to fetch; must not be null (checked by assertion only)
   * @param requesterWebID the WebID handed to the rest client; must not be null
   *                       (checked by assertion only)
   * @return the data read by the rest client, or a newly created empty general dataset if
   *         fetching threw an exception
   */
  @Override
  public Dataset getDataForResource(final URI resource, final URI requesterWebID) {
    assert (resource != null && requesterWebID != null) : "resource and requester must not be null";
    logger.debug("fetching linked data for URI {} requester {}", resource, requesterWebID);
    // fallback result used when the fetch fails
    Dataset dataset = DatasetFactory.createGeneral();
    try {
      dataset = linkedDataRestClient.readResourceData(resource, requesterWebID);
    } catch (Exception e){
      // deliberate best-effort: the failure is only logged, callers get the empty dataset
      logger.debug(String.format("Couldn't fetch resource %s",resource),e);
    }
    return dataset;
  }

  /**
   * Crawls linked data starting at the specified resource, following the objects of the
   * specified properties found in each fetched dataset.
   *
   * @param resourceURI the resource to start at
   * @param properties the properties whose URI-valued objects are crawled next
   * @param maxRequest the maximum number of resources to fetch
   * @param maxDepth the maximum number of crawl rounds
   * @return a dataset (see {@link #makeDataset()}) containing all fetched data
   */
  @Override
  public Dataset getDataForResource(final URI resourceURI, List<URI> properties,
    int maxRequest, int maxDepth) {
    return crawlByProperties(resourceURI, null, false, properties, maxRequest, maxDepth);
  }

  /**
   * Crawls linked data starting at the specified resource on behalf of the specified requester,
   * following the objects of the specified properties found in each fetched dataset.
   *
   * @param resourceURI the resource to start at
   * @param requesterWebID the WebID used for every fetch
   * @param properties the properties whose URI-valued objects are crawled next
   * @param maxRequest the maximum number of resources to fetch
   * @param maxDepth the maximum number of crawl rounds
   * @return a dataset (see {@link #makeDataset()}) containing all fetched data
   */
  @Override
  public Dataset getDataForResource(final URI resourceURI, final URI requesterWebID, final List<URI> properties, final int maxRequest, final int maxDepth) {
    return crawlByProperties(resourceURI, requesterWebID, true, properties, maxRequest, maxDepth);
  }

  /**
   * Crawls linked data starting at the specified resource. After each fetch, the specified
   * property paths are evaluated from the start resource over everything fetched so far, and
   * the resources found are crawled next.
   *
   * @param resourceURI the resource to start at; also the start node of every property path
   * @param properties the property paths to evaluate
   * @param maxRequest the maximum number of resources to fetch
   * @param maxDepth the maximum number of crawl rounds
   * @param moveAllTriplesInDefaultGraph if true, fetched data is copied into the default model
   *        of the result; otherwise fetched datasets are added to the result dataset as they are
   * @return a dataset (see {@link #makeDataset()}) containing all fetched data
   */
  @Override
  public Dataset getDataForResourceWithPropertyPath(final URI resourceURI, final List<Path> properties,
    final int maxRequest, final int maxDepth, final boolean moveAllTriplesInDefaultGraph) {
    return crawlByPropertyPaths(resourceURI, null, false, properties, maxRequest, maxDepth,
      moveAllTriplesInDefaultGraph);
  }

  /**
   * Crawls linked data starting at the specified resource on behalf of the specified requester.
   * After each fetch, the specified property paths are evaluated from the start resource over
   * everything fetched so far, and the resources found are crawled next.
   *
   * @param resourceURI the resource to start at; also the start node of every property path
   * @param requesterWebID the WebID used for every fetch
   * @param properties the property paths to evaluate
   * @param maxRequest the maximum number of resources to fetch
   * @param maxDepth the maximum number of crawl rounds
   * @param moveAllTriplesInDefaultGraph if true, fetched data is copied into the default model
   *        of the result; otherwise fetched datasets are added to the result dataset as they are
   * @return a dataset (see {@link #makeDataset()}) containing all fetched data
   */
  @Override
  public Dataset getDataForResourceWithPropertyPath(final URI resourceURI, final URI requesterWebID, final List<Path> properties, final int maxRequest, final int maxDepth, final boolean moveAllTriplesInDefaultGraph) {
    return crawlByPropertyPaths(resourceURI, requesterWebID, true, properties, maxRequest, maxDepth,
      moveAllTriplesInDefaultGraph);
  }

  /**
   * Fetches one resource through the overridable public fetch methods, so that subclasses
   * overriding them take part in crawls.
   *
   * @param withRequester selects the overload explicitly (instead of testing requesterWebID
   *        for null) so that each public crawl method keeps calling the overload it always did
   */
  private Dataset fetch(final URI uri, final URI requesterWebID, final boolean withRequester) {
    return withRequester ? getDataForResource(uri, requesterWebID) : getDataForResource(uri);
  }

  /**
   * Breadth-first crawl that follows the objects of the specified properties.
   * Shared implementation of the property-based public crawl methods.
   */
  private Dataset crawlByProperties(final URI resourceURI, final URI requesterWebID, final boolean withRequester,
    final List<URI> properties, final int maxRequest, final int maxDepth) {
    // Every URI that was fetched or is queued for fetching. Using this (rather than only the
    // URIs already fetched) as the exclude set keeps self-references and links between URIs
    // of the same round from being fetched a second time.
    Set<URI> scheduledURIs = new HashSet<URI>();
    Set<URI> newlyDiscoveredURIs = new HashSet<URI>();
    newlyDiscoveredURIs.add(resourceURI);
    scheduledURIs.add(resourceURI);
    int depth = 0;
    int requests = 0;
    Dataset dataset = makeDataset();
    OUTER: while (!newlyDiscoveredURIs.isEmpty() && depth < maxDepth && requests < maxRequest){
      Set<URI> urisToCrawl = newlyDiscoveredURIs;
      newlyDiscoveredURIs = new HashSet<URI>();
      for (URI currentURI: urisToCrawl) {
        //add all models from urisToCrawl
        Dataset currentDataset = fetch(currentURI, requesterWebID, withRequester);
        RdfUtils.addDatasetToDataset(dataset, currentDataset);
        Set<URI> discoveredURIs = getURIsToCrawl(currentDataset, scheduledURIs, properties);
        newlyDiscoveredURIs.addAll(discoveredURIs);
        scheduledURIs.addAll(discoveredURIs);
        requests++;
        logger.debug("current Request: {}", requests);
        if (requests >= maxRequest) break OUTER;
      }
      depth++;
      logger.debug("current Depth: {}", depth);
    }
    return dataset;
  }

  /**
   * Breadth-first crawl that follows the specified property paths, evaluated from the start
   * resource over all data fetched so far.
   * Shared implementation of the property-path-based public crawl methods.
   */
  private Dataset crawlByPropertyPaths(final URI resourceURI, final URI requesterWebID, final boolean withRequester,
    final List<Path> properties, final int maxRequest, final int maxDepth,
    final boolean moveAllTriplesInDefaultGraph) {
    // Every URI that was fetched or is queued for fetching. The paths are re-evaluated from
    // the start resource after each fetch and therefore keep yielding URIs that are already
    // queued; excluding the scheduled set prevents fetching them again.
    Set<URI> scheduledURIs = new HashSet<URI>();
    Set<URI> newlyDiscoveredURIs = new HashSet<URI>();
    newlyDiscoveredURIs.add(resourceURI);
    scheduledURIs.add(resourceURI);
    int depth = 0;
    int requests = 0;
    Dataset resultDataset = makeDataset();
    OUTER: while (!newlyDiscoveredURIs.isEmpty() && depth < maxDepth && requests < maxRequest){
      Set<URI> urisToCrawl = newlyDiscoveredURIs;
      newlyDiscoveredURIs = new HashSet<URI>();
      for (URI currentURI: urisToCrawl) {
        //add all models from urisToCrawl
        Dataset currentDataset = fetch(currentURI, requesterWebID, withRequester);
        if (moveAllTriplesInDefaultGraph){
          RdfUtils.copyDatasetTriplesToModel(currentDataset, resultDataset.getDefaultModel());
        } else {
          RdfUtils.addDatasetToDataset(resultDataset, currentDataset);
        }
        Set<URI> discoveredURIs =
          getURIsToCrawlWithPropertyPath(resultDataset, resourceURI, scheduledURIs, properties);
        newlyDiscoveredURIs.addAll(discoveredURIs);
        scheduledURIs.addAll(discoveredURIs);
        requests++;
        logger.debug("current Request: {}", requests);
        if (requests >= maxRequest) break OUTER;
      }
      depth++;
      logger.debug("current Depth: {}", depth);
    }
    return resultDataset;
  }

  /**
   * For the specified resourceURI, evaluates the specified property paths and adds the identified
   * resources to the returned set if they are not contained in the specified exclude set.
   *
   * @param dataset the data to evaluate the property paths on
   * @param resourceURI the resource the property paths start at
   * @param excludedUris URIs that must not be part of the result
   * @param properties the property paths to evaluate
   * @return the URIs found that are not excluded; never null
   */
  private Set<URI> getURIsToCrawlWithPropertyPath(Dataset dataset, URI resourceURI, Set<URI> excludedUris,
    List<Path> properties){
    Set<URI> toCrawl = new HashSet<URI>();
    for (Path propertyPath : properties){
      Iterator<URI> newURIs = RdfUtils.getURIsForPropertyPathByQuery(dataset,
        resourceURI,
        propertyPath);
      while (newURIs.hasNext()){
        URI newUri = newURIs.next();
        if (!excludedUris.contains(newUri)) {
          toCrawl.add(newUri);
        }
      }
    }
    return toCrawl;
  }

  /**
   * For the specified properties, finds their objects and adds the identified
   * resources to the returned set if they are not contained in the specified exclude set.
   * Objects that are not URI resources are ignored.
   *
   * @param dataset the data to search for objects of the properties
   * @param excludedUris URIs that must not be part of the result
   * @param properties the properties whose objects are collected
   * @return the URIs found that are not excluded; never null
   */
  private Set<URI> getURIsToCrawl(Dataset dataset, Set<URI> excludedUris, final List<URI> properties) {
    Set<URI> toCrawl = new HashSet<>();
    for (final URI property : properties){
      NodeIterator objectIterator = RdfUtils.visitFlattenedToNodeIterator(dataset, new RdfUtils.ModelVisitor<NodeIterator>()
      {
        @Override
        public NodeIterator visit(final Model model) {
          final Property p = model.createProperty(property.toString());
          return model.listObjectsOfProperty(p);
        }
      });
      while (objectIterator.hasNext()){
        RDFNode objectNode = objectIterator.next();
        if (objectNode.isURIResource()) {
          URI discoveredUri = URI.create(objectNode.asResource().getURI());
          if (!excludedUris.contains(discoveredUri)){
            toCrawl.add(discoveredUri);
          }
        }
      }
    }
    return toCrawl;
  }

  /**
   * Sets the rest client that all fetches are delegated to.
   *
   * @param linkedDataRestClient the client to use
   */
  public void setLinkedDataRestClient(final LinkedDataRestClient linkedDataRestClient) {
    this.linkedDataRestClient = linkedDataRestClient;
  }

  /**
   * Creates a new dataset backed by a dataset graph obtained from
   * {@link TDBFactory#createDatasetGraph()}, with the TDB union-default-graph symbol set
   * in its context.
   *
   * @return the new dataset
   */
  public static Dataset makeDataset() {
    DatasetGraph dsg = TDBFactory.createDatasetGraph();
    // NOTE(review): the symbol is set to a NodeValueBoolean rather than a plain Boolean;
    // confirm that Jena's context lookup interprets this value as "true" as intended.
    dsg.getContext().set(TDB.symUnionDefaultGraph, new NodeValueBoolean(true));
    return DatasetFactory.create(dsg);
  }
}