package io.github.infolis.algorithm;
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import io.github.infolis.InfolisConfig;
import io.github.infolis.datastore.DataStoreClient;
import io.github.infolis.datastore.FileResolver;
import io.github.infolis.model.Execution;
import io.github.infolis.model.ExecutionStatus;
import io.github.infolis.model.TextualReference;
import io.github.infolis.model.entity.Entity;
import io.github.infolis.model.entity.EntityLink;
import io.github.infolis.infolink.querying.QueryService;
/**
* This algorithm extracts metadata from textual references to create or reuse a
* corresponding entity and creates links between entities:
* <ol>
* <li>links between the mentionsReference of the textualReference and the referencedEntity</li>
* <li>links between the referenced entity and entities in an external repository</li>
* </ol>
*
* Used algorithms: MetaDataExtractor - FederatedSearcher - SearchResultLinker
*
* @author kata
*
*/
public class ReferenceLinker extends BaseAlgorithm {

    private static final Logger log = LoggerFactory.getLogger(ReferenceLinker.class);

    public ReferenceLinker(DataStoreClient inputDataStoreClient, DataStoreClient outputDataStoreClient,
            FileResolver inputFileResolver, FileResolver outputFileResolver) {
        super(inputDataStoreClient, outputDataStoreClient, inputFileResolver, outputFileResolver);
    }

    /**
     * Creates and stores an {@link EntityLink} with relation {@code references} between two entities.
     * The link confidence is taken from the reliability of the target entity, the link reason is the
     * URI of the textual reference, and the tags are those of the textual reference plus those of the
     * current execution.
     *
     * @param fromEntityUri URI of the link's source entity
     * @param toEntityUri URI of the link's target entity; it is loaded from the output data store
     * @param textualReference the textual reference that justifies the link
     * @return the URI of the posted link
     */
    private String createLinkToEntity(String fromEntityUri, String toEntityUri, TextualReference textualReference) {
        Entity referencedEntity = getOutputDataStoreClient().get(Entity.class, toEntityUri);

        Set<EntityLink.EntityRelation> entityRelations = new HashSet<>();
        entityRelations.add(EntityLink.EntityRelation.references);

        EntityLink link = new EntityLink();
        link.setConfidence(referencedEntity.getEntityReliability());
        link.setFromEntity(fromEntityUri);
        link.setToEntity(toEntityUri);
        link.setEntityRelations(entityRelations);
        link.setLinkReason(textualReference.getUri());
        // NOTE(review): if setTags stores the passed collection without copying, the following
        // addAllTags call also modifies the tags of the textual reference - confirm in EntityLink.
        link.setTags(textualReference.getTags());
        link.addAllTags(getExecution().getTags());
        getOutputDataStoreClient().post(EntityLink.class, link);
        return link.getUri();
    }

    /**
     * Searches the configured repositories for the given entity and creates links for all search results.
     * Results of class-based and name-based query services are combined, so that configuring both kinds
     * does not discard the results of the class-based search.
     *
     * @param referencedEntity URI of the entity to search for
     * @param queryServices URIs/names of query services to use; may be {@code null} or empty
     * @param queryServiceClasses query service classes to use; may be {@code null} or empty
     * @param cachePath path of the query cache file handed to the searcher
     * @return URIs of the created links; empty if no search results were found
     */
    private List<String> linkEntity(String referencedEntity, List<String> queryServices,
            List<Class<? extends QueryService>> queryServiceClasses, String cachePath) {
        List<String> searchResults = new ArrayList<>();
        if (hasElements(queryServiceClasses)) {
            searchResults.addAll(searchClassInRepositories(referencedEntity, queryServiceClasses, cachePath));
        }
        if (hasElements(queryServices)) {
            searchResults.addAll(searchInRepositories(referencedEntity, queryServices, cachePath));
        }

        List<String> links = new ArrayList<>();
        if (!searchResults.isEmpty()) {
            // TODO check. Previously used textual reference
            links.addAll(createLinksForSearchResults(searchResults, referencedEntity));
        }
        return links;
    }

    /**
     * Creates links for all given textual references. A private query cache file is created below
     * {@code <tmp>/cache} for the duration of the call and removed afterwards, also when linking fails.
     *
     * @param textualReferences URIs of the textual references to link
     * @return URIs of all created entity links
     * @throws IOException if the cache directory or cache file cannot be created
     */
    private List<String> linkReferences(List<String> textualReferences) throws IOException {
        // create query cache; createDirectories is a no-op if the directory already exists
        Path generalCachePath = Paths.get(InfolisConfig.getTmpFilePath().toString(), "cache");
        Files.createDirectories(generalCachePath);
        Path privateCachePath = Files.createTempDirectory(generalCachePath, Long.toString(System.nanoTime()));
        Path cacheFile = null;
        try {
            cacheFile = Files.createTempFile(privateCachePath, "querycache", ".txt");
            String cachePath = cacheFile.toFile().getCanonicalPath();
            List<String> entityLinks = linkReferencesUsingCache(textualReferences, cachePath);
            log.debug("Returning entity links: {}", entityLinks);
            return entityLinks;
        } finally {
            // best-effort cleanup, executed on failure as well so that no temp files are leaked
            if (null != cacheFile) {
                deleteQuietly(cacheFile);
            }
            deleteQuietly(privateCachePath);
        }
    }

    /**
     * Extracts the referenced entity for each textual reference, links the mentioning entity to it and,
     * once per referenced entity, links the referenced entity to entities found in the repositories.
     */
    private List<String> linkReferencesUsingCache(List<String> textualReferences, String cachePath) {
        // update links for entities only once per execution
        Set<String> entitiesWithUpdatedLinks = new HashSet<>();
        List<String> entityLinks = new ArrayList<>();
        for (String textualReferenceUri : textualReferences) {
            String toEntityUri = extractMetaData(textualReferenceUri, getOutputDataStoreClient());
            debug(log, "Extracted metadata from reference");
            TextualReference textRef = getOutputDataStoreClient().get(TextualReference.class, textualReferenceUri);
            entityLinks.add(createLinkToEntity(textRef.getMentionsReference(), toEntityUri, textRef));
            // create links from referenced entity to entities in repository, if not already linked;
            // Set.add returns false if the entity was already processed
            if (entitiesWithUpdatedLinks.add(toEntityUri)) {
                entityLinks.addAll(linkEntity(toEntityUri, getExecution().getQueryServices(),
                        getExecution().getQueryServiceClasses(), cachePath));
            }
            // TODO else return all existing links of entity
        }
        return entityLinks;
    }

    /** Deletes the given path if it exists; failures are logged instead of propagated. */
    private static void deleteQuietly(Path path) {
        try {
            Files.deleteIfExists(path);
        } catch (IOException e) {
            log.warn("Could not delete temporary cache path " + path, e);
        }
    }

    /** Returns {@code true} if the list is neither {@code null} nor empty. */
    private static boolean hasElements(List<?> list) {
        return null != list && !list.isEmpty();
    }

    /**
     * Runs a {@link MetaDataExtractor} sub-execution on a single textual reference.
     *
     * @param textualReference URI of the textual reference to extract metadata from
     * @param client data store client used as output client of the extractor
     * @return URI of the first entity linked by the extractor
     */
    public String extractMetaData(String textualReference, DataStoreClient client) {
        Execution extract = getExecution().createSubExecution(MetaDataExtractor.class);
        extract.setTextualReferences(Arrays.asList(textualReference));
        getOutputDataStoreClient().post(Execution.class, extract);
        extract.instantiateAlgorithm(getInputDataStoreClient(), client,
                getInputFileResolver(), getOutputFileResolver()).run();
        // NOTE(review): throws if the extractor produced no linked entity - confirm that
        // MetaDataExtractor always yields at least one entity.
        String entityUri = extract.getLinkedEntities().get(0);
        updateProgress(1, 3);
        return entityUri;
    }

    /**
     * Searches for the given entity using query services specified by name/URI.
     *
     * @param entityUri URI of the entity to search for
     * @param queryServices query services to use
     * @param cachePath path of the query cache, passed to the searcher as index directory
     * @return the search results of the {@link FederatedSearcher} sub-execution
     */
    public List<String> searchInRepositories(String entityUri, List<String> queryServices, String cachePath) {
        Execution searchRepo = createFederatedSearch(entityUri, cachePath);
        searchRepo.setQueryServices(queryServices);
        return runFederatedSearch(searchRepo);
    }

    /**
     * Searches for the given entity using query services specified by class.
     *
     * @param entityUri URI of the entity to search for
     * @param queryServices query service classes to use
     * @param cachePath path of the query cache, passed to the searcher as index directory
     * @return the search results of the {@link FederatedSearcher} sub-execution
     */
    public List<String> searchClassInRepositories(String entityUri, List<Class<? extends QueryService>> queryServices, String cachePath) {
        Execution searchRepo = createFederatedSearch(entityUri, cachePath);
        searchRepo.setQueryServiceClasses(queryServices);
        return runFederatedSearch(searchRepo);
    }

    /** Creates a FederatedSearcher sub-execution with the settings common to both search variants. */
    private Execution createFederatedSearch(String entityUri, String cachePath) {
        Execution searchRepo = getExecution().createSubExecution(FederatedSearcher.class);
        searchRepo.setSearchResultLinkerClass(getExecution().getSearchResultLinkerClass());
        searchRepo.setLinkedEntities(Arrays.asList(entityUri));
        searchRepo.setIndexDirectory(cachePath);
        return searchRepo;
    }

    /** Posts and runs the given FederatedSearcher sub-execution and returns its search results. */
    private List<String> runFederatedSearch(Execution searchRepo) {
        getOutputDataStoreClient().post(Execution.class, searchRepo);
        searchRepo.instantiateAlgorithm(this).run();
        updateProgress(2, 3);
        debug(log, "FederatedSearcher returned " + searchRepo.getSearchResults().size() + " search results");
        return searchRepo.getSearchResults();
    }

    /**
     * Runs the configured search result linker on the given search results.
     *
     * @param searchResults URIs of the search results to create links for
     * @param entityUri URI of the entity the search results belong to
     * @return the links produced by the linker sub-execution
     */
    public List<String> createLinksForSearchResults(List<String> searchResults, String entityUri) {
        Execution linker = getExecution().createSubExecution(getExecution().getSearchResultLinkerClass());
        linker.setSearchResults(searchResults);
        linker.setLinkedEntities(Arrays.asList(entityUri));
        if (null != getExecution().getInputFiles() && !getExecution().getInputFiles().isEmpty()) {
            linker.setInputFiles(getExecution().getInputFiles());
        }
        getOutputDataStoreClient().post(Execution.class, linker);
        debug(log, "Creating links based on " + searchResults.size() + " search results");
        linker.instantiateAlgorithm(this).run();
        updateProgress(3, 3);
        debug(log, "Returning links: " + linker.getLinks());
        return linker.getLinks();
    }

    /**
     * Collects the textual references selected by tag, adds them to the textual references of the
     * execution, links all of them and stores the resulting links in the execution.
     *
     * @throws IOException if the query cache cannot be created
     */
    @Override
    public void execute() throws IOException {
        Execution tagSearcher = getExecution().createSubExecution(TagSearcher.class);
        tagSearcher.setTextualReferenceTags(getExecution().getTextualReferenceTags());
        tagSearcher.instantiateAlgorithm(this).run();

        // validate() accepts a missing list of textual references if tags are given,
        // so the list has to be created here before references found by tag can be added
        List<String> textualReferences = getExecution().getTextualReferences();
        if (null == textualReferences) {
            textualReferences = new ArrayList<>();
            getExecution().setTextualReferences(textualReferences);
        }
        List<String> referencesFoundByTag = tagSearcher.getTextualReferences();
        if (null != referencesFoundByTag) {
            textualReferences.addAll(referencesFoundByTag);
        }

        List<String> entityLinks = linkReferences(textualReferences);
        getExecution().setLinks(entityLinks);
        getExecution().setStatus(ExecutionStatus.FINISHED);
    }

    /**
     * Checks that textual references or tags, at least one query service and a search result linker
     * class are specified.
     *
     * @throws IllegalAlgorithmArgumentException if one of the required parameters is missing
     */
    @Override
    public void validate() throws IllegalAlgorithmArgumentException {
        boolean queryServiceSet = hasElements(getExecution().getQueryServiceClasses())
                || hasElements(getExecution().getQueryServices());
        // If textualReferences is empty, do not throw an exception. If used automatically after searching for
        // patterns, the list of textual references may be empty, it is not an error.
        // If, however, ReferenceLinker is applied directly on existing textual references specified by their tags, the
        // list should not be empty and throwing an error is assumed to be helpful for the user.
        if (null == getExecution().getTextualReferences()
                && (null == getExecution().getTextualReferenceTags() || getExecution().getTextualReferenceTags().isEmpty())) {
            throw new IllegalAlgorithmArgumentException(getClass(), "TextualReference", "Required parameter 'textual references' is missing!");
        }
        if (!queryServiceSet) {
            throw new IllegalAlgorithmArgumentException(getClass(), "queryService", "Required parameter 'query services' is missing!");
        }
        if (null == getExecution().getSearchResultLinkerClass()) {
            throw new IllegalAlgorithmArgumentException(getClass(), "searchResultLinkerClass", "Required parameter 'SearchResultLinkerClass' is missing!");
        }
    }
}