package io.github.infolis.algorithm;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import org.apache.commons.collections4.CollectionUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.collect.HashMultimap;
import com.google.common.collect.Multimap;
import io.github.infolis.datastore.DataStoreClient;
import io.github.infolis.datastore.FileResolver;
import io.github.infolis.model.Execution;
import io.github.infolis.model.entity.Entity;
import io.github.infolis.model.entity.EntityLink;
import io.github.infolis.model.entity.InfolisFile;
import io.github.infolis.model.entity.EntityLink.EntityRelation;
/**
* Use links contained in the gold standard for linking new citedData entities.
* Use automated linking methods only for items not contained in the gold standard.
*
* @author kata
*
*/
public class GoldLinker extends OntologyLinker {

    private static final Logger log = LoggerFactory.getLogger(GoldLinker.class);

    /**
     * Tag that is accepted, in addition to the rule file tags, when collecting
     * the outgoing links of a matching gold entity.
     */
    private static final String ONTOLOGY_TAG = "infolis-ontology";

    public GoldLinker(DataStoreClient inputDataStoreClient, DataStoreClient outputDataStoreClient,
            FileResolver inputFileResolver, FileResolver outputFileResolver) {
        super(inputDataStoreClient, outputDataStoreClient, inputFileResolver, outputFileResolver);
    }

    /**
     * Imports the links of all input rule files, determines the entity to link and
     * links it using the imported gold links. If no gold link applies, an
     * {@link OntologyLinker} sub-execution is run for the entity instead and its
     * links are used as result.
     *
     * @throws IllegalArgumentException if no entity to link can be determined from
     *         the execution, or if the determined entity cannot be retrieved
     */
    @Override
    public void execute() {
        List<String> ruleFileTags = new ArrayList<>();
        log.debug("Loading data from rule file...");
        for (InfolisFile ruleFile : getInputDataStoreClient().get(InfolisFile.class, getExecution().getInputFiles())) {
            loadData(ruleFile);
            ruleFileTags.add(getRuleFileTag(ruleFile));
        }

        log.debug("Creating links...");
        String entityUri = resolveEntityUri();
        Entity entity = getInputDataStoreClient().get(Entity.class, entityUri);
        if (null == entity) {
            // fail with a meaningful message instead of a NullPointerException further down
            throw new IllegalArgumentException("GoldLinker: entity not found in data store: " + entityUri);
        }

        List<String> links = link(entity, ruleFileTags);
        if (links.isEmpty()) {
            // no gold link applies: fall back to the automated linking of the superclass algorithm
            Execution exec = getExecution().createSubExecution(OntologyLinker.class);
            exec.setLinkedEntities(Arrays.asList(entityUri));
            exec.setSearchResults(getExecution().getSearchResults());
            exec.instantiateAlgorithm(this).run();
            getExecution().setLinks(exec.getLinks());
        } else {
            getExecution().setLinks(links);
        }
    }

    /**
     * Determines the uri of the entity to link. If textual references are given, a
     * {@link MetaDataExtractor} sub-execution is run on them and its first linked
     * entity is used. If linked entities are given, the first of them takes
     * precedence over the extracted one.
     *
     * @return uri of the entity to link, never null or empty
     * @throws IllegalArgumentException if neither source yields an entity uri
     */
    private String resolveEntityUri() {
        String entityUri = "";
        if (null != getExecution().getTextualReferences() && !getExecution().getTextualReferences().isEmpty()) {
            // NOTE(review): the extractor is run even if a linked entity is given as well,
            // as before; confirm whether it may be skipped in that case
            Execution mde = getExecution().createSubExecution(MetaDataExtractor.class);
            mde.setTextualReferences(getExecution().getTextualReferences());
            mde.instantiateAlgorithm(this).run();
            List<String> extractedEntities = mde.getLinkedEntities();
            // the extractor may yield nothing; this is only fatal if no entity is given either
            if (null != extractedEntities && !extractedEntities.isEmpty()) {
                entityUri = extractedEntities.get(0);
            }
        }
        // if both textual reference and entity are given, only the entity is processed
        List<String> linkedEntities = getExecution().getLinkedEntities();
        if (null != linkedEntities && !linkedEntities.isEmpty()) {
            entityUri = linkedEntities.get(0);
        }
        if (null == entityUri || entityUri.isEmpty()) {
            throw new IllegalArgumentException(
                    "GoldLinker: no entity to link, neither a usable textual reference nor a linked entity is given");
        }
        return entityUri;
    }

    /**
     * Builds the tag marking everything imported from the given rule file.
     *
     * @param ruleFile rule file to build the tag for
     * @return the tag, consisting of a fixed prefix and the uri of the rule file
     */
    private String getRuleFileTag(InfolisFile ruleFile) {
        return "ruleFile_" + ruleFile.getUri();
    }

    /**
     * Runs a {@link LinkImporter} sub-execution on the given rule file. The
     * sub-execution is tagged with the tag of the rule file.
     *
     * @param ruleFile rule file to import
     */
    private void loadData(InfolisFile ruleFile) {
        Execution importerExec = getExecution().createSubExecution(LinkImporter.class);
        importerExec.addTag(getRuleFileTag(ruleFile));
        importerExec.setInputFiles(Arrays.asList(ruleFile.getUri()));
        importerExec.instantiateAlgorithm(this).run();
    }

    /**
     * Builds the list of tags a link may carry to be followed from a gold entity:
     * all rule file tags plus {@link #ONTOLOGY_TAG}.
     *
     * @param ruleFileTags tags of the imported rule files
     * @return new list containing all accepted tags
     */
    private List<String> goldLinkTags(List<String> ruleFileTags) {
        List<String> tags = new ArrayList<>(ruleFileTags);
        tags.add(ONTOLOGY_TAG);
        return tags;
    }

    /**
     * Collects the uris of all links in the output data store that start at the
     * given entity and carry at least one of the given tags.
     *
     * @param fromEntityUri uri of the entity the links start at
     * @param tags accepted tags
     * @return uris of the matching links
     */
    private List<String> getAllOutgoingLinks(String fromEntityUri, List<String> tags) {
        List<String> links = new ArrayList<>();
        for (EntityLink goldLink : getLinksFromDatastore(fromEntityUri, tags)) {
            links.add(goldLink.getUri());
        }
        return links;
    }

    /**
     * Links the entity to every matching entity imported from the rule files. For
     * each match, a new same_as link with confidence 1.0 from the entity to the
     * match is posted to the output data store; the existing outgoing links of the
     * match are added to the result as well.
     *
     * @param entity entity to link
     * @param ruleFileTags tags of the imported rule files
     * @return uris of the new links and of the outgoing links of all matches;
     *         empty if no matching entity is found
     */
    private List<String> link(Entity entity, List<String> ruleFileTags) {
        List<String> links = new ArrayList<>();
        debug(log, "Searching for matching entity in gold links");
        List<String> fromEntityUris = getEntitiesFromDatastore(entity, ruleFileTags);
        if (fromEntityUris.isEmpty()) {
            return links;
        }
        List<String> tags = goldLinkTags(ruleFileTags);
        for (String fromEntityUri : fromEntityUris) {
            EntityLink newLink = new EntityLink();
            newLink.setFromEntity(entity.getUri());
            newLink.setToEntity(fromEntityUri);
            newLink.setConfidence(1.0);
            newLink.setEntityRelations(new HashSet<>(Arrays.asList(EntityRelation.same_as)));
            newLink.setTags(getExecution().getTags());
            getOutputDataStoreClient().post(EntityLink.class, newLink);
            links.add(newLink.getUri());
            links.addAll(getAllOutgoingLinks(fromEntityUri, tags));
        }
        return links;
    }

    /**
     * Alternative to {@link #link(Entity, List)}: instead of creating a same_as
     * link to each matching entity, every outgoing link of a match is copied with
     * the entity as new start entity. The copies are posted to the output data
     * store and carry the tags of the copied link, of the execution and of the
     * rule files.
     *
     * Note: this method is currently not called within this class.
     *
     * @param entity entity to link
     * @param ruleFileTags tags of the imported rule files
     * @return uris of the copied links; empty if no matching entity is found
     */
    private List<String> link_copy(Entity entity, List<String> ruleFileTags) {
        List<String> links = new ArrayList<>();
        debug(log, "Searching for matching entity in gold links");
        List<String> fromEntityUris = getEntitiesFromDatastore(entity, ruleFileTags);
        if (fromEntityUris.isEmpty()) {
            return links;
        }
        // get all links with fromEntityUri as fromEntity and make copies with entity as fromEntity
        List<String> tags = goldLinkTags(ruleFileTags);
        for (String fromEntityUri : fromEntityUris) {
            for (EntityLink goldLink : getLinksFromDatastore(fromEntityUri, tags)) {
                EntityLink newLink = new EntityLink();
                newLink.setConfidence(goldLink.getConfidence());
                newLink.setEntityRelations(goldLink.getEntityRelations());
                newLink.setFromEntity(entity.getUri());
                newLink.setTags(goldLink.getTags());
                newLink.addAllTags(getExecution().getTags());
                newLink.addAllTags(ruleFileTags);
                newLink.setToEntity(goldLink.getToEntity());
                getOutputDataStoreClient().post(EntityLink.class, newLink);
                links.add(newLink.getUri());
            }
        }
        return links;
    }

    /**
     * Checks whether the two collections have at least one element in common.
     */
    private static <T> boolean sharesAny(Iterable<? extends T> first, Iterable<? extends T> second) {
        return !CollectionUtils.intersection(first, second).isEmpty();
    }

    //TODO do all clients use boolean OR?
    /**
     * Searches the output data store for entities matching the given entity. An
     * entity in the data store matches if
     * <ul>
     * <li>it has the same name,</li>
     * <li>its numeric info shares at least one item with the numeric info of the
     * given entity, or both have no numeric info at all, and</li>
     * <li>it carries at least one of the given tags.</li>
     * </ul>
     * Nothing is posted to the data store by this method.
     *
     * Note: this method assumes the search method searches for entities having at least
     * one of the specified properties (OR), not all them (AND).
     *
     * @param entity entity to find matches for
     * @param tags tags of which a matching entity must carry at least one
     * @return uris of all matching entities, empty if there is none
     */
    private List<String> getEntitiesFromDatastore(Entity entity, List<String> tags) {
        List<String> entities = new ArrayList<>();
        String name = entity.getName();
        // only the name is part of the query; numeric info and tags are checked
        // locally on the search results
        Multimap<String, String> query = HashMultimap.create();
        query.put("name", name);
        List<Entity> entitiesInDatabase = getOutputDataStoreClient().search(Entity.class, query);
        for (Entity entityInDatabase : entitiesInDatabase) {
            // equal numeric info is covered by these two cases: equal and non-empty
            // implies shared items, equal and empty implies both empty
            boolean numericInfoMatches = sharesAny(entityInDatabase.getNumericInfo(), entity.getNumericInfo())
                    || (entityInDatabase.getNumericInfo().isEmpty() && entity.getNumericInfo().isEmpty());
            if (numericInfoMatches
                    // null-safe: an entity without name never matches
                    && null != name && name.equals(entityInDatabase.getName())
                    && sharesAny(entityInDatabase.getTags(), tags)) {
                debug(log, "Found entity in datastore: " + entityInDatabase.getUri());
                entities.add(entityInDatabase.getUri());
            }
        }
        debug(log, "found {} matching entities in data store", entities.size());
        return entities;
    }

    // TODO checking tag should not be necessary
    /**
     * Searches the output data store for links starting at the given entity and
     * keeps those carrying at least one of the given tags.
     *
     * Note: this method assumes the search method searches for entities having at least
     * one of the specified properties (OR), not all them (AND).
     *
     * @param fromEntityUri uri of the entity the links start at
     * @param tags tags of which a link must carry at least one
     * @return all matching links, empty if there is none
     */
    private List<EntityLink> getLinksFromDatastore(String fromEntityUri, List<String> tags) {
        List<EntityLink> links = new ArrayList<>();
        Multimap<String, String> query = HashMultimap.create();
        query.put("fromEntity", fromEntityUri);
        for (EntityLink link : getOutputDataStoreClient().search(EntityLink.class, query)) {
            if (sharesAny(link.getTags(), tags)) {
                debug(log, "Found link in datastore: " + link.getUri());
                links.add(link);
            }
        }
        debug(log, "found {} matching links in data store", links.size());
        return links;
    }
}