package io.github.infolis.algorithm;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Map.Entry;
import java.util.Set;
import javax.json.Json;
import javax.json.JsonArray;
import javax.json.JsonObject;
import javax.json.JsonReader;
import javax.json.JsonString;
import javax.json.JsonValue;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import io.github.infolis.InfolisConfig;
import io.github.infolis.datastore.DataStoreClient;
import io.github.infolis.datastore.FileResolver;
import io.github.infolis.datastore.LocalClient;
import io.github.infolis.model.EntityType;
import io.github.infolis.model.entity.Entity;
import io.github.infolis.model.entity.EntityLink;
import io.github.infolis.model.entity.EntityLink.EntityRelation;
import io.github.infolis.model.entity.InfolisFile;
/**
 * Imports entities and entityLinks from JSON link files.
 *
 * <p>Each input file is expected to hold one JSON object. Its {@code "entity"} member maps
 * arbitrary keys to entity descriptions, its {@code "entityLink"} member maps arbitrary keys
 * to link descriptions; any other top-level member is ignored. Every imported entity and link
 * is posted to the output datastore. Entities that carry at least one identifier are
 * additionally connected with a {@code same_as} link to the matching entity in the ontology,
 * if such an entity can be retrieved from the output datastore.
 *
 * @author kata
 *
 */
public class LinkImporter extends BaseAlgorithm {

    private static final Logger log = LoggerFactory.getLogger(LinkImporter.class);

    /** Top-level JSON member holding the entity descriptions. */
    private static final String ENTITY_KEY = "entity";

    /** Top-level JSON member holding the entityLink descriptions. */
    private static final String ENTITY_LINK_KEY = "entityLink";

    public LinkImporter(DataStoreClient inputDataStoreClient, DataStoreClient outputDataStoreClient,
            FileResolver inputFileResolver, FileResolver outputFileResolver) {
        super(inputDataStoreClient, outputDataStoreClient, inputFileResolver, outputFileResolver);
    }

    /**
     * Creates entities and entityLinks from one link file, posts them to the output datastore
     * and links entities to corresponding entities in the ontology. The URIs of everything
     * that was posted are stored on the execution afterwards.
     *
     * @param linkFile the file to import
     * @throws IOException if the file cannot be opened or read
     */
    private void importLinkFile(InfolisFile linkFile) throws IOException {
        List<String> importedEntities = new ArrayList<>();
        List<String> importedLinks = new ArrayList<>();

        JsonObject jsonObject = readJsonObject(linkFile);
        // Sections are handled in document order, exactly as they appear in the file.
        for (Entry<String, JsonValue> section : jsonObject.entrySet()) {
            if (ENTITY_KEY.equals(section.getKey())) {
                importEntities((JsonObject) section.getValue(), importedEntities, importedLinks);
            } else if (ENTITY_LINK_KEY.equals(section.getKey())) {
                importLinks((JsonObject) section.getValue(), importedLinks);
            }
        }

        getExecution().setLinkedEntities(importedEntities);
        getExecution().setLinks(importedLinks);
    }

    /**
     * Reads the complete content of the given file as a single JSON object, decoding it as
     * UTF-8. The underlying stream and readers are closed before this method returns.
     *
     * @param linkFile the file to read
     * @return the parsed top-level JSON object
     * @throws IOException if the file cannot be opened or closed
     */
    private JsonObject readJsonObject(InfolisFile linkFile) throws IOException {
        try (InputStream in = getInputFileResolver().openInputStream(linkFile);
                JsonReader jsonReader = Json.createReader(
                        new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8)))) {
            return jsonReader.readObject();
        }
    }

    /**
     * Posts every entity described in the {@code "entity"} section and tries to connect each
     * one to the ontology.
     *
     * @param entities         JSON object mapping arbitrary keys to entity descriptions
     * @param importedEntities receives the URI of every posted entity
     * @param importedLinks    receives the URI of every ontology link that was created
     */
    private void importEntities(JsonObject entities, List<String> importedEntities, List<String> importedLinks) {
        for (Entry<String, JsonValue> entityEntry : entities.entrySet()) {
            Entity entity = parseEntity((JsonObject) entityEntry.getValue());
            getOutputDataStoreClient().post(Entity.class, entity);
            importedEntities.add(entity.getUri());
            debug(log, "imported entity {}", entity.getUri());
            linkToOntology(entity, importedLinks);
        }
    }

    /**
     * Builds an {@link Entity} from its JSON description. Every field is optional: a member
     * that is absent or JSON {@code null} leaves the corresponding property untouched. The
     * tags of the current execution are always added.
     *
     * @param entityValues JSON description of one entity
     * @return the populated, not yet posted entity
     * @throws IllegalArgumentException if {@code "entityType"} names no {@link EntityType}
     *                                  constant or {@code "reliability"} is not numeric
     * @throws ClassCastException       if a present member has an unexpected JSON type
     */
    private Entity parseEntity(JsonObject entityValues) {
        Entity entity = new Entity();

        List<String> identifiers = optionalStringList(entityValues, "identifiers");
        if (null != identifiers) {
            entity.setIdentifiers(identifiers);
        }
        String abstractText = optionalString(entityValues, "abstractText");
        if (null != abstractText) {
            entity.setAbstractText(abstractText);
        }
        List<String> alternativeNames = optionalStringList(entityValues, "alternativeNames");
        if (null != alternativeNames) {
            entity.setAlternativeNames(alternativeNames);
        }
        List<String> authors = optionalStringList(entityValues, "authors");
        if (null != authors) {
            entity.setAuthors(authors);
        }
        String entityType = optionalString(entityValues, "entityType");
        if (null != entityType) {
            entity.setEntityType(EntityType.valueOf(entityType));
        }
        String language = optionalString(entityValues, "language");
        if (null != language) {
            entity.setLanguage(language);
        }
        String name = optionalString(entityValues, "name");
        if (null != name) {
            entity.setName(name);
        }
        List<String> numericInfo = optionalStringList(entityValues, "numericInfo");
        if (null != numericInfo) {
            entity.setNumericInfo(numericInfo);
        }
        List<String> spatial = optionalStringList(entityValues, "spatial");
        if (null != spatial) {
            entity.setSpatial(new HashSet<>(spatial));
        }
        List<String> subjects = optionalStringList(entityValues, "subjects");
        if (null != subjects) {
            entity.setSubjects(subjects);
        }
        String versionInfo = optionalString(entityValues, "versionInfo");
        if (null != versionInfo) {
            entity.setVersionInfo(versionInfo);
        }
        List<String> tags = optionalStringList(entityValues, "tags");
        if (null != tags) {
            entity.setTags(new HashSet<>(tags));
        }
        // Execution tags are added after the file's own tags so that neither replaces the other.
        entity.addAllTags(getExecution().getTags());
        String url = optionalString(entityValues, "url");
        if (null != url) {
            entity.setURL(url);
        }
        Double reliability = optionalDouble(entityValues, "reliability");
        if (null != reliability) {
            entity.setEntityReliability(reliability);
        }
        return entity;
    }

    /**
     * Creates and posts a {@code same_as} link with confidence 1.0 from the given entity to
     * its counterpart in the ontology. Does nothing if the entity has no identifiers or no
     * counterpart can be retrieved.
     *
     * @param entity        an entity that has already been posted
     * @param importedLinks receives the URI of the created link, if any
     */
    private void linkToOntology(Entity entity, List<String> importedLinks) {
        if (null == entity.getIdentifiers() || entity.getIdentifiers().isEmpty()) {
            return;
        }
        String ontologyEntity = getOntologyEntity(entity);
        if (null == ontologyEntity) {
            return;
        }
        EntityLink ontoLink = createLink(entity.getUri(), ontologyEntity,
                new HashSet<EntityRelation>(Arrays.asList(EntityRelation.same_as)));
        ontoLink.setTags(getExecution().getTags());
        ontoLink.setConfidence(1.0);
        getOutputDataStoreClient().post(EntityLink.class, ontoLink);
        importedLinks.add(ontoLink.getUri());
    }

    /**
     * Posts every link described in the {@code "entityLink"} section. Links lacking
     * {@code "fromEntity"} or {@code "toEntity"} are skipped with a warning; all other members
     * are optional. The tags of the current execution are always added.
     *
     * @param links         JSON object mapping arbitrary keys to link descriptions
     * @param importedLinks receives the URI of every posted link
     * @throws IllegalArgumentException if an entity relation names no {@link EntityRelation}
     *                                  constant or {@code "confidence"} is not numeric
     * @throws ClassCastException       if a present member has an unexpected JSON type
     */
    private void importLinks(JsonObject links, List<String> importedLinks) {
        for (Entry<String, JsonValue> linkEntry : links.entrySet()) {
            JsonObject linkValues = (JsonObject) linkEntry.getValue();

            // Required members are checked up front so that no work is spent on unusable links.
            String fromEntity = optionalString(linkValues, "fromEntity");
            if (null == fromEntity) {
                warn(log, "entityLink missing fromEntity. Ignoring link");
                continue;
            }
            String toEntity = optionalString(linkValues, "toEntity");
            if (null == toEntity) {
                warn(log, "entityLink missing toEntity. Ignoring link");
                continue;
            }

            EntityLink link = new EntityLink();
            Double confidence = optionalDouble(linkValues, "confidence");
            if (null != confidence) {
                link.setConfidence(confidence);
            }
            JsonValue entityRelations = presentValue(linkValues, "entityRelations");
            if (null != entityRelations) {
                link.setEntityRelations(new HashSet<>(toEntityRelationList(entityRelations)));
            }
            link.setFromEntity(fromEntity);
            String linkReason = optionalString(linkValues, "linkReason");
            if (null != linkReason) {
                link.setLinkReason(linkReason);
            }
            List<String> tags = optionalStringList(linkValues, "tags");
            if (null != tags) {
                link.setTags(new HashSet<>(tags));
            }
            link.addAllTags(getExecution().getTags());
            link.setToEntity(toEntity);

            getOutputDataStoreClient().post(EntityLink.class, link);
            importedLinks.add(link.getUri());
            debug(log, "imported entityLink {}", link.getUri());
        }
    }

    /**
     * Looks up a member and treats JSON {@code null} like a missing member.
     *
     * @return the member's value, or {@code null} if it is absent or JSON {@code null}
     */
    private static JsonValue presentValue(JsonObject json, String key) {
        JsonValue value = json.get(key);
        if (null == value || JsonValue.ValueType.NULL == value.getValueType()) {
            return null;
        }
        return value;
    }

    /**
     * Reads an optional string member.
     *
     * @return the string, or {@code null} if the member is absent or JSON {@code null}
     * @throws ClassCastException if the member is present but not a JSON string
     */
    private static String optionalString(JsonObject json, String key) {
        JsonValue value = presentValue(json, key);
        return null == value ? null : ((JsonString) value).getString();
    }

    /**
     * Reads an optional array member as a list of strings.
     *
     * @return the list, or {@code null} if the member is absent or JSON {@code null}
     * @throws ClassCastException if the member is present but not a JSON array
     */
    private static List<String> optionalStringList(JsonObject json, String key) {
        JsonValue value = presentValue(json, key);
        return null == value ? null : toList(value);
    }

    /**
     * Reads an optional numeric member. Both JSON numbers and JSON strings containing a
     * number are accepted.
     *
     * @return the number, or {@code null} if the member is absent or JSON {@code null}
     * @throws NumberFormatException if the member is present but not parseable as a double
     */
    private static Double optionalDouble(JsonObject json, String key) {
        JsonValue value = presentValue(json, key);
        return null == value ? null : Double.valueOf(textOf(value));
    }

    /**
     * Returns the plain text of a JSON value: the unescaped, unquoted content for JSON
     * strings, the JSON serialization for everything else.
     */
    private static String textOf(JsonValue value) {
        if (value instanceof JsonString) {
            // toString() would return the quoted form with JSON escapes still in place.
            return ((JsonString) value).getString();
        }
        return value.toString();
    }

    /**
     * Converts a JSON array of relation names to the corresponding {@link EntityRelation}
     * constants, preserving order.
     *
     * @param jsonValue a JSON array
     * @throws IllegalArgumentException if an element names no {@link EntityRelation} constant
     * @throws ClassCastException       if {@code jsonValue} is not a JSON array
     */
    private static List<EntityRelation> toEntityRelationList(JsonValue jsonValue) {
        List<EntityRelation> list = new ArrayList<>();
        JsonArray array = (JsonArray) jsonValue;
        for (JsonValue val : array) {
            list.add(EntityRelation.valueOf(textOf(val)));
        }
        return list;
    }

    /**
     * Converts a JSON array to a list holding the plain text of each element, preserving
     * order.
     *
     * @param jsonValue a JSON array
     * @throws ClassCastException if {@code jsonValue} is not a JSON array
     */
    private static List<String> toList(JsonValue jsonValue) {
        List<String> list = new ArrayList<>();
        JsonArray array = (JsonArray) jsonValue;
        for (JsonValue val : array) {
            list.add(textOf(val));
        }
        return list;
    }

    /**
     * Creates a link between two entities without posting it.
     *
     * @param fromEntityUri   URI of the link's source entity
     * @param toEntityUri     URI of the link's target entity
     * @param entityRelations relations holding between source and target
     * @return the new link
     */
    private EntityLink createLink(String fromEntityUri, String toEntityUri, Set<EntityRelation> entityRelations) {
        EntityLink link = new EntityLink();
        link.setFromEntity(fromEntityUri);
        link.setToEntity(toEntityUri);
        link.setEntityRelations(entityRelations);
        return link;
    }

    /**
     * Determines the URI of the ontology entity corresponding to the given entity. The URI is
     * derived from the entity's first identifier with all {@code '/'} and {@code '.'} removed.
     *
     * @param entity an entity with at least one identifier
     * @return the ontology entity's URI, or {@code null} if the output datastore does not
     *         return an entity for it
     */
    private String getOntologyEntity(Entity entity) {
        String ontologyUri = "dataset_" + entity.getIdentifiers().get(0)
                .replace("/", "")
                .replace(".", "");
        // TODO add "getUriPrefix"-method to AbstractClient and implementing classes; use this here
        // NOTE(review): this is true only when the client's runtime class is LocalClient itself
        // (or a supertype of it); subclasses of LocalClient get the frontend prefix. Confirm
        // whether "instanceof LocalClient" was intended.
        if (!getOutputDataStoreClient().getClass().isAssignableFrom(LocalClient.class)) {
            ontologyUri = InfolisConfig.getFrontendURI() + "/entity/" + ontologyUri;
        }
        Entity ontologyEntity;
        try {
            ontologyEntity = getOutputDataStoreClient().get(Entity.class, ontologyUri);
        } catch (RuntimeException e) {
            // Best effort: a failed lookup is treated as "no ontology entity available".
            return null;
        }
        return null != ontologyEntity ? ontologyUri : null;
    }

    /**
     * Imports all input files of the current execution one after another and updates the
     * progress after each file.
     *
     * @throws IOException if a link file cannot be read
     */
    @Override
    public void execute() throws IOException {
        int i = 0;
        for (InfolisFile linkFile : getInputDataStoreClient().get(InfolisFile.class, getExecution().getInputFiles())) {
            log.debug("Importing {}", linkFile);
            importLinkFile(linkFile);
            i++;
            updateProgress(i, getExecution().getInputFiles().size());
        }
    }

    /**
     * Checks that the execution has at least one input file.
     *
     * @throws IllegalArgumentException if no input file is set
     */
    @Override
    public void validate() throws IllegalAlgorithmArgumentException {
        if (null == getExecution().getInputFiles() || getExecution().getInputFiles().isEmpty()) {
            throw new IllegalArgumentException("Must set at least one inputFile!");
        }
    }
}