package io.github.infolis.algorithm; import java.io.IOException; import java.util.List; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import io.github.infolis.datastore.DataStoreClient; import io.github.infolis.datastore.FileResolver; import io.github.infolis.model.Execution; import io.github.infolis.model.ExecutionStatus; /** * * This algorithm searches for a set of given patterns, then executes the * ReferenceLinker algorithm to create EntityLinks from the resulting textualReferences. * * Used algorithms: InfolisPatternSearcher - ReferenceLinker * * @author domi * @author kata * */ public class SearchPatternsAndCreateLinks extends ComplexAlgorithm { public SearchPatternsAndCreateLinks(DataStoreClient inputDataStoreClient, DataStoreClient outputDataStoreClient, FileResolver inputFileResolver, FileResolver outputFileResolver) { super(inputDataStoreClient, outputDataStoreClient, inputFileResolver, outputFileResolver); } private static final Logger log = LoggerFactory.getLogger(SearchPatternsAndCreateLinks.class); @Override public void execute() throws IOException { Execution tagExec = getExecution().createSubExecution(TagSearcher.class); tagExec.getInfolisFileTags().addAll(getExecution().getInfolisFileTags()); tagExec.getInfolisPatternTags().addAll(getExecution().getInfolisPatternTags()); tagExec.instantiateAlgorithm(this).run(); getExecution().getPatterns().addAll(tagExec.getPatterns()); getExecution().getInputFiles().addAll(tagExec.getInputFiles()); preprocessInputFiles(); List<String> textualRefs = searchPatterns(getExecution().getPatterns(), getExecution().getInputFiles()); List<String> createdLinks = createLinks(textualRefs); debug(log, "Created links: " + createdLinks); getExecution().setLinks(createdLinks); getExecution().setStatus(ExecutionStatus.FINISHED); } private List<String> searchPatterns(List<String> patterns, List<String> input) { debug(log, "Executing InfolisPatternSearcher with patterns " + patterns); Execution search = getExecution().createSubExecution(InfolisPatternSearcher.class); search.setPatterns(patterns); search.setInputFiles(input); search.setPhraseSlop(getExecution().getPhraseSlop()); search.setUpperCaseConstraint(getExecution().isUpperCaseConstraint()); search.setIndexDirectory(getExecution().getIndexDirectory()); getOutputDataStoreClient().post(Execution.class, search); search.instantiateAlgorithm(this).run(); updateProgress(1, 2); debug(log, "Done executing InfolisPatternSearcher, found textualReferences: " + search.getTextualReferences()); return search.getTextualReferences(); } protected List<String> createLinks(List<String> textualRefs) { Execution exec = getExecution().createSubExecution(ReferenceLinker.class); if (null != getExecution().getQueryServices()) { exec.setQueryServices(getExecution().getQueryServices()); } if (null != getExecution().getQueryServiceClasses()) { exec.setQueryServiceClasses(getExecution().getQueryServiceClasses()); } exec.setTextualReferences(textualRefs); exec.setSearchResultLinkerClass(getExecution().getSearchResultLinkerClass()); exec.instantiateAlgorithm(this).run(); updateProgress(2, 2); debug(log, "Done executing ReferenceLinker, created entityLinks: " + exec.getLinks()); return exec.getLinks(); } @Override public void validate() throws IllegalAlgorithmArgumentException { Execution exec = this.getExecution(); if ((null == exec.getInputFiles() || exec.getInputFiles().isEmpty()) && (null == exec.getInfolisFileTags() || exec.getInfolisFileTags().isEmpty())){ throw new IllegalArgumentException("Must set at least one inputFile!"); } if ((null == exec.getPatterns() || exec.getPatterns().isEmpty()) && (null == exec.getInfolisPatternTags() || exec.getInfolisPatternTags().isEmpty())) { throw new IllegalArgumentException("No patterns given."); } boolean queryServiceSet = false; if (null != exec.getQueryServiceClasses() && !exec.getQueryServiceClasses().isEmpty()) { queryServiceSet = true; } if (null != exec.getQueryServices() && !exec.getQueryServices().isEmpty()) { queryServiceSet = true; } if (!queryServiceSet) { throw new IllegalAlgorithmArgumentException(getClass(), "queryService", "Required parameter 'query services' is missing!"); } if (null == exec.getSearchResultLinkerClass()) { throw new IllegalAlgorithmArgumentException(getClass(), "searchResultLinkerClass", "Required parameter 'SearchResultLinkerClass' is missing!"); } if (null == exec.isTokenize()) { warn(log, "tokenize parameter unspecified. Setting to true for SearchPatternsAndCreateLinks"); exec.setTokenize(true); } } }