package io.github.infolis.algorithm; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.OutputStream; import java.nio.file.Files; import java.nio.file.Path; import java.util.ArrayList; import java.util.HashSet; import java.util.List; import org.apache.commons.io.IOUtils; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import io.github.infolis.InfolisBaseTest; import io.github.infolis.model.Execution; import io.github.infolis.model.entity.EntityLink; import io.github.infolis.model.entity.InfolisFile; import io.github.infolis.model.entity.InfolisPattern; import io.github.infolis.infolink.querying.DaraHTMLQueryService; import io.github.infolis.infolink.querying.QueryService; import io.github.infolis.util.SerializationUtils; /** * Tests for the SearchPatternsAndCreateLinks algorithm. * * @author kata * @author domi */ public class SearchPatternsAndCreateLinksTest extends InfolisBaseTest { private static final Logger log = LoggerFactory.getLogger(SearchPatternsAndCreateLinksTest.class); /** * Applies a given set of pattern (loaded from a file) and resolves the * references. * * @throws IOException */ @Test public void testSearchPatternsAndCreateLinks() throws IOException { File txtDir = new File(getClass().getResource("/examples/minimal-txt").getFile()); InfolisPattern infolisPattern = new InfolisPattern(); String regex = ".*?Datenbasis: (\\S*?\\s?\\S+?\\s?\\S+?\\s?\\S+?\\s?\\S*?), eigene Berechnung.*?"; infolisPattern.setPatternRegex(regex); infolisPattern.setLuceneQuery("\"Datenbasis\\\\: * eigene Berechnung\\\\)\""); HashSet<String> tags = new HashSet<String>(); tags.add("test"); infolisPattern.setTags(tags); //post all important stuff dataStoreClient.post(InfolisPattern.class, infolisPattern); List<String> txt = postTxtFiles(txtDir); List<String> qServices = postQueryServices(); Execution e = new Execution(); e.getInfolisPatternTags().add("test"); e.setAlgorithm(SearchPatternsAndCreateLinks.class); e.setInputFiles(txt); e.setQueryServices(qServices); e.setSearchResultLinkerClass(BestMatchLinker.class); dataStoreClient.post(Execution.class, e); e.instantiateAlgorithm(dataStoreClient, fileResolver).run(); for (String ref : e.getTextualReferences()) { log.debug(ref); } List<EntityLink> createdLinks = dataStoreClient.get(EntityLink.class, e.getLinks()); //check the amount of created links //TODO might change? // Assert.assertEquals(22, createdLinks.size()); for (EntityLink el : createdLinks) { //TODO any nice tests? // if (el.getToEntity().getName().equals("Flash Eurobarometer 35")) { // // } } } public List<String> postTxtFiles(File dir) throws IOException { List<String> txtFiles = new ArrayList<>(); for (File f : dir.listFiles()) { Path tempFile = Files.createTempFile("infolis-", ".txt"); InfolisFile inFile = new InfolisFile(); FileInputStream inputStream = new FileInputStream(f.getAbsolutePath()); int numberBytes = inputStream.available(); byte pdfBytes[] = new byte[numberBytes]; inputStream.read(pdfBytes); inputStream.close(); IOUtils.write(pdfBytes, Files.newOutputStream(tempFile)); inFile.setFileName(tempFile.toString()); inFile.setMd5(SerializationUtils.getHexMd5(pdfBytes)); inFile.setMediaType("text/plain"); inFile.setFileStatus("AVAILABLE"); try { OutputStream os = fileResolver.openOutputStream(inFile); IOUtils.write(pdfBytes, os); os.close(); } catch (Exception e) { e.printStackTrace(); } dataStoreClient.post(InfolisFile.class, inFile); txtFiles.add(inFile.getUri()); } return txtFiles; } public List<String> postQueryServices() throws IOException { List<String> postedQueryServices = new ArrayList<>(); QueryService p1 = new DaraHTMLQueryService(); p1.setMaxNumber(10); dataStoreClient.post(QueryService.class, p1); postedQueryServices.add(p1.getUri()); return postedQueryServices; } }