package io.github.infolis.algorithm;
import io.github.infolis.InfolisBaseTest;
import io.github.infolis.model.Execution;
import io.github.infolis.model.entity.Entity;
import io.github.infolis.model.entity.InfolisFile;
import io.github.infolis.util.SerializationUtils;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Arrays;
import org.apache.commons.io.IOUtils;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import org.junit.Before;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Test for {@link TextAndMetaDataExtractor}: runs the algorithm on the
 * {@code /trivial.pdf} test resource together with the
 * {@code /metaData/trivial.xml} metadata file and checks the bibliographic
 * fields of the entity the input file manifests.
 *
 * @author domi
 */
public class TextAndMetaDataExtractorTest extends InfolisBaseTest {

    private static final Logger log = LoggerFactory.getLogger(TextAndMetaDataExtractorTest.class);

    /** Raw bytes of the {@code /trivial.pdf} test resource. */
    private byte[] pdfBytes;
    /** Temporary file whose path is used as the file name of the input {@link InfolisFile}. */
    Path tempFile;
    /** Location of the {@code /trivial.pdf} resource, used as the original name of the input file. */
    String filePath;
    /** Location of the {@code /metaData/trivial.xml} resource handed to the execution. */
    String metaDataFile;

    /**
     * Clears the data store and loads the fixtures used by the test.
     *
     * @throws IOException if the PDF resource cannot be read or the temporary file cannot be created
     */
    @Before
    public void setUp() throws IOException {
        dataStoreClient.clear();
        // Close the resource stream explicitly; IOUtils.toByteArray leaves it open.
        try (java.io.InputStream pdfStream = getClass().getResourceAsStream("/trivial.pdf")) {
            pdfBytes = IOUtils.toByteArray(pdfStream);
        }
        tempFile = Files.createTempFile("infolis-", ".pdf");
        // Do not let temp files pile up across test runs.
        tempFile.toFile().deleteOnExit();
        filePath = this.getClass().getResource("/trivial.pdf").getFile();
        metaDataFile = this.getClass().getResource("/metaData/trivial.xml").getFile();
    }

    /**
     * Posts an input file and an empty entity, runs {@link TextAndMetaDataExtractor}
     * with the metadata file attached and asserts that title, authors, abstract,
     * subjects and identifiers of the entity match the expected values.
     *
     * @throws Exception if posting the fixtures or writing the input file fails
     */
    @Test
    public void testMetaExtraction() throws Exception {
        //TODO: why did we introduce tempFiles?
        InfolisFile inFile = new InfolisFile();
        inFile.setFileName(tempFile.toString());
        inFile.setOriginalName(filePath);
        inFile.setMd5(SerializationUtils.getHexMd5(pdfBytes));
        inFile.setMediaType("application/pdf");
        inFile.setFileStatus("AVAILABLE");

        // The entity is posted empty; the assertions below read it back after the run.
        Entity entity = new Entity();
        dataStoreClient.post(Entity.class, entity);
        inFile.setManifestsEntity(entity.getUri());
        writeFile(inFile);

        Execution execution = new Execution();
        execution.getInputFiles().add(inFile.getUri());
        execution.getMetaDataFiles().add(metaDataFile);
        execution.getInfolisFileTags().add("domi");
        execution.setAlgorithm(TextAndMetaDataExtractor.class);
        dataStoreClient.post(Execution.class, execution);
        Algorithm algo = execution.instantiateAlgorithm(dataStoreClient, dataStoreClient, fileResolver, fileResolver);
        algo.run();

        Entity e = dataStoreClient.get(Entity.class, inFile.getManifestsEntity());
        // assertEquals (rather than assertTrue(a.equals(b))) reports expected vs. actual on failure.
        assertEquals("The possible trinity: Optimal interest rate, exchange rate, and taxes on capital flows in a DSGE model for a small open economy",
                e.getName());
        assertEquals(1, e.getAuthors().size());
        assertEquals("A traditional way of thinking about the exchange rate (XR) regime and capital account openness has been framed in "
                + "terms of the 'impossible trinity' or 'trilemma', in which policymakers can only have 2 of 3 possible outcomes: open capital markets, monetary "
                + "independence and pegged XRs. This paper is an extension of Escude (A DSGE Model for a SOE with Systematic Interest and Foreign Exchange Policies "
                + "in Which Policymakers Exploit the Risk Premium for Stabilization Purposes, 2013), which focused on interest rate and XR policies, since it introduces"
                + " the third vertex of the 'trinity' in the form of taxes on private foreign debt. These affect the risk-adjusted uncovered interest parity equation and"
                + " hence influence the SOE's international financial flows. A useful way to illustrate the range of policy alternatives is to associate them with the"
                + " faces of a triangle. Each of 3 possible government intervention policies taken individually (in the domestic currency bond market, in the FX market,"
                + " and in the foreign currency bonds market) corresponds to one of the vertices of the triangle, each of the 3 possible pairs of intervention policies"
                + " corresponds to one of its 3 edges, and the 3 simultaneous intervention policies taken jointly correspond to its interior. This paper shows that this "
                + "interior, or 'possible trinity' is quite generally not only possible but optimal, since the CB obtains a lower loss when it implements a policy with"
                + " all three interventions.",
                e.getAbstractText());
        assertEquals(9, e.getSubjects().size());
        log.debug("ids: {}", e.getIdentifiers());
        assertEquals(Arrays.asList(
                "Economics: The Open-Access, Open-Assessment E-Journal 8 2014-25 1-58",
                "doi:10.5018/economics-ejournal.ja.2014-25",
                "http://hdl.handle.net/10419/100000",
                "ppn:789521210",
                "RePEc:zbw:ifweej:201425"),
                e.getIdentifiers());
    }

    /**
     * Posts the given file to the data store and writes the PDF bytes to the
     * output stream the file resolver opens for it.
     *
     * @param inFile the file to post and to write the PDF content for
     * @throws IOException if the content cannot be written; propagated so that a
     *         broken fixture fails the test at its cause instead of being hidden
     */
    private void writeFile(InfolisFile inFile) throws IOException {
        dataStoreClient.post(InfolisFile.class, inFile);
        // try-with-resources closes the stream even when the write fails.
        try (OutputStream os = fileResolver.openOutputStream(inFile)) {
            IOUtils.write(pdfBytes, os);
        }
    }
}