package net.yacy.document.parser; import java.io.File; import java.io.FileInputStream; import java.util.Collection; import static junit.framework.TestCase.assertEquals; import net.yacy.cora.document.id.AnchorURL; import net.yacy.document.Document; import net.yacy.document.VocabularyScraper; import org.junit.Test; public class pdfParserTest { /** * Test extraction of links in parse method, of class pdfParser. */ @Test public void testParse() throws Exception { System.out.println("pdfParser.parse"); final String testFiles = "umlaute_linux.pdf"; final String mimetype = "application/pdf"; final String charset = null; //final String resulttxt = "In München steht ein Hofbräuhaus. Dort gibt es Bier aus Maßkrügen."; final String filename = "test/parsertest/" + testFiles; final File file = new File(filename); final AnchorURL url = new AnchorURL("http://localhost/" + filename); System.out.println("parse file: " + filename); pdfParser p = new pdfParser(); final Document[] docs = p.parse(url, mimetype, charset, new VocabularyScraper(), 0, new FileInputStream(file)); Document doc = docs[0]; int ilinks = doc.getAnchors().size(); assertEquals("number of links in pdf", 1, ilinks); Collection<AnchorURL> links = doc.getAnchors(); System.out.println("number of links detected = " + ilinks); for (AnchorURL aurl : links) { System.out.println(" found: " + aurl.toString()); } } }