/* * Copyright Robert Newson * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.github.rnewson.couchdb.lucene; import org.apache.lucene.document.Document; import org.junit.Before; import org.junit.Test; import java.io.IOException; import java.io.InputStream; import static org.hamcrest.Matchers.*; import static org.junit.Assert.assertThat; public class TikaTest { private Document doc; @Before public void setup() { doc = new Document(); } @Test public void testPDF() throws IOException { parse("paxos-simple.pdf", "application/pdf", "foo"); assertThat(doc.getField("foo"), not(nullValue())); } @Test public void testXML() throws IOException { parse("example.xml", "text/xml", "bar"); assertThat(doc.getField("bar"), not(nullValue())); } @Test public void testWord() throws IOException { parse("example.doc", "application/msword", "bar"); assertThat(doc.getField("bar"), not(nullValue())); assertThat(doc.get("bar"), containsString("The express mission of the organization")); } private void parse(final String resource, final String type, final String field) throws IOException { final InputStream in = getClass().getClassLoader().getResourceAsStream(resource); try { Tika.INSTANCE.parse(in, type, field, doc); } finally { in.close(); } } }