package org.voyanttools.trombone.input.extract;

import static org.junit.Assert.*;

import java.io.IOException;
import java.io.InputStream;
import java.util.List;

import org.apache.commons.io.IOUtils;
import org.junit.Test;
import org.voyanttools.trombone.input.expand.StoredDocumentSourceExpander;
import org.voyanttools.trombone.input.source.FileInputSource;
import org.voyanttools.trombone.input.source.InputSource;
import org.voyanttools.trombone.model.DocumentFormat;
import org.voyanttools.trombone.model.DocumentMetadata;
import org.voyanttools.trombone.model.StoredDocumentSource;
import org.voyanttools.trombone.storage.Storage;
import org.voyanttools.trombone.storage.StoredDocumentSourceStorage;
import org.voyanttools.trombone.util.FlexibleParameters;
import org.voyanttools.trombone.util.TestHelper;

/**
 * Exercises {@link StoredDocumentSourceExtractor} against BagIt archives: one
 * archive holding a single document (format set by hand), and two archives
 * that are first expanded with {@link StoredDocumentSourceExpander}.
 */
public class BagItExtractorTest {

	/** Author string shared by every document taken from the expanded archives. */
	private static final String MONTGOMERY = "L. M. (Lucy Maud) Montgomery";

	/** Marker that must not appear in the text extracted from the expanded archives. */
	private static final String GUTENTAG = "GutenTag";

	/**
	 * Encoding used to decode the extracted content. Every asserted snippet is
	 * plain ASCII, so the checks do not depend on non-ASCII decoding.
	 * NOTE(review): assumes extracted content is stored as UTF-8 -- confirm.
	 */
	private static final String ENCODING = "UTF-8";

	/**
	 * Checks title, author and a content snippet for documents extracted from
	 * three BagIt archives.
	 *
	 * @throws IOException if a resource cannot be read or stored
	 */
	@Test
	public void test() throws IOException {
		Storage storage = TestHelper.getDefaultTestStorage();
		try {
			StoredDocumentSourceStorage storeDocumentSourceStorage = storage.getStoredDocumentSourceStorage();
			FlexibleParameters parameters = new FlexibleParameters();
			StoredDocumentSourceExtractor extractor = new StoredDocumentSourceExtractor(storeDocumentSourceStorage, parameters);

			// single-document archive, passed to the extractor without expansion
			InputSource inputSource = new FileInputSource(TestHelper.getResource("formats/BagIt-One-Document.zip"));
			inputSource.getMetadata().setDocumentFormat(DocumentFormat.BAGIT); // will normally be set by expander
			StoredDocumentSource storedDocumentSource = storeDocumentSourceStorage.getStoredDocumentSource(inputSource);
			assertExtractedDocument(extractor, storeDocumentSourceStorage, storedDocumentSource,
					"A Beautiful Possibility", "Edith Ferguson Black", "In one of the fairest");

			// multi-document archive
			List<StoredDocumentSource> expandedSourceDocumentSources =
					expandArchive(storeDocumentSourceStorage, "formats/BagIt-Multiple-Documents.zip");
			assertEquals(2, expandedSourceDocumentSources.size());

			String contents = assertExtractedDocument(extractor, storeDocumentSourceStorage,
					expandedSourceDocumentSources.get(0),
					"Further Chronicles of Avonlea", MONTGOMERY, "Max always blesses the animal");
			assertFalse(contents.contains(GUTENTAG));

			contents = assertExtractedDocument(extractor, storeDocumentSourceStorage,
					expandedSourceDocumentSources.get(1),
					"Anne of the Island", MONTGOMERY, "Harvest is ended and summer");
			assertFalse(contents.contains(GUTENTAG));

			// test more recent BagIt
			expandedSourceDocumentSources =
					expandArchive(storeDocumentSourceStorage, "formats/bagit_cwrc_lmm_texts-04f2ac7.zip");
			assertEquals(16, expandedSourceDocumentSources.size()); // note that there are 17 directories, but one doesn't have docs

			contents = assertExtractedDocument(extractor, storeDocumentSourceStorage,
					expandedSourceDocumentSources.get(0),
					"Further Chronicles of Avonlea", MONTGOMERY, "Max always blesses the animal");
			assertFalse(contents.contains(GUTENTAG));

			contents = assertExtractedDocument(extractor, storeDocumentSourceStorage,
					expandedSourceDocumentSources.get(1),
					"Rainbow Valley", MONTGOMERY, "apple-green evening in May");
			assertFalse(contents.contains(GUTENTAG));
		} finally {
			// destroy the test storage even when an assertion above fails
			storage.destroy();
		}
	}

	/**
	 * Stores the named test resource and expands it into its contained documents.
	 *
	 * @param sourceStorage storage that receives the archive and its expanded documents
	 * @param resourcePath path of the archive, as understood by {@code TestHelper.getResource}
	 * @return the expanded stored document sources
	 * @throws IOException if the archive cannot be stored or expanded
	 */
	private static List<StoredDocumentSource> expandArchive(StoredDocumentSourceStorage sourceStorage,
			String resourcePath) throws IOException {
		StoredDocumentSourceExpander expander = new StoredDocumentSourceExpander(sourceStorage);
		InputSource inputSource = new FileInputSource(TestHelper.getResource(resourcePath));
		StoredDocumentSource archive = sourceStorage.getStoredDocumentSource(inputSource);
		return expander.getExpandedStoredDocumentSources(archive);
	}

	/**
	 * Extracts one stored document, asserts its title and author, and asserts
	 * that its content contains the expected snippet.
	 *
	 * @param extractor extractor under test
	 * @param sourceStorage storage from which the extracted content is read back
	 * @param source stored document to extract
	 * @param expectedTitle title the extracted metadata must report
	 * @param expectedAuthor author the extracted metadata must report
	 * @param expectedSnippet text that must occur in the extracted content
	 * @return the full extracted content, for further assertions by the caller
	 * @throws IOException if extraction or reading the content fails
	 */
	private static String assertExtractedDocument(StoredDocumentSourceExtractor extractor,
			StoredDocumentSourceStorage sourceStorage, StoredDocumentSource source,
			String expectedTitle, String expectedAuthor, String expectedSnippet) throws IOException {
		StoredDocumentSource extracted = extractor.getExtractedStoredDocumentSource(source);
		DocumentMetadata metadata = extracted.getMetadata();
		assertEquals(expectedTitle, metadata.getTitle());
		assertEquals(expectedAuthor, metadata.getAuthor());

		// try-with-resources closes the stream even if reading or the assertion fails
		try (InputStream is = sourceStorage.getStoredDocumentSourceInputStream(extracted.getId())) {
			String contents = IOUtils.toString(is, ENCODING);
			assertTrue(contents.contains(expectedSnippet));
			return contents;
		}
	}
}