/** * */ package org.voyanttools.trombone.input.expand; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; import java.util.List; import java.util.UUID; import org.apache.commons.compress.archivers.ArchiveOutputStream; import org.apache.commons.compress.archivers.zip.ZipArchiveEntry; import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream; import org.apache.commons.io.FileUtils; import org.apache.commons.io.IOUtils; import org.voyanttools.trombone.input.source.FileInputSource; import org.voyanttools.trombone.input.source.InputSource; import org.voyanttools.trombone.model.DocumentFormat; import org.voyanttools.trombone.model.DocumentMetadata; import org.voyanttools.trombone.model.DocumentMetadata.ParentType; import org.voyanttools.trombone.model.StoredDocumentSource; import org.voyanttools.trombone.storage.StoredDocumentSourceStorage; import org.voyanttools.trombone.util.FlexibleParameters; import net.lingala.zip4j.core.ZipFile; import net.lingala.zip4j.exception.ZipException; /** * @author sgsin * */ class BagItExpander implements Expander { /** * the stored document storage strategy */ private StoredDocumentSourceStorage storedDocumentSourceStorage; private FlexibleParameters parameters; private String[] keyFileNames = new String[]{"CWRC.bin","DC.xml","MODS.bin"}; BagItExpander(StoredDocumentSourceStorage storedDocumentSourceStorage, FlexibleParameters parameters) { this.storedDocumentSourceStorage = storedDocumentSourceStorage; this.parameters = parameters; } /* (non-Javadoc) * @see org.voyanttools.trombone.input.expand.Expander#getExpandedStoredDocumentSources(org.voyanttools.trombone.model.StoredDocumentSource) */ @Override public List<StoredDocumentSource> getExpandedStoredDocumentSources(StoredDocumentSource storedDocumentSource) throws IOException { File base = new File(System.getProperty("java.io.tmpdir"), "_temp_bagit_"+UUID.randomUUID()); assert base.mkdir(); File zipFile = new File(base, "bagit.zip"); // to avoid getting too close to the inner workings of the storage and still be able to open a file (for extracting convenience), we'll copy the input stream to a file. InputStream is = storedDocumentSourceStorage.getStoredDocumentSourceInputStream(storedDocumentSource.getId()); FileUtils.copyInputStreamToFile(is, zipFile); is.close(); // extract the zip into a directory and the traverse the directory to find data File extractedFile = new File(base, "extracted"); try { new ZipFile(zipFile).extractAll(extractedFile.getPath()); List<StoredDocumentSource> expandedStoredDocumentSources = new ArrayList<StoredDocumentSource>(); addFromDirectory(base, extractedFile, storedDocumentSource, expandedStoredDocumentSources); return expandedStoredDocumentSources; } catch (ZipException e) { throw new IOException("Unable to extract BagIt archive.", e); } finally { FileUtils.deleteDirectory(base); } } private void addFromDirectory(File base, File currentDirectory, StoredDocumentSource parentStoredDocumentSource, List<StoredDocumentSource> expandedStoredDocumentSources) throws IOException { // go through current directory to find key file names boolean hasKeyFileNames = true; for (String filename : keyFileNames) { if (new File(currentDirectory, filename).exists()==false) { hasKeyFileNames = false; break; } } if (hasKeyFileNames) { File zipFile = new File(base, currentDirectory.getName() +".zip"); ArchiveOutputStream output = new ZipArchiveOutputStream(zipFile); for (String filename : keyFileNames) { File childFile = new File(currentDirectory, filename); ZipArchiveEntry entry = new ZipArchiveEntry(childFile, childFile.getName()); entry.setSize(childFile.length()); output.putArchiveEntry(entry); FileInputStream fis = new FileInputStream(childFile); IOUtils.copy(fis, output); fis.close(); output.closeArchiveEntry(); } output.finish(); output.close(); InputSource inputSource = new FileInputSource(zipFile); DocumentMetadata metadata = inputSource.getMetadata(); metadata.setParent(parentStoredDocumentSource.getMetadata(), ParentType.EXPANSION); metadata.setDocumentFormat(DocumentFormat.BAGIT); StoredDocumentSource storedDocumentSource = storedDocumentSourceStorage.getStoredDocumentSource(inputSource); expandedStoredDocumentSources.add(storedDocumentSource); } for (File childFile : currentDirectory.listFiles()) { if (childFile.isDirectory()) { // recurse (even for "data" directory) addFromDirectory(base, childFile, parentStoredDocumentSource, expandedStoredDocumentSources); } } } }