/*******************************************************************************
* Trombone is a flexible text processing and analysis library used
* primarily by Voyant Tools (voyant-tools.org).
*
* Copyright (©) 2007-2012 Stéfan Sinclair & Geoffrey Rockwell
*
* This file is part of Trombone.
*
* Trombone is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Trombone is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Trombone. If not, see <http://www.gnu.org/licenses/>.
******************************************************************************/
package org.voyanttools.trombone.input.expand;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.concurrent.Callable;
import org.apache.tika.io.IOUtils;
import org.voyanttools.trombone.input.source.InputSource;
import org.voyanttools.trombone.input.source.Source;
import org.voyanttools.trombone.model.DocumentFormat;
import org.voyanttools.trombone.model.DocumentMetadata;
import org.voyanttools.trombone.model.StoredDocumentSource;
import org.voyanttools.trombone.storage.StoredDocumentSourceStorage;
import org.voyanttools.trombone.util.FlexibleParameters;
/**
 * This is the main stored document source expander that calls other expanders
 * as needed. When
 * {@link #getExpandedStoredDocumentSources(StoredDocumentSource)} is called
 * with a stored document source that doesn't need expansion, the same
 * StoredDocumentSource is returned (but as part of a list).
 *
 * <p>The format-specific expanders are created lazily, on first use, without
 * any synchronization.</p>
 *
 * @author Stéfan Sinclair
 */
public class StoredDocumentSourceExpander implements Expander {

    /**
     * all parameters sent, only some of which may be relevant to some expanders
     */
    private final FlexibleParameters parameters;

    /**
     * the storage strategy to use for storing document sources
     */
    private final StoredDocumentSourceStorage storedDocumentSourceStorage;

    /**
     * the expander for archives (created on first use)
     */
    private Expander archiveExpander;

    /**
     * the expander for compressed documents (created on first use)
     */
    private Expander compressedExpander;

    /**
     * the expander for XML documents (created on first use)
     */
    private Expander xmlExpander;

    /**
     * the expander for spreadsheet (XLSX) documents, backed by
     * {@code XlsExpander} (created on first use); the field keeps its
     * historical "xsl" spelling
     */
    private Expander xslExpander;

    /**
     * the expander for {@code OBAPISEARCHJSON} documents (created on first use)
     */
    private Expander obApiSearchJsonExpander;

    /**
     * the expander for BagIt documents (created on first use)
     */
    private Expander bagItExpander;

    /**
     * Create a new instance of this expander with the specified storage
     * strategy and an empty set of parameters.
     *
     * @param storedDocumentSourceStorage
     *            the storage handler for document sources
     */
    public StoredDocumentSourceExpander(
            StoredDocumentSourceStorage storedDocumentSourceStorage) {
        this(storedDocumentSourceStorage, new FlexibleParameters());
    }

    /**
     * Create a new instance of this expander with the specified storage
     * strategy and parameters.
     *
     * @param storedDocumentSourceStorage
     *            the storage handler for document sources
     * @param parameters
     *            that may be relevant to the expanders
     */
    public StoredDocumentSourceExpander(
            StoredDocumentSourceStorage storedDocumentSourceStorage,
            FlexibleParameters parameters) {
        this.storedDocumentSourceStorage = storedDocumentSourceStorage;
        this.parameters = parameters;
        // the format-specific expanders start out null and are created on demand
    }

    // Disabled multi-threaded variant, retained for reference. It is the only
    // code that uses CallableExpander (declared at the end of this class).
    /*
    public List<StoredDocumentSource> getExpandedStoredDocumentSources(InputSource inputSource) throws IOException {
    List<InputSource> inputSources = new ArrayList<InputSource>();
    inputSources.add(inputSource);
    return getExpandedStoredDocumentSources(inputSources);
    }
    public List<StoredDocumentSource> getExpandedStoredDocumentSources(List<InputSource> inputSources) throws IOException {
    List<StoredDocumentSource> storedDocumentSources = new ArrayList<StoredDocumentSource>();
    ExecutorService executor = Executors.newCachedThreadPool();
    List<Future<StoredDocumentSource>> list = new ArrayList<Future<StoredDocumentSource>>();
    for (InputSource inputSource : inputSources) {
    Callable<StoredDocumentSource> worker = new CallableExpander(this.storedDocumentSourceStorage, inputSource);
    Future<StoredDocumentSource> submit = executor.submit(worker);
    list.add(submit);
    }
    try {
    for (Future<StoredDocumentSource> future : list) {
    storedDocumentSources.add(future.get());
    }
    } catch (InterruptedException e) {
    throw new IllegalStateException("An error occurred during multi-threaded document expansion.", e);
    } catch (ExecutionException e) {
    throw new IllegalStateException("An error occurred during multi-threaded document expansion.", e);
    }
    executor.shutdown();
    return storedDocumentSources;
    }
    */

    /**
     * Expand the specified stored document source with the expander that
     * matches its format, or return it unchanged (as a single-element list)
     * when no expansion is needed or known.
     *
     * <p>Exactly one branch applies per call: BagIt, archive, compressed,
     * XLSX, OBAPISEARCHJSON, XML, or no expansion, tested in that order.</p>
     *
     * @param storedDocumentSource
     *            the stored document source to expand (or return as is)
     * @return a new list of expanded document sources
     * @throws IOException
     *             if reading the stored document or an expander fails
     */
    public List<StoredDocumentSource> getExpandedStoredDocumentSources(
            StoredDocumentSource storedDocumentSource) throws IOException {

        DocumentFormat format = determineFormat(storedDocumentSource);

        List<StoredDocumentSource> storedDocumentSources = new ArrayList<StoredDocumentSource>();

        // The BagIt test must be part of the else-if chain: otherwise a BagIt
        // source would be expanded here and then handled a second time by the
        // archive branch or by the final "no expansion" branch.
        if (format == DocumentFormat.BAGIT) {
            storedDocumentSources.addAll(expandBagIt(storedDocumentSource));
        }
        else if (format.isArchive()) {
            storedDocumentSources.addAll(expandArchive(storedDocumentSource));
        }
        else if (format == DocumentFormat.COMPRESSED) {
            storedDocumentSources.addAll(expandCompressed(storedDocumentSource));
        }
        else if (format == DocumentFormat.XLSX) {
            storedDocumentSources.addAll(expandXsl(storedDocumentSource));
        }
        else if (format == DocumentFormat.OBAPISEARCHJSON) {
            storedDocumentSources.addAll(expandObApiSearchJson(storedDocumentSource));
        }
        else if (format.isXml()) {
            storedDocumentSources.addAll(expandXml(storedDocumentSource));
        }
        // no expansion needed or known
        else {
            storedDocumentSources.add(storedDocumentSource);
        }

        return storedDocumentSources;
    }

    /**
     * Work out which format to dispatch on: start from the format recorded in
     * the metadata, let the {@code inputFormat} parameter override it for
     * non-container formats, and finally peek at the contents of string
     * sources whose format is still unknown.
     *
     * @param storedDocumentSource
     *            the stored document source being expanded
     * @return the format to use for choosing an expander
     * @throws IOException
     *             if the stored contents cannot be read or the metadata
     *             cannot be updated
     */
    private DocumentFormat determineFormat(StoredDocumentSource storedDocumentSource) throws IOException {

        DocumentFormat format = storedDocumentSource.getMetadata().getDocumentFormat();

        // Locale.ROOT keeps the upper-casing independent of the default locale
        // (e.g. a Turkish locale would turn "i" into a dotted capital "İ").
        String inputFormatString = parameters.getParameterValue("inputFormat", "").toUpperCase(Locale.ROOT);
        if (!inputFormatString.isEmpty()) {
            // make sure it's not a container format (where the inputFormat
            // parameter probably applies to the contents, not the container)
            if (format != DocumentFormat.ARCHIVE && format != DocumentFormat.COMPRESSED) {
                // is it ok to have unrecognized here?
                DocumentFormat requested = DocumentFormat.getForgivingly(inputFormatString);
                // only set if we have a real format (could be an XML profile)
                if (requested != DocumentFormat.UNKNOWN) {
                    format = requested;
                }
            }
        }

        // if we have a string and an unknown format, we have to have a peek
        if (format == DocumentFormat.UNKNOWN
                && storedDocumentSource.getMetadata().getSource() == Source.STRING) {
            String contents;
            InputStream inputStream = storedDocumentSourceStorage
                    .getStoredDocumentSourceInputStream(storedDocumentSource.getId());
            try {
                contents = IOUtils.toString(inputStream);
            }
            finally {
                // always release the stream once the contents have been read
                inputStream.close();
            }
            format = DocumentFormat.fromString(contents);
            if (format != DocumentFormat.UNKNOWN) {
                // remember the detected format in the stored metadata
                DocumentMetadata metadata = storedDocumentSource.getMetadata();
                metadata.setDefaultFormat(format);
                storedDocumentSourceStorage.updateStoredDocumentSourceMetadata(
                        storedDocumentSource.getId(), metadata);
            }
        }

        return format;
    }

    /**
     * Expand the specified OBAPISEARCHJSON stored document source using an
     * {@code ObApiSearchExpander} (created on first use).
     *
     * @param storedDocumentSource
     *            the stored document source to expand
     * @return a list of expanded document sources
     * @throws IOException
     *             an IO Exception
     */
    private List<StoredDocumentSource> expandObApiSearchJson(
            StoredDocumentSource storedDocumentSource) throws IOException {
        if (this.obApiSearchJsonExpander == null) {
            this.obApiSearchJsonExpander = new ObApiSearchExpander(storedDocumentSourceStorage, parameters);
        }
        return this.obApiSearchJsonExpander.getExpandedStoredDocumentSources(storedDocumentSource);
    }

    /**
     * Expand the specified spreadsheet (XLSX) stored document source using an
     * {@code XlsExpander} (created on first use). The method keeps its
     * historical "Xsl" spelling.
     *
     * @param storedDocumentSource
     *            the stored document source to expand
     * @return a list of expanded document sources
     * @throws IOException
     *             an IO Exception
     */
    List<StoredDocumentSource> expandXsl(
            StoredDocumentSource storedDocumentSource) throws IOException {
        if (this.xslExpander == null) {
            this.xslExpander = new XlsExpander(storedDocumentSourceStorage, parameters);
        }
        return this.xslExpander.getExpandedStoredDocumentSources(storedDocumentSource);
    }

    /**
     * Expand the specified archive stored document source using an
     * {@code ArchiveExpander} (created on first use). The archive expander is
     * given a reference to this expander.
     *
     * @param storedDocumentSource
     *            the stored document source to expand
     * @return a list of expanded document sources
     * @throws IOException
     *             an IO Exception
     */
    List<StoredDocumentSource> expandArchive(
            StoredDocumentSource storedDocumentSource) throws IOException {
        if (this.archiveExpander == null) {
            this.archiveExpander = new ArchiveExpander(
                    storedDocumentSourceStorage, this, parameters);
        }
        return this.archiveExpander.getExpandedStoredDocumentSources(storedDocumentSource);
    }

    /**
     * Expand the specified compressed stored document source using a
     * {@code CompressedExpander} (created on first use). The compressed
     * expander is given a reference to this expander.
     *
     * @param storedDocumentSource
     *            the stored document source to expand
     * @return a list of expanded document sources
     * @throws IOException
     *             an IO Exception
     */
    List<StoredDocumentSource> expandCompressed(
            StoredDocumentSource storedDocumentSource) throws IOException {
        if (this.compressedExpander == null) {
            this.compressedExpander = new CompressedExpander(
                    storedDocumentSourceStorage, this);
        }
        return this.compressedExpander.getExpandedStoredDocumentSources(storedDocumentSource);
    }

    /**
     * Expand the specified XML stored document source using an
     * {@code XmlExpander} (created on first use).
     *
     * @param storedDocumentSource
     *            the stored document source to expand (or add as is)
     * @return a list of expanded document sources
     * @throws IOException
     *             an IO Exception
     */
    List<StoredDocumentSource> expandXml(
            StoredDocumentSource storedDocumentSource) throws IOException {
        if (this.xmlExpander == null) {
            this.xmlExpander = new XmlExpander(storedDocumentSourceStorage, parameters);
        }
        // this will deal fine when no expansion is needed
        return this.xmlExpander.getExpandedStoredDocumentSources(storedDocumentSource);
    }

    /**
     * Expand the specified BagIt stored document source using a
     * {@code BagItExpander} (created on first use).
     *
     * @param storedDocumentSource
     *            the stored document source to expand
     * @return a list of expanded document sources
     * @throws IOException
     *             an IO Exception
     */
    List<StoredDocumentSource> expandBagIt(
            StoredDocumentSource storedDocumentSource) throws IOException {
        if (this.bagItExpander == null) {
            this.bagItExpander = new BagItExpander(storedDocumentSourceStorage, parameters);
        }
        return this.bagItExpander.getExpandedStoredDocumentSources(storedDocumentSource);
    }

    /**
     * A task that asks the storage for the StoredDocumentSource corresponding
     * to an InputSource. It is currently referenced only by the disabled
     * multi-threaded code in this class; it is static because it does not use
     * the enclosing expander instance.
     */
    private static class CallableExpander implements Callable<StoredDocumentSource> {

        private final StoredDocumentSourceStorage storedDocumentSourceStorage;

        private final InputSource inputSource;

        public CallableExpander(StoredDocumentSourceStorage storedDocumentSourceStorage,
                InputSource inputSource) {
            this.storedDocumentSourceStorage = storedDocumentSourceStorage;
            this.inputSource = inputSource;
        }

        @Override
        public StoredDocumentSource call() throws Exception {
            return this.storedDocumentSourceStorage.getStoredDocumentSource(inputSource);
        }
    }
}