/** * */ package org.voyanttools.trombone.input.expand; import java.io.File; import java.io.IOException; import java.io.InputStream; import java.net.URI; import java.net.URISyntaxException; import java.util.ArrayList; import java.util.LinkedHashMap; import java.util.List; import org.apache.commons.io.FileUtils; import org.apache.commons.io.IOUtils; import org.json.simple.JSONArray; import org.json.simple.JSONObject; import org.json.simple.parser.JSONParser; import org.json.simple.parser.ParseException; import org.voyanttools.trombone.input.source.FileInputSource; import org.voyanttools.trombone.input.source.InputSource; import org.voyanttools.trombone.input.source.UriInputSource; import org.voyanttools.trombone.model.DocumentFormat; import org.voyanttools.trombone.model.StoredDocumentSource; import org.voyanttools.trombone.storage.StoredDocumentSourceStorage; import org.voyanttools.trombone.storage.file.FileStoredDocumentSourceStorage; import org.voyanttools.trombone.util.FlexibleParameters; /** * @author sgs * */ public class ObApiSearchExpander implements Expander { /** * all parameters sent, only some of which may be relevant to some expanders */ private FlexibleParameters parameters; /** * the stored document storage strategy */ private StoredDocumentSourceStorage storedDocumentSourceStorage; /** * @param parameters * @param storedDocumentSourceStorage * */ public ObApiSearchExpander(StoredDocumentSourceStorage storedDocumentSourceStorage, FlexibleParameters parameters) { this.storedDocumentSourceStorage = storedDocumentSourceStorage; this.parameters = parameters; } /* (non-Javadoc) * @see org.voyanttools.trombone.input.expand.Expander#getExpandedStoredDocumentSources(org.voyanttools.trombone.model.StoredDocumentSource) */ @Override public List<StoredDocumentSource> getExpandedStoredDocumentSources(StoredDocumentSource storedDocumentSource) throws IOException { List<StoredDocumentSource> sourceDocumentSources = new ArrayList<StoredDocumentSource>(); File localOldBaileyDir = null; if (storedDocumentSourceStorage instanceof FileStoredDocumentSourceStorage) { File dummyFile = ((FileStoredDocumentSourceStorage) storedDocumentSourceStorage).getDocumentSourceDirectory("dummy"); File rootData = dummyFile.getParentFile().getParentFile().getParentFile(); localOldBaileyDir = new File(rootData, "OldBaileyXmlDocuments"); } InputStream is = null; String jsonString; try { is = storedDocumentSourceStorage.getStoredDocumentSourceInputStream(storedDocumentSource.getId()); jsonString = IOUtils.toString(is); } finally { if (is!=null) {is.close();} } JSONParser parser = new JSONParser(); JSONObject obj; try { obj = (JSONObject) parser.parse(jsonString); } catch (ParseException e) { throw new IOException("Unable to parse JSON results: "+storedDocumentSource); } JSONArray hits = (JSONArray) obj.get("hits"); List<String> ids = new ArrayList<String>(); for (int i=0; i<hits.size(); i++) { ids.add((String) hits.get(i)); } for (String id : ids) { InputSource inputSource; if (localOldBaileyDir!=null && localOldBaileyDir.exists() && new File(localOldBaileyDir, id+".xml").exists()) { inputSource = new FileInputSource(new File(localOldBaileyDir, id+".xml")); } else { String uriString = "http://www.oldbaileyonline.org//obapi/text?div="+id; URI uri; try { uri = new URI(uriString); } catch (URISyntaxException e) { throw new IllegalArgumentException("The URI provided by the parameters has a problem: "+uriString, e); } inputSource = new UriInputSource(uri); } inputSource.getMetadata().setDocumentFormat(DocumentFormat.OLDBAILEYXML); sourceDocumentSources.add(storedDocumentSourceStorage.getStoredDocumentSource(inputSource)); } // we have to switch the inputFormat for the next step (and hope that it sticks) parameters.setParameter("inputFormat", DocumentFormat.OLDBAILEYXML.name()); return sourceDocumentSources; } }