/*
* <p><b>License and Copyright: </b>The contents of this file is subject to the
* same open source license as the Fedora Repository System at www.fedora-commons.org
* Copyright © 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 by The Technical University of Denmark.
* All rights reserved.</p>
*/
package dk.defxws.fedoragsearch.server;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.rmi.RemoteException;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.StringTokenizer;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.stream.StreamSource;
import dk.defxws.fedoragsearch.server.errors.ConfigException;
import dk.defxws.fedoragsearch.server.errors.FedoraObjectNotFoundException;
import dk.defxws.fedoragsearch.server.errors.GenericSearchException;
import org.apache.log4j.Logger;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.fcrepo.client.FedoraClient;
import org.fcrepo.common.Constants;
import org.fcrepo.server.access.FedoraAPIA;
import org.fcrepo.server.management.FedoraAPIM;
import org.fcrepo.server.types.gen.Datastream;
import org.fcrepo.server.types.gen.MIMETypedStream;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.xml.sax.InputSource;
import sun.misc.BASE64Encoder;
/**
* performs the generic parts of the operations
*
* @author gsp@dtv.dk
* @version
*/
public class GenericOperationsImpl implements Operations {
private static final Logger logger =
Logger.getLogger(GenericOperationsImpl.class);
int debuglength = 500;
private static final Map<String, FedoraClient> fedoraClients = new HashMap<String, FedoraClient>();
protected Map<String, Set<String>> fgsUserAttributes;
protected String fgsUserName;
protected String indexName;
protected Config config;
protected SearchResultFiltering srf;
protected int insertTotal = 0;
protected int updateTotal = 0;
protected int deleteTotal = 0;
protected int emptyTotal = 0;
protected int docCount = 0;
protected int warnCount = 0;
protected String usingQuery;
protected StringBuffer embeddedResult;
protected byte[] foxmlRecord;
protected String dsID;
protected byte[] ds;
protected String dsText;
protected String[] params = null;
// protected IndexReader ir = null;
protected DirectoryReader ir = null;
protected IndexSearcher searcher = null;
protected IndexWriter iw = null;
private static FedoraClient getFedoraClient(
String repositoryName,
String fedoraSoap,
String fedoraUser,
String fedoraPass,
String trustStorePath,
String trustStorePass)
throws GenericSearchException {
if (logger.isDebugEnabled())
logger.debug("getFedoraClient repositoryName="+repositoryName+" fedoraSoap="+fedoraSoap+" fedoraUser="+fedoraUser+" fedoraPass="+fedoraPass);
if (trustStorePath!=null)
System.setProperty("javax.net.ssl.trustStore", trustStorePath);
if (trustStorePass!=null)
System.setProperty("javax.net.ssl.trustStorePassword", trustStorePass);
try {
String baseURL = getBaseURL(fedoraSoap);
String user = fedoraUser;
String clientId = user + "@" + baseURL;
synchronized (fedoraClients) {
if (fedoraClients.containsKey(clientId)) {
return fedoraClients.get(clientId);
} else {
FedoraClient client = new FedoraClient(baseURL,
user, fedoraPass);
fedoraClients.put(clientId, client);
return client;
}
}
} catch (Exception e) {
throw new GenericSearchException("Error getting FedoraClient"
+ " for repository: " + repositoryName, e);
}
}
private static String getBaseURL(String fedoraSoap)
throws Exception {
final String end = "/services";
String baseURL = fedoraSoap;
if (fedoraSoap.endsWith(end)) {
baseURL = fedoraSoap.substring(0, fedoraSoap.length() - end.length());
} else {
throw new Exception("Unable to determine baseURL from fedoraSoap"
+ " value (expected it to end with '" + end + "'): "
+ fedoraSoap);
}
return baseURL;
}
private static FedoraAPIA getAPIA(
String repositoryName,
String fedoraSoap,
String fedoraUser,
String fedoraPass,
String trustStorePath,
String trustStorePass)
throws GenericSearchException {
FedoraClient client = getFedoraClient(repositoryName, fedoraSoap, fedoraUser, fedoraPass, trustStorePath, trustStorePass);
try {
return client.getAPIA();
} catch (Exception e) {
throw new GenericSearchException("Error getting API-A stub"
+ " for repository: " + repositoryName, e);
}
}
private static FedoraAPIM getAPIM(
String repositoryName,
String fedoraSoap,
String fedoraUser,
String fedoraPass,
String trustStorePath,
String trustStorePass)
throws GenericSearchException {
FedoraClient client = getFedoraClient(repositoryName, fedoraSoap, fedoraUser, fedoraPass, trustStorePath, trustStorePass);
try {
return client.getAPIM();
} catch (Exception e) {
throw new GenericSearchException("Error getting API-M stub"
+ " for repository: " + repositoryName, e);
}
}
public void init(String indexName, Config currentConfig) {
init(null, indexName, currentConfig);
}
public void init(String fgsUserName, String indexName, Config currentConfig) {
init(null, indexName, currentConfig, null);
}
public void init(String fgsUserName, String indexName, Config currentConfig, Map<String, Set<String>> fgsUserAttributes) {
this.fgsUserName = fgsUserName;
this.indexName = indexName;
this.fgsUserAttributes = fgsUserAttributes;
config = currentConfig;
if (null==this.fgsUserName || this.fgsUserName.length()==0) {
try {
this.fgsUserName = config.getProperty("fedoragsearch.testUserName");
} catch (ConfigException e) {
this.fgsUserName = "fedoragsearch.testUserName";
}
}
}
public String gfindObjects(
String query,
int hitPageStart,
int hitPageSize,
int snippetsMax,
int fieldMaxLength,
String indexName,
String sortFields,
String resultPageXslt)
throws java.rmi.RemoteException {
if (logger.isDebugEnabled())
logger.debug("gfindObjects" +
" query="+query+
" hitPageStart="+hitPageStart+
" hitPageSize="+hitPageSize+
" snippetsMax="+snippetsMax+
" fieldMaxLength="+fieldMaxLength+
" indexName="+indexName+
" sortFields="+sortFields+
" resultPageXslt="+resultPageXslt);
srf = config.getSearchResultFiltering();
params = new String[18];
params[0] = "OPERATION";
params[1] = "gfindObjects";
params[2] = "QUERY";
params[3] = query;
params[4] = "HITPAGESTART";
params[5] = Integer.toString(hitPageStart);
params[6] = "HITPAGESIZE";
params[7] = Integer.toString(hitPageSize);
params[8] = "INDEXNAME";
params[9] = indexName;
params[10] = "SORTFIELDS";
params[11] = sortFields;
params[12] = "RESULTPAGEXSLT";
params[13] = resultPageXslt;
params[14] = "FGSUSERNAME";
params[15] = fgsUserName;
params[16] = "SRFTYPE";
params[17] = config.getSearchResultFilteringType();
embeddedResult = new StringBuffer();
usingQuery = handleEmbeddedQueries("", query);
return "";
}
private String handleEmbeddedQueries(String embedType, String query)
throws GenericSearchException {
String newQuery = query;
if (logger.isDebugEnabled())
logger.debug("handleEmbeddedQueries embedType="+embedType+" query="+query);
String beginString = "(::";
String beginEndString = "::";
String endString = "::)";
String endBeginString = "::";
int beginLength = beginString.length();
int endLength = endString.length();
int beginIndex = query.indexOf(beginString);
int endIndex = query.indexOf(endString);
String embeddedEmbedType = "UNKNOWN";
if (logger.isDebugEnabled())
logger.debug("handleEmbeddedQueries beginIndex="+beginIndex+" endIndex="+endIndex+" newQuery=\n"+newQuery);
while (beginIndex > -1 && endIndex > -1 && beginIndex < endIndex) {
int beginEndIndex = newQuery.indexOf(beginEndString, beginIndex+beginLength);
embeddedEmbedType = newQuery.substring(beginIndex+beginLength, beginEndIndex);
endIndex = newQuery.indexOf(endBeginString+embeddedEmbedType+endString, beginEndIndex);
if (endIndex == -1) {
throw new GenericSearchException("handleEmbeddedQueries: No end for embedType="+embeddedEmbedType+" found."+" newQuery=\n"+newQuery);
}
String embeddedQuery = newQuery.substring(beginEndIndex+beginEndString.length(), endIndex);
String newQueryPart = embeddedQuery;
if (embeddedQuery.indexOf(beginString) > -1) {
newQueryPart = handleEmbeddedQueries(embeddedEmbedType, embeddedQuery);
}
String decodedEmbeddedQuery = "";
try {
decodedEmbeddedQuery = URLDecoder.decode(newQueryPart, "UTF-8");
} catch (UnsupportedEncodingException e) {
throw new GenericSearchException("handleEmbeddedQueries decode exception="+e.toString());
}
newQueryPart = processEmbeddedQuery(embeddedEmbedType, decodedEmbeddedQuery);
newQuery = newQuery.substring(0,beginIndex) + newQueryPart + newQuery.substring(endIndex+(endBeginString+embeddedEmbedType+endString).length());
beginIndex = newQuery.indexOf(beginString);
endIndex = newQuery.indexOf(endString);
if (logger.isDebugEnabled())
logger.debug("handleEmbeddedQueries embeddedEmbedType="+embeddedEmbedType+" beginIndex="+beginIndex+" endIndex="+endIndex+" newQuery=\n"+newQuery);
}
if ("".equals(embedType) && query.indexOf(beginString) == 0) {
embeddedResult = new StringBuffer(newQuery);
newQuery = "";
}
return newQuery;
}
private String processEmbeddedQuery(String embedType, String embeddedQuery)
throws GenericSearchException {
// embeddedQuery :: [[["reposName/"<reposName>"/"]["indexName/"<indexName>"/"]]["xsltName"/<xsltName>]["?"]]<parameters>
if (logger.isDebugEnabled())
logger.debug("processEmbeddedQuery embedType="+embedType+" embeddedQuery="+embeddedQuery);
String firstPart = "";
String secondPart = embeddedQuery;
int i = embeddedQuery.indexOf("?");
int j;
if (i == -1) {
// secondPart = "?" + secondPart;
} else {
firstPart = embeddedQuery.substring(0, i);
secondPart = embeddedQuery.substring(i+1);
}
String embeddedRepositoryName = config.getRepositoryName(null);
String embeddedIndexName = config.getIndexName(null);
String embeddedXsltName = "copyXml";
if (firstPart.length() > 0) {
StringTokenizer st = new StringTokenizer(firstPart, "/");
while (st.hasMoreTokens()) {
String t = st.nextToken();
String tv = "";
if (st.hasMoreTokens()) {
tv = st.nextToken();
}
if ("reposName".equals(t)) {
embeddedRepositoryName = tv;
}
if ("indexName".equals(t)) {
embeddedIndexName = tv;
}
if ("xsltName".equals(t)) {
embeddedXsltName = tv;
}
}
}
if (logger.isDebugEnabled())
logger.debug("processEmbeddedQuery embeddedRepositoryName="+embeddedRepositoryName+" embeddedIndexName="+embeddedIndexName+" embeddedXsltName="+embeddedXsltName);
String baseUrl = firstPart;
String userPassword = "";
if ("GSEARCH".equals(embedType)) {
try {
baseUrl = getBaseURL(config.getSoapBase())+"/rest";
} catch (Exception e) {
throw new GenericSearchException("processEmbeddedQuery GSEARCH getBaseURL exception=\n"+e.toString());
}
userPassword = config.getSoapUser()+":"+config.getSoapPass();
// secondPart = encodeQuery(secondPart, "query");
} else if ("RISEARCH".equals(embedType)) {
try {
baseUrl = getBaseURL(config.getFedoraSoap(embeddedRepositoryName))+"/risearch";
} catch (Exception e) {
throw new GenericSearchException("processEmbeddedQuery RISEARCH getBaseURL exception=\n"+e.toString());
}
userPassword = config.getFedoraUser(embeddedRepositoryName)+":"+config.getFedoraPass(embeddedRepositoryName);
secondPart = encodeQuery(secondPart, "query");
} else if ("SOLR".equals(embedType)) {
try {
baseUrl = config.getIndexBase(embeddedIndexName);
} catch (Exception e) {
throw new GenericSearchException("processEmbeddedQuery SOLR embeddedIndexName="+embeddedIndexName+" hasnoSolrserver exception=\n"+e.toString());
}
if (baseUrl == null) {
throw new GenericSearchException("processEmbeddedQuery SOLR embeddedIndexName="+embeddedIndexName+" hasnoSolrserver baseUrl=null");
}
baseUrl += "/select";
userPassword = config.getSoapUser()+":"+config.getSoapPass();
secondPart = encodeQuery(secondPart, "q");
}
String urlString = baseUrl+"?"+secondPart;
if (logger.isDebugEnabled())
logger.debug("processEmbeddedQuery userPassword="+userPassword+" url=\n"+urlString);
URL url = null;
try {
url = new URL(urlString);
} catch (MalformedURLException e) {
throw new GenericSearchException(e.toString());
}
InputStream content = null;
URLConnection conn = null;
try {
conn = url.openConnection();
} catch (IOException e) {
throw new GenericSearchException("processEmbeddedQuery url.openConnection() exception="+e.toString());
}
conn.setRequestProperty("Authorization",
"Basic "+(new BASE64Encoder()).encode(userPassword.getBytes()));
try {
conn.connect();
} catch (IOException e) {
throw new GenericSearchException("processEmbeddedQuery conn.connect() exception="+e.toString());
}
try {
content = (InputStream) conn.getContent();
} catch (IOException e) {
throw new GenericSearchException("processEmbeddedQuery conn.getContent() exception="+e.toString());
}
if (logger.isDebugEnabled())
logger.debug("processEmbeddedQuery after get content");
String[] params = new String[12];
params[0] = "OPERATION";
params[1] = "gfindObjects";
params[2] = "ACTION";
params[3] = "processEmbeddedQuery";
params[4] = "VALUE";
params[5] = urlString;
params[6] = "REPOSITORYNAME";
params[7] = embeddedRepositoryName;
params[8] = "INDEXNAME";
params[9] = embeddedIndexName;
params[10] = "RESULTPAGEXSLT";
params[11] = embeddedXsltName;
String xsltPath = config.getConfigName()+"/rest/"+embeddedXsltName;
StringBuffer resultXml = (new GTransformer()).transform(
xsltPath,
new StreamSource(content),
config.getURIResolver(embeddedIndexName),
params);
String newQueryPart = resultXml.toString().replaceAll("<", "<").replaceAll(">", ">");
String findString = "result:newQueryPart xmlns:result=\"http://www.w3.org/2001/sw/DataAccess/rf1/result\">";
i = resultXml.indexOf(findString);
if (i>-1) {
j = resultXml.indexOf("</result:newQueryPart", i);
if (j > -1) {
newQueryPart = resultXml.substring(i+findString.length(), j);
}
}
if (logger.isDebugEnabled())
logger.debug("processEmbeddedQuery newQueryPart=\n"+newQueryPart);
return newQueryPart;
}
public String encodeQuery(String secondPart, String queryName)
throws GenericSearchException {
if (logger.isDebugEnabled())
logger.debug("encodeQuery" + " queryName="+queryName + " secondPart="+secondPart);
String queryContents = "";
int i = secondPart.indexOf(queryName+"=");
int j = -1;
if (i > -1) {
j = secondPart.indexOf("&", i+queryName.length());
if (j == -1) {
j = secondPart.length();
}
queryContents = secondPart.substring(i+1+queryName.length(), j);
}
if (i == -1 || queryContents.length() == 0) {
throw new GenericSearchException("processEmbeddedQuery: No query contents found?"+" finalQuery=\n"+secondPart);
}
if (logger.isDebugEnabled())
logger.debug("encodeQuery" + " queryContents="+queryContents);
try {
queryContents = URLEncoder.encode(queryContents, "UTF-8");
} catch (UnsupportedEncodingException e) {
throw new GenericSearchException(e.toString());
}
String result = secondPart.substring(0, i+1+queryName.length()) + queryContents + secondPart.substring(j);
if (logger.isDebugEnabled())
logger.debug("encodeQuery" + " secondPart="+result);
return result;
}
public String browseIndex(
String startTerm,
int termPageSize,
String fieldName,
String indexName,
String resultPageXslt)
throws java.rmi.RemoteException {
if (logger.isDebugEnabled())
logger.debug("browseIndex" +
" startTerm="+startTerm+
" termPageSize="+termPageSize+
" fieldName="+fieldName+
" indexName="+indexName+
" resultPageXslt="+resultPageXslt);
params = new String[12];
params[0] = "OPERATION";
params[1] = "browseIndex";
params[2] = "STARTTERM";
params[3] = startTerm;
params[4] = "TERMPAGESIZE";
params[5] = Integer.toString(termPageSize);
params[6] = "INDEXNAME";
params[7] = indexName;
params[8] = "FIELDNAME";
params[9] = fieldName;
return "";
}
public String getRepositoryInfo(
String repositoryName,
String resultPageXslt) throws java.rmi.RemoteException {
if (logger.isDebugEnabled())
logger.debug("getRepositoryInfo" +
" repositoryName="+repositoryName+
" resultPageXslt="+resultPageXslt);
InputStream repositoryStream = null;
String repositoryInfoPath = "/"+config.getConfigName()+"/repository/"+config.getRepositoryName(repositoryName)+"/repositoryInfo.xml";
try {
repositoryStream = this.getClass().getResourceAsStream(repositoryInfoPath);
if (repositoryStream == null) {
throw new GenericSearchException("Error "+repositoryInfoPath+" not found in classpath");
}
} catch (IOException e) {
throw new GenericSearchException("Error "+repositoryInfoPath+" not found in classpath", e);
}
String xsltPath = config.getConfigName()
+"/repository/"+config.getRepositoryName(repositoryName)+"/"
+config.getRepositoryInfoResultXslt(repositoryName, resultPageXslt);
StringBuffer sb = (new GTransformer()).transform(
xsltPath,
new StreamSource(repositoryStream),
new String[] {});
return sb.toString();
}
public String getIndexInfo(
String indexName,
String resultPageXslt) throws java.rmi.RemoteException {
if (logger.isDebugEnabled())
logger.debug("getIndexInfo" +
" indexName="+indexName+
" resultPageXslt="+resultPageXslt);
return "";
}
public String updateIndex(
String action,
String value,
String repositoryNameParam,
String indexNames,
String indexDocXslt,
String resultPageXslt)
throws java.rmi.RemoteException {
if (logger.isDebugEnabled())
logger.debug("updateIndex" +
" action="+action+
" value="+value+
" repositoryName="+repositoryNameParam+
" indexNames="+indexNames+
" indexDocXslt="+indexDocXslt+
" resultPageXslt="+resultPageXslt);
StringBuffer resultXml = new StringBuffer();
String repositoryName = repositoryNameParam;
if (repositoryNameParam==null || repositoryNameParam.equals(""))
repositoryName = config.getRepositoryName(repositoryName);
resultXml.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
resultXml.append("<resultPage");
resultXml.append(" operation=\"updateIndex\"");
resultXml.append(" action=\""+action+"\"");
resultXml.append(" value=\""+value+"\"");
resultXml.append(" repositoryName=\""+repositoryName+"\"");
resultXml.append(" indexNames=\""+indexNames+"\"");
resultXml.append(" resultPageXslt=\""+resultPageXslt+"\"");
resultXml.append(" dateTime=\""+new Date()+"\"");
resultXml.append(">\n");
StringTokenizer st = new StringTokenizer(config.getIndexNames(indexNames));
while (st.hasMoreTokens()) {
String indexName = st.nextToken();
Operations ops = config.getOperationsImpl(fgsUserName, indexName);
resultXml.append(ops.updateIndex(action, value, repositoryName, indexName, indexDocXslt, resultPageXslt));
}
resultXml.append("</resultPage>\n");
if (logger.isDebugEnabled())
logger.debug("resultXml="+resultXml);
return resultXml.toString();
}
public void getFoxmlFromPid(
String pid,
String repositoryName)
throws java.rmi.RemoteException {
if (logger.isInfoEnabled())
logger.info("getFoxmlFromPid" +
" pid="+pid +
" repositoryName="+repositoryName);
FedoraAPIM apim = getAPIM(repositoryName,
config.getFedoraSoap(repositoryName),
config.getFedoraUser(repositoryName),
config.getFedoraPass(repositoryName),
config.getTrustStorePath(repositoryName),
config.getTrustStorePass(repositoryName) );
String fedoraVersion = config.getFedoraVersion(repositoryName);
String format = Constants.FOXML1_1.uri;
if(fedoraVersion != null && fedoraVersion.startsWith("2.")) {
format = Constants.FOXML1_0_LEGACY;
}
String realPID = getRealPID(pid);
try {
foxmlRecord = apim.export(realPID, format, "public");
} catch (RemoteException e) {
throw new FedoraObjectNotFoundException("Fedora Object "+realPID+" not found at "+repositoryName, e);
}
}
public String getDatastreamText(
String pid,
String repositoryName,
String dsId)
throws GenericSearchException {
return getDatastreamText(pid, repositoryName, dsId,
config.getFedoraSoap(repositoryName),
config.getFedoraUser(repositoryName),
config.getFedoraPass(repositoryName),
config.getTrustStorePath(repositoryName),
config.getTrustStorePass(repositoryName) );
}
public String getDatastreamText(
String pid,
String repositoryName,
String dsId,
String fedoraSoap,
String fedoraUser,
String fedoraPass,
String trustStorePath,
String trustStorePass) {
if (logger.isInfoEnabled())
logger.info("getDatastreamText"
+" pid="+pid
+" repositoryName="+repositoryName
+" dsId="+dsId
+" fedoraSoap="+fedoraSoap
+" fedoraUser="+fedoraUser
+" fedoraPass="+fedoraPass
+" trustStorePath="+trustStorePath
+" trustStorePass="+trustStorePass);
StringBuffer dsBuffer = new StringBuffer();
String mimetype = "";
ds = null;
if (dsId != null) {
try {
FedoraAPIA apia = getAPIA(
repositoryName,
fedoraSoap,
fedoraUser,
fedoraPass,
trustStorePath,
trustStorePass );
MIMETypedStream mts = apia.getDatastreamDissemination(getRealPID(pid),
dsId, null);
if (mts==null) return "";
ds = mts.getStream();
mimetype = mts.getMIMEType().split(";")[0]; // MIMETypedStream can include encoding, eg "text/xml;charset=utf-8" - split this off
} catch (Exception e) {
return emptyIndexField("getDatastreamText mimetype", pid, dsId, mimetype, e);
}
if (logger.isDebugEnabled())
logger.debug("getDatastreamText" +
" pid="+pid+
" dsId="+dsId+
" mimetype="+mimetype);
TransformerToText transformerToText = null;
if (ds != null) {
try {
transformerToText = new TransformerToText();
} catch (Exception e) {
return emptyIndexField("getDatastreamText TransformerToText", pid, dsId, mimetype, e);
}
if (logger.isDebugEnabled())
logger.debug("getDatastreamText" +
" pid="+pid+
" dsId="+dsId+
" TransformerToText="+transformerToText);
try {
dsBuffer = transformerToText.getText(ds, mimetype);
} catch (Exception e) {
if (logger.isDebugEnabled())
logger.debug("getDatastreamText" +
" pid="+pid+
" dsId="+dsId+
" TransformerToText="+transformerToText+
" Exception="+e);
return emptyIndexField("getDatastreamText getText", pid, dsId, mimetype, e);
} finally {
if (logger.isDebugEnabled())
logger.debug("getDatastreamText finally " +
" pid="+pid+
" dsId="+dsId);
}
}
}
if (logger.isDebugEnabled())
logger.debug("getDatastreamText" +
" pid="+pid+
" dsId="+dsId+
" mimetype="+mimetype+
" dsBuffer="+getDebugString(dsBuffer.toString()));
return dsBuffer.toString();
}
public StringBuffer getFirstDatastreamText(
String pid,
String repositoryName,
String dsMimetypes)
throws GenericSearchException {
return getFirstDatastreamText(pid, repositoryName, dsMimetypes,
config.getFedoraSoap(repositoryName),
config.getFedoraUser(repositoryName),
config.getFedoraPass(repositoryName),
config.getTrustStorePath(repositoryName),
config.getTrustStorePass(repositoryName));
}
public StringBuffer getFirstDatastreamText(
String pid,
String repositoryName,
String dsMimetypes,
String fedoraSoap,
String fedoraUser,
String fedoraPass,
String trustStorePath,
String trustStorePass)
throws GenericSearchException {
if (logger.isInfoEnabled())
logger.info("getFirstDatastreamText"
+" pid="+pid
+" dsMimetypes="+dsMimetypes
+" fedoraSoap="+fedoraSoap
+" fedoraUser="+fedoraUser
+" fedoraPass="+fedoraPass
+" trustStorePath="+trustStorePath
+" trustStorePass="+trustStorePass);
StringBuffer dsBuffer = new StringBuffer();
String mimetype = "";
Datastream[] dsds = null;
try {
FedoraAPIM apim = getAPIM(
repositoryName,
fedoraSoap,
fedoraUser,
fedoraPass,
trustStorePath,
trustStorePass );
dsds = apim.getDatastreams(getRealPID(pid), null, "A");
} catch (Exception e) {
return new StringBuffer(emptyIndexField("getFirstDatastreamText", pid, "", mimetype, e));
}
String mimetypes = config.getMimeTypes();
if (dsMimetypes!=null && dsMimetypes.length()>0)
mimetypes = dsMimetypes;
dsID = null;
if (dsds != null) {
int best = 99999;
for (int i = 0; i < dsds.length; i++) {
int j = mimetypes.indexOf(dsds[i].getMIMEType());
if (j > -1 && best > j) {
dsID = dsds[i].getID();
best = j;
mimetype = dsds[i].getMIMEType().split(";")[0]; // MIMETypedStream can include encoding, eg "text/xml;charset=utf-8" - split this off
}
}
}
ds = null;
if (dsID != null) {
try {
FedoraAPIA apia = getAPIA(
repositoryName,
fedoraSoap,
fedoraUser,
fedoraPass,
trustStorePath,
trustStorePass );
MIMETypedStream mts = apia.getDatastreamDissemination(getRealPID(pid),
dsID, null);
ds = mts.getStream();
mimetype = mts.getMIMEType().split(";")[0]; // MIMETypedStream can include encoding, eg "text/xml;charset=utf-8" - split this off
} catch (Exception e) {
return new StringBuffer(emptyIndexField("getFirstDatastreamText", pid, dsID, mimetype, e));
}
}
if (ds != null) {
dsBuffer = (new TransformerToText().getText(ds, mimetype));
}
if (logger.isDebugEnabled())
logger.debug("getFirstDatastreamText" +
" pid="+pid+
" dsID="+dsID+
" mimetype="+mimetype+
" dsBuffer="+dsBuffer.toString());
return dsBuffer;
}
public StringBuffer getDisseminationText(
String pid,
String repositoryName,
String bDefPid,
String methodName,
String parameters,
String asOfDateTime)
throws GenericSearchException {
return getDisseminationText(pid, repositoryName, bDefPid, methodName, parameters, asOfDateTime,
config.getFedoraSoap(repositoryName),
config.getFedoraUser(repositoryName),
config.getFedoraPass(repositoryName),
config.getTrustStorePath(repositoryName),
config.getTrustStorePass(repositoryName) );
}
public StringBuffer getDisseminationText(
String pid,
String repositoryName,
String bDefPid,
String methodName,
String parameters,
String asOfDateTime,
String fedoraSoap,
String fedoraUser,
String fedoraPass,
String trustStorePath,
String trustStorePass)
throws GenericSearchException {
if (logger.isInfoEnabled())
logger.info("getDisseminationText" +
" pid="+pid+
" bDefPid="+bDefPid+
" methodName="+methodName+
" parameters="+parameters+
" asOfDateTime="+asOfDateTime
+" fedoraSoap="+fedoraSoap
+" fedoraUser="+fedoraUser
+" fedoraPass="+fedoraPass
+" trustStorePath="+trustStorePath
+" trustStorePass="+trustStorePass);
StringTokenizer st = new StringTokenizer(parameters);
org.fcrepo.server.types.gen.Property[] params = new org.fcrepo.server.types.gen.Property[st.countTokens()];
for (int i=0; i<st.countTokens(); i++) {
String param = st.nextToken();
String[] nameAndValue = param.split("=");
params[i] = new org.fcrepo.server.types.gen.Property(nameAndValue[0], nameAndValue[1]);
}
if (logger.isDebugEnabled())
logger.debug("getDisseminationText" +
" #parameters="+params.length);
StringBuffer dsBuffer = new StringBuffer();
String mimetype = "";
ds = null;
if (pid != null) {
try {
FedoraAPIA apia = getAPIA(
repositoryName,
fedoraSoap,
fedoraUser,
fedoraPass,
trustStorePath,
trustStorePass );
MIMETypedStream mts = apia.getDissemination(getRealPID(pid), bDefPid,
methodName, params, asOfDateTime);
if (mts==null) {
throw new GenericSearchException("getDissemination returned null");
}
ds = mts.getStream();
mimetype = mts.getMIMEType().split(";")[0]; // MIMETypedStream can include encoding, eg "text/xml;charset=utf-8" - split this off
if (logger.isDebugEnabled())
logger.debug("getDisseminationText" +
" mimetype="+mimetype);
} catch (Exception e) {
return new StringBuffer(emptyIndexField("getDisseminationText", pid, bDefPid, mimetype, e));
}
}
if (ds != null) {
dsBuffer = (new TransformerToText().getText(ds, mimetype));
}
if (logger.isDebugEnabled())
logger.debug("getDisseminationText" +
" pid="+pid+
" bDefPid="+bDefPid+
" mimetype="+mimetype+
" dsBuffer="+dsBuffer.toString());
return dsBuffer;
}
public String getDatastreamTextFromTika(
String pid,
String repositoryName,
String dsId,
String indexFieldTagName,
String textIndexField,
String fedoraSoap,
String fedoraUser,
String fedoraPass,
String trustStorePath,
String trustStorePass) {
return getDatastreamFromTika(pid, repositoryName, dsId, indexFieldTagName, textIndexField, null, null, fedoraSoap, fedoraUser, fedoraPass, trustStorePath, trustStorePass);
}
public String getDatastreamMetadataFromTika(
String pid,
String repositoryName,
String dsId,
String indexFieldTagName,
String indexFieldNamePrefix,
String selectedFields,
String fedoraSoap,
String fedoraUser,
String fedoraPass,
String trustStorePath,
String trustStorePass) {
return getDatastreamFromTika(pid, repositoryName, dsId, indexFieldTagName, null, indexFieldNamePrefix, selectedFields, fedoraSoap, fedoraUser, fedoraPass, trustStorePath, trustStorePass);
}
public String getDatastreamFromTika(
String pid,
String repositoryName,
String dsId,
String indexFieldTagName,
String textIndexField,
String indexFieldNamePrefix,
String selectedFields,
String fedoraSoap,
String fedoraUser,
String fedoraPass,
String trustStorePath,
String trustStorePass) {
if (logger.isInfoEnabled())
logger.info("getDatastreamFromTika"
+" pid="+pid
+" repositoryName="+repositoryName
+" dsId="+dsId
+" indexFieldTagName="+indexFieldTagName
+" textIndexField="+textIndexField
+" indexFieldNamePrefix="+indexFieldNamePrefix
+" selectedFields="+selectedFields
+" fedoraSoap="+fedoraSoap
+" fedoraUser="+fedoraUser
+" fedoraPass="+fedoraPass
+" trustStorePath="+trustStorePath
+" trustStorePass="+trustStorePass);
StringBuffer dsBuffer = new StringBuffer();
String mimetype = "";
ds = null;
if (dsId != null) {
try {
FedoraAPIA apia = getAPIA(
repositoryName,
fedoraSoap,
fedoraUser,
fedoraPass,
trustStorePath,
trustStorePass );
MIMETypedStream mts = apia.getDatastreamDissemination(getRealPID(pid),
dsId, null);
if (mts==null) return "";
ds = mts.getStream();
mimetype = mts.getMIMEType().split(";")[0]; // MIMETypedStream can include encoding, eg "text/xml;charset=utf-8" - split this off
} catch (Exception e) {
return emptyIndexField("getDatastreamFromTika mimetype", pid, dsId, mimetype, e);
}
if (logger.isDebugEnabled())
logger.debug("getDatastreamFromTika" +
" pid="+pid+
" dsId="+dsId+
" mimetype="+mimetype);
TransformerToText transformerToText = null;
if (ds != null) {
try {
transformerToText = new TransformerToText();
} catch (Exception e) {
return emptyIndexField("getDatastreamFromTika TransformerToText", pid, dsId, mimetype, e);
}
int writeLimit = 100000;
try {
config = Config.getCurrentConfig();
if (config != null) writeLimit = config.getWriteLimit();
} catch (Exception e) {
logger.debug("getDatastreamFromTika Config.getCurrentConfig() exception=" + e);
}
if (logger.isDebugEnabled())
logger.debug("getDatastreamFromTika" +
" pid="+pid+
" dsId="+dsId+
" writeLimit="+writeLimit+
" TransformerToText="+transformerToText);
try {
dsBuffer = transformerToText.getFromTika(repositoryName+"/"+pid+"/"+dsId, ds, indexFieldTagName, textIndexField, indexFieldNamePrefix, selectedFields, writeLimit);
} catch (Exception e) {
if (logger.isDebugEnabled())
logger.debug("getDatastreamFromTika" +
" pid="+pid+
" dsId="+dsId+
" TransformerToText="+transformerToText+
" Exception="+e);
return emptyIndexField("getDatastreamFromTika getText", pid, dsId, mimetype, e);
} finally {
if (logger.isDebugEnabled())
logger.debug("getDatastreamFromTika finally " +
" pid="+pid+
" dsId="+dsId);
}
}
}
if (logger.isDebugEnabled())
logger.debug("getDatastreamFromTika" +
" pid="+pid+
" dsId="+dsId+
" mimetype="+mimetype+
" dsBuffer="+getDebugString(dsBuffer.toString()));
return dsBuffer.toString();
}
private String emptyIndexField(
String methodName,
String pid,
String dsId,
String mimetype,
Exception e) {
// no exception to be thrown,
// because then the index document being created will be deleted,
// instead put log warning and send empty index field text.
logger.warn("exception and empty index field from " + methodName +
" pid="+pid+
" dsId="+dsId+
" mimetype="+mimetype+
" exception="+e.toString());
return "";
}
private String getDebugString(String debugString) {
String result = debugString;
if (debugString.length()>debuglength) {
result = result.substring(0,debuglength)+"...\n...";
}
return result;
}
private String getRealPID(String pid) {
int j = pid.indexOf("$");
if (j==-1) j = pid.length();
return pid.substring(0, j);
}
public Node getDatastreamXML(
String pid,
String repositoryName,
String dsId,
String fedoraSoap,
String fedoraUser,
String fedoraPass,
String trustStorePath,
String trustStorePass) {
if (logger.isInfoEnabled())
logger.info("getDatastreamXML"
+" pid="+pid
+" repositoryName="+repositoryName
+" dsId="+dsId
+" fedoraSoap="+fedoraSoap
+" fedoraUser="+fedoraUser
+" fedoraPass="+fedoraPass
+" trustStorePath="+trustStorePath
+" trustStorePass="+trustStorePass);
Node datastreamXml = null;
try {
FedoraAPIA apia = getAPIA(
repositoryName,
fedoraSoap,
fedoraUser,
fedoraPass,
trustStorePath,
trustStorePass );
MIMETypedStream mts = apia.getDatastreamDissemination(getRealPID(pid), dsId, null);
if (mts==null)
return getExceptionNode("No MIMETypedStream for pid="+getRealPID(pid)+" dsid="+dsId);
datastreamXml = getDocumentNode(new String(mts.getStream()));
} catch (Exception e) {
return getExceptionNode(e.toString());
}
return datastreamXml;
}
private Node getDocumentNode(String xmlString) {
if (logger.isDebugEnabled())
logger.debug("getDocumentNode" + " xmlString="+xmlString);
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
factory.setNamespaceAware(true);
DocumentBuilder builder;
Document doc;
try {
builder = factory.newDocumentBuilder();
doc = builder.parse(new InputSource(new StringReader(xmlString)));
} catch (Exception e) {
return getExceptionNode(e.toString());
}
if (logger.isDebugEnabled())
logger.debug("getDocumentNode" + " doc root="+doc.getDocumentElement());
return doc;
}
private Node getExceptionNode(String exceptionMessage) {
logger.error("getExceptionNode" + " exceptionMessage="+exceptionMessage);
String xmlString = "<exception><message>"+exceptionMessage.replaceAll("<", "<")+"</message></exception>";
Node doc = getDocumentNode(xmlString);
return doc;
}
protected boolean indexDocExists(String pid)
throws GenericSearchException {
// two alternatives implemented, both timed
// the one chosen had time=0 ms, the other had time=0 or 1 ms
// the not chosen is left commented out, for future potential use
boolean indexDocExists = true;
// Date startTime = new Date();
// String queryString = "PID:\""+pid+"\"";
// QueryParser queryParser = new QueryParser(Version.LUCENE_36, null, new KeywordAnalyzer());
// Query query;
// try {
// query = queryParser.parse(queryString);
// } catch (ParseException e) {
// throw new GenericSearchException("indexDocExists parse "+queryString+" exception="+e);
// }
// searcher = new IndexSearcher(ir);
// TopDocs hits = null;
// try {
// hits = searcher.search(query, 1);
// } catch (Exception e) {
// throw new GenericSearchException("indexDocExists search "+queryString+" exception="+e);
// }
// int hitTotal = hits.totalHits;
// String timeusedms = Long.toString((new Date()).getTime() - startTime.getTime());
// if (hitTotal==0) indexDocExists = false;
// if (logger.isDebugEnabled())
// logger.debug("indexDocExists query="+queryString+" hitTotal="+hitTotal+" timeusedms="+timeusedms);
indexDocExists = false;
// startTime = new Date();
try {
// if (ir.termDocs(new Term("PID", pid)).next()) indexDocExists = true;
if (ir.docFreq(new Term("PID", pid))>0) indexDocExists = true;
} catch (IOException e) {
throw new GenericSearchException("indexDocExists docFreq "+pid+" exception="+e);
}
// timeusedms = Long.toString((new Date()).getTime() - startTime.getTime());
if (logger.isDebugEnabled())
logger.debug("indexDocExists pid="+pid+" indexDocExists="+indexDocExists);
// logger.debug("indexDocExists termDocs="+pid+" indexDocExists="+indexDocExists+" timeusedms="+timeusedms);
return indexDocExists;
}
protected String getPidFromObjectFilename(String filename) {
String pid = filename;
String filenameStart = "info%3Afedora%2F";
int i = filename.indexOf(filenameStart);
if (i>-1) {
pid = filename.substring(i+filenameStart.length()).replaceAll("%3A", ":");
}
return pid;
}
public static String encode(String in) {
String inStr = in;
if (inStr == null) {
inStr = "";
}
StringBuffer out = new StringBuffer();
for (int i = 0; i < inStr.length(); i++) {
char c = inStr.charAt(i);
if (c == '&') {
out.append("&");
} else if (c == '<') {
out.append("<");
} else if (c == '>') {
out.append(">");
} else if (c == '\"') {
out.append(""");
} else if (c == '\'') {
out.append("'");
} else {
out.append(c);
}
}
return out.toString();
}
}