/*
* <p><b>License and Copyright: </b>The contents of this file is subject to the
* same open source license as the Fedora Repository System at www.fedora-commons.org
* Copyright © 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 by The Technical University of Denmark.
* All rights reserved.</p>
*/
package dk.defxws.fgslucene;
import org.apache.log4j.Logger;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import java.io.IOException;
import java.io.StringReader;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
import dk.defxws.fedoragsearch.server.errors.GenericSearchException;
/**
* parses the IndexDocument and generates the Lucene document
*
* @author gsp@dtv.dk
* @version
*/
public class IndexDocumentHandler extends DefaultHandler {
private static final Logger logger = Logger.getLogger(IndexDocumentHandler.class);
private Document indexDocument;
private OperationsImpl owner;
private String repositoryName;
private StringBuffer elementBuffer;
private String pid;
private String fieldName;
private FieldType fType;
// http://lucene.apache.org/core/4_0_0/MIGRATE.html :
// If you previously used Document.setBoost,
// you must now pre-multiply the document boost into each Field.setBoost
// private float docboost;
private float boost;
private String dsId;
private String dsMimetypes;
private String bDefPid;
private String methodName;
private String parameters;
private String asOfDateTime;
public IndexDocumentHandler(
OperationsImpl owner,
String repositoryName,
String pidOrFilename,
StringBuffer indexDoc)
throws GenericSearchException {
this.owner = owner;
this.repositoryName = repositoryName;
elementBuffer = new StringBuffer();
SAXParserFactory spf = SAXParserFactory.newInstance();
spf.setNamespaceAware(true);
SAXParser parser;
try {
parser = spf.newSAXParser();
} catch (ParserConfigurationException e) {
throw new GenericSearchException("IndexDocument parser error pidOrFilename="+pidOrFilename, e);
} catch (SAXException e) {
throw new GenericSearchException("IndexDocument parser error pidOrFilename="+pidOrFilename, e);
}
try {
parser.parse(new InputSource(new StringReader(indexDoc.toString())), this);
} catch (IOException e) {
throw new GenericSearchException("IndexDocument parse error pidOrFilename="+pidOrFilename, e);
} catch (org.xml.sax.SAXParseException e) {
throw new GenericSearchException("IndexDocument parse error pidOrFilename="+pidOrFilename+" at line: " +
e.getLineNumber() + " column " +
e.getColumnNumber(), e);
} catch (SAXException e) {
throw new GenericSearchException("IndexDocument parse error pidOrFilename="+pidOrFilename, e);
}
}
public void startDocument() throws SAXException {
indexDocument = new Document();
}
public void startElement(String namespaceURI, String localName,
String qualifiedName, Attributes attrs) throws SAXException {
fieldName = "NoFieldName";
dsId = null;
dsMimetypes = null;
bDefPid = null;
methodName = "";
parameters = "";
asOfDateTime = "";
fType = new FieldType();
fType.setIndexed(true);
fType.setStored(true);
fType.setStoreTermVectors(false);
fType.setTokenized(true);
boost = 1;
// docboost = 1;
if ("IndexDocument".equals(localName) && attrs != null) {
for (int i = 0; i < attrs.getLength(); i++) {
String aName = attrs.getLocalName(i);
if ("".equals(aName)) { aName = attrs.getQName(i); }
String val = attrs.getValue(i);
if (aName=="PID")
pid = val.trim();
// if (aName=="boost")
// try {
// docboost = Float.parseFloat(val);
// } catch (NumberFormatException e) {
// docboost = Float.parseFloat("3");
// }
}
// http://lucene.apache.org/core/4_0_0/MIGRATE.html :
// If you previously used Document.setBoost,
// you must now pre-multiply the document boost into each Field.setBoost
// indexDocument.setBoost(docboost);
}
if ("IndexField".equals(localName) && attrs != null) {
for (int i = 0; i < attrs.getLength(); i++) {
String aName = attrs.getLocalName(i);
if ("".equals(aName)) { aName = attrs.getQName(i); }
String val = attrs.getValue(i);
if (aName=="IFname") fieldName = val;
if (aName=="dsId") dsId = val;
if (aName=="dsMimetypes") dsMimetypes = val;
if (aName=="bDefPid") bDefPid = val;
if (aName=="methodName") methodName = val;
if (aName=="parameters") parameters = val;
if (aName=="asOfDateTime") asOfDateTime = val;
if (aName=="index")
if ("ANALYZED".equals(val) || "TOKENIZED".equals(val)) fType.setTokenized(true);
else if ("NOT_ANALYZED".equals(val) || "UN_TOKENIZED".equals(val)) fType.setTokenized(false);
else if ("YES".equals(val)) fType.setIndexed(true);
else if ("NO".equals(val)) fType.setIndexed(false);
else if ("NO_NORMS".equals(val)) fType.setOmitNorms(true);
else if ("NOT_ANALYZED_NO_NORMS".equals(val)) {
fType.setTokenized(false);
fType.setOmitNorms(true);
}
else if ("ANALYZED_NO_NORMS".equals(val)) {
fType.setTokenized(true);
fType.setOmitNorms(true);
}
if (aName=="store")
if ("YES".equals(val)) fType.setStored(true);
else if ("NO".equals(val)) fType.setStored(false);
if (aName=="termVector")
if ("NO".equals(val)) fType.setStoreTermVectors(false);
else if ("YES".equals(val)) fType.setStoreTermVectors(true);
else if ("WITH_OFFSETS".equals(val)) fType.setStoreTermVectorOffsets(true);
else if ("WITH_POSITIONS".equals(val)) fType.setStoreTermVectorPositions(true);
else if ("WITH_POSITIONS_OFFSETS".equals(val)) {
fType.setStoreTermVectorOffsets(true);
fType.setStoreTermVectorPositions(true);
}
if (aName=="boost")
try {
boost = Float.parseFloat(val);
} catch (NumberFormatException e) {
boost = Float.parseFloat("1");
}
}
}
elementBuffer.setLength(0);
}
public void characters(char[] text, int start, int length)
throws SAXException {
elementBuffer.append(text, start, length);
}
public void endElement(String namespaceURI, String simpleName,
String qualifiedName) throws SAXException {
String ebs = elementBuffer.toString().trim();
if ("IndexField".equals(simpleName)) {
if (dsId != null) {
try {
ebs = owner.getDatastreamText(pid, repositoryName, dsId)
.toString();
} catch (GenericSearchException e) {
throw new SAXException(e);
}
} else if (dsMimetypes != null) {
try {
ebs = owner.getFirstDatastreamText(pid, repositoryName,
dsMimetypes).toString();
} catch (GenericSearchException e) {
throw new SAXException(e);
}
} else if (bDefPid != null) {
try {
ebs = owner.getDisseminationText(pid, repositoryName,
bDefPid, methodName, parameters, asOfDateTime)
.toString();
} catch (GenericSearchException e) {
throw new SAXException(e);
}
}
if (ebs.length() > 0) {
if (logger.isDebugEnabled())
logger.debug(fieldName + "=" + ebs);
Field f = new Field(fieldName, ebs, fType);
if (boost > Float.MIN_VALUE)
f.setBoost(boost);
indexDocument.add(f);
}
}
}
protected Document getIndexDocument() {
return indexDocument;
}
protected String getPid() {
return pid;
}
}