/*
* eXist Open Source Native XML Database
* Copyright (C) 2001-07 The eXist Project
* http://exist-db.org
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
* $Id$
*/
package org.exist.fulltext;
import org.exist.collections.Collection;
import org.exist.dom.*;
import org.exist.indexing.AbstractStreamListener;
import org.exist.indexing.IndexController;
import org.exist.indexing.IndexWorker;
import org.exist.indexing.MatchListener;
import org.exist.indexing.OrderedValuesIndex;
import org.exist.indexing.QNamedKeysIndex;
import org.exist.indexing.StreamListener;
import org.exist.storage.DBBroker;
import org.exist.storage.ElementValue;
import org.exist.storage.FulltextIndexSpec;
import org.exist.storage.IndexSpec;
import org.exist.storage.NativeTextEngine;
import org.exist.storage.NodePath;
import org.exist.storage.TextSearchEngine;
import org.exist.storage.btree.DBException;
import org.exist.storage.txn.Txn;
import org.exist.util.DatabaseConfigurationException;
import org.exist.util.Occurrences;
import org.exist.xquery.XQueryContext;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import java.util.Map;
import java.util.Stack;
/**
* A legacy IndexWorker which wraps around {@link org.exist.storage.NativeTextEngine}. Right
* now, the fulltext index has only partly been moved into the new modularized indexing architecture
* and we thus need some glue classes to keep the old and new parts together. This class will become
* part of the new fulltext indexing module.
*/
public class FTIndexWorker implements OrderedValuesIndex, QNamedKeysIndex {

    /** Legacy text engine to which all index operations are delegated. */
    private NativeTextEngine engine;

    /** The index instance this worker was created for. */
    private FTIndex index;

    /** Broker passed in at construction time; used to look up collection index configurations. */
    private DBBroker broker;

    /** Document most recently passed to {@link #setDocument(DocumentImpl, int)}. */
    private DocumentImpl document;

    /**
     * Fulltext index specification of the current document's collection, or
     * <code>null</code> if the collection has no index configuration.
     */
    private FulltextIndexSpec config;

    /** Current processing mode, one of the constants used by {@link StreamListener}. */
    private int mode = StreamListener.UNKNOWN;

    /** Single stream listener instance handed out by {@link #getListener()}. */
    private FTStreamListener listener = new FTStreamListener();

    /** Lazily created match listener, reused across calls to {@link #getMatchListener(DBBroker, NodeProxy)}. */
    private FTMatchListener matchListener = null;

    /**
     * Creates a worker and the {@link NativeTextEngine} it wraps.
     *
     * @param index the index this worker belongs to; supplies the underlying BFile
     * @param broker the broker used by the engine and for configuration lookups
     * @throws DatabaseConfigurationException if the text engine cannot be created;
     *         the original {@link DBException} is preserved as the cause
     */
    public FTIndexWorker(FTIndex index, DBBroker broker) throws DatabaseConfigurationException {
        this.index = index;
        this.broker = broker;
        try {
            this.engine = new NativeTextEngine(broker, index.getBFile(), broker.getConfiguration());
        } catch (DBException e) {
            throw new DatabaseConfigurationException(e.getMessage(), e);
        }
    }

    /**
     * @return the id of the fulltext index, {@link FTIndex#ID}
     */
    public String getIndexId() {
        return FTIndex.ID;
    }

    /**
     * @return the fixed name <code>"ft-index-old"</code>
     */
    public String getIndexName() {
        return "ft-index-old";
    }

    /**
     * @return the wrapped text engine
     */
    public TextSearchEngine getEngine() {
        return engine;
    }

    /**
     * Not implemented for this worker.
     *
     * @return always <code>null</code>
     */
    public Object configure(IndexController controller, NodeList configNodes, Map namespaces) throws DatabaseConfigurationException {
        // Not implemented
        return null;
    }

    /**
     * Sets the current document with mode {@link StreamListener#UNKNOWN}.
     *
     * @param doc the document to work on
     */
    public void setDocument(DocumentImpl doc) {
        setDocument(doc, StreamListener.UNKNOWN);
    }

    /**
     * Sets the current document and processing mode, reloads the fulltext
     * configuration from the document's collection and forwards the document
     * to the text engine.
     *
     * @param doc the document to work on
     * @param newMode the processing mode to use for subsequent events
     */
    public void setDocument(DocumentImpl doc, int newMode) {
        document = doc;
        mode = newMode;
        IndexSpec indexConf = document.getCollection().getIndexConfiguration(broker);
        // Always reassign: a collection without index configuration must not
        // inherit the specification left over from a previously set document.
        config = (indexConf == null) ? null : indexConf.getFulltextIndexSpec();
        engine.setDocument(document);
    }

    /**
     * Changes the processing mode without touching the current document.
     *
     * @param newMode the processing mode to use for subsequent events
     */
    public void setMode(int newMode) {
        mode = newMode;
        // wolf: unnecessary call to setDocument?
        // setDocument(document, newMode);
    }

    /**
     * @return the document most recently passed to <code>setDocument</code>
     */
    public DocumentImpl getDocument() {
        return document;
    }

    /**
     * @return the current processing mode
     */
    public int getMode() {
        return mode;
    }

    /**
     * Determines the top-most node that has a qname-based fulltext index
     * defined, walking up from the given node through its stored parents.
     *
     * @param node the node that was modified
     * @param path the node path leading to <code>node</code>
     * @param includeSelf if <code>false</code> and <code>node</code> is an element,
     *        the last component of <code>path</code> is ignored when checking
     *        whether any qname index applies
     * @return the top-most node with a qname index, or <code>null</code> if the
     *         node is an attribute, no fulltext configuration exists, or no
     *         component of the path has a qname index
     */
    public StoredNode getReindexRoot(StoredNode node, NodePath path, boolean includeSelf) {
        if (node.getNodeType() == Node.ATTRIBUTE_NODE) {
            return null;
        }
        IndexSpec indexConf = node.getDocument().getCollection().getIndexConfiguration(broker);
        if (indexConf == null) {
            return null;
        }
        // Deliberately a local: the node may belong to a different collection
        // than the document currently set on this worker.
        FulltextIndexSpec ftConfig = indexConf.getFulltextIndexSpec();
        if (ftConfig == null) {
            return null;
        }

        int len = path.length();
        if (node.getNodeType() == Node.ELEMENT_NODE && !includeSelf) {
            len--;
        }
        boolean reindexRequired = false;
        for (int i = 0; i < len; i++) {
            if (ftConfig.hasQNameIndex(path.getComponent(i))) {
                reindexRequired = true;
                break;
            }
        }
        if (!reindexRequired) {
            return null;
        }

        // Walk up to the root and remember the last (i.e. top-most) indexed ancestor.
        StoredNode topMost = null;
        for (StoredNode current = node; current != null; current = current.getParentStoredNode()) {
            if (ftConfig.hasQNameIndex(current.getQName())) {
                topMost = current;
            }
        }
        return topMost;
    }

    /**
     * @return the stream listener owned by this worker
     */
    public StreamListener getListener() {
        return listener;
    }

    /**
     * Returns a match listener for the given node if at least one of its
     * matches was produced by the fulltext index.
     *
     * @param broker the broker passed on to the match listener
     * @param proxy the node whose matches are inspected
     * @return the (reused) match listener, or <code>null</code> if none of the
     *         node's matches carries the id {@link FTIndex#ID}
     */
    public MatchListener getMatchListener(DBBroker broker, NodeProxy proxy) {
        boolean needToFilter = false;
        for (Match m = proxy.getMatches(); m != null; m = m.getNextMatch()) {
            // Compare by value: reference equality on strings only works when
            // both sides happen to be the same interned instance.
            if (FTIndex.ID.equals(m.getIndexId())) {
                needToFilter = true;
                break;
            }
        }
        if (!needToFilter) {
            return null;
        }
        if (matchListener == null) {
            matchListener = new FTMatchListener(broker, proxy);
        } else {
            matchListener.reset(broker, proxy);
        }
        return matchListener;
    }

    /**
     * Asks the engine to apply pending work according to the current mode:
     * flush on STORE, drop the document's index on REMOVE_ALL_NODES and
     * remove on REMOVE_SOME_NODES. Any other mode is a no-op.
     */
    public void flush() {
        switch (mode) {
            case StreamListener.STORE :
                engine.flush();
                break;
            case StreamListener.REMOVE_ALL_NODES :
                engine.dropIndex(document);
                break;
            case StreamListener.REMOVE_SOME_NODES :
                engine.remove();
                break;
        }
    }

    /**
     * Drops the index entries of the given collection via the engine.
     *
     * @param collection the collection being removed
     * @param broker unused by this implementation
     */
    public void removeCollection(Collection collection, DBBroker broker) {
        engine.dropIndex(collection);
    }

    /**
     * Not implemented for this worker.
     *
     * @return always <code>false</code>
     */
    public boolean checkIndex(DBBroker broker) {
        // Not implemented
        return false;
    }

    /**
     * Not implemented for this worker.
     *
     * @return always an empty array
     */
    public Occurrences[] scanIndex(XQueryContext context, DocumentSet docs, NodeSet contextSet, Map hints) {
        // Not implemented
        return new Occurrences[0];
    }

    /**
     * Stream listener that feeds text, attribute and element content to the
     * wrapped engine. It reads <code>config</code>, <code>mode</code> and
     * <code>engine</code> from the enclosing worker, so it must stay an inner
     * (non-static) class.
     */
    private class FTStreamListener extends AbstractStreamListener {

        /**
         * One {@link ElementContent} buffer per currently open element that is
         * indexed by qname or matched as mixed content.
         */
        private Stack contentStack = new Stack();

        public FTStreamListener() {
        }

        /**
         * Pushes a content buffer if the element is indexed by qname or
         * matches a mixed-content definition, then delegates to the superclass.
         */
        public void startElement(Txn transaction, ElementImpl element, NodePath path) {
            if (config != null) {
                boolean mixedContent = config.matchMixedElement(path);
                if (mixedContent || config.hasQNameIndex(element.getQName())) {
                    ElementContent contentBuf =
                        new ElementContent(element.getQName(), mixedContent || config.preserveMixedContent(element.getQName()));
                    contentStack.push(contentBuf);
                }
            }
            super.startElement(transaction, element, path);
        }

        /**
         * Pops the buffer pushed by the matching {@link #startElement} call
         * (the same condition is evaluated) and passes the collected content to
         * the engine, then delegates to the superclass.
         */
        public void endElement(Txn transaction, ElementImpl element, NodePath path) {
            if (config != null) {
                boolean mixedContent = config.matchMixedElement(path);
                if (mixedContent || config.hasQNameIndex(element.getQName())) {
                    ElementContent contentBuf = (ElementContent) contentStack.pop();
                    element.getQName().setNameType(ElementValue.ELEMENT);
                    engine.storeText(element, contentBuf,
                        mixedContent ? NativeTextEngine.FOURTH_OPTION : NativeTextEngine.TEXT_BY_QNAME,
                        null, mode == REMOVE_ALL_NODES);
                }
            }
            super.endElement(transaction, element, path);
        }

        /**
         * Passes a text node to the engine and appends its content to every
         * open element buffer, then delegates to the superclass.
         * Without a configuration the text is always stored and tokenized; with
         * one, it is stored only if the path matches, and tokenization depends
         * on {@link FulltextIndexSpec#preserveContent(NodePath)}.
         *
         * @param transaction the current transaction
         * @param text the text node being processed
         * @param path the node path of the text node
         */
        public void characters(Txn transaction, CharacterDataImpl text, NodePath path) {
            if (config == null) {
                engine.storeText(text, NativeTextEngine.TOKENIZE, config, mode == REMOVE_ALL_NODES);
            } else if (config.match(path)) {
                int tokenize = config.preserveContent(path) ? NativeTextEngine.DO_NOT_TOKENIZE : NativeTextEngine.TOKENIZE;
                engine.storeText(text, tokenize, config, mode == REMOVE_ALL_NODES);
            }
            // Every enclosing buffered element receives the text, not just the innermost one.
            for (int i = 0; i < contentStack.size(); i++) {
                ElementContent next = (ElementContent) contentStack.get(i);
                next.append(text.getXMLString());
            }
            super.characters(transaction, text, path);
        }

        /**
         * Passes an attribute to the engine: once as a non-qname entry if there
         * is no configuration or the attribute path matches, and once as a
         * qname entry if a qname index is defined for the attribute. The
         * attribute's name is temporarily appended to <code>path</code> for the
         * duration of these checks.
         */
        public void attribute(Txn transaction, AttrImpl attrib, NodePath path) {
            path.addComponent(attrib.getQName());
            if (config == null || config.matchAttribute(path)) {
                engine.storeAttribute(attrib, null, NativeTextEngine.ATTRIBUTE_NOT_BY_QNAME, config, mode == REMOVE_ALL_NODES);
            }
            if (config != null && config.hasQNameIndex(attrib.getQName())) {
                engine.storeAttribute(attrib, null, NativeTextEngine.ATTRIBUTE_BY_QNAME, config, mode == REMOVE_ALL_NODES);
            }
            path.removeLastComponent();
            super.attribute(transaction, attrib, path);
        }

        /**
         * @return the enclosing worker
         */
        public IndexWorker getWorker() {
            return FTIndexWorker.this;
        }
    }
}