/*
* eXist Open Source Native XML Database
* Copyright (C) 2001-09 The eXist Project
* http://exist-db.org
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* $Id$
*/
package org.exist.xquery.functions.util;
import org.apache.log4j.Logger;
import org.exist.dom.DocumentSet;
import org.exist.dom.NodeSet;
import org.exist.dom.QName;
import org.exist.indexing.IndexWorker;
import org.exist.indexing.OrderedValuesIndex;
import org.exist.storage.DBBroker;
import org.exist.storage.IndexSpec;
import org.exist.storage.Indexable;
import org.exist.util.Occurrences;
import org.exist.util.ValueOccurrences;
import org.exist.xquery.*;
import org.exist.xquery.value.*;
import java.util.*;
/**
 * Implements the XQuery extension function <code>util:index-keys</code>, which reports
 * the keys of an index, for a given node set, to a user supplied callback function.
 *
 * With four arguments the keys are read from the broker's value index, looking at
 * indexes defined by QName as well as indexes defined by path. With five arguments
 * the keys are read from the index worker registered under the given index name.
 *
 * @author wolf
 */
public class IndexKeys extends BasicFunction {

    protected static final Logger logger = Logger.getLogger(IndexKeys.class);

    /** Function description shared by both signatures. */
    private static final String INDEX_KEYS_DESCRIPTION =
        "Can be used to query existing range indexes defined on a set of nodes. " +
        "All index keys defined for the given node set are reported to a callback function. " +
        "The function will check for indexes defined on path as well as indexes defined by QName. ";

    /** Description of the <code>$start-value</code> parameter shared by both signatures. */
    private static final String START_VALUE_DESCRIPTION =
        "Only index keys of the same type but being greater than $start-value will be reported for non-string types. For string types, only keys starting with the given prefix are reported.";

    /** Description of the <code>$function-reference</code> parameter shared by both signatures. */
    private static final String FUNCTION_REFERENCE_DESCRIPTION =
        "The function reference as created by the util:function function. " +
        "It can be an arbitrary user-defined function, but it should take exactly 2 arguments: " +
        "1) the current index key as found in the range index as an atomic value, 2) a sequence " +
        "containing three int values: a) the overall frequency of the key within the node set, " +
        "b) the number of distinct documents in the node set the key occurs in, " +
        "c) the current position of the key in the whole list of keys returned.";

    /**
     * The two supported signatures: the four argument form, and the five argument form
     * which additionally takes the name of the index to search.
     */
    public final static FunctionSignature[] signatures = {
        new FunctionSignature(
            new QName("index-keys", UtilModule.NAMESPACE_URI, UtilModule.PREFIX),
            INDEX_KEYS_DESCRIPTION,
            new SequenceType[] {
                new FunctionParameterSequenceType("node-set", Type.NODE, Cardinality.ZERO_OR_MORE, "The node set"),
                new FunctionParameterSequenceType("start-value", Type.ATOMIC, Cardinality.EXACTLY_ONE, START_VALUE_DESCRIPTION),
                new FunctionParameterSequenceType("function-reference", Type.FUNCTION_REFERENCE, Cardinality.EXACTLY_ONE, FUNCTION_REFERENCE_DESCRIPTION),
                new FunctionParameterSequenceType("max-number-returned", Type.INT, Cardinality.EXACTLY_ONE, "The maximum number of returned keys")
            },
            new FunctionReturnSequenceType(Type.ITEM, Cardinality.ZERO_OR_MORE, "the results of the eval of the $function-reference")),
        new FunctionSignature(
            new QName("index-keys", UtilModule.NAMESPACE_URI, UtilModule.PREFIX),
            INDEX_KEYS_DESCRIPTION,
            new SequenceType[] {
                new FunctionParameterSequenceType("node-set", Type.NODE, Cardinality.ZERO_OR_MORE, "The node set"),
                new FunctionParameterSequenceType("start-value", Type.ATOMIC, Cardinality.EXACTLY_ONE, START_VALUE_DESCRIPTION),
                new FunctionParameterSequenceType("function-reference", Type.FUNCTION_REFERENCE, Cardinality.EXACTLY_ONE, FUNCTION_REFERENCE_DESCRIPTION),
                new FunctionParameterSequenceType("max-number-returned", Type.INT, Cardinality.EXACTLY_ONE, "The maximum number of returned keys"),
                new FunctionParameterSequenceType("index", Type.STRING, Cardinality.EXACTLY_ONE, "The index in which the search is made")
            },
            new FunctionReturnSequenceType(Type.ITEM, Cardinality.ZERO_OR_MORE, "the results of the eval of the $function-reference"))
    };

    /**
     * Creates the function for one of the declared {@link #signatures}.
     *
     * @param context the XQuery context, handed on to {@link BasicFunction}
     * @param signature the signature this instance implements, handed on to {@link BasicFunction}
     */
    public IndexKeys(XQueryContext context, FunctionSignature signature) {
        super(context, signature);
    }

    /**
     * Scans the index for the given node set and calls the callback function once per
     * reported key. At most <code>max-number-returned</code> keys are reported.
     *
     * @param args the function arguments: node set, start value, function reference,
     *             maximum number of keys and, in the five argument form, the index name
     * @param contextSequence the context sequence passed on to the callback function
     * @return the concatenated results of all callback invocations, or the empty
     *         sequence if the node set is empty
     * @throws XPathException if the named index is unknown, if the start value cannot
     *         be used as an index key, or if evaluating the callback fails
     *
     * @see org.exist.xquery.BasicFunction#eval(org.exist.xquery.value.Sequence[],
     *      org.exist.xquery.value.Sequence)
     */
    public Sequence eval(Sequence[] args, Sequence contextSequence)
            throws XPathException {
        if (args[0].isEmpty()) {
            return Sequence.EMPTY_SEQUENCE;
        }
        NodeSet nodes = args[0].toNodeSet();
        DocumentSet docs = nodes.getDocumentSet();
        FunctionReference ref = (FunctionReference) args[2].itemAt(0);
        int max = ((IntegerValue) args[3].itemAt(0)).getInt();
        FunctionCall call = ref.getFunctionCall();
        Sequence result = new ValueSequence();
        if (this.getArgumentCount() == 5) {
            final String indexName = args[4].itemAt(0).getStringValue();
            IndexWorker indexWorker = context.getBroker().getIndexController().getWorkerByIndexName(indexName);
            // Alternate design:
            // IndexWorker indexWorker = context.getBroker().getBrokerPool().getIndexManager().getIndexByName(indexName).getWorker();
            if (indexWorker == null) {
                throw new XPathException(this, "Unknown index: " + indexName);
            }
            Map hints = new HashMap();
            hints.put(IndexWorker.VALUE_COUNT, new IntegerValue(max));
            if (indexWorker instanceof OrderedValuesIndex) {
                hints.put(OrderedValuesIndex.START_VALUE, args[1]);
            } else {
                // Name the class that is actually tested above, so the message cannot drift from the check.
                logger.warn(indexWorker.getClass().getName() + " isn't an instance of "
                    + OrderedValuesIndex.class.getName() + ". Start value '" + args[1] + "' ignored.");
            }
            Occurrences[] occur = indexWorker.scanIndex(context, docs, nodes, hints);
            //TODO : add an extra argument to pass the END_VALUE ?
            int len = Math.min(max, occur.length);
            for (int j = 0; j < len; j++) {
                // Keys of a named index are reported by their string representation.
                Sequence key = new StringValue(occur[j].getTerm().toString());
                result.addAll(reportKey(call, contextSequence, key,
                    occur[j].getOccurrences(), occur[j].getDocuments(), j + 1));
            }
        } else {
            // NOTE(review): the returned index type is not used; the call is kept only
            // in case it has side effects on the node set - confirm and remove if not.
            nodes.getIndexType();
            final Item startValue = args[1].itemAt(0);
            if (!(startValue instanceof Indexable)) {
                // Report an XQuery error instead of failing with a ClassCastException.
                throw new XPathException(this, "Start value '" + startValue.getStringValue()
                    + "' cannot be used as an index key");
            }
            Indexable indexable = (Indexable) startValue;
            ValueOccurrences[] occur = null;
            // First check for indexes defined on qname
            QName[] qnames = getDefinedIndexes(context.getBroker(), docs);
            if (qnames != null && qnames.length > 0) {
                occur = context.getBroker().getValueIndex().scanIndexKeys(docs, nodes, qnames, indexable);
            }
            // Also check if there's an index defined by path
            ValueOccurrences[] occur2 = context.getBroker().getValueIndex().scanIndexKeys(docs, nodes, indexable);
            // Merge the two results: keys found by qname come first, keys found by path follow
            if (occur == null || occur.length == 0) {
                occur = occur2;
            } else {
                ValueOccurrences[] merged = new ValueOccurrences[occur.length + occur2.length];
                System.arraycopy(occur, 0, merged, 0, occur.length);
                System.arraycopy(occur2, 0, merged, occur.length, occur2.length);
                occur = merged;
            }
            int len = Math.min(max, occur.length);
            for (int j = 0; j < len; j++) {
                result.addAll(reportKey(call, contextSequence, occur[j].getValue(),
                    occur[j].getOccurrences(), occur[j].getDocuments(), j + 1));
            }
        }
        logger.debug("Returning: " + result.getItemCount());
        return result;
    }

    /**
     * Calls the callback function for a single index key. The second callback argument
     * is a sequence of three unsigned int values: occurrences, documents and position.
     *
     * @param call the callback function to evaluate
     * @param contextSequence the context sequence passed on to the callback
     * @param key the index key, passed as first callback argument
     * @param occurrences the occurrence count reported for the key
     * @param documents the document count reported for the key
     * @param position the 1-based position of the key in the list of reported keys
     * @return whatever the callback function returns
     * @throws XPathException if evaluating the callback fails
     */
    private Sequence reportKey(FunctionCall call, Sequence contextSequence, Sequence key,
            int occurrences, int documents, int position) throws XPathException {
        // A fresh sequence per key, so no mutable state is shared between callback invocations.
        ValueSequence stats = new ValueSequence();
        stats.add(new IntegerValue(occurrences, Type.UNSIGNED_INT));
        stats.add(new IntegerValue(documents, Type.UNSIGNED_INT));
        stats.add(new IntegerValue(position, Type.UNSIGNED_INT));
        return call.evalFunction(contextSequence, null, new Sequence[] { key, stats });
    }

    /**
     * Check index configurations for all collections in the given DocumentSet and return
     * the QNames which have indexes defined on them.
     *
     * @param broker the broker used to look up each collection's index configuration
     * @param docs the document set whose collections are inspected
     * @return the distinct QNames found, in no particular order; an empty array
     *         (never null) if no collection declares an index by QName
     */
    private QName[] getDefinedIndexes(DBBroker broker, DocumentSet docs) {
        // A set, so that a QName indexed in several collections is returned only once.
        Set indexes = new HashSet();
        for (Iterator i = docs.getCollectionIterator(); i.hasNext(); ) {
            final org.exist.collections.Collection collection = (org.exist.collections.Collection) i.next();
            final IndexSpec idxConf = collection.getIndexConfiguration(broker);
            if (idxConf != null) {
                indexes.addAll(idxConf.getIndexedQNames());
            }
        }
        return (QName[]) indexes.toArray(new QName[indexes.size()]);
    }
}