/*
* eXist Open Source Native XML Database
* Copyright (C) 2009 The eXist Project
* http://exist-db.org
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software Foundation
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* $Id$
*/
package org.exist.xquery.functions.util;
import java.io.IOException;
import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import org.apache.log4j.Logger;
import org.exist.EXistException;
import org.exist.dom.DocumentImpl;
import org.exist.dom.QName;
import org.exist.dom.StoredNode;
import org.exist.stax.EmbeddedXMLStreamReader;
import org.exist.storage.BrokerPool;
import org.exist.storage.DBBroker;
import org.exist.xquery.BasicFunction;
import org.exist.xquery.Cardinality;
import org.exist.xquery.FunctionSignature;
import org.exist.xquery.XPathException;
import org.exist.xquery.XQueryContext;
import org.exist.xquery.value.FunctionParameterSequenceType;
import org.exist.xquery.value.FunctionReturnSequenceType;
import org.exist.xquery.value.NodeValue;
import org.exist.xquery.value.Sequence;
import org.exist.xquery.value.SequenceType;
import org.exist.xquery.value.StringValue;
import org.exist.xquery.value.Type;
import org.exist.xquery.value.ValueSequence;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
/**
* Delivers the fragment between two nodes (normally milestones) of a document.
* It leads to more performance for most XML documents because it
* determines the fragment directly by the EmbeddedXmlReader and not by
* XQL operators.
* @author Josef Willenborg, Max Planck Institute for the history of science,
* http://www.mpiwg-berlin.mpg.de, jwillenborg@mpiwg-berlin.mpg.de
*/
public class GetFragmentBetween extends BasicFunction {
protected static final Logger logger = Logger.getLogger(GetFragmentBetween.class);
public final static FunctionSignature signature =
new FunctionSignature(
new QName("get-fragment-between", UtilModule.NAMESPACE_URI, UtilModule.PREFIX),
"Returns an xml fragment or a sequence of nodes between two elements (normally milestone elements). " +
"The $beginning-node represents the first node/milestone element, $ending-node, the second one. " +
"The third argument, $make-fragment, is " +
"a boolean value for the path completion. If it is set to true() the " +
"result sequence is wrapped into a parent element node. " +
"Example call of the function for getting the fragment between two TEI page break element nodes: " +
" let $fragment := util:get-fragment-between(//pb[1], //pb[2], true())" ,
new SequenceType[] {
new FunctionParameterSequenceType("beginning-node", Type.NODE, Cardinality.ZERO_OR_ONE, "The first node/milestone element"),
new FunctionParameterSequenceType("ending-node", Type.NODE, Cardinality.ZERO_OR_ONE, "The second node/milestone element"),
new FunctionParameterSequenceType("make-fragment", Type.BOOLEAN, Cardinality.ZERO_OR_ONE, "The flag make a fragment.")
},
new FunctionReturnSequenceType(Type.STRING, Cardinality.ONE, "the string containing the fragments between the two node/milestone elements."));
public GetFragmentBetween(XQueryContext context) {
super(context, signature);
}
/**
* Get the fragment between two elements (normally milestone elements) of a document
* @param args 1. first node (e.g. pb[10]) 2. second node (e.g.: pb[11]) 3. pathCompletion:
* open and closing tags before and after the fragment are appended (Default: true)
* @return the fragment between the two nodes
* @throws XPathException
*/
public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathException {
Sequence ms1 = args[0];
Sequence ms2 = args[1];
if (ms1.isEmpty()) {
throw new XPathException(this, "your first argument delivers an empty node (no valid node position in document)");
}
Node ms1Node = null;
if (! (ms1.itemAt(0) == null))
ms1Node = ((NodeValue) ms1.itemAt(0)).getNode();
Node ms2Node = null;
if (! (ms2.itemAt(0) == null))
ms2Node = ((NodeValue) ms2.itemAt(0)).getNode();
Sequence seqPathCompletion = args[2];
boolean pathCompletion = true; // default
if (! (seqPathCompletion.itemAt(0) == null)) {
pathCompletion = seqPathCompletion.effectiveBooleanValue();
}
// fetch the fragment between the two milestones
StringBuilder fragment = getFragmentBetween(ms1Node, ms2Node);
if (pathCompletion) {
String msFromPathName = getNodeXPath(ms1Node.getParentNode());
String openElementsOfMsFrom = pathName2XmlTags(msFromPathName, "open");
String closingElementsOfMsTo = "";
if (!(ms2Node == null)) {
String msToPathName = getNodeXPath(ms2Node.getParentNode());
closingElementsOfMsTo = pathName2XmlTags(msToPathName, "close");
}
fragment.insert(0, openElementsOfMsFrom);
fragment.append(closingElementsOfMsTo);
}
StringValue strValFragment = new StringValue(fragment.toString());
ValueSequence resultFragment = new ValueSequence();
resultFragment.add(strValFragment);
return resultFragment;
}
/**
* Fetch the fragment between two nodes (normally milestones) in an XML document
* @param node1 first node from which down to the node node2 the XML fragment is delivered as a string
* @param node2 the node to which down the XML fragment is delivered as a string
* @return fragment between the two nodes
* @throws XPathException
*/
private StringBuilder getFragmentBetween(Node node1, Node node2) throws XPathException {
StoredNode storedNode1 = (StoredNode) node1;
StoredNode storedNode2 = (StoredNode) node2;
String node1NodeId = storedNode1.getNodeId().toString();
String node2NodeId = "-1";
if (! (node2 == null))
node2NodeId = storedNode2.getNodeId().toString();
DocumentImpl docImpl = (DocumentImpl) node1.getOwnerDocument();
BrokerPool brokerPool = null;
DBBroker dbBroker = null;
StringBuilder resultFragment = new StringBuilder("");
String actualNodeId = "-2";
boolean getFragmentMode = false;
try {
brokerPool = docImpl.getBrokerPool();
dbBroker = brokerPool.get(null);
EmbeddedXMLStreamReader reader = null;
NodeList children = docImpl.getChildNodes();
for (int i = 0; i < children.getLength(); i++) {
StoredNode docChildStoredNode = (StoredNode) children.item(i);
reader = dbBroker.getXMLStreamReader(docChildStoredNode, false);
while (reader.hasNext() && ! node2NodeId.equals(actualNodeId)) {
int status = reader.next();
switch (status) {
case XMLStreamReader.START_DOCUMENT:
case XMLStreamReader.END_DOCUMENT:
break;
case XMLStreamReader.START_ELEMENT :
actualNodeId = reader.getNode().getNodeId().toString();
if (actualNodeId.equals(node1NodeId))
getFragmentMode = true;
if (actualNodeId.equals(node2NodeId))
getFragmentMode = false;
if (getFragmentMode) {
String startElementTag = getStartElementTag(reader);
resultFragment.append(startElementTag);
}
break;
case XMLStreamReader.END_ELEMENT :
if (getFragmentMode) {
String endElementTag = getEndElementTag(reader);
resultFragment.append(endElementTag);
}
break;
case XMLStreamReader.CHARACTERS :
if (getFragmentMode) {
String characters = getCharacters(reader);
resultFragment.append(characters);
}
break;
case XMLStreamReader.CDATA :
if (getFragmentMode) {
String cdata = getCDataTag(reader);
resultFragment.append(cdata);
}
break;
case XMLStreamReader.COMMENT :
if (getFragmentMode) {
String comment = getCommentTag(reader);
resultFragment.append(comment);
}
break;
case XMLStreamReader.PROCESSING_INSTRUCTION :
if (getFragmentMode) {
String piTag = getPITag(reader);
resultFragment.append(piTag);
}
break;
}
}
}
} catch (EXistException e) {
throw new XPathException(this, "An error occurred while getFragmentBetween: " + e.getMessage(), e);
} catch (XMLStreamException e) {
throw new XPathException(this, "An error occurred while getFragmentBetween: " + e.getMessage(), e);
} catch (IOException e) {
throw new XPathException(this, "An error occurred while getFragmentBetween: " + e.getMessage(), e);
} finally {
if (brokerPool != null)
brokerPool.release(dbBroker);
}
return resultFragment;
}
private String getStartElementTag(EmbeddedXMLStreamReader reader) {
String elemName = reader.getLocalName();
String elemAttrString = "";
String elemNsString ="";
int nsCount = reader.getNamespaceCount();
for (int ni = 0; ni < nsCount; ni++) {
String nsPrefix = reader.getNamespacePrefix(ni);
String nsUri = reader.getNamespaceURI(ni);
String nsString = "xmlns:" + nsPrefix + "=\"" + nsUri + "\"";
if (nsPrefix != null && nsPrefix.equals(""))
nsString = "xmlns" + "=\"" + nsUri + "\"";
elemNsString = elemNsString + " " +nsString;
}
int attrCount = reader.getAttributeCount();
for (int j = 0; j < attrCount; j++) {
String attrNamePrefix = reader.getAttributePrefix(j);
String attrName = reader.getAttributeLocalName(j);
String attrValue = reader.getAttributeValue(j);
String attrString = "";
if (! (attrNamePrefix == null || attrNamePrefix.length() == 0))
attrString = attrNamePrefix + ":";
if (attrName.toLowerCase().equals("href")) {
attrValue = escape(attrValue);
}
attrString = attrString + attrName + "=\"" + attrValue + "\"";
elemAttrString = elemAttrString + " " + attrString;
}
String elemPrefix = reader.getPrefix();
String elemPart = "";
if (! (elemPrefix == null || elemPrefix.length() == 0))
elemPart = elemPrefix + ":";
elemPart = elemPart + elemName;
String elementString = "<" + elemPart + elemNsString + elemAttrString + ">";
return elementString;
}
private String getEndElementTag(EmbeddedXMLStreamReader reader) {
String elemName = reader.getLocalName();
String elemPrefix = reader.getPrefix();
String elemPart = "";
if (! (elemPrefix == null || elemPrefix.length() == 0))
elemPart = elemPrefix + ":";
elemPart = elemPart + elemName;
return "</" + elemPart + ">";
}
private String getCharacters(EmbeddedXMLStreamReader reader) {
String xmlChars = reader.getText();
xmlChars = escape(xmlChars);
return xmlChars;
}
private String getCDataTag(EmbeddedXMLStreamReader reader) {
char[] chars = reader.getTextCharacters();
return "<![CDATA[\n" + new String(chars) + "\n]]>";
}
private String getCommentTag(EmbeddedXMLStreamReader reader) {
char[] chars = reader.getTextCharacters();
return "<!--" + new String(chars) + "-->";
}
private String getPITag(EmbeddedXMLStreamReader reader) {
String piTarget = reader.getPITarget();
String piData = reader.getPIData();
if (! (piData == null || piData.length() == 0))
piData = " " + piData;
else
piData = "";
return "<?" + piTarget + piData + "?>";
}
private String escape(String inputStr) {
StringBuilder resultStrBuf = new StringBuilder();
for (int i = 0; i < inputStr.length(); i++) {
char ch = inputStr.charAt(i);
switch (ch) {
case '<' :
resultStrBuf.append("<");
break;
case '>' :
resultStrBuf.append(">");
break;
case '&' :
resultStrBuf.append("&");
break;
case '\"' :
resultStrBuf.append(""");
break;
case '\'' :
resultStrBuf.append("'");
break;
default:
resultStrBuf.append(ch);
break;
}
}
return resultStrBuf.toString();
}
/**
* A path name delivered by function xnode-path (with special strings such as
* "@", "[", "]", " eq ") is converted to an XML String with xml tags,
* opened or closed such as the mode says
* @param pathName delivered by function xnode-path: Example: /archimedes[@xmlns:xlink eq "http://www.w3.org/1999/xlink"]/text/body/chap/p[@type eq "main"]/s/foreign[@lang eq "en"]
* @param mode open or close
* @return xml tags opened or closed
*/
private String pathName2XmlTags(String pathName, String mode) {
String result = "";
ArrayList<String> elements = pathName2ElementsWithAttributes(pathName);
if (mode.equals("open")) {
for (int i=0; i < elements.size(); i++) {
String element = elements.get(i);
element = element.replaceAll("\\[", " "); // opening element: replace open bracket with space
element = element.replaceAll(" eq ", "="); // opening element: remove @ character
element = element.replaceAll("@", ""); // opening element: remove @ character
element = element.replaceAll("\\]", ""); // opening element: remove closing bracket
if (! (element.length() == 0))
result += "<" + element + ">\n";
}
} else if (mode.equals("close")) {
for (int i=elements.size()-1; i >= 0; i--) {
String element = elements.get(i);
element = element.replaceAll("\\[[^\\]]*\\]", ""); // closing element: remove brackets with attributes
if (! (element.length() == 0))
result += "</" + element + ">\n";
}
}
return result;
}
private ArrayList<String> pathName2ElementsWithAttributes(String pathName) {
ArrayList<String> result = new ArrayList<String>();
if (pathName.charAt(0) == '/')
pathName = pathName.substring(1, pathName.length()); // without first "/" character
String regExpr = "[a-zA-Z0-9]+?\\[.+?\\]/" + "|" + "[a-zA-Z0-9]+?/" + "|" + "[a-zA-Z0-9]+?\\[.+\\]$" + "|" + "[a-zA-Z0-9]+?$"; // pathName example: "/archimedes[@xmlns:xlink eq "http://www.w3.org/1999/xlink"]/text/body/chap/p[@type eq "main"]/s/foreign[@lang eq "en"]"
Pattern p = Pattern.compile(regExpr, Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); // both flags enabled
Matcher m = p.matcher(pathName);
while (m.find()) {
int msBeginPos = m.start();
int msEndPos = m.end();
String elementName = pathName.substring(msBeginPos, msEndPos);
int elemNameSize = elementName.length();
if (elemNameSize > 0 && elementName.charAt(elemNameSize - 1) == '/')
elementName = elementName.substring(0, elemNameSize - 1); // without last "/" character
result.add(elementName);
}
return result;
}
private String getNodeXPath(Node n) {
//if at the document level just return /
if(n.getNodeType() == Node.DOCUMENT_NODE)
return "/";
/* walk up the node hierarchy
* - node names become path names
* - attributes become predicates
*/
StringBuilder buf = new StringBuilder(nodeToXPath(n));
while((n = n.getParentNode()) != null) {
if(n.getNodeType() == Node.ELEMENT_NODE) {
buf.insert(0, nodeToXPath(n));
}
}
return buf.toString();
}
/**
* Creates an XPath for a Node
* The nodes attribute's become predicates
*
* @param n The Node to generate an XPath for
* @return StringBuilder containing the XPath
*/
private StringBuilder nodeToXPath(Node n) {
StringBuilder xpath = new StringBuilder("/" + getFullNodeName(n));
NamedNodeMap attrs = n.getAttributes();
for(int i = 0; i < attrs.getLength(); i++) {
Node attr = attrs.item(i);
String fullNodeName = getFullNodeName(attr);
String attrNodeValue = attr.getNodeValue();
if (!fullNodeName.equals("") && (! (fullNodeName == null)))
xpath.append("[@" + fullNodeName + " eq \"" + attrNodeValue + "\"]");
}
return xpath;
}
/**
* Returns the full node name including the prefix if present
*
* @param n The node to get the name for
* @return The full name of the node
*/
private String getFullNodeName(Node n) {
String prefix = n.getPrefix();
String localName = n.getLocalName();
if (prefix == null || prefix.equals("")) {
if (localName == null || localName.equals(""))
return "";
else
return localName;
} else {
if (localName == null || localName.equals(""))
return "";
else
return prefix + ":" + localName;
}
}
}