/* * eXist Open Source Native XML Database * Copyright (C) 2001-04 Wolfgang M. Meier * wolfgang@exist-db.org * http://exist-db.org * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * * $Id$ */ package org.exist.storage.serializers; import org.exist.Namespaces; import org.exist.dom.persistent.AttrImpl; import org.exist.dom.persistent.CDATASectionImpl; import org.exist.dom.persistent.CommentImpl; import org.exist.dom.persistent.DocumentImpl; import org.exist.dom.persistent.DocumentTypeImpl; import org.exist.dom.persistent.ElementImpl; import org.exist.dom.persistent.IStoredNode; import org.exist.dom.persistent.Match; import org.exist.dom.persistent.NodeProxy; import org.exist.dom.persistent.ProcessingInstructionImpl; import org.exist.dom.QName; import org.exist.dom.persistent.TextImpl; import org.exist.numbering.NodeId; import org.exist.storage.DBBroker; import org.exist.util.Configuration; import org.exist.util.serializer.AttrList; import org.exist.xquery.value.Type; import org.w3c.dom.Node; import org.w3c.dom.NodeList; import org.xml.sax.SAXException; import java.io.IOException; import java.util.Iterator; import java.util.List; import java.util.Set; import java.util.TreeSet; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.exist.storage.dom.INodeIterator; /** * Serializer implementation for the native database backend. * * @author wolf */ public class NativeSerializer extends Serializer { // private final static AttributesImpl EMPTY_ATTRIBUTES = new AttributesImpl(); private final static QName TEXT_ELEMENT = new QName("text", Namespaces.EXIST_NS, Namespaces.EXIST_NS_PREFIX); private final static QName ATTRIB_ELEMENT = new QName("attribute", Namespaces.EXIST_NS, Namespaces.EXIST_NS_PREFIX); private final static QName SOURCE_ATTRIB = new QName("source", Namespaces.EXIST_NS, Namespaces.EXIST_NS_PREFIX); private final static QName ID_ATTRIB = new QName("id", Namespaces.EXIST_NS, Namespaces.EXIST_NS_PREFIX); private final static QName MATCHES_ATTRIB = new QName("matches", Namespaces.EXIST_NS, Namespaces.EXIST_NS_PREFIX); private final static QName MATCHES_OFFSET_ATTRIB = new QName("matches-offset", Namespaces.EXIST_NS, Namespaces.EXIST_NS_PREFIX); private final static QName MATCHES_LENGTH_ATTRIB = new QName("matches-length", Namespaces.EXIST_NS, Namespaces.EXIST_NS_PREFIX); private final static Pattern P_ZERO_VALUES = Pattern.compile("0(,0)?"); private final static Matcher M_ZERO_VALUES = P_ZERO_VALUES.matcher(""); public NativeSerializer(DBBroker broker, Configuration config) { this(broker, config, null); } public NativeSerializer(DBBroker broker, Configuration config, List<String> chainOfReceivers) { super(broker, config, chainOfReceivers); } protected void serializeToReceiver(NodeProxy p, boolean generateDocEvent, boolean checkAttributes) throws SAXException { if(Type.subTypeOf(p.getType(), Type.DOCUMENT) || p.getNodeId() == NodeId.DOCUMENT_NODE) { serializeToReceiver(p.getOwnerDocument(), generateDocEvent); return; } setDocument(p.getOwnerDocument()); if (generateDocEvent) { receiver.startDocument(); } try(final INodeIterator domIter = broker.getNodeIterator(p)) { serializeToReceiver(null, domIter, p.getOwnerDocument(), checkAttributes, p.getMatches(), new TreeSet<String>()); } catch(final IOException e) { LOG.warn("Unable to close node iterator", e); } if(generateDocEvent) { receiver.endDocument(); } } protected void serializeToReceiver(DocumentImpl doc, boolean generateDocEvent) throws SAXException { final long start = System.currentTimeMillis(); setDocument(doc); final NodeList children = doc.getChildNodes(); if (generateDocEvent) {receiver.startDocument();} if (doc.getDoctype() != null){ if ("yes".equals(getProperty(EXistOutputKeys.OUTPUT_DOCTYPE, "no"))) { final DocumentTypeImpl docType = (DocumentTypeImpl)doc.getDoctype(); serializeToReceiver(docType, null, docType.getOwnerDocument(), true, null, new TreeSet<String>()); } } // iterate through children for (int i = 0; i < children.getLength(); i++) { final IStoredNode<?> node = (IStoredNode<?>) children.item(i); try(final INodeIterator domIter = broker.getNodeIterator(node)) { domIter.next(); final NodeProxy p = new NodeProxy(node); serializeToReceiver(node, domIter, (DocumentImpl) node.getOwnerDocument(), true, p.getMatches(), new TreeSet<String>()); } catch(final IOException ioe) { LOG.warn("Unable to close node iterator", ioe); } } if (generateDocEvent) {receiver.endDocument();} if (LOG.isDebugEnabled()) {LOG.debug("serializing document " + doc.getDocId() + " (" + doc.getURI() + ")" + " to SAX took " + (System.currentTimeMillis() - start) + " msec");} } protected void serializeToReceiver(IStoredNode node, INodeIterator iter, DocumentImpl doc, boolean first, Match match, Set<String> namespaces) throws SAXException { if (node == null && iter.hasNext()) { node = iter.next(); } if (node == null) { return; } // char ch[]; String cdata; switch (node.getNodeType()) { case Node.ELEMENT_NODE: receiver.setCurrentNode(node); String defaultNS = null; if (((ElementImpl) node).declaresNamespacePrefixes()) { // declare namespaces used by this element String prefix, uri; for (final Iterator<String> i = ((ElementImpl) node).getPrefixes(); i.hasNext();) { prefix = i.next(); if (prefix.length() == 0) { defaultNS = ((ElementImpl) node).getNamespaceForPrefix(prefix); receiver.startPrefixMapping("", defaultNS); namespaces.add(defaultNS); } else { uri = ((ElementImpl) node).getNamespaceForPrefix(prefix); receiver.startPrefixMapping(prefix, uri); namespaces.add(uri); } } } final String ns = defaultNS == null ? node.getNamespaceURI() : defaultNS; if (ns.length() > 0 && (!namespaces.contains(ns))) {receiver.startPrefixMapping(node.getPrefix(), ns);} final AttrList attribs = new AttrList(); if ((first && showId == EXIST_ID_ELEMENT) || showId == EXIST_ID_ALL) { attribs.addAttribute(ID_ATTRIB, node.getNodeId().toString()); /* * This is a proposed fix-up that the serializer could do * to make sure elements always have the namespace declarations * } else { // This is fix-up for when the node has a namespace but there is no // namespace declaration. String elementNS = node.getNamespaceURI(); Node parent = node.getParentNode(); if (parent instanceof ElementImpl) { ElementImpl parentElement = (ElementImpl)parent; String declaredNS = parentElement.getNamespaceForPrefix(node.getPrefix()); if (elementNS!=null && declaredNS==null) { // We need to declare the prefix as it was missed somehow receiver.startPrefixMapping(node.getPrefix(), elementNS); } else if (elementNS==null && declaredNS!=null) { // We need to declare the default namespace to be the no namespace receiver.startPrefixMapping(node.getPrefix(), elementNS); } else if (!elementNS.equals(defaultNS)) { // Same prefix but different namespace receiver.startPrefixMapping(node.getPrefix(), elementNS); } } else if (elementNS!=null) { // If the parent is the document, we must have a namespace // declaration when there is a namespace URI. receiver.startPrefixMapping(node.getPrefix(), elementNS); } */ } if (first && showId > 0) { // String src = doc.getCollection().getName() + "/" + doc.getFileName(); attribs.addAttribute(SOURCE_ATTRIB, doc.getFileURI().toString()); } final int children = node.getChildCount(); int count = 0; IStoredNode child = null; StringBuilder matchAttrCdata = null; StringBuilder matchAttrOffsetsCdata = null; StringBuilder matchAttrLengthsCdata = null; while (count < children) { child = iter.hasNext() ? iter.next() : null; if (child != null && child.getNodeType() == Node.ATTRIBUTE_NODE) { if ((getHighlightingMode() & TAG_ATTRIBUTE_MATCHES) == TAG_ATTRIBUTE_MATCHES && match != null && child.getNodeId().equals(match.getNodeId())) { if(matchAttrCdata == null) { matchAttrCdata = new StringBuilder(); matchAttrOffsetsCdata = new StringBuilder(); matchAttrLengthsCdata = new StringBuilder(); } else { matchAttrCdata.append(","); matchAttrOffsetsCdata.append(","); matchAttrLengthsCdata.append(","); } matchAttrCdata.append(child.getQName().toString()); matchAttrOffsetsCdata.append(match.getOffset(0).getOffset()); matchAttrLengthsCdata.append(match.getOffset(0).getLength()); match = match.getNextMatch(); } cdata = ((AttrImpl) child).getValue(); attribs.addAttribute(child.getQName(), cdata); count++; child.release(); } else { break; } } if(matchAttrCdata != null) { attribs.addAttribute(MATCHES_ATTRIB, matchAttrCdata.toString()); //mask the full-text index which doesn't provide offset and length M_ZERO_VALUES.reset(matchAttrOffsetsCdata); final boolean offsetsIsZero = M_ZERO_VALUES.matches(); M_ZERO_VALUES.reset(matchAttrLengthsCdata); final boolean lengthsIsZero = M_ZERO_VALUES.matches(); if(!offsetsIsZero && !lengthsIsZero) { attribs.addAttribute(MATCHES_OFFSET_ATTRIB, matchAttrOffsetsCdata.toString()); attribs.addAttribute(MATCHES_LENGTH_ATTRIB, matchAttrLengthsCdata.toString()); } } receiver.setCurrentNode(node); receiver.startElement(node.getQName(), attribs); while (count < children) { serializeToReceiver(child, iter, doc, false, match, namespaces); if (++count < children) { child = iter.hasNext() ? iter.next() : null; } else {break;} } receiver.setCurrentNode(node); receiver.endElement(node.getQName()); if (((ElementImpl) node).declaresNamespacePrefixes()) { String prefix; for (final Iterator<String> i = ((ElementImpl) node).getPrefixes(); i.hasNext();) { prefix = i.next(); receiver.endPrefixMapping(prefix); } } if (ns.length() > 0 && (!namespaces.contains(ns))) {receiver.endPrefixMapping(node.getPrefix());} node.release(); break; case Node.TEXT_NODE: if (first && createContainerElements) { final AttrList tattribs = new AttrList(); if (showId > 0) { tattribs.addAttribute(ID_ATTRIB, node.getNodeId().toString()); tattribs.addAttribute(SOURCE_ATTRIB, doc.getFileURI().toString()); } receiver.startElement(TEXT_ELEMENT, tattribs); } receiver.setCurrentNode(node); receiver.characters(((TextImpl) node).getXMLString()); if (first && createContainerElements) {receiver.endElement(TEXT_ELEMENT);} node.release(); break; case Node.ATTRIBUTE_NODE: if ((getHighlightingMode() & TAG_ATTRIBUTE_MATCHES) == TAG_ATTRIBUTE_MATCHES && match != null && node.getNodeId().equals(match.getNodeId())) { //TODO(AR) do we need to expand attribute matches here also? see {@code matchAttrCdata} above } cdata = ((AttrImpl) node).getValue(); if(first) { if (createContainerElements) { final AttrList tattribs = new AttrList(); if (showId > 0) { tattribs.addAttribute(ID_ATTRIB, node.getNodeId().toString()); tattribs.addAttribute(SOURCE_ATTRIB, doc.getFileURI().toString()); } tattribs.addAttribute(node.getQName(), cdata); receiver.startElement(ATTRIB_ELEMENT, tattribs); receiver.endElement(ATTRIB_ELEMENT); } else { if (this.outputProperties.getProperty("output-method") != null && "text".equals(this.outputProperties.getProperty("output-method"))) { receiver.characters(node.getNodeValue()); } else { LOG.warn("Error SENR0001: attribute '" + node.getQName() + "' has no parent element. " + "While serializing document " + doc.getURI()); throw new SAXException("Error SENR0001: attribute '" + node.getQName() + "' has no parent element"); } } } else {receiver.attribute(node.getQName(), cdata);} node.release(); break; case Node.DOCUMENT_TYPE_NODE: final String systemId = ((DocumentTypeImpl) node).getSystemId(); final String publicId = ((DocumentTypeImpl) node).getPublicId(); final String name = ((DocumentTypeImpl) node).getName(); receiver.documentType(name, publicId, systemId); break; case Node.PROCESSING_INSTRUCTION_NODE: receiver.processingInstruction( ((ProcessingInstructionImpl) node).getTarget(), ((ProcessingInstructionImpl) node).getData()); node.release(); break; case Node.COMMENT_NODE: final String comment = ((CommentImpl) node).getData(); char data[] = new char[comment.length()]; comment.getChars(0, data.length, data, 0); receiver.comment(data, 0, data.length); node.release(); break; case Node.CDATA_SECTION_NODE: final String str = ((CDATASectionImpl)node).getData(); if (first) {receiver.characters(str);} else { data = new char[str.length()]; str.getChars(0,str.length(), data, 0); receiver.cdataSection(data, 0, data.length); } break; //TODO : how to process other types ? -pb } } }