/*
 * eXist Open Source Native XML Database
 * Copyright (C) 2005-2017 The eXist Project
 * http://exist-db.org
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, write to the Free Software Foundation
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */
package org.exist.xquery.util;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.net.ConnectException;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.util.regex.Pattern;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.exist.Namespaces;
import org.exist.dom.persistent.DocumentImpl;
import org.exist.dom.persistent.NodeProxy;
import org.exist.dom.memtree.SAXAdapter;
import org.exist.security.Permission;
import org.exist.security.PermissionDeniedException;
import org.exist.storage.BrokerPool;
import org.exist.storage.lock.Lock.LockMode;
import org.exist.xmldb.XmldbURI;
import org.exist.xquery.XPathException;
import org.exist.xquery.XQueryContext;
import org.exist.xquery.value.Sequence;
import org.exist.source.Source;
import org.exist.source.SourceFactory;
import org.exist.source.URLSource;
import org.w3c.dom.Document;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXNotRecognizedException;
import org.xml.sax.SAXNotSupportedException;
import org.xml.sax.XMLReader;

/**
 * Utilities for XPath doc related functions
 *
 * @author wolf
 * @author Pierrick Brihaye &lt;pierrick.brihaye@free.fr&gt;
 */
//TODO : many more improvements to handle efficiently any URI
public class DocUtils {

    protected static final Logger LOG = LogManager.getLogger(DocUtils.class);

    /**
     * Matches paths that start with a lower-case scheme followed by a colon.
     * Compiled once instead of on every lookup.
     */
    private static final Pattern URL_SCHEME_PATTERN = Pattern.compile("^[a-z]+:.*");

    /**
     * Loads the document identified by the given path.
     *
     * @param context The XQuery context
     * @param path Either a URL or the path of a database document
     * @return the document, or the empty sequence if no document was found
     * @throws XPathException if the document cannot be loaded or parsed
     * @throws PermissionDeniedException if access to the document is refused
     */
    public static Sequence getDocument(final XQueryContext context, final String path) throws XPathException, PermissionDeniedException {
        return getDocumentByPath(context, path);
    }

    /**
     * Checks whether a document can be loaded from the given path.
     *
     * @param context The XQuery context
     * @param path Either a URL or the path of a database document
     * @return true if a document was loaded and its effective boolean value is true;
     *         false otherwise, including when access to the document is denied
     * @throws XPathException if the document cannot be loaded or parsed
     */
    public static boolean isDocumentAvailable(final XQueryContext context, final String path) throws XPathException {
        try {
            final Sequence seq = getDocumentByPath(context, path);
            return (seq != null && seq.effectiveBooleanValue());
        } catch (final PermissionDeniedException e) {
            // a document that may not be read is reported as not available
            return false;
        }
    }

    /**
     * Dispatches to the URL loader or the database loader depending on the shape of the path.
     * Paths starting with a scheme other than "xmldb:" are treated as URLs.
     */
    private static Sequence getDocumentByPath(final XQueryContext context, final String path) throws XPathException, PermissionDeniedException {
        if (URL_SCHEME_PATTERN.matcher(path).matches() && !path.startsWith("xmldb:")) {
            /* URL */
            return getDocumentByPathFromURL(context, path);
        } else {
            /* Database documents */
            return getDocumentByPathFromDB(context, path);
        }
    }

    /**
     * Fetches the resource behind a URL and parses it into an in-memory document.
     *
     * @return the parsed in-memory document, or the empty sequence if the resource
     *         was not found (HTTP 404 or a FileNotFoundException)
     * @throws XPathException if the resource cannot be fetched or parsed
     * @throws PermissionDeniedException if a URL source answers with a response code
     *         other than 200 or 404
     */
    private static Sequence getDocumentByPathFromURL(final XQueryContext context, final String path) throws XPathException, PermissionDeniedException {
        XMLReader reader = null;
        try {
            final Source source = SourceFactory.getSource(context.getBroker(), "", path, false);
            try (final InputStream is = source.getInputStream()) {
                if (source instanceof URLSource) {
                    final int responseCode = ((URLSource) source).getResponseCode();
                    if (responseCode == HttpURLConnection.HTTP_NOT_FOUND) {
                        // Special case: '404'
                        return Sequence.EMPTY_SEQUENCE;
                    } else if (responseCode != HttpURLConnection.HTTP_OK) {
                        throw new PermissionDeniedException("Server returned code " + responseCode);
                    }
                }

                //TODO : process pseudo-protocols URLs more efficiently.
                // we use eXist's in-memory DOM implementation
                reader = context.getBroker().getBrokerPool().getParserPool().borrowXMLReader();
                //TODO : we should be able to cope with context.getBaseURI()
                final InputSource src = new InputSource(is);
                final SAXAdapter adapter = new SAXAdapter();
                reader.setContentHandler(adapter);
                reader.parse(src);
                final Document doc = adapter.getDocument();
                final org.exist.dom.memtree.DocumentImpl memtreeDoc = (org.exist.dom.memtree.DocumentImpl) doc;
                memtreeDoc.setContext(context);
                memtreeDoc.setDocumentURI(path);
                return memtreeDoc;
            }
        } catch (final ConnectException e) {
            // prevent long stack traces
            throw new XPathException(e.getMessage() + " (" + path + ")");
        } catch (final MalformedURLException e) {
            throw new XPathException(e.getMessage(), e);
        } catch (final FileNotFoundException ignored) {
            // Special case: a missing resource yields the empty sequence, not an error
            return Sequence.EMPTY_SEQUENCE;
        } catch (final SAXException e) {
            throw new XPathException("An error occurred while parsing " + path + ": " + e.getMessage(), e);
        } catch (final IOException e) {
            throw new XPathException("An error occurred while parsing " + path + ": " + e.getMessage(), e);
        } finally {
            // always hand the borrowed reader back to the pool
            if (reader != null) {
                context.getBroker().getBrokerPool().getParserPool().returnXMLReader(reader);
            }
        }
    }

    /**
     * Looks up a document stored in the database.
     *
     * @return a node proxy for the document, or the empty sequence if no resource
     *         exists at the resolved path
     * @throws XPathException if the path is not a valid URI or the resource is a binary file
     * @throws PermissionDeniedException if the context subject may not read the resource
     */
    private static Sequence getDocumentByPathFromDB(final XQueryContext context, final String path) throws XPathException, PermissionDeniedException {
        // check if the loaded documents should remain locked
        final LockMode lockType = context.lockDocumentsOnLoad() ? LockMode.WRITE_LOCK : LockMode.READ_LOCK;
        DocumentImpl doc = null;
        try {
            XmldbURI pathUri = XmldbURI.xmldbUriFor(path, false);

            final XmldbURI baseURI = context.getBaseURI().toXmldbURI();
            if (baseURI != null && !(baseURI.equals("") || baseURI.equals("/db"))) {
                // relative collection Path: add the current base URI
                pathUri = baseURI.resolveCollectionPath(pathUri);
            }

            // relative collection Path: add the current module call URI
            try {
                pathUri = XmldbURI.xmldbUriFor(context.getModuleLoadPath()).resolveCollectionPath(pathUri);
            } catch (final Exception e) {
                //workaround: ignore Windows issue
                LOG.error(e);
            }

            // try to open the document and acquire a lock
            doc = context.getBroker().getXMLResource(pathUri, lockType);
            if (doc == null) {
                return Sequence.EMPTY_SEQUENCE;
            } else {
                if (!doc.getPermissions().validate(context.getSubject(), Permission.READ)) {
                    throw new PermissionDeniedException("Insufficient privileges to read resource " + path);
                }

                if (doc.getResourceType() == DocumentImpl.BINARY_FILE) {
                    throw new XPathException("Document " + path + " is a binary resource, not an XML document. Please consider using the function util:binary-doc() to retrieve a reference to it.");
                }

                return new NodeProxy(doc);
            }
        } catch (final URISyntaxException e) {
            throw new XPathException(e);
        } finally {
            // whenever a document was obtained, release its update lock for the
            // requested lock type, on success as well as on failure
            if (doc != null) {
                doc.getUpdateLock().release(lockType);
            }
        }
    }

    /**
     * Utility function to parse an input stream into an in-memory DOM document.
     * The broker pool is taken from the broker of the given context.
     *
     * @param context The XQuery context
     * @param is The input stream to parse from
     * @return document The document that was parsed
     * @throws XPathException if the XML parser cannot be configured or the input cannot be parsed
     */
    public static org.exist.dom.memtree.DocumentImpl parse(final XQueryContext context, final InputStream is) throws XPathException {
        return parse(context.getBroker().getBrokerPool(), context, is);
    }

    /**
     * Utility function to parse an input stream into an in-memory DOM document.
     * The XML reader borrowed from the pool is returned to it whether or not parsing succeeds.
     *
     * @param pool The broker pool
     * @param context The XQuery context
     * @param is The input stream to parse from
     * @return document The document that was parsed
     * @throws XPathException if the XML parser cannot be configured or the input cannot be parsed
     */
    public static org.exist.dom.memtree.DocumentImpl parse(final BrokerPool pool, final XQueryContext context, final InputStream is) throws XPathException {
        // we use eXist's in-memory DOM implementation
        final XMLReader reader = pool.getParserPool().borrowXMLReader();
        try {
            final InputSource src = new InputSource(is);
            final SAXAdapter adapter = new SAXAdapter(context);
            reader.setContentHandler(adapter);
            reader.setProperty(Namespaces.SAX_LEXICAL_HANDLER, adapter);
            reader.parse(src);
            return adapter.getDocument();
        } catch (final SAXNotRecognizedException | SAXNotSupportedException e) {
            throw new XPathException("Error creating XML parser: " + e.getMessage(), e);
        } catch (final IOException | SAXException e) {
            throw new XPathException("Error while parsing XML: " + e.getMessage(), e);
        } finally {
            // previously the reader was never given back, leaking one pooled reader per call
            if (reader != null) {
                pool.getParserPool().returnXMLReader(reader);
            }
        }
    }
}