/*
* RapidMiner
*
* Copyright (C) 2001-2014 by RapidMiner and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapidminer.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.io.process;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;
import java.util.GregorianCalendar;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.logging.Level;
import javax.xml.XMLConstants;
import javax.xml.datatype.DatatypeConfigurationException;
import javax.xml.datatype.DatatypeFactory;
import javax.xml.datatype.XMLGregorianCalendar;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Result;
import javax.xml.transform.Source;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.TransformerFactoryConfigurationError;
import javax.xml.transform.dom.DOMResult;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.validation.SchemaFactory;
import javax.xml.validation.Validator;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
import com.rapidminer.tools.I18N;
import com.rapidminer.tools.LogService;
import com.rapidminer.tools.XMLException;
/**
* This class offers several convenience methods for treating XML documents-
*
* @author Sebastian Land, Simon Fischer
*/
public class XMLTools {
private static final Map<URI, Validator> VALIDATORS = new HashMap<URI, Validator>();
private final static DocumentBuilderFactory BUILDER_FACTORY;
public static final String SCHEMA_URL_PROCESS = "http://www.rapidminer.com/xml/schema/RapidMinerProcess";
static {
DocumentBuilderFactory domFactory = DocumentBuilderFactory.newInstance();
domFactory.setNamespaceAware(true);
BUILDER_FACTORY = domFactory;
}
/**
* Creates a new {@link DocumentBuilder} instance.
*
* Needed because DocumentBuilder is not thread-safe
* and crashes when different threads try to parse at the same time.
* @return
* @throws IOException if it fails to create a {@link DocumentBuilder}
*/
private static DocumentBuilder createDocumentBuilder() throws IOException {
try {
synchronized (BUILDER_FACTORY) {
return BUILDER_FACTORY.newDocumentBuilder();
}
} catch (ParserConfigurationException e) {
LogService.getRoot().log(Level.WARNING, "Unable to create document builder", e);
throw new IOException(e);
}
}
private static Validator getValidator(URI schemaURI) throws XMLException {
if (schemaURI == null) {
throw new NullPointerException("SchemaURL is null!");
}
synchronized (VALIDATORS) {
if (VALIDATORS.containsKey(schemaURI)) {
return VALIDATORS.get(schemaURI);
} else {
SchemaFactory factory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI);
Validator validator;
try {
validator = factory.newSchema(schemaURI.toURL()).newValidator();
} catch (SAXException e) {
throw new XMLException("Cannot parse XML schema: "+e.getMessage(), e);
} catch (MalformedURLException e) {
throw new XMLException("Cannot parse XML schema: "+e.getMessage(), e);
}
VALIDATORS.put(schemaURI, validator);
return validator;
}
}
}
/**
* This method should not be called since it is slower than {@link #parseAndValidate(InputStream, URI, String)}
*/
public static Document parseAndValidate(InputStream in, URL schemaURL, String sourceName) throws XMLException, IOException {
try {
return parseAndValidate(in, new URI(schemaURL.toString()), sourceName);
} catch (URISyntaxException e) {
throw new XMLException("Could not resolve URL.", e);
}
}
/**
* The schema URL might be given as URI for performance reasons.
*/
public static Document parseAndValidate(InputStream in, URI schemaURL, String sourceName) throws XMLException, IOException {
XMLErrorHandler errorHandler = new XMLErrorHandler(sourceName);
Document doc;
try {
doc = createDocumentBuilder().parse(in);
} catch (SAXException e) {
throw new XMLException(errorHandler.toString(), e);
}
Source source = new DOMSource(doc);
DOMResult result = new DOMResult();
Validator validator = getValidator(schemaURL);
validator.setErrorHandler(errorHandler);
try {
validator.validate(source, result);
} catch (SAXException e) {
throw new XMLException(errorHandler.toString(), e);
}
if (errorHandler.hasErrors()) {
throw new XMLException(errorHandler.toString());
}
return (Document) result.getNode();
}
public static Document parse(String string) throws SAXException, IOException {
return createDocumentBuilder().parse(new ByteArrayInputStream(string.getBytes(Charset.forName("UTF-8"))));
// new ReaderInputStream(new StringReader(string)));
}
public static Document parse(InputStream in) throws SAXException, IOException {
return createDocumentBuilder().parse(in);
}
public static Document parse(File file) throws SAXException, IOException {
return createDocumentBuilder().parse(file);
}
public static String toString(Document document) throws XMLException {
ByteArrayOutputStream buf = new ByteArrayOutputStream();
Charset utf8 = Charset.forName("UTF-8");
stream(document, buf, utf8);
return new String(buf.toByteArray(), utf8);
}
/**
* @param document
* @param encoding
* @return
* @throws XMLException
* @deprecated use {@link #toString(Document)} instead
*/
public static String toString(Document document, Charset encoding) throws XMLException {
ByteArrayOutputStream buf = new ByteArrayOutputStream();
stream(document, buf, encoding);
return new String(buf.toByteArray(), encoding);
}
public static void stream(Document document, File file, Charset encoding) throws XMLException {
OutputStream out = null;
try {
out = new FileOutputStream(file);
stream(document, out, encoding);
} catch (IOException e) {
throw new XMLException("Cannot save XML to " + file + ": " + e, e);
} finally {
if (out != null) {
try {
out.close();
} catch (IOException e) {
}
}
}
}
public static void stream(Document document, OutputStream out, Charset encoding) throws XMLException {
stream(new DOMSource(document), out, encoding);
}
public static void stream(DOMSource source, OutputStream out, Charset encoding) throws XMLException {
// we wrap this in a Writer to fix a Java bug
// see http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6296446
if (encoding == null) {
encoding = Charset.forName("UTF-8");
}
stream(source, new StreamResult(new OutputStreamWriter(out, encoding)), encoding);
}
public static void stream(Document document, Result result, Charset encoding) throws XMLException {
stream(new DOMSource(document), result, encoding);
}
public static void stream(DOMSource source, Result result, Charset encoding) throws XMLException {
stream(source, result, encoding, null);
}
public static void stream(DOMSource source, Result result, Charset encoding, Properties outputProperties) throws XMLException {
Transformer transformer;
try {
TransformerFactory tf = TransformerFactory.newInstance();
try {
tf.setAttribute("indent-number", Integer.valueOf(2));
} catch (IllegalArgumentException e) {
//LogService.getRoot().log(Level.WARNING, "XML transformer does not support indentation: " + e);
LogService.getRoot().log(Level.WARNING,
I18N.getMessage(LogService.getRoot().getResourceBundle(),
"com.rapidminer.io.process.XMLTools.xml_transformer_does_not_support_identation",
e));
}
transformer = tf.newTransformer();
transformer.setOutputProperty(OutputKeys.INDENT, "yes");
if (outputProperties != null)
transformer.setOutputProperties(outputProperties);
if (encoding != null) {
transformer.setOutputProperty(OutputKeys.ENCODING, encoding.name());
}
} catch (TransformerConfigurationException e) {
throw new XMLException("Cannot transform XML: " + e, e);
} catch (TransformerFactoryConfigurationError e) {
throw new XMLException("Cannot transform XML: " + e, e);
}
try {
transformer.transform(source, result);
} catch (TransformerException e) {
throw new XMLException("Cannot transform XML: " + e, e);
}
}
/**
* As {@link #getTagContents(Element, String, boolean)}, but never throws an exception. Returns null if can't
* retrieve string.
*/
public static String getTagContents(Element element, String tag) {
try {
return getTagContents(element, tag, false);
} catch (XMLException e) {
// cannot happen
return null;
}
}
public static String getTagContents(Element element, String tag, String deflt) {
String result = getTagContents(element, tag);
if (result == null) {
return deflt;
} else {
return result;
}
}
/**
* For a tag <parent> <tagName>content</tagName> <something>else</something> ... </parent>
*
* returns "content". This will return the content of the first occurring child element with name tagName. If no
* such tag exists and {@link XMLException} is thrown if throwExceptionOnError is true. Otherwise null is returned.
* */
public static String getTagContents(Element parent, String tagName, boolean throwExceptionOnError) throws XMLException {
NodeList nodeList = parent.getChildNodes();
for (int i = 0; i < nodeList.getLength(); i++) {
Node node = nodeList.item(i);
if (node instanceof Element && ((Element) node).getTagName().equals(tagName)) {
Element child = (Element) node;
return child.getTextContent();
}
}
if (throwExceptionOnError) {
throw new XMLException("Missing tag: <" + tagName + "> in <" + parent.getTagName() + ">.");
} else {
return null;
}
}
/**
* This will parse the text contents of an child element of element parent with the given tagName as integer. If no
* such child element can be found an XMLException is thrown. If more than one exists, the first is used. A {@link XMLException} is
* thrown if the text content is not a valid integer.
*/
public static int getTagContentsAsInt(Element element, String tag) throws XMLException {
final String string = getTagContents(element, tag, true);
try {
return Integer.parseInt(string);
} catch (NumberFormatException e) {
throw new XMLException("Contents of tag <" + tag + "> must be integer, but found '" + string + "'.");
}
}
/**
* This will parse the text contents of an child element of element parent with the given tagName as integer. If no
* such child element can be found, the given default value is returned. If more than one exists, the first is used. A
* {@link XMLException} is thrown if the text content is not a valid integer.
*/
public static int getTagContentsAsInt(Element element, String tag, int dfltValue) throws XMLException {
final String string = getTagContents(element, tag, false);
if (string == null) {
return dfltValue;
}
try {
return Integer.parseInt(string);
} catch (NumberFormatException e) {
throw new XMLException("Contents of tag <" + tag + "> must be integer, but found '" + string + "'.");
}
}
/**
* This will parse the text contents of an child element of element parent with the given tagName as long. If no
* such child element can be found an XMLException is thrown. If more than one exists, the first is used. A {@link XMLException} is
* thrown if the text content is not a valid long.
*/
public static long getTagContentsAsLong(Element element, String tag) throws XMLException {
final String string = getTagContents(element, tag, true);
try {
return Long.parseLong(string);
} catch (NumberFormatException e) {
throw new XMLException("Contents of tag <" + tag + "> must be integer, but found '" + string + "'.");
}
}
/**
* This will parse the text contents of an child element of element parent with the given tagName as long. If no
* such child element can be found, the given default value is returned. If more than one exists, the first is used. A
* {@link XMLException} is thrown if the text content is not a valid long.
*/
public static long getTagContentsAsLong(Element element, String tag, int dfltValue) throws XMLException {
final String string = getTagContents(element, tag, false);
if (string == null) {
return dfltValue;
}
try {
return Long.parseLong(string);
} catch (NumberFormatException e) {
throw new XMLException("Contents of tag <" + tag + "> must be integer, but found '" + string + "'.");
}
}
/**
* This will parse the text contents of an child element of element parent with the given tagName as double. If no
* such child element can be found, the given default value is returned. If more than one exists, the first is used. A
* {@link XMLException} is thrown if the text content is not a valid integer.
*/
public static double getTagContentsAsDouble(Element element, String tag, double dfltValue) throws XMLException {
final String string = getTagContents(element, tag, false);
if (string == null) {
return dfltValue;
}
try {
return Double.parseDouble(string);
} catch (NumberFormatException e) {
throw new XMLException("Contents of tag <" + tag + "> must be double, but found '" + string + "'.");
}
}
/**
* This will parse the text contents of an child element of element parent with the given tagName as boolean. If no
* such child element can be found the default is returned. If more than one exists, the first is used. A {@link NumberFormatException}
* is thrown if the text content is not a valid integer.
*/
public static boolean getTagContentsAsBoolean(Element parent, String tagName, boolean dflt) throws XMLException {
String string = getTagContents(parent, tagName, false);
if (string == null) {
return dflt;
}
try {
return Boolean.parseBoolean(string);
} catch (NumberFormatException e) {
throw new XMLException("Contents of tag <" + tagName + "> must be true or false, but found '" + string + "'.");
}
}
/**
* If parent has a direct child with the given name, the child's children are removed and are replaced by a single
* text node with the given text. If no direct child of parent with the given tag name exists, a new one is created.
*/
public static void setTagContents(Element parent, String tagName, String value) {
if (value == null) {
value = "";
}
Element child = null;
NodeList list = parent.getChildNodes();
for (int i = 0; i < list.getLength(); i++) {
Node node = list.item(i);
if (node instanceof Element) {
if (((Element) node).getTagName().equals(tagName)) {
child = (Element) node;
break;
}
}
}
if (child == null) {
child = parent.getOwnerDocument().createElement(tagName);
parent.appendChild(child);
} else {
while (child.hasChildNodes()) {
child.removeChild(child.getFirstChild());
}
}
child.appendChild(parent.getOwnerDocument().createTextNode(value));
}
/**
* This method removes all child elements with the given name of the given element.
*/
public static void deleteTagContents(Element parentElement, String name) {
NodeList children = parentElement.getElementsByTagName(name);
for (int i = 0; i < children.getLength(); i++) {
Element child = (Element) children.item(i);
parentElement.removeChild(child);
}
}
public static XMLGregorianCalendar getXMLGregorianCalendar(Date date) {
if (date == null) {
return null;
}
// Calendar calendar = Calendar.getInstance();
// calendar.setTimeInMillis(date.getTime());
DatatypeFactory datatypeFactory;
try {
datatypeFactory = DatatypeFactory.newInstance();
} catch (DatatypeConfigurationException e) {
throw new RuntimeException("Failed to create XMLGregorianCalendar: " + e, e);
}
GregorianCalendar c = new GregorianCalendar();
c.setTime(date);
return datatypeFactory.newXMLGregorianCalendar(c);
//
// XMLGregorianCalendar xmlGregorianCalendar = datatypeFactory.newXMLGregorianCalendar();
// xmlGregorianCalendar.setYear(calendar.get(Calendar.YEAR));
// xmlGregorianCalendar.setMonth(calendar.get(Calendar.MONTH) + 1);
// xmlGregorianCalendar.setDay(calendar.get(Calendar.DAY_OF_MONTH));
// xmlGregorianCalendar.setHour(calendar.get(Calendar.HOUR_OF_DAY));
// xmlGregorianCalendar.setMinute(calendar.get(Calendar.MINUTE));
// xmlGregorianCalendar.setSecond(calendar.get(Calendar.SECOND));
// xmlGregorianCalendar.setMillisecond(calendar.get(Calendar.MILLISECOND));
// // xmlGregorianCalendar.setTimezone(calendar.get(((Calendar.DST_OFFSET)+calendar.get(Calendar.ZONE_OFFSET))/(60*1000)));
// return xmlGregorianCalendar;
}
/**
* This will return the inner tag of the given element with the given tagName. If no such element can be found, or
* if there are more than one, an {@link XMLException} is thrown.
*/
public static Element getUniqueInnerTag(Element element, String tagName) throws XMLException {
return getUniqueInnerTag(element, tagName, true);
}
/**
* This method will return null if the element doesn't exist if obligatory is false. Otherwise an exception is
* thrown. If the element is not unique, an exception is thrown in any cases.
*/
public static Element getUniqueInnerTag(Element element, String tagName, boolean obligatory) throws XMLException {
NodeList children = element.getChildNodes();
Collection<Element> elements = new ArrayList<Element>();
for(int i=0; i<children.getLength(); i++) {
if(children.item(i) instanceof Element) {
Element child = (Element) children.item(i);
if(tagName.equals(child.getTagName())) {
elements.add(child);
}
}
}
switch (elements.size()) {
case 0:
if (obligatory)
throw new XMLException("Missing inner tag <" + tagName + "> inside <" + element.getTagName() + ">.");
else
return null;
case 1:
return elements.iterator().next();
default:
throw new XMLException("Inner tag <" + tagName + "> inside <" + element.getTagName() + "> must be unique, but found " + children.getLength() + ".");
}
}
/**
* This method will return a Collection of all Elements that are direct child elements of the given element and have
* the given tag name.
*/
public static Collection<Element> getChildElements(Element father, String tagName) {
LinkedList<Element> elements = new LinkedList<Element>();
NodeList list = father.getChildNodes();
for (int i = 0; i < list.getLength(); i++) {
Node node = list.item(i);
if (node instanceof Element) {
if (node.getNodeName().equals(tagName))
elements.add((Element) node);
}
}
return elements;
}
/**
* This method will return a Collection of all Elements that are direct child elements of the given element.
*/
public static Collection<Element> getChildElements(Element father) {
LinkedList<Element> elements = new LinkedList<Element>();
NodeList list = father.getChildNodes();
for (int i = 0; i < list.getLength(); i++) {
Node node = list.item(i);
if (node instanceof Element) {
elements.add((Element) node);
}
}
return elements;
}
/**
* This method will return the single inner child with the given name of the given father element. If obligatory is
* true, an Exception is thrown if the element is not present. If it's ambiguous, an execption is thrown in any
* case.
*/
public static Element getChildElement(Element father, String tagName, boolean mandatory) throws XMLException {
Collection<Element> children = getChildElements(father, tagName);
switch (children.size()) {
case 0:
if (mandatory)
throw new XMLException("Missing child tag <" + tagName + "> inside <" + father.getTagName() + ">.");
else
return null;
case 1:
return children.iterator().next();
default:
throw new XMLException("Child tag <" + tagName + "> inside <" + father.getTagName() + "> must be unique, but found " + children.size() + ".");
}
}
/**
* This is the same as {@link #getChildElement(Element, String, boolean)}, but its always
* obligatory to have the child element.
*
* @throws XMLException
*/
public static Element getUniqueChildElement(Element father, String tagName) throws XMLException {
return getChildElement(father, tagName, true);
}
/**
* This adds a single tag with the given content to the given parent element. The new tag is automatically appended.
*/
public static void addTag(Element parent, String name, String textValue) {
Element child = parent.getOwnerDocument().createElement(name);
child.setTextContent(textValue);
parent.appendChild(child);
}
/**
* Creates a new, empty document.
*/
public static Document createDocument() {
try {
DocumentBuilder builder = createDocumentBuilder();
return builder.newDocument();
} catch (IOException e) {
return null;
}
}
/**
* This will add an empty new tag to the given fatherElement with the given name.
*/
public static Element addTag(Element fatherElement, String tagName) {
Element createElement = fatherElement.getOwnerDocument().createElement(tagName);
fatherElement.appendChild(createElement);
return createElement;
}
/**
* Returns the unique child of the given element with the given tag name. This child tag must be unique, or an exception will be raised.
* If optional is false and the tag is missing, this method also raises an exception. Otherwise it returns null.
*/
public static Element getChildTag(Element element, String xmlTagName, boolean optional) throws XMLException {
NodeList children = element.getChildNodes();
Element found = null;
for (int i = 0; i < children.getLength(); i++) {
Node n = children.item(i);
if (n instanceof Element) {
if (((Element) n).getTagName().equals(xmlTagName)) {
if (found != null) {
throw new XMLException("Tag <" + xmlTagName + "> in <" + element.getTagName() + "> must be unique.");
} else {
found = (Element) n;
}
}
}
}
if (!optional && found == null) {
throw new XMLException("Tag <" + xmlTagName + "> in <" + element.getTagName() + "> is missing.");
} else {
return found;
}
}
/**
* Returns the contents of the inner tags with the given name as String array.
*/
public static String[] getChildTagsContentAsStringArray(Element father, String childElementName) {
Collection<Element> valueElements = XMLTools.getChildElements(father, childElementName);
String[] values = new String[valueElements.size()];
int i = 0;
for (Element valueElement : valueElements) {
values[i] = valueElement.getTextContent();
i++;
}
return values;
}
/**
* Returns the contents of the inner tags with the given name as int array.
*
* @throws XMLException
*/
public static int[] getChildTagsContentAsIntArray(Element father, String childElementName) throws XMLException {
Collection<Element> valueElements = XMLTools.getChildElements(father, childElementName);
int[] values = new int[valueElements.size()];
int i = 0;
for (Element valueElement : valueElements) {
try {
values[i] = Integer.valueOf(valueElement.getTextContent().trim());
} catch (NumberFormatException e) {
throw new XMLException("Invalid format for element content of type " + childElementName, e);
}
i++;
}
return values;
}
/**
* This method will get a XPath expression matching all elements given.
* This works by following this algorithm:
* 1. Check whether the last element is of same type
* Yes:
* if paths of elements are of same structure, keep it, but remove counters where necessary
* if not,
*/
public static String getXPath(Document document, Element...elements) {
Map<String, List<Element>> elementTypeElementsMap = new HashMap<String, List<Element>>();
for (Element element: elements) {
List<Element> typeElements = elementTypeElementsMap.get(element.getTagName());
if (typeElements == null) {
typeElements = new LinkedList<Element>();
elementTypeElementsMap.put(element.getTagName(), typeElements);
}
typeElements.add(element);
}
// for each single type of element build single longest common path of all elements
Element[] parentElements = new Element[elements.length];
for (int i = 0; i < elements.length; i++) {
parentElements[i] = (Element) elements[i].getParentNode();
}
return "";
}
}