/******************************************************************************* * Copyright 2012 André Rouél * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. ******************************************************************************/ package net.sf.uadetector.internal.data; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import javax.annotation.Nonnull; import net.sf.qualitycheck.Check; import net.sf.uadetector.internal.data.domain.Browser; import net.sf.uadetector.internal.data.domain.BrowserOperatingSystemMapping; import net.sf.uadetector.internal.data.domain.BrowserPattern; import net.sf.uadetector.internal.data.domain.BrowserType; import net.sf.uadetector.internal.data.domain.Device; import net.sf.uadetector.internal.data.domain.DevicePattern; import net.sf.uadetector.internal.data.domain.OperatingSystem; import net.sf.uadetector.internal.data.domain.OperatingSystemPattern; import net.sf.uadetector.internal.data.domain.Robot; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.xml.sax.Attributes; import org.xml.sax.InputSource; import org.xml.sax.SAXException; import org.xml.sax.SAXParseException; import org.xml.sax.helpers.DefaultHandler; public final class XmlDataHandler extends DefaultHandler { public enum Tag { /** * Tag name of a browser entry */ BROWSER("browser"), /** * Tag name of the ID of an browser pattern */ BROWSER_ID("browser_id"), /** * Tag name of the informational URL of a browser entry */ BROWSER_INFO_URL("browser_info_url"), /** * Tag name of a mapping entry between a browser and an operating system */ BROWSER_OS_MAPPING("browser_os"), /** * Tag name of a browser pattern */ BROWSER_PATTERN("browser_reg"), /** * Tag name of a browser type entry */ BROWSER_TYPE("browser_type"), /** * Tag name of the type ID of a browser */ BROWSER_TYPE_ID("type"), /** * Tag name of a producer company of an user agent */ COMPANY("company"), /** * Tag name of the URL of a producer company from an user agent */ COMPANY_URL("url_company"), /** * Tag name of a device */ DEVICE("device"), /** * Tag name of a device pattern */ DEVICE_ID("device_id"), /** * Tag name of the informational URL of an device entry */ DEVICE_INFO_URL("device_info_url"), /** * Tag name of a device pattern */ DEVICE_PATTERN("device_reg"), /** * Tag name of all devices */ DEVICES("devices"), /** * Tag name of all device patterns */ DEVICES_PATTERN("devices_reg"), /** * Tag name of an family of an user agent */ FAMILY("family"), /** * Tag name of the icon of an entry */ ICON("icon"), /** * Tag name of an ID of an user agent */ ID("id"), /** * Tag name of the product name of an user agent */ NAME("name"), /** * Tag name of an operating system entry */ OPERATING_SYSTEM("os"), /** * Tag name of the ID of an operating system pattern */ OPERATING_SYSTEM_ID("os_id"), /** * Tag name of the informational URL of an operating system entry */ OPERATING_SYSTEM_INFO_URL("os_info_url"), /** * Tag name of an operating system pattern */ OPERATING_SYSTEM_PATTERN("operating_system_reg"), /** * Tag name of the order of an user agent pattern */ PATTERN_ORDER("order"), /** * Tag name of the regular expression of an user agent pattern */ PATTERN_REGEX("regstring"), /** * Tag name of a robot entry */ ROBOT("robot"), /** * Tag name of the informational URL of a robot entry */ ROBOT_INFO_URL("bot_info_url"), /** * Tag name of the product URL of an user agent */ URL("url"), /** * Tag name of an user agent string of a robot entry */ USERAGENT("useragent"), /** * Tag name of the data version */ VERSION("version"); public static Tag evaluate(@Nonnull final String tagName) { Check.notNull(tagName, "tagName"); Tag result = null; for (final Tag tag : values()) { if (tag.getTagName().equalsIgnoreCase(tagName)) { result = tag; break; } } return result; } public static boolean isBrowserOsMappingTag(final String tagName) { return BROWSER_OS_MAPPING.getTagName().equalsIgnoreCase(tagName); } public static boolean isBrowserPatternTag(final String tagName) { return BROWSER_PATTERN.getTagName().equalsIgnoreCase(tagName); } public static boolean isBrowserTag(final String tagName) { return BROWSER.getTagName().equalsIgnoreCase(tagName); } public static boolean isBrowserTypeTag(final String tagName) { return BROWSER_TYPE.getTagName().equalsIgnoreCase(tagName); } public static boolean isDevicePatternTag(final String tagName) { return DEVICE_PATTERN.getTagName().equalsIgnoreCase(tagName); } public static boolean isDeviceTag(final String tagName) { return DEVICE.getTagName().equalsIgnoreCase(tagName); } public static boolean isIdTag(final String tagName) { return ID.getTagName().equalsIgnoreCase(tagName); } public static boolean isOperatingSystemPatternTag(final String tagName) { return OPERATING_SYSTEM_PATTERN.getTagName().equalsIgnoreCase(tagName); } public static boolean isOperatingSystemTag(final String tagName) { return OPERATING_SYSTEM.getTagName().equalsIgnoreCase(tagName); } public static boolean isRobotTag(final String tagName) { return ROBOT.getTagName().equalsIgnoreCase(tagName); } @Nonnull private String tagName; private Tag(@Nonnull final String tagName) { this.tagName = tagName; } @Nonnull public String getTagName() { return tagName; } } /** * Character set to read the internal Document Type Definition (DTD) of UAS data */ private static final String CHARSET = "UTF-8"; /** * Corresponding logger for this class */ private static final Logger LOG = LoggerFactory.getLogger(XmlDataHandler.class); /** * Path to the internal Document Type Definition (DTD) of UAS data files to be able to work completely offline */ protected static final String UASDATA_DEF = "uadetector/uasxmldata.dtd"; /** * URL to the Document Type Definition (DTD) of UAS data files */ protected static final String UASDATA_DEF_URL = "http://data.udger.com/uasxmldata_old.dtd"; /** * Logs an issue while parsing XML. * * @param prefix * log level as string to add at the beginning of the message * @param e * exception to log */ protected static void logParsingIssue(final String prefix, final SAXParseException e) { final StringBuilder buffer = new StringBuilder(); buffer.append(prefix); buffer.append(" while reading UAS data: "); buffer.append(e.getMessage()); buffer.append(" (line: "); buffer.append(e.getLineNumber()); if (e.getSystemId() != null) { buffer.append(" uri: "); buffer.append(e.getSystemId()); } buffer.append(")"); LOG.warn(buffer.toString()); } private Browser.Builder browserBuilder = new Browser.Builder(); private Device.Builder deviceBuilder = new Device.Builder(); private BrowserOperatingSystemMapping.Builder browserOsMappingBuilder = new BrowserOperatingSystemMapping.Builder(); private BrowserPattern.Builder browserPatternBuilder = new BrowserPattern.Builder(); private DevicePattern.Builder devicePatternBuilder = new DevicePattern.Builder(); private BrowserType.Builder browserTypeBuilder = new BrowserType.Builder(); private StringBuilder buffer = new StringBuilder(); private Tag currentTag = null; @Nonnull private final DataBuilder dataBuilder; /** * Flag to note that a fatal error occurred while parsing the document */ private boolean error = false; private boolean isBrowser = false; private boolean isBrowserOsMapping = false; private boolean isBrowserPattern = false; private boolean isBrowserType = false; private boolean isDevice = false; private boolean isDevicePattern = false; private boolean isOperatingSystem = false; private boolean isOperatingSystemPattern = false; private boolean isRobot = false; private OperatingSystem.Builder operatingSystemBuilder = new OperatingSystem.Builder(); private OperatingSystemPattern.Builder operatingSystemPatternBuilder = new OperatingSystemPattern.Builder(); private Robot.Builder robotBuilder = new Robot.Builder(); /** * Flag to note that a warning occurred while parsing the document */ private boolean warning = false; public XmlDataHandler(@Nonnull final DataBuilder builder) { Check.notNull(builder, "builder"); dataBuilder = builder; } private void addToBrowserBuilder() { if (isBrowser) { if (currentTag == Tag.ID) { browserBuilder.setId(buffer.toString()); } else if (currentTag == Tag.BROWSER_TYPE_ID) { browserBuilder.setTypeId(buffer.toString()); } else if (currentTag == Tag.NAME) { browserBuilder.setFamilyName(buffer.toString()); } else if (currentTag == Tag.URL) { browserBuilder.setUrl(buffer.toString()); } else if (currentTag == Tag.COMPANY) { browserBuilder.setProducer(buffer.toString()); } else if (currentTag == Tag.COMPANY_URL) { browserBuilder.setProducerUrl(buffer.toString()); } else if (currentTag == Tag.ICON) { browserBuilder.setIcon(buffer.toString()); } else if (currentTag == Tag.BROWSER_INFO_URL) { browserBuilder.setInfoUrl(buffer.toString()); } } } private void addToBrowserOsMappingBuilder() { if (isBrowserOsMapping && currentTag == Tag.BROWSER_ID) { browserOsMappingBuilder.setBrowserId(buffer.toString()); } else if (isBrowserOsMapping && currentTag == Tag.OPERATING_SYSTEM_ID) { browserOsMappingBuilder.setOperatingSystemId(buffer.toString()); } } private void addToBrowserPatternBuilder() { if (isBrowserPattern && currentTag == Tag.PATTERN_ORDER) { browserPatternBuilder.setPosition(buffer.toString()); } else if (isBrowserPattern && currentTag == Tag.BROWSER_ID) { browserPatternBuilder.setId(buffer.toString()); } else if (isBrowserPattern && currentTag == Tag.PATTERN_REGEX) { browserPatternBuilder.setPerlRegularExpression(buffer.toString()); } } private void addToBrowserTypeBuilder() { if (isBrowserType && currentTag == Tag.ID) { browserTypeBuilder.setId(buffer.toString()); } else if (isBrowserType && currentTag == Tag.BROWSER_TYPE_ID) { browserTypeBuilder.setName(buffer.toString()); } } private void addToDeviceBuilder() { if (isDevice) { if (currentTag == Tag.ID) { deviceBuilder.setId(buffer.toString()); } else if (currentTag == Tag.NAME) { deviceBuilder.setName(buffer.toString()); } else if (currentTag == Tag.ICON) { deviceBuilder.setIcon(buffer.toString()); } else if (currentTag == Tag.DEVICE_INFO_URL) { deviceBuilder.setInfoUrl(buffer.toString()); } } } private void addToDevicePatternBuilder() { if (isDevicePattern && currentTag == Tag.PATTERN_ORDER) { devicePatternBuilder.setPosition(buffer.toString()); } else if (isDevicePattern && currentTag == Tag.DEVICE_ID) { devicePatternBuilder.setId(buffer.toString()); } else if (isDevicePattern && currentTag == Tag.PATTERN_REGEX) { devicePatternBuilder.setPerlRegularExpression(buffer.toString()); } } private void addToOperatingSystemBuilder() { if (isOperatingSystem) { if (currentTag == Tag.ID) { operatingSystemBuilder.setId(buffer.toString()); } else if (currentTag == Tag.FAMILY) { operatingSystemBuilder.setFamily(buffer.toString()); } else if (currentTag == Tag.NAME) { operatingSystemBuilder.setName(buffer.toString()); } else if (currentTag == Tag.URL) { operatingSystemBuilder.setUrl(buffer.toString()); } else if (currentTag == Tag.COMPANY) { operatingSystemBuilder.setProducer(buffer.toString()); } else if (currentTag == Tag.COMPANY_URL) { operatingSystemBuilder.setProducerUrl(buffer.toString()); } else if (currentTag == Tag.ICON) { operatingSystemBuilder.setIcon(buffer.toString()); } else if (currentTag == Tag.OPERATING_SYSTEM_INFO_URL) { operatingSystemBuilder.setInfoUrl(buffer.toString()); } } } private void addToOperatingSystemPatternBuilder() { if (isOperatingSystemPattern) { if (currentTag == Tag.PATTERN_ORDER) { operatingSystemPatternBuilder.setPosition(buffer.toString()); } else if (currentTag == Tag.OPERATING_SYSTEM_ID) { operatingSystemPatternBuilder.setId(buffer.toString()); } else if (currentTag == Tag.PATTERN_REGEX) { operatingSystemPatternBuilder.setPerlRegularExpression(buffer.toString()); } } } private void addToRobotBuilder() { if (isRobot) { if (currentTag == Tag.ID) { robotBuilder.setId(buffer.toString()); } else if (currentTag == Tag.USERAGENT) { robotBuilder.setUserAgentString(buffer.toString()); } else if (currentTag == Tag.FAMILY) { robotBuilder.setFamilyName(buffer.toString()); } else if (currentTag == Tag.NAME) { robotBuilder.setName(buffer.toString()); } else if (currentTag == Tag.COMPANY) { robotBuilder.setProducer(buffer.toString()); } else if (currentTag == Tag.COMPANY_URL) { robotBuilder.setProducerUrl(buffer.toString()); } else if (currentTag == Tag.ICON) { robotBuilder.setIcon(buffer.toString()); } else if (currentTag == Tag.ROBOT_INFO_URL) { robotBuilder.setInfoUrl(buffer.toString()); } } } @Override public void characters(final char ch[], final int start, final int length) throws SAXException { buffer.append(new String(ch, start, length)); } @Override public void endElement(final String uri, final String localName, final String tagName) throws SAXException { transferToSpecificBuilderAndReset(); if (Tag.isRobotTag(tagName)) { saveAndResetRobotBuilder(); isRobot = false; } else if (Tag.isBrowserTag(tagName)) { saveAndResetBrowserBuilder(); isBrowser = false; } else if (Tag.isOperatingSystemTag(tagName)) { saveAndResetOperatingSystemBuilder(); isOperatingSystem = false; } else if (Tag.isBrowserTypeTag(tagName)) { saveAndResetBrowserTypeBuilder(); isBrowserType = false; } else if (Tag.isBrowserPatternTag(tagName)) { saveAndResetBrowserPatternBuilder(); isBrowserPattern = false; } else if (Tag.isBrowserOsMappingTag(tagName)) { saveAndResetBrowserOperatingSystemMapping(); isBrowserOsMapping = false; } else if (Tag.isOperatingSystemPatternTag(tagName)) { saveAndResetOperatingSystemPatternBuilder(); isOperatingSystemPattern = false; } else if (Tag.isDeviceTag(tagName)) { saveAndResetDeviceBuilder(); isDevice = false; } else if (Tag.isDevicePatternTag(tagName)) { saveAndResetDevicePatternBuilder(); isDevicePattern = false; } currentTag = null; } @Override public void error(final SAXParseException e) throws SAXException { error = true; logParsingIssue("Error", e); super.fatalError(e); } @Override public void fatalError(final SAXParseException e) throws SAXException { logParsingIssue("Fatal error", e); // this call throws a SAXException super.fatalError(e); } /** * Gets the flag whether an error occurred while parsing the document. * * @return {@code true} if an error occurred otherwise {@code false} */ public boolean hasError() { return error; } /** * Gets the flag whether an warning occurred while parsing the document. * * @return {@code true} if an warning occurred otherwise {@code false} */ public boolean hasWarning() { return warning; } @Override public InputSource resolveEntity(final String publicId, final String systemId) throws IOException, SAXException { if (UASDATA_DEF_URL.equals(systemId)) { final InputStream stream = this.getClass().getClassLoader().getResourceAsStream(UASDATA_DEF); return new InputSource(new InputStreamReader(stream, CHARSET)); } throw new SAXException("unable to resolve remote entity, systemId = " + systemId); } private void saveAndResetBrowserBuilder() { dataBuilder.appendBrowserBuilder(browserBuilder); browserBuilder = new Browser.Builder(); } private void saveAndResetBrowserOperatingSystemMapping() { dataBuilder.appendBrowserOperatingSystemMapping(browserOsMappingBuilder.build()); browserOsMappingBuilder = new BrowserOperatingSystemMapping.Builder(); } private void saveAndResetBrowserPatternBuilder() { try { dataBuilder.appendBrowserPattern(browserPatternBuilder.build()); } catch (final IllegalArgumentException e) { LOG.warn("Can not append browser pattern: " + e.getLocalizedMessage()); } browserPatternBuilder = new BrowserPattern.Builder(); } private void saveAndResetBrowserTypeBuilder() { dataBuilder.appendBrowserType(browserTypeBuilder.build()); browserTypeBuilder = new BrowserType.Builder(); } private void saveAndResetDeviceBuilder() { dataBuilder.appendDeviceBuilder(deviceBuilder); deviceBuilder = new Device.Builder(); } private void saveAndResetDevicePatternBuilder() { try { dataBuilder.appendDevicePattern(devicePatternBuilder.build()); } catch (final IllegalArgumentException e) { LOG.warn("Can not append device pattern: " + e.getLocalizedMessage()); } devicePatternBuilder = new DevicePattern.Builder(); } private void saveAndResetOperatingSystemBuilder() { dataBuilder.appendOperatingSystemBuilder(operatingSystemBuilder); operatingSystemBuilder = new OperatingSystem.Builder(); } private void saveAndResetOperatingSystemPatternBuilder() { try { dataBuilder.appendOperatingSystemPattern(operatingSystemPatternBuilder.build()); } catch (final IllegalArgumentException e) { LOG.warn("Can not append OS pattern: " + e.getLocalizedMessage()); } operatingSystemPatternBuilder = new OperatingSystemPattern.Builder(); } private void saveAndResetRobotBuilder() { dataBuilder.appendRobot(robotBuilder.build()); robotBuilder = new Robot.Builder(); } @Override public void startElement(final String uri, final String localName, final String tagName, final Attributes attributes) throws SAXException { if (Tag.isRobotTag(tagName)) { isRobot = true; } else if (Tag.isBrowserTag(tagName)) { isBrowser = true; } else if (Tag.isOperatingSystemTag(tagName)) { isOperatingSystem = true; } else if (Tag.isBrowserTypeTag(tagName)) { isBrowserType = true; } else if (Tag.isBrowserPatternTag(tagName)) { isBrowserPattern = true; } else if (Tag.isBrowserOsMappingTag(tagName)) { isBrowserOsMapping = true; } else if (Tag.isOperatingSystemPatternTag(tagName)) { isOperatingSystemPattern = true; } else if (Tag.isDeviceTag(tagName)) { isDevice = true; } else if (Tag.isDevicePatternTag(tagName)) { isDevicePattern = true; } currentTag = Tag.evaluate(tagName); } /** * Transfers all characters of a specific tag to the corresponding builder and resets the string buffer. */ private void transferToSpecificBuilderAndReset() { // version if (currentTag == Tag.VERSION) { dataBuilder.setVersion(buffer.toString()); } // robot browser addToRobotBuilder(); // build browser addToBrowserBuilder(); // build operating system addToOperatingSystemBuilder(); // build browser pattern addToBrowserPatternBuilder(); // build browser type addToBrowserTypeBuilder(); // build browser to operating system mapping addToBrowserOsMappingBuilder(); // build operating system pattern addToOperatingSystemPatternBuilder(); // build browser addToDeviceBuilder(); // build browser pattern addToDevicePatternBuilder(); buffer = new StringBuilder(); } @Override public void warning(final SAXParseException e) throws SAXException { warning = true; logParsingIssue("Warning", e); super.warning(e); } }