SafeXML.java example

Explorer
cachewolf-master
- lib
- src
  - CacheWolf
  - build
    - CabHelper.java
/*
GNU General Public License
CacheWolf is a software for PocketPC, Win and Linux that
enables paperless caching.
It supports the sites geocaching.com and opencaching.de

Copyright (C) 2006  CacheWolf development team
See http://www.cachewolf.de/ for more information.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
package CacheWolf.utils;

/**
 *	A class to replace unsafe XML characters with characters that a user
 *	"can read", and vice versa
 * 20061222: skg Modified cleanback to speed up the new index.xml reader
 */

import ewe.util.Hashtable;

public final class SafeXML {
    private static final char ENTITY_START = '&';
    private static final char ENTITY_END = ';';

    private final static Hashtable iso2htmlMappings = new Hashtable(300);
    static {
	final String[] mappingArray = new String[] {
		"'",
		"'", // Added 20061227 - not a valid HTML entity but used in XML
		""", "\"", "&", "&", "<", "<", ">", ">", " ", " ", "¡", "�", "¢", "�", "£", "�", "¤", "�", "¥", "�", "¦", "�", "§", "�", "¨", "�", "©", "�", "ª", "�",
		"«", "�", "¬", "�", "", "�", "®", "�", "¯", "�", "°", "�", "±", "�", "²", "�", "³", "�", "´", "�", "µ", "�", "¶", "�", "·", "�", "¸", "�", "¹", "�",
		"º", "�", "»", "�", "¼", "�", "½", "�", "¾", "�", "¿", "�", "À", "�", "Á", "�", "Â", "�", "Ã", "�", "Ä", "�", "Å", "�", "Æ", "�", "Ç", "�",
		"È", "�", "É", "�", "Ê", "�", "Ë", "�", "Ì", "�", "Í", "�", "Î", "�", "Ï", "�", "Ð", "�", "Ñ", "�", "Ò", "�", "Ó", "�", "Ô", "�", "Õ", "�",
		"Ö", "�", "×", "�", "Ø", "�", "Ù", "�", "Ú", "�", "Û", "�", "Ü", "�", "Ý", "�", "Þ", "�", "ß", "�", "à", "�", "á", "�", "â", "�", "ã", "�",
		"ä", "�", "å", "�", "æ", "�", "ç", "�", "è", "�", "é", "�", "ê", "�", "ë", "�", "ì", "�", "í", "�", "î", "�", "ï", "�", "ð", "�", "ñ", "�",
		"ò", "�", "ó", "�", "ô", "�", "õ", "�", "ö", "�", "÷", "�", "ø", "�", "ù", "�", "ú", "�", "û", "�", "ü", "�", "ý", "�", "þ", "�", "ÿ", "�",
		"–", "�" };
	for (int i = 0; i < mappingArray.length; i = i + 2) {
	    iso2htmlMappings.put(mappingArray[i], mappingArray[i + 1]);
	}
    }

    /**
     * Converts a <code>String</code> containing HTML entities to
     * a <code>String</code> containing only ISO8859-1 characters.
     * 
     * Uses <a href="http://www.ramsch.org/martin/uni/fmi-hp/iso8859-1.html">ISO8859-1 table by Martin Ramsch</a>.
     * 
     * @author <a href="mailto:ey@inweb.de">Christian Ey</a>
     * 
     * @version 1.0
     * @param htmlString
     *            The <code>String</code> containing HTML
     *            entities
     * @return A <code>String</code> containing only ISO8859-1
     *         characters
     */
    public final static String html2iso8859s1(String htmlString) {
	int indexStart;
	// return immediately if string is null or does not contain &
	if (htmlString != null && (indexStart = htmlString.indexOf(ENTITY_START)) >= 0) {
	    // copy everything from the beginning to entity start into buffer
	    StringBuffer isoBuffer = new StringBuffer(htmlString.substring(0, indexStart));
	    while (indexStart >= 0) {
		int indexEnd = htmlString.indexOf(ENTITY_END, indexStart + 1);
		if (indexEnd >= 0) {
		    int alternativeStart = htmlString.indexOf(ENTITY_START, indexStart + 1);
		    if ((alternativeStart > indexStart) && (alternativeStart < indexEnd)) {
			// a second index start is found inbetween current index start
			// and index end

			// flush the html string inbetween
			isoBuffer.append(htmlString.substring(indexStart, alternativeStart));

			// use the second index start and loop again
			indexStart = alternativeStart;
		    } else {
			String entity = htmlString.substring(indexStart, indexEnd + 1);
			appendEntityAsIsoChar(entity, isoBuffer);
			indexStart = htmlString.indexOf(ENTITY_START, indexEnd + 1);
			if (indexStart >= 0) {
			    // another entity start detected, flush the html string inbetween
			    isoBuffer.append(htmlString.substring(indexEnd + 1, indexStart));
			} else {
			    // no further entity start detected, flush rest of html string
			    isoBuffer.append(htmlString.substring(indexEnd + 1));
			}
		    }
		} else {
		    // entity start without matching entity end detected, ignore gracefully
		    isoBuffer.append(htmlString.substring(indexStart));
		    break;
		}
	    }
	    return isoBuffer.toString();
	} else {
	    // nothing to do
	    return htmlString;
	}
    }

    private final static void appendEntityAsIsoChar(String entity, StringBuffer addto) {
	if (entity.startsWith("&#")) {
	    try {
		int value;
		if (entity.charAt(2) == 'x' || entity.charAt(2) == 'X') {
		    // number in hexadecimal // not tested because I don't have an XML containing hexadecimal encodings
		    value = Integer.parseInt(entity.substring(3, entity.length() - 1), 16);
		} else {
		    // number is decimal
		    value = Integer.parseInt(entity.substring(2, entity.length() - 1));
		}
		if (value < 256)
		    addto.append((char) value);
		else
		    addto.append(entity); // no valid Iso
	    } catch (NumberFormatException e) {
		addto.append(entity); // not a valid number, insert original text
	    }

	} // number format exception
	else { // entity with a name like """
	    String isoCharacter = (String) iso2htmlMappings.get(entity);
	    if (isoCharacter != null) {
		// insert iso character instead of html entity
		addto.append(isoCharacter);
	    } else {
		// illegal entity detected, ignore gracefully
		addto.append(entity);
	    }
	}
    }

    /**
     * convert a single char to its equivalent HTML entity. Ordinary chars are
     * not changed. 160 ->  
     * 
     * @param c
     *            Char to convert
     * 
     * @return equivalent string eg. &, null means leave char as is.
     */
    private final static String charToEntity(char c) {
	switch (c) {
	case '"':
	    return """;
	case '&':
	    return "&";
	case '<':
	    return "<";
	case '>':
	    return ">";
	case '\'':
	    return "'";
	default:
	    return null;
	} // end switch
    } // end charToEntity

    /**
     * Converts text to HTML by quoting dangerous characters. Text must not
     * already contain entities. e.g. " ==> " < ==> < ordinary text
     * passes unchanged. Does not convert space to  
     * 
     * @param text
     *            raw text to be processed.
     * 
     * @return translated text, or null if input is null.
     */
    public final static String string2Html(String text) {
	if (text == null)
	    return null;
	int originalTextLength = text.length();
	StringBuffer sb = new StringBuffer(originalTextLength * 110 / 100);
	int charsToAppend = 0;
	for (int i = 0; i < originalTextLength; i++) {
	    char c = text.charAt(i);
	    String entity = charToEntity(c);
	    if (entity == null) {
		// we could sb.append( c ), but that would be slower
		// than saving them up for a big append.
		charsToAppend++;
	    } else {
		if (charsToAppend != 0) {
		    sb.append(text.substring(i - charsToAppend, i));
		    charsToAppend = 0;
		}
		sb.append(entity);
	    }
	} // end for
	  // append chars to the right of the last entity.
	if (charsToAppend != 0) {
	    sb.append(text.substring(originalTextLength - charsToAppend, originalTextLength));
	}

	// if result is not longer, we did not do anything. Save RAM.
	return (sb.length() == originalTextLength) ? text : sb.toString();
    } // end insertEntities

    /**
     * Converts a data string to something that is safe to use inside
     * an XML file (like prefs.xml) - entities like & are *NOT*
     * valid XML unless declared specially, so we must use the numerical
     * values here.
     * 
     * @param str (String) raw text to be processed
     * 
     * @return (String) translated text, or null if input is null
     */
    public final static String cleanGPX(String str) {
	String dummy = STRreplace.replace(str, "&", "&");
	// "&#" --> "&#"); //Darstellung Umlaute etc : siehe http://www.geoclub.de/viewtopic.php?f=40&t=50635&p=798796#p798796
	// aber so etwas nicht "&#entry15063" --> !!not!! "&#entry15063" (Cache GCPB5P export -> gpx, import -> mapsource)
	int pos = 0;
	while (pos > -1) {
	    pos = dummy.indexOf("&#", pos);
	    int pos1 = dummy.indexOf(";", pos + 6);
	    int k = pos1 - pos; // wann kommt das ; als Ende eines numerischen entities?
	    if (pos > -1) {
		if (pos1 > -1) {
		    if (k < 12) {
			String s = dummy.substring(pos + 6, pos + 8).toLowerCase();
			char c = s.charAt(0);
			char c1 = s.charAt(1);
			if ((c == 'x' && ((c1 >= '0' && c1 <= '9') || (c1 >= 'a' && c1 <= 'f'))) || (c >= '0' && c <= '9')) {
			    dummy = dummy.substring(0, pos + 1) + dummy.substring(pos + 5, dummy.length());
			}
		    }
		}
		pos++;
	    }
	}
	dummy = STRreplace.replace(dummy, "&amp;", "&"); // falls schon & im str war

	dummy = STRreplace.replace(dummy, "<", "<");
	dummy = STRreplace.replace(dummy, ">", ">");
	dummy = STRreplace.replace(dummy, "\"", """);
	dummy = STRreplace.replace(dummy, "'", "'");
	// why
	dummy = STRreplace.replace(dummy, "\u0004", "");
	// this means changing content,
	// but it is the easiest way of avoiding ]]> to be interpreted as endmark of CDATA-section
	dummy = STRreplace.replace(dummy, "]]>", "]] >");
	// \ in gpx is not imported by mapsource, basecamp, garmin?...(there is no replacement)
	dummy = STRreplace.replace(dummy, "\\", "BkSlsh;");

	return dummy;
    }

    public final static String strxmlencode(boolean src) {
	/* bools are always safe */
	return (src ? "true" : "false");
    }

    public final static String strxmlencode(int src) {
	/* numbers are always safe */
	return (Integer.toString(src));
    }

}