/*
GNU General Public License
CacheWolf is a software for PocketPC, Win and Linux that
enables paperless caching.
It supports the sites geocaching.com and opencaching.de
Copyright (C) 2006 CacheWolf development team
See http://www.cachewolf.de/ for more information.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
package CacheWolf.utils;
/**
* A class to replace unsafe XML characters with characters that a user
* "can read", and vice versa
* 20061222: skg Modified cleanback to speed up the new index.xml reader
*/
import ewe.util.Hashtable;
public final class SafeXML {
private static final char ENTITY_START = '&';
private static final char ENTITY_END = ';';
private final static Hashtable iso2htmlMappings = new Hashtable(300);
static {
final String[] mappingArray = new String[] {
"'",
"'", // Added 20061227 - not a valid HTML entity but used in XML
""", "\"", "&", "&", "<", "<", ">", ">", " ", " ", "¡", "�", "¢", "�", "£", "�", "¤", "�", "¥", "�", "¦", "�", "§", "�", "¨", "�", "©", "�", "ª", "�",
"«", "�", "¬", "�", "", "�", "®", "�", "¯", "�", "°", "�", "±", "�", "²", "�", "³", "�", "´", "�", "µ", "�", "¶", "�", "·", "�", "¸", "�", "¹", "�",
"º", "�", "»", "�", "¼", "�", "½", "�", "¾", "�", "¿", "�", "À", "�", "Á", "�", "Â", "�", "Ã", "�", "Ä", "�", "Å", "�", "Æ", "�", "Ç", "�",
"È", "�", "É", "�", "Ê", "�", "Ë", "�", "Ì", "�", "Í", "�", "Î", "�", "Ï", "�", "Ð", "�", "Ñ", "�", "Ò", "�", "Ó", "�", "Ô", "�", "Õ", "�",
"Ö", "�", "×", "�", "Ø", "�", "Ù", "�", "Ú", "�", "Û", "�", "Ü", "�", "Ý", "�", "Þ", "�", "ß", "�", "à", "�", "á", "�", "â", "�", "ã", "�",
"ä", "�", "å", "�", "æ", "�", "ç", "�", "è", "�", "é", "�", "ê", "�", "ë", "�", "ì", "�", "í", "�", "î", "�", "ï", "�", "ð", "�", "ñ", "�",
"ò", "�", "ó", "�", "ô", "�", "õ", "�", "ö", "�", "÷", "�", "ø", "�", "ù", "�", "ú", "�", "û", "�", "ü", "�", "ý", "�", "þ", "�", "ÿ", "�",
"–", "�" };
for (int i = 0; i < mappingArray.length; i = i + 2) {
iso2htmlMappings.put(mappingArray[i], mappingArray[i + 1]);
}
}
/**
* Converts a <code>String</code> containing HTML entities to
* a <code>String</code> containing only ISO8859-1 characters.
*
* Uses <a href="http://www.ramsch.org/martin/uni/fmi-hp/iso8859-1.html">ISO8859-1 table by Martin Ramsch</a>.
*
* @author <a href="mailto:ey@inweb.de">Christian Ey</a>
*
* @version 1.0
* @param htmlString
* The <code>String</code> containing HTML
* entities
* @return A <code>String</code> containing only ISO8859-1
* characters
*/
public final static String html2iso8859s1(String htmlString) {
int indexStart;
// return immediately if string is null or does not contain &
if (htmlString != null && (indexStart = htmlString.indexOf(ENTITY_START)) >= 0) {
// copy everything from the beginning to entity start into buffer
StringBuffer isoBuffer = new StringBuffer(htmlString.substring(0, indexStart));
while (indexStart >= 0) {
int indexEnd = htmlString.indexOf(ENTITY_END, indexStart + 1);
if (indexEnd >= 0) {
int alternativeStart = htmlString.indexOf(ENTITY_START, indexStart + 1);
if ((alternativeStart > indexStart) && (alternativeStart < indexEnd)) {
// a second index start is found inbetween current index start
// and index end
// flush the html string inbetween
isoBuffer.append(htmlString.substring(indexStart, alternativeStart));
// use the second index start and loop again
indexStart = alternativeStart;
} else {
String entity = htmlString.substring(indexStart, indexEnd + 1);
appendEntityAsIsoChar(entity, isoBuffer);
indexStart = htmlString.indexOf(ENTITY_START, indexEnd + 1);
if (indexStart >= 0) {
// another entity start detected, flush the html string inbetween
isoBuffer.append(htmlString.substring(indexEnd + 1, indexStart));
} else {
// no further entity start detected, flush rest of html string
isoBuffer.append(htmlString.substring(indexEnd + 1));
}
}
} else {
// entity start without matching entity end detected, ignore gracefully
isoBuffer.append(htmlString.substring(indexStart));
break;
}
}
return isoBuffer.toString();
} else {
// nothing to do
return htmlString;
}
}
private final static void appendEntityAsIsoChar(String entity, StringBuffer addto) {
if (entity.startsWith("")) {
try {
int value;
if (entity.charAt(2) == 'x' || entity.charAt(2) == 'X') {
// number in hexadecimal // not tested because I don't have an XML containing hexadecimal encodings
value = Integer.parseInt(entity.substring(3, entity.length() - 1), 16);
} else {
// number is decimal
value = Integer.parseInt(entity.substring(2, entity.length() - 1));
}
if (value < 256)
addto.append((char) value);
else
addto.append(entity); // no valid Iso
} catch (NumberFormatException e) {
addto.append(entity); // not a valid number, insert original text
}
} // number format exception
else { // entity with a name like """
String isoCharacter = (String) iso2htmlMappings.get(entity);
if (isoCharacter != null) {
// insert iso character instead of html entity
addto.append(isoCharacter);
} else {
// illegal entity detected, ignore gracefully
addto.append(entity);
}
}
}
/**
* convert a single char to its equivalent HTML entity. Ordinary chars are
* not changed. 160 ->
*
* @param c
* Char to convert
*
* @return equivalent string eg. &, null means leave char as is.
*/
private final static String charToEntity(char c) {
switch (c) {
case '"':
return """;
case '&':
return "&";
case '<':
return "<";
case '>':
return ">";
case '\'':
return "'";
default:
return null;
} // end switch
} // end charToEntity
/**
* Converts text to HTML by quoting dangerous characters. Text must not
* already contain entities. e.g. " ==> " < ==> < ordinary text
* passes unchanged. Does not convert space to
*
* @param text
* raw text to be processed.
*
* @return translated text, or null if input is null.
*/
public final static String string2Html(String text) {
if (text == null)
return null;
int originalTextLength = text.length();
StringBuffer sb = new StringBuffer(originalTextLength * 110 / 100);
int charsToAppend = 0;
for (int i = 0; i < originalTextLength; i++) {
char c = text.charAt(i);
String entity = charToEntity(c);
if (entity == null) {
// we could sb.append( c ), but that would be slower
// than saving them up for a big append.
charsToAppend++;
} else {
if (charsToAppend != 0) {
sb.append(text.substring(i - charsToAppend, i));
charsToAppend = 0;
}
sb.append(entity);
}
} // end for
// append chars to the right of the last entity.
if (charsToAppend != 0) {
sb.append(text.substring(originalTextLength - charsToAppend, originalTextLength));
}
// if result is not longer, we did not do anything. Save RAM.
return (sb.length() == originalTextLength) ? text : sb.toString();
} // end insertEntities
/**
* Converts a data string to something that is safe to use inside
* an XML file (like prefs.xml) - entities like & are *NOT*
* valid XML unless declared specially, so we must use the numerical
* values here.
*
* @param str (String) raw text to be processed
*
* @return (String) translated text, or null if input is null
*/
public final static String cleanGPX(String str) {
String dummy = STRreplace.replace(str, "&", "&");
// "&#" --> ""); //Darstellung Umlaute etc : siehe http://www.geoclub.de/viewtopic.php?f=40&t=50635&p=798796#p798796
// aber so etwas nicht "&#entry15063" --> !!not!! "entry15063" (Cache GCPB5P export -> gpx, import -> mapsource)
int pos = 0;
while (pos > -1) {
pos = dummy.indexOf("&#", pos);
int pos1 = dummy.indexOf(";", pos + 6);
int k = pos1 - pos; // wann kommt das ; als Ende eines numerischen entities?
if (pos > -1) {
if (pos1 > -1) {
if (k < 12) {
String s = dummy.substring(pos + 6, pos + 8).toLowerCase();
char c = s.charAt(0);
char c1 = s.charAt(1);
if ((c == 'x' && ((c1 >= '0' && c1 <= '9') || (c1 >= 'a' && c1 <= 'f'))) || (c >= '0' && c <= '9')) {
dummy = dummy.substring(0, pos + 1) + dummy.substring(pos + 5, dummy.length());
}
}
}
pos++;
}
}
dummy = STRreplace.replace(dummy, "&", "&"); // falls schon & im str war
dummy = STRreplace.replace(dummy, "<", "<");
dummy = STRreplace.replace(dummy, ">", ">");
dummy = STRreplace.replace(dummy, "\"", """);
dummy = STRreplace.replace(dummy, "'", "'");
// why
dummy = STRreplace.replace(dummy, "\u0004", "");
// this means changing content,
// but it is the easiest way of avoiding ]]> to be interpreted as endmark of CDATA-section
dummy = STRreplace.replace(dummy, "]]>", "]] >");
// \ in gpx is not imported by mapsource, basecamp, garmin?...(there is no replacement)
dummy = STRreplace.replace(dummy, "\\", "BkSlsh;");
return dummy;
}
public final static String strxmlencode(boolean src) {
/* bools are always safe */
return (src ? "true" : "false");
}
public final static String strxmlencode(int src) {
/* numbers are always safe */
return (Integer.toString(src));
}
}