/*
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
licenses@blazegraph.com
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
package com.bigdata.util;
/**
* Collection of some utility methods for HTML.
*
* @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
* @version $Id$
*/
public class HTMLUtility {

    /**
     * Creates an instance. All methods on this class are static, so an
     * instance is never required; the public constructor is retained only for
     * backward compatibility with existing callers.
     */
    public HTMLUtility() {
        super();
    }

    /**
     * <p>
     * Escapes a String value so that it may be written as the value of an XML
     * attribute, or as character data, in a manner that is also compatible
     * with HTML. This is intended for the cases where you want to escape
     * something without using a DOM instance. Note that the best solution is
     * to use a DOM instance, which will automatically escape attribute values
     * and PCDATA as they are inserted into the DOM instance.
     * </p>
     * <p>
     * The following replacements are made; every other character is copied
     * through unchanged:
     * </p>
     * <ul>
     * <li>{@code "} becomes {@code &quot;}</li>
     * <li>{@code '} becomes {@code &#39;}</li>
     * <li>{@code /} becomes {@code &#x2F;}</li>
     * <li>{@code &} becomes {@code &amp;}</li>
     * <li>{@code <} becomes {@code &lt;}</li>
     * <li>{@code >} becomes {@code &gt;}</li>
     * </ul>
     * <p>
     * The following notes are excerpted from the HTML and XML specifications:
     * </p>
     * <ul>
     * <li>HTML: By default, SGML requires that all attribute values be
     * delimited using either double quotation marks (ASCII decimal 34) or
     * single quotation marks (ASCII decimal 39). Single quote marks can be
     * included within the attribute value when the value is delimited by
     * double quote marks, and vice versa. Authors may also use numeric
     * character references to represent double quotes ({@code &#34;}) and
     * single quotes ({@code &#39;}). For double quotes authors can also use
     * the character entity reference {@code &quot;}.<br>
     * In certain cases, authors may specify the value of an attribute without
     * any quotation marks. The attribute value may only contain letters (a-z
     * and A-Z), digits (0-9), hyphens (ASCII decimal 45), periods (ASCII
     * decimal 46), underscores (ASCII decimal 95), and colons (ASCII decimal
     * 58). We recommend using quotation marks even when it is possible to
     * eliminate them.</li>
     * <li>XML: The ampersand character ({@code &}) and the left angle bracket
     * ({@code <}) may appear in their literal form only when used as markup
     * delimiters, or within a comment, a processing instruction, or a CDATA
     * section. If they are needed elsewhere, they must be escaped using either
     * numeric character references or the strings {@code &amp;} and
     * {@code &lt;} respectively. The right angle bracket ({@code >}) may be
     * represented using the string {@code &gt;}, and must, for compatibility,
     * be escaped using {@code &gt;} or a character reference when it appears
     * in the string {@code ]]>} in content, when that string is not marking
     * the end of a CDATA section.</li>
     * </ul>
     *
     * @param s
     *            The value to be escaped (required).
     *
     * @return The escaped value. An empty string is returned as-is.
     *
     * @throws IllegalArgumentException
     *             if <i>s</i> is <code>null</code>.
     */
    public static String escapeForXHTML(final String s) {

        if (s == null) {
            throw new IllegalArgumentException("s is null");
        }

        final int len = s.length();

        if (len == 0) {
            return s;
        }

        // Method-local buffer, so the unsynchronized builder is sufficient.
        // The extra capacity leaves room for a few expanded characters.
        final StringBuilder sb = new StringBuilder(len + 20);

        for (int i = 0; i < len; i++) {

            final char ch = s.charAt(i);

            switch (ch) {
            case '"':
                sb.append("&quot;");
                break;
            case '\'':
                // Numeric reference: &apos; is not defined by HTML 4.
                sb.append("&#39;");
                break;
            case '/':
                // Escaped so that the value can not close an enclosing tag.
                sb.append("&#x2F;");
                break;
            case '&':
                sb.append("&amp;");
                break;
            case '<':
                sb.append("&lt;");
                break;
            case '>':
                sb.append("&gt;");
                break;
            default:
                sb.append(ch);
                break;
            }

        }

        return sb.toString();

    }

    /**
     * Rewrites a String so that a fixed set of punctuation characters are
     * replaced by underscore-delimited tokens (for example <code>:</code>
     * becomes <code>_colon_</code> and <code>.</code> becomes
     * <code>_period_</code>). If the first character is a digit then the
     * result is additionally prefixed with <code>_num_</code>. Characters
     * which are not in the replacement table are copied through unchanged,
     * so this method only handles the characters listed in its switch
     * statement.
     * <p>
     * Note: The replacement tokens (including the historical spelling
     * <code>_tilda_</code>) are part of the output of this method and are
     * deliberately left as they are, since previously generated names may
     * depend on them.
     * </p>
     *
     * @param s
     *            The value to be rewritten (required).
     *
     * @return The rewritten value. An empty string is returned as-is.
     *
     * @throws IllegalArgumentException
     *             if <i>s</i> is <code>null</code>.
     */
    public static String escapeForXMLName(final String s) {

        if (s == null) {
            throw new IllegalArgumentException("s is null");
        }

        final int len = s.length();

        if (len == 0) {
            return s;
        }

        // Method-local buffer, so the unsynchronized builder is sufficient.
        final StringBuilder sb = new StringBuilder(len + 20);

        // A leading digit gets a marker prefix; the digit itself is still
        // copied by the loop below.
        if (Character.isDigit(s.charAt(0))) {
            sb.append("_num_");
        }

        for (int i = 0; i < len; i++) {

            final char ch = s.charAt(i);

            switch (ch) {
            case '"':
                sb.append("_quote_");
                break;
            case '\'':
                sb.append("_apos_");
                break;
            case '&':
                sb.append("_amp_");
                break;
            case '<':
                sb.append("_lt_");
                break;
            case '>':
                sb.append("_gt_");
                break;
            case '$':
                sb.append("_dollar_");
                break;
            case ':':
                sb.append("_colon_");
                break;
            case '~':
                sb.append("_tilda_");
                break;
            case '(':
                sb.append("_lparen_");
                break;
            case ')':
                sb.append("_rparen_");
                break;
            case ',':
                sb.append("_comma_");
                break;
            case '=':
                sb.append("_eq_");
                break;
            case '!':
                sb.append("_bang_");
                break;
            case '?':
                sb.append("_quest_");
                break;
            case '/':
                sb.append("_fw_slash_");
                break;
            case '\\':
                sb.append("_bk_slash_");
                break;
            case ';':
                sb.append("_semicolon_");
                break;
            case '.':
                sb.append("_period_");
                break;
            case '`':
                sb.append("_tic_");
                break;
            default:
                sb.append(ch);
                break;
            }

        }

        return sb.toString();

    }

}