//------------------------------------------------------------------------------
// Copyright (c) 2005, 2007 IBM Corporation and others.
// All rights reserved. This program and the accompanying materials
// are made available under the terms of the Eclipse Public License v1.0
// which accompanies this distribution, and is available at
// http://www.eclipse.org/legal/epl-v10.html
//
// Contributors:
// IBM Corporation - initial implementation
//------------------------------------------------------------------------------
package org.eclipse.epf.common.utils;
import java.net.URI;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import org.eclipse.core.runtime.IPath;
import org.eclipse.core.runtime.Path;
import org.eclipse.core.runtime.Platform;
import org.eclipse.epf.common.CommonPlugin;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.util.StringTokenizer;
/**
* A helper class for manipulating strings.
*
* @author Kelvin Low
* @author Jinhua Xi
* @since 1.0
*/
public class StrUtil {
// Commonly used literal strings.
public static final String EMPTY_STRING = ""; //$NON-NLS-1$
public static final String TAB = "\t"; //$NON-NLS-1$
// Each regular expression below matches a run of one or more rejected
// characters.
// Used by removeSpecialCharacters() and isValidName().
private static final String REGEXP_ANY_SPECIAL_CHARS = "(`|~|!|@|#|\\$|%|\\^|&|\\*|\\(|\\)|\\+|=|\\[|\\]|\\||\\:|\"|<|>|\\?|/|'|\\s|\\\\)+"; //$NON-NLS-1$
// Used by isValidPublishPath() when the platform OS is win32; also rejects '/'.
private static final String REGEXP_INVALID_PUBLISHED_PATH_CHARS = "(\\[|#|\\*|\\?|\"|<|>|\\||!|%|/|\\])+"; //$NON-NLS-1$
// Used by isValidPublishPath() on every other OS; identical to the pattern
// above except that '/' is accepted.
private static final String REGEXP_INVALID_PUBLISHED_PATH_CHARS_LINUX = "(\\[|#|\\*|\\?|\"|<|>|\\||!|%|\\])+"; //$NON-NLS-1$
// Used by makeValidFileName(); also rejects whitespace, '/', '\' and ':'.
private static final String REGEXP_INVALID_FILENAME_CHARS = "(\\[|#|/|\\\\|\\:|\\*|\\?|\"|<|>|\\||\\]|\\s)+"; //$NON-NLS-1$
// Upper-case hexadecimal digits, indexed by nibble value (see toHex()).
private static final char[] HEX_DIGITS = { '0', '1', '2', '3', '4', '5',
'6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
// The platform line separator, read once from the "line.separator" system
// property.
public static final String LINE_FEED = System.getProperty("line.separator"); //$NON-NLS-1$
// NOTE(review): in this copy of the file the next two literals contain a raw
// line break, which is not legal inside a Java string literal. Presumably
// they originally held the XML character references for LF and CR and were
// decoded by some tool - restore the intended values from version control.
public static final String ESCAPED_LF = "
"; //$NON-NLS-1$
public static final String ESCAPED_CR = "
"; //$NON-NLS-1$
// LINE_FEED with every "\n" replaced by ESCAPED_LF and every "\r" replaced by
// ESCAPED_CR.
public static final String ESCAPED_LINE_FEED = LINE_FEED.replace(
"\n", ESCAPED_LF).replace("\r", ESCAPED_CR); //$NON-NLS-1$ //$NON-NLS-2$
// NOTE(review): the regex below matches a single backslash and the
// replacement also yields a single backslash, so this is LINE_FEED unchanged.
// If the intent was to escape backslashes for regex use, the replacement
// needs doubling - confirm.
public static final String LINE_FEED_REGEX = LINE_FEED.replaceAll(
"\\\\", "\\\\"); //$NON-NLS-1$ //$NON-NLS-2$
public static final String ESCAPED_LINE_FEED_REGEX = ESCAPED_LINE_FEED;
// Replacement strings used by getEscapedHTML() and convertNewlinesToHTML().
// NOTE(review): the four symbol constants below currently hold the raw
// characters themselves, which makes the substitutions in getEscapedHTML()
// no-ops. Presumably they were named HTML entities before being decoded by a
// tool - confirm against version control.
public static final String HTML_BREAK = "<br/>";//$NON-NLS-1$
public static final String HTML_COPY = "©";//$NON-NLS-1$
public static final String HTML_EURO = "€";//$NON-NLS-1$
public static final String HTML_REG = "®";//$NON-NLS-1$
public static final String HTML_TRADEMARK = "™";//$NON-NLS-1$
// Public, mutable flag. Within this file it is referenced only from a
// commented-out condition in getEscapedHTML().
public static boolean during_migration = false;
// Optional settings consulted by getEscapedHTML(); null until setOptions()
// is called.
private static StrUtilOptions options;
/**
* Private constructor that prevents this utility class from being
* instantiated from outside. All methods in this class should be static.
*/
private StrUtil() {
}
/**
 * Checks whether a string is a "null string".
 * <p>
 * A null string is either a <code>null</code> reference or a string of
 * length zero. Whitespace is not trimmed, so <code>" "</code> is not a
 * null string.
 *
 * @param str
 *            the string to test, may be <code>null</code>
 * @return <code>true</code> if the given string is <code>null</code> or
 *         empty
 */
public static boolean isNull(String str) {
    if (str == null) {
        return true;
    }
    return str.length() == 0;
}
/**
 * Checks whether a string is blank.
 * <p>
 * A blank string is either a <code>null</code> reference or a string that
 * is empty once {@link String#trim()} has removed its leading and trailing
 * characters.
 *
 * @param str
 *            the string to test, may be <code>null</code>
 * @return <code>true</code> if the given string is <code>null</code> or
 *         trims to the empty string
 */
public static boolean isBlank(String str) {
    if (str != null) {
        final String trimmed = str.trim();
        return trimmed.length() == 0;
    }
    return true;
}
/**
 * Null-safe variant of {@link String#trim()}.
 *
 * @param str
 *            the string to trim, may be <code>null</code>
 * @return the trimmed string, or <code>null</code> if the given string is
 *         <code>null</code>
 */
public static String trim(String str) {
    if (str == null) {
        return null;
    }
    return str.trim();
}
/**
 * Removes every whitespace character (TAB, CR, LF and SPACE) from a
 * string.
 * <p>
 * The previous implementation returned the input unchanged as soon as it
 * met one non-whitespace character, so it only ever turned all-whitespace
 * input into the empty string. Whitespace is now removed wherever it
 * occurs, as documented and in line with {@link #removeNewlines(String)}.
 *
 * @param str
 *            a string, may be <code>null</code>
 * @return the given string with all TAB, CR, LF and SPACE characters
 *         removed; the empty string if the given string is
 *         <code>null</code>
 */
public static String removeWhiteSpaceChars(String str) {
    if (str == null) {
        return ""; //$NON-NLS-1$
    }
    final int len = str.length();
    final StringBuilder kept = new StringBuilder(len);
    for (int i = 0; i < len; i++) {
        final char c = str.charAt(i);
        switch (c) {
        case '\t':
        case '\r':
        case '\n':
        case ' ':
            // whitespace: drop it
            break;
        default:
            kept.append(c);
        }
    }
    return kept.toString();
}
/**
 * Strips all carriage-return and line-feed characters from a string.
 *
 * @param str
 *            a string, may be <code>null</code>
 * @return the given string without any CR or LF characters; the empty
 *         string if the given string is <code>null</code>
 */
public static String removeNewlines(String str) {
    if (str == null) {
        return ""; //$NON-NLS-1$
    }
    final StringBuilder kept = new StringBuilder();
    for (char current : str.toCharArray()) {
        if (current == '\r' || current == '\n') {
            continue;
        }
        kept.append(current);
    }
    return kept.toString();
}
/**
 * Splits a string into an array of string tokens.
 * <p>
 * With a <code>count</code> of <code>-1</code> the string is split into as
 * many tokens as the tokenizer reports. With a positive <code>count</code>
 * at most that many entries are produced: once the array is full, all
 * remaining text (separators included) is appended to the last entry. If
 * the string holds fewer tokens than <code>count</code>, the unused
 * trailing entries stay <code>null</code>.
 *
 * @param str
 *            a string
 * @param sep
 *            a string containing the separator characters
 * @param count
 *            the desired number of string tokens, or <code>-1</code> for
 *            no limit
 * @return an array of string tokens, or <code>null</code> if the given
 *         string is <code>null</code> or <code>count</code> is zero or
 *         less than <code>-1</code>
 */
public static String[] split(String str, String sep, int count) {
    final boolean unlimited = (count == -1);
    if (str == null || !(unlimited || count > 0)) {
        return null;
    }

    // Separators are only returned as tokens when a limit applies, so that
    // they can be re-attached to the overflow kept in the last entry.
    final StringTokenizer tokens = new StringTokenizer(str, sep, !unlimited);
    final int limit = unlimited ? tokens.countTokens() : count;
    final String[] parts = new String[limit];

    int filled = 0;
    while (tokens.hasMoreTokens()) {
        final String token = tokens.nextToken();
        if (filled >= limit) {
            parts[limit - 1] += token;
            continue;
        }
        final boolean isSeparator = token.length() == 1
                && sep.indexOf(token) != -1;
        if (!isSeparator) {
            parts[filled++] = token;
        }
    }
    return parts;
}
/**
 * Splits a string into as many tokens as it contains.
 * <p>
 * Delegates to {@link #split(String, String, int)} with a count of
 * <code>-1</code>.
 *
 * @param str
 *            a string
 * @param sep
 *            a string containing the separator characters
 * @return an array of string tokens, or <code>null</code> if the given
 *         string is <code>null</code>
 */
public static String[] split(String str, String sep) {
    final int noLimit = -1;
    return split(str, sep, noLimit);
}
/**
 * Replaces every occurrence of a substring within a string.
 * <p>
 * The string is re-scanned from the start after each replacement, so an
 * occurrence that only comes into existence through a replacement is
 * replaced as well (e.g. replacing <code>"//"</code> with
 * <code>"/"</code> collapses <code>"a////b"</code> to
 * <code>"a/b"</code>).
 * <p>
 * If the replacement text itself contains the substring, re-scanning would
 * never finish (the previous implementation looped forever), so in that
 * case each original occurrence is replaced exactly once in a single
 * left-to-right pass.
 *
 * @param str
 *            a string
 * @param src
 *            the substring to replace
 * @param tgt
 *            the substring to use for the replacement; a
 *            <code>null</code> value is inserted as the text
 *            <code>"null"</code>, as string concatenation always did here
 * @return the string with the substring replaced; the given string itself
 *         if it or <code>src</code> is <code>null</code> or empty
 */
public static String replace(String str, String src, String tgt) {
    if (str == null || str.length() == 0 || src == null
            || src.length() == 0) {
        return str;
    }

    final String replacement = String.valueOf(tgt);
    if (replacement.indexOf(src) != -1) {
        // Every replacement would re-introduce src: do one bounded pass.
        return str.replace(src, replacement);
    }

    String result = str;
    int index;
    while ((index = result.indexOf(src)) != -1) {
        result = result.substring(0, index) + replacement
                + result.substring(index + src.length());
    }
    return result;
}
/**
 * Parses a string as a decimal integer, falling back to a default.
 * <p>
 * The string is handed to {@link Integer#parseInt(String)} untrimmed, so
 * surrounding whitespace makes it unparsable.
 *
 * @param str
 *            the string to parse, may be <code>null</code>
 * @param defaultValue
 *            the value to return when the string is <code>null</code>,
 *            blank or not a valid integer
 * @return the integer value of the given string, or
 *         <code>defaultValue</code>
 */
public static int getIntValue(String str, int defaultValue) {
    int value = defaultValue;
    if (str != null && str.trim().length() != 0) {
        try {
            value = Integer.parseInt(str);
        } catch (NumberFormatException e) {
            // not a number: keep the default
        }
    }
    return value;
}
/**
 * Returns an array of bytes representing the UTF-8 encoding of a string.
 * <p>
 * The string is processed by Unicode code point, so a surrogate pair is
 * written as one four-byte sequence. The previous implementation encoded
 * each surrogate on its own as three bytes, which is not valid UTF-8. An
 * unpaired surrogate is still written as a three-byte sequence, exactly as
 * before.
 *
 * @param str
 *            a string, must not be <code>null</code>
 * @return a byte array containing the UTF-8 encoding of the given string
 * @throws NullPointerException
 *             if the given string is <code>null</code>
 */
public static byte[] getUTF8Bytes(String str) {
    final int len = str.length();

    // First pass: work out the exact size of the encoded form.
    int count = 0;
    for (int i = 0; i < len;) {
        final int cp = str.codePointAt(i);
        if (cp <= 0x7f) {
            count++;
        } else if (cp <= 0x7ff) {
            count += 2;
        } else if (cp <= 0xffff) {
            count += 3;
        } else {
            count += 4;
        }
        i += Character.charCount(cp);
    }

    // Second pass: encode.
    final byte[] b = new byte[count];
    int off = 0;
    for (int i = 0; i < len;) {
        final int cp = str.codePointAt(i);
        if (cp <= 0x7f) {
            b[off++] = (byte) cp;
        } else if (cp <= 0x7ff) {
            b[off++] = (byte) ((cp >> 6) | 0xc0);
            b[off++] = (byte) ((cp & 0x3f) | 0x80);
        } else if (cp <= 0xffff) {
            b[off++] = (byte) ((cp >> 12) | 0xe0);
            b[off++] = (byte) (((cp >> 6) & 0x3f) | 0x80);
            b[off++] = (byte) ((cp & 0x3f) | 0x80);
        } else {
            // supplementary code point (was a surrogate pair in the string)
            b[off++] = (byte) ((cp >> 18) | 0xf0);
            b[off++] = (byte) (((cp >> 12) & 0x3f) | 0x80);
            b[off++] = (byte) (((cp >> 6) & 0x3f) | 0x80);
            b[off++] = (byte) ((cp & 0x3f) | 0x80);
        }
        i += Character.charCount(cp);
    }
    return b;
}
/**
 * Returns the upper-case hexadecimal digit for the low four bits of an
 * integer.
 *
 * @param value
 *            an integer; only its lowest four bits are used
 * @return the hexadecimal digit, <code>'0'</code> to <code>'F'</code>
 */
private static char toHex(int value) {
    final int nibble = value & 0xF;
    return HEX_DIGITS[nibble];
}
/**
 * Returns the escaped Unicode representation of a string.
 * <p>
 * Each escaped character is written as a backslash, the letter
 * <code>u</code> and four upper-case hexadecimal digits holding the UTF-16
 * code unit.
 * <p>
 * With <code>skipASCII</code> set, characters up to and including the
 * tilde (0x7E) are copied unchanged. The previous comparison was strictly
 * less than 0x7E and therefore escaped the tilde although it is an ASCII
 * character. DEL (0x7F) and everything above it is still escaped.
 *
 * @param str
 *            a string, must not be <code>null</code>
 * @param skipASCII
 *            if <code>true</code>, avoid escaping the ASCII characters
 * @return the escaped Unicode representation of the given string
 * @throws NullPointerException
 *             if the given string is <code>null</code>
 */
public static String toEscapedUnicode(String str, boolean skipASCII) {
    final int len = str.length();
    final StringBuilder result = new StringBuilder(len * 2);
    for (int i = 0; i < len; i++) {
        final char ch = str.charAt(i);
        if (skipASCII && ch <= 0x007E) {
            result.append(ch);
        } else {
            result.append("\\u"); //$NON-NLS-1$
            result.append(toHex((ch >> 12) & 0xF));
            result.append(toHex((ch >> 8) & 0xF));
            result.append(toHex((ch >> 4) & 0xF));
            result.append(toHex(ch & 0xF));
        }
    }
    return result.toString();
}
/**
* Returns a copy of an HTML string in which percent-encoded sequences of
* four hexadecimal digits are decoded and a few symbol characters are
* replaced by the corresponding <code>HTML_*</code> constants.
* <p>
* A percent sign is only examined when at least four more characters follow
* it. Those four characters are then handled as follows:
* <ul>
* <li>No options set ({@link #getOptions()} returns <code>null</code>): if
* all four are hexadecimal digits and the first two are "20", the text
* "%20" is written unchanged and the remaining two characters are processed
* as ordinary text.</li>
* <li>Options set: decoding option 1 switches decoding off; decoding option
* 2 switches it off for sequences whose normalized two-digit key (see
* {@link #getHexStr(String)}) is a key of the hex map; any other option
* value leaves decoding on.</li>
* <li>If decoding is still on and all four characters are hexadecimal
* digits, they are parsed as a number and the character(s) for that code
* point are written in place of the five-character sequence.</li>
* <li>Otherwise the percent sign is copied as is.</li>
* </ul>
* The copyright, registered, euro and trademark characters are replaced by
* {@link #HTML_COPY}, {@link #HTML_REG}, {@link #HTML_EURO} and
* {@link #HTML_TRADEMARK}. Every other character is copied unchanged.
*
* @param html
* a HTML string
* @return the processed string; the empty string if the given string is
* <code>null</code> or empty
*/
public static String getEscapedHTML(String html) {
if (html == null || html.length() == 0) {
return ""; //$NON-NLS-1$
}
// Local snapshot of the static options; shadows the field of the same name.
StrUtilOptions options = getOptions();
StringBuffer result = new StringBuffer();
int length = html.length();
for (int i = 0; i < length; i++) {
char ch = html.charAt(i);
switch (ch) {
case '%':
// Only look at the sequence if four more characters follow the '%'.
if (i + 4 < length) {
String hexStr = html.substring(i + 1, i + 5);
// true while all four characters are hexadecimal digits and decoding has
// not been switched off below
boolean validHextStr = true;
for (int j = 0; j < hexStr.length(); j++) {
char c = hexStr.charAt(j);
if (!((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))) {
validHextStr = false;
break;
}
}
if (options == null) {
// The code below keeps a leading "%20" as is, so "%20de" is emitted as
// "%20" followed by the ordinary text "de".
// This may lose some double-byte characters (e.g. Chinese) whose code starts
// with 20, but it keeps all URL links intact.
// The conversion is applied always, not only during library migration, to
// support files like "my design.gif".
if (/*during_migration && */validHextStr) {
if (hexStr.startsWith("20")) { //$NON-NLS-1$
result.append("%20"); //$NON-NLS-1$
// Skip "20"; the loop increment then moves past it, leaving the remaining
// two characters to be processed normally. This break leaves the switch.
i += 2;
break;
}
}
} else {
int ix = options.getRteUrlDecodingOption();
if (ix == 1) {
// option 1: never decode
validHextStr = false;
} else if (ix == 2) {
// option 2: do not decode sequences whose "%XY" key is listed in the map
String key = getHexStr("%" + hexStr);
if (key != null && options.getRteUrlDecodingHexMap().containsKey(key)) {
validHextStr = false;
}
}
}
if (validHextStr) {
try {
int codePoint = Integer.parseInt(hexStr, 16);
char[] c = UCharacter.toChars(codePoint);
result.append(c);
// Skip the four hexadecimal digits that were just decoded.
i += 4;
break;
} catch (NumberFormatException e) {
// wasn't a valid hex string..
// fall through to the result.append(ch)
} catch (Exception e) {
// Any other failure is logged; the '%' is then copied as is below.
CommonPlugin.getDefault().getLogger().logError(e);
}
}
}
// Not decoded: copy the '%' unchanged.
result.append(ch);
break;
case '\u00a9':
result.append(HTML_COPY);
break;
case '\u00ae':
result.append(HTML_REG);
break;
case '\u20ac':
result.append(HTML_EURO);
break;
case '\u2122':
result.append(HTML_TRADEMARK);
break;
default:
result.append(ch);
break;
}
}
return result.toString();
}
/** Matches an HTML tag: everything from '<' up to the next '>'. */
private static final Pattern PLAIN_TEXT_TAG_PATTERN = Pattern
        .compile("<[^>]*?>"); //$NON-NLS-1$
/** Matches an entity-like sequence such as "&amp;nbsp;" (see getPlainText). */
private static final Pattern PLAIN_TEXT_ENTITY_PATTERN = Pattern
        .compile("&.{1,5}[^;];"); //$NON-NLS-1$
/** Matches a run of whitespace. */
private static final Pattern PLAIN_TEXT_WHITESPACE_PATTERN = Pattern
        .compile("\\s+"); //$NON-NLS-1$
/**
 * Returns the plain text from HTML text.
 * <p>
 * Every HTML tag and every entity-like sequence (an ampersand, two to six
 * further characters and a semicolon) is replaced by a space; afterwards
 * each run of whitespace is collapsed into a single space. The result is
 * not trimmed.
 * <p>
 * The patterns are compiled once instead of on every call. They are
 * compiled without flags on purpose: the previous code passed only the
 * pattern text to {@link String#replaceAll(String, String)}, so the flags
 * it declared (case-insensitive, dot-all) never took effect. Results are
 * therefore unchanged.
 *
 * @param html
 *            the HTML text, may be <code>null</code>
 * @return the plain text representation of the given HTML text; the empty
 *         string if the given text is <code>null</code>
 */
public static String getPlainText(String html) {
    if (html == null) {
        return ""; //$NON-NLS-1$
    }
    String result = PLAIN_TEXT_TAG_PATTERN.matcher(html).replaceAll(" "); //$NON-NLS-1$
    result = PLAIN_TEXT_ENTITY_PATTERN.matcher(result).replaceAll(" "); //$NON-NLS-1$
    return PLAIN_TEXT_WHITESPACE_PATTERN.matcher(result).replaceAll(" "); //$NON-NLS-1$
}
/**
 * Derives a file name from a string.
 * <p>
 * The string is first reduced to plain text with
 * {@link #getPlainText(String)}; each run of characters that is not
 * allowed in a file name is then replaced by a single space and the result
 * is trimmed.
 *
 * @param str
 *            a string, may be <code>null</code>
 * @return a file name derived from the given string; the empty string if
 *         the given string is <code>null</code>
 */
public static String makeValidFileName(String str) {
    if (str != null) {
        final String plainText = getPlainText(str);
        final String cleaned = plainText.replaceAll(
                REGEXP_INVALID_FILENAME_CHARS, " "); //$NON-NLS-1$
        return cleaned.trim();
    }
    return ""; //$NON-NLS-1$
}
/**
 * Removes special characters from a string.
 * <p>
 * The string is first reduced to plain text with
 * {@link #getPlainText(String)}; each run of special characters is then
 * replaced by a single space and the result is trimmed.
 *
 * @param str
 *            a string, may be <code>null</code>
 * @return the cleaned string; the empty string if the given string is
 *         <code>null</code>
 */
public static String removeSpecialCharacters(String str) {
    if (str != null) {
        final String plainText = getPlainText(str);
        final String cleaned = plainText.replaceAll(
                REGEXP_ANY_SPECIAL_CHARS, " "); //$NON-NLS-1$
        return cleaned.trim();
    }
    return ""; //$NON-NLS-1$
}
/**
 * Returns true if the path does not contain any character that is invalid
 * in a publish path.
 * <p>
 * On Windows the forward slash is rejected in addition to the characters
 * rejected on all other platforms.
 * <p>
 * The operating system is now compared against
 * <code>Platform.OS_WIN32</code>; the previous code used the
 * windowing-system constant <code>Platform.WS_WIN32</code> for a value
 * returned by <code>Platform.getOS()</code>.
 *
 * @param path
 *            the file path, must not be <code>null</code>
 * @return <code>true</code> if the given path contains only valid
 *         characters
 */
public static boolean isValidPublishPath(String path) {
    final String invalidChars = Platform.OS_WIN32.equals(Platform.getOS())
            ? REGEXP_INVALID_PUBLISHED_PATH_CHARS
            // everything else is treated like Linux
            : REGEXP_INVALID_PUBLISHED_PATH_CHARS_LINUX;
    // The path is valid if stripping the invalid characters changes nothing.
    return path.replaceAll(invalidChars, "").equals(path); //$NON-NLS-1$
}
/**
 * Converts a string to lower case, except when the platform locale is
 * German.
 *
 * @param str
 *            a string
 * @return the given string unchanged if the platform locale starts with
 *         "de", otherwise its lower-case form
 */
public static String toLower(String str) {
    final boolean germanLocale = Platform.getNL().startsWith("de"); //$NON-NLS-1$
    return germanLocale ? str : str.toLowerCase();
}
/**
 * Escapes single and double quotes by putting a backslash in front of each
 * of them.
 *
 * @param str
 *            a string, must not be <code>null</code>
 * @return the given string with every ' and " preceded by a backslash
 */
public static String escape(String str) {
    final String singleQuotesEscaped = str.replaceAll("'", "\\\\\'"); //$NON-NLS-1$ //$NON-NLS-2$
    return singleQuotesEscaped.replaceAll("\"", "\\\\\""); //$NON-NLS-1$ //$NON-NLS-2$
}
/**
 * Converts a <code>List</code> of strings to a String array.
 * <p>
 * The elements are stored in the array as they are; strings are immutable,
 * so the copy the previous implementation made of each element was
 * redundant, and it failed with a <code>NullPointerException</code> on
 * <code>null</code> elements. Such elements are now carried over as
 * <code>null</code>.
 *
 * @param list
 *            a <code>List</code> whose elements are all strings, may be
 *            <code>null</code>
 * @return a String array with the elements of the list in iteration order,
 *         or <code>null</code> if the given list is <code>null</code>
 * @throws ClassCastException
 *             if the list contains an element that is not a string
 */
public static String[] convertListToStrArray(List<?> list) {
    if (list == null) {
        return null;
    }
    final String[] strArray = new String[list.size()];
    int i = 0;
    for (Object element : list) {
        strArray[i++] = (String) element;
    }
    return strArray;
}
/**
 * Returns true if a name contains none of the special characters.
 *
 * @param name
 *            the name to check, must not be <code>null</code>
 * @return <code>true</code> if removing all special characters leaves the
 *         given name unchanged
 */
public static boolean isValidName(String name) {
    final String withoutSpecialChars = name.replaceAll(
            REGEXP_ANY_SPECIAL_CHARS, ""); //$NON-NLS-1$
    return withoutSpecialChars.equals(name);
}
/**
 * Manual test driver: prints a sample text and the result of replacing the
 * matches of each of the class's character patterns with a space.
 *
 * @param args
 *            not used
 */
public static void main(String[] args) {
    final String text = "A&% ` $~!@#$%^&*()_-+={}[\"]:|\\:;\"'<,>.?/ \t\r\nZ"; //$NON-NLS-1$
    // Each entry: the pattern to apply and the label printed after the result.
    final String[][] cases = {
            { REGEXP_ANY_SPECIAL_CHARS, "] <== All" }, //$NON-NLS-1$
            { REGEXP_INVALID_FILENAME_CHARS, "] <== File Name" }, //$NON-NLS-1$
            { REGEXP_INVALID_PUBLISHED_PATH_CHARS, "] <== path" }, //$NON-NLS-1$
            { REGEXP_INVALID_PUBLISHED_PATH_CHARS_LINUX, "] <== path, Linux" }, //$NON-NLS-1$
    };

    System.out.println("[" + text + "] <== text"); //$NON-NLS-1$ //$NON-NLS-2$
    for (String[] testCase : cases) {
        final String replaced = text.replaceAll(testCase[0], " "); //$NON-NLS-1$
        System.out.println("[" + replaced + testCase[1]); //$NON-NLS-1$
    }
    System.out.println(""); //$NON-NLS-1$
}
/**
 * Inserts an HTML line break in front of each line separator.
 * <p>
 * Every match of {@link #LINE_FEED_REGEX} is replaced by
 * {@link #HTML_BREAK} followed by {@link #LINE_FEED}, and every match of
 * {@link #ESCAPED_LINE_FEED_REGEX} by {@link #HTML_BREAK} followed by
 * {@link #ESCAPED_LINE_FEED}.
 *
 * @param text
 *            the text to convert, may be <code>null</code>
 * @return the converted text, or <code>null</code> if the given text is
 *         <code>null</code>
 */
public static String convertNewlinesToHTML(String text) {
    if (text == null) {
        return null;
    }
    final String platformBreaksDone = text.replaceAll(LINE_FEED_REGEX,
            HTML_BREAK + LINE_FEED);
    return platformBreaksDone.replaceAll(ESCAPED_LINE_FEED_REGEX,
            HTML_BREAK + ESCAPED_LINE_FEED);
}
/**
 * Converts an OS-specific path string into a file URI.
 *
 * @param pathStr
 *            a path in OS-specific form, may be <code>null</code>
 * @return the URI of the file denoted by the path, or <code>null</code> if
 *         the given path is <code>null</code> or empty or the conversion
 *         fails (the failure is logged)
 */
public static URI toURI(String pathStr) {
    if (pathStr == null || pathStr.length() == 0) {
        return null;
    }
    final IPath path = Path.fromOSString(pathStr);
    try {
        return path.toFile().toURI();
    } catch (Exception e) {
        CommonPlugin.getDefault().getLogger().logError(e);
        return null;
    }
}
/**
 * Puts a backslash in front of every occurrence of a character.
 * <p>
 * The previous implementation skipped its loop whenever the first
 * occurrence was at index 0 and then returned the text completely
 * unescaped. Every occurrence is now escaped regardless of position.
 *
 * @param text
 *            the text to process, must not be <code>null</code>
 * @param c
 *            the character to escape
 * @return the text with each occurrence of <code>c</code> preceded by a
 *         backslash; the given text itself if it does not contain
 *         <code>c</code>
 */
public static String escapeChar(String text, char c) {
    if (text.indexOf(c) < 0) {
        return text;
    }
    final int len = text.length();
    final StringBuilder buffer = new StringBuilder(len + 8);
    for (int i = 0; i < len; i++) {
        final char current = text.charAt(i);
        if (current == c) {
            buffer.append('\\');
        }
        buffer.append(current);
    }
    return buffer.toString();
}
/**
 * Normalizes the leading percent-encoded byte of a string.
 * <p>
 * The string must start with a percent sign followed by two hexadecimal
 * digits; anything after those three characters is ignored.
 *
 * @param str
 *            a string, must not be <code>null</code>
 * @return the percent sign followed by the two digits in upper case, or
 *         <code>null</code> if the string is shorter than three
 *         characters, does not start with a percent sign, or the two
 *         characters after it are not hexadecimal digits
 */
public static String getHexStr(String str) {
    if (str.length() < 3 || str.charAt(0) != '%') {
        return null;
    }
    final char[] normalized = { '%', str.charAt(1), str.charAt(2) };
    for (int pos = 1; pos < normalized.length; pos++) {
        char digit = normalized[pos];
        if (digit >= 'a' && digit <= 'z') {
            // fold lower-case letters to upper case before validating
            digit = (char) (digit - 'a' + 'A');
        }
        final boolean isDecimal = digit >= '0' && digit <= '9';
        final boolean isUpperHex = digit >= 'A' && digit <= 'F';
        if (!isDecimal && !isUpperHex) {
            return null;
        }
        normalized[pos] = digit;
    }
    return new String(normalized);
}
/**
* Settings that control how <code>StrUtil.getEscapedHTML</code> treats
* percent-encoded sequences.
*/
public interface StrUtilOptions {
/**
* Returns the URL decoding mode. As evaluated by
* <code>StrUtil.getEscapedHTML</code>: 1 switches decoding off, 2 switches
* decoding off for sequences whose key is contained in
* {@link #getRteUrlDecodingHexMap()}, any other value leaves decoding on.
*/
int getRteUrlDecodingOption();
/**
* Returns the map consulted when the decoding mode is 2. Its keys are
* compared with the normalized form produced by
* <code>StrUtil.getHexStr</code> (a percent sign and two upper-case
* hexadecimal digits). Only the keys are looked at by
* <code>StrUtil.getEscapedHTML</code>.
*/
Map<String, String> getRteUrlDecodingHexMap();
}
/**
* Returns the options currently in effect for this class.
*
* @return the options last passed to {@link #setOptions(StrUtilOptions)},
* or <code>null</code> if none have been set
*/
public static StrUtilOptions getOptions() {
return options;
}
/**
* Sets the options for this class. The value is stored in a static field
* and is therefore shared by all callers.
*
* @param options
* the options to use; <code>null</code> clears them
*/
public static void setOptions(StrUtilOptions options) {
StrUtil.options = options;
}
}