/*
* Copyright 2001-2013 Geert Bevin (gbevin[remove] at uwyn dot com)
* Licensed under the Apache License, Version 2.0 (the "License")
*/
package com.uwyn.rife.tools;
import com.uwyn.rife.config.RifeConfig;
import com.uwyn.rife.datastructures.DocumentPosition;
import com.uwyn.rife.datastructures.EnumClass;
import java.io.UnsupportedEncodingException;
import java.lang.reflect.Array;
import java.net.URLEncoder;
import java.nio.CharBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.text.BreakIterator;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* General purpose class containing common <code>String</code> manipulation
* methods.
*
* @author Geert Bevin (gbevin[remove] at uwyn dot com)
* @since 1.0
*/
public abstract class StringUtils
{
// BBCode conversion option flags.
// NOTE(review): the code that consumes these flags is outside this excerpt; judging
// by the names alone they toggle URL shortening, URL sanitizing, auto-linking of
// bare URLs and "nofollow" on generated links - confirm against the converter.
public static final BbcodeOption SHORTEN_URL = new BbcodeOption("SHORTEN_URL");
public static final BbcodeOption SANITIZE_URL = new BbcodeOption("SANITIZE_URL");
public static final BbcodeOption CONVERT_BARE_URLS = new BbcodeOption("CONVERT_BARE_URLS");
public static final BbcodeOption NO_FOLLOW_LINKS = new BbcodeOption("NO_FOLLOW_LINKS");
// Pre-compiled, case-insensitive patterns that recognize BBCode constructs.
// [color=VALUE] opening tag; group 1 is the value ('#' and word characters, may be empty).
public static final Pattern BBCODE_COLOR = Pattern.compile("\\[color\\s*=\\s*([#\\w]*)\\s*\\]", Pattern.CASE_INSENSITIVE);
// [size=VALUE] opening tag; group 1 is an optionally signed run of digits (may be empty).
public static final Pattern BBCODE_SIZE = Pattern.compile("\\[size\\s*=\\s*([+\\-]?[0-9]*)\\s*\\]", Pattern.CASE_INSENSITIVE);
// [url]TARGET[/url]; group 1 is the whitespace-free target.
public static final Pattern BBCODE_URL_SHORT = Pattern.compile("\\[url\\]\\s*([^\\s]*)\\s*\\[\\/url\\]", Pattern.CASE_INSENSITIVE);
// [url=TARGET]LABEL[/url]; group 1 is the target, group 2 the label; neither may contain '['.
public static final Pattern BBCODE_URL_LONG = Pattern.compile("\\[url=([^\\[]*)\\]([^\\[]*)\\[/url\\]", Pattern.CASE_INSENSITIVE);
// [img]LOCATION[/img]; group 1 is the whitespace-free image location.
public static final Pattern BBCODE_IMG = Pattern.compile("\\[img\\]\\s*([^\\s]*)\\s*\\[\\/img\\]", Pattern.CASE_INSENSITIVE);
// [quote=NAME] opening tag; group 1 is one or more non-']' characters, optionally
// followed by ']' characters that precede the closing bracket.
public static final Pattern BBCODE_QUOTE_LONG = Pattern.compile("\\[quote=([^\\]]+\\]*)\\]", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE);
// Bare http(s)/ftp(s) URL; group 1 is the URL. The match must start at the beginning
// of a line or after one character that is not a quote, '=', '>' or ']', which keeps
// URLs that already sit inside markup from matching. Note that this leading
// character is part of the overall match but not of group 1.
public static final Pattern BBCODE_BAREURL = Pattern.compile("(?:[^\"'=>\\]]|^)((?:http|ftp)s?://(?:%[\\p{Digit}A-Fa-f][\\p{Digit}A-Fa-f]|[\\-_\\.!~*';\\|/?:@#&=\\+$,\\p{Alnum}])+)", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE);
// Character-to-replacement tables handed to encode(...) by the public encode*
// methods. All of them start out empty and are filled by the static initializer.
private static final Map<Character, String> AGGRESSIVE_HTML_ENCODE_MAP = new HashMap<>();
private static final Map<Character, String> DEFENSIVE_HTML_ENCODE_MAP = new HashMap<>();
private static final Map<Character, String> XML_ENCODE_MAP = new HashMap<>();
private static final Map<Character, String> STRING_ENCODE_MAP = new HashMap<>();
private static final Map<Character, String> SQL_ENCODE_MAP = new HashMap<>();
private static final Map<Character, String> LATEX_ENCODE_MAP = new HashMap<>();
// Inverse of the two HTML encode tables (replacement -> character), read by decodeHtml.
private static final Map<String, Character> HTML_DECODE_MAP = new HashMap<>();
// Fallback passed to encode(...) by encodeHtml for characters that no table covers.
private static final HtmlEncoderFallbackHandler HTML_ENCODER_FALLBACK = new HtmlEncoderFallbackHandler();
static
{
// Html encoding mapping according to the HTML 4.0 spec
// http://www.w3.org/TR/REC-html40/sgml/entities.html
// Special characters for HTML
AGGRESSIVE_HTML_ENCODE_MAP.put('\u0026', "&");
AGGRESSIVE_HTML_ENCODE_MAP.put('\u003C', "<");
AGGRESSIVE_HTML_ENCODE_MAP.put('\u003E', ">");
AGGRESSIVE_HTML_ENCODE_MAP.put('\u0022', """);
DEFENSIVE_HTML_ENCODE_MAP.put('\u0152', "Œ");
DEFENSIVE_HTML_ENCODE_MAP.put('\u0153', "œ");
DEFENSIVE_HTML_ENCODE_MAP.put('\u0160', "Š");
DEFENSIVE_HTML_ENCODE_MAP.put('\u0161', "š");
DEFENSIVE_HTML_ENCODE_MAP.put('\u0178', "Ÿ");
DEFENSIVE_HTML_ENCODE_MAP.put('\u02C6', "ˆ");
DEFENSIVE_HTML_ENCODE_MAP.put('\u02DC', "˜");
DEFENSIVE_HTML_ENCODE_MAP.put('\u2002', " ");
DEFENSIVE_HTML_ENCODE_MAP.put('\u2003', " ");
DEFENSIVE_HTML_ENCODE_MAP.put('\u2009', " ");
DEFENSIVE_HTML_ENCODE_MAP.put('\u200C', "");
DEFENSIVE_HTML_ENCODE_MAP.put('\u200D', "");
DEFENSIVE_HTML_ENCODE_MAP.put('\u200E', "");
DEFENSIVE_HTML_ENCODE_MAP.put('\u200F', "");
DEFENSIVE_HTML_ENCODE_MAP.put('\u2013', "–");
DEFENSIVE_HTML_ENCODE_MAP.put('\u2014', "—");
DEFENSIVE_HTML_ENCODE_MAP.put('\u2018', "‘");
DEFENSIVE_HTML_ENCODE_MAP.put('\u2019', "’");
DEFENSIVE_HTML_ENCODE_MAP.put('\u201A', "‚");
DEFENSIVE_HTML_ENCODE_MAP.put('\u201C', "“");
DEFENSIVE_HTML_ENCODE_MAP.put('\u201D', "”");
DEFENSIVE_HTML_ENCODE_MAP.put('\u201E', "„");
DEFENSIVE_HTML_ENCODE_MAP.put('\u2020', "†");
DEFENSIVE_HTML_ENCODE_MAP.put('\u2021', "‡");
DEFENSIVE_HTML_ENCODE_MAP.put('\u2030', "‰");
DEFENSIVE_HTML_ENCODE_MAP.put('\u2039', "‹");
DEFENSIVE_HTML_ENCODE_MAP.put('\u203A', "›");
DEFENSIVE_HTML_ENCODE_MAP.put('\u20AC', "€");
// Character entity references for ISO 8859-1 characters
DEFENSIVE_HTML_ENCODE_MAP.put('\u00A0', " ");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00A1', "¡");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00A2', "¢");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00A3', "£");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00A4', "¤");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00A5', "¥");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00A6', "¦");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00A7', "§");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00A8', "¨");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00A9', "©");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00AA', "ª");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00AB', "«");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00AC', "¬");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00AD', "");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00AE', "®");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00AF', "¯");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00B0', "°");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00B1', "±");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00B2', "²");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00B3', "³");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00B4', "´");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00B5', "µ");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00B6', "¶");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00B7', "·");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00B8', "¸");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00B9', "¹");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00BA', "º");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00BB', "»");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00BC', "¼");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00BD', "½");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00BE', "¾");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00BF', "¿");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00C0', "À");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00C1', "Á");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00C2', "Â");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00C3', "Ã");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00C4', "Ä");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00C5', "Å");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00C6', "Æ");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00C7', "Ç");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00C8', "È");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00C9', "É");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00CA', "Ê");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00CB', "Ë");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00CC', "Ì");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00CD', "Í");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00CE', "Î");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00CF', "Ï");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00D0', "Ð");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00D1', "Ñ");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00D2', "Ò");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00D3', "Ó");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00D4', "Ô");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00D5', "Õ");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00D6', "Ö");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00D7', "×");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00D8', "Ø");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00D9', "Ù");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00DA', "Ú");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00DB', "Û");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00DC', "Ü");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00DD', "Ý");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00DE', "Þ");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00DF', "ß");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00E0', "à");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00E1', "á");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00E2', "â");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00E3', "ã");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00E4', "ä");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00E5', "å");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00E6', "æ");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00E7', "ç");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00E8', "è");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00E9', "é");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00EA', "ê");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00EB', "ë");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00EC', "ì");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00ED', "í");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00EE', "î");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00EF', "ï");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00F0', "ð");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00F1', "ñ");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00F2', "ò");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00F3', "ó");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00F4', "ô");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00F5', "õ");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00F6', "ö");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00F7', "÷");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00F8', "ø");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00F9', "ù");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00FA', "ú");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00FB', "û");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00FC', "ü");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00FD', "ý");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00FE', "þ");
DEFENSIVE_HTML_ENCODE_MAP.put('\u00FF', "ÿ");
// Mathematical, Greek and Symbolic characters for HTML
DEFENSIVE_HTML_ENCODE_MAP.put('\u0192', "ƒ");
DEFENSIVE_HTML_ENCODE_MAP.put('\u0391', "Α");
DEFENSIVE_HTML_ENCODE_MAP.put('\u0392', "Β");
DEFENSIVE_HTML_ENCODE_MAP.put('\u0393', "Γ");
DEFENSIVE_HTML_ENCODE_MAP.put('\u0394', "Δ");
DEFENSIVE_HTML_ENCODE_MAP.put('\u0395', "Ε");
DEFENSIVE_HTML_ENCODE_MAP.put('\u0396', "Ζ");
DEFENSIVE_HTML_ENCODE_MAP.put('\u0397', "Η");
DEFENSIVE_HTML_ENCODE_MAP.put('\u0398', "Θ");
DEFENSIVE_HTML_ENCODE_MAP.put('\u0399', "Ι");
DEFENSIVE_HTML_ENCODE_MAP.put('\u039A', "Κ");
DEFENSIVE_HTML_ENCODE_MAP.put('\u039B', "Λ");
DEFENSIVE_HTML_ENCODE_MAP.put('\u039C', "Μ");
DEFENSIVE_HTML_ENCODE_MAP.put('\u039D', "Ν");
DEFENSIVE_HTML_ENCODE_MAP.put('\u039E', "Ξ");
DEFENSIVE_HTML_ENCODE_MAP.put('\u039F', "Ο");
DEFENSIVE_HTML_ENCODE_MAP.put('\u03A0', "Π");
DEFENSIVE_HTML_ENCODE_MAP.put('\u03A1', "Ρ");
DEFENSIVE_HTML_ENCODE_MAP.put('\u03A3', "Σ");
DEFENSIVE_HTML_ENCODE_MAP.put('\u03A4', "Τ");
DEFENSIVE_HTML_ENCODE_MAP.put('\u03A5', "Υ");
DEFENSIVE_HTML_ENCODE_MAP.put('\u03A6', "Φ");
DEFENSIVE_HTML_ENCODE_MAP.put('\u03A7', "Χ");
DEFENSIVE_HTML_ENCODE_MAP.put('\u03A8', "Ψ");
DEFENSIVE_HTML_ENCODE_MAP.put('\u03A9', "Ω");
DEFENSIVE_HTML_ENCODE_MAP.put('\u03B1', "α");
DEFENSIVE_HTML_ENCODE_MAP.put('\u03B2', "β");
DEFENSIVE_HTML_ENCODE_MAP.put('\u03B3', "γ");
DEFENSIVE_HTML_ENCODE_MAP.put('\u03B4', "δ");
DEFENSIVE_HTML_ENCODE_MAP.put('\u03B5', "ε");
DEFENSIVE_HTML_ENCODE_MAP.put('\u03B6', "ζ");
DEFENSIVE_HTML_ENCODE_MAP.put('\u03B7', "η");
DEFENSIVE_HTML_ENCODE_MAP.put('\u03B8', "θ");
DEFENSIVE_HTML_ENCODE_MAP.put('\u03B9', "ι");
DEFENSIVE_HTML_ENCODE_MAP.put('\u03BA', "κ");
DEFENSIVE_HTML_ENCODE_MAP.put('\u03BB', "λ");
DEFENSIVE_HTML_ENCODE_MAP.put('\u03BC', "μ");
DEFENSIVE_HTML_ENCODE_MAP.put('\u03BD', "ν");
DEFENSIVE_HTML_ENCODE_MAP.put('\u03BE', "ξ");
DEFENSIVE_HTML_ENCODE_MAP.put('\u03BF', "ο");
DEFENSIVE_HTML_ENCODE_MAP.put('\u03C0', "π");
DEFENSIVE_HTML_ENCODE_MAP.put('\u03C1', "ρ");
DEFENSIVE_HTML_ENCODE_MAP.put('\u03C2', "ς");
DEFENSIVE_HTML_ENCODE_MAP.put('\u03C3', "σ");
DEFENSIVE_HTML_ENCODE_MAP.put('\u03C4', "τ");
DEFENSIVE_HTML_ENCODE_MAP.put('\u03C5', "υ");
DEFENSIVE_HTML_ENCODE_MAP.put('\u03C6', "φ");
DEFENSIVE_HTML_ENCODE_MAP.put('\u03C7', "χ");
DEFENSIVE_HTML_ENCODE_MAP.put('\u03C8', "ψ");
DEFENSIVE_HTML_ENCODE_MAP.put('\u03C9', "ω");
DEFENSIVE_HTML_ENCODE_MAP.put('\u03D1', "ϑ");
DEFENSIVE_HTML_ENCODE_MAP.put('\u03D2', "ϒ");
DEFENSIVE_HTML_ENCODE_MAP.put('\u03D6', "ϖ");
DEFENSIVE_HTML_ENCODE_MAP.put('\u2022', "•");
DEFENSIVE_HTML_ENCODE_MAP.put('\u2026', "…");
DEFENSIVE_HTML_ENCODE_MAP.put('\u2032', "′");
DEFENSIVE_HTML_ENCODE_MAP.put('\u2033', "″");
DEFENSIVE_HTML_ENCODE_MAP.put('\u203E', "‾");
DEFENSIVE_HTML_ENCODE_MAP.put('\u2044', "⁄");
DEFENSIVE_HTML_ENCODE_MAP.put('\u2118', "℘");
DEFENSIVE_HTML_ENCODE_MAP.put('\u2111', "ℑ");
DEFENSIVE_HTML_ENCODE_MAP.put('\u211C', "ℜ");
DEFENSIVE_HTML_ENCODE_MAP.put('\u2122', "™");
DEFENSIVE_HTML_ENCODE_MAP.put('\u2135', "ℵ");
DEFENSIVE_HTML_ENCODE_MAP.put('\u2190', "←");
DEFENSIVE_HTML_ENCODE_MAP.put('\u2191', "↑");
DEFENSIVE_HTML_ENCODE_MAP.put('\u2192', "→");
DEFENSIVE_HTML_ENCODE_MAP.put('\u2193', "↓");
DEFENSIVE_HTML_ENCODE_MAP.put('\u2194', "↔");
DEFENSIVE_HTML_ENCODE_MAP.put('\u21B5', "↵");
DEFENSIVE_HTML_ENCODE_MAP.put('\u21D0', "⇐");
DEFENSIVE_HTML_ENCODE_MAP.put('\u21D1', "⇑");
DEFENSIVE_HTML_ENCODE_MAP.put('\u21D2', "⇒");
DEFENSIVE_HTML_ENCODE_MAP.put('\u21D3', "⇓");
DEFENSIVE_HTML_ENCODE_MAP.put('\u21D4', "⇔");
DEFENSIVE_HTML_ENCODE_MAP.put('\u2200', "∀");
DEFENSIVE_HTML_ENCODE_MAP.put('\u2202', "∂");
DEFENSIVE_HTML_ENCODE_MAP.put('\u2203', "∃");
DEFENSIVE_HTML_ENCODE_MAP.put('\u2205', "∅");
DEFENSIVE_HTML_ENCODE_MAP.put('\u2207', "∇");
DEFENSIVE_HTML_ENCODE_MAP.put('\u2208', "∈");
DEFENSIVE_HTML_ENCODE_MAP.put('\u2209', "∉");
DEFENSIVE_HTML_ENCODE_MAP.put('\u220B', "∋");
DEFENSIVE_HTML_ENCODE_MAP.put('\u220F', "∏");
DEFENSIVE_HTML_ENCODE_MAP.put('\u2211', "∑");
DEFENSIVE_HTML_ENCODE_MAP.put('\u2212', "−");
DEFENSIVE_HTML_ENCODE_MAP.put('\u2217', "∗");
DEFENSIVE_HTML_ENCODE_MAP.put('\u221A', "√");
DEFENSIVE_HTML_ENCODE_MAP.put('\u221D', "∝");
DEFENSIVE_HTML_ENCODE_MAP.put('\u221E', "∞");
DEFENSIVE_HTML_ENCODE_MAP.put('\u2220', "∠");
DEFENSIVE_HTML_ENCODE_MAP.put('\u2227', "∧");
DEFENSIVE_HTML_ENCODE_MAP.put('\u2228', "∨");
DEFENSIVE_HTML_ENCODE_MAP.put('\u2229', "∩");
DEFENSIVE_HTML_ENCODE_MAP.put('\u222A', "∪");
DEFENSIVE_HTML_ENCODE_MAP.put('\u222B', "∫");
DEFENSIVE_HTML_ENCODE_MAP.put('\u2234', "∴");
DEFENSIVE_HTML_ENCODE_MAP.put('\u223C', "∼");
DEFENSIVE_HTML_ENCODE_MAP.put('\u2245', "≅");
DEFENSIVE_HTML_ENCODE_MAP.put('\u2248', "≈");
DEFENSIVE_HTML_ENCODE_MAP.put('\u2260', "≠");
DEFENSIVE_HTML_ENCODE_MAP.put('\u2261', "≡");
DEFENSIVE_HTML_ENCODE_MAP.put('\u2264', "≤");
DEFENSIVE_HTML_ENCODE_MAP.put('\u2265', "≥");
DEFENSIVE_HTML_ENCODE_MAP.put('\u2282', "⊂");
DEFENSIVE_HTML_ENCODE_MAP.put('\u2283', "⊃");
DEFENSIVE_HTML_ENCODE_MAP.put('\u2284', "⊄");
DEFENSIVE_HTML_ENCODE_MAP.put('\u2286', "⊆");
DEFENSIVE_HTML_ENCODE_MAP.put('\u2287', "⊇");
DEFENSIVE_HTML_ENCODE_MAP.put('\u2295', "⊕");
DEFENSIVE_HTML_ENCODE_MAP.put('\u2297', "⊗");
DEFENSIVE_HTML_ENCODE_MAP.put('\u22A5', "⊥");
DEFENSIVE_HTML_ENCODE_MAP.put('\u22C5', "⋅");
DEFENSIVE_HTML_ENCODE_MAP.put('\u2308', "⌈");
DEFENSIVE_HTML_ENCODE_MAP.put('\u2309', "⌉");
DEFENSIVE_HTML_ENCODE_MAP.put('\u230A', "⌊");
DEFENSIVE_HTML_ENCODE_MAP.put('\u230B', "⌋");
DEFENSIVE_HTML_ENCODE_MAP.put('\u2329', "〈");
DEFENSIVE_HTML_ENCODE_MAP.put('\u232A', "〉");
DEFENSIVE_HTML_ENCODE_MAP.put('\u25CA', "◊");
DEFENSIVE_HTML_ENCODE_MAP.put('\u2660', "♠");
DEFENSIVE_HTML_ENCODE_MAP.put('\u2663', "♣");
DEFENSIVE_HTML_ENCODE_MAP.put('\u2665', "♥");
DEFENSIVE_HTML_ENCODE_MAP.put('\u2666', "♦");
Set<Map.Entry<Character, String>> aggresive_entries = AGGRESSIVE_HTML_ENCODE_MAP.entrySet();
for (Map.Entry<Character, String> entry : aggresive_entries)
{
HTML_DECODE_MAP.put(entry.getValue(), entry.getKey());
}
Set<Map.Entry<Character, String>> defensive_entries = DEFENSIVE_HTML_ENCODE_MAP.entrySet();
for (Map.Entry<Character, String> entry : defensive_entries)
{
HTML_DECODE_MAP.put(entry.getValue(), entry.getKey());
}
XML_ENCODE_MAP.put('\u0026', "&");
XML_ENCODE_MAP.put('\'', "'");
XML_ENCODE_MAP.put('\u0022', """);
XML_ENCODE_MAP.put('\u003C', "<");
XML_ENCODE_MAP.put('\u003E', ">");
SQL_ENCODE_MAP.put('\'', "''");
STRING_ENCODE_MAP.put('\\', "\\\\");
STRING_ENCODE_MAP.put('\n', "\\n");
STRING_ENCODE_MAP.put('\r', "\\r");
STRING_ENCODE_MAP.put('\t', "\\t");
STRING_ENCODE_MAP.put('"', "\\\"");
LATEX_ENCODE_MAP.put('\\', "\\\\");
LATEX_ENCODE_MAP.put('#', "\\#");
LATEX_ENCODE_MAP.put('$', "\\$");
LATEX_ENCODE_MAP.put('%', "\\%");
LATEX_ENCODE_MAP.put('&', "\\&");
LATEX_ENCODE_MAP.put('~', "\\~");
LATEX_ENCODE_MAP.put('_', "\\_");
LATEX_ENCODE_MAP.put('^', "\\^");
LATEX_ENCODE_MAP.put('{', "\\{");
LATEX_ENCODE_MAP.put('}', "\\}");
LATEX_ENCODE_MAP.put('\u00A1', "!'");
LATEX_ENCODE_MAP.put('\u00BF', "?'");
LATEX_ENCODE_MAP.put('\u00C0', "\\`{A}");
LATEX_ENCODE_MAP.put('\u00C1', "\\'{A}");
LATEX_ENCODE_MAP.put('\u00C2', "\\^{A}");
LATEX_ENCODE_MAP.put('\u00C3', "\\H{A}");
LATEX_ENCODE_MAP.put('\u00C4', "\\\"{A}");
LATEX_ENCODE_MAP.put('\u00C5', "\\AA");
LATEX_ENCODE_MAP.put('\u00C6', "\\AE");
LATEX_ENCODE_MAP.put('\u00C7', "\\c{C}");
LATEX_ENCODE_MAP.put('\u00C8', "\\`{E}");
LATEX_ENCODE_MAP.put('\u00C9', "\\'{E}");
LATEX_ENCODE_MAP.put('\u00CA', "\\^{E}");
LATEX_ENCODE_MAP.put('\u00CB', "\\\"{E}");
LATEX_ENCODE_MAP.put('\u00CC', "\\`{I}");
LATEX_ENCODE_MAP.put('\u00CD', "\\'{I}");
LATEX_ENCODE_MAP.put('\u00CE', "\\^{I}");
LATEX_ENCODE_MAP.put('\u00CF', "\\\"{I}");
// todo \u00D0
LATEX_ENCODE_MAP.put('\u00D1', "\\H{N}");
LATEX_ENCODE_MAP.put('\u00D2', "\\`{O}");
LATEX_ENCODE_MAP.put('\u00D3', "\\'{O}");
LATEX_ENCODE_MAP.put('\u00D4', "\\^{O}");
LATEX_ENCODE_MAP.put('\u00D5', "\\H{O}");
LATEX_ENCODE_MAP.put('\u00D6', "\\\"{O}");
// todo \u00D7
LATEX_ENCODE_MAP.put('\u00D8', "\\O");
LATEX_ENCODE_MAP.put('\u00D9', "\\`{U}");
LATEX_ENCODE_MAP.put('\u00DA', "\\'{U}");
LATEX_ENCODE_MAP.put('\u00DB', "\\^{U}");
LATEX_ENCODE_MAP.put('\u00DC', "\\\"{U}");
LATEX_ENCODE_MAP.put('\u00DD', "\\'{Y}");
// todo \u00DE
LATEX_ENCODE_MAP.put('\u00DF', "\\ss");
LATEX_ENCODE_MAP.put('\u00E0', "\\`{a}");
LATEX_ENCODE_MAP.put('\u00E1', "\\'{a}");
LATEX_ENCODE_MAP.put('\u00E2', "\\^{a}");
LATEX_ENCODE_MAP.put('\u00E3', "\\H{a}");
LATEX_ENCODE_MAP.put('\u00E4', "\\\"{a}");
LATEX_ENCODE_MAP.put('\u00E5', "\\aa");
LATEX_ENCODE_MAP.put('\u00E6', "\\ae");
LATEX_ENCODE_MAP.put('\u00E7', "\\c{c}");
LATEX_ENCODE_MAP.put('\u00E8', "\\`{e}");
LATEX_ENCODE_MAP.put('\u00E9', "\\'{e}");
LATEX_ENCODE_MAP.put('\u00EA', "\\^{e}");
LATEX_ENCODE_MAP.put('\u00EB', "\\\"{e}");
LATEX_ENCODE_MAP.put('\u00EC', "\\`{i}");
LATEX_ENCODE_MAP.put('\u00ED', "\\'{i}");
LATEX_ENCODE_MAP.put('\u00EE', "\\^{i}");
LATEX_ENCODE_MAP.put('\u00EF', "\\\"{i}");
// todo \u00F0
LATEX_ENCODE_MAP.put('\u00F1', "\\H{n}");
LATEX_ENCODE_MAP.put('\u00F2', "\\`{o}");
LATEX_ENCODE_MAP.put('\u00F3', "\\'{o}");
LATEX_ENCODE_MAP.put('\u00F4', "\\^{o}");
LATEX_ENCODE_MAP.put('\u00F5', "\\H{o}");
LATEX_ENCODE_MAP.put('\u00F6', "\\\"{o}");
// todo \u00F7
LATEX_ENCODE_MAP.put('\u00F8', "\\o");
LATEX_ENCODE_MAP.put('\u00F9', "\\`{u}");
LATEX_ENCODE_MAP.put('\u00FA', "\\'{u}");
LATEX_ENCODE_MAP.put('\u00FB', "\\^{u}");
LATEX_ENCODE_MAP.put('\u00FC', "\\\"{u}");
LATEX_ENCODE_MAP.put('\u00FD', "\\'{y}");
// todo \u00FE
LATEX_ENCODE_MAP.put('\u00FF', "\\\"{y}");
}
// Canonical names of the character encodings used throughout the class.
// They are final so that no caller can redirect every encode/decode method
// to another charset by reassigning one of them.
public static final String ENCODING_US_ASCII = "US-ASCII";
public static final String ENCODING_ISO_8859_1 = "ISO-8859-1";
public static final String ENCODING_ISO_8859_2 = "ISO-8859-2";
public static final String ENCODING_ISO_8859_5 = "ISO-8859-5";
public static final String ENCODING_UTF_8 = "UTF-8";
public static final String ENCODING_UTF_16BE = "UTF-16BE";
public static final String ENCODING_UTF_16LE = "UTF-16LE";
public static final String ENCODING_UTF_16 = "UTF-16";
// Charset instance for US-ASCII, used to test whether a value is pure ASCII.
public static final Charset CHARSET_US_ASCII = Charset.forName(ENCODING_US_ASCII);
/**
 * Matches every character that is not a letter, a digit or an underscore.
 * Compiled once, since a compiled pattern is immutable and can be shared.
 */
private static final Pattern CLASSNAME_INVALID_CHARS = Pattern.compile("[^\\w]");

/**
 * Transforms a provided <code>String</code> object into a new string,
 * containing only valid characters for a java class name.
 * <p>Every character that is not an ASCII letter, a digit or an underscore
 * is replaced by an underscore. A leading digit is left as it is, so the
 * result is not guaranteed to be usable as an identifier on its own.
 *
 * @param name The string that has to be transformed into a valid class
 * name.
 * @return The encoded <code>String</code> object; or
 * <p><code>null</code> if <code>name</code> was <code>null</code>
 * @see #encodeUrl(String)
 * @see #encodeHtml(String)
 * @see #encodeXml(String)
 * @see #encodeSql(String)
 * @see #encodeLatex(String)
 * @see #encodeRegexp(String)
 * @since 1.0
 */
public static String encodeClassname(String name)
{
    if (null == name)
    {
        return null;
    }
    return CLASSNAME_INVALID_CHARS.matcher(name).replaceAll("_");
}
/**
 * Tells whether a string contains at least one character that the URL
 * encoder would have to escape.
 * <p>Scanning first is cheaper than encoding blindly, since the
 * <code>URLEncoder</code> always allocates a buffer, even when the string
 * is returned as-is.
 *
 * @param source The string to inspect, may be <code>null</code>.
 * @return <code>true</code> if any character is something else than an
 * ASCII letter, an ASCII digit, a dash, an underscore, a dot or an
 * asterisk; or
 * <p><code>false</code> otherwise, also for <code>null</code>
 */
private static boolean needsUrlEncoding(String source)
{
    if (null == source)
    {
        return false;
    }

    final int length = source.length();
    for (int index = 0; index < length; index++)
    {
        final char current = source.charAt(index);
        final boolean letter = ('a' <= current && current <= 'z') || ('A' <= current && current <= 'Z');
        final boolean digit = '0' <= current && current <= '9';
        final boolean safe_symbol = '-' == current || '_' == current || '.' == current || '*' == current;
        if (!(letter || digit || safe_symbol))
        {
            // one unsafe character is enough, no need to look further
            return true;
        }
    }
    return false;
}
/**
 * Transforms a provided <code>String</code> object into a new string,
 * containing only valid URL characters.
 * <p>Strings that contain nothing that has to be escaped are returned
 * as-is, all others are URL encoded with the ISO-8859-1 character
 * encoding.
 *
 * @param source The string that has to be transformed into a valid URL
 * string.
 * @return The encoded <code>String</code> object; or
 * <p><code>null</code> if <code>source</code> was <code>null</code>
 * @see #encodeClassname(String)
 * @see #encodeUrlValue(String)
 * @see #encodeHtml(String)
 * @see #encodeXml(String)
 * @see #encodeSql(String)
 * @see #encodeLatex(String)
 * @see #encodeRegexp(String)
 * @since 1.0
 */
public static String encodeUrl(String source)
{
    if (needsUrlEncoding(source))
    {
        try
        {
            return URLEncoder.encode(source, ENCODING_ISO_8859_1);
        }
        ///CLOVER:OFF
        catch (UnsupportedEncodingException e)
        {
            // this should never happen, ISO-8859-1 is a standard encoding
            throw new RuntimeException(e);
        }
        ///CLOVER:ON
    }

    // nothing to escape (or null), hand the input back untouched
    return source;
}
/**
 * Transforms a provided <code>String</code> object into a new string that
 * can be used as a URL parameter value.
 * <p>Only pure US-ASCII strings are URL encoded in the regular way.
 * Strings with characters outside of US-ASCII are encoded in a
 * RIFE-specific manner, which allows international data to be passed
 * along the query string: the UTF-8 bytes are base-64 encoded, the padding
 * characters are percent-escaped and the result is prefixed with a marker.
 *
 * @param source The string that has to be transformed into a valid URL
 * parameter string.
 * @return The encoded <code>String</code> object; or
 * <p>the unmodified <code>source</code> if nothing had to be escaped
 * @see #decodeUrlValue(String)
 * @see #encodeClassname(String)
 * @see #encodeUrl(String)
 * @see #encodeHtml(String)
 * @see #encodeXml(String)
 * @see #encodeSql(String)
 * @see #encodeLatex(String)
 * @see #encodeRegexp(String)
 * @since 1.0
 */
public static String encodeUrlValue(String source)
{
    if (!needsUrlEncoding(source))
    {
        return source;
    }

    // a fresh encoder tells whether every character fits into US-ASCII
    final boolean ascii_only = CHARSET_US_ASCII.newEncoder().canEncode(source);

    try
    {
        if (ascii_only)
        {
            // valid US-ASCII goes through the regular URL encoding method
            return URLEncoder.encode(source, ENCODING_US_ASCII);
        }

        // otherwise, base-64 encode the UTF-8 bytes and mark the string
        // as being encoded in a special way
        String base64 = Base64.encodeToString(source.getBytes(ENCODING_UTF_8), false);
        return "%02%02" + replace(base64, "=", "%3D");
    }
    ///CLOVER:OFF
    catch (UnsupportedEncodingException e)
    {
        // this should never happen, US-ASCII and UTF-8 are standard encodings
        throw new RuntimeException(e);
    }
    ///CLOVER:ON
}
/**
 * Decodes a <code>String</code> that has been encoded in a RIFE-specific
 * manner for URL usage. Before calling this method, you should first
 * verify if the value needs decoding by using the
 * <code>doesUrlValueNeedDecoding(String)</code> method.
 * <p>The first two characters are taken to be the marker and are skipped
 * without being checked, the remainder is base-64 decoded and interpreted
 * as UTF-8.
 *
 * @param source the value that has been encoded for URL usage in a
 * RIFE-specific way
 * @return The decoded <code>String</code> object; or
 * <p><code>null</code> if <code>source</code> was <code>null</code> or if
 * the base-64 decoder returned no data
 * @throws StringIndexOutOfBoundsException if <code>source</code> is
 * shorter than the two marker characters
 * @see #encodeUrlValue(String)
 * @see #doesUrlValueNeedDecoding(String)
 * @since 1.0
 */
public static String decodeUrlValue(String source)
{
    // behave like the other methods of this class instead of failing on null
    if (null == source)
    {
        return null;
    }

    try
    {
        // skip the two marker characters that precede the base-64 payload
        byte[] decoded = Base64.decode(source.substring(2));
        if (null == decoded)
        {
            return null;
        }
        return new String(decoded, StringUtils.ENCODING_UTF_8);
    }
    ///CLOVER:OFF
    catch (UnsupportedEncodingException e)
    {
        // this should never happen, UTF-8 is a standard encoding
        throw new RuntimeException(e);
    }
    ///CLOVER:ON
}
/**
 * Checks if a <code>String</code> is encoded in a RIFE-specific manner
 * for URL usage.
 * <p>Such a value starts with two U+0002 control characters and has at
 * least one more character after them.
 *
 * @param source the value that might have been encoded for URL usage in a
 * RIFE-specific way
 * @return <code>true</code> if the value is encoded in the RIFE-specific
 * format; and
 * <p><code>false</code> otherwise, also for <code>null</code>
 * @see #encodeUrlValue(String)
 * @see #decodeUrlValue(String)
 * @since 1.0
 */
public static boolean doesUrlValueNeedDecoding(String source)
{
    // the marker alone, without any payload behind it, doesn't qualify
    if (null == source || source.length() <= 2)
    {
        return false;
    }
    return source.startsWith("\u0002\u0002");
}
/**
 * Tells whether a string contains at least one character that the HTML
 * encoders would replace.
 *
 * @param source The string to inspect, may be <code>null</code>.
 * @param defensive <code>true</code> to only look for characters from
 * U+00A0 onwards; or <code>false</code> to additionally look for the
 * double quote, the ampersand, the less-than and the greater-than sign.
 * @return <code>true</code> if such a character was found; or
 * <p><code>false</code> otherwise, also for <code>null</code>
 */
private static boolean needsHtmlEncoding(String source, boolean defensive)
{
    if (null == source)
    {
        return false;
    }

    final int length = source.length();
    for (int index = 0; index < length; index++)
    {
        final char current = source.charAt(index);

        // everything from the non-breaking space onwards is always encoded
        if (current >= '\u00A0')
        {
            return true;
        }

        // the markup characters only count when not encoding defensively
        if (!defensive &&
            ('"' == current || '&' == current || '<' == current || '>' == current))
        {
            return true;
        }
    }
    return false;
}
/**
 * Replaces the character references in a <code>String</code> by the
 * characters they stand for.
 * <p>Named references are looked up in the HTML decode table, numeric
 * references are supported in both their decimal and their hexadecimal
 * form and may designate any valid Unicode code point, including those
 * outside of the Basic Multilingual Plane. References that are not known
 * or that can't be parsed are copied to the result as they are.
 * <p>A reference is taken to start at the last ampersand that precedes a
 * semicolon, so that a stray ampersand earlier in the text doesn't
 * prevent the decoding of the reference that follows it.
 *
 * @param source The string whose character references have to be decoded.
 * @return The decoded <code>String</code> object; or
 * <p>the unmodified <code>source</code> if it is <code>null</code>, empty
 * or doesn't contain an ampersand that is followed by a semicolon
 * @see #encodeHtml(String)
 * @see #encodeHtmlDefensive(String)
 * @since 1.6
 */
public static String decodeHtml(String source)
{
    if (null == source ||
        0 == source.length())
    {
        return source;
    }

    StringBuilder result = null;
    int current_index = 0;
    while (current_index < source.length())
    {
        int delimiter_start_index = source.indexOf('&', current_index);
        if (-1 == delimiter_start_index)
        {
            break;
        }
        int delimiter_end_index = source.indexOf(';', delimiter_start_index + 1);
        if (-1 == delimiter_end_index)
        {
            break;
        }

        // move to the ampersand that is closest to the semicolon, otherwise
        // a stray ampersand would swallow the actual reference; this can
        // never go below the ampersand that was found above
        delimiter_start_index = source.lastIndexOf('&', delimiter_end_index - 1);

        // ensure that the string builder is setup correctly
        if (null == result)
        {
            result = new StringBuilder(source.length());
        }

        // add the text that leads up to this match
        result.append(source, current_index, delimiter_start_index);

        String entity = source.substring(delimiter_start_index, delimiter_end_index + 1);
        current_index = delimiter_end_index + 1;

        // the entity always ends with the semicolon, so when its second
        // character is a hash there's guaranteed to be a third character
        if ('#' == entity.charAt(1))
        {
            int start = 2;
            int radix = 10;

            // check if the number is hexadecimal
            char radix_marker = entity.charAt(2);
            if ('X' == radix_marker || 'x' == radix_marker)
            {
                start++;
                radix = 16;
            }

            try
            {
                int code_point = Integer.parseInt(entity.substring(start, entity.length() - 1), radix);
                if (Character.isValidCodePoint(code_point))
                {
                    // appending the code point instead of a cast char keeps
                    // supplementary characters intact
                    result.appendCodePoint(code_point);
                }
                // negative or too large numbers don't designate a character
                else
                {
                    result.append(entity);
                }
            }
            // when the number of the entity can't be parsed, add the entity as-is
            catch (NumberFormatException e)
            {
                result.append(entity);
            }
        }
        else
        {
            // try to decode the entity as a literal
            Character decoded = HTML_DECODE_MAP.get(entity);
            if (decoded != null)
            {
                result.append(decoded);
            }
            // if there was no match, add the entity as-is
            else
            {
                result.append(entity);
            }
        }
    }

    // nothing that looks like a reference was found
    if (null == result)
    {
        return source;
    }

    // add the text that trails the last reference
    if (current_index < source.length())
    {
        result.append(source, current_index, source.length());
    }
    return result.toString();
}
/**
 * Transforms a provided <code>String</code> object into a new string,
 * containing only valid Html characters.
 * <p>Both the aggressive and the defensive HTML encode tables are applied.
 * Characters that are in neither of them are offered to the HTML fallback
 * handler.
 *
 * @param source The string that has to be transformed into a valid Html
 * string.
 * @return The encoded <code>String</code> object; or
 * <p>the unmodified <code>source</code> if nothing had to be encoded
 * @see #encodeClassname(String)
 * @see #encodeUrl(String)
 * @see #encodeUrlValue(String)
 * @see #encodeXml(String)
 * @see #encodeSql(String)
 * @see #encodeString(String)
 * @see #encodeLatex(String)
 * @see #encodeRegexp(String)
 * @since 1.0
 */
public static String encodeHtml(String source)
{
    // cheap scan first, most strings don't need any encoding at all
    if (!needsHtmlEncoding(source, false))
    {
        return source;
    }
    return encode(source, HTML_ENCODER_FALLBACK, AGGRESSIVE_HTML_ENCODE_MAP, DEFENSIVE_HTML_ENCODE_MAP);
}
/**
 * Transforms a provided <code>String</code> object into a new string,
 * containing as much as possible Html characters. It is safe to already
 * feed existing Html to this method, since the ampersand, the less-than
 * sign and the greater-than sign will not be encoded.
 * <p>Only the defensive HTML encode table is applied and no fallback
 * handler is used.
 *
 * @param source The string that has to be transformed into a valid Html
 * string.
 * @return The encoded <code>String</code> object; or
 * <p>the unmodified <code>source</code> if nothing had to be encoded
 * @see #encodeClassname(String)
 * @see #encodeUrl(String)
 * @see #encodeUrlValue(String)
 * @see #encodeXml(String)
 * @see #encodeSql(String)
 * @see #encodeString(String)
 * @see #encodeLatex(String)
 * @see #encodeRegexp(String)
 * @since 1.0
 */
public static String encodeHtmlDefensive(String source)
{
    // cheap scan first, most strings don't need any encoding at all
    if (!needsHtmlEncoding(source, true))
    {
        return source;
    }
    return encode(source, null, DEFENSIVE_HTML_ENCODE_MAP);
}
/**
 * Transforms a provided <code>String</code> object into a new string,
 * containing only valid XML characters.
 * <p>The characters that are present in the XML encode table are
 * replaced, everything else is copied as it is.
 *
 * @param source The string that has to be transformed into a valid XML
 * string.
 * @return The encoded <code>String</code> object; or
 * <p><code>null</code> if <code>source</code> was <code>null</code>
 * @see #encodeClassname(String)
 * @see #encodeUrl(String)
 * @see #encodeUrlValue(String)
 * @see #encodeHtml(String)
 * @see #encodeSql(String)
 * @see #encodeString(String)
 * @see #encodeLatex(String)
 * @see #encodeRegexp(String)
 * @since 1.0
 */
public static String encodeXml(String source)
{
    if (null == source)
    {
        return null;
    }

    // table lookup only, no fallback handler
    return encode(source, null, XML_ENCODE_MAP);
}
/**
 * Transforms a provided <code>String</code> object into a new string that
 * can be used as the content of a string literal.
 * <p>The backslash, the line feed, the carriage return, the tab and the
 * double quote are replaced by their backslash escape sequences,
 * everything else is copied as it is.
 *
 * @param source The string that has to be transformed into a valid
 * sequence of <code>String</code> characters.
 * @return The encoded <code>String</code> object; or
 * <p><code>null</code> if <code>source</code> was <code>null</code>
 * @see #encodeClassname(String)
 * @see #encodeUrl(String)
 * @see #encodeUrlValue(String)
 * @see #encodeHtml(String)
 * @see #encodeXml(String)
 * @see #encodeSql(String)
 * @see #encodeLatex(String)
 * @see #encodeRegexp(String)
 * @since 1.0
 */
public static String encodeString(String source)
{
    if (null == source)
    {
        return null;
    }

    // table lookup only, no fallback handler
    return encode(source, null, STRING_ENCODE_MAP);
}
/**
 * Transforms a provided <code>String</code> object into a series of
 * unicode escape codes.
 * <p>Each character of the source, including plain ASCII ones, becomes a
 * backslash, the letter u and the four upper-case hexadecimal digits of
 * its UTF-16 code unit.
 *
 * @param source The string that has to be transformed into a valid
 * sequence of unicode escape codes
 * @return The encoded <code>String</code> object; or
 * <p><code>null</code> if <code>source</code> was <code>null</code>
 * @see #encodeClassname(String)
 * @see #encodeUrl(String)
 * @see #encodeUrlValue(String)
 * @see #encodeHtml(String)
 * @see #encodeXml(String)
 * @see #encodeSql(String)
 * @see #encodeLatex(String)
 * @see #encodeRegexp(String)
 * @since 1.0
 */
public static String encodeUnicode(String source)
{
    if (null == source)
    {
        return null;
    }

    final String hex_digits = "0123456789ABCDEF";
    final StringBuilder escaped = new StringBuilder();
    for (char current : source.toCharArray())
    {
        escaped.append("\\u");
        // emit the four nibbles from the most to the least significant one,
        // which zero-pads short values automatically
        for (int shift = 12; shift >= 0; shift -= 4)
        {
            escaped.append(hex_digits.charAt((current >> shift) & 0xF));
        }
    }
    return escaped.toString();
}
/**
 * Transforms a provided <code>String</code> object into a new string,
 * containing only valid Sql characters.
 * <p>The only replacement that is made is the doubling of every single
 * quote. NOTE(review): this alone may not be sufficient for databases
 * that give other characters a special meaning inside string literals;
 * bound statement parameters are the safer choice for untrusted input.
 *
 * @param source The string that has to be transformed into a valid Sql
 * string.
 * @return The encoded <code>String</code> object; or
 * <p><code>null</code> if <code>source</code> was <code>null</code>
 * @see #encodeClassname(String)
 * @see #encodeUrl(String)
 * @see #encodeUrlValue(String)
 * @see #encodeHtml(String)
 * @see #encodeXml(String)
 * @see #encodeString(String)
 * @see #encodeLatex(String)
 * @see #encodeRegexp(String)
 * @since 1.0
 */
public static String encodeSql(String source)
{
    if (null == source)
    {
        return null;
    }

    // table lookup only, no fallback handler
    return encode(source, null, SQL_ENCODE_MAP);
}
/**
 * Transforms a provided <code>String</code> object into a new string,
 * containing only valid LaTeX characters.
 * <p>The characters that are present in the LaTeX encode table are
 * replaced first. Afterwards the occurrences of the word "latex" are
 * replaced by the LaTeX logo command. NOTE(review): the trailing
 * <code>false</code> that is passed to <code>replace</code> presumably
 * makes that match case-insensitive - confirm against that method.
 *
 * @param source The string that has to be transformed into a valid LaTeX
 * string.
 * @return The encoded <code>String</code> object; or
 * <p><code>null</code> if <code>source</code> was <code>null</code>
 * @see #encodeClassname(String)
 * @see #encodeUrl(String)
 * @see #encodeUrlValue(String)
 * @see #encodeHtml(String)
 * @see #encodeXml(String)
 * @see #encodeSql(String)
 * @see #encodeString(String)
 * @see #encodeRegexp(String)
 * @since 1.0
 */
public static String encodeLatex(String source)
{
    if (null == source)
    {
        return null;
    }

    final String escaped = encode(source, null, LATEX_ENCODE_MAP);
    return StringUtils.replace(escaped, "latex", "\\LaTeX", false);
}
/**
 * Transforms a provided <code>String</code> object into a new string,
 * using the mappings that are provided through the supplied encoding
 * tables.
 * <p>For each character the tables are consulted in the order in which
 * they were provided and the first table that knows the character
 * determines its replacement. When none of the tables knows it, the
 * fallback handler, if any, gets the opportunity to provide one.
 * Characters without any replacement are copied as they are.
 *
 * @param source The string that has to be transformed into a valid
 * string, using the mappings that are provided through the supplied
 * encoding tables.
 * @param fallbackHandler The handler that is asked for a replacement of
 * the characters that are not in any of the tables; or <code>null</code>
 * if such characters always have to be copied unchanged.
 * @param encodingTables The <code>Map</code> objects containing the
 * mappings to transform characters into valid entities. The keys of these
 * maps should be <code>Character</code> objects and the values
 * <code>String</code> objects.
 * @return The encoded <code>String</code> object; or
 * <p>the <code>source</code> instance itself if nothing was replaced or
 * if no tables were provided; or
 * <p><code>null</code> if <code>source</code> was <code>null</code>
 * @since 1.0
 */
@SafeVarargs
private static String encode(String source, EncoderFallbackHandler fallbackHandler, Map<Character, String>... encodingTables)
{
    if (null == source)
    {
        return null;
    }

    if (null == encodingTables ||
        0 == encodingTables.length)
    {
        return source;
    }

    // only created when the first replacement is made, so that unchanged
    // strings don't cause any allocation besides the char array
    StringBuilder encoded_string = null;
    char[] string_to_encode_array = source.toCharArray();
    // index of the last character that was replaced, everything after it
    // still has to be copied literally
    int last_match = -1;

    for (int i = 0; i < string_to_encode_array.length; i++)
    {
        char char_to_encode = string_to_encode_array[i];
        for (Map<Character, String> encoding_table : encodingTables)
        {
            if (encoding_table.containsKey(char_to_encode))
            {
                encoded_string = prepareEncodedString(source, encoded_string, i, last_match, string_to_encode_array);
                encoded_string.append(encoding_table.get(char_to_encode));
                last_match = i;
                // the first table wins, without this a character that is
                // present in several tables would be emitted several times
                break;
            }
        }

        // last_match equals i when one of the tables handled the character
        if (fallbackHandler != null &&
            last_match < i &&
            fallbackHandler.hasFallback(char_to_encode))
        {
            encoded_string = prepareEncodedString(source, encoded_string, i, last_match, string_to_encode_array);
            fallbackHandler.appendFallback(encoded_string, char_to_encode);
            last_match = i;
        }
    }

    if (null == encoded_string)
    {
        return source;
    }

    // copy the literal text that trails the last replacement
    int difference = string_to_encode_array.length - (last_match + 1);
    if (difference > 0)
    {
        encoded_string.append(string_to_encode_array, last_match + 1, difference);
    }
    return encoded_string.toString();
}
/**
 * Makes sure that the output buffer exists and appends to it the
 * characters that are located between the last replaced character and the
 * current position.
 *
 * @param source The string that is being encoded, its length is used as
 * the initial capacity of a newly created buffer.
 * @param encodedString The buffer that has been built so far; or
 * <code>null</code> if it still has to be created.
 * @param i The index of the character that is about to be replaced.
 * @param lastMatch The index of the previously replaced character; or
 * <code>-1</code> if there was none.
 * @param stringToEncodeArray The characters of the string that is being
 * encoded.
 * @return The buffer to which the replacement can be appended.
 */
private static StringBuilder prepareEncodedString(String source, StringBuilder encodedString, int i, int lastMatch, char[] stringToEncodeArray)
{
    final StringBuilder buffer = (encodedString != null) ? encodedString : new StringBuilder(source.length());

    final int first_pending = lastMatch + 1;
    final int pending_count = i - first_pending;
    if (pending_count > 0)
    {
        buffer.append(stringToEncodeArray, first_pending, pending_count);
    }

    return buffer;
}
/**
 * Transforms a provided <code>String</code> object into a literal that can
 * be included into a regular expression {@link Pattern} as-is. None of the
 * regular expression escapes in the string will be functional anymore.
 * <p>The result is wrapped in <code>\Q</code> ... <code>\E</code> and any
 * <code>\E</code> sequence that is present in the source is escaped so
 * that it can't terminate the quoted section prematurely.
 *
 * @param source The string that has to be escaped as a literal
 * @return The encoded <code>String</code> object; or
 * <p><code>null</code> if <code>source</code> was <code>null</code>.
 * @see #encodeClassname(String)
 * @see #encodeUrl(String)
 * @see #encodeUrlValue(String)
 * @see #encodeHtml(String)
 * @see #encodeXml(String)
 * @see #encodeSql(String)
 * @see #encodeString(String)
 * @see #encodeLatex(String)
 * @since 1.3
 */
public static String encodeRegexp(String source)
{
    // behave like the other encode methods instead of failing on null
    if (null == source)
    {
        return null;
    }

    // Pattern.quote correctly splits the quoted section around each
    // embedded \E, which a plain \Q...\E wrapping doesn't do
    return Pattern.quote(source);
}
/**
 * Counts how many times a substring occurs in a provided string, comparing
 * in a case-sensitive manner.
 *
 * @param source The <code>String</code> object that will be searched in.
 * @param substring The string whose occurrences will be counted.
 * @return An <code>int</code> value containing the number of occurrences
 * of the substring.
 * @see #count(String, String, boolean)
 * @since 1.0
 */
public static int count(String source, String substring)
{
    final boolean match_case = true;
    return count(source, substring, match_case);
}
/**
 * Counts the number of times a substring occurs in a provided string.
 * <p>Occurrences are counted without overlap: after a match, the search
 * continues right after the end of that match.
 *
 * @param source The <code>String</code> object that will be searched in.
 * @param substring The string whose occurrences will be counted.
 * @param matchCase A <code>boolean</code> indicating if the match is
 * going to be performed in a case-sensitive manner or not.
 * @return An <code>int</code> value containing the number of occurrences
 * of the substring; or
 * <p><code>0</code> if <code>source</code> or <code>substring</code> is
 * <code>null</code>, or if <code>substring</code> is empty.
 * @since 1.0
 */
public static int count(String source, String substring, boolean matchCase)
{
    // an empty substring matches everywhere without ever advancing the
    // search position, so it is treated as having no occurrences
    if (null == source ||
        null == substring ||
        substring.isEmpty())
    {
        return 0;
    }

    if (!matchCase)
    {
        source = source.toLowerCase();
        substring = substring.toLowerCase();
    }

    int count = 0;
    // let indexOf decide when the end is reached, this also finds a match
    // that sits on the very last character of the source
    int match_index = source.indexOf(substring);
    while (match_index != -1)
    {
        count++;
        match_index = source.indexOf(substring, match_index + substring.length());
    }

    return count;
}
/**
 * Splits a string into parts around every case-sensitive occurrence of a
 * separator string. The separator itself is not included in the parts.
 *
 * @param source The string that will be split into parts.
 * @param seperator The separator string that marks the boundaries between
 * the parts.
 * @return A <code>List</code> containing the parts as <code>String</code>
 * objects.
 * @see #split(String, String, boolean)
 * @since 1.0
 */
public static List<String> split(String source, String seperator)
{
    final boolean match_case = true;
    return split(source, seperator, match_case);
}
/**
 * Splits a string into different parts, using a separator string to
 * detect the separation boundaries. The separator will not be included in
 * the list of parts.
 * <p>Adjacent separators and a separator at the start or the end of the
 * source produce empty parts.
 *
 * @param source The string that will be split into parts.
 * @param seperator The separator string that will be used to determine
 * the parts.
 * @param matchCase A <code>boolean</code> indicating if the match is
 * going to be performed in a case-sensitive manner or not.
 * @return A <code>List</code> containing the parts as <code>String</code>
 * objects. The list is empty if <code>source</code> is <code>null</code>
 * and contains only <code>source</code> if <code>seperator</code> is
 * <code>null</code> or empty.
 * @since 1.0
 */
public static List<String> split(String source, String seperator, boolean matchCase)
{
    List<String> substrings = new ArrayList<>();

    if (null == source)
    {
        return substrings;
    }

    // an empty separator would match at every position without advancing,
    // it is handled like a missing separator
    if (null == seperator ||
        seperator.isEmpty())
    {
        substrings.add(source);
        return substrings;
    }

    // the lookup is done on a lower-cased copy while the parts are always
    // taken from the original source, so that their case is preserved
    String source_lookup_reference = source;
    if (!matchCase)
    {
        source_lookup_reference = source.toLowerCase();
        seperator = seperator.toLowerCase();
    }

    int part_start = 0;
    int delimiter_index;
    while ((delimiter_index = source_lookup_reference.indexOf(seperator, part_start)) != -1)
    {
        substrings.add(source.substring(part_start, delimiter_index));
        part_start = delimiter_index + seperator.length();
    }
    // whatever follows the last separator is the final part, possibly empty
    substrings.add(source.substring(part_start));

    return substrings;
}
/**
 * Splits a string into parts around every case-sensitive occurrence of a
 * separator string and returns them as an array. The separator itself is
 * not included in the parts.
 *
 * @param source The string that will be split into parts.
 * @param seperator The separator string that marks the boundaries between
 * the parts.
 * @return A <code>String[]</code> array containing the separated parts.
 * @see #splitToArray(String, String, boolean)
 * @since 1.0
 */
public static String[] splitToArray(String source, String seperator)
{
    final boolean match_case = true;
    return splitToArray(source, seperator, match_case);
}
/**
 * Splits a string into parts around every occurrence of a separator
 * string and returns them as an array. The separator itself is not
 * included in the parts.
 *
 * @param source The string that will be split into parts.
 * @param seperator The separator string that marks the boundaries between
 * the parts.
 * @param matchCase A <code>boolean</code> indicating if the match is
 * going to be performed in a case-sensitive manner or not.
 * @return A <code>String[]</code> array containing the separated parts.
 * @see #split(String, String, boolean)
 * @since 1.0
 */
public static String[] splitToArray(String source, String seperator, boolean matchCase)
{
    final List<String> parts = split(source, seperator, matchCase);
    return parts.toArray(new String[parts.size()]);
}
/**
 * Splits a string into integers around every case-sensitive occurrence of
 * a separator string. Parts that can't be converted to an integer are
 * omitted from the resulting array.
 *
 * @param source The string that will be split into integers.
 * @param seperator The separator string that marks the boundaries between
 * the parts.
 * @return An <code>int[]</code> array containing the separated parts.
 * @see #splitToIntArray(String, String, boolean)
 * @since 1.0
 */
public static int[] splitToIntArray(String source, String seperator)
{
    final boolean match_case = true;
    return splitToIntArray(source, seperator, match_case);
}
/**
 * Splits a string into integers, using a separator string to detect the
 * separation boundaries. If a part couldn't be converted to an integer,
 * it will be omitted from the resulting array.
 *
 * @param source The string that will be split into integers.
 * @param seperator The separator string that will be used to determine
 * the parts.
 * @param matchCase A <code>boolean</code> indicating if the match is
 * going to be performed in a case-sensitive manner or not.
 * @return An <code>int[]</code> array containing the parts that could be
 * parsed, in their original order.
 * @see #split(String, String, boolean)
 * @since 1.0
 */
public static int[] splitToIntArray(String source, String seperator, boolean matchCase)
{
    List<String> string_parts = split(source, seperator, matchCase);

    // parse each part only once into an array that is large enough for all
    // of them, it's trimmed afterwards when some parts were invalid
    int[] string_parts_int = new int[string_parts.size()];
    int added_parts = 0;
    for (String string_part : string_parts)
    {
        try
        {
            // the counter is only incremented after a successful parse
            string_parts_int[added_parts] = Integer.parseInt(string_part);
            added_parts++;
        }
        catch (NumberFormatException ignored)
        {
            // parts that aren't integers are deliberately left out
        }
    }

    if (added_parts == string_parts_int.length)
    {
        return string_parts_int;
    }
    return Arrays.copyOf(string_parts_int, added_parts);
}
/**
 * Splits a string into bytes around every case-sensitive occurrence of a
 * separator string. Parts that can't be converted to a <code>byte</code>
 * are omitted from the resulting array.
 *
 * @param source The string that will be split into bytes.
 * @param seperator The separator string that marks the boundaries between
 * the parts.
 * @return A <code>byte[]</code> array containing the bytes.
 * @see #splitToByteArray(String, String, boolean)
 * @since 1.0
 */
public static byte[] splitToByteArray(String source, String seperator)
{
    final boolean match_case = true;
    return splitToByteArray(source, seperator, match_case);
}
/**
 * Splits a string into bytes, using a separator string to detect the
 * separation boundaries. If a part couldn't be converted to a
 * <code>byte</code>, it will be omitted from the resulting array.
 *
 * @param source The string that will be split into bytes.
 * @param seperator The separator string that will be used to determine
 * the parts.
 * @param matchCase A <code>boolean</code> indicating if the match is
 * going to be performed in a case-sensitive manner or not.
 * @return A <code>byte[]</code> array containing the parts that could be
 * parsed, in their original order.
 * @see #split(String, String, boolean)
 * @since 1.0
 */
public static byte[] splitToByteArray(String source, String seperator, boolean matchCase)
{
    List<String> string_parts = split(source, seperator, matchCase);

    // parse each part only once into an array that is large enough for all
    // of them, it's trimmed afterwards when some parts were invalid
    byte[] string_parts_byte = new byte[string_parts.size()];
    int added_parts = 0;
    for (String string_part : string_parts)
    {
        try
        {
            // the counter is only incremented after a successful parse
            string_parts_byte[added_parts] = Byte.parseByte(string_part);
            added_parts++;
        }
        catch (NumberFormatException ignored)
        {
            // parts that aren't bytes are deliberately left out
        }
    }

    if (added_parts == string_parts_byte.length)
    {
        return string_parts_byte;
    }
    return Arrays.copyOf(string_parts_byte, added_parts);
}
/**
 * Removes all the consecutive occurrences of a string from the front of
 * another string, comparing in a case-sensitive manner.
 *
 * @param source The string in which the matching will be done.
 * @param stringToStrip The string that will be stripped from the front.
 * @return A new <code>String</code> containing the stripped result.
 * @see #stripFromFront(String, String, boolean)
 * @since 1.0
 */
public static String stripFromFront(String source, String stringToStrip)
{
    final boolean match_case = true;
    return stripFromFront(source, stringToStrip, match_case);
}
/**
 * Removes all occurrences of a string from the front of another string.
 * <p>Only the occurrences that directly follow each other, starting at
 * the first character of the source, are removed.
 *
 * @param source The string in which the matching will be done.
 * @param stringToStrip The string that will be stripped from the front.
 * @param matchCase A <code>boolean</code> indicating if the match is
 * going to be performed in a case-sensitive manner or not.
 * @return A <code>String</code> containing the stripping result;
 * <code>source</code> itself if <code>stringToStrip</code> is
 * <code>null</code> or empty, or if nothing had to be stripped; or
 * <p><code>null</code> if <code>source</code> was <code>null</code>.
 * @since 1.0
 */
public static String stripFromFront(String source, String stringToStrip, boolean matchCase)
{
    if (null == source)
    {
        return null;
    }

    // an empty string matches at every position without advancing, there
    // is nothing to strip in that case
    if (null == stringToStrip ||
        stringToStrip.isEmpty())
    {
        return source;
    }

    int strip_length = stringToStrip.length();

    // the lookup is done on a lower-cased copy while the result is taken
    // from the original source, so that its case is preserved
    String source_lookup_reference = source;
    if (!matchCase)
    {
        source_lookup_reference = source.toLowerCase();
        stringToStrip = stringToStrip.toLowerCase();
    }

    // skip over every occurrence that starts exactly where the previous
    // one ended
    int strip_end = 0;
    while (source_lookup_reference.startsWith(stringToStrip, strip_end))
    {
        strip_end += strip_length;
    }

    return source.substring(strip_end);
}
/**
 * Removes all the consecutive occurrences of a string from the end of
 * another string, comparing in a case-sensitive manner.
 *
 * @param source The string in which the matching will be done.
 * @param stringToStrip The string that will be stripped from the end.
 * @return A new <code>String</code> containing the stripped result.
 * @see #stripFromEnd(String, String, boolean)
 * @since 1.0
 */
public static String stripFromEnd(String source, String stringToStrip)
{
    final boolean match_case = true;
    return stripFromEnd(source, stringToStrip, match_case);
}
/**
 * Removes all occurrences of a string from the end of another string.
 * <p>Only the occurrences that directly precede each other, ending at the
 * last character of the source, are removed.
 *
 * @param source The string in which the matching will be done.
 * @param stringToStrip The string that will be stripped from the end.
 * @param matchCase A <code>boolean</code> indicating if the match is
 * going to be performed in a case-sensitive manner or not.
 * @return A <code>String</code> containing the stripped result;
 * <code>source</code> itself if <code>stringToStrip</code> is
 * <code>null</code> or empty, or if nothing had to be stripped; or
 * <p><code>null</code> if <code>source</code> was <code>null</code>.
 * @since 1.0
 */
public static String stripFromEnd(String source, String stringToStrip, boolean matchCase)
{
    if (null == source)
    {
        return null;
    }

    if (null == stringToStrip ||
        stringToStrip.isEmpty())
    {
        return source;
    }

    int strip_length = stringToStrip.length();

    // the lookup is done on a lower-cased copy while the result is taken
    // from the original source, so that its case is preserved
    String source_lookup_reference = source;
    if (!matchCase)
    {
        source_lookup_reference = source.toLowerCase();
        stringToStrip = stringToStrip.toLowerCase();
    }

    // test the exact position in front of the part that has already been
    // stripped; a backwards lastIndexOf search could return an overlapping
    // occurrence and stop too early (e.g. stripping "aa" from "aaaa")
    int strip_start = source.length();
    while (strip_start >= strip_length &&
           source_lookup_reference.startsWith(stringToStrip, strip_start - strip_length))
    {
        strip_start -= strip_length;
    }

    return source.substring(0, strip_start);
}
/**
 * Replaces every case-sensitive occurrence of a string within another
 * string by a replacement string.
 *
 * @param source The string in which the matching parts will be replaced.
 * @param stringToReplace The string that will be searched for.
 * @param replacementString The string that will replace each matching
 * part.
 * @return A new <code>String</code> object containing the replacement
 * result.
 * @see #replace(String, String, String, boolean)
 * @since 1.0
 */
public static String replace(String source, String stringToReplace, String replacementString)
{
    final boolean match_case = true;
    return replace(source, stringToReplace, replacementString, match_case);
}
/**
 * Searches for a string within a specified string and replaces every
 * match with another string.
 * <p>Matches are searched from left to right without overlap and the
 * replacement text itself is never searched again.
 *
 * @param source The string in which the matching parts will be replaced.
 * @param stringToReplace The string that will be searched for.
 * @param replacementString The string that will replace each matching
 * part.
 * @param matchCase A <code>boolean</code> indicating if the match is
 * going to be performed in a case-sensitive manner or not.
 * @return A <code>String</code> object containing the replacement
 * result; <code>source</code> itself if <code>stringToReplace</code> is
 * <code>null</code> or empty, if <code>replacementString</code> is
 * <code>null</code>, or if there was no match; or
 * <p><code>null</code> if <code>source</code> was <code>null</code>.
 * @since 1.0
 */
public static String replace(String source, String stringToReplace, String replacementString, boolean matchCase)
{
    if (null == source)
    {
        return null;
    }

    // an empty search string matches everywhere without ever advancing,
    // there is nothing meaningful to replace in that case
    if (null == stringToReplace ||
        null == replacementString ||
        stringToReplace.isEmpty())
    {
        return source;
    }

    // the lookup is done on a lower-cased copy while the untouched parts
    // are copied from the original source, so that their case is preserved
    String source_lookup_reference = source;
    if (!matchCase)
    {
        source_lookup_reference = source.toLowerCase();
        stringToReplace = stringToReplace.toLowerCase();
    }

    int match_index = source_lookup_reference.indexOf(stringToReplace);
    if (-1 == match_index)
    {
        return source;
    }

    StringBuilder new_string = new StringBuilder(source.length());
    int copy_start = 0;
    do
    {
        new_string.append(source, copy_start, match_index);
        new_string.append(replacementString);
        copy_start = match_index + stringToReplace.length();
        match_index = source_lookup_reference.indexOf(stringToReplace, copy_start);
    }
    while (match_index != -1);
    // the remainder after the last match
    new_string.append(source, copy_start, source.length());

    return new_string.toString();
}
/**
 * Builds a string that consists of the provided string, concatenated to
 * itself a number of times.
 *
 * @param source The string that will be repeated.
 * @param count The number of times that the string will be repeated; zero
 * or a negative value results in an empty string.
 * @return A new <code>String</code> object containing the repeated
 * concatenation result; or
 * <p><code>null</code> if <code>source</code> was <code>null</code>.
 * @since 1.0
 */
public static String repeat(String source, int count)
{
    if (source == null)
    {
        return null;
    }

    final StringBuilder repeated = new StringBuilder();
    for (int remaining = count; remaining > 0; remaining--)
    {
        repeated.append(source);
    }
    return repeated.toString();
}
/**
 * Collects all the remaining elements of a supplied <code>Iterator</code>
 * into a new array of <code>String</code> objects.
 *
 * @param iterator The iterator containing the elements to create the
 * array with.
 * @return The new <code>String</code> array, which is empty if
 * <code>iterator</code> was <code>null</code>.
 * @since 1.0
 */
public static String[] toStringArray(Iterator<String> iterator)
{
    final List<String> collected = new ArrayList<>();
    if (iterator != null)
    {
        while (iterator.hasNext())
        {
            collected.add(iterator.next());
        }
    }
    return collected.toArray(new String[collected.size()]);
}
/**
 * Copies the elements of a supplied array of <code>String</code> objects
 * into a new <code>ArrayList</code>.
 *
 * @param stringArray The array of <code>String</code> objects that have
 * to be converted.
 * @return The new <code>ArrayList</code> with the elements of the
 * <code>String</code> array, which is empty if <code>stringArray</code>
 * was <code>null</code>.
 * @since 1.0
 */
public static List<String> toArrayList(String[] stringArray)
{
    final List<String> converted = new ArrayList<>();
    if (stringArray != null)
    {
        for (String element : stringArray)
        {
            converted.add(element);
        }
    }
    return converted;
}
/**
 * Creates a new <code>String</code> object, containing the string
 * representations of the elements of a supplied <code>Collection</code>,
 * joined by a given separator.
 * <p>The elements are converted with {@link String#valueOf(Object)}, so
 * that a <code>null</code> element is rendered as <code>null</code>.
 *
 * @param collection The <code>Collection</code> containing the elements
 * to join.
 * @param seperator The separator used to join the string elements;
 * <code>null</code> is handled as an empty separator.
 * @return A new <code>String</code> with the join result, which is empty
 * for an empty collection; or
 * <p><code>null</code> if <code>collection</code> was <code>null</code>.
 * @since 1.0
 */
public static String join(Collection<?> collection, String seperator)
{
    if (null == collection)
    {
        return null;
    }

    if (null == seperator)
    {
        seperator = "";
    }

    StringBuilder result = new StringBuilder();
    Iterator<?> elements = collection.iterator();
    while (elements.hasNext())
    {
        result.append(String.valueOf(elements.next()));
        // the separator is only added between elements, nothing has to be
        // removed again afterwards
        if (elements.hasNext())
        {
            result.append(seperator);
        }
    }

    return result.toString();
}
/**
 * Joins the elements of a supplied array into a new <code>String</code>
 * object, putting a given separator between them. The elements are
 * neither delimited nor encoded.
 *
 * @param array The object array containing the elements to join.
 * @param seperator The separator used to join the string elements.
 * @return A new <code>String</code> with the join result.
 * @see #join(Object[], String, String, boolean)
 * @since 1.0
 */
public static String join(Object[] array, String seperator)
{
    final String no_delimiter = null;
    final boolean encode_strings = false;
    return join(array, seperator, no_delimiter, encode_strings);
}
/**
 * Joins the elements of a supplied array into a new <code>String</code>
 * object, putting a given separator between them and a delimiter around
 * them. The elements are not encoded.
 *
 * @param array The object array containing the elements to join.
 * @param seperator The separator used to join the string elements.
 * @param delimiter The delimiter used to surround the string elements.
 * @return A new <code>String</code> with the join result.
 * @see #join(Object[], String, String, boolean)
 * @since 1.0
 */
public static String join(Object[] array, String seperator, String delimiter)
{
    final boolean encode_strings = false;
    return join(array, seperator, delimiter, encode_strings);
}
/**
 * Joins the elements of a supplied array into a new <code>String</code>
 * object, putting a given separator between them.
 * <p>Each element that isn't <code>null</code> is converted to its string
 * representation, optionally encoded with <code>encodeString</code>, and
 * surrounded by the delimiter. A <code>null</code> element is rendered as
 * the text <code>null</code>, without any delimiter.
 *
 * @param array The object array containing the elements to join.
 * @param seperator The separator used to join the string elements;
 * <code>null</code> is handled as an empty separator.
 * @param delimiter The delimiter used to surround the string elements;
 * <code>null</code> is handled as an empty delimiter.
 * @param encodeStrings Indicates whether the characters of the string
 * representation of the array values should be encoded.
 * @return A new <code>String</code> with the join result, which is empty
 * for an empty array; or
 * <p><code>null</code> if <code>array</code> was <code>null</code>.
 * @since 1.0
 */
public static String join(Object[] array, String seperator, String delimiter, boolean encodeStrings)
{
    if (array == null)
    {
        return null;
    }
    if (array.length == 0)
    {
        return "";
    }

    final String glue = (seperator != null) ? seperator : "";
    final String quote = (delimiter != null) ? delimiter : "";

    final StringBuilder joined = new StringBuilder();
    for (int index = 0; index < array.length; index++)
    {
        if (index > 0)
        {
            joined.append(glue);
        }

        final Object element = array[index];
        if (element == null)
        {
            // null elements are never delimited
            joined.append("null");
            continue;
        }

        String element_text = String.valueOf(element);
        if (encodeStrings)
        {
            element_text = encodeString(element_text);
        }
        joined.append(quote);
        joined.append(element_text);
        joined.append(quote);
    }

    return joined.toString();
}
/**
 * Creates a new <code>String</code> object, containing the values of a
 * supplied array, joined by a given separator.
 *
 * @param array The boolean array containing the values to join.
 * @param seperator The separator used to join the string elements;
 * <code>null</code> is handled as an empty separator.
 * @return A new <code>String</code> with the join result, which is empty
 * for an empty array; or
 * <p><code>null</code> if <code>array</code> was <code>null</code>.
 * @since 1.0
 */
public static String join(boolean[] array, String seperator)
{
    if (null == array)
    {
        return null;
    }

    if (0 == array.length)
    {
        return "";
    }

    if (null == seperator)
    {
        seperator = "";
    }

    // a single builder avoids re-copying the whole result for each value
    StringBuilder result = new StringBuilder();
    result.append(array[0]);
    for (int i = 1; i < array.length; i++)
    {
        result.append(seperator);
        result.append(array[i]);
    }

    return result.toString();
}
/**
 * Creates a new <code>String</code> object, containing the decimal
 * representations of the values of a supplied array, joined by a given
 * separator.
 *
 * @param array The byte array containing the values to join.
 * @param seperator The separator used to join the string elements;
 * <code>null</code> is handled as an empty separator.
 * @return A new <code>String</code> with the join result, which is empty
 * for an empty array; or
 * <p><code>null</code> if <code>array</code> was <code>null</code>.
 * @since 1.0
 */
public static String join(byte[] array, String seperator)
{
    if (null == array)
    {
        return null;
    }

    if (0 == array.length)
    {
        return "";
    }

    if (null == seperator)
    {
        seperator = "";
    }

    // a single builder avoids re-copying the whole result for each value
    StringBuilder result = new StringBuilder();
    result.append(array[0]);
    for (int i = 1; i < array.length; i++)
    {
        result.append(seperator);
        result.append(array[i]);
    }

    return result.toString();
}
/**
 * Creates a new <code>String</code> object, containing the string
 * representations of the values of a supplied array, joined by a given
 * separator.
 *
 * @param array The double array containing the values to join.
 * @param seperator The separator used to join the string elements;
 * <code>null</code> is handled as an empty separator.
 * @return A new <code>String</code> with the join result, which is empty
 * for an empty array; or
 * <p><code>null</code> if <code>array</code> was <code>null</code>.
 * @since 1.0
 */
public static String join(double[] array, String seperator)
{
    if (null == array)
    {
        return null;
    }

    if (0 == array.length)
    {
        return "";
    }

    if (null == seperator)
    {
        seperator = "";
    }

    // a single builder avoids re-copying the whole result for each value
    StringBuilder result = new StringBuilder();
    result.append(array[0]);
    for (int i = 1; i < array.length; i++)
    {
        result.append(seperator);
        result.append(array[i]);
    }

    return result.toString();
}
/**
 * Creates a new <code>String</code> object, containing the string
 * representations of the values of a supplied array, joined by a given
 * separator.
 *
 * @param array The float array containing the values to join.
 * @param seperator The separator used to join the string elements;
 * <code>null</code> is handled as an empty separator.
 * @return A new <code>String</code> with the join result, which is empty
 * for an empty array; or
 * <p><code>null</code> if <code>array</code> was <code>null</code>.
 * @since 1.0
 */
public static String join(float[] array, String seperator)
{
    if (null == array)
    {
        return null;
    }

    if (0 == array.length)
    {
        return "";
    }

    if (null == seperator)
    {
        seperator = "";
    }

    // a single builder avoids re-copying the whole result for each value
    StringBuilder result = new StringBuilder();
    result.append(array[0]);
    for (int i = 1; i < array.length; i++)
    {
        result.append(seperator);
        result.append(array[i]);
    }

    return result.toString();
}
/**
 * Creates a new <code>String</code> object, containing the decimal
 * representations of the values of a supplied array, joined by a given
 * separator.
 *
 * @param array The integer array containing the values to join.
 * @param seperator The separator used to join the string elements;
 * <code>null</code> is handled as an empty separator.
 * @return A new <code>String</code> with the join result, which is empty
 * for an empty array; or
 * <p><code>null</code> if <code>array</code> was <code>null</code>.
 * @since 1.0
 */
public static String join(int[] array, String seperator)
{
    if (null == array)
    {
        return null;
    }

    if (0 == array.length)
    {
        return "";
    }

    if (null == seperator)
    {
        seperator = "";
    }

    // a single builder avoids re-copying the whole result for each value
    StringBuilder result = new StringBuilder();
    result.append(array[0]);
    for (int i = 1; i < array.length; i++)
    {
        result.append(seperator);
        result.append(array[i]);
    }

    return result.toString();
}
/**
 * Creates a new <code>String</code> object, containing the decimal
 * representations of the values of a supplied array, joined by a given
 * separator.
 *
 * @param array The long array containing the values to join.
 * @param seperator The separator used to join the string elements;
 * <code>null</code> is handled as an empty separator.
 * @return A new <code>String</code> with the join result, which is empty
 * for an empty array; or
 * <p><code>null</code> if <code>array</code> was <code>null</code>.
 * @since 1.0
 */
public static String join(long[] array, String seperator)
{
    if (null == array)
    {
        return null;
    }

    if (0 == array.length)
    {
        return "";
    }

    if (null == seperator)
    {
        seperator = "";
    }

    // a single builder avoids re-copying the whole result for each value
    StringBuilder result = new StringBuilder();
    result.append(array[0]);
    for (int i = 1; i < array.length; i++)
    {
        result.append(seperator);
        result.append(array[i]);
    }

    return result.toString();
}
/**
 * Creates a new <code>String</code> object, containing the decimal
 * representations of the values of a supplied array, joined by a given
 * separator.
 *
 * @param array The short array containing the values to join.
 * @param seperator The separator used to join the string elements;
 * <code>null</code> is handled as an empty separator.
 * @return A new <code>String</code> with the join result, which is empty
 * for an empty array; or
 * <p><code>null</code> if <code>array</code> was <code>null</code>.
 * @since 1.0
 */
public static String join(short[] array, String seperator)
{
    if (null == array)
    {
        return null;
    }

    if (0 == array.length)
    {
        return "";
    }

    if (null == seperator)
    {
        seperator = "";
    }

    // a single builder avoids re-copying the whole result for each value
    StringBuilder result = new StringBuilder();
    result.append(array[0]);
    for (int i = 1; i < array.length; i++)
    {
        result.append(seperator);
        result.append(array[i]);
    }

    return result.toString();
}
/**
 * Joins the characters of a supplied array into a new <code>String</code>
 * object, putting a given separator between them. The characters are not
 * delimited.
 *
 * @param array The char array containing the values to join.
 * @param seperator The separator used to join the string elements.
 * @return A new <code>String</code> with the join result.
 * @see #join(char[], String, String)
 * @since 1.0
 */
public static String join(char[] array, String seperator)
{
    final String no_delimiter = null;
    return join(array, seperator, no_delimiter);
}
/**
 * Joins the characters of a supplied array into a new <code>String</code>
 * object, putting a given separator between them and a delimiter around
 * each of them.
 *
 * @param array The char array containing the values to join.
 * @param seperator The separator used to join the string elements;
 * <code>null</code> is handled as an empty separator.
 * @param delimiter The delimiter used to surround the string elements;
 * <code>null</code> is handled as an empty delimiter.
 * @return A new <code>String</code> with the join result, which is empty
 * for an empty array; or
 * <p><code>null</code> if <code>array</code> was <code>null</code>.
 * @since 1.0
 */
public static String join(char[] array, String seperator, String delimiter)
{
    if (array == null)
    {
        return null;
    }
    if (array.length == 0)
    {
        return "";
    }

    final String glue = (seperator != null) ? seperator : "";
    final String quote = (delimiter != null) ? delimiter : "";

    final StringBuilder joined = new StringBuilder();
    for (int index = 0; index < array.length; index++)
    {
        if (index > 0)
        {
            joined.append(glue);
        }
        joined.append(quote).append(array[index]).append(quote);
    }
    return joined.toString();
}
/**
 * Returns the positions of all the occurrences of a substring in a
 * string, in ascending order. The search is performed in a case-sensitive
 * manner.
 *
 * @param source The <code>String</code> object that will be searched in.
 * @param substring The string whose occurrences will be located.
 * @return An <code>int[]</code> array containing the indices of the
 * substring.
 * @see #indicesOf(String, String, boolean)
 * @since 1.0
 */
public static int[] indicesOf(String source, String substring)
{
    final boolean match_case = true;
    return indicesOf(source, substring, match_case);
}
/**
 * Returns an array that contains the positions of all the occurrences of
 * a substring in a string, in ascending order.
 * <p>Occurrences are located without overlap: after a match, the search
 * continues right after the end of that match.
 *
 * @param source The <code>String</code> object that will be searched in.
 * @param substring The string whose occurrences will be located.
 * @param matchCase A <code>boolean</code> indicating if the match is
 * going to be performed in a case-sensitive manner or not.
 * @return An <code>int[]</code> array containing the indices of the
 * substring, which is empty if <code>source</code> or
 * <code>substring</code> is <code>null</code>, or if
 * <code>substring</code> is empty.
 * @since 1.0
 */
public static int[] indicesOf(String source, String substring, boolean matchCase)
{
    // an empty substring matches everywhere without ever advancing the
    // search position, so it is treated as having no occurrences
    if (null == source ||
        null == substring ||
        substring.isEmpty())
    {
        return new int[0];
    }

    String source_lookup_reference = source;
    if (!matchCase)
    {
        source_lookup_reference = source.toLowerCase();
        substring = substring.toLowerCase();
    }

    // the indices are collected in a growing buffer during a single scan,
    // which is trimmed to the number of matches at the end
    int[] indices = new int[8];
    int counter = 0;
    int substring_index = source_lookup_reference.indexOf(substring);
    while (substring_index != -1)
    {
        if (counter == indices.length)
        {
            indices = Arrays.copyOf(indices, counter * 2);
        }
        indices[counter] = substring_index;
        counter++;
        substring_index = source_lookup_reference.indexOf(substring, substring_index + substring.length());
    }

    return Arrays.copyOf(indices, counter);
}
/**
 * Tries a collection of regular expressions against a string and stops at
 * the first one that matches the entire string.
 *
 * @param value The <code>String</code> that will be checked.
 * @param regexps The collection of regular expressions against which the
 * match will be performed.
 * @return The <code>Matcher</code> instance of the first regular
 * expression that matched; or
 * <p><code>null</code> if no match could be found, or if the value or the
 * collection was <code>null</code> or empty.
 * @since 1.0
 */
public static Matcher getMatchingRegexp(String value, Collection<Pattern> regexps)
{
    if (value == null || value.length() == 0)
    {
        return null;
    }
    if (regexps == null || regexps.size() == 0)
    {
        return null;
    }

    for (Pattern candidate : regexps)
    {
        final Matcher candidate_matcher = candidate.matcher(value);
        if (candidate_matcher.matches())
        {
            return candidate_matcher;
        }
    }

    return null;
}
/**
 * Tries a collection of strings against a regular expression and stops at
 * the first string that is matched entirely.
 *
 * @param values The <code>Collection</code> of <code>String</code>
 * objects that will be checked.
 * @param regexp The regular expression <code>Pattern</code> against which
 * the matches will be performed.
 * @return The <code>Matcher</code> instance of the first
 * <code>String</code> that matched; or
 * <p><code>null</code> if no match could be found, if the collection was
 * <code>null</code> or empty, or if the pattern was <code>null</code>.
 * @since 1.0
 */
public static Matcher getRegexpMatch(Collection<String> values, Pattern regexp)
{
    if (values == null || values.size() == 0 || regexp == null)
    {
        return null;
    }

    for (String candidate : values)
    {
        final Matcher candidate_matcher = regexp.matcher(candidate);
        if (candidate_matcher.matches())
        {
            return candidate_matcher;
        }
    }

    return null;
}
/**
 * Checks if a name passes an including and an excluding regular
 * expression. Each pattern that isn't <code>null</code> is wrapped in a
 * single-element array and the check is delegated to
 * {@link #filter(String, Pattern[], Pattern[])}.
 *
 * @param name The <code>String</code> that will be filtered.
 * @param included The regular expression that needs to succeed
 * @param excluded The regular expression that needs to fail
 * @return <code>true</code> if the name filtered through correctly; or
 * <p><code>false</code> otherwise.
 * @since 1.0
 */
public static boolean filter(String name, Pattern included, Pattern excluded)
{
    final Pattern[] included_patterns = (included == null) ? null : new Pattern[]{included};
    final Pattern[] excluded_patterns = (excluded == null) ? null : new Pattern[]{excluded};

    return filter(name, included_patterns, excluded_patterns);
}
/**
* Checks if the name filters through a series of including and excluding
* regular expressions.
*
* @param name The <code>String</code> that will be filtered.
* @param included An array of regular expressions that need to succeed
* @param excluded An array of regular expressions that need to fail
* @return <code>true</code> if the name filtered through correctly; or
* <p><code>false</code> otherwise.
* @since 1.0
*/
public static boolean filter(String name, Pattern[] included, Pattern[] excluded)
{
    // a missing name never filters through
    if (name == null)
    {
        return false;
    }

    // when includes are provided, at least one of them has to match;
    // null entries are skipped
    if (included != null)
    {
        boolean is_included = false;
        for (Pattern include : included)
        {
            if (include != null &&
                include.matcher(name).matches())
            {
                is_included = true;
                break;
            }
        }

        if (!is_included)
        {
            return false;
        }
    }

    // a single matching exclude is enough to reject the name;
    // null entries are skipped
    if (excluded != null)
    {
        for (Pattern exclude : excluded)
        {
            if (exclude != null &&
                exclude.matcher(name).matches())
            {
                return false;
            }
        }
    }

    return true;
}
/**
* Ensure that the first character of the provided string is upper case.
*
* @param source The <code>String</code> to capitalize.
* @return The capitalized <code>String</code>.
* @since 1.0
*/
public static String capitalize(String source)
{
    // null and empty strings have no first character to change
    if (null == source || 0 == source.length())
    {
        return source;
    }

    char first = source.charAt(0);

    // longer strings that already start with an upper case character are
    // handed back as-is
    if (source.length() > 1 &&
        Character.isUpperCase(first))
    {
        return source;
    }

    StringBuilder capitalized = new StringBuilder(source);
    capitalized.setCharAt(0, Character.toUpperCase(first));
    return capitalized.toString();
}
/**
* Ensure that the first character of the provided string is lower case.
*
* @param source The <code>String</code> to uncapitalize.
* @return The uncapitalized <code>String</code>.
* @since 1.5
*/
public static String uncapitalize(String source)
{
    // null and empty strings have no first character to change
    if (null == source || 0 == source.length())
    {
        return source;
    }

    char first = source.charAt(0);

    // longer strings that already start with a lower case character are
    // handed back as-is
    if (source.length() > 1 &&
        Character.isLowerCase(first))
    {
        return source;
    }

    StringBuilder uncapitalized = new StringBuilder(source);
    uncapitalized.setCharAt(0, Character.toLowerCase(first));
    return uncapitalized.toString();
}
/**
 * Replaces every URL that the pattern finds in the source text by an HTML
 * anchor.
 * <p>Capture group 1 of the pattern is used as the link target. When the
 * pattern declares a second capture group, that group is used as the
 * visible text, otherwise the target itself is shown.
 *
 * @param source the text in which the URLs have to be converted
 * @param pattern the pattern that locates the URLs
 * @param shorten whether visible URLs that are longer than the configured
 * maximum visual URL length have to be abbreviated
 * @param sanitize whether "javascript:" has to be removed from the link
 * target and "http://" has to be put in front of targets that neither
 * contain "://" nor start with "/"
 * @param no_follow whether a nofollow relation has to be added to each
 * anchor
 * @return the converted text; or
 * <p>the unmodified source if the pattern wasn't found
 */
private static String convertUrl(String source, Pattern pattern, boolean shorten, boolean sanitize, boolean no_follow)
{
    int max_length = RifeConfig.Tools.getMaxVisualUrlLength();

    Matcher url_matcher = pattern.matcher(source);
    if (!url_matcher.find())
    {
        return source;
    }

    // for the bare URL pattern only capture group 1 is replaced, for every
    // other pattern the entire match is replaced
    int replaced_group = pattern.equals(BBCODE_BAREURL) ? 1 : 0;

    StringBuilder sb = new StringBuilder();
    int last = 0;
    do
    {
        String actual_url = url_matcher.group(1);
        String visual_url = url_matcher.groupCount() > 1 ? url_matcher.group(2) : actual_url;

        if (sanitize)
        {
            // defang javascript
            actual_url = StringUtils.replace(actual_url, "javascript:", "");

            // fill in http:// for URLs that don't begin with /
            if ((!actual_url.contains("://")) &&
                (!actual_url.startsWith("/")))
            {
                actual_url = "http://" + actual_url;
            }
        }

        // copy the text between the previous replacement and this one
        sb.append(source.substring(last, url_matcher.start(replaced_group)));

        // NOTE(review): the target is written into the attribute as-is;
        // callers are presumably expected to HTML-encode the text before
        // it is converted - confirm
        sb.append("<a href=\"");
        sb.append(actual_url);
        sb.append("\"");
        if (actual_url.startsWith("http://") ||
            actual_url.startsWith("https://"))
        {
            sb.append(" target=\"_blank\"");
        }
        if (no_follow)
        {
            sb.append(" rel=\"nofollow\"");
        }
        sb.append(">");

        if (shorten && visual_url.length() > max_length)
        {
            String ellipsis = "...";

            // remove query string but keep '?'
            int query_index = visual_url.indexOf("?");
            if (query_index != -1)
            {
                visual_url = visual_url.substring(0, query_index + 1) + ellipsis;
            }

            if (visual_url.length() >= max_length)
            {
                int last_slash = visual_url.lastIndexOf("/");
                int start_slash = visual_url.indexOf("/", visual_url.indexOf("://") + 3);
                // only collapse the middle of the path when there actually
                // is a path; without this check a URL without any path
                // slash lost everything in front of the last '/' of "://"
                if (start_slash != -1 &&
                    last_slash != start_slash)
                {
                    visual_url = visual_url.substring(0, start_slash + 1) + ellipsis + visual_url.substring(last_slash);
                }
            }
        }
        sb.append(visual_url);
        sb.append("</a>");

        last = url_matcher.end(replaced_group);
    }
    while (url_matcher.find());

    // copy whatever follows the last replacement
    sb.append(source.substring(last));

    return sb.toString();
}
/**
* Converts a BBCode marked-up text to regular html.
*
* @param source The text with BBCode tags.
* @return A <code>String</code> with the corresponding HTML code
* @since 1.0
*/
public static String convertBbcode(String source)
{
    // the cast selects the variant with options and passes none at all
    return (null == source) ? null : convertBbcode(source, (BbcodeOption[])null);
}
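/**
* Converts a BBCode marked-up text to regular html.
*
* @param source The text with BBCode tags.
* @param options The options that influence the conversion:
* <code>SHORTEN_URL</code>, <code>SANITIZE_URL</code>,
* <code>CONVERT_BARE_URLS</code> and <code>NO_FOLLOW_LINKS</code>; this can
* be <code>null</code> when no options are needed.
* @return A <code>String</code> with the corresponding HTML code; or
* <p><code>null</code> if the source was <code>null</code>.
* @since 1.0
*/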
public static String convertBbcode(final String source, BbcodeOption... options)
{
    if (null == source)
    {
        return null;
    }

    boolean shorten = false;
    boolean sanitize = false;
    boolean convert_bare = false;
    boolean no_follow_links = false;
    if (options != null)
    {
        for (BbcodeOption option : options)
        {
            // tolerate null entries instead of failing on them
            if (null == option)
            {
                continue;
            }

            if (option.equals(StringUtils.SHORTEN_URL))
            {
                shorten = true;
            }
            else if (option.equals(StringUtils.SANITIZE_URL))
            {
                sanitize = true;
            }
            else if (option.equals(StringUtils.CONVERT_BARE_URLS))
            {
                convert_bare = true;
            }
            else if (option.equals(StringUtils.NO_FOLLOW_LINKS))
            {
                no_follow_links = true;
            }
        }
    }

    final String open_tag = "[code]";
    final String close_tag = "[/code]";

    // the text in front of a [code] block is parsed as BBCode, the block
    // itself is copied verbatim with only its own tags converted
    String sourcecopy = source;
    StringBuilder result = new StringBuilder(source.length());
    int startindex;
    while (-1 != (startindex = sourcecopy.indexOf(open_tag)))
    {
        int content_index = startindex + open_tag.length();

        // the closing tag is looked up after the opening tag, a stray
        // [/code] in front of it stays part of the parsed text and can't
        // produce an end that lies before the start anymore
        int close_index = sourcecopy.indexOf(close_tag, content_index);
        int next_open_index = sourcecopy.indexOf(open_tag, content_index);

        if (next_open_index != -1 &&
            (-1 == close_index || next_open_index < close_index))
        {
            // nested [code] tags
            /* must end before the next [code]
             * this will leave a dangling [/code] but the HTML is valid
             */
            sourcecopy = sourcecopy.substring(0, next_open_index) + close_tag + sourcecopy.substring(next_open_index);
            close_index = next_open_index;
        }

        int end_index;
        if (-1 == close_index)
        {
            // not ended... the code block runs until the end of the text
            end_index = sourcecopy.length();
        }
        else
        {
            end_index = close_index + close_tag.length();
        }

        // handle parsed
        String parsed = parseBBCode(sourcecopy.substring(0, startindex), shorten, sanitize, convert_bare, no_follow_links);

        // handle raw
        String code = sourcecopy.substring(startindex, end_index);
        code = StringUtils.replace(code, open_tag, "<div class=\"codebody\"><pre>", false);
        code = StringUtils.replace(code, close_tag, "</pre></div>", false);

        result
            .append(parsed)
            .append(code);

        sourcecopy = sourcecopy.substring(end_index);
    }

    // whatever follows the last code block is regular BBCode
    result.append(parseBBCode(sourcecopy, shorten, sanitize, convert_bare, no_follow_links));

    return result.toString();
}
/**
 * Converts the BBCode tags of a text fragment to HTML.
 * <p>The bold, underline, italic, pre, list, color, size, url, img and
 * quote tags are handled, after which line breaks are converted to HTML
 * line breaks.
 *
 * @param source the text fragment with BBCode tags
 * @param shorten passed on to the URL conversion
 * @param sanitize passed on to the URL conversion
 * @param convert_bare whether URLs that are matched by the bare URL pattern
 * have to be converted too
 * @param no_follow passed on to the URL conversion
 * @return the converted text fragment
 */
private static String parseBBCode(String source, boolean shorten, boolean sanitize, boolean convert_bare, boolean no_follow)
{
    String result = source;

    // tags that map directly to an HTML tag
    String[][] simple_tags = new String[][]{
        {"[b]", "<b>"},
        {"[/b]", "</b>"},
        {"[u]", "<u>"},
        {"[/u]", "</u>"},
        {"[i]", "<i>"},
        {"[/i]", "</i>"},
        {"[pre]", "<pre>"},
        {"[/pre]", "</pre>"}
    };
    for (String[] simple_tag : simple_tags)
    {
        result = StringUtils.replace(result, simple_tag[0], simple_tag[1], false);
    }

    // list items: each [*] runs until the next [*], [list] or [/list],
    // or until the end of the text when none of those follow
    String remaining = result;
    String remaining_lower = result.toLowerCase();
    String[] item_terminators = new String[]{"[*]", "[list]", "[/list]"};
    StringBuilder items = new StringBuilder();
    int item_index;
    while (-1 != (item_index = remaining_lower.indexOf("[*]")))
    {
        int content_index = item_index + 3; // 3 == sizeof [*]

        // the closest terminator ends the item
        int item_end = Integer.MAX_VALUE;
        for (String terminator : item_terminators)
        {
            int terminator_index = remaining_lower.indexOf(terminator, content_index);
            if (terminator_index != -1 &&
                terminator_index < item_end)
            {
                item_end = terminator_index;
            }
        }
        if (Integer.MAX_VALUE == item_end)
        {
            item_end = remaining_lower.length();
        }

        items
            .append(remaining.substring(0, item_index))
            .append("<li>")
            .append(remaining.substring(content_index, item_end))
            .append("</li>");

        remaining = remaining.substring(item_end);
        remaining_lower = remaining_lower.substring(item_end);
    }
    items.append(remaining);
    result = items.toString();

    result = StringUtils.replace(result, "[list]", "<ul>", false);
    result = StringUtils.replace(result, "[/list]", "</ul>", false);

    // font color and size
    result = BBCODE_COLOR.matcher(result).replaceAll("<font color=\"$1\">");
    result = StringUtils.replace(result, "[/color]", "</font>", false);

    result = BBCODE_SIZE.matcher(result).replaceAll("<font size=\"$1\">");
    result = StringUtils.replace(result, "[/size]", "</font>", false);

    // links
    result = convertUrl(result, BBCODE_URL_SHORT, shorten, sanitize, no_follow);
    result = convertUrl(result, BBCODE_URL_LONG, shorten, sanitize, no_follow);
    if (convert_bare)
    {
        result = convertUrl(result, BBCODE_BAREURL, shorten, sanitize, no_follow);
    }

    // images
    result = BBCODE_IMG.matcher(result).replaceAll("<div class=\"bbcode_img\"><img src=\"$1\" border=\"0\" alt=\"\" /></div>");

    // quotes
    result = BBCODE_QUOTE_LONG.matcher(result).replaceAll("<div class=\"quoteaccount\">$1:</div><div class=\"quotebody\">");
    result = StringUtils.replace(result, "[quote]", "<div class=\"quotebody\">", false);
    result = StringUtils.replace(result, "[/quote]", "</div>", false);

    // line breaks; \r\n temporarily becomes a lone \r so that the \n pass
    // doesn't add a second BR, the last pass restores it to \r\n
    result = StringUtils.replace(result, "\r\n", "<br />\r");
    result = StringUtils.replace(result, "\n", "<br />\n");
    result = StringUtils.replace(result, "\r", "\r\n");

    // remove the BR that could be added due to code formatting ppl
    // use to format lists
    result = StringUtils.replace(result, "ul><br />\r\n", "ul>\r\n");
    result = StringUtils.replace(result, "ul><br />\n", "ul>\n");

    return result;
}
/**
* Converts a <code>String</code> to a <code>boolean</code> value.
*
* @param value The <code>String</code> to convert.
* @return The corresponding <code>boolean</code> value.
* @since 1.0
*/
public static boolean convertToBoolean(String value)
{
    // a missing value is false
    if (value == null)
    {
        return false;
    }

    if ("1".equals(value))
    {
        return true;
    }

    // the textual forms are accepted regardless of their case
    String[] truthy_words = new String[]{"t", "true", "y", "yes", "on"};
    for (String truthy_word : truthy_words)
    {
        if (truthy_word.equalsIgnoreCase(value))
        {
            return true;
        }
    }

    return false;
}
/**
* Converts all tabs on a line to spaces according to the provided tab
* width.
*
* @param line The line whose tabs have to be converted.
* @param tabWidth The tab width.
* @return A new <code>String</code> object containing the line with the
* replaced tabs.
* @since 1.0
*/
public static String convertTabsToSpaces(String line, int tabWidth)
{
    // a null line is handed back as null instead of failing
    if (null == line)
    {
        return null;
    }

    // lines without tabs are returned as-is
    int tab_index = line.indexOf('\t');
    if (-1 == tab_index)
    {
        return line;
    }

    StringBuilder result = new StringBuilder(line.length() + 16);
    int segment_start = 0;
    do
    {
        // copy the text in front of the tab
        result.append(line, segment_start, tab_index);

        // a tab width that isn't positive has no tab stops to align to,
        // such tabs expand to nothing instead of dividing by zero
        if (tabWidth > 0)
        {
            // the length of the converted text so far is the column of
            // the tab, pad it up to the next tab stop (1 to tabWidth spaces)
            int tab_size = tabWidth - (result.length() % tabWidth);
            for (int i = 0; i < tab_size; i++)
            {
                result.append(' ');
            }
        }

        segment_start = tab_index + 1;
        tab_index = line.indexOf('\t', segment_start);
    }
    while (tab_index != -1);

    // copy the text after the last tab
    result.append(line, segment_start, line.length());

    return result.toString();
}
/**
* Ensures that all leading and trailing whitespace is removed from a
* <code>String</code>.
* <p>It also works with a <code>null</code> argument.
*
* @param source The <code>String</code> to trim.
* @return The trimmed <code>String</code>.
* @since 1.0
*/
public static String trim(String source)
{
    // null and empty strings are handed back untouched
    return (null == source || 0 == source.length()) ? source : source.trim();
}
/**
* Calculates the {@link DocumentPosition} of a character index in a
* document.
*
* @param document a <code>String</code> with the document where the
* position should be looked up in
* @param characterIndex the index of the character
* @return the resulting <code>DocumentPosition</code> instance; or
* <p><code>null</code> if the <code>characterIndex</code> was invalid or
* if the <code>document</code> was null
* @since 1.0
*/
public static DocumentPosition getDocumentPosition(String document, int characterIndex)
{
    if (null == document ||
        characterIndex < 0 ||
        characterIndex > document.length())
    {
        return null;
    }

    int line = 1;
    int line_start = 0;
    int line_end = document.length();

    // walk from linebreak to linebreak until the one that ends the line of
    // the character index is found; the earliest linebreak is always taken,
    // whatever its type, so that documents with mixed line endings are
    // handled correctly
    while (true)
    {
        int lf_index = document.indexOf('\n', line_start);
        int cr_index = document.indexOf('\r', line_start);

        int break_index;
        int break_length;
        if (cr_index != -1 &&
            (-1 == lf_index || cr_index < lf_index))
        {
            // \r\n counts as a single linebreak
            break_index = cr_index;
            break_length = (lf_index == cr_index + 1) ? 2 : 1;
        }
        else if (lf_index != -1)
        {
            break_index = lf_index;
            break_length = 1;
        }
        else
        {
            // no linebreaks left, the last line runs until the end of
            // the document
            break;
        }

        if (break_index >= characterIndex)
        {
            // the character index lies on the line that this linebreak ends
            line_end = break_index;
            break;
        }

        line_start = break_index + break_length;
        line++;
    }

    // columns start at 1
    int column = characterIndex - line_start + 1;

    return new DocumentPosition(document.substring(line_start, line_end), line, column);
}
/**
* Reformats a string where lines that are longer than <tt>width</tt>
* are split apart at the earliest wordbreak or at maxLength, whichever is
* sooner. If the width specified is less than 5 or greater than or equal
* to the input String's length, the string will be returned as is.
* <p/>
* Please note that this method can be lossy - trailing spaces on wrapped
* lines may be trimmed.
*
* @param input the String to reformat.
* @param width the maximum length of any one line.
* @param locale the locale whose line breaking rules are used; the US
* locale is used when this is <code>null</code>.
* @return a new String reformatted as needed; or an empty String if the
* input was <code>null</code>.
*/
public static String wordWrap(String input, int width, Locale locale)
{
    // handle invalid input
    if (null == input)
    {
        return "";
    }
    if (width < 5 ||
        width >= input.length())
    {
        return input;
    }

    // default locale
    if (null == locale)
    {
        locale = Locale.US;
    }

    StringBuilder buffer = new StringBuilder(input.length());
    // one iterator is reused for all the lines, setText resets it
    BreakIterator breaks = BreakIterator.getLineInstance(locale);
    String seperator = "\n";
    int current_index = 0;

    // go over the input string and jump from line to line
    while (current_index <= input.length())
    {
        // look for the next linebreak and get the line that corresponds
        // to it
        int delimiter_index = input.indexOf(seperator, current_index);
        String line;
        if (-1 == delimiter_index)
        {
            line = input.substring(current_index);
            current_index = input.length() + 1;
        }
        else
        {
            line = input.substring(current_index, delimiter_index);
            current_index = delimiter_index + seperator.length();
        }

        // handle the wrapping of the line
        breaks.setText(line);
        int line_start = 0;
        int start = breaks.first();
        int end = breaks.next();
        while (end != BreakIterator.DONE)
        {
            // check if the width has been exceeded
            if (end - 1 - line_start >= width)
            {
                boolean break_line = true;

                // first check if the last characters were spaces,
                // if they were and by removing them the width is not
                // exceeded, just continue
                if (Character.isWhitespace(line.charAt(end - 1)))
                {
                    for (int j = end - 1; j >= 0; j--)
                    {
                        if (!Character.isWhitespace(line.charAt(j)))
                        {
                            if (j - line_start < width)
                            {
                                break_line = false;
                            }
                            break;
                        }
                    }
                }

                // when nothing has been collected yet (the very first
                // word is already too long) there's nothing to output,
                // breaking here would only add an empty line
                if (break_line &&
                    start > line_start)
                {
                    appendWrappedSegment(buffer, line.substring(line_start, start), width, true);
                    buffer.append("\n");
                    line_start = start;
                }
            }

            start = end;
            end = breaks.next();
        }

        if (line_start < line.length())
        {
            appendWrappedSegment(buffer, line.substring(line_start), width, false);
        }

        if (delimiter_index != -1)
        {
            buffer.append("\n");
        }
    }

    return buffer.toString();
}

/**
 * Appends a wrapped segment to the buffer and splits it into pieces of
 * <code>width</code> characters when its text, without trailing
 * whitespace, doesn't fit on one line, so that no text is lost.
 *
 * @param buffer the buffer that receives the segment
 * @param segment the segment to append
 * @param width the maximum length of a line
 * @param clip <code>true</code> if the trailing whitespace that exceeds
 * the width has to be dropped; or <code>false</code> if it has to be
 * preserved
 */
private static void appendWrappedSegment(StringBuilder buffer, String segment, int width, boolean clip)
{
    // find where the text ends, ignoring the trailing whitespace
    int content_end = segment.length();
    while (content_end > 0 &&
           Character.isWhitespace(segment.charAt(content_end - 1)))
    {
        content_end--;
    }

    // a word that is longer than the width is split at the width
    int offset = 0;
    while (content_end - offset > width)
    {
        buffer.append(segment, offset, offset + width);
        buffer.append("\n");
        offset += width;
    }

    int end = clip ? Math.min(segment.length(), offset + width) : segment.length();
    buffer.append(segment, offset, end);
}
private static interface EncoderFallbackHandler
{
abstract boolean hasFallback(char character);
abstract void appendFallback(StringBuilder encodedBuffer, char character);
}
/**
 * Fallback handler that represents characters as decimal HTML numeric
 * character references, for instance <code>&amp;#160;</code> for U+00A0.
 */
private static class HtmlEncoderFallbackHandler implements EncoderFallbackHandler
{
    // a numeric character reference is "&#", the decimal code and ";";
    // the prefix used to be empty, which produced output like "160;"
    private final static String PREFIX = "&#";
    private final static String SUFFIX = ";";

    /**
     * Only characters from U+00A0 (no-break space) upwards get a fallback.
     */
    public boolean hasFallback(char character)
    {
        return character >= '\u00A0';
    }

    /**
     * Appends the numeric character reference of the character.
     */
    public void appendFallback(StringBuilder encodedBuffer, char character)
    {
        encodedBuffer.append(PREFIX);
        // the cast makes the decimal code get appended, not the character
        encodedBuffer.append((int)character);
        encodedBuffer.append(SUFFIX);
    }
}
/**
 * Type of the option constants that can be passed to
 * <code>convertBbcode</code>, each option is identified by a
 * <code>String</code>.
 */
public static class BbcodeOption extends EnumClass<String>
{
// The constructor only hands the identifier to EnumClass.
// NOTE(review): it is protected, presumably so that options are only
// declared as constants - confirm against EnumClass.
protected BbcodeOption(String identifier)
{
super(identifier);
}
}
}