/* * Copyright 2001-2013 Geert Bevin (gbevin[remove] at uwyn dot com) * Licensed under the Apache License, Version 2.0 (the "License") */ package com.uwyn.rife.tools; import com.uwyn.rife.config.RifeConfig; import com.uwyn.rife.datastructures.DocumentPosition; import com.uwyn.rife.datastructures.EnumClass; import java.io.UnsupportedEncodingException; import java.lang.reflect.Array; import java.net.URLEncoder; import java.nio.CharBuffer; import java.nio.charset.CharacterCodingException; import java.nio.charset.Charset; import java.nio.charset.CharsetEncoder; import java.text.BreakIterator; import java.util.*; import java.util.regex.Matcher; import java.util.regex.Pattern; /** * General purpose class containing common <code>String</code> manipulation * methods. * * @author Geert Bevin (gbevin[remove] at uwyn dot com) * @since 1.0 */ public abstract class StringUtils { public static final BbcodeOption SHORTEN_URL = new BbcodeOption("SHORTEN_URL"); public static final BbcodeOption SANITIZE_URL = new BbcodeOption("SANITIZE_URL"); public static final BbcodeOption CONVERT_BARE_URLS = new BbcodeOption("CONVERT_BARE_URLS"); public static final BbcodeOption NO_FOLLOW_LINKS = new BbcodeOption("NO_FOLLOW_LINKS"); public static final Pattern BBCODE_COLOR = Pattern.compile("\\[color\\s*=\\s*([#\\w]*)\\s*\\]", Pattern.CASE_INSENSITIVE); public static final Pattern BBCODE_SIZE = Pattern.compile("\\[size\\s*=\\s*([+\\-]?[0-9]*)\\s*\\]", Pattern.CASE_INSENSITIVE); public static final Pattern BBCODE_URL_SHORT = Pattern.compile("\\[url\\]\\s*([^\\s]*)\\s*\\[\\/url\\]", Pattern.CASE_INSENSITIVE); public static final Pattern BBCODE_URL_LONG = Pattern.compile("\\[url=([^\\[]*)\\]([^\\[]*)\\[/url\\]", Pattern.CASE_INSENSITIVE); public static final Pattern BBCODE_IMG = Pattern.compile("\\[img\\]\\s*([^\\s]*)\\s*\\[\\/img\\]", Pattern.CASE_INSENSITIVE); public static final Pattern BBCODE_QUOTE_LONG = Pattern.compile("\\[quote=([^\\]]+\\]*)\\]", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); 
public static final Pattern BBCODE_BAREURL = Pattern.compile("(?:[^\"'=>\\]]|^)((?:http|ftp)s?://(?:%[\\p{Digit}A-Fa-f][\\p{Digit}A-Fa-f]|[\\-_\\.!~*';\\|/?:@#&=\\+$,\\p{Alnum}])+)", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); private static final Map<Character, String> AGGRESSIVE_HTML_ENCODE_MAP = new HashMap<>(); private static final Map<Character, String> DEFENSIVE_HTML_ENCODE_MAP = new HashMap<>(); private static final Map<Character, String> XML_ENCODE_MAP = new HashMap<>(); private static final Map<Character, String> STRING_ENCODE_MAP = new HashMap<>(); private static final Map<Character, String> SQL_ENCODE_MAP = new HashMap<>(); private static final Map<Character, String> LATEX_ENCODE_MAP = new HashMap<>(); private static final Map<String, Character> HTML_DECODE_MAP = new HashMap<>(); private static final HtmlEncoderFallbackHandler HTML_ENCODER_FALLBACK = new HtmlEncoderFallbackHandler(); static { // Html encoding mapping according to the HTML 4.0 spec // http://www.w3.org/TR/REC-html40/sgml/entities.html // Special characters for HTML AGGRESSIVE_HTML_ENCODE_MAP.put('\u0026', "&"); AGGRESSIVE_HTML_ENCODE_MAP.put('\u003C', "<"); AGGRESSIVE_HTML_ENCODE_MAP.put('\u003E', ">"); AGGRESSIVE_HTML_ENCODE_MAP.put('\u0022', """); DEFENSIVE_HTML_ENCODE_MAP.put('\u0152', "Œ"); DEFENSIVE_HTML_ENCODE_MAP.put('\u0153', "œ"); DEFENSIVE_HTML_ENCODE_MAP.put('\u0160', "Š"); DEFENSIVE_HTML_ENCODE_MAP.put('\u0161', "š"); DEFENSIVE_HTML_ENCODE_MAP.put('\u0178', "Ÿ"); DEFENSIVE_HTML_ENCODE_MAP.put('\u02C6', "ˆ"); DEFENSIVE_HTML_ENCODE_MAP.put('\u02DC', "˜"); DEFENSIVE_HTML_ENCODE_MAP.put('\u2002', " "); DEFENSIVE_HTML_ENCODE_MAP.put('\u2003', " "); DEFENSIVE_HTML_ENCODE_MAP.put('\u2009', " "); DEFENSIVE_HTML_ENCODE_MAP.put('\u200C', "‌"); DEFENSIVE_HTML_ENCODE_MAP.put('\u200D', "‍"); DEFENSIVE_HTML_ENCODE_MAP.put('\u200E', "‎"); DEFENSIVE_HTML_ENCODE_MAP.put('\u200F', "‏"); DEFENSIVE_HTML_ENCODE_MAP.put('\u2013', "–"); DEFENSIVE_HTML_ENCODE_MAP.put('\u2014', "—"); 
DEFENSIVE_HTML_ENCODE_MAP.put('\u2018', "‘"); DEFENSIVE_HTML_ENCODE_MAP.put('\u2019', "’"); DEFENSIVE_HTML_ENCODE_MAP.put('\u201A', "‚"); DEFENSIVE_HTML_ENCODE_MAP.put('\u201C', "“"); DEFENSIVE_HTML_ENCODE_MAP.put('\u201D', "”"); DEFENSIVE_HTML_ENCODE_MAP.put('\u201E', "„"); DEFENSIVE_HTML_ENCODE_MAP.put('\u2020', "†"); DEFENSIVE_HTML_ENCODE_MAP.put('\u2021', "‡"); DEFENSIVE_HTML_ENCODE_MAP.put('\u2030', "‰"); DEFENSIVE_HTML_ENCODE_MAP.put('\u2039', "‹"); DEFENSIVE_HTML_ENCODE_MAP.put('\u203A', "›"); DEFENSIVE_HTML_ENCODE_MAP.put('\u20AC', "€"); // Character entity references for ISO 8859-1 characters DEFENSIVE_HTML_ENCODE_MAP.put('\u00A0', " "); DEFENSIVE_HTML_ENCODE_MAP.put('\u00A1', "¡"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00A2', "¢"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00A3', "£"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00A4', "¤"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00A5', "¥"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00A6', "¦"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00A7', "§"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00A8', "¨"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00A9', "©"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00AA', "ª"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00AB', "«"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00AC', "¬"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00AD', "­"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00AE', "®"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00AF', "¯"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00B0', "°"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00B1', "±"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00B2', "²"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00B3', "³"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00B4', "´"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00B5', "µ"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00B6', "¶"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00B7', "·"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00B8', "¸"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00B9', "¹"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00BA', "º"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00BB', "»"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00BC', "¼"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00BD', "½"); 
DEFENSIVE_HTML_ENCODE_MAP.put('\u00BE', "¾"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00BF', "¿"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00C0', "À"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00C1', "Á"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00C2', "Â"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00C3', "Ã"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00C4', "Ä"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00C5', "Å"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00C6', "Æ"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00C7', "Ç"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00C8', "È"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00C9', "É"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00CA', "Ê"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00CB', "Ë"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00CC', "Ì"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00CD', "Í"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00CE', "Î"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00CF', "Ï"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00D0', "Ð"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00D1', "Ñ"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00D2', "Ò"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00D3', "Ó"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00D4', "Ô"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00D5', "Õ"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00D6', "Ö"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00D7', "×"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00D8', "Ø"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00D9', "Ù"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00DA', "Ú"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00DB', "Û"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00DC', "Ü"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00DD', "Ý"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00DE', "Þ"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00DF', "ß"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00E0', "à"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00E1', "á"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00E2', "â"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00E3', "ã"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00E4', "ä"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00E5', "å"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00E6', "æ"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00E7', "ç"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00E8', "è"); 
DEFENSIVE_HTML_ENCODE_MAP.put('\u00E9', "é"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00EA', "ê"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00EB', "ë"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00EC', "ì"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00ED', "í"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00EE', "î"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00EF', "ï"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00F0', "ð"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00F1', "ñ"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00F2', "ò"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00F3', "ó"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00F4', "ô"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00F5', "õ"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00F6', "ö"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00F7', "÷"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00F8', "ø"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00F9', "ù"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00FA', "ú"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00FB', "û"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00FC', "ü"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00FD', "ý"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00FE', "þ"); DEFENSIVE_HTML_ENCODE_MAP.put('\u00FF', "ÿ"); // Mathematical, Greek and Symbolic characters for HTML DEFENSIVE_HTML_ENCODE_MAP.put('\u0192', "ƒ"); DEFENSIVE_HTML_ENCODE_MAP.put('\u0391', "Α"); DEFENSIVE_HTML_ENCODE_MAP.put('\u0392', "Β"); DEFENSIVE_HTML_ENCODE_MAP.put('\u0393', "Γ"); DEFENSIVE_HTML_ENCODE_MAP.put('\u0394', "Δ"); DEFENSIVE_HTML_ENCODE_MAP.put('\u0395', "Ε"); DEFENSIVE_HTML_ENCODE_MAP.put('\u0396', "Ζ"); DEFENSIVE_HTML_ENCODE_MAP.put('\u0397', "Η"); DEFENSIVE_HTML_ENCODE_MAP.put('\u0398', "Θ"); DEFENSIVE_HTML_ENCODE_MAP.put('\u0399', "Ι"); DEFENSIVE_HTML_ENCODE_MAP.put('\u039A', "Κ"); DEFENSIVE_HTML_ENCODE_MAP.put('\u039B', "Λ"); DEFENSIVE_HTML_ENCODE_MAP.put('\u039C', "Μ"); DEFENSIVE_HTML_ENCODE_MAP.put('\u039D', "Ν"); DEFENSIVE_HTML_ENCODE_MAP.put('\u039E', "Ξ"); DEFENSIVE_HTML_ENCODE_MAP.put('\u039F', "Ο"); DEFENSIVE_HTML_ENCODE_MAP.put('\u03A0', "Π"); DEFENSIVE_HTML_ENCODE_MAP.put('\u03A1', "Ρ"); DEFENSIVE_HTML_ENCODE_MAP.put('\u03A3', "Σ"); 
DEFENSIVE_HTML_ENCODE_MAP.put('\u03A4', "Τ"); DEFENSIVE_HTML_ENCODE_MAP.put('\u03A5', "Υ"); DEFENSIVE_HTML_ENCODE_MAP.put('\u03A6', "Φ"); DEFENSIVE_HTML_ENCODE_MAP.put('\u03A7', "Χ"); DEFENSIVE_HTML_ENCODE_MAP.put('\u03A8', "Ψ"); DEFENSIVE_HTML_ENCODE_MAP.put('\u03A9', "Ω"); DEFENSIVE_HTML_ENCODE_MAP.put('\u03B1', "α"); DEFENSIVE_HTML_ENCODE_MAP.put('\u03B2', "β"); DEFENSIVE_HTML_ENCODE_MAP.put('\u03B3', "γ"); DEFENSIVE_HTML_ENCODE_MAP.put('\u03B4', "δ"); DEFENSIVE_HTML_ENCODE_MAP.put('\u03B5', "ε"); DEFENSIVE_HTML_ENCODE_MAP.put('\u03B6', "ζ"); DEFENSIVE_HTML_ENCODE_MAP.put('\u03B7', "η"); DEFENSIVE_HTML_ENCODE_MAP.put('\u03B8', "θ"); DEFENSIVE_HTML_ENCODE_MAP.put('\u03B9', "ι"); DEFENSIVE_HTML_ENCODE_MAP.put('\u03BA', "κ"); DEFENSIVE_HTML_ENCODE_MAP.put('\u03BB', "λ"); DEFENSIVE_HTML_ENCODE_MAP.put('\u03BC', "μ"); DEFENSIVE_HTML_ENCODE_MAP.put('\u03BD', "ν"); DEFENSIVE_HTML_ENCODE_MAP.put('\u03BE', "ξ"); DEFENSIVE_HTML_ENCODE_MAP.put('\u03BF', "ο"); DEFENSIVE_HTML_ENCODE_MAP.put('\u03C0', "π"); DEFENSIVE_HTML_ENCODE_MAP.put('\u03C1', "ρ"); DEFENSIVE_HTML_ENCODE_MAP.put('\u03C2', "ς"); DEFENSIVE_HTML_ENCODE_MAP.put('\u03C3', "σ"); DEFENSIVE_HTML_ENCODE_MAP.put('\u03C4', "τ"); DEFENSIVE_HTML_ENCODE_MAP.put('\u03C5', "υ"); DEFENSIVE_HTML_ENCODE_MAP.put('\u03C6', "φ"); DEFENSIVE_HTML_ENCODE_MAP.put('\u03C7', "χ"); DEFENSIVE_HTML_ENCODE_MAP.put('\u03C8', "ψ"); DEFENSIVE_HTML_ENCODE_MAP.put('\u03C9', "ω"); DEFENSIVE_HTML_ENCODE_MAP.put('\u03D1', "ϑ"); DEFENSIVE_HTML_ENCODE_MAP.put('\u03D2', "ϒ"); DEFENSIVE_HTML_ENCODE_MAP.put('\u03D6', "ϖ"); DEFENSIVE_HTML_ENCODE_MAP.put('\u2022', "•"); DEFENSIVE_HTML_ENCODE_MAP.put('\u2026', "…"); DEFENSIVE_HTML_ENCODE_MAP.put('\u2032', "′"); DEFENSIVE_HTML_ENCODE_MAP.put('\u2033', "″"); DEFENSIVE_HTML_ENCODE_MAP.put('\u203E', "‾"); DEFENSIVE_HTML_ENCODE_MAP.put('\u2044', "⁄"); DEFENSIVE_HTML_ENCODE_MAP.put('\u2118', "℘"); DEFENSIVE_HTML_ENCODE_MAP.put('\u2111', "ℑ"); DEFENSIVE_HTML_ENCODE_MAP.put('\u211C', "ℜ"); 
DEFENSIVE_HTML_ENCODE_MAP.put('\u2122', "™"); DEFENSIVE_HTML_ENCODE_MAP.put('\u2135', "ℵ"); DEFENSIVE_HTML_ENCODE_MAP.put('\u2190', "←"); DEFENSIVE_HTML_ENCODE_MAP.put('\u2191', "↑"); DEFENSIVE_HTML_ENCODE_MAP.put('\u2192', "→"); DEFENSIVE_HTML_ENCODE_MAP.put('\u2193', "↓"); DEFENSIVE_HTML_ENCODE_MAP.put('\u2194', "↔"); DEFENSIVE_HTML_ENCODE_MAP.put('\u21B5', "↵"); DEFENSIVE_HTML_ENCODE_MAP.put('\u21D0', "⇐"); DEFENSIVE_HTML_ENCODE_MAP.put('\u21D1', "⇑"); DEFENSIVE_HTML_ENCODE_MAP.put('\u21D2', "⇒"); DEFENSIVE_HTML_ENCODE_MAP.put('\u21D3', "⇓"); DEFENSIVE_HTML_ENCODE_MAP.put('\u21D4', "⇔"); DEFENSIVE_HTML_ENCODE_MAP.put('\u2200', "∀"); DEFENSIVE_HTML_ENCODE_MAP.put('\u2202', "∂"); DEFENSIVE_HTML_ENCODE_MAP.put('\u2203', "∃"); DEFENSIVE_HTML_ENCODE_MAP.put('\u2205', "∅"); DEFENSIVE_HTML_ENCODE_MAP.put('\u2207', "∇"); DEFENSIVE_HTML_ENCODE_MAP.put('\u2208', "∈"); DEFENSIVE_HTML_ENCODE_MAP.put('\u2209', "∉"); DEFENSIVE_HTML_ENCODE_MAP.put('\u220B', "∋"); DEFENSIVE_HTML_ENCODE_MAP.put('\u220F', "∏"); DEFENSIVE_HTML_ENCODE_MAP.put('\u2211', "∑"); DEFENSIVE_HTML_ENCODE_MAP.put('\u2212', "−"); DEFENSIVE_HTML_ENCODE_MAP.put('\u2217', "∗"); DEFENSIVE_HTML_ENCODE_MAP.put('\u221A', "√"); DEFENSIVE_HTML_ENCODE_MAP.put('\u221D', "∝"); DEFENSIVE_HTML_ENCODE_MAP.put('\u221E', "∞"); DEFENSIVE_HTML_ENCODE_MAP.put('\u2220', "∠"); DEFENSIVE_HTML_ENCODE_MAP.put('\u2227', "∧"); DEFENSIVE_HTML_ENCODE_MAP.put('\u2228', "∨"); DEFENSIVE_HTML_ENCODE_MAP.put('\u2229', "∩"); DEFENSIVE_HTML_ENCODE_MAP.put('\u222A', "∪"); DEFENSIVE_HTML_ENCODE_MAP.put('\u222B', "∫"); DEFENSIVE_HTML_ENCODE_MAP.put('\u2234', "∴"); DEFENSIVE_HTML_ENCODE_MAP.put('\u223C', "∼"); DEFENSIVE_HTML_ENCODE_MAP.put('\u2245', "≅"); DEFENSIVE_HTML_ENCODE_MAP.put('\u2248', "≈"); DEFENSIVE_HTML_ENCODE_MAP.put('\u2260', "≠"); DEFENSIVE_HTML_ENCODE_MAP.put('\u2261', "≡"); DEFENSIVE_HTML_ENCODE_MAP.put('\u2264', "≤"); DEFENSIVE_HTML_ENCODE_MAP.put('\u2265', "≥"); DEFENSIVE_HTML_ENCODE_MAP.put('\u2282', "⊂"); 
DEFENSIVE_HTML_ENCODE_MAP.put('\u2283', "⊃"); DEFENSIVE_HTML_ENCODE_MAP.put('\u2284', "⊄"); DEFENSIVE_HTML_ENCODE_MAP.put('\u2286', "⊆"); DEFENSIVE_HTML_ENCODE_MAP.put('\u2287', "⊇"); DEFENSIVE_HTML_ENCODE_MAP.put('\u2295', "⊕"); DEFENSIVE_HTML_ENCODE_MAP.put('\u2297', "⊗"); DEFENSIVE_HTML_ENCODE_MAP.put('\u22A5', "⊥"); DEFENSIVE_HTML_ENCODE_MAP.put('\u22C5', "⋅"); DEFENSIVE_HTML_ENCODE_MAP.put('\u2308', "⌈"); DEFENSIVE_HTML_ENCODE_MAP.put('\u2309', "⌉"); DEFENSIVE_HTML_ENCODE_MAP.put('\u230A', "⌊"); DEFENSIVE_HTML_ENCODE_MAP.put('\u230B', "⌋"); DEFENSIVE_HTML_ENCODE_MAP.put('\u2329', "⟨"); DEFENSIVE_HTML_ENCODE_MAP.put('\u232A', "⟩"); DEFENSIVE_HTML_ENCODE_MAP.put('\u25CA', "◊"); DEFENSIVE_HTML_ENCODE_MAP.put('\u2660', "♠"); DEFENSIVE_HTML_ENCODE_MAP.put('\u2663', "♣"); DEFENSIVE_HTML_ENCODE_MAP.put('\u2665', "♥"); DEFENSIVE_HTML_ENCODE_MAP.put('\u2666', "♦"); Set<Map.Entry<Character, String>> aggresive_entries = AGGRESSIVE_HTML_ENCODE_MAP.entrySet(); for (Map.Entry<Character, String> entry : aggresive_entries) { HTML_DECODE_MAP.put(entry.getValue(), entry.getKey()); } Set<Map.Entry<Character, String>> defensive_entries = DEFENSIVE_HTML_ENCODE_MAP.entrySet(); for (Map.Entry<Character, String> entry : defensive_entries) { HTML_DECODE_MAP.put(entry.getValue(), entry.getKey()); } XML_ENCODE_MAP.put('\u0026', "&"); XML_ENCODE_MAP.put('\'', "'"); XML_ENCODE_MAP.put('\u0022', """); XML_ENCODE_MAP.put('\u003C', "<"); XML_ENCODE_MAP.put('\u003E', ">"); SQL_ENCODE_MAP.put('\'', "''"); STRING_ENCODE_MAP.put('\\', "\\\\"); STRING_ENCODE_MAP.put('\n', "\\n"); STRING_ENCODE_MAP.put('\r', "\\r"); STRING_ENCODE_MAP.put('\t', "\\t"); STRING_ENCODE_MAP.put('"', "\\\""); LATEX_ENCODE_MAP.put('\\', "\\\\"); LATEX_ENCODE_MAP.put('#', "\\#"); LATEX_ENCODE_MAP.put('$', "\\$"); LATEX_ENCODE_MAP.put('%', "\\%"); LATEX_ENCODE_MAP.put('&', "\\&"); LATEX_ENCODE_MAP.put('~', "\\~"); LATEX_ENCODE_MAP.put('_', "\\_"); LATEX_ENCODE_MAP.put('^', "\\^"); LATEX_ENCODE_MAP.put('{', "\\{"); 
LATEX_ENCODE_MAP.put('}', "\\}"); LATEX_ENCODE_MAP.put('\u00A1', "!'"); LATEX_ENCODE_MAP.put('\u00BF', "?'"); LATEX_ENCODE_MAP.put('\u00C0', "\\`{A}"); LATEX_ENCODE_MAP.put('\u00C1', "\\'{A}"); LATEX_ENCODE_MAP.put('\u00C2', "\\^{A}"); LATEX_ENCODE_MAP.put('\u00C3', "\\H{A}"); LATEX_ENCODE_MAP.put('\u00C4', "\\\"{A}"); LATEX_ENCODE_MAP.put('\u00C5', "\\AA"); LATEX_ENCODE_MAP.put('\u00C6', "\\AE"); LATEX_ENCODE_MAP.put('\u00C7', "\\c{C}"); LATEX_ENCODE_MAP.put('\u00C8', "\\`{E}"); LATEX_ENCODE_MAP.put('\u00C9', "\\'{E}"); LATEX_ENCODE_MAP.put('\u00CA', "\\^{E}"); LATEX_ENCODE_MAP.put('\u00CB', "\\\"{E}"); LATEX_ENCODE_MAP.put('\u00CC', "\\`{I}"); LATEX_ENCODE_MAP.put('\u00CD', "\\'{I}"); LATEX_ENCODE_MAP.put('\u00CE', "\\^{I}"); LATEX_ENCODE_MAP.put('\u00CF', "\\\"{I}"); // todo \u00D0 LATEX_ENCODE_MAP.put('\u00D1', "\\H{N}"); LATEX_ENCODE_MAP.put('\u00D2', "\\`{O}"); LATEX_ENCODE_MAP.put('\u00D3', "\\'{O}"); LATEX_ENCODE_MAP.put('\u00D4', "\\^{O}"); LATEX_ENCODE_MAP.put('\u00D5', "\\H{O}"); LATEX_ENCODE_MAP.put('\u00D6', "\\\"{O}"); // todo \u00D7 LATEX_ENCODE_MAP.put('\u00D8', "\\O"); LATEX_ENCODE_MAP.put('\u00D9', "\\`{U}"); LATEX_ENCODE_MAP.put('\u00DA', "\\'{U}"); LATEX_ENCODE_MAP.put('\u00DB', "\\^{U}"); LATEX_ENCODE_MAP.put('\u00DC', "\\\"{U}"); LATEX_ENCODE_MAP.put('\u00DD', "\\'{Y}"); // todo \u00DE LATEX_ENCODE_MAP.put('\u00DF', "\\ss"); LATEX_ENCODE_MAP.put('\u00E0', "\\`{a}"); LATEX_ENCODE_MAP.put('\u00E1', "\\'{a}"); LATEX_ENCODE_MAP.put('\u00E2', "\\^{a}"); LATEX_ENCODE_MAP.put('\u00E3', "\\H{a}"); LATEX_ENCODE_MAP.put('\u00E4', "\\\"{a}"); LATEX_ENCODE_MAP.put('\u00E5', "\\aa"); LATEX_ENCODE_MAP.put('\u00E6', "\\ae"); LATEX_ENCODE_MAP.put('\u00E7', "\\c{c}"); LATEX_ENCODE_MAP.put('\u00E8', "\\`{e}"); LATEX_ENCODE_MAP.put('\u00E9', "\\'{e}"); LATEX_ENCODE_MAP.put('\u00EA', "\\^{e}"); LATEX_ENCODE_MAP.put('\u00EB', "\\\"{e}"); LATEX_ENCODE_MAP.put('\u00EC', "\\`{i}"); LATEX_ENCODE_MAP.put('\u00ED', "\\'{i}"); LATEX_ENCODE_MAP.put('\u00EE', "\\^{i}"); 
LATEX_ENCODE_MAP.put('\u00EF', "\\\"{i}"); // todo \u00F0 LATEX_ENCODE_MAP.put('\u00F1', "\\H{n}"); LATEX_ENCODE_MAP.put('\u00F2', "\\`{o}"); LATEX_ENCODE_MAP.put('\u00F3', "\\'{o}"); LATEX_ENCODE_MAP.put('\u00F4', "\\^{o}"); LATEX_ENCODE_MAP.put('\u00F5', "\\H{o}"); LATEX_ENCODE_MAP.put('\u00F6', "\\\"{o}"); // todo \u00F7 LATEX_ENCODE_MAP.put('\u00F8', "\\o"); LATEX_ENCODE_MAP.put('\u00F9', "\\`{u}"); LATEX_ENCODE_MAP.put('\u00FA', "\\'{u}"); LATEX_ENCODE_MAP.put('\u00FB', "\\^{u}"); LATEX_ENCODE_MAP.put('\u00FC', "\\\"{u}"); LATEX_ENCODE_MAP.put('\u00FD', "\\'{y}"); // todo \u00FE LATEX_ENCODE_MAP.put('\u00FF', "\\\"{y}"); } public static String ENCODING_US_ASCII = "US-ASCII"; public static String ENCODING_ISO_8859_1 = "ISO-8859-1"; public static String ENCODING_ISO_8859_2 = "ISO-8859-2"; public static String ENCODING_ISO_8859_5 = "ISO-8859-5"; public static String ENCODING_UTF_8 = "UTF-8"; public static String ENCODING_UTF_16BE = "UTF-16BE"; public static String ENCODING_UTF_16LE = "UTF-16LE"; public static String ENCODING_UTF_16 = "UTF-16"; public static Charset CHARSET_US_ASCII = Charset.forName(StringUtils.ENCODING_US_ASCII); /** * Transforms a provided <code>String</code> object into a new string, * containing only valid characters for a java class name. * * @param name The string that has to be transformed into a valid class * name. * @return The encoded <code>String</code> object. 
* @see #encodeUrl(String) * @see #encodeHtml(String) * @see #encodeXml(String) * @see #encodeSql(String) * @see #encodeLatex(String) * @see #encodeRegexp(String) * @since 1.0 */ public static String encodeClassname(String name) { if (null == name) { return null; } Pattern pattern = Pattern.compile("[^\\w]"); Matcher matcher = pattern.matcher(name); return matcher.replaceAll("_"); } private static boolean needsUrlEncoding(String source) { if (null == source) { return false; } // check if the string needs encoding first since // the URLEncoder always allocates a StringBuffer, even when the // string is returned as-is boolean encode = false; char ch; for (int i = 0; i < source.length(); i++) { ch = source.charAt(i); if (ch >= 'a' && ch <= 'z' || ch >= 'A' && ch <= 'Z' || ch >= '0' && ch <= '9' || ch == '-' || ch == '_' || ch == '.' || ch == '*') { continue; } encode = true; break; } return encode; } /** * Transforms a provided <code>String</code> object into a new string, * containing only valid URL characters. * * @param source The string that has to be transformed into a valid URL * string. * @return The encoded <code>String</code> object. * @see #encodeClassname(String) * @see #encodeUrlValue(String) * @see #encodeHtml(String) * @see #encodeXml(String) * @see #encodeSql(String) * @see #encodeLatex(String) * @see #encodeRegexp(String) * @since 1.0 */ public static String encodeUrl(String source) { if (!needsUrlEncoding(source)) { return source; } try { return URLEncoder.encode(source, ENCODING_ISO_8859_1); } ///CLOVER:OFF catch (UnsupportedEncodingException e) { // this should never happen, ISO-8859-1 is a standard encoding throw new RuntimeException(e); } ///CLOVER:ON } /** * Transforms a provided <code>String</code> object into a new string, * only pure US Ascii strings are preserved and URL encoded in a regular * way. 
Strings with characters from other encodings will be encoded in a * RIFE-specific manner to allow international data to passed along the * query string. * * @param source The string that has to be transformed into a valid URL * parameter string. * @return The encoded <code>String</code> object. * @see #decodeUrlValue(String) * @see #encodeClassname(String) * @see #encodeUrl(String) * @see #encodeHtml(String) * @see #encodeXml(String) * @see #encodeSql(String) * @see #encodeLatex(String) * @see #encodeRegexp(String) * @since 1.0 */ public static String encodeUrlValue(String source) { if (!needsUrlEncoding(source)) { return source; } // check if the string is valid US-ASCII encoding boolean valid = true; CharsetEncoder encoder = CHARSET_US_ASCII.newEncoder(); try { encoder.encode(CharBuffer.wrap(source)); } catch (CharacterCodingException e) { valid = false; } try { // if it is valid US-ASCII, use the regular URL encoding method if (valid) { return URLEncoder.encode(source, ENCODING_US_ASCII); } // otherwise, base-64 encode the UTF-8 bytes and mark the string // as being encoded in a special way else { StringBuilder encoded = new StringBuilder("%02%02"); String base64 = Base64.encodeToString(source.getBytes(ENCODING_UTF_8), false); String base64_urlsafe = replace(base64, "=", "%3D"); encoded.append(base64_urlsafe); return encoded.toString(); } } ///CLOVER:OFF catch (UnsupportedEncodingException e) { // this should never happen, ISO-8859-1 is a standard encoding throw new RuntimeException(e); } ///CLOVER:ON } /** * Decodes a <code>String</code> that has been encoded in a RIFE-specific * manner for URL usage.. Before calling this method, you should first * verify if the value needs decoding by using the * <code>doesUrlValueNeedDecoding(String)</code> method. * * @param source the value that has been encoded for URL usage in a * RIFE-specific way * @return The decoded <code>String</code> object. 
* @see #encodeUrlValue(String) * @see #doesUrlValueNeedDecoding(String) * @since 1.0 */ public static String decodeUrlValue(String source) { try { byte[] decoded = Base64.decode(source.substring(2)); if (null == decoded) { return null; } else { return new String(decoded, StringUtils.ENCODING_UTF_8); } } ///CLOVER:OFF catch (UnsupportedEncodingException e) { // this should never happen, UTF-8 is a standard encoding throw new RuntimeException(e); } ///CLOVER:ON } /** * Checks if a <code>String</code> is encoded in a RIFE-specific manner * for URL usage. * * @param source the value that might have been encoded for URL usage in a * RIFE-specific way * @return <code>true</code> if the value is encoded in the RIFE-specific * format; and * <p><code>false</code> otherwise * @see #encodeUrlValue(String) * @see #decodeUrlValue(String) * @since 1.0 */ public static boolean doesUrlValueNeedDecoding(String source) { return source != null && source.length() > 2 && source.startsWith("\u0002\u0002"); } private static boolean needsHtmlEncoding(String source, boolean defensive) { if (null == source) { return false; } boolean encode = false; char ch; for (int i = 0; i < source.length(); i++) { ch = source.charAt(i); if ((defensive || (ch != '\u0022' && ch != '\u0026' && ch != '\u003C' && ch != '\u003E')) && ch < '\u00A0') { continue; } encode = true; break; } return encode; } /** * @since 1.6 */ public static String decodeHtml(String source) { if (null == source || 0 == source.length()) { return source; } int current_index = 0; int delimiter_start_index; int delimiter_end_index; StringBuilder result = null; while (current_index <= source.length()) { delimiter_start_index = source.indexOf('&', current_index); if (delimiter_start_index != -1) { delimiter_end_index = source.indexOf(';', delimiter_start_index + 1); if (delimiter_end_index != -1) { // ensure that the string builder is setup correctly if (null == result) { result = new StringBuilder(); } // add the text that leads up to 
this match if (delimiter_start_index > current_index) { result.append(source.substring(current_index, delimiter_start_index)); } // add the decoded entity String entity = source.substring(delimiter_start_index, delimiter_end_index + 1); current_index = delimiter_end_index + 1; // try to decoded numeric entities if (entity.charAt(1) == '#') { int start = 2; int radix = 10; // check if the number is hexadecimal if (entity.charAt(2) == 'X' || entity.charAt(2) == 'x') { start++; radix = 16; } try { Character c = (char)Integer.parseInt(entity.substring(start, entity.length() - 1), radix); result.append(c); } // when the number of the entity can't be parsed, add the entity as-is catch (NumberFormatException e) { result.append(entity); } } else { // try to decode the entity as a literal Character decoded = HTML_DECODE_MAP.get(entity); if (decoded != null) { result.append(decoded); } // if there was no match, add the entity as-is else { result.append(entity); } } } else { break; } } else { break; } } if (null == result) { return source; } else if (current_index < source.length()) { result.append(source.substring(current_index)); } return result.toString(); } /** * Transforms a provided <code>String</code> object into a new string, * containing only valid Html characters. * * @param source The string that has to be transformed into a valid Html * string. * @return The encoded <code>String</code> object. * @see #encodeClassname(String) * @see #encodeUrl(String) * @see #encodeUrlValue(String) * @see #encodeXml(String) * @see #encodeSql(String) * @see #encodeString(String) * @see #encodeLatex(String) * @see #encodeRegexp(String) * @since 1.0 */ public static String encodeHtml(String source) { if (needsHtmlEncoding(source, false)) { return encode(source, HTML_ENCODER_FALLBACK, AGGRESSIVE_HTML_ENCODE_MAP, DEFENSIVE_HTML_ENCODE_MAP); } return source; } /** * Transforms a provided <code>String</code> object into a new string, * containing as much as possible Html characters. 
It is safe to already * feed existing Html to this method since &, < and > will not * be encoded. * * @param source The string that has to be transformed into a valid Html * string. * @return The encoded <code>String</code> object. * @see #encodeClassname(String) * @see #encodeUrl(String) * @see #encodeUrlValue(String) * @see #encodeXml(String) * @see #encodeSql(String) * @see #encodeString(String) * @see #encodeLatex(String) * @see #encodeRegexp(String) * @since 1.0 */ public static String encodeHtmlDefensive(String source) { if (needsHtmlEncoding(source, true)) { return encode(source, null, DEFENSIVE_HTML_ENCODE_MAP); } return source; } /** * Transforms a provided <code>String</code> object into a new string, * containing only valid XML characters. * * @param source The string that has to be transformed into a valid XML * string. * @return The encoded <code>String</code> object. * @see #encodeClassname(String) * @see #encodeUrl(String) * @see #encodeUrlValue(String) * @see #encodeHtml(String) * @see #encodeSql(String) * @see #encodeString(String) * @see #encodeLatex(String) * @see #encodeRegexp(String) * @since 1.0 */ public static String encodeXml(String source) { return encode(source, null, XML_ENCODE_MAP); } /** * Transforms a provided <code>String</code> object into a new string, * containing only valid <code>String</code> characters. * * @param source The string that has to be transformed into a valid * sequence of <code>String</code> characters. * @return The encoded <code>String</code> object. * @see #encodeClassname(String) * @see #encodeUrl(String) * @see #encodeUrlValue(String) * @see #encodeHtml(String) * @see #encodeXml(String) * @see #encodeSql(String) * @see #encodeLatex(String) * @see #encodeRegexp(String) * @since 1.0 */ public static String encodeString(String source) { return encode(source, null, STRING_ENCODE_MAP); } /** * Transforms a provided <code>String</code> object into a series of * unicode escape codes. 
* * @param source The string that has to be transformed into a valid * sequence of unicode escape codes * @return The encoded <code>String</code> object. * @see #encodeClassname(String) * @see #encodeUrl(String) * @see #encodeUrlValue(String) * @see #encodeHtml(String) * @see #encodeXml(String) * @see #encodeSql(String) * @see #encodeLatex(String) * @see #encodeRegexp(String) * @since 1.0 */ public static String encodeUnicode(String source) { if (null == source) { return null; } StringBuilder encoded = new StringBuilder(); String hexstring; for (int i = 0; i < source.length(); i++) { hexstring = Integer.toHexString((int)source.charAt(i)).toUpperCase(); encoded.append("\\u"); // fill with zeros for (int j = hexstring.length(); j < 4; j++) { encoded.append("0"); } encoded.append(hexstring); } return encoded.toString(); } /** * Transforms a provided <code>String</code> object into a new string, * containing only valid Sql characters. * * @param source The string that has to be transformed into a valid Sql * string. * @return The encoded <code>String</code> object. * @see #encodeClassname(String) * @see #encodeUrl(String) * @see #encodeUrlValue(String) * @see #encodeHtml(String) * @see #encodeXml(String) * @see #encodeString(String) * @see #encodeLatex(String) * @see #encodeRegexp(String) * @since 1.0 */ public static String encodeSql(String source) { return encode(source, null, SQL_ENCODE_MAP); } /** * Transforms a provided <code>String</code> object into a new string, * containing only valid LaTeX characters. * * @param source The string that has to be transformed into a valid LaTeX * string. * @return The encoded <code>String</code> object. 
* @see #encodeClassname(String) * @see #encodeUrl(String) * @see #encodeUrlValue(String) * @see #encodeHtml(String) * @see #encodeXml(String) * @see #encodeSql(String) * @see #encodeString(String) * @see #encodeRegexp(String) * @since 1.0 */ public static String encodeLatex(String source) { if (null == source) { return null; } source = encode(source, null, LATEX_ENCODE_MAP); source = StringUtils.replace(source, "latex", "\\LaTeX", false); return source; } /** * Transforms a provided <code>String</code> object into a new string, * using the mapping that are provided through the supplied encoding * table. * * @param source The string that has to be transformed into a valid * string, using the mappings that are provided through the supplied * encoding table. * @param encodingTables A <code>Map</code> object containing the mappings * to transform characters into valid entities. The keys of this map * should be <code>Character</code> objects and the values * <code>String</code> objects. * @return The encoded <code>String</code> object. * @since 1.0 */ private static String encode(String source, EncoderFallbackHandler fallbackHandler, Map<Character, String>... 
encodingTables) { if (null == source) { return null; } if (null == encodingTables || 0 == encodingTables.length) { return source; } StringBuilder encoded_string = null; char[] string_to_encode_array = source.toCharArray(); int last_match = -1; for (int i = 0; i < string_to_encode_array.length; i++) { char char_to_encode = string_to_encode_array[i]; for (Map<Character, String> encoding_table : encodingTables) { if (encoding_table.containsKey(char_to_encode)) { encoded_string = prepareEncodedString(source, encoded_string, i, last_match, string_to_encode_array); encoded_string.append(encoding_table.get(char_to_encode)); last_match = i; } } if (fallbackHandler != null && last_match < i && fallbackHandler.hasFallback(char_to_encode)) { encoded_string = prepareEncodedString(source, encoded_string, i, last_match, string_to_encode_array); fallbackHandler.appendFallback(encoded_string, char_to_encode); last_match = i; } } if (null == encoded_string) { return source; } else { int difference = string_to_encode_array.length - (last_match + 1); if (difference > 0) { encoded_string.append(string_to_encode_array, last_match + 1, difference); } return encoded_string.toString(); } } private static StringBuilder prepareEncodedString(String source, StringBuilder encodedString, int i, int lastMatch, char[] stringToEncodeArray) { if (null == encodedString) { encodedString = new StringBuilder(source.length()); } int difference = i - (lastMatch + 1); if (difference > 0) { encodedString.append(stringToEncodeArray, lastMatch + 1, difference); } return encodedString; } /** * Transforms a provided <code>String</code> object into a literal that can * be included into a regular expression {@link Pattern} as-is. None of the * regular expression escapes in the string will be functional anymore. * * @param source The string that has to be escaped as a literal * @return The encoded <code>String</code> object. 
* @see #encodeClassname(String) * @see #encodeUrl(String) * @see #encodeUrlValue(String) * @see #encodeHtml(String) * @see #encodeXml(String) * @see #encodeSql(String) * @see #encodeString(String) * @see #encodeLatex(String) * @since 1.3 */ public static String encodeRegexp(String source) { int regexp_quote_start = source.indexOf("\\E"); if (-1 == regexp_quote_start) { return "\\Q" + source + "\\E"; } StringBuilder buffer = new StringBuilder(source.length() * 2); buffer.append("\\Q"); regexp_quote_start = 0; int current = 0; while (-1 == (regexp_quote_start = source.indexOf("\\E", current))) { buffer.append(source.substring(current, regexp_quote_start)); current = regexp_quote_start + 2; buffer.append("\\E\\\\E\\Q"); } buffer.append(source.substring(current, source.length())); buffer.append("\\E"); return buffer.toString(); } /** * Counts the number of times a substring occures in a provided string in * a case-sensitive manner. * * @param source The <code>String</code> object that will be searched in. * @param substring The string whose occurances will we counted. * @return An <code>int</code> value containing the number of occurances * of the substring. * @since 1.0 */ public static int count(String source, String substring) { return count(source, substring, true); } /** * Counts the number of times a substring occures in a provided string. * * @param source The <code>String</code> object that will be searched in. * @param substring The string whose occurances will we counted. * @param matchCase A <code>boolean</code> indicating if the match is * going to be performed in a case-sensitive manner or not. * @return An <code>int</code> value containing the number of occurances * of the substring. 
* @since 1.0 */ public static int count(String source, String substring, boolean matchCase) { if (null == source) { return 0; } if (null == substring) { return 0; } int current_index = 0; int substring_index = 0; int count = 0; if (!matchCase) { source = source.toLowerCase(); substring = substring.toLowerCase(); } while (current_index < source.length() - 1) { substring_index = source.indexOf(substring, current_index); if (-1 == substring_index) { break; } else { current_index = substring_index + substring.length(); count++; } } return count; } /** * Splits a string into different parts, using a seperator string to * detect the seperation boundaries in a case-sensitive manner. The * seperator will not be included in the list of parts. * * @param source The string that will be split into parts. * @param seperator The seperator string that will be used to determine * the parts. * @return An <code>ArrayList</code> containing the parts as * <code>String</code> objects. * @since 1.0 */ public static List<String> split(String source, String seperator) { return split(source, seperator, true); } /** * Splits a string into different parts, using a seperator string to * detect the seperation boundaries. The seperator will not be included in * the list of parts. * * @param source The string that will be split into parts. * @param seperator The seperator string that will be used to determine * the parts. * @param matchCase A <code>boolean</code> indicating if the match is * going to be performed in a case-sensitive manner or not. * @return An <code>ArrayList</code> containing the parts as * <code>String</code> objects. 
* @since 1.0 */ public static List<String> split(String source, String seperator, boolean matchCase) { ArrayList<String> substrings = new ArrayList<>(); if (null == source) { return substrings; } if (null == seperator) { substrings.add(source); return substrings; } int current_index = 0; int delimiter_index; String element; String source_lookup_reference; if (!matchCase) { source_lookup_reference = source.toLowerCase(); seperator = seperator.toLowerCase(); } else { source_lookup_reference = source; } while (current_index <= source_lookup_reference.length()) { delimiter_index = source_lookup_reference.indexOf(seperator, current_index); if (-1 == delimiter_index) { element = new String(source.substring(current_index, source.length())); substrings.add(element); current_index = source.length() + 1; } else { element = new String(source.substring(current_index, delimiter_index)); substrings.add(element); current_index = delimiter_index + seperator.length(); } } return substrings; } /** * Splits a string into different parts, using a seperator string to * detect the seperation boundaries in a case-sensitive manner. The * seperator will not be included in the parts array. * * @param source The string that will be split into parts. * @param seperator The seperator string that will be used to determine * the parts. * @return A <code>String[]</code> array containing the seperated parts. * @since 1.0 */ public static String[] splitToArray(String source, String seperator) { return splitToArray(source, seperator, true); } /** * Splits a string into different parts, using a seperator string to * detect the seperation boundaries. The seperator will not be included in * the parts array. * * @param source The string that will be split into parts. * @param seperator The seperator string that will be used to determine * the parts. * @param matchCase A <code>boolean</code> indicating if the match is * going to be performed in a case-sensitive manner or not. 
* @return A <code>String[]</code> array containing the seperated parts. * @since 1.0 */ public static String[] splitToArray(String source, String seperator, boolean matchCase) { List<String> substrings = split(source, seperator, matchCase); String[] substrings_array = new String[substrings.size()]; substrings_array = substrings.toArray(substrings_array); return substrings_array; } /** * Splits a string into integers, using a seperator string to detect the * seperation boundaries in a case-sensitive manner. If a part couldn't be * converted to an integer, it will be omitted from the resulting array. * * @param source The string that will be split into integers. * @param seperator The seperator string that will be used to determine * the parts. * @return An <code>int[]</code> array containing the seperated parts. * @since 1.0 */ public static int[] splitToIntArray(String source, String seperator) { return splitToIntArray(source, seperator, true); } /** * Splits a string into integers, using a seperator string to detect the * seperation boundaries. If a part couldn't be converted to an integer, * it will be omitted from the resulting array. * * @param source The string that will be split into integers. * @param seperator The seperator string that will be used to determine * the parts. * @param matchCase A <code>boolean</code> indicating if the match is * going to be performed in a case-sensitive manner or not. * @return An <code>int[]</code> array containing the seperated parts. 
* @since 1.0 */ public static int[] splitToIntArray(String source, String seperator, boolean matchCase) { List<String> string_parts = split(source, seperator, matchCase); int number_of_valid_parts = 0; for (String string_part : string_parts) { try { Integer.parseInt(string_part); number_of_valid_parts++; } catch (NumberFormatException e) { // just continue } } int[] string_parts_int = (int[])Array.newInstance(int.class, number_of_valid_parts); int added_parts = 0; for (String string_part : string_parts) { try { string_parts_int[added_parts] = Integer.parseInt(string_part); added_parts++; } catch (NumberFormatException e) { // just continue } } return string_parts_int; } /** * Splits a string into bytes, using a seperator string to detect the * seperation boundaries in a case-sensitive manner. If a part couldn't be * converted to a <code>byte</code>, it will be omitted from the resulting * array. * * @param source The string that will be split into bytes. * @param seperator The seperator string that will be used to determine * the parts. * @return A <code>byte[]</code> array containing the bytes. * @since 1.0 */ public static byte[] splitToByteArray(String source, String seperator) { return splitToByteArray(source, seperator, true); } /** * Splits a string into bytes, using a seperator string to detect the * seperation boundaries. If a part couldn't be converted to a * <code>byte</code>, it will be omitted from the resulting array. * * @param source The string that will be split into bytes. * @param seperator The seperator string that will be used to determine * the parts. * @param matchCase A <code>boolean</code> indicating if the match is * going to be performed in a case-sensitive manner or not. * @return A <code>byte[]</code> array containing the bytes. 
* @since 1.0 */ public static byte[] splitToByteArray(String source, String seperator, boolean matchCase) { List<String> string_parts = split(source, seperator, matchCase); int number_of_valid_parts = 0; for (String string_part : string_parts) { try { Byte.parseByte(string_part); number_of_valid_parts++; } catch (NumberFormatException e) { // just continue } } byte[] string_parts_byte = (byte[])Array.newInstance(byte.class, number_of_valid_parts); int added_parts = 0; for (String string_part : string_parts) { try { string_parts_byte[added_parts] = Byte.parseByte(string_part); added_parts++; } catch (NumberFormatException e) { // just continue } } return string_parts_byte; } /** * Removes all occurances of a string from the front of another string in * a case-sensitive manner. * * @param source The string in which the matching will be done. * @param stringToStrip The string that will be stripped from the front. * @return A new <code>String</code> containing the stripped result. * @since 1.0 */ public static String stripFromFront(String source, String stringToStrip) { return stripFromFront(source, stringToStrip, true); } /** * Removes all occurances of a string from the front of another string. * * @param source The string in which the matching will be done. * @param stringToStrip The string that will be stripped from the front. * @param matchCase A <code>boolean</code> indicating if the match is * going to be performed in a case-sensitive manner or not. * @return A new <code>String</code> containing the stripping result. 
* @since 1.0 */ public static String stripFromFront(String source, String stringToStrip, boolean matchCase) { if (null == source) { return null; } if (null == stringToStrip) { return source; } int strip_length = stringToStrip.length(); int new_index; int last_index; String source_lookup_reference; if (!matchCase) { source_lookup_reference = source.toLowerCase(); stringToStrip = stringToStrip.toLowerCase(); } else { source_lookup_reference = source; } new_index = source_lookup_reference.indexOf(stringToStrip); if (0 == new_index) { do { last_index = new_index; new_index = source_lookup_reference.indexOf(stringToStrip, new_index + strip_length); } while (new_index != -1 && new_index == last_index + strip_length); return source.substring(last_index + strip_length); } else { return source; } } /** * Removes all occurances of a string from the end of another string in a * case-sensitive manner. * * @param source The string in which the matching will be done. * @param stringToStrip The string that will be stripped from the end. * @return A new <code>String</code> containing the stripped result. * @since 1.0 */ public static String stripFromEnd(String source, String stringToStrip) { return stripFromEnd(source, stringToStrip, true); } /** * Removes all occurances of a string from the end of another string. * * @param source The string in which the matching will be done. * @param stringToStrip The string that will be stripped from the end. * @param matchCase A <code>boolean</code> indicating if the match is * going to be performed in a case-sensitive manner or not. * @return A new <code>String</code> containing the stripped result. 
* @since 1.0 */ public static String stripFromEnd(String source, String stringToStrip, boolean matchCase) { if (null == source) { return null; } if (null == stringToStrip) { return source; } int strip_length = stringToStrip.length(); int new_index; int last_index; String source_lookup_reference; if (!matchCase) { source_lookup_reference = source.toLowerCase(); stringToStrip = stringToStrip.toLowerCase(); } else { source_lookup_reference = source; } new_index = source_lookup_reference.lastIndexOf(stringToStrip); if (new_index != -1 && source.length() == new_index + strip_length) { do { last_index = new_index; new_index = source_lookup_reference.lastIndexOf(stringToStrip, last_index - 1); } while (new_index != -1 && new_index == last_index - strip_length); return source.substring(0, last_index); } else { return source; } } /** * Searches for a string within a specified string in a case-sensitive * manner and replaces every match with another string. * * @param source The string in which the matching parts will be replaced. * @param stringToReplace The string that will be searched for. * @param replacementString The string that will replace each matching * part. * @return A new <code>String</code> object containing the replacement * result. * @since 1.0 */ public static String replace(String source, String stringToReplace, String replacementString) { return replace(source, stringToReplace, replacementString, true); } /** * Searches for a string within a specified string and replaces every * match with another string. * * @param source The string in which the matching parts will be replaced. * @param stringToReplace The string that will be searched for. * @param replacementString The string that will replace each matching * part. * @param matchCase A <code>boolean</code> indicating if the match is * going to be performed in a case-sensitive manner or not. * @return A new <code>String</code> object containing the replacement * result. 
* @since 1.0 */ public static String replace(String source, String stringToReplace, String replacementString, boolean matchCase) { if (null == source) { return null; } if (null == stringToReplace) { return source; } if (null == replacementString) { return source; } Iterator<String> string_parts = split(source, stringToReplace, matchCase).iterator(); StringBuilder new_string = new StringBuilder(); while (string_parts.hasNext()) { String string_part = string_parts.next(); new_string.append(string_part); if (string_parts.hasNext()) { new_string.append(replacementString); } } return new_string.toString(); } /** * Creates a new string that contains the provided string a number of * times. * * @param source The string that will be repeated. * @param count The number of times that the string will be repeated. * @return A new <code>String</code> object containing the repeated * concatenation result. * @since 1.0 */ public static String repeat(String source, int count) { if (null == source) { return null; } StringBuilder new_string = new StringBuilder(); while (count > 0) { new_string.append(source); count--; } return new_string.toString(); } /** * Creates a new array of <code>String</code> objects, containing the * elements of a supplied <code>Iterator</code>. * * @param iterator The iterator containing the elements to create the * array with. * @return The new <code>String</code> array. * @since 1.0 */ public static String[] toStringArray(Iterator<String> iterator) { if (null == iterator) { return new String[0]; } ArrayList<String> strings = new ArrayList<>(); while (iterator.hasNext()) { strings.add(iterator.next()); } String[] string_array = new String[strings.size()]; strings.toArray(string_array); return string_array; } /** * Creates a new <code>ArrayList</code>, containing the elements of a * supplied array of <code>String</code> objects. * * @param stringArray The array of <code>String</code> objects that have * to be converted. 
* @return The new <code>ArrayList</code> with the elements of the * <code>String</code> array. * @since 1.0 */ public static List<String> toArrayList(String[] stringArray) { List<String> strings = new ArrayList<>(); if (null == stringArray) { return strings; } Collections.addAll(strings, stringArray); return strings; } /** * Creates a new <code>String</code> object, containing the elements of a * supplied <code>Collection</code> of <code>String</code> objects joined * by a given seperator. * * @param collection The <code>Collection</code> containing the elements * to join. * @param seperator The seperator used to join the string elements. * @return A new <code>String</code> with the join result. * @since 1.0 */ public static String join(Collection collection, String seperator) { if (null == collection) { return null; } if (null == seperator) { seperator = ""; } if (0 == collection.size()) { return ""; } else { StringBuilder result = new StringBuilder(); for (Object element : collection) { result.append(String.valueOf(element)); result.append(seperator); } result.setLength(result.length() - seperator.length()); return result.toString(); } } /** * Creates a new <code>String</code> object, containing the elements of a * supplied array, joined by a given seperator. * * @param array The object array containing the elements to join. * @param seperator The seperator used to join the string elements. * @return A new <code>String</code> with the join result. * @since 1.0 */ public static String join(Object[] array, String seperator) { return join(array, seperator, null, false); } /** * Creates a new <code>String</code> object, containing the elements of a * supplied array, joined by a given seperator. * * @param array The object array containing the elements to join. * @param seperator The seperator used to join the string elements. * @param delimiter The delimiter used to surround the string elements. * @return A new <code>String</code> with the join result. 
* @since 1.0 */ public static String join(Object[] array, String seperator, String delimiter) { return join(array, seperator, delimiter, false); } /** * Creates a new <code>String</code> object, containing the elements of a * supplied array, joined by a given seperator. * * @param array The object array containing the elements to join. * @param seperator The seperator used to join the string elements. * @param delimiter The delimiter used to surround the string elements. * @param encodeStrings Indicates whether the characters of the string * representation of the Array values should be encoded. * @return A new <code>String</code> with the join result. * @since 1.0 */ public static String join(Object[] array, String seperator, String delimiter, boolean encodeStrings) { if (null == array) { return null; } if (null == seperator) { seperator = ""; } if (null == delimiter) { delimiter = ""; } if (0 == array.length) { return ""; } else { int current_index = 0; String array_value; StringBuilder result = new StringBuilder(); while (current_index < array.length - 1) { if (null == array[current_index]) { result.append("null"); } else { array_value = String.valueOf(array[current_index]); if (encodeStrings) { array_value = encodeString(array_value); } result.append(delimiter); result.append(array_value); result.append(delimiter); } result.append(seperator); current_index++; } if (null == array[current_index]) { result.append("null"); } else { array_value = String.valueOf(array[current_index]); if (encodeStrings) { array_value = encodeString(array_value); } result.append(delimiter); result.append(array_value); result.append(delimiter); } return result.toString(); } } /** * Creates a new <code>String</code> object, containing the elements of a * supplied array, joined by a given seperator. * * @param array The boolean array containing the values to join. * @param seperator The seperator used to join the string elements. * @return A new <code>String</code> with the join result. 
* @since 1.0
     */
    public static String join(boolean[] array, String seperator) {
        if (null == array) {
            return null;
        }

        if (null == seperator) {
            seperator = "";
        }

        // a single buffer instead of a new string for each element
        StringBuilder result = new StringBuilder();
        for (int i = 0; i < array.length; i++) {
            if (i > 0) {
                result.append(seperator);
            }
            result.append(array[i]);
        }

        return result.toString();
    }

    /**
     * Creates a new <code>String</code> object, containing the elements of a
     * supplied array, joined by a given separator.
     *
     * @param array The byte array containing the values to join.
     * @param seperator The separator used to join the string elements,
     * <code>null</code> is handled as an empty separator.
     * @return A new <code>String</code> with the decimal representations of
     * the values; or <code>null</code> if the array is <code>null</code>.
     * @since 1.0
     */
    public static String join(byte[] array, String seperator) {
        if (null == array) {
            return null;
        }

        if (null == seperator) {
            seperator = "";
        }

        // a single buffer instead of a new string for each element
        StringBuilder result = new StringBuilder();
        for (int i = 0; i < array.length; i++) {
            if (i > 0) {
                result.append(seperator);
            }
            result.append(array[i]);
        }

        return result.toString();
    }

    /**
     * Creates a new <code>String</code> object, containing the elements of a
     * supplied array, joined by a given separator.
     *
     * @param array The double array containing the values to join.
     * @param seperator The separator used to join the string elements.
     * @return A new <code>String</code> with the join result.
* @since 1.0
     */
    public static String join(double[] array, String seperator) {
        if (null == array) {
            return null;
        }

        if (null == seperator) {
            seperator = "";
        }

        // a single buffer instead of a new string for each element
        StringBuilder result = new StringBuilder();
        for (int i = 0; i < array.length; i++) {
            if (i > 0) {
                result.append(seperator);
            }
            result.append(array[i]);
        }

        return result.toString();
    }

    /**
     * Creates a new <code>String</code> object, containing the elements of a
     * supplied array, joined by a given separator.
     *
     * @param array The float array containing the values to join.
     * @param seperator The separator used to join the string elements,
     * <code>null</code> is handled as an empty separator.
     * @return A new <code>String</code> with the join result; or
     * <code>null</code> if the array is <code>null</code>.
     * @since 1.0
     */
    public static String join(float[] array, String seperator) {
        if (null == array) {
            return null;
        }

        if (null == seperator) {
            seperator = "";
        }

        // a single buffer instead of a new string for each element
        StringBuilder result = new StringBuilder();
        for (int i = 0; i < array.length; i++) {
            if (i > 0) {
                result.append(seperator);
            }
            result.append(array[i]);
        }

        return result.toString();
    }

    /**
     * Creates a new <code>String</code> object, containing the elements of a
     * supplied array, joined by a given separator.
     *
     * @param array The integer array containing the values to join.
     * @param seperator The separator used to join the string elements.
     * @return A new <code>String</code> with the join result.
* @since 1.0 */ public static String join(int[] array, String seperator) { if (null == array) { return null; } if (null == seperator) { seperator = ""; } if (0 == array.length) { return ""; } else { int current_index = 0; String result = ""; while (current_index < array.length - 1) { result = result + array[current_index] + seperator; current_index++; } result = result + array[current_index]; return result; } } /** * Creates a new <code>String</code> object, containing the elements of a * supplied array, joined by a given seperator. * * @param array The long array containing the values to join. * @param seperator The seperator used to join the string elements. * @return A new <code>String</code> with the join result. * @since 1.0 */ public static String join(long[] array, String seperator) { if (null == array) { return null; } if (null == seperator) { seperator = ""; } if (0 == array.length) { return ""; } else { int current_index = 0; String result = ""; while (current_index < array.length - 1) { result = result + array[current_index] + seperator; current_index++; } result = result + array[current_index]; return result; } } /** * Creates a new <code>String</code> object, containing the elements of a * supplied array, joined by a given seperator. * * @param array The short array containing the values to join. * @param seperator The seperator used to join the string elements. * @return A new <code>String</code> with the join result. * @since 1.0 */ public static String join(short[] array, String seperator) { if (null == array) { return null; } if (null == seperator) { seperator = ""; } if (0 == array.length) { return ""; } else { int current_index = 0; String result = ""; while (current_index < array.length - 1) { result = result + array[current_index] + seperator; current_index++; } result = result + array[current_index]; return result; } } /** * Creates a new <code>String</code> object, containing the elements of a * supplied array, joined by a given seperator. 
* * @param array The char array containing the values to join. * @param seperator The seperator used to join the string elements. * @return A new <code>String</code> with the join result. * @since 1.0 */ public static String join(char[] array, String seperator) { return join(array, seperator, null); } /** * Creates a new <code>String</code> object, containing the elements of a * supplied array, joined by a given seperator. * * @param array The char array containing the values to join. * @param seperator The seperator used to join the string elements. * @param delimiter The delimiter used to surround the string elements. * @return A new <code>String</code> with the join result. * @since 1.0 */ public static String join(char[] array, String seperator, String delimiter) { if (null == array) { return null; } if (null == seperator) { seperator = ""; } if (null == delimiter) { delimiter = ""; } if (0 == array.length) { return ""; } else { int current_index = 0; StringBuilder result = new StringBuilder(); while (current_index < array.length - 1) { result.append(delimiter); result.append(array[current_index]); result.append(delimiter); result.append(seperator); current_index++; } result.append(delimiter); result.append(String.valueOf(array[current_index])); result.append(delimiter); return result.toString(); } } /** * Returns an array that contains all the occurances of a substring in a * string in the correct order. The search will be performed in a * case-sensitive manner. * * @param source The <code>String</code> object that will be searched in. * @param substring The string whose occurances will we counted. * @return An <code>int[]</code> array containing the indices of the * substring. * @since 1.0 */ public static int[] indicesOf(String source, String substring) { return indicesOf(source, substring, true); } /** * Returns an array that contains all the occurances of a substring in a * string in the correct order. 
* * @param source The <code>String</code> object that will be searched in. * @param substring The string whose occurances will we counted. * @param matchCase A <code>boolean</code> indicating if the match is * going to be performed in a case-sensitive manner or not. * @return An <code>int[]</code> array containing the indices of the * substring. * @since 1.0 */ public static int[] indicesOf(String source, String substring, boolean matchCase) { if (null == source || null == substring) { return new int[0]; } String source_lookup_reference; if (!matchCase) { source_lookup_reference = source.toLowerCase(); substring = substring.toLowerCase(); } else { source_lookup_reference = source; } int current_index = 0; int substring_index = 0; int count = count(source_lookup_reference, substring); int[] indices = new int[count]; int counter = 0; while (current_index < source.length() - 1) { substring_index = source_lookup_reference.indexOf(substring, current_index); if (-1 == substring_index) { break; } else { current_index = substring_index + substring.length(); indices[counter] = substring_index; counter++; } } return indices; } /** * Matches a collection of regular expressions against a string. * * @param value The <code>String</code> that will be checked. * @param regexps The collection of regular expressions against which the * match will be performed. * @return The <code>Matcher</code> instance that corresponds to the * <code>String</code> that returned a successful match; or * <p><code>null</code> if no match could be found. * @since 1.0 */ public static Matcher getMatchingRegexp(String value, Collection<Pattern> regexps) { if (value != null && value.length() > 0 && regexps != null && regexps.size() > 0) { Matcher matcher; for (Pattern regexp : regexps) { matcher = regexp.matcher(value); if (matcher.matches()) { return matcher; } } } return null; } /** * Matches a collection of strings against a regular expression. 
* * @param values The <code>Collection</code> of <code>String</code> * objects that will be checked. * @param regexp The regular expression <code>Pattern</code> against which * the matches will be performed. * @return The <code>Matcher</code> instance that corresponds to the * <code>String</code> that returned a successful match; or * <p><code>null</code> if no match could be found. * @since 1.0 */ public static Matcher getRegexpMatch(Collection<String> values, Pattern regexp) { if (values != null && values.size() > 0 && regexp != null) { Matcher matcher; for (String value : values) { matcher = regexp.matcher(value); if (matcher.matches()) { return matcher; } } } return null; } /** * Checks if the name filters through an including and an excluding * regular expression. * * @param name The <code>String</code> that will be filtered. * @param included The regular expressions that needs to succeed * @param excluded The regular expressions that needs to fail * @return <code>true</code> if the name filtered through correctly; or * <p><code>false</code> otherwise. * @since 1.0 */ public static boolean filter(String name, Pattern included, Pattern excluded) { Pattern[] included_array = null; if (included != null) { included_array = new Pattern[]{included}; } Pattern[] excluded_array = null; if (excluded != null) { excluded_array = new Pattern[]{excluded}; } return filter(name, included_array, excluded_array); } /** * Checks if the name filters through a series of including and excluding * regular expressions. * * @param name The <code>String</code> that will be filtered. * @param included An array of regular expressions that need to succeed * @param excluded An array of regular expressions that need to fail * @return <code>true</code> if the name filtered through correctly; or * <p><code>false</code> otherwise. 
* @since 1.0 */ public static boolean filter(String name, Pattern[] included, Pattern[] excluded) { if (null == name) { return false; } boolean accepted = false; // retain only the includes if (null == included) { accepted = true; } else { for (Pattern pattern : included) { if (pattern != null && pattern.matcher(name).matches()) { accepted = true; break; } } } // remove the excludes if (accepted && excluded != null) { for (Pattern pattern : excluded) { if (pattern != null && pattern.matcher(name).matches()) { accepted = false; break; } } } return accepted; } /** * Ensure that the first character of the provided string is upper case. * * @param source The <code>String</code> to capitalize. * @return The capitalized <code>String</code>. * @since 1.0 */ public static String capitalize(String source) { if (source == null || source.length() == 0) { return source; } if (source.length() > 1 && Character.isUpperCase(source.charAt(0))) { return source; } char chars[] = source.toCharArray(); chars[0] = Character.toUpperCase(chars[0]); return new String(chars); } /** * Ensure that the first character of the provided string lower case. * * @param source The <code>String</code> to uncapitalize. * @return The uncapitalized <code>String</code>. 
* @since 1.5
     */
    public static String uncapitalize(String source) {
        if (source == null || source.length() == 0) {
            return source;
        }

        // longer strings that already start with a lower case character are
        // handed back as they are
        if (source.length() > 1 && Character.isLowerCase(source.charAt(0))) {
            return source;
        }

        char chars[] = source.toCharArray();
        chars[0] = Character.toLowerCase(chars[0]);
        return new String(chars);
    }

    /**
     * Replaces every match of a URL pattern in a text with an HTML anchor.
     * <p>The first group of the pattern provides the URL that is used for
     * the <code>href</code> attribute. When the pattern has more than one
     * group, the second group provides the text that is displayed, otherwise
     * the URL itself is displayed.
     *
     * @param source The text in which the URLs will be converted.
     * @param pattern The pattern that detects the URLs.
     * @param shorten Indicates whether displayed URLs that are longer than
     * the configured maximum visual URL length have to be abbreviated.
     * @param sanitize Indicates whether <code>javascript:</code> has to be
     * removed from the URLs and whether <code>http://</code> has to be
     * added in front of URLs that neither contain <code>://</code> nor
     * start with a slash.
     * @param no_follow Indicates whether <code>rel="nofollow"</code> has to
     * be added to the anchors.
     * @return The text with the converted URLs; or the provided
     * <code>source</code> when the pattern didn't match.
     */
    private static String convertUrl(String source, Pattern pattern, boolean shorten, boolean sanitize, boolean no_follow) {
        int max_length = RifeConfig.Tools.getMaxVisualUrlLength();

        String result = source;

        Matcher url_matcher = pattern.matcher(source);
        boolean found = url_matcher.find();
        if (found) {
            String visual_url;
            String actual_url;
            // index in the source up to where the text has been copied
            int last = 0;
            StringBuilder sb = new StringBuilder();
            do {
                actual_url = url_matcher.group(1);
                if (url_matcher.groupCount() > 1) {
                    visual_url = url_matcher.group(2);
                } else {
                    visual_url = actual_url;
                }

                if (sanitize) {
                    // defang javascript; the case is ignored and the removal
                    // is repeated until nothing is left, otherwise
                    // "JavaScript:" or a nested "javajavascript:script:"
                    // would survive the clean-up
                    String url_before_defang;
                    do {
                        url_before_defang = actual_url;
                        actual_url = StringUtils.replace(actual_url, "javascript:", "", false);
                    } while (!actual_url.equals(url_before_defang));

                    // fill in http:// for URLs that don't begin with /
                    if ((!actual_url.contains("://")) &&
                        (!actual_url.startsWith("/"))) {
                        actual_url = "http://" + actual_url;
                    }
                }

                // for bare URLs only the first group is replaced, whatever
                // the pattern matched in front of it is kept in the text
                if (pattern.equals(BBCODE_BAREURL)) {
                    sb.append(source.substring(last, url_matcher.start(1)));
                } else {
                    sb.append(source.substring(last, url_matcher.start(0)));
                }

                // NOTE(review): the URL is written into the attribute as-is,
                // presumably the text has been HTML encoded by the caller
                // beforehand - confirm
                sb.append("<a href=\"");
                sb.append(actual_url);
                sb.append("\"");
                if (actual_url.startsWith("http://") ||
                    actual_url.startsWith("https://")) {
                    sb.append(" target=\"_blank\"");
                }
                if (no_follow) {
                    sb.append(" rel=\"nofollow\"");
                }
                sb.append(">");
                if (visual_url.length() <= max_length || !shorten) {
                    sb.append(visual_url);
                } else {
                    String ellipsis = "...";
                    int query_index = visual_url.indexOf("?");

                    // remove query string but keep '?'
                    if (query_index != -1) {
                        visual_url = visual_url.substring(0, query_index + 1) + ellipsis;
                    }

                    // still too long: only keep what precedes the first slash
                    // after the scheme and what follows the last slash
                    if (visual_url.length() >= max_length) {
                        int last_slash = visual_url.lastIndexOf("/");
                        int start_slash = visual_url.indexOf("/", visual_url.indexOf("://") + 3);

                        if (last_slash != start_slash) {
                            visual_url = visual_url.substring(0, start_slash + 1) + ellipsis + visual_url.substring(last_slash);
                        }
                    }

                    sb.append(visual_url);
                }
                sb.append("</a>");

                if (pattern.equals(BBCODE_BAREURL)) {
                    last = url_matcher.end(1);
                } else {
                    last = url_matcher.end(0);
                }

                found = url_matcher.find();
            }
            while (found);

            // the text that follows the last URL
            sb.append(source.substring(last));
            result = sb.toString();
        }

        return result;
    }

    /**
     * Converts a BBCode marked-up text to regular html, without activating
     * any of the BBCode options.
     *
     * @param source The text with BBCode tags.
     * @return A <code>String</code> with the corresponding HTML code; or
     * <code>null</code> if <code>source</code> is <code>null</code>.
     * @since 1.0
     */
    public static String convertBbcode(String source) {
        if (null == source) {
            return null;
        }

        // the cast selects the varargs variant with an absent options array
        return convertBbcode(source, (BbcodeOption[])null);
    }

    /**
     * Converts a BBCode marked-up text to regular html.
     *
     * @param source The text with BBCode tags.
     * @return A <code>String</code> with the corresponding HTML code
     * @since 1.0
     */
    public static String convertBbcode(final String source, BbcodeOption...
options) { if (null == source) { return null; } boolean shorten = false; boolean sanitize = false; boolean convert_bare = false; boolean no_follow_links = false; if (options != null) { for (BbcodeOption option : options) { if (option.equals(StringUtils.SHORTEN_URL)) { shorten = true; } else if (option.equals(StringUtils.SANITIZE_URL)) { sanitize = true; } else if (option.equals(StringUtils.CONVERT_BARE_URLS)) { convert_bare = true; } else if (option.equals(StringUtils.NO_FOLLOW_LINKS)) { no_follow_links = true; } } } String sourcecopy = source; StringBuilder result = new StringBuilder(source.length()); int startindex; int endIndex; int nextCodeIndex; while (-1 != (startindex = sourcecopy.indexOf("[code]"))) { // handle parsed String parsed = sourcecopy.substring(0, startindex); endIndex = sourcecopy.indexOf("[/code]") + 7; // 7 == the sizeof "[/code]" nextCodeIndex = sourcecopy.indexOf("[code]", startindex + 6); // 6 == the sizeof "[code]" if (endIndex < 0) { // not ended... set to end of string endIndex = sourcecopy.length() - 1; } if (nextCodeIndex < endIndex && nextCodeIndex > 0) { // nested [code] tags /* must end before the next [code] * this will leave a dangling [/code] but the HTML is valid */ StringBuilder sourcecopycopy = new StringBuilder(); sourcecopycopy .append(sourcecopy.substring(0, nextCodeIndex)) .append("[/code]") .append(sourcecopy.substring(nextCodeIndex)); sourcecopy = sourcecopycopy.toString(); endIndex = sourcecopy.indexOf("[/code]") + 7; } if (startindex > endIndex) { // dangling [/code] endIndex = sourcecopy.indexOf("[/code]", endIndex + 7) + 7; // 7 == the sizeof "[/code]" if (endIndex < 0) { endIndex = sourcecopy.length() - 1; } } String code = sourcecopy.substring(startindex, endIndex); parsed = parseBBCode(parsed, shorten, sanitize, convert_bare, no_follow_links); // handle raw code = StringUtils.replace(code, "[code]", "<div class=\"codebody\"><pre>", false); code = StringUtils.replace(code, "[/code]", "</pre></div>", false); result 
.append(parsed) .append(code); sourcecopy = sourcecopy.substring(endIndex); } result.append(parseBBCode(sourcecopy, shorten, sanitize, convert_bare, no_follow_links)); return result.toString(); } private static String parseBBCode(String source, boolean shorten, boolean sanitize, boolean convert_bare, boolean no_follow) { String result = source; result = StringUtils.replace(result, "[b]", "<b>", false); result = StringUtils.replace(result, "[/b]", "</b>", false); result = StringUtils.replace(result, "[u]", "<u>", false); result = StringUtils.replace(result, "[/u]", "</u>", false); result = StringUtils.replace(result, "[i]", "<i>", false); result = StringUtils.replace(result, "[/i]", "</i>", false); result = StringUtils.replace(result, "[pre]", "<pre>", false); result = StringUtils.replace(result, "[/pre]", "</pre>", false); String resultCopy = result; String resultLowerCopy = result.toLowerCase(); StringBuilder buffer = new StringBuilder(); int startIndex; int endIndex; while (-1 != (startIndex = resultLowerCopy.indexOf("[*]"))) { int begin = resultLowerCopy.indexOf("[list]", startIndex + 3); int end = resultLowerCopy.indexOf("[/list]", startIndex + 3); int next = resultLowerCopy.indexOf("[*]", startIndex + 3); // 3 == sizeof [*] if (begin == -1) { begin = Integer.MAX_VALUE; } if (end == -1) { end = Integer.MAX_VALUE; } if (next == -1) { next = Integer.MAX_VALUE; } if (next < begin && next < end) { endIndex = next; } else if (begin < next && begin < end) { endIndex = begin; } else if (end < next && end < begin) { endIndex = end; } else { endIndex = resultLowerCopy.length(); } buffer .append(resultCopy.substring(0, startIndex)) .append("<li>") .append(resultCopy.substring(startIndex + 3, endIndex)) // 3 == sizeof [*] .append("</li>"); resultCopy = resultCopy.substring(endIndex); resultLowerCopy = resultLowerCopy.substring(endIndex); } buffer.append(resultCopy.substring(0)); result = buffer.toString(); result = StringUtils.replace(result, "[list]", "<ul>", false); 
result = StringUtils.replace(result, "[/list]", "</ul>", false); Matcher color_matcher = BBCODE_COLOR.matcher(result); result = color_matcher.replaceAll("<font color=\"$1\">"); result = StringUtils.replace(result, "[/color]", "</font>", false); Matcher size_matcher = BBCODE_SIZE.matcher(result); result = size_matcher.replaceAll("<font size=\"$1\">"); result = StringUtils.replace(result, "[/size]", "</font>", false); result = convertUrl(result, BBCODE_URL_SHORT, shorten, sanitize, no_follow); result = convertUrl(result, BBCODE_URL_LONG, shorten, sanitize, no_follow); if (convert_bare) { result = convertUrl(result, BBCODE_BAREURL, shorten, sanitize, no_follow); } Matcher img_matcher = BBCODE_IMG.matcher(result); result = img_matcher.replaceAll("<div class=\"bbcode_img\"><img src=\"$1\" border=\"0\" alt=\"\" /></div>"); Matcher quote_matcher_long = BBCODE_QUOTE_LONG.matcher(result); result = quote_matcher_long.replaceAll("<div class=\"quoteaccount\">$1:</div><div class=\"quotebody\">"); result = StringUtils.replace(result, "[quote]", "<div class=\"quotebody\">", false); result = StringUtils.replace(result, "[/quote]", "</div>", false); result = StringUtils.replace(result, "\r\n", "<br />\r"); result = StringUtils.replace(result, "\n", "<br />\n"); result = StringUtils.replace(result, "\r", "\r\n"); // remove the BR that could be added due to code formatting ppl // use to format lists result = StringUtils.replace(result, "ul><br />\r\n", "ul>\r\n"); result = StringUtils.replace(result, "ul><br />\n", "ul>\n"); return result; } /** * Converts a <code>String</code> to a <code>boolean</code> value. * * @param value The <code>String</code> to convert. * @return The corresponding <code>boolean</code> value. 
* @since 1.0 */ public static boolean convertToBoolean(String value) { if (null == value) { return false; } return value.equals("1") || value.equalsIgnoreCase("t") || value.equalsIgnoreCase("true") || value.equalsIgnoreCase("y") || value.equalsIgnoreCase("yes") || value.equalsIgnoreCase("on"); } /** * Converts all tabs on a line to spaces according to the provided tab * width. * * @param line The line whose tabs have to be converted. * @param tabWidth The tab width. * @return A new <code>String</code> object containing the line with the * replaced tabs. * @since 1.0 */ public static String convertTabsToSpaces(String line, int tabWidth) { StringBuilder result = new StringBuilder(); int tab_index; int last_tab_index = 0; int added_chars = 0; int tab_size; while ((tab_index = line.indexOf("\t", last_tab_index)) != -1) { tab_size = tabWidth - ((tab_index + added_chars) % tabWidth); if (0 == tab_size) { tab_size = tabWidth; } added_chars += tab_size - 1; result.append(line.substring(last_tab_index, tab_index)); result.append(StringUtils.repeat(" ", tab_size)); last_tab_index = tab_index + 1; } if (0 == last_tab_index) { return line; } else { result.append(line.substring(last_tab_index)); } return result.toString(); } /** * Ensures that all whitespace is removed from a <code>String</code>. * <p>It also works with a <code>null</code> argument. * * @param source The <code>String</code> to trim. * @return The trimmed <code>String</code>. * @since 1.0 */ public static String trim(String source) { if (source == null || source.length() == 0) { return source; } return source.trim(); } /** * Calculates the {@link DocumentPosition} of a character index in a * document. 
* * @param document a <code>String</code> with the document where the * position should be looked up in * @param characterIndex the index of the character * @return the resulting <code>DocumentPosition</code> instance; or * <p><code>null</code> if the <code>characterIndex</code> was invalid or * if the <code>document</code> was null * @since 1.0 */ public static DocumentPosition getDocumentPosition(String document, int characterIndex) { if (null == document || characterIndex < 0 || characterIndex > document.length()) { return null; } int line = 0; int column; String[] linebreaks = new String[]{"\r\n", "\n", "\r"}; int last_linebreak_index = 0; int next_linebreak_index = document.length(); int match = -1; do { line++; for (String linebreak : linebreaks) { match = document.indexOf(linebreak, last_linebreak_index); if (match != -1) { if (match >= characterIndex) { next_linebreak_index = match; match = -1; break; } last_linebreak_index = match + linebreak.length(); break; } } } while (match != -1); column = characterIndex - last_linebreak_index + 1; return new DocumentPosition(document.substring(last_linebreak_index, next_linebreak_index), line, column); } /** * Reformats a string where lines that are longer than <tt>width</tt> * are split apart at the earliest wordbreak or at maxLength, whichever is * sooner. If the width specified is less than 5 or greater than the input * Strings length the string will be returned as is. * <p/> * Please note that this method can be lossy - trailing spaces on wrapped * lines may be trimmed. * * @param input the String to reformat. * @param width the maximum length of any one line. * @return a new String with reformatted as needed. 
*/
    public static String wordWrap(String input, int width, Locale locale) {
        // handle invalid input
        if (input == null) {
            return "";
        }
        if (width < 5 || width >= input.length()) {
            return input;
        }

        // default locale
        Locale break_locale = (locale == null) ? Locale.US : locale;

        final String separator = "\n";
        final int input_length = input.length();

        StringBuilder wrapped = new StringBuilder(input_length);
        BreakIterator breaks = BreakIterator.getLineInstance(break_locale);

        // walk through the input, one existing line at a time
        int position = 0;
        while (position <= input_length) {
            // isolate the line that ends at the next line break, or at the
            // end of the input when there's no line break left
            int separator_index = input.indexOf(separator, position);
            String line;
            if (separator_index == -1) {
                line = input.substring(position, input_length);
                position = input_length + 1;
            } else {
                line = input.substring(position, separator_index);
                position = separator_index + separator.length();
            }

            // wrap the line at the break opportunities of the locale
            breaks.setText(line);
            int segment_start = 0;
            int previous = breaks.first();
            for (int current = breaks.next(); current != BreakIterator.DONE; previous = current, current = breaks.next()) {
                // nothing to do as long as the width isn't exceeded
                if (current - 1 - segment_start < width) {
                    continue;
                }

                // when the width is only exceeded by trailing whitespace,
                // the line doesn't have to be broken yet
                int last_visible = current - 1;
                while (last_visible >= 0 && Character.isWhitespace(line.charAt(last_visible))) {
                    last_visible--;
                }
                if (last_visible >= 0 && last_visible - segment_start < width) {
                    continue;
                }

                String segment = line.substring(segment_start, previous);
                // this can happen with trailing whitespace
                if (segment.length() > width) {
                    segment = segment.substring(0, width);
                }
                wrapped.append(segment).append("\n");
                segment_start = previous;
            }

            // what remains of the line after the last inserted break
            if (segment_start < line.length()) {
                wrapped.append(line.substring(segment_start));
            }

            // restore the line break that ended this line in the input
            if (separator_index != -1) {
                wrapped.append("\n");
            }
        }

        return wrapped.toString();
    }

    /**
     * Provides a replacement for characters that need a fallback when they
     * are encoded.
     */
    private interface EncoderFallbackHandler {
        /**
         * Indicates whether a fallback exists for a character.
         *
         * @param character the character to check
         * @return <code>true</code> if a fallback is available; or
         * <p><code>false</code> otherwise
         */
        boolean hasFallback(char character);

        /**
         * Appends the fallback of a character to a buffer.
         *
         * @param encodedBuffer the buffer that receives the fallback
         * @param character the character whose fallback is appended
         */
        void appendFallback(StringBuilder encodedBuffer, char character);
    }

    /**
     * Falls back to decimal numeric character references, like
     * <code>&amp;#160;</code>, for all characters starting at
     * <code>U+00A0</code>.
     */
    private static class HtmlEncoderFallbackHandler implements EncoderFallbackHandler {
        private static final String PREFIX = "&#";
        private static final String SUFFIX = ";";

        public boolean hasFallback(char character) {
            return character >= '\u00A0';
        }

        public void appendFallback(StringBuilder encodedBuffer, char character) {
            encodedBuffer
                .append(PREFIX)
                .append((int)character)
                .append(SUFFIX);
        }
    }

    /**
     * Type of the options that can be handed to the
     * <code>convertBbcode</code> methods.
     */
    public static class BbcodeOption extends EnumClass<String> {
        protected BbcodeOption(String identifier) {
            super(identifier);
        }
    }
}