/* * xtc - The eXTensible Compiler * Copyright (C) 2004-2011 Robert Grimm * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License * version 2.1 as published by the Free Software Foundation. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, * USA. */ package xtc.util; import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; import java.io.Reader; import java.io.Writer; import java.util.Iterator; import java.util.List; import java.util.regex.Pattern; import xtc.Constants; /** * Implementation of utilities for language processors, focusing on * strings and I/O. * * @author Robert Grimm * @version $Revision: 1.48 $ */ public final class Utilities { /** Hide constructor. */ private Utilities() { /* Nothing to do. */ } /** The buffer size for pumping data. */ private static final int BUFFER_SIZE = 4096; /** The buffer for pumping data. */ private static char[] charBuffer = null; /** The regular expression pattern for white space. */ public static final Pattern WHITE_SPACE = Pattern.compile("\\s+"); /** * The regular expression pattern for newlines surrounded by spaces. */ public static final Pattern SPACE_NEWLINE_SPACE = Pattern.compile("[ \t\f]*(?:\r\n|\r|\n)[ \t\f]*"); /** * The regular expression pattern for newlines surrounded by spaces * and optionally followed by stars '<code>*</code>'. */ public static final Pattern COMMENT_NEWLINE = Pattern.compile("[ \t\f]*(?:\r\n|\r|\n)[ \t\f]*\\**[ \t\f]*"); /** * Flag for escaping the bell '<code>\a</code>' and vertical tab * '<code>\v</code>' characters, which may be escaped in standard C * but not in Java. */ public static final int ESCAPE_BELL_VT = 0x01; /** * Flag for using doubled escape sequences. Doubled escape * sequences will still read as an escape sequence, even if they are * included in a program source character or string. */ public static final int ESCAPE_DOUBLE = 0x02; /** * Flag for escaping the '<code>[</code>', '<code>-</code>', and * '<code>]</code>' characters used in regular expressions. */ public static final int ESCAPE_REGEX = 0x04; /** * Flag for escaping all characters that neither are printable ASCII * characters nor have named escapes using Unicode escapes. */ public static final int ESCAPE_UNICODE = 0x08; /** * Flag for escaping less-than '<code><</code>' and greater-than * '<code>></code>' signs as HTML entities. */ public static final int ESCAPE_HTML = 0x10; /** The escape flags for standard C escapes. */ public static final int C_ESCAPES = ESCAPE_BELL_VT | ESCAPE_UNICODE; /** The escape flags for Java escapes. */ public static final int JAVA_ESCAPES = ESCAPE_UNICODE; /** The escape flags for Java and HTML escapes. */ public static final int JAVA_HTML_ESCAPES = ESCAPE_UNICODE | ESCAPE_HTML; /** The escape flags for Java and regex escapes. */ public static final int FULL_ESCAPES = ESCAPE_UNICODE | ESCAPE_REGEX; /** The escape flags for Java, regex, and HTML escapes. */ public static final int FULL_HTML_ESCAPES = FULL_ESCAPES | ESCAPE_HTML; // ======================================================================= /** * Determine whether the specified name is qualified. * * @param name The name. * @return <code>true</code> if the name is qualified. */ public static boolean isQualified(String name) { final int length = name.length(); boolean opaque = false; for (int i=0; i<length; i++) { char c = name.charAt(i); if (opaque) { if (Constants.END_OPAQUE == c) opaque = false; } else { if (Constants.QUALIFIER == c) return true; if (Constants.START_OPAQUE == c) opaque = true; } } return false; } /** * Extract the qualifying part from the specified qualified name. * For example, the qualifying part for <code>xtc.parser.Rats</code> * is <code>xtc.parser</code>. * * @param qname The qualified name. * @return The qualifying part, or <code>null</code> if the * specified name has no qualifying part. */ public static String getQualifier(String qname) { boolean opaque = false; for (int i=qname.length()-1; i>=0; i--) { char c = qname.charAt(i); if (opaque) { if (Constants.START_OPAQUE == c) opaque = false; } else { if (Constants.QUALIFIER == c) return qname.substring(0, i); if (Constants.END_OPAQUE == c) opaque = true; } } return null; } /** * Extract the unqualified name from the specified qualified name. * For example, the unqualified name for * <code>xtc.parser.Rats</code> is <code>Rats</code>. * * @param qname The qualified name. * @return The unqualified name. */ public static String getName(String qname) { boolean opaque = false; for (int i=qname.length()-1; i>=0; i--) { char c = qname.charAt(i); if (opaque) { if (Constants.START_OPAQUE == c) opaque = false; } else { if (Constants.QUALIFIER == c) return qname.substring(i+1); if (Constants.END_OPAQUE == c) opaque = true; } } return qname; } /** * Construct an unqualified name. If the specified name is * qualified, this method returns the unqualified name. Otherwise, * it returns the specified name. * * @param name The name. * @return The unqualified name. */ public static String unqualify(String name) { return isQualified(name)? getName(name) : name; } /** * Construct a qualified name. If the specified qualifier is * <code>null</code>, this method returns the unqualified name * * @param qualifier The qualifier. * @param name The unqualified name. * @return The corresponding qualified name. */ public static String qualify(String qualifier, String name) { return null == qualifier ? name : qualifier + Constants.QUALIFIER + name; } /** * Convert the specified qualified name to its components. * * @param qname The qualified name. * @return The corresponding identifier. */ public static String[] toComponents(String qname) { final int length = qname.length(); int count = 1; boolean opaque = false; // Count the number of components. for (int i=0; i<length; i++) { char c = qname.charAt(i); if (opaque) { if (Constants.END_OPAQUE == c) opaque = false; } else { if (Constants.QUALIFIER == c) count++; if (Constants.START_OPAQUE == c) opaque = true; } } // Wrap up with the trivial case of no qualifiers. if (1 == count) return new String[] { qname }; // Set up the component array. String[] components = new String[count]; int start = -1; count = 0; // Fill in the component array. for (int i=0; i<length; i++) { char c = qname.charAt(i); if (opaque) { if (Constants.END_OPAQUE == c) opaque = false; } else { if (Constants.QUALIFIER == c) { components[count] = qname.substring(start+1, i); start = i; count++; } else if (Constants.START_OPAQUE == c) { opaque = true; } } } // Don't forget the last component. components[count] = qname.substring(start+1,length); // Done. return components; } /** * Replace all occurrences of the qualifier character with the * specified character. * * @param s The string. * @param qual The new qualifier. * @return The requalified string. */ private static String requalify(String s, char qual) { final int length = s.length(); StringBuilder buf = new StringBuilder(length); boolean opaque = false; for (int i=0; i<length; i++) { char c = s.charAt(i); if (opaque) { buf.append(c); if (Constants.END_OPAQUE == c) opaque = false; } else { if (Constants.QUALIFIER == c) { buf.append(qual); } else { buf.append(c); if (Constants.START_OPAQUE == c) opaque = true; } } } return buf.toString(); } /** * Convert the specified qualified name into a programming language * identifier. * * @param qname The qualified name. * @return The corresponding identifier. */ public static String toIdentifier(String qname) { return requalify(qname, '$'); } /** * Convert the specified qualified name into a file path. * * @param qname The qualified name. * @return The corresponding file path. */ public static String toPath(String qname) { return requalify(qname, File.separatorChar); } /** * Convert the specified qualified name into a file path. * * @param qname The qualified name. * @param ext The file's extension. * @return The corresponding file path. */ public static String toPath(String qname, String ext) { return toPath(qname) + '.' + ext; } // ======================================================================= /** * Locate the specified file. This method searches for the * specified file, relative to each root in the list of * <code>java.io.File</code> objects. * * @param roots The list of file system roots to search. * @param path The (relative) file path. * @return The corresponding file. * @throws FileNotFoundException * Signals that the specified file could not be found. */ public static File locate(List<File> roots, String path) throws FileNotFoundException { for (File root : roots) { File file = new File(root, path); if (file.exists() && file.isFile()) { return file; } } throw new FileNotFoundException(path + " not found"); } // ======================================================================= /** * Pump all data from the specified reader to the specified writer. * * @param in The reader. * @param out The writer. * @throws IOException Signals an exceptinal condition during I/O. */ public static void pump(Reader in, Writer out) throws IOException { if (null == charBuffer) charBuffer = new char[BUFFER_SIZE]; int number = in.read(charBuffer); while (-1 != number) { out.write(charBuffer, 0, number); number = in.read(charBuffer); } } // ======================================================================= /** * Remove all whitespace from the specified string. * * @param s The string. * @return The string without whitespace. */ public static String withoutSpace(String s) { return WHITE_SPACE.matcher(s).replaceAll(""); } // ======================================================================= /** * Determine whether the escape bell and vertical tabs flag is set. * * @see #ESCAPE_BELL_VT * * @param flags The escape flags. * @return <code>true</code> if the escape bell and vertical tabs * flag is set. */ public static boolean useBellVTEscapes(int flags) { return (0 != (ESCAPE_BELL_VT & flags)); } /** * Determine whether the escape double flag is set. * * @see #ESCAPE_DOUBLE * * @param flags The escape flags. * @return <code>true</code> if the escape double flag is set. */ public static boolean useDoubleEscapes(int flags) { return (0 != (ESCAPE_DOUBLE & flags)); } /** * Determine whether the escape regex flag is set. * * @see #ESCAPE_REGEX * * @param flags The escape flags. * @return <code>true</code> if the escape regex flag is set. */ public static boolean useRegexEscapes(int flags) { return (0 != (ESCAPE_REGEX & flags)); } /** * Determine whether the escape Unicode flag is set. * * @see #ESCAPE_UNICODE * * @param flags The escape flags. * @return <code>true</code> if the escape Unicode flag is set. */ public static boolean useUnicodeEscapes(int flags) { return (0 != (ESCAPE_UNICODE & flags)); } /** * Determine whether the escape HTML flag is set. * * @see #ESCAPE_HTML * * @param flags The escape flags. * @return <code>true</code> if the escape HTML flag is set. */ public static boolean useHTMLEscapes(int flags) { return (0 != (ESCAPE_HTML & flags)); } // ======================================================================= /** * Escape the specified character into the specified appendable. * * @param c The character. * @param out The appendable. * @param flags The escape flags. * @throws IOException Signals an I/O error. */ public static void escape(char c, Appendable out, int flags) throws IOException { switch (c) { case '\u0007': if (useBellVTEscapes(flags)) { if (useDoubleEscapes(flags)) { out.append("\\\\a"); } else { out.append("\\a"); } } // We still need to go through default processing. break; case '\b': if (useDoubleEscapes(flags)) { out.append("\\\\b"); } else { out.append("\\b"); } return; case '\t': if (useDoubleEscapes(flags)) { out.append("\\\\t"); } else { out.append("\\t"); } return; case '\n': if (useDoubleEscapes(flags)) { out.append("\\\\n"); } else { out.append("\\n"); } return; case '\u000b': if (useBellVTEscapes(flags)) { if (useDoubleEscapes(flags)) { out.append("\\\\v"); } else { out.append("\\v"); } } // We still need to go through default processing. break; case '\f': if (useDoubleEscapes(flags)) { out.append("\\\\f"); } else { out.append("\\f"); } return; case '\r': if (useDoubleEscapes(flags)) { out.append("\\\\r"); } else { out.append("\\r"); } return; case '\"': if (useDoubleEscapes(flags)) { out.append("\\\\\\\""); } else { out.append("\\\""); } return; case '\'': if (useDoubleEscapes(flags)) { out.append("\\\\\\\'"); } else { out.append("\\\'"); } return; case '-': if (useRegexEscapes(flags)) { if (useDoubleEscapes(flags)) { out.append("\\\\-"); } else { out.append("\\-"); } } else { out.append('-'); } return; case '<': if (useHTMLEscapes(flags)) { out.append("<"); } else { out.append('<'); } return; case '>': if (useHTMLEscapes(flags)) { out.append(">"); } else { out.append('>'); } return; case '[': if (useRegexEscapes(flags)) { if (useDoubleEscapes(flags)) { out.append("\\\\["); } else { out.append("\\["); } } else { out.append('['); } return; case '\\': if (useDoubleEscapes(flags)) { out.append("\\\\\\\\"); } else { out.append("\\\\"); } return; case ']': if (useRegexEscapes(flags)) { if (useDoubleEscapes(flags)) { out.append("\\\\]"); } else { out.append("\\]"); } } else { out.append(']'); } return; default: // The default case is handled below. } if (useUnicodeEscapes(flags) && ((' ' > c) || ('~' < c))) { String hex = Integer.toHexString(c); String padding; switch (hex.length()) { case 1: padding = "000"; break; case 2: padding = "00"; break; case 3: padding = "0"; break; default: padding = ""; } if (useDoubleEscapes(flags)) { out.append("\\\\"); } else { out.append("\\"); } out.append('u'); out.append(padding); out.append(hex); } else { out.append(c); } } /** * Escape the specified string into the specified appendable. * * @param s The string. * @param out The appendable. * @param flags The escape flags. * @throws IOException Signals an I/O error. */ public static void escape(String s, Appendable out, int flags) throws IOException { final int length = s.length(); for (int i=0; i<length; i++) { escape(s.charAt(i), out, flags); } } /** * Escape the specified character. * * @param c The character. * @param flags The escape flags. * @return The escape character as a string. */ public static String escape(char c, int flags) { StringBuilder buf = new StringBuilder(1); try { escape(c, buf, flags); } catch (IOException x) { assert false; } return buf.toString(); } /** * Escape the specified string. * * @param s The string. * @param flags The escape flags. * @return The escaped string. */ public static String escape(String s, int flags) { StringBuilder buf = new StringBuilder(s.length()); try { escape(s, buf, flags); } catch (IOException x) { assert false; } return buf.toString(); } /** * Unescape the specified string. This method unescapes basic Java * escapes ('<code>\b</code>', '<code>\f</code>', '<code>\n</code>', * '<code>\r</code>', '<code>\t</code>', '<code>\"</code>', * '<code>\'</code>', and '<code>\\</code>'), the additional * standard C escapes ('<code>\a</code>', '<code>\v</code>', and * '<code>\?</code>'), standard C's octal escapes, and standard C's * and Java's Unicode escapes. To support regex-like character * classes, it also unescapes '<code>\-</code>', '<code>\[</code>', * and '<code>\]</code>'. * * @param s The string to unescape. * @return The unescaped string. * @throws IllegalArgumentException Signals a malformed string. */ public static String unescape(String s) { if (-1 == s.indexOf('\\')) { return s; } final int length = s.length(); StringBuilder buf = new StringBuilder(length); for (int i=0; i<length; i++) { char c = s.charAt(i); if ('\\' != c) { buf.append(c); } else { i++; if (i >= length) { throw new IllegalArgumentException("incomplete escape sequence"); } c = s.charAt(i); switch (c) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': { int val = c; if (i+1 < length) { c = s.charAt(i+1); if (('0' <= c) && (c <= '7')) { val = (val * 8) + c; i++; if (i+1 <length) { c = s.charAt(i+1); if (('0' <= c) && (c <= '7')) { val = (val * 8) + c; i++; } } } } buf.append((char)val); } break; case '"': buf.append('"'); break; case '\'': buf.append('\''); break; case '-': buf.append('-'); break; case '?': buf.append('?'); break; case '[': buf.append('['); break; case '\\': buf.append('\\'); break; case ']': buf.append(']'); break; case 'a': buf.append('\u0007'); break; case 'b': buf.append('\b'); break; case 'f': buf.append('\f'); break; case 'n': buf.append('\n'); break; case 'r': buf.append('\r'); break; case 't': buf.append('\t'); break; case 'u': i += 4; int n; if (i >= length) { throw new IllegalArgumentException("incomplete universal character"+ " name " + s.substring(i-3)); } try { n = Integer.parseInt(s.substring(i-3, i+1), 16); } catch (NumberFormatException x) { throw new IllegalArgumentException("malformed universal character" + " name " + s.substring(i-3,i+1)); } buf.append((char)n); break; case 'v': buf.append('\u000b'); break; default: throw new IllegalArgumentException("illegal escaped character \'\\" + c + "\'"); } } } return buf.toString(); } // ======================================================================= /** * Convert the specified list to a human-readable representation. * This method uses <code>toString()</code> for each element in the * specified list to generate a human-readable representation. * * @param l The list. * @return The human-readable representation. */ public static String format(List<?> l) { final int length = l.size(); StringBuilder buf = new StringBuilder(); Iterator iter = l.iterator(); while (iter.hasNext()) { String el = iter.next().toString(); if ((1 < length) && (! iter.hasNext())) { buf.append("and "); } buf.append(el); if ((2 == length) && (iter.hasNext())) { buf.append(' '); } else if (iter.hasNext()) { buf.append(", "); } } return buf.toString(); } // ======================================================================= /** * Split the specified identifier. This method splits identifiers * using an upper case character for each word component into a * string of lower case words separated by the specified separator. * * @param id The identifier. * @param separator The separator. * @return The split identifier. */ public static String split(String id, char separator) { // Drop any suffixes. int idx = id.indexOf('$'); if (-1 != idx) { id = id.substring(0, idx); } // Count the number of upper case characters. final int length = id.length(); boolean startsUpper = false; int upperCount = 0; for (int i=0; i<length; i++) { if (Character.isUpperCase(id.charAt(i))) { if (0 == i) startsUpper = true; upperCount++; } } // No conversion is necessary if all characters are either lower // or upper case. if ((0 == upperCount) || (length == upperCount)) { return id; } // Do the actual conversion. final int size = startsUpper ? length+upperCount-1 : length+upperCount; StringBuilder buf = new StringBuilder(size); for (int i=0; i<length; i++) { char c = id.charAt(i); if (Character.isUpperCase(c)) { if (0 != i) { buf.append(separator); } buf.append(Character.toLowerCase(c)); } else { buf.append(c); } } return buf.toString(); } /** * Get the appropriate indefinite article for the specified noun. * * @param noun The noun. * @return The corresponding indefinite article. */ public static String toArticle(String noun) { if (noun.startsWith("a") || noun.startsWith("e") || noun.startsWith("i") || noun.startsWith("o") || noun.startsWith("u")) { return "an"; } else { return "a"; } } }