/* * Copyright (c) 2009 The Jackson Laboratory * * This software was developed by Gary Churchill's Lab at The Jackson * Laboratory (see http://research.jax.org/faculty/churchill). * * This is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this software. If not, see <http://www.gnu.org/licenses/>. */ package org.jax.r; import java.util.List; /** * A collection of static R utility functions * @author <A HREF="mailto:keith.sheppard@jax.org">Keith Sheppard</A> */ public class RUtilities { private static final String R_VECTOR_START = "c("; private static final String R_VECTOR_END = ")"; private static final String R_VECTOR_ELEMENT_SEPERATOR = ", "; private static final String R_STRING_QUOTES = "\""; private static final String R_TRUE = "TRUE"; private static final String R_FALSE = "FALSE"; private static final String[] RESERVED_R_IDENTIFIERS = new String [] { "if", "else", "if", "else", "repeat", "while", "function", "for", "in", "next", "break", "TRUE", "FALSE", "NULL", "Inf", "NaN", "NA", "NA_integer_", "NA_real_", "NA_complex_", "NA_character_"}; private static final char BACKTICK = '`'; /** * private constructor (all our functions are static) */ private RUtilities() { } /** * Get the names expression for the given inner expression * @param rInnerExpression * the inner expression that we're getting names for * @return * the names */ public static String rNamesExpression(String rInnerExpression) { return "names(" + rInnerExpression + ")"; } /** * Create an index expression given the R expression and index * @param rExpressionToIndex * the R expression * @param zeroBasedIndex * the index * @return * the index expression */ public static String indexExpression( String rExpressionToIndex, int zeroBasedIndex) { // R wants 1 based indexes return rExpressionToIndex + "[" + (zeroBasedIndex + 1) + "]"; } /** * Create an index expression given the R expression and index * @param rExpressionToIndex * the R expression * @param zeroBasedColumnIndex * the index * @return * the index expression */ public static String columnIndexExpression( String rExpressionToIndex, int zeroBasedColumnIndex) { // R wants 1 based indexes return rExpressionToIndex + "[," + (zeroBasedColumnIndex + 1) + "]"; } /** * Create an index expression given the R expression and index * @param rExpressionToIndex * the R expression * @param zeroBasedRowIndex * the index * @return * the index expression */ public static String rowIndexExpression( String rExpressionToIndex, int zeroBasedRowIndex) { // R wants 1 based indexes return rExpressionToIndex + "[" + (zeroBasedRowIndex + 1) + ",]"; } /** * Convert the given double array into an R vector string * @param doubleArray * the double array to convert * @return * the R value string */ public static String doubleArrayToRVector(double[] doubleArray) { String[] rValueArray = new String[doubleArray.length]; for(int i = 0; i < doubleArray.length; i++) { rValueArray[i] = RUtilities.javaDoubleToRDouble(doubleArray[i]); } return RUtilities.objectArrayToRVector(rValueArray); } /** * Convert the given java double to an R double string * @param javaDouble * the java double to convert * @return * the R string value */ public static String javaDoubleToRDouble(double javaDouble) { if(javaDouble == Double.POSITIVE_INFINITY) { return "Inf"; } else { return Double.toString(javaDouble); } } /** * Convert the given integer array into an R vector string * @param intArray * the int array to convert * @return * the R value string */ public static String intArrayToRVector(int[] intArray) { String[] rValueArray = new String[intArray.length]; for(int i = 0; i < intArray.length; i++) { rValueArray[i] = RUtilities.javaIntToRInt(intArray[i]); } return RUtilities.objectArrayToRVector(rValueArray); } /** * Convert the given java integer into an R value string * @param javaInt * the java integer * @return * the R value string for the given java integer */ public static String javaIntToRInt(int javaInt) { return Integer.toString(javaInt); } /** * Convert a java string list to it's R string vector representation * @param stringList * the string list * @return * the vector */ public static String stringListToRVector(List<String> stringList) { return stringArrayToRVector( stringList.toArray(new String[stringList.size()])); } /** * Convert the given string array into an r vector string * @param stringArray * the java string * @return * the string for an R vector of strings */ public static String stringArrayToRVector(String[] stringArray) { String[] rStringArray = new String[stringArray.length]; for(int i = 0; i < stringArray.length; i++) { rStringArray[i] = RUtilities.javaStringToRString(stringArray[i]); } return RUtilities.objectArrayToRVector(rStringArray); } /** * Convert the given raw java string into an R string by adding quotes * @param javaString * the raw java string * @return * the R quoted string */ public static String javaStringToRString(String javaString) { // we need to quote the string for r return R_STRING_QUOTES + javaString.replace("\\", "\\\\") + R_STRING_QUOTES; } /** * Convert the given boolean array to an R vector string * @param booleanArray * the boolean array to convert * @return * the R vector string */ public static String booleanArrayToRVector(boolean[] booleanArray) { String[] rValueArray = new String[booleanArray.length]; for(int i = 0; i < booleanArray.length; i++) { rValueArray[i] = RUtilities.javaBooleanToRBoolean(booleanArray[i]); } return RUtilities.objectArrayToRVector(rValueArray); } /** * Convert the given java boolean type into an R value string * @param javaBoolean * the java boolean to convert * @return * the R value string */ public static String javaBooleanToRBoolean(boolean javaBoolean) { return javaBoolean ? R_TRUE : R_FALSE; } /** * Convert the given values to an R vector. Note that the given values * are not quoted. It is assumed that if they are already in "R" form and * so the {@link Object#toString()} value is used directly * @param rValues * the values whose {@link Object#toString()} values * we will use to form the list * @return * the string that can be used as an R vector */ public static String objectArrayToRVector(Object[] rValues) { StringBuffer sb = new StringBuffer(R_VECTOR_START); if(rValues.length > 0) { // treat the 1st one as a special case (no comma) sb.append(rValues[0].toString()); // all the rest have commas for(int i = 1; i < rValues.length; i++) { sb.append(R_VECTOR_ELEMENT_SEPERATOR); sb.append(rValues[i].toString()); } } sb.append(R_VECTOR_END); return sb.toString(); } /** * Convert from an R identifier to a "readable string". All we're doing * in this implementation is replacing underscores with spaces. * @see #fromReadableNameToRIdentifier(String) * @param rIdentifier * the identifier that we're getting a "readable" name for * @return * the "readable" name */ public static String fromRIdentifierToReadableName(String rIdentifier) { return rIdentifier.replace('_', ' '); } /** * Get an error message that results from converting the given * readable name to an R identifier. * @param readableName * the name to check * @return * an error message suitable for presenting to the user or null * if there is nothing wrong with the given readableName */ public static String getErrorMessageForReadableName(String readableName) { // use the message in the exception (if we get one) try { fromReadableNameToRIdentifier(readableName); return null; } catch(RSyntaxException ex) { return ex.getMessage(); } } /** * Convert the given name from a readable name to an R identifier. The * only conversion that this method currently does is to go from spaces * to underscores. Other than that the given name must be a valid R * identifier in every other way or an {@link RSyntaxException} is * thrown. * @param readableName * the readable name * @return * the R identifier for the readable name * @throws RSyntaxException * if we can't convert the given readable name to an identifier */ public static String fromReadableNameToRIdentifier(String readableName) throws RSyntaxException { StringBuffer rIdentifierBuffer = new StringBuffer(readableName.length()); if(readableName.length() > 0) { rIdentifierBuffer.append(fromReadableHeadCharToRIdentifier( readableName.charAt(0))); for(int i = 1; i < readableName.length(); i++) { rIdentifierBuffer.append(fromReadableTailCharToRIdentifier( readableName.charAt(i))); } } String rIdentifier = rIdentifierBuffer.toString(); if(isAReservedRIdentifier(rIdentifier)) { throw new RSyntaxException( "The name \"" + readableName + "\" clashes with a reserved " + "R identifier. Please change it."); } else { return rIdentifier; } } /** * Determine if the given string is a reserved R identifier * @param identifierToCheck * the identifier we're checking * @return * true iff its reserved */ private static boolean isAReservedRIdentifier(String identifierToCheck) { for(String currReservedIdentifier: RESERVED_R_IDENTIFIERS) { if(identifierToCheck.equals(currReservedIdentifier)) { return true; } } return false; } /** * Convert a readable head char into an R identifier * @param readableHeadChar * the readable head character * @return * the legal identifier char * @throws RSyntaxException * if we can't make the conversion to an R character */ private static char fromReadableHeadCharToRIdentifier(char readableHeadChar) throws RSyntaxException { char headIdentifierChar = fromReadableTailCharToRIdentifier(readableHeadChar); if(headIdentifierChar == '_' || (headIdentifierChar >= '0' && headIdentifierChar <= '9')) { throw new RSyntaxException( "Name cannot start with \'" + readableHeadChar + "\'. " + "Legal starting characters are \'a\'-\'z\' and \'A\'-\'Z\'"); } else { return headIdentifierChar; } } /** * Convert from an identifiers tail character (anything but the starting * character) to its "readable" counterpart * @param readableTailChar * the readable character * @return * the readable character * @throws RSyntaxException * if we cannot make the conversion for the given character */ private static char fromReadableTailCharToRIdentifier(char readableTailChar) throws RSyntaxException { if(readableTailChar == '_') { return readableTailChar; } else { if(readableTailChar == ' ') { return '_'; } else { if(readableTailChar >= '0' && readableTailChar <= '9') { return readableTailChar; } else { char upperReadableTailChar = Character.toUpperCase(readableTailChar); if(upperReadableTailChar >= 'A' && upperReadableTailChar <= 'Z') { return readableTailChar; } else { throw new RSyntaxException( "Name cannot contain \'" + readableTailChar + "\'. Legal values are \'_\', \'a\'-\'z\', " + "\'A\'-\'Z\', \'0\'-\'9\' and spaces."); } } } } } /** * Quote the given identifier if it is needed * @param identifier * the identifier that we might quote * @return * the R identifier for the readable name * @throws RSyntaxException * if we can't convert the given readable name to an identifier */ public static String quoteIdentifierIfRequired(String identifier) throws RSyntaxException { int nameLen = identifier.length(); if(nameLen == 0) { throw new RSyntaxException( "identifier name cannot be empty"); } else { boolean quotingRequired = false; backtickCheck(identifier.charAt(0)); if(!isLeagalIdentifierStartChar(identifier.charAt(0))) { quotingRequired = true; } else { for(int i = 1; i < nameLen; i++) { char currChar = identifier.charAt(i); backtickCheck(currChar); if(!isLeagalIdentifierChar(currChar)) { quotingRequired = true; break; } } } if(quotingRequired || isAReservedRIdentifier(identifier)) { // backticks can make almost any invalid identifier a valid // identifier return BACKTICK + identifier + BACKTICK; } else { return identifier; } } } /** * Throws an exception if the given character is a backtick * @param character * the character * @throws RSyntaxException * the exception (if its a backtick) */ private static void backtickCheck(char character) throws RSyntaxException { if(character == BACKTICK) { throw new RSyntaxException( "Backticks '`' are not permitted in identifier names"); } } /** * return true iff the given character is legal in R even when unquoted * @param startChar * the character to test * @return * true if it's legal when unquoted */ private static boolean isLeagalIdentifierStartChar(char startChar) { return (startChar >= 'a' && startChar <= 'z') || (startChar >= 'A' && startChar <= 'Z'); } /** * Like {@link #isLeagalIdentifierStartChar(char)} but for the internal * characters * @param character * the characters * @return * true or false */ private static boolean isLeagalIdentifierChar(char character) { return isLeagalIdentifierStartChar(character) || character == '_' || character == '.' || (character >= '0' && character <= '9'); } }