/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package java.lang; /** * The wrapper for the primitive type {@code char}. This class also provides a * number of utility methods for working with characters. * <p> * Character data is based upon the Unicode Standard, 4.0. The Unicode * specification, character tables and other information are available at <a * href="http://www.unicode.org/">http://www.unicode.org/</a>. * <p> * Unicode characters are referred to as <i>code points</i>. The range of valid * code points is U+0000 to U+10FFFF. The <i>Basic Multilingual Plane (BMP)</i> * is the code point range U+0000 to U+FFFF. Characters above the BMP are * referred to as <i>Supplementary Characters</i>. On the Java platform, UTF-16 * encoding and {@code char} pairs are used to represent code points in the * supplementary range. A pair of {@code char} values that represent a * supplementary character are made up of a <i>high surrogate</i> with a value * range of 0xD800 to 0xDBFF and a <i>low surrogate</i> with a value range of * 0xDC00 to 0xDFFF. * <p> * On the Java platform a {@code char} value represents either a single BMP code * point or a UTF-16 unit that's part of a surrogate pair. The {@code int} type * is used to represent all Unicode code points. * * JUniversal REMOVED: * * Serialization support * * TYPE static (since Class not supported) * * Unicode category constants & type related methods * * Everything pertaining to Unicode blocks * * codePoint related methods & surrogate contants * * title case related methods * * directionality constants & methods * * Java identifier related methods * * @since 1.0 */ public final class Character implements Comparable<Character> { private final char value; /** * The minimum {@code Character} value. */ public static final char MIN_VALUE = '\u0000'; /** * The maximum {@code Character} value. */ public static final char MAX_VALUE = '\uffff'; /** * The minimum radix used for conversions between characters and integers. */ public static final int MIN_RADIX = 2; /** * The maximum radix used for conversions between characters and integers. */ public static final int MAX_RADIX = 36; // Note: This can't be set to "char.class", since *that* is // defined to be "java.lang.Character.TYPE"; /** * The number of bits required to represent a {@code Character} value * unsigned form. * * @since 1.5 */ public static final int SIZE = 16; /** * Constructs a new {@code Character} with the specified primitive char * value. * * @param value * the primitive char value to store in the new instance. */ public Character(char value) { this.value = value; } /** * Gets the primitive value of this character. * * @return this object's primitive value. */ public char charValue() { return value; } /** * Compares this object to the specified character object to determine their * relative order. * * @param c * the character object to compare this object to. * @return {@code 0} if the value of this character and the value of * {@code c} are equal; a positive value if the value of this * character is greater than the value of {@code c}; a negative * value if the value of this character is less than the value of * {@code c}. * @see java.lang.Comparable * @since 1.2 */ public int compareTo(Character c) { return value - c.value; } /** * Returns a {@code Character} instance for the {@code char} value passed. * For ASCII/Latin-1 characters (and generally all characters with a Unicode * value up to 512), this method should be used instead of the constructor, * as it maintains a cache of corresponding {@code Character} instances. * * JUniversal CHANGE: Cache optimization was removed for now * * @param c * the char value for which to get a {@code Character} instance. * @return the {@code Character} instance for {@code c}. * @since 1.5 */ public static Character valueOf(char c) { return new Character(c); } /** * Convenience method to determine the value of the specified character * {@code c} in the supplied radix. The value of {@code radix} must be * between MIN_RADIX and MAX_RADIX. * * JUniversal CHANGE: Only ASCII characters are detected as potential letters * * @param c * the character to determine the value of. * @param radix * the radix. * @return the value of {@code c} in {@code radix} if {@code radix} lies * between {@link #MIN_RADIX} and {@link #MAX_RADIX}; -1 otherwise. */ public static int digit(char c, int radix) { if (radix >= MIN_RADIX && radix <= MAX_RADIX) { // Optimized for ASCII (and only ASCII is supported for JUniversal) int result = -1; if ('0' <= c && c <= '9') { result = c - '0'; } else if ('a' <= c && c <= 'z') { result = c - ('a' - 10); } else if ('A' <= c && c <= 'Z') { result = c - ('A' - 10); } return result < radix ? result : -1; } return -1; } /** * Compares this object with the specified object and indicates if they are * equal. In order to be equal, {@code object} must be an instance of * {@code Character} and have the same char value as this object. * * @param object * the object to compare this double with. * @return {@code true} if the specified object is equal to this * {@code Character}; {@code false} otherwise. */ @Override public boolean equals(Object object) { return (object instanceof Character) && (value == ((Character) object).value); } /** * Returns the character which represents the specified digit in the * specified radix. The {@code radix} must be between {@code MIN_RADIX} and * {@code MAX_RADIX} inclusive; {@code digit} must not be negative and * smaller than {@code radix}. If any of these conditions does not hold, 0 * is returned. * * @param digit * the integer value. * @param radix * the radix. * @return the character which represents the {@code digit} in the * {@code radix}. */ public static char forDigit(int digit, int radix) { if (MIN_RADIX <= radix && radix <= MAX_RADIX) { if (0 <= digit && digit < radix) { return (char) (digit < 10 ? digit + '0' : digit + 'a' - 10); } } return 0; } /** * Gets the numeric value of the specified Unicode character. * * JUniversal CHANGE: Only ASCII characters potentially return a value different than -1 * * @param c * the Unicode character to get the numeric value of. * @return a non-negative numeric integer value if a numeric value for * {@code c} exists, -1 if there is no numeric value for {@code c}, * -2 if the numeric value can not be represented with an integer. */ public static int getNumericValue(char c) { // Optimized for ASCII (and only ASCII is supported for JUniversal) if (c >= '0' && c <= '9') { return c - '0'; } if (c >= 'a' && c <= 'z') { return c - ('a' - 10); } if (c >= 'A' && c <= 'Z') { return c - ('A' - 10); } return -1; } @Override public int hashCode() { return value; } /** * Indicates whether the specified character is a digit. * * JUniversal CHANGE: Only ASCII characters are detected as potentially digits * * @param c * the character to check. * @return {@code true} if {@code c} is a digit; {@code false} * otherwise. */ public static boolean isDigit(char c) { // Optimized case for ASCII (and only ASCII is supported for JUniversal) if ('0' <= c && c <= '9') { return true; } return false; } /** * Indicates whether the specified character is an ISO control character. * * @param c * the character to check. * @return {@code true} if {@code c} is an ISO control character; * {@code false} otherwise. */ public static boolean isISOControl(char c) { return isISOControl((int)c); } /** * Indicates whether the specified code point is an ISO control character. * * @param c * the code point to check. * @return {@code true} if {@code c} is an ISO control character; * {@code false} otherwise. */ public static boolean isISOControl(int c) { return (c >= 0 && c <= 0x1f) || (c >= 0x7f && c <= 0x9f); } /** * Indicates whether the specified character is a letter. * * JUniversal CHANGE: Only ASCII characters are detected as potential letters * * @param c * the character to check. * @return {@code true} if {@code c} is a letter; {@code false} otherwise. */ public static boolean isLetter(char c) { if (('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z')) { return true; } return false; } /** * Indicates whether the specified character is a letter or a digit. * * JUniversal CHANGE: Only ASCII characters are detected as potential letters or digits * * @param c * the character to check. * @return {@code true} if {@code c} is a letter or a digit; {@code false} * otherwise. */ public static boolean isLetterOrDigit(char c) { return isLetter(c) || isDigit(c); } /** * Indicates whether the specified character is a lower case letter. * * JUniversal CHANGE: Only ASCII characters are detected as potentially lower case * * @param c * the character to check. * @return {@code true} if {@code c} is a lower case letter; {@code false} * otherwise. */ public static boolean isLowerCase(char c) { // Optimized case for ASCII (and only ASCII is supported for JUniversal) if ('a' <= c && c <= 'z') { return true; } return false; } /** * Indicates whether the specified character is a Java space. * * @param c * the character to check. * @return {@code true} if {@code c} is a Java space; {@code false} * otherwise. * @deprecated Use {@link #isWhitespace(char)} */ @Deprecated public static boolean isSpace(char c) { return c == '\n' || c == '\t' || c == '\f' || c == '\r' || c == ' '; } /** * Indicates whether the specified character is a Unicode space character. * That is, if it is a member of one of the Unicode categories Space * Separator, Line Separator, or Paragraph Separator. * * @param c * the character to check. * @return {@code true} if {@code c} is a Unicode space character, * {@code false} otherwise. */ public static boolean isSpaceChar(char c) { if (c == 0x20 || c == 0xa0 || c == 0x1680) { return true; } if (c < 0x2000) { return false; } return c <= 0x200b || c == 0x2028 || c == 0x2029 || c == 0x202f || c == 0x3000; } /** * Indicates whether the specified character is an upper case letter. * * JUniversal CHANGE: Only ASCII characters are changed in case * * @param c * the character to check. * @return {@code true} if {@code c} is a upper case letter; {@code false} * otherwise. */ public static boolean isUpperCase(char c) { // Optimized case for ASCII (and only ASCII is supported for JUniversal) if ('A' <= c && c <= 'Z') { return true; } return false; } /** * Indicates whether the specified character is a whitespace character in * Java. * * @param c * the character to check. * @return {@code true} if the supplied {@code c} is a whitespace character * in Java; {@code false} otherwise. */ public static boolean isWhitespace(char c) { // Optimized case for ASCII if ((c >= 0x1c && c <= 0x20) || (c >= 0x9 && c <= 0xd)) { return true; } if (c == 0x1680) { return true; } if (c < 0x2000 || c == 0x2007) { return false; } return c <= 0x200b || c == 0x2028 || c == 0x2029 || c == 0x3000; } /** * Reverses the order of the first and second byte in the specified * character. * * @param c * the character to reverse. * @return the character with reordered bytes. */ public static char reverseBytes(char c) { return (char)((c<<8) | (c>>8)); } /** * Returns the lower case equivalent for the specified character if the * character is an upper case letter. Otherwise, the specified character is * returned unchanged. * * JUniversal CHANGE: Only ASCII characters are changed in case * * @param c * the character * @return if {@code c} is an upper case character then its lower case * counterpart, otherwise just {@code c}. */ public static char toLowerCase(char c) { // Optimized case for ASCII (and only ASCII is supported for JUniversal) if ('A' <= c && c <= 'Z') { return (char) (c + ('a' - 'A')); } return c; } @Override public String toString() { return String.valueOf(value); } /** * Converts the specified character to its string representation. * * @param value * the character to convert. * @return the character converted to a string. */ public static String toString(char value) { return String.valueOf(value); } /** * Returns the upper case equivalent for the specified character if the * character is a lower case letter. Otherwise, the specified character is * returned unchanged. * * JUniversal CHANGE: Only ASCII characters are changed * * @param c * the character to convert. * @return if {@code c} is a lower case character then its upper case * counterpart, otherwise just {@code c}. */ public static char toUpperCase(char c) { // Optimized case for ASCII (and only ASCII is supported for JUniversal) if ('a' <= c && c <= 'z') { return (char) (c - ('a' - 'A')); } return c; } }