/* * AddressParser.java February 2001 * * Copyright (C) 2001, Niall Gallagher <niallg@users.sf.net> * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or * implied. See the License for the specific language governing * permissions and limitations under the License. */ package org.simpleframework.http.parse; import org.simpleframework.http.Address; import org.simpleframework.http.Path; import org.simpleframework.http.Query; import org.simpleframework.util.KeyMap; import org.simpleframework.util.parse.Parser; /** * This parser is used to parse uniform resource identifiers. The uniform * resource identifier syntax is given in RFC 2396. This parser can parse * relative and absolute URI's. The uniform resource identifier syntax that this * parser will parse are based on the generic web based URL similar to the * syntax represented in RFC 2616 section 3.2.2. The syntax used to parse this * URI is a modified version of RFC 2396 * * <pre> * * URI = (absoluteURI | relativeURI) * absoluteURI = scheme ":" ("//" netpath | relativeURI) * relativeURI = path ["?" querypart] * netpath = domain [":" port] relativeURI * path = *("/" segment) * segment = *pchar *( ";" param ) * * </pre> * * This implements the <code>Address</code> interface and provides methods that * access the various parts of the URI. The parameters in the path segments of * the uniform resource identifier are stored in name value pairs. If parameter * names are not unique across the path segments then only the deepest parameter * will be stored from the path segment. For example if the URI represented was * <code>http://domain/path1;x=y/path2;x=z</code> the value for the parameter * named <code>x</code> would be <code>z</code>. * <p> * This will normalize the path part of the uniform resource identifier. A * normalized path is one that contains no back references like "./" and "../". * The normalized path will not contain the path parameters. * <p> * The <code>setPath</code> method is used to reset the path this uniform * resource identifier has, it also resets the parameters. The parameters are * extracted from the new path given. * * @author Niall Gallagher */ public class AddressParser extends Parser implements Address { /** * Parameters are stored so that the can be viewed. */ private ParameterMap param; /** * This is the path used to represent the address path. */ private Path normal; /** * This contains the query parameters for the address. */ private Query data; /** * Used to track the characters that form the path. */ private Token path; /** * Used to track the characters that form the domain. */ private Token domain; /** * Used to track the characters that form the query. */ private Token query; /** * Used to track the name characters of a parameter. */ private Token name; /** * Used to track the value characters of a parameter. */ private Token value; /** * References the scheme that this URI contains. */ private Token scheme; /** * Contains the port number if it was specified. */ private int port; /** * Default constructor will create a <code>AddressParser</code> that * contains no specifics. The instance will return <code>null</code> for all * the get methods. The parsers get methods are populated by using the * <code>parse</code> method. */ public AddressParser() { this.param = new ParameterMap(); this.path = new Token(); this.domain = new Token(); this.query = new Token(); this.scheme = new Token(); this.name = new Token(); this.value = new Token(); } /** * This is primarily a convenience constructor. This will parse the * <code>String</code> given to extract the specifics. This could be * achieved by calling the default no-arg constructor and then using the * instance to invoke the <code>parse</code> method on that * <code>String</code> to extract the parts. * * @param text * a <code>String</code> containing a URI value */ public AddressParser(String text) { this(); this.parse(text); } /** * This allows the scheme of the URL given to be returned. If the URI does * not contain a scheme then this will return null. The scheme of the URI is * the part that specifies the type of protocol that the URI is used for, an * example <code>gopher://domain/path</code> is a URI that is intended for * the gopher protocol. The scheme is the string <code>gopher</code>. * * @return this returns the scheme tag for the URI if there is one specified * for it */ @Override public String getScheme() { return this.scheme.toString(); } /** * This is used to retrive the domain of this URI. The domain part in the * URI is an optional part, an example * <code>http://domain/path?querypart</code>. This will return the value of * the domain part. If there is no domain part then this will return null * otherwise the domain value found in the uniform resource identifier. * * @return the domain part of this uniform resource identifier this * represents */ @Override public String getDomain() { return this.domain.toString(); } /** * This is used to retrive the path of this URI. The path part is the most * fundemental part of the URI. This will return the value of the path. If * there is no path part then this will return <code>/</code> to indicate * the root. * <p> * The <code>Path</code> object returned by this will contain no path * parameters. The path parameters are available using the * <code>Address</code> methods. The reason that this does not contain any * of the path parameters is so that if the path is needed to be converted * into ab OS specific path then the path parameters will not need to be * separately parsed out. * * @return the path that this URI contains, this value will not contain any * back references such as "./" and "../" or any path parameters */ @Override public Path getPath() { String text = this.path.toString(); if (text == null) { this.normal = new PathParser("/"); } if (this.normal == null) { this.normal = new PathParser(text); } return this.normal; } /** * This is used to retrieve the query of this URI. The query part in the URI * is an optional part. This will return the value of the query part. If * there is no query part then this will return an empty <code>Query</code> * object. The query is an optional member of a URI and comes after the path * part, it is preceded by a question mark, <code>?</code> character. For * example the following URI contains <code>query</code> for its query part, * <code>http://host:port/path?query</code>. * <p> * This returns a <code>org.simpleframework.http.Query</code> object that * can be used to interact directly with the query values. The * <code>Query</code> object is a read-only interface to the query * parameters, and so will not affect the URI. * * @return a <code>Query</code> object for the query part */ @Override public Query getQuery() { String text = this.query.toString(); if (text == null) { this.data = new QueryParser(); } if (this.data == null) { this.data = new QueryParser(text); } return this.data; } /** * This is used to retrive the port of the uniform resource identifier. The * port part in this is an optional part, an example * <code>http://host:port/path?querypart</code>. This will return the value * of the port. If there is no port then this will return <code>-1</code> * because this represents an impossible uniform resource identifier port. * The port is an optional part. * * @return this returns the port of the uniform resource identifier */ @Override public int getPort() { return this.port <= 0 ? -1 : this.port; } /** * This extracts the parameter values from the uniform resource identifier * represented by this object. The parameters that a uniform resource * identifier contains are embedded in the path part of the URI. If the path * contains no parameters then this will return an empty <code>Map</code> * instance. * <p> * This will produce unique name and value parameters. Thus if the URI * contains several path segments with similar parameter names this will * return the deepest parameter. For example if the URI represented was * <code>http://domain/path1;x=y/path2;x=z</code> the value for the * parameter named <code>x</code> would be <code>z</code>. * * @return this will return the parameter names found in the URI */ @Override public KeyMap<String> getParameters() { return this.param; } /** * This allows the scheme for the URI to be specified. If the URI does not * contain a scheme then this will attach the scheme and the * <code>://</code> identifier to ensure that the * <code>Address.toString</code> will produce the correct syntax. * <p> * Caution must be taken to ensure that the port and the scheme are * consistent. So if the original URI was <code>http://domain:80/path</code> * and the scheme was changed to <code>ftp</code> the port number that * remains is the standard HTTP port not the FTP port. * * @param value * this specifies the protocol this URI is intended for */ public void setScheme(String value) { this.scheme.value = value; } /** * This will set the domain to whatever value is in the string parameter. If * the string is null then this URI objects <code>toString</code> method * will not contain the domain. The result of the <code>toString</code> * method will be <code>/path/path?query</code>. If the path is non-null * this URI will contain the path. * * @param value * this will be the new domain of this uniform resource * identifier, if it is not null */ public void setDomain(String value) { this.domain.value = value; } /** * This will set the port to whatever value it is given. If the value is 0 * or less then the <code>toString</code> will will not contain the optional * port. If port number is above 0 then the <code>toString</code> method * will produce a URI like <code>http://host:123/path</code> but only if * there is a valid domain. * * @param port * the port value that this URI is to have */ public void setPort(int port) { this.port = port; } /** * This will set the path to whatever value it is given. If the value is * null then this <code>Address.toString</code> method will not contain the * path, that is if path is null then it will be interpreted as * <code>/</code>. * <p> * This will reset the parameters this URI has. If the value given to this * method has embedded parameters these will form the parameters of this * URI. The value given may not be the same value that the * <code>getPath</code> produces. The path will have all back references and * parameters stripped. * * @param text * the path that this URI is to be set with */ public void setPath(String text) { if (!text.startsWith("/")) { text = "/" + text; } this.domain.toString(); this.query.toString(); this.scheme.toString(); this.param.clear(); this.path.clear(); this.parsePath(text); /* extract params */ } /** * This will set the path to whatever value it is given. If the value is * null then this <code>Address.toString</code> method will not contain the * path, that is if path is null then it will be interpreted as * <code>/</code>. * <p> * This will reset the parameters this URI has. If the value given to this * method has embedded parameters these will form the parameters of this * URI. The value given may not be the same value that the * <code>getPath</code> produces. The path will have all back references and * parameters stripped. * * @param path * the path that this URI is to be set with */ public void setPath(Path path) { if (path != null) { this.normal = path; } else { this.setPath("/"); } } /** * This is used to parse the path given with the <code>setPath</code> * method. The path contains name and value pairs. These parameters are * embedded into the path segments using a semicolon character, ';'. Since * the parameters to not form part of the actual path mapping they are * removed from the path and stored. Each parameter can then be extracted * from this parser using the methods provided by the <code>Address</code> * interface. * * @param path * this is the path that is to be parsed and have the parameter * values extracted */ private void parsePath(String path) { this.count = path.length(); this.ensureCapacity(this.count); path.getChars(0, this.count, this.buf, 0); this.normal = null; this.off = 0; this.path(); } /** * This will set the query to whatever value it is given. If the value is * null then this <code>Address.toString</code> method will not contain the * query. If the query was <code>abc</code> then the <code>toString</code> * method would produca a string like <code>http://host:port/path?abc</code> * . If the query is null this URI would have no query part. The query must * not contain the <code>?</code> character. * * @param value * the query that this uniform resource identifier is to be set * to if it is non-null */ public void setQuery(String value) { this.query.value = value; this.data = null; } /** * This will set the query to whatever value it is given. If the value is * null then this <code>Address.toString</code> method will not contain the * query. If the <code>Query.toString</code> returns null then the query * will be empty. This is basically the <code>setQuery(String)</code> method * with the string value from the issued <code>Query.toString</code> method. * * @param query * a <code>Query</code> object that contains the name value * parameters for the query */ public void setQuery(Query query) { if (this.value != null) { this.data = query; } else { this.setQuery(""); } } /** * This will check to see what type of URI this is if it is an * <code>absoluteURI</code> or a <code>relativeURI</code>. To see the * definition of a URI see RFC 2616 for the definition of a URL and for more * specifics see RFC 2396 for the expressions. */ @Override protected void parse() { if (this.count > 0) { if (this.buf[0] == '/') { this.relativeURI(); } else { this.absoluteURI(); } } } /** * This will empty each tokens cache. A tokens cache is used to represent a * token once the token's <code>toString</code> method has been called. Thus * when the <code>toString</code> method is called then the token depends on * the value of the cache alone in further calls to <code>toString</code>. * However if a URI has just been parsed and that method has not been * invoked then the cache is created from the buf if its length is greater * than zero. */ @Override protected void init() { this.param.clear(); this.domain.clear(); this.path.clear(); this.query.clear(); this.scheme.clear(); this.off = this.port = 0; this.normal = null; this.data = null; } /** * This is a specific definition of a type of URI. An absolute URI is a URI * that contains a host and port. It is the most frequently used type of * URI. This will define the host and the optional port part. As well as the * relative URI part. This uses a simpler syntax than the one specified in * RFC 2396 <code><pre> * * absoluteURI = scheme ":" ("//" netpath | relativeURI) * relativeURI = path ["?" querypart] * netpath = domain [":" port] relativeURI * path = *("/" segment) * segment = *pchar *( ";" param ) * * </pre></code> This syntax is sufficent to handle HTTP style URI's as well * as GOPHER and FTP and various other 'simple' schemes. See RFC 2396 for * the syntax of an <code>absoluteURI</code>. */ private void absoluteURI() { this.scheme(); this.netPath(); } /** * This will check to see if there is a scheme in the URI. If there is a * scheme found in the URI this returns true and removes that scheme tag of * the form "ftp:" or "http:" or whatever the protocol scheme tag may be for * the URI. * <p> * The syntax for the scheme is given in RFC 2396 as follows <code><pre> * * scheme = alpha *( alpha | digit | "+" | "-" | "." ) * * </pre></code> This will however also skips the "://" from the tag so of * the URI was <code>gopher://domain/path</code> then the URI would be * <code>domain/path</code> afterwards. */ private void scheme() { int mark = this.off; int pos = this.off; if (this.alpha(this.buf[this.off])) { while (this.off < this.count) { char next = this.buf[this.off++]; if (this.schemeChar(next)) { pos++; } else if (next == ':') { if (!this.skip("//")) { this.off = mark; pos = mark; } break; } else { this.off = mark; pos = mark; break; } } this.scheme.len = pos - mark; this.scheme.off = mark; } } /** * This method is used to assist the scheme method. This will check to see * if the type of the character is the same as those described in RFC 2396 * for a scheme character. The scheme tag can contain an alphanumeric of the * following <code>"+", "-", "."</code>. * * @param c * this is the character that is being checked * * @return this returns true if the character is a valid scheme character */ private boolean schemeChar(char c) { switch (c) { case '+': case '-': case '.': return true; default: return this.alphanum(c); } } /** * The network path is the path that contains the network address of the * host that this URI is targeted at. This will parse the domain name of the * host and also a port number before parsing a relativeURI <code><pre> * * netpath = domain [":" port] relativeURI * * </pre></code> This syntax is modified from the URI specification on RFC * 2396. */ private void netPath() { this.domain(); if (this.skip(":")) { this.port(); } this.relativeURI(); } /** * This is a specific definition of a type of URI. A relative URI is a URI * that contains no host or port. It is basically the resource within the * host. This will extract the path and the optional query part of the URI. * Rfc2396 has the proper definition of a <code>relativeURI</code>. */ private void relativeURI() { this.path(); if (this.skip("?")) { this.query(); } } /** * This is used to extract the optional port from a given URI. This will * read a sequence of digit characters and convert the <code>String</code> * of digit characters into a decimal number. The digits will be added to * the port variable. If there is no port number this will not update the * read offset. */ private void port() { while (this.off < this.count) { if (!this.digit(this.buf[this.off])) { break; } this.port *= 10; this.port += this.buf[this.off]; this.port -= '0'; this.off++; } } /** * This is used to extract the domain from the given URI. This will firstly * initialize the token object that represents the domain. This allows the * token's <code>toString</code> method to return the extracted value of the * token rather than getting confused with previous values set by a previous * parse method. * <p> * This uses the following delimiters to determine the end of the domain * <code>?</code>,<code>:</code> and <code>/<code>. This * ensures that the read offset does not go out of bounds and * consequently throw an <code>IndexOutOfBoundsException</code>. */ private void domain() { int mark = this.off; loop: while (this.off < this.count) { switch (this.buf[this.off]) { case '/': case ':': case '?': break loop; default: this.off++; } } this.domain.len = this.off - mark; this.domain.off = mark; } /** * This is used to extract the segments from the given URI. This will * firstly initialize the token object that represents the path. This allows * the token's <code>toString</code> method to return the extracted value of * the token rather than getting confused with previous values set by a * previous parse method. * <p> * This is slightly different from RFC 2396 in that it defines a pchar as * the RFC 2396 definition of a pchar without the escaped chars. So this * method has to ensure that no escaped chars go unchecked. This ensures * that the read offset does not go out of bounds and throw an * <code>IndexOutOfBoundsException</code>. */ private void path() { int mark = this.off; int pos = this.off; while (this.skip("/")) { this.buf[pos++] = '/'; while (this.off < this.count) { if (this.buf[this.off] == ';') { while (this.skip(";")) { this.param(); this.insert(); } break; } if (this.buf[this.off] == '%') { this.escape(); } else if (!this.pchar(this.buf[this.off])) { break; } this.buf[pos++] = this.buf[this.off++]; } } this.path.len = pos - mark; this.path.off = mark; } /** * This is used to extract the query from the given URI. This will firstly * initialize the token object that represents the query. This allows the * token's <code>toString</code> method to return the extracted value of the * token rather than getting confused with previous values set by a previous * parse method. The calculation of the query part of a URI is basically the * end of the URI. */ private void query() { this.query.len = this.count - this.off; this.query.off = this.off; } /** * This is an expression that is defined by RFC 2396 it is used in the * definition of a segment expression. This is basically a list of pchars. * <p> * This method has to ensure that no escaped chars go unchecked. This * ensures that the read offset does not goe out of bounds and consequently * throw an out of bounds exception. */ private void param() { this.name(); if (this.skip("=")) { /* in case of error */ this.value(); } } /** * This extracts the name of the parameter from the character buffer. The * name of a parameter is defined as a set of pchars including escape * sequences. This will extract the parameter name and buffer the chars. The * name ends when a equals character, "=", is encountered or in the case of * a malformed parameter when the next character is not a pchar. */ private void name() { int mark = this.off; int pos = this.off; while (this.off < this.count) { if (this.buf[this.off] == '%') { /* escaped */ this.escape(); } else if (this.buf[this.off] == '=') { break; } else if (!this.pchar(this.buf[this.off])) { break; } this.buf[pos++] = this.buf[this.off++]; } this.name.len = pos - mark; this.name.off = mark; } /** * This extracts a parameter value from a path segment. The parameter value * consists of a sequence of pchars and some escape sequences. The parameter * value is buffered so that the name and values can be paired. The end of * the value is determined as the end of the buffer or the last pchar. */ private void value() { int mark = this.off; int pos = this.off; while (this.off < this.count) { if (this.buf[this.off] == '%') { /* escaped */ this.escape(); } else if (!this.pchar(this.buf[this.off])) { break; } this.buf[pos++] = this.buf[this.off++]; } this.value.len = pos - mark; this.value.off = mark; } /** * This method adds the name and value to a map so that the next name and * value can be collected. The name and value are added to the map as string * objects. Once added to the map the <code>Token</code> objects are set to * have zero length so they can be reused to collect further values. This * will add the values to the map as an array of type string. This is done * so that if there are multiple values that they can be stored. */ private void insert() { if (this.value.length() > 0) { if (this.name.length() > 0) { this.insert(this.name, this.value); } } this.name.clear(); this.value.clear(); } /** * This will add the given name and value to the parameters map. This will * only store a single value per parameter name, so only the parameter that * was latest encountered will be saved. The <code>getQuery</code> method * can be used to collect the parameter values using the parameter name. * * @param name * this is the name of the value to be inserted * @param value * this is the value of a that is to be inserted */ private void insert(Token name, Token value) { this.insert(name.toString(), value.toString()); } /** * This will add the given name and value to the parameters map. This will * only store a single value per parameter name, so only the parameter that * was latest encountered will be saved. The <code>getQuery</code> method * can be used to collect the parameter values using the parameter name. * * @param name * this is the name of the value to be inserted * @param value * this is the value of a that is to be inserted */ private void insert(String name, String value) { this.param.put(name, value); } /** * This converts an encountered escaped sequence, that is all embedded * hexidecimal characters into a native UCS character value. This does not * take any characters from the stream it just prepares the buffer with the * correct byte. The escaped sequence within the URI will be interpreded as * UTF-8. * <p> * This will leave the next character to read from the buffer as the * character encoded from the URI. If there is a fully valid escaped * sequence, that is <code>"%" HEX HEX</code>. This decodes the escaped * sequence using UTF-8 encoding, all encoded sequences should be in UCS-2 * to fit in a Java char. */ private void escape() { int peek = this.peek(this.off); if (!this.unicode(peek)) { this.binary(peek); } } /** * This method determines, using a peek character, whether the sequence of * escaped characters within the URI is binary data. If the data within the * escaped sequence is binary then this will ensure that the next character * read from the URI is the binary octet. This is used strictly for backward * compatible parsing of URI strings, binary data should never appear. * * @param peek * this is the first escaped character from the URI * * @return currently this implementation always returns true */ private boolean binary(int peek) { if ((this.off + 2) < this.count) { this.off += 2; this.buf[this.off] = this.bits(peek); } return true; } /** * This method determines, using a peek character, whether the sequence of * escaped characters within the URI is in UTF-8. If a UTF-8 character can * be successfully decoded from the URI it will be the next character read * from the buffer. This can check for both UCS-2 and UCS-4 characters. * However, because the Java <code>char</code> can only hold UCS-2, the * UCS-4 characters will have only the low order octets stored. * <p> * The WWW Consortium provides a reference implementation of a UTF-8 * decoding for Java, in this the low order octets in the UCS-4 sequence are * used for the character. So, in the absence of a defined behaviour, the * W3C behaviour is assumed. * * @param peek * this is the first escaped character from the URI * * @return this returns true if a UTF-8 character is decoded */ private boolean unicode(int peek) { if ((peek & 0x80) == 0x00) return this.unicode(peek, 0); if ((peek & 0xe0) == 0xc0) return this.unicode(peek & 0x1f, 1); if ((peek & 0xf0) == 0xe0) return this.unicode(peek & 0x0f, 2); if ((peek & 0xf8) == 0xf0) return this.unicode(peek & 0x07, 3); if ((peek & 0xfc) == 0xf8) return this.unicode(peek & 0x03, 4); if ((peek & 0xfe) == 0xfc) return this.unicode(peek & 0x01, 5); return false; } /** * This method will decode the specified amount of escaped characters from * the URI and convert them into a single Java UCS-2 character. If there are * not enough characters within the URI then this will return false and * leave the URI alone. * <p> * The number of characters left is determined from the first UTF-8 octet, * as specified in RFC 2279, and because this is a URI there must that * number of <code>"%" HEX HEX</code> sequences left. If successful the next * character read is the UTF-8 sequence decoded into a native UCS-2 * character. * * @param peek * contains the bits read from the first UTF octet * @param more * this specifies the number of UTF octets left * * @return this returns true if a UTF-8 character is decoded */ private boolean unicode(int peek, int more) { if ((this.off + (more * 3)) >= this.count) return false; return this.unicode(peek, more, this.off); } /** * This will decode the specified amount of trailing UTF-8 bits from the * URI. The trailing bits are those following the first UTF-8 octet, which * specifies the length, in octets, of the sequence. The trailing octets are * if the form 10xxxxxx, for each of these octets only the last six bits are * valid UCS bits. So a conversion is basically an accumulation of these. * <p> * If at any point during the accumulation of the UTF-8 bits there is a * parsing error, then parsing is aborted an false is returned, as a result * the URI is left unchanged. * * @param peek * bytes that have been accumulated from the URI * @param more * this specifies the number of UTF octets left * @param pos * this specifies the position the parsing begins * * @return this returns true if a UTF-8 character is decoded */ private boolean unicode(int peek, int more, int pos) { while (more-- > 0) { if (this.buf[pos] == '%') { int next = pos + 3; int hex = this.peek(next); if ((hex & 0xc0) == 0x80) { peek = (peek << 6) | (hex & 0x3f); pos = next; continue; } } return false; } if ((pos + 2) < this.count) { this.off = pos + 2; this.buf[this.off] = this.bits(peek); } return true; } /** * Defines behaviour for UCS-2 versus UCS-4 conversion from four octets. The * UTF-8 encoding scheme enables UCS-4 characters to be encoded and * decodeded. However, Java supports the 16-bit UCS-2 character set, and so * the 32-bit UCS-4 character set is not compatable. This basically decides * what to do with UCS-4. * * @param data * up to four octets to be converted to UCS-2 format * * @return this returns a native UCS-2 character from the int */ private char bits(int data) { return (char) data; } /** * This will return the escape expression specified from the URI as an * integer value of the hexidecimal sequence. This does not make any changes * to the buffer it simply checks to see if the characters at the position * specified are an escaped set characters of the form * <code>"%" HEX HEX</code>, if so, then it will convert that hexidecimal * string in to an integer value, or -1 if the expression is not * hexidecimal. * * @param pos * this is the position the expression starts from * * @return the integer value of the hexidecimal expression */ private int peek(int pos) { if (this.buf[pos] == '%') { if (this.count <= (pos + 2)) return -1; char high = this.buf[pos + 1]; char low = this.buf[pos + 2]; return this.convert(high, low); } return -1; } /** * This will convert the two hexidecimal characters to a real integer value, * which is returned. This requires characters within the range of 'A' to * 'F' and 'a' to 'f', and also the digits '0' to '9'. The characters * encoded using the ISO-8859-1 encoding scheme, if the characters are not * with in the range specified then this returns -1. * * @param high * this is the high four bits within the integer * @param low * this is the low four bits within the integer * * @return this returns the indeger value of the conversion */ private int convert(char high, char low) { int hex = 0x00; if (this.hex(high) && this.hex(low)) { if (('A' <= high) && (high <= 'F')) { high -= 'A' - 'a'; } if (high >= 'a') { hex ^= (high - 'a') + 10; } else { hex ^= high - '0'; } hex <<= 4; if (('A' <= low) && (low <= 'F')) { low -= 'A' - 'a'; } if (low >= 'a') { hex ^= (low - 'a') + 10; } else { hex ^= low - '0'; } return hex; } return -1; } /** * This is used to determine wheather a char is a hexidecimal * <code>char</code> or not. A hexidecimal character is consdered to be a * character within the range of <code>0 - 9</code> and between * <code>a - f</code> and <code>A - F</code>. This will return * <code>true</code> if the character is in this range. * * @param ch * this is the character which is to be determined here * * @return true if the character given has a hexidecimal value */ private boolean hex(char ch) { if ((ch >= '0') && (ch <= '9')) return true; else if ((ch >= 'a') && (ch <= 'f')) return true; else if ((ch >= 'A') && (ch <= 'F')) return true; return false; } /** * This is a character set defined by RFC 2396 it is used to determine the * valididity of cetain <code>chars</code> within a Uniform Resource * Identifier. RFC 2396 defines an unreserverd char as * <code>alphanum | mark</code>. * * @param c * the character value that is being checked * * @return true if the character has an unreserved value */ private boolean unreserved(char c) { return this.mark(c) || this.alphanum(c); } /** * This is used to determine wheather or not a given unicode character is an * alphabetic character or a digit character. That is withing the range * <code>0 - 9</code> and between <code>a - z</code> it uses * <code>iso-8859-1</code> to compare the character. * * @param c * the character value that is being checked * * @return true if the character has an alphanumeric value */ private boolean alphanum(char c) { return this.digit(c) || this.alpha(c); } /** * This is used to determine wheather or not a given unicode character is an * alphabetic character. This uses encoding <code>iso-8859-1</code> to * compare the characters. * * @param c * the character value that is being checked * * @return true if the character has an alphabetic value */ private boolean alpha(char c) { return ((c <= 'z') && ('a' <= c)) || ((c <= 'Z') && ('A' <= c)); } /** * This is a character set defined by RFC 2396 it checks the valididity of * cetain chars within a uniform resource identifier. The RFC 2396 defines a * mark char as <code>"-", * "_", ".", "!", "~", "*", "'", "(", ")"</code>. * * @param c * the character value that is being checked * * @return true if the character is a mark character */ private boolean mark(char c) { switch (c) { case '-': case '_': case '.': case '!': case '~': case '*': case '\'': case '(': case ')': return true; default: return false; } } /** * This is a character set defined by RFC 2396 it is used to check the * valididity of cetain chars within a generic uniform resource identifier. * The RFC 2396 defines a pchar char as unreserved or escaped or one of the * following characters <code>":", "@", "=", * "&", "+", "$", ","</code> this will not check to see if the char is * an escaped char, that is <code>% HEX HEX</code>. Because this takes 3 * chars. * * @param c * the character value that is being checked * * @return true if the character is a pchar character */ private boolean pchar(char c) { switch (c) { case '@': case '&': case '=': case '+': case '$': case ',': case ':': return true; default: return this.unreserved(c); } } /** * This is used to convert this URI object into a <code>String</code> * object. This will only convert the parts of the URI that exist, so the * URI may not contain the domain or the query part and it will not contain * the path parameters. If the URI contains all these parts then it will * return somthing like * * <pre> * scheme://host:port/path/path?querypart * </pre> * <p> * It can return <code>/path/path?querypart</code> style relative URI's. If * any of the parts are set to null then that part will be missing, for * example if <code>setDomain</code> method is invoked with a null parameter * then the domain and port will be missing from the resulting URI. If the * path part is set to null using the <code>setPath</code> then the path * will be <code>/</code>. An example URI with the path part of null would * be * * <pre> * scheme://host:port/?querypart * </pre> * * @return the URI with only the path part and the non-null optional parts * of the uniform resource identifier */ @Override public String toString() { return (this.scheme.length() > 0 ? this.scheme + "://" : "") + (this.domain.length() > 0 ? this.domain + (this.port > 0 ? ":" + this.port : "") : "") + this.getPath() + (this.param.size() > 0 ? this.param : "") + (this.query.length() > 0 ? "?" + this.query : ""); } /** * The <code>ParameterMap</code> is uses to store the parameters that are to * be encoded in to the address. This will append all of the parameters to * the end of the path. These can later be extracted by parsing the address. * * @author Niall Gallagher */ private class ParameterMap extends KeyMap<String> { /** * This will return the parameters encoded in such a way that it can be * appended to the end of the path. These parameters can be added to the * address such that they do not form a query parameter. Values such as * session identifiers are often added as the path parameters to the * address. * * @return this returns the representation of the parameters */ private String encode() { StringBuilder text = new StringBuilder(); for (String name : AddressParser.this.param) { String value = AddressParser.this.param.get(name); text.append(";"); text.append(name); if (value != null) { text.append("="); text.append(value); ; } } return text.toString(); } /** * This will return the parameters encoded in such a way that it can be * appended to the end of the path. These parameters can be added to the * address such that they do not form a query parameter. Values such as * session identifiers are often added as the path parameters to the * address. * * @return this returns the representation of the parameters */ @Override public String toString() { return this.encode(); } } /** * This is used as an alternative to the <code>ParseBuffer</code> for * extracting tokens from the URI without allocating memory. This will * basically mark out regions within the buffer which are used to represent * the token. When the token value is required the region is used to create * a <code>String</code> object. */ private class Token { /** * This can be used to override the value for this token. */ public String value; /** * This represents the start offset within the buffer. */ public int off; /** * This represents the number of charters in the token. */ public int len; /** * If the <code>Token</code> is to be reused this will clear all * previous data. Clearing the buffer allows it to be reused if there is * a new URI to be parsed. This ensures that a null is returned if the * token length is zero. */ public void clear() { this.value = null; this.len = 0; } /** * This is used to determine the number of characters this token * contains. This is used rather than accessing the length directly so * that the value the token represents can be overridden easily without * upsetting the token. * * @return this returns the number of characters this uses */ public int length() { if (this.value == null) return this.len; return this.value.length(); } /** * This method will convert the <code>Token</code> into it's * <code>String</code> equivelant. This will firstly check to see if * there is a value, for the string representation, if there is the * value is returned, otherwise the region is converted into a * <code>String</code> and returned. * * @return this returns a value representing the token */ @Override public String toString() { if (this.value != null) return this.value; if (this.len > 0) { this.value = new String(AddressParser.this.buf, this.off, this.len); } return this.value; } } }