QueryParser.java example

Explorer
someluigis-peripherals-master
- slp_common
/*
 * QueryParser.java December 2002
 *
 * Copyright (C) 2002, Niall Gallagher <niallg@users.sf.net>
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 
 * implied. See the License for the specific language governing 
 * permissions and limitations under the License.
 */

package org.simpleframework.http.parse;

import java.net.URLEncoder;
import java.util.Set;

import org.simpleframework.http.Query;
import org.simpleframework.util.parse.MapParser;

/**
 * The <code>QueryParser</code> is used to parse data encoded in the
 * <code>application/x-www-form-urlencoded</code> MIME type. It is also used to
 * parse a query string from a HTTP URL, see RFC 2616. The parsed parameters are
 * available through the various methods of the
 * <code>org.simpleframework.http.Query</code> interface. The syntax of the
 * parsed parameters is described below in BNF.
 * 
 * <pre>
 * 
 *    params  = *(pair [ "&" params])
 *    pair    = name "=" value
 *    name    = *(text | escaped)
 *    value   = *(text | escaped)
 *    escaped = "%" HEX HEX
 * 
 * </pre>
 * 
 * This will consume all data found as a name or value, if the data is a "+"
 * character then it is replaced with a space character. This regards only "=",
 * "&", and "%" as having special values. The "=" character delimits the
 * name from the value and the "&" delimits the name value pair. The "%"
 * character represents the start of an escaped sequence, which consists of two
 * hex digits. All escaped sequences are converted to their character values,
 * while a "%" that is not followed by two hex digits is kept as a literal
 * character.
 * <p>
 * Input that does not match the grammar is handled leniently. A pair that has
 * no "=" is taken as a name with an empty value, the name ends at the next
 * "&". A pair with an empty name is discarded.
 * 
 * @author Niall Gallagher
 */
public class QueryParser extends MapParser<String> implements Query {

    /**
     * Marks the region of the buffer holding the current parameter name.
     */
    private final Token name;

    /**
     * Marks the region of the buffer holding the current parameter value.
     */
    private final Token value;

    /**
     * Constructor for the <code>QueryParser</code>. This creates an empty
     * instance that can be used to parse HTML form data and URL query strings
     * encoded as application/x-www-form-urlencoded. The parsed parameters are
     * made available through the interface
     * <code>org.simpleframework.http.Query</code>.
     */
    public QueryParser() {
        this.name = new Token();
        this.value = new Token();
    }

    /**
     * Constructor for the <code>QueryParser</code>. This creates an instance
     * and immediately parses the text provided, which is expected to be HTML
     * form data or a URL query string encoded as
     * application/x-www-form-urlencoded. The parsed parameters are made
     * available through the interface
     * <code>org.simpleframework.http.Query</code>.
     * 
     * @param text
     *            this is the text to parse for the parameters
     */
    public QueryParser(String text) {
        this();
        this.parse(text);
    }

    /**
     * This extracts an integer parameter for the named value. If the named
     * parameter does not exist this will return a zero value. If however the
     * parameter exists but is not in the format of a decimal integer value then
     * this will throw an exception.
     * 
     * @param name
     *            the name of the parameter value to retrieve
     * 
     * @return this returns the named parameter value as an integer
     * 
     * @throws NumberFormatException
     *             if the parameter exists but is not a decimal integer
     */
    @Override
    public int getInteger(Object name) {
        String value = this.get(name);

        if (value == null) {
            return 0;
        }
        return Integer.parseInt(value);
    }

    /**
     * This extracts a float parameter for the named value. If the named
     * parameter does not exist this will return a zero value. If however the
     * parameter exists but is not in the format of a floating point number then
     * this will throw an exception.
     * 
     * @param name
     *            the name of the parameter value to retrieve
     * 
     * @return this returns the named parameter value as a float
     * 
     * @throws NumberFormatException
     *             if the parameter exists but is not a floating point number
     */
    @Override
    public float getFloat(Object name) {
        String value = this.get(name);

        if (value == null) {
            return 0.0f;
        }
        return Float.parseFloat(value);
    }

    /**
     * This extracts a boolean parameter for the named value. The result is
     * true only when the parameter exists and its value is the text
     * <code>true</code>, compared without regard to case. A missing parameter
     * or any other value gives false.
     * 
     * @param name
     *            the name of the parameter value to retrieve
     * 
     * @return this returns the named parameter value as a boolean
     */
    @Override
    public boolean getBoolean(Object name) {
        /* parseBoolean treats null as false, so no separate check is needed */
        return Boolean.parseBoolean(this.get(name));
    }

    /**
     * This initializes the parser so that it can be used several times. This
     * clears the inherited <code>all</code> and <code>map</code> collections,
     * empties both tokens and moves the read offset back to the start of the
     * buffer. This ensures that when the next <code>parse(String)</code> is
     * invoked the status of the <code>Query</code> is empty.
     */
    @Override
    protected void init() {
        this.all.clear();
        this.map.clear();
        this.name.len = 0;
        this.value.len = 0;
        this.off = 0;
    }

    /**
     * This performs the actual parsing of the parameter text. The parameters
     * parsed from this are taken as "name=value" pairs. Multiple pairs within
     * the text are separated by an "&". One pair is always attempted, further
     * pairs are read for as long as an "&" can be skipped.
     */
    @Override
    protected void parse() {
        do {
            this.param();
        } while (this.skip("&"));
    }

    /**
     * This method adds the collected name and value to the parameters so that
     * the next name and value can be collected. A pair with an empty name is
     * not added. In either case both <code>Token</code> objects are set to
     * have zero length afterwards so they can be reused to collect further
     * values.
     */
    private void insert() {
        if (this.name.len > 0) {
            this.insert(this.name, this.value);
        }
        this.name.len = 0;
        this.value.len = 0;
    }

    /**
     * This will add the given name and value to the parameters. Both tokens
     * are converted to strings and handed to the inherited <code>put</code>
     * method, which decides how a repeated name is treated.
     * 
     * @param name
     *            this is the name of the value to be inserted
     * @param value
     *            this is the value that is to be inserted
     */
    private void insert(Token name, Token value) {
        this.put(name.toString(), value.toString());
    }

    /**
     * This reads a single parameter from the buffer. The name is read first,
     * the value is read only if an "=" follows the name. If there is no "="
     * the value token keeps its zero length, so the parameter is stored with
     * an empty value. The pair is then inserted and the tokens are reset.
     */
    private void param() {
        this.name();
        if (this.skip("=")) { /* no "=" means there is no value to read */
            this.value();
        }
        this.insert();
    }

    /**
     * This extracts the name of the parameter from the character buffer. The
     * name of a parameter is defined as a set of chars including escape
     * sequences. The name ends when an unescaped "=" or "&" is encountered, or
     * when the end of the buffer is reached. Stopping at the "&" ensures a
     * parameter with no value does not absorb the pair that follows it.
     */
    private void name() {
        this.scan(this.name, true);
    }

    /**
     * This extracts a parameter value from the character buffer. The parameter
     * value consists of a sequence of chars and some escape sequences. The end
     * of the value is determined as the end of the buffer or an unescaped
     * ampersand, so an "=" within the value is kept as part of the value.
     */
    private void value() {
        this.scan(this.value, false);
    }

    /**
     * This reads characters from the current offset in to the token given.
     * Escaped sequences are decoded and each "+" is replaced with a space. The
     * decoded characters are written back in to the buffer in place, starting
     * at the offset the scan began from, which is possible because the decoded
     * text is never longer than the encoded text.
     * <p>
     * The terminating character is left unread. A character produced by an
     * escaped sequence is never treated as a terminator.
     * 
     * @param token
     *            this is the token that will mark the decoded region
     * @param key
     *            true if an "=" should end the scan as well as an "&"
     */
    private void scan(Token token, boolean key) {
        int mark = this.off;
        int pos = mark;

        while (this.off < this.count) {
            char next = this.buf[this.off];

            if (next == '%') { /* escaped */
                this.escape();
            } else if ((next == '&') || (key && (next == '='))) {
                break;
            } else if (next == '+') {
                this.buf[this.off] = ' ';
            }
            this.buf[pos++] = this.buf[this.off++];
        }
        token.len = pos - mark;
        token.off = mark;
    }

    /**
     * This converts an encountered escaped sequence, that is all embedded
     * hexadecimal characters, into a native UCS character value. This does not
     * take any characters from the stream, it just prepares the buffer with
     * the correct character. The escaped sequence is interpreted as UTF-8
     * where possible, otherwise the single octet is used as the character.
     * <p>
     * On return the read offset refers to the decoded character. If the "%"
     * is not followed by two hexadecimal digits then nothing is changed, which
     * means the "%" is read as a literal character.
     */
    private void escape() {
        int peek = this.peek(this.off);

        if (peek < 0) { /* malformed, keep the "%" as it is */
            return;
        }
        if (!this.unicode(peek)) {
            this.binary(peek);
        }
    }

    /**
     * This method stores a single escaped octet as the next character to be
     * read. It is used when the escaped sequence could not be decoded as
     * UTF-8. If both hexadecimal digits are within the buffer the read offset
     * is moved on to the second digit, which is overwritten with the octet.
     * This is used strictly for backward compatible parsing of URI strings,
     * binary data should never appear.
     * 
     * @param peek
     *            this is the first escaped character from the URI
     * 
     * @return currently this implementation always returns true
     */
    private boolean binary(int peek) {
        if ((this.off + 2) < this.count) {
            this.off += 2;
            this.buf[this.off] = this.bits(peek);
        }
        return true;
    }

    /**
     * This method determines, using a peek character, whether the sequence of
     * escaped characters within the URI is in UTF-8. If a UTF-8 character can
     * be successfully decoded from the URI it will be the next character read
     * from the buffer. The leading bits of the first octet give the number of
     * octets that follow, from none up to five. Because the Java
     * <code>char</code> holds sixteen bits, a longer sequence will have only
     * its low order bits stored.
     * <p>
     * The WWW Consortium provides a reference implementation of a UTF-8
     * decoding for Java, in this the low order octets in the UCS-4 sequence are
     * used for the character. So, in the absence of a defined behaviour, the
     * W3C behaviour is assumed.
     * 
     * @param peek
     *            this is the first escaped character from the URI
     * 
     * @return this returns true if a UTF-8 character is decoded
     */
    private boolean unicode(int peek) {
        if ((peek & 0x80) == 0x00) {
            return this.unicode(peek, 0);
        }
        if ((peek & 0xe0) == 0xc0) {
            return this.unicode(peek & 0x1f, 1);
        }
        if ((peek & 0xf0) == 0xe0) {
            return this.unicode(peek & 0x0f, 2);
        }
        if ((peek & 0xf8) == 0xf0) {
            return this.unicode(peek & 0x07, 3);
        }
        if ((peek & 0xfc) == 0xf8) {
            return this.unicode(peek & 0x03, 4);
        }
        if ((peek & 0xfe) == 0xfc) {
            return this.unicode(peek & 0x01, 5);
        }
        return false;
    }

    /**
     * This method will decode the specified amount of escaped characters from
     * the URI and convert them into a single Java character. If there are not
     * enough characters within the URI then this will return false and leave
     * the URI alone.
     * <p>
     * The number of characters left is determined from the first UTF-8 octet,
     * as specified in RFC 2279, and because this is a URI there must be that
     * number of <code>"%" HEX HEX</code> sequences left. If successful the next
     * character read is the UTF-8 sequence decoded into a native character.
     * 
     * @param peek
     *            contains the bits read from the first UTF octet
     * @param more
     *            this specifies the number of UTF octets left
     * 
     * @return this returns true if a UTF-8 character is decoded
     */
    private boolean unicode(int peek, int more) {
        /* each trailing octet occupies three characters of the buffer */
        if ((this.off + (more * 3)) >= this.count) {
            return false;
        }
        return this.unicode(peek, more, this.off);
    }

    /**
     * This will decode the specified amount of trailing UTF-8 bits from the
     * URI. The trailing bits are those following the first UTF-8 octet, which
     * specifies the length, in octets, of the sequence. The trailing octets are
     * of the form 10xxxxxx, for each of these octets only the last six bits are
     * valid UCS bits. So a conversion is basically an accumulation of these.
     * <p>
     * If at any point during the accumulation of the UTF-8 bits there is a
     * parsing error, then parsing is aborted and false is returned, as a result
     * the URI is left unchanged. On success the read offset is moved to the
     * last character of the final escaped sequence, which is overwritten with
     * the decoded character.
     * 
     * @param peek
     *            bits that have been accumulated from the URI
     * @param more
     *            this specifies the number of UTF octets left
     * @param pos
     *            this specifies the position the parsing begins
     * 
     * @return this returns true if a UTF-8 character is decoded
     */
    private boolean unicode(int peek, int more, int pos) {
        while (more-- > 0) {
            if (this.buf[pos] != '%') {
                return false;
            }
            int next = pos + 3;
            int hex = this.peek(next);

            /* a failed peek is -1, which also fails this test */
            if ((hex & 0xc0) != 0x80) {
                return false;
            }
            peek = (peek << 6) | (hex & 0x3f);
            pos = next;
        }
        if ((pos + 2) < this.count) {
            this.off = pos + 2;
            this.buf[this.off] = this.bits(peek);
        }
        return true;
    }

    /**
     * Defines behaviour for UCS-2 versus UCS-4 conversion from four octets. The
     * UTF-8 encoding scheme enables UCS-4 characters to be encoded and
     * decoded. However, a Java <code>char</code> holds only sixteen bits, so
     * this simply casts the value, which keeps the low order sixteen bits.
     * 
     * @param data
     *            the decoded value to be converted to a character
     * 
     * @return this returns a native character from the int
     */
    private char bits(int data) {
        return (char) data;
    }

    /**
     * This will return the escape expression specified from the URI as an
     * integer value of the hexadecimal sequence. This does not make any changes
     * to the buffer it simply checks to see if the characters at the position
     * specified are an escaped set of characters of the form
     * <code>"%" HEX HEX</code>, if so, then it will convert that hexadecimal
     * string in to an integer value. If the position does not hold a "%", the
     * two digits do not both lie within the buffer, or they are not
     * hexadecimal, this returns -1.
     * 
     * @param pos
     *            this is the position the expression starts from
     * 
     * @return the integer value of the hexadecimal expression, or -1
     */
    private int peek(int pos) {
        if ((this.buf[pos] != '%') || (this.count <= (pos + 2))) {
            return -1;
        }
        return this.convert(this.buf[pos + 1], this.buf[pos + 2]);
    }

    /**
     * This will convert the two hexadecimal characters to a real integer value,
     * which is returned. This requires characters within the range of 'A' to
     * 'F' and 'a' to 'f', and also the digits '0' to '9'. If the characters
     * are not within the range specified then this returns -1.
     * 
     * @param high
     *            this is the high four bits within the integer
     * @param low
     *            this is the low four bits within the integer
     * 
     * @return this returns the integer value of the conversion, or -1
     */
    private int convert(char high, char low) {
        /* checked first as Character.digit also accepts non ASCII digits */
        if (!this.hex(high) || !this.hex(low)) {
            return -1;
        }
        return (Character.digit(high, 16) << 4) | Character.digit(low, 16);
    }

    /**
     * This is used to determine whether a char is a hexadecimal
     * <code>char</code> or not. A hexadecimal character is considered to be a
     * character within the range of <code>0 - 9</code> and between
     * <code>a - f</code> and <code>A - F</code>. This will return
     * <code>true</code> if the character is in this range.
     * 
     * @param ch
     *            this is the character which is to be determined here
     * 
     * @return true if the character given has a hexadecimal value
     */
    private boolean hex(char ch) {
        return ((ch >= '0') && (ch <= '9'))
                || ((ch >= 'a') && (ch <= 'f'))
                || ((ch >= 'A') && (ch <= 'F'));
    }

    /**
     * This <code>encode</code> method will escape the text that is provided.
     * This is used so that the parameter pairs can be encoded in such a way
     * that they can be transferred over HTTP/1.1 using the ISO-8859-1
     * character set. A null is returned as it is. Should the UTF-8 encoding
     * be reported as unsupported the text is returned without being escaped.
     * 
     * @param text
     *            this is the text that is to be escaped
     * 
     * @return the text with % HEX HEX UTF-8 escape sequences
     */
    private String encode(String text) {
        if (text == null) {
            return null;
        }
        try {
            return URLEncoder.encode(text, "UTF-8");
        } catch (java.io.UnsupportedEncodingException e) {
            return text;
        }
    }

    /**
     * This <code>encode</code> method will escape the name=value pair provided
     * using the UTF-8 character set. This method will ensure that the
     * parameters are encoded in such a way that they can be transferred via
     * HTTP in ISO-8859-1. Because the parts are joined by concatenation, a
     * null name or value appears as the text <code>null</code>.
     * 
     * @param name
     *            this is the name that is to be escaped
     * @param value
     *            this is the value that is to be escaped
     * 
     * @return the pair with % HEX HEX UTF-8 escape sequences
     */
    private String encode(String name, String value) {
        return this.encode(name) + "=" + this.encode(value);
    }

    /**
     * This <code>toString</code> method is used to compose a string in the
     * <code>application/x-www-form-urlencoded</code> MIME type. This will
     * encode the names specified in the <code>Set</code>, in the order the set
     * iterates them, pairing each with the value acquired for that name. Each
     * name=value pair is converted into a UTF-8 escape sequence so that the
     * parameters can be sent in the ISO-8859-1 format required via the
     * HTTP/1.1 specification RFC 2616.
     * 
     * @param set
     *            this is the set of parameter names to be encoded
     * 
     * @return returns a HTTP parameter encoding for the pairs
     */
    public String toString(Set set) {
        StringBuilder text = new StringBuilder();
        String separator = "";

        for (Object entry : set) {
            String name = entry.toString();
            String value = this.get(name);

            text.append(separator);
            text.append(this.encode(name, value));
            separator = "&";
        }
        return text.toString();
    }

    /**
     * This <code>toString</code> method is used to compose a string in the
     * <code>application/x-www-form-urlencoded</code> MIME type. This will
     * iterate over all names held in the inherited map, whether they were
     * added during parsing or during use of the instance. Each name=value pair
     * acquired is converted into a UTF-8 escape sequence so that the
     * parameters can be sent in the ISO-8859-1 format required via the
     * HTTP/1.1 specification RFC 2616.
     * 
     * @return returns a HTTP parameter encoding for the pairs, or an empty
     *         string if there are no parameters
     */
    @Override
    public String toString() {
        if (this.map.size() == 0) {
            return "";
        }
        return this.toString(this.map.keySet());
    }

    /**
     * This is used to mark regions within the buffer that represent a valid
     * token for either the name of a parameter or its value. The characters
     * are not copied, the token holds only an offset and a length in to the
     * buffer of the enclosing parser, so it requires only two integer values.
     */
    private class Token {

        /**
         * This represents the number of characters in the token.
         */
        public int len;

        /**
         * This represents the start offset within the buffer.
         */
        public int off;

        /**
         * In order to represent the <code>Token</code> as a value that can be
         * used this converts it to a <code>String</code>. If the length of the
         * token is less than or equal to zero this will return an empty string
         * for the value.
         * 
         * @return this returns a value representing the token
         */
        @Override
        public String toString() {
            if (this.len <= 0) {
                return "";
            }
            return new String(QueryParser.this.buf, this.off, this.len);
        }
    }
}