/* * CookieParser.java February 2001 * * Copyright (C) 2001, Niall Gallagher <niallg@users.sf.net> * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or * implied. See the License for the specific language governing * permissions and limitations under the License. */ package org.simpleframework.http.parse; import java.util.Iterator; import org.simpleframework.http.Cookie; import org.simpleframework.util.parse.Parser; /** * CookieParser is used to parse the cookie header. The cookie header is one of * the headers that is used by the HTTP state management mechanism. The Cookie * header is the header that is sent from the client to the server in response * to a Set-Cookie header. The syntax of the Cookie header as taken from RFC * 2109, HTTP State Management Mechanism. * * <pre> * * cookie = "Cookie:" cookie-version * 1*((";" | ",") cookie-value) * cookie-value = NAME "=" VALUE [";" path] [";" domain] * cookie-version = "$Version" "=" value * NAME = attr * VALUE = value * path = "$Path" "=" value * domain = "$Domain" "=" value * * </pre> * * The cookie header may consist of several cookies. Each cookie can be * extracted from the header by examining the it syntax of the cookie header. * The syntax of the cookie header is defined in RFC 2109. * <p> * Each cookie has a <code>$Version</code> attribute followed by multiple * cookies. Each contains a name and a value, followed by an optional * <code>$Path</code> and <code>$Domain</code> attribute. This will parse a * given cookie header and return each cookie extracted as a <code>Cookie</code> * object. * * @author Niall Gallagher */ public class CookieParser extends Parser implements Iterable<Cookie> { /** * Determines when the <code>Parser</code> has finished. */ private boolean finished; /** * Used so the <code>Parser</code> does not parse twice. */ private boolean parsed; /** * Version of the <code>Cookie</code> being parsed. */ private int version; /** * Used to store the name of the <code>Cookie</code>. */ private Token name; /** * Used to store the value of the <code>Cookie</code>. */ private Token value; /** * Used to store the <code>$Path</code> values. */ private Token path; /** * Used to store the <code>$Domain</code> values. */ private Token domain; /** * Create a <code>CookieParser</code> that contains no cookies. the instance * will return <code>false</code> for the <code>hasNext</code> method. * cookies may be parsed using this instance by using the <code>parse</code> * method. */ public CookieParser() { this.path = new Token(); this.domain = new Token(); this.name = new Token(); this.value = new Token(); this.finished = true; } /** * This is primarily a convineance constructor. This will parse the * <code>String</code> given to extract the cookies. This could be achived * by calling the default no-arg constructor and then using the instance to * invoke the <code>parse</code> method on that <code>String</code>. * * @param header * a <code>String</code> containing a cookie value */ public CookieParser(String header) { this(); this.parse(header); } /** * Resets the cookie and the buffer variables for this * <code>CookieParser</code>. It is used to set the state of the parser to * start parsing a new cookie. */ @Override protected void init() { this.finished = false; this.parsed = false; this.version = 0; this.off = 0; this.version(); } /** * This will extract the next <code>Cookie</code> from the buffer. If all * the characters in the buffer have already been examined then this method * will simply do nothing. Otherwise this will parse the remainder of the * buffer and (if it follows RFC 2109) produce a <code>Cookie</code>. */ @Override protected void parse() { if (!this.finished) { this.cookie(); this.parsed = true; } } /** * This is used to skip an arbitrary <code>String</code> within the * <code>char</code> buf. It checks the length of the <code>String</code> * first to ensure that it will not go out of bounds. A comparison is then * made with the buffers contents and the <code>String</code> if the reigon * in the buffer matched the <code>String</code> then the offset within the * buffer is increased by the <code>String</code>'s length so that it has * effectively skipped it. * <p> * This <code>skip</code> method will ignore all of the whitespace text. * This will also skip trailing spaces within the the input text and all * spaces within the source text. For example if the input was the string * "s omete xt" and the source was "some text to skip" then the result of a * skip ignoring spaces would be "to skip" in the source string, as the * trailing spaces are also eaten by this. * * @param text * this is the <code>String</code> value to be skipped * * @return true if the <code>String</code> was skipped */ @Override protected boolean skip(String text) { int size = text.length(); int seek = this.off; int read = 0; if ((this.off + size) > this.count) return false; while (read < size) { char a = text.charAt(read); char b = this.buf[seek]; if (this.space(b)) { if (++seek >= this.count) return false; } else if (this.space(a)) { if (++read >= size) { continue; } } else { if (this.toLower(a) != this.toLower(b)) return false; read++; seek++; } } for (this.off = seek; this.off < this.count; this.off++) { if (!this.space(this.buf[this.off])) { break; } } return true; } /** * This is used to acquire the cookie values from the provided the provided * source text. This allows the cookie parser to be used within a for each * loop to parse out the values of a cookie one by one so that they may be * used or stored. * * @return this returns an iterator for extracting cookie value */ @Override public Iterator<Cookie> iterator() { return new Sequence(); } /** * This is used so that the collection of <code>Cookies</code> can be * reiterated. This allows the collection to be reused. The * <code>reset</code> method will invoke the super classes <code>init</code> * method. This will reinitialize this <code>Parser</code> so the cookie * will be reparsed. */ public void reset() { this.init(); this.parse(); } /** * Creates the <code>Cookie</code> from the token objects. It is assumed * that the <code>Cookie</code> <code>String</code> has been parsed when * this is called. This should only be used after the <code>parse</code> * method has been called. * <p> * If there is no <code>$Domain</code> or <code>$Path</code> within the * <code>Cookie</code> <code>String</code> then the <code>getDomain</code> * and <code>getPath</code> are null. * * @return the <code>Cookie</code> that was just parsed */ private Cookie getCookie() { return this.getCookie(this.name.toString(), this.value.toString()); } /** * Creates the <code>Cookie</code> from the token objects. It is assumed * that the <code>Cookie</code> <code>String</code> has been parsed when * this is called. This should only be used after the <code>parse</code> * method has been called. * <p> * If there is no <code>$Domain</code> or <code>$Path</code> within the * <code>Cookie</code> <code>String</code> then the <code>getDomain</code> * and <code>getPath</code> are null. * * @param name * the name that the <code>Cookie</code> contains * @param value * the value that the <code>Cookie</code> contains * * @return the <code>Cookie</code> that was just parsed */ private Cookie getCookie(String name, String value) { Cookie cookie = new Cookie(name, value, false); if (this.domain.len > 0) { cookie.setDomain(this.domain.toString()); } if (this.path.len > 0) { cookie.setPath(this.path.toString()); } cookie.setVersion(this.version); return cookie; } /** * This is used to parse a <code>Cookie</code> from the buffer that contains * the <code>Cookie</code> values. This will first try to remove any * trailing value after the version/prev <code>Cookie</code> once this is * removed it will extract the name/value pair from the <code>Cookie</code>. * The name and value of the <code>Cookie</code> will be saved by the name * and value tokens. */ private void cookie() { if (!this.skip(",")) { /* ,|; */ this.skip(";"); } this.name(); this.skip("="); /* = */ this.value(); } /** * This initializes the name token and extracts the name of this * <code>Cookie</code>. The offset and length of the name will be saved in * the name token. This will read all <code>char</code>'s upto but excluding * the first '=' <code>char</code> encountered from the <code>off</code> * within the buffer. */ private void name() { this.name.off = this.off; this.name.len = 0; while (this.off < this.count) { if (this.buf[this.off] == '=') { break; } this.name.len++; this.off++; } } /** * Used to extract everything found after the <code>NAME '='</code> within a * <code>Cookie</code>. This extracts the <code>Cookie</code> value the * <code>$Path</code> and <code>$Domain</code> attributes if they exist * (i.e. <code>$Path</code> and <code>$Domain</code> are optional in a * cookie see RFC 2109). * <p> * The path method reads the terminal found before it as does the * <code>domain</code> method that is ";$Path" is read as the first part of * the path method. This is because if there is no path the parser should * not read data it does not know belongs to a specific part of the * <code>Cookie</code>. */ private void value() { this.data(); this.path(); this.domain(); } /** * This initializes the value token and extracts the value of this * <code>Cookie</code>. The offset and length of the value will be saved in * the value token. This will read all <code>char</code>'s upto but * excluding the first terminal char encountered from the off within the * buffer, or if the value is a literal it will read a literal from the * buffer (literal is any data between quotes except if the quote is * prefixed with a backward slash character that is '\'). */ private void data() { this.value.off = this.off; this.value.len = 0; if ((this.off < this.count) && (this.buf[this.off] == '"')) { this.value.len++; for (this.off++; this.off < this.count;) { this.value.len++; if (this.buf[this.off++] == '"') if (this.buf[this.off - 2] != '\\') { break; } } this.value.len -= 2; /* remove " */ this.value.off++; /* remove " */ } else { while (this.off < this.count) { if (this.terminal(this.buf[this.off])) { break; } this.value.len++; this.off++; } } } /** * This initializes the path token and extracts the <code>$Path</code> of * this <code>Cookie</code>. The offset and length of the path will be saved * in the path token. This will read all <code>char</code>'s up to but * excluding the first terminal <code>char</code> encountered from the * <code>off</code> within the buffer, or if the value is a literal it will * read a literal from the buffer (literal is any data between quotes except * if the quote is prefixed with a backward slash character, that is '\'). * <p> * This reads the terminal before the <code>$Path</code> so that if there is * no <code>$Path</code> for the <code>Cookie</code> then the character * before it will not be read needlessly. */ private void path() { this.path.len = 0; /* reset */ if (this.skip(";$Path=")) { this.path.off = this.off; if (this.buf[this.off] == '"') { this.path.len++; for (this.off++; this.off < this.count;) { this.path.len++; if (this.buf[this.off++] == '"') if (this.buf[this.off - 2] != '\\') { break; } } this.path.len -= 2; /* remove " */ this.path.off++; /* remove " */ } else { while (this.off < this.count) { if (this.terminal(this.buf[this.off])) { break; } this.path.len++; this.off++; } } } } /** * Initializes the domain token and extracts the <code>$Domain</code> of * this <code>Cookie</code>. The offset and length of the domain will be * saved in the path token. This will read all characters up to but * excluding the first terminal <code>char</code> encountered from the off * within the buffer, or if the value is a literal it will read a literal * from the buffer (literal is any data between quotes except if the quote * is prefixed with a backward slash character, that is '\'). * <p> * This reads the terminal before the <code>$Domain</code> so that if there * is no <code>$Domain</code> for the <code>Cookie</code> then the character * before it will not be read needlessly. */ private void domain() { this.domain.len = 0; /* reset */ if (this.skip(";$Domain=")) { this.domain.off = this.off; if (this.buf[this.off] == '"') { this.domain.len++; for (this.off++; this.off < this.count;) { this.domain.len++; if (this.buf[this.off++] == '"') if (this.buf[this.off - 2] != '\\') { break; } } this.domain.len -= 2; /* remove " */ this.domain.off++; /* remove " */ } else { while (this.off < this.count) { if (this.terminal(this.buf[this.off])) { break; } this.domain.len++; this.off++; } } } } /** * This extracts the <code>$Version</code> of this <code>Cookie</code>. The * version is parsed and converted into a decimal int from the digit * characters that make up a version. * <p> * This will read all digit <code>char</code>'s up to but excluding the * first non digit <code>char</code> that it encounters from the offset * within the buffer, or if the value is a literal it will read a literal * from the buffer (literal is any data between quotes except if the quote * is prefixed with a backward slash character i.e. '\'). */ private void version() { if (this.skip("$Version=")) { if (this.buf[this.off] == '"') { this.off++; } while (this.off < this.count) { if (!this.digit(this.buf[this.off])) { break; } this.version *= 10; this.version += this.buf[this.off]; this.version -= '0'; this.off++; } if (this.buf[this.off] == '"') { this.off++; } } else { this.version = 1; } } /** * This is used to determine if a given iso8859-1 character is a terminal * character. That is either the ';' or ',' characters. Although the RFC * 2109 says the terminal can be either a comma, it is not used by any * browsers. * * @param ch * the character that is to be compared * * @return true if this is a semicolon character */ private boolean terminal(char ch) { return ch == ';'; } /** * This is used to represent an <code>Iterator</code> that will iterate over * the available cookies within the provided source text. This allows the * cookie parser to be used as an iterable with for each loops. Cookies can * not be removed with this. */ private class Sequence implements Iterator<Cookie> { /** * Extracts the next <code>Cookie</code> object from the string given. * This will return <code>null</code> when there are no more cookies * left in the <code>String</code> being parsed. * <p> * To find out when there are no more cookies left use the * <code>hasNext</code> method. This will only set the name, value, * path, domain name version of the <code>cookie</code> because as of * RFC 2109 these are the only attributes a <code>Cookie</code> may * have, the path and domain are optional. * * @return an initialized <code>Cookie</code> object */ @Override public Cookie next() { if (!this.hasNext()) return null; CookieParser.this.parsed = false; return CookieParser.this.getCookie(); } /** * Determine whether or not there are any <code>Cookie</code>s left in * the <code>String</code>. This will attempt to extract another * <code>Cookie</code> from the <code>String</code> and cache the result * so the <code>next</code> method will produce this <code>Cookie</code> * . If another <code>Cookie</code> cannot be parsed from the remainder * of the <code>String</code> then this will return <code>false</code> * otherwise it will return <code>true</code>. * * @return true if there are more cookies false otherwise */ @Override public boolean hasNext() { if (CookieParser.this.finished) return false; if (CookieParser.this.parsed) return true; CookieParser.this.parse(); if (CookieParser.this.name.len <= 0) { CookieParser.this.finished = true; return false; } return true; } /** * This method is used to remove items from the iterator. This however * performs no action as the act of parsing should not modify the * underlying source text value so that it can be reset with the * <code>reset</code> method and used again. */ @Override public void remove() { return; } } /** * This is a token object that is used to store the offset and length of a * region of chars in the <code>CookieParser.buf</code> array. The * <code>toString</code> method of this token will produce the * <code>String</code> value of the region it represents. */ private class Token { /** * The numer of characters that were consumed by this token. */ public int len; /** * The offset within the buffer that this token starts from. */ public int off; /** * This converts region within the buffer to a <code>String</code>. This * converts the region only if there is a sufficient length. * * @return the <code>String</code> value of the region */ @Override public String toString() { return new String(CookieParser.this.buf, this.off, this.len); } } }