/* ************************************************************************
#
# DivConq
#
# http://divconq.com/
#
# Copyright:
# Copyright 2014 eTimeline, LLC. All rights reserved.
#
# License:
# See the license.txt file in the project's top-level directory for details.
#
# Authors:
# * Andy White
#
************************************************************************ */
/*
* CookieParser.java February 2001
*
* Copyright (C) 2001, Niall Gallagher <niallg@users.sf.net>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
package divconq.www.http.parse;
import divconq.www.http.Cookie;
import divconq.www.util.parse.Parser;
import java.util.Iterator;
/**
* CookieParser is used to parse the cookie header. The cookie header is
* one of the headers that is used by the HTTP state management mechanism.
* The Cookie header is the header that is sent from the client to the
* server in response to a Set-Cookie header. The syntax of the Cookie
* header, as taken from RFC 2109 (HTTP State Management Mechanism), is:
* <pre>
*
* cookie = "Cookie:" cookie-version
* 1*((";" | ",") cookie-value)
* cookie-value = NAME "=" VALUE [";" path] [";" domain]
* cookie-version = "$Version" "=" value
* NAME = attr
* VALUE = value
* path = "$Path" "=" value
* domain = "$Domain" "=" value
*
* </pre>
* The cookie header may consist of several cookies. Each cookie can be
* extracted from the header by examining the syntax of the cookie
* header. The syntax of the cookie header is defined in RFC 2109.
* <p>
* Each cookie header has a <code>$Version</code> attribute followed by
* multiple cookies. Each contains a name and a value, followed by an optional
* <code>$Path</code> and <code>$Domain</code> attribute. This will parse
* a given cookie header and return each cookie extracted as a
* <code>Cookie</code> object.
*
* @author Niall Gallagher
*/
public class CookieParser extends Parser implements Iterable<Cookie> {

    /**
     * Set to <code>true</code> once no further cookie can be
     * extracted from the buffer.
     */
    private boolean finished;

    /**
     * Set to <code>true</code> while a cookie has been parsed but has
     * not yet been handed out by the iterator, so that it is not
     * parsed a second time.
     */
    private boolean parsed;

    /**
     * Version assigned to every <code>Cookie</code> that is produced.
     */
    private int version;

    /**
     * Region of the buffer holding the name of the current cookie.
     */
    private final Token name;

    /**
     * Region of the buffer holding the value of the current cookie.
     */
    private final Token value;

    /**
     * Region of the buffer holding the <code>$Path</code> value.
     */
    private final Token path;

    /**
     * Region of the buffer holding the <code>$Domain</code> value.
     */
    private final Token domain;

    /**
     * Create a <code>CookieParser</code> that contains no cookies.
     * Because the parser starts out in the finished state, the
     * iterator's <code>hasNext</code> method returns
     * <code>false</code> until a header has been parsed.
     */
    public CookieParser() {
        this.path = new Token();
        this.domain = new Token();
        this.name = new Token();
        this.value = new Token();
        this.finished = true;
    }

    /**
     * Convenience constructor. This is the same as invoking the
     * no-arg constructor and then passing the header to the
     * inherited <code>parse</code> method.
     *
     * @param header a <code>String</code> containing a cookie value
     */
    public CookieParser(String header) {
        this();
        parse(header);
    }

    /**
     * Resets the state of this <code>CookieParser</code> so that the
     * buffer is read from the beginning again. The flags, the version
     * and the offset are cleared, and the optional leading
     * <code>$Version</code> attribute is consumed.
     */
    protected void init() {
        finished = false;
        parsed = false;
        version = 0;
        off = 0;
        version();
    }

    /**
     * Extracts the next cookie from the buffer into the name, value,
     * path and domain tokens. If the parser has already finished this
     * does nothing.
     * <p>
     * A cookie with an empty name marks the end of the input: in that
     * case the parser is flagged as finished instead of flagging a
     * cookie as available. This keeps an empty header (or a header
     * holding only <code>$Version</code>) from producing a cookie
     * with an empty name.
     */
    protected void parse() {
        if (!finished) {
            cookie();
            parsed = name.len > 0;
            finished = !parsed;
        }
    }

    /**
     * This is used to skip an arbitrary <code>String</code> within the
     * <code>char</code> buf. It checks the length of the <code>String</code>
     * first to ensure that it will not go out of bounds. A comparison
     * is then made with the buffers contents and the <code>String</code>;
     * if the region in the buffer matched the <code>String</code> then the
     * offset within the buffer is increased so that the matched region
     * has effectively been skipped. The comparison is made on the
     * lower case form of each character.
     * <p>
     * This <code>skip</code> method will ignore all of the whitespace text.
     * This will also skip trailing spaces within the input text and
     * all spaces within the source text. For example if the input was
     * the string "s omete xt" and the source was "some text to skip" then
     * the result of a skip ignoring spaces would be "to skip" in the
     * source string, as the trailing spaces are also eaten by this.
     * <p>
     * When the text is not matched the offset is left untouched.
     *
     * @param text this is the <code>String</code> value to be skipped
     *
     * @return true if the <code>String</code> was skipped
     */
    protected boolean skip(String text) {
        int size = text.length();
        int seek = off;
        int read = 0;

        if (off + size > count) {
            return false;
        }
        while (read < size) {
            /* spaces skipped in the buffer can exhaust it before the
               whole text has been matched, so guard every read */
            if (seek >= count) {
                return false;
            }
            char a = text.charAt(read);
            char b = buf[seek];

            if (space(b)) {
                seek++;
            } else if (space(a)) {
                read++;
            } else {
                if (toLower(a) != toLower(b)) {
                    return false;
                }
                read++;
                seek++;
            }
        }
        /* eat the spaces that trail the matched text */
        for (off = seek; off < count; off++) {
            if (!space(buf[off])) {
                break;
            }
        }
        return true;
    }

    /**
     * This is used to acquire the cookie values from the provided
     * source text. This allows the cookie parser to be used within
     * a for each loop to parse out the cookies one by one.
     * <p>
     * The iterator returned keeps no state of its own; it advances
     * the state of this parser, so all iterators obtained from the
     * same parser share one position.
     *
     * @return this returns an iterator for extracting cookie values
     */
    @Override
    public Iterator<Cookie> iterator() {
        return new Sequence();
    }

    /**
     * This is used so that the collection of cookies can be
     * reiterated. It invokes <code>init</code> to rewind the parser
     * and then <code>parse</code> to extract the first cookie again.
     */
    public void reset() {
        init();
        parse();
    }

    /**
     * Creates the <code>Cookie</code> from the name and value tokens.
     * This should only be used after a cookie has been parsed.
     *
     * @return the <code>Cookie</code> that was just parsed
     */
    private Cookie getCookie() {
        return getCookie(name.toString(), value.toString());
    }

    /**
     * Creates a <code>Cookie</code> with the given name and value and
     * applies the parsed attributes to it. The domain and the path are
     * only set on the cookie when the matching token is not empty; the
     * version is always set.
     *
     * @param name the name that the <code>Cookie</code> contains
     * @param value the value that the <code>Cookie</code> contains
     *
     * @return the <code>Cookie</code> that was just parsed
     */
    private Cookie getCookie(String name, String value) {
        Cookie cookie = new Cookie(name, value, false);

        if (domain.len > 0) {
            cookie.setDomain(domain.toString());
        }
        if (path.len > 0) {
            cookie.setPath(path.toString());
        }
        cookie.setVersion(version);
        return cookie;
    }

    /**
     * This is used to parse a single cookie from the buffer. It first
     * skips the separator (',' or ';') left over after the version or
     * the previous cookie, then reads the name, the '=' and finally
     * the value together with its optional attributes.
     */
    private void cookie() {
        if (!skip(",")) { /* ,|; */
            skip(";");
        }
        name();
        skip("="); /* = */
        value();
    }

    /**
     * This initializes the name token and extracts the name of this
     * cookie. All characters from the current offset up to but
     * excluding the first '=' (or the end of the buffer) are taken.
     */
    private void name() {
        name.off = off;
        name.len = 0;

        while (off < count) {
            if (buf[off] == '=') {
                break;
            }
            name.len++;
            off++;
        }
    }

    /**
     * Used to extract everything found after the <code>NAME '='</code>
     * within a cookie. This extracts the cookie value followed by the
     * optional <code>$Path</code> and <code>$Domain</code> attributes.
     * <p>
     * The <code>path</code> and <code>domain</code> methods consume
     * the ';' that precedes their attribute themselves, so that when
     * the attribute is absent nothing belonging to the next cookie
     * is read.
     */
    private void value() {
        data();
        path();
        domain();
    }

    /**
     * This initializes the value token and extracts the value of
     * this cookie from the current offset.
     */
    private void data() {
        scan(value);
    }

    /**
     * This resets the path token and, if a <code>;$Path=</code>
     * prefix is found at the current offset, extracts the path
     * that follows it.
     */
    private void path() {
        path.len = 0; /* reset so an earlier cookie's path is not reused */

        if (skip(";$Path=")) {
            scan(path);
        }
    }

    /**
     * This resets the domain token and, if a <code>;$Domain=</code>
     * prefix is found at the current offset, extracts the domain
     * that follows it.
     */
    private void domain() {
        domain.len = 0; /* reset so an earlier cookie's domain is not reused */

        if (skip(";$Domain=")) {
            scan(domain);
        }
    }

    /**
     * Reads one attribute value starting at the current offset and
     * records its region in the token provided.
     * <p>
     * If the value starts with a quote it is read as a literal: the
     * token covers everything between the opening quote and the next
     * quote that is not directly preceded by a backward slash, that
     * is '\'. The closing quote is consumed but not included. If
     * the literal is never closed the token covers the remainder of
     * the buffer. Otherwise the value is read up to but excluding the
     * first terminal character. Reaching the end of the buffer yields
     * an empty token.
     *
     * @param token the token that receives the offset and length
     */
    private void scan(Token token) {
        token.off = off;
        token.len = 0;

        if (off < count && buf[off] == '"') {
            token.off = ++off; /* content starts after the opening quote */

            while (off < count) {
                char ch = buf[off++];

                /* off - 2 is the character in front of ch; it is never
                   before the opening quote */
                if (ch == '"' && buf[off - 2] != '\\') {
                    return;
                }
                token.len++;
            }
        } else {
            while (off < count && !terminal(buf[off])) {
                token.len++;
                off++;
            }
        }
    }

    /**
     * This extracts the <code>$Version</code> of the cookie header.
     * If the buffer starts with <code>$Version=</code> the digits
     * that follow, optionally wrapped in quotes, are converted to a
     * decimal int. If there is no such prefix the version is 1.
     */
    private void version() {
        if (skip("$Version=")) {
            if (off < count && buf[off] == '"') {
                off++;
            }
            while (off < count && digit(buf[off])) {
                version = version * 10 + (buf[off] - '0');
                off++;
            }
            /* the header may end right after the digits */
            if (off < count && buf[off] == '"') {
                off++;
            }
        } else {
            version = 1;
        }
    }

    /**
     * This is used to determine if a given character terminates an
     * unquoted value. Only the ';' character is treated as a
     * terminal; the ',' permitted by RFC 2109 is not.
     *
     * @param ch the character that is to be compared
     *
     * @return true if this is a semicolon character
     */
    private boolean terminal(char ch) {
        return ch == ';';
    }

    /**
     * This is used to represent an <code>Iterator</code> that will
     * iterate over the available cookies within the provided source
     * text. This allows the cookie parser to be used as an iterable
     * with for each loops. Cookies can not be removed with this.
     */
    private class Sequence implements Iterator<Cookie> {

        /**
         * Extracts the next <code>Cookie</code> object from the string
         * given. This will return <code>null</code> when there are no
         * more cookies left in the <code>String</code> being parsed.
         * <p>
         * To find out when there are no more cookies left use the
         * <code>hasNext</code> method. The cookie returned has its
         * name, value and version set; the path and the domain are
         * set only if they were present.
         *
         * @return an initialized <code>Cookie</code> object, or null
         */
        @Override
        public Cookie next() {
            if (!hasNext()) {
                return null;
            }
            parsed = false; /* the pending cookie is consumed */
            return getCookie();
        }

        /**
         * Determine whether or not there are any cookies left in
         * the <code>String</code>. If no cookie is pending this will
         * attempt to parse another one so that the <code>next</code>
         * method can produce it.
         *
         * @return true if there are more cookies false otherwise
         */
        @Override
        public boolean hasNext() {
            if (finished) {
                return false;
            }
            if (!parsed) {
                parse();
            }
            return parsed;
        }

        /**
         * This method performs no action; cookies can not be removed
         * from the source text, which is left intact so that it can
         * be read again with the <code>reset</code> method.
         */
        @Override
        public void remove() {
            /* intentionally does nothing */
        }
    }

    /**
     * This is a token object that is used to store the offset and
     * length of a region of chars in the <code>CookieParser.buf</code>
     * array. The <code>toString</code> method of this token will
     * produce the <code>String</code> value of the region it
     * represents.
     */
    private class Token {

        /**
         * The number of characters that were consumed by this token.
         */
        public int len;

        /**
         * The offset within the buffer that this token starts from.
         */
        public int off;

        /**
         * This converts the region within the buffer to a
         * <code>String</code>.
         *
         * @return the <code>String</code> value of the region
         */
        @Override
        public String toString() {
            return new String(buf, off, len);
        }
    }
}