CookieParser.java example

Explorer
divconq-master
/* ************************************************************************
#
#  DivConq
#
#  http://divconq.com/
#
#  Copyright:
#    Copyright 2014 eTimeline, LLC. All rights reserved.
#
#  License:
#    See the license.txt file in the project's top-level directory for details.
#
#  Authors:
#    * Andy White
#
************************************************************************ */

/*
 * CookieParser.java February 2001
 *
 * Copyright (C) 2001, Niall Gallagher <niallg@users.sf.net>
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 
 * implied. See the License for the specific language governing 
 * permissions and limitations under the License.
 */

package divconq.www.http.parse;

import divconq.www.http.Cookie;
import divconq.www.util.parse.Parser;

import java.util.Iterator;

/**
 * CookieParser is used to parse the cookie header. The cookie header is
 * one of the headers that is used by the HTTP state management mechanism.
 * The Cookie header is the header that is sent from the client to the
 * server in response to a Set-Cookie header. The syntax of the Cookie
 * header as taken from RFC 2109, HTTP State Management Mechanism.
 * <pre>
 *
 *  cookie          =       "Cookie:" cookie-version
 *                          1*((";" | ",") cookie-value)
 *  cookie-value    =       NAME "=" VALUE [";" path] [";" domain]
 *  cookie-version  =       "$Version" "=" value
 *  NAME            =       attr
 *  VALUE           =       value
 *  path            =       "$Path" "=" value
 *  domain          =       "$Domain" "=" value
 *
 * </pre>
 * The cookie header may consist of several cookies. Each cookie can be
 * extracted from the header by examining the it syntax of the cookie
 * header. The syntax of the cookie header is defined in RFC 2109.
 * <p>
 * Each cookie has a <code>$Version</code> attribute followed by multiple
 * cookies. Each contains a name and a value, followed  by an optional
 * <code>$Path</code> and <code>$Domain</code> attribute. This will parse
 * a given cookie header and return each cookie extracted as a
 * <code>Cookie</code> object.
 *
 * @author Niall Gallagher
 */
public class CookieParser extends Parser implements Iterable<Cookie> {

   /**
    * Determines when the <code>Parser</code> has finished.
    */
   private boolean finished;

   /**
    * Used so the <code>Parser</code> does not parse twice.
    */
   private boolean parsed;

   /**
    * Version of the <code>Cookie</code> being parsed.
    */
   private int version;

   /**
    * Used to store the name of the <code>Cookie</code>.
    */
   private Token name;

   /**
    * Used to store the value of the <code>Cookie</code>.
    */
   private Token value;

   /**
    * Used to store the <code>$Path</code> values.
    */
   private Token path;

   /**
    * Used to store the <code>$Domain</code> values.
    */
   private Token domain;

   /**
    * Create a <code>CookieParser</code> that contains no cookies.
    * the instance will return <code>false</code> for the
    * <code>hasNext</code> method. cookies may be parsed using
    * this instance by using the <code>parse</code> method.
    */
   public CookieParser(){
      this.path = new Token();
      this.domain = new Token();
      this.name = new Token();
      this.value = new Token();
      this.finished = true;
   }

   /**
    * This is primarily a convineance constructor. This will parse the
    * <code>String</code> given to extract the cookies. This could be
    * achived by calling the default no-arg constructor and then using
    * the instance to invoke the <code>parse</code> method on that
    * <code>String</code>.
    *
    * @param header a <code>String</code> containing a cookie value
    */
   public CookieParser(String header){
      this();
      parse(header);
   }

   /**
    * Resets the cookie and the buffer variables for this
    * <code>CookieParser</code>. It is used to set the
    * state of the parser to start parsing a new cookie.
    */
   protected void init() {
      finished = false;
      parsed =false;
      version = 0;
      off = 0;
      version();
   }

   /**
    * This will extract the next <code>Cookie</code> from the
    * buffer. If all the characters in the buffer have already
    * been examined then this method will simply do nothing.
    * Otherwise this will parse the remainder of the buffer
    * and (if it follows RFC 2109) produce a <code>Cookie</code>.
    */
   protected void parse() {
      if(!finished){
         cookie();
         parsed=true;
      }
   }

   /**
    * This is used to skip an arbitrary <code>String</code> within the
    * <code>char</code> buf. It checks the length of the <code>String</code>
    * first to ensure that it will not go out of bounds. A comparison
    * is then made with the buffers contents and the <code>String</code>
    * if the reigon in the buffer matched the <code>String</code> then the
    * offset within the buffer is increased by the <code>String</code>'s
    * length so that it has effectively skipped it.
    * <p>
    * This <code>skip</code> method will ignore all of the whitespace text.
    * This will also skip trailing spaces within the the input text and
    * all spaces within the source text. For example if the input was
    * the string "s omete xt" and the source was "some text to skip" then
    * the result of a skip ignoring spaces would be "to skip" in the
    * source string, as the trailing spaces are also eaten by this.
    *
    * @param text this is the <code>String</code> value to be skipped
    *
    * @return true if the <code>String</code> was skipped
    */
   protected boolean skip(String text){      
      int size = text.length();
      int seek = off;
      int read = 0;

      if(off + size > count){
         return false;
      }
      while(read < size) {
         char a = text.charAt(read);
         char b = buf[seek];

         if(space(b)){
            if(++seek >= count){
               return false;
            }
         }else if(space(a)){
            if(++read >= size) {
               continue;
            }
         }else {
            if(toLower(a) != toLower(b)){
               return false;
            }
            read++;
            seek++;
         }
      }
      for(off = seek; off < count; off++){
         if(!space(buf[off]))
            break;
      }
      return true;
   }
   
   /**
    * This is used to acquire the cookie values from the provided 
    * the provided source text. This allows the cookie parser to be
    * used within a for each loop to parse out the values of a
    * cookie one by one so that they may be used or stored.
    * 
    * @return this returns an iterator for extracting cookie value
    */
   public Iterator<Cookie> iterator() {
      return new Sequence();
   }

   /**
    * This is used so that the collection of <code>Cookies</code>
    * can be reiterated. This allows the collection to be reused.
    * The <code>reset</code> method will invoke the super classes
    * <code>init</code> method. This will reinitialize this
    * <code>Parser</code> so the cookie will be reparsed.
    */
   public void reset() {
      init();
      parse();
   }

   /**
    * Creates the <code>Cookie</code> from the token objects. It is
    * assumed that the <code>Cookie</code> <code>String</code> has
    * been parsed when this is called. This should only be used after
    * the <code>parse</code> method has been called.
    * <p>
    * If there is no <code>$Domain</code> or <code>$Path</code>
    * within  the <code>Cookie</code> <code>String</code> then the
    * <code>getDomain</code> and <code>getPath</code> are null.
    *
    * @return the <code>Cookie</code> that was just parsed
    */
   private Cookie getCookie() {
      return getCookie(name.toString(),
         value.toString());
   }

   /**
    * Creates the <code>Cookie</code> from the token objects. It is
    * assumed that the <code>Cookie</code> <code>String</code> has
    * been parsed when this is called. This should only be used after
    * the <code>parse</code> method has been called.
    * <p>
    * If there is no <code>$Domain</code> or <code>$Path</code>
    * within  the <code>Cookie</code> <code>String</code> then the
    * <code>getDomain</code> and <code>getPath</code> are null.
    *
    * @param name the name that the <code>Cookie</code> contains
    * @param value the value that the <code>Cookie</code> contains
    *
    * @return the <code>Cookie</code> that was just parsed
    */
   private Cookie getCookie(String name, String value) {
      Cookie cookie = new Cookie(name, value, false);
      
      if(domain.len > 0) {
         cookie.setDomain(domain.toString());
      }
      if(path.len > 0) {
         cookie.setPath(path.toString());
      }
      cookie.setVersion(version);
      return cookie;
   }

   /**
    * This is used to parse a <code>Cookie</code> from the buffer
    * that contains the <code>Cookie</code> values. This will first
    * try to remove any trailing value after the version/prev
    * <code>Cookie</code> once this is removed it will extract the
    * name/value pair from the <code>Cookie</code>. The name and
    * value of the <code>Cookie</code> will be saved by the name
    * and value tokens.
    */
   private void cookie(){
      if(!skip(",")){ /* ,|; */
         skip(";");
      }
      name();
      skip("="); /* = */
      value();
   }

   /**
    * This initializes the name token and extracts the name of this
    * <code>Cookie</code>. The offset and length of the name will be
    * saved in the name token. This will read all <code>char</code>'s
    * upto but excluding the first '=' <code>char</code> encountered
    * from the <code>off</code> within the buffer.
    */
   private void name() {
      name.off = off;
      name.len = 0;
      while(off < count){
         if(buf[off] == '='){
            break;
         }
         name.len++;
         off++;
      }
   }

   /**
    * Used to extract everything found after the <code>NAME '='</code>
    * within a <code>Cookie</code>. This extracts the <code>Cookie</code>
    * value the <code>$Path</code> and <code>$Domain</code> attributes
    * if they exist (i.e. <code>$Path</code> and <code>$Domain</code>
    * are optional in a cookie see RFC 2109).
    * <p>
    * The path method reads the terminal found before it as does the
    * <code>domain</code> method that is ";$Path" is read as the first
    * part of the path method. This is because if there is no path the
    * parser should not read data it does not know belongs to a specific
    * part of the <code>Cookie</code>.
    */
   private void value() {
      data();
      path();
      domain();
   }

   /**
    * This initializes the value token and extracts the value of this
    * <code>Cookie</code>. The offset and length of the value will be
    * saved in the value token. This will read all <code>char</code>'s
    * upto but excluding the first terminal char encountered from the
    * off within the buffer, or if the value is a literal it will read
    * a literal from the buffer (literal is any data between quotes
    * except if the quote is prefixed with a backward slash character
    * that is '\').
    */
   private void data() {
      value.off = off;
      value.len = 0;
      if(off < count && buf[off] == '"'){
         value.len++;
         for(off++; off < count;){
            value.len++;
            if(buf[off++]=='"')
               if(buf[off-2]!='\\'){
                  break;
               }
         }
         value.len-=2;  /* remove " */
         value.off++; /* remove " */
      }else {
         while(off < count){
            if(terminal(buf[off]))
               break;
            value.len++;
            off++;
         }
      }
   }

   /**
    * This initializes the path token and extracts the <code>$Path</code>
    * of this <code>Cookie</code>. The offset and length of the path will
    * be saved in the path token. This will read all <code>char</code>'s
    * up to but excluding the first terminal <code>char</code> encountered
    * from the <code>off</code> within the buffer, or if the value is a
    * literal it will read a literal from the buffer (literal is any data
    * between quotes except if the quote is prefixed with a backward slash
    * character, that is '\').
    * <p>
    * This reads the terminal before the <code>$Path</code> so that if
    * there is no <code>$Path</code> for the <code>Cookie</code> then
    * the character before it will not be read needlessly.
    */
   private void path() {
      path.len = 0; /* reset */
      if(skip(";$Path=")){
         path.off = off;
         if(buf[off] == '"'){
            path.len++;
            for(off++; off < count;){
               path.len++;
               if(buf[off++]=='"')
                  if(buf[off-2]!='\\'){
                     break;
                  }
            }
            path.len-=2;  /* remove " */
            path.off++; /* remove " */
         }else{
            while(off < count){
               if(terminal(buf[off]))
                  break;
               path.len++;
               off++;
            }
         }
      }
   }

   /**
    * Initializes the domain token and extracts the <code>$Domain</code>
    * of this <code>Cookie</code>. The offset and length of the domain
    * will be saved in the path token. This will read all characters up
    * to but excluding the first terminal <code>char</code> encountered
    * from the off within the buffer, or  if the value is a literal it
    * will read a literal from the buffer (literal is any data between
    * quotes except if the quote is prefixed with a backward slash
    * character, that is '\').
    * <p>
    * This reads the terminal before the <code>$Domain</code> so that
    * if there is  no <code>$Domain</code> for the <code>Cookie</code>
    * then the character before it will not be read needlessly.
    */
   private void domain(){
      domain.len = 0;   /* reset */
      if(skip(";$Domain=")) {
         domain.off = off;
         if(buf[off] == '"'){
            domain.len++;
            for(off++; off < count;){
               domain.len++;
               if(buf[off++]=='"')
                  if(buf[off-2]!='\\'){
                     break;
                  }
            }
            domain.len-=2;  /* remove " */
            domain.off++; /* remove " */
         }else{
            while(off < count){
               if(terminal(buf[off]))
                  break;
               domain.len++;
               off++;
            }
         }
      }
   }

   /**
    * This extracts the <code>$Version</code> of this <code>Cookie</code>.
    * The version is parsed and converted into a decimal int from the digit
    * characters that make up a version.
    * <p>
    * This will read all digit <code>char</code>'s up to but excluding the
    * first non digit <code>char</code> that it encounters from the offset
    * within the buffer, or if the value is a literal it will read a literal
    * from the buffer (literal is any data between quotes except if the quote
    * is prefixed with a backward slash character i.e. '\').
    */
   private void version(){
      if(skip("$Version=")) {
         if(buf[off] == '"'){
            off++;
         }
         while(off < count){
            if(!digit(buf[off])){
               break;
            }
            version *= 10;
            version += buf[off];
            version -= '0';
            off++;
         }
         if(buf[off] == '"'){
            off++;
         }
      }else{
         version = 1;
      }
   }

   /**
    * This is used to determine if a given  iso8859-1 character is
    * a terminal character. That is either the ';' or ','
    * characters. Although the RFC 2109 says the terminal can be
    * either a comma, it is not used by any browsers.
    *
    * @param ch the character that is to be compared
    *
    * @return true if this is a semicolon character
    */
   private boolean terminal(char ch) {
      return ch == ';'; 
   }

   /**
    * This is used to represent an <code>Iterator</code> that will 
    * iterate over the available cookies within the provided source
    * text. This allows the cookie parser to be used as an iterable
    * with for each loops. Cookies can not be removed with this.
    */
   private class Sequence implements Iterator<Cookie> {
      
      /**
       * Extracts the next <code>Cookie</code> object from the string
       * given. This will return <code>null</code> when there are no
       * more cookies left in the <code>String</code> being parsed.
       * <p>
       * To find out when there are no more cookies left use the
       * <code>hasNext</code> method. This will only set the name,
       * value, path, domain name version of the <code>cookie</code>
       * because as of RFC 2109 these are the only attributes a
       * <code>Cookie</code> may have, the path and domain are
       * optional.
       *
       * @return an initialized <code>Cookie</code> object
       */
      public Cookie next(){
         if(!hasNext()) {
            return null;
         }
         parsed = false;
         return getCookie();
      }
      

      /**
       * Determine whether or not there are any <code>Cookie</code>s
       * left in the <code>String</code>. This will attempt to extract
       * another <code>Cookie</code> from the <code>String</code> and
       * cache the result so the <code>next</code> method will produce
       * this <code>Cookie</code>. If another <code>Cookie</code> cannot
       * be parsed from the remainder of the <code>String</code> then
       * this will return <code>false</code> otherwise it will return
       * <code>true</code>.
       *
       * @return true if there are more cookies false otherwise
       */
      public boolean hasNext(){
         if(finished) {
            return false;
         }
         if(parsed) {
            return true;
         }
         parse();
         
         if(name.len <=0){
            finished = true;
            return false;
         }
         return true;

      }
      
      /**
       * This method is used to remove items from the iterator. This
       * however performs no action as the act of parsing should not
       * modify the underlying source text value so that it can be 
       * reset with the <code>reset</code> method and used again.
       */
      public void remove() {
         return;
      }
   }
   
   /**
    * This is a token object that is used to store the offset and
    * length of a region of chars in the <code>CookieParser.buf</code>
    * array. The <code>toString</code> method of this token will
    * produce the <code>String</code> value of the region it
    * represents.
    */
   private class Token {

      /**
       * The numer of characters that were consumed by this token.
       */
      public int len;

      /**
       * The offset within the buffer that this token starts from.
       */
      public int off;

      /**
       * This converts region within the buffer to a <code>String</code>.
       * This converts the region only if there is a sufficient length.
       *
       * @return the <code>String</code> value of the region
       */
      public String toString(){
         return new String(buf,off,len);
      }
   }
}