/*
 * ListParser.java September 2003
 *
 * Copyright (C) 2003, Niall Gallagher <niallg@users.sf.net>
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * permissions and limitations under the License.
 */

package org.simpleframework.http.parse;

import static java.lang.Long.MAX_VALUE;

import java.util.ArrayList;
import java.util.List;
import java.util.PriorityQueue;

import org.simpleframework.util.parse.Parser;

/**
 * The <code>ListParser</code> is used to extract a comma separated list of
 * HTTP header values. This will extract values without any leading or
 * trailing spaces, which enables the values to be used. Listing the values
 * that appear in the header also requires that the values are ordered. This
 * orders the values using any quality parameter associated with each value.
 * The quality value is a special parameter that is often found in a comma
 * separated value list to specify the client preference.
 *
 * <pre>
 *
 *    image/gif, image/jpeg, text/html
 *    image/gif;q=1.0, image/jpeg;q=0.8, image/png; q=1.0,*;q=0.1
 *    gzip;q=1.0, identity; q=0.5, *;q=0
 *
 * </pre>
 *
 * The above lists, taken from RFC 2616, provide examples of the common form
 * comma separated values take. The first illustrates a simple comma
 * delimited list, here the ordering of values is determined from left to
 * right. The second and third lists have quality values associated with
 * them, these are used to specify a preference and thus order.
 * <p>
 * If a value is explicitly given a "q" parameter it is interpreted as a
 * decimal number with a precision of three fractional digits, so "0.8",
 * "0.80" and "0.800" all rank identically. A value with no "q" parameter is
 * given the maximum possible priority, so it is ordered ahead of every value
 * that carries an explicit "q" parameter, including "q=1.0". Values whose
 * quality is below 0.001 are discarded. The quality parameters are removed
 * from the resulting text.
 *
 * @author Niall Gallagher
 */
public abstract class ListParser<T> extends Parser {

   /**
    * Number of fractional digits of a quality value that are significant.
    */
   private static final int PRECISION = 3;

   /**
    * Fixed point scale of a quality value, ten to the power of the precision.
    */
   private static final long SCALE = 1000L;

   /**
    * Saturation point for the integer part of a quality value, this keeps
    * the scaled quality well below the implicit maximum priority.
    */
   private static final long LIMIT = 1000000L;

   /**
    * Provides a quick means of sorting the values extracted.
    */
   private PriorityQueue<Entry> order;

   /**
    * Contains all the values extracted from the header(s).
    */
   private List<T> list;

   /**
    * This is used as a working space to parse the value.
    */
   private char[] text;

   /**
    * The quality associated with an individual value.
    */
   private long qvalue;

   /**
    * Used to index into the write offset for the value.
    */
   private int pos;

   /**
    * This is used to determine whether to gather tokens.
    */
   private boolean build;

   /**
    * Constructor for the <code>ListParser</code>. This creates a parser
    * with no initial parse data, if there are headers to be parsed then the
    * <code>parse(String)</code> method or <code>parse(List)</code> method
    * can be used. This will parse a delimited list according to RFC 2616
    * section 4.2.
    */
   public ListParser() {
      this.order = new PriorityQueue<Entry>();
      this.list = new ArrayList<T>();
      this.text = new char[0];
   }

   /**
    * Constructor for the <code>ListParser</code>. This creates a parser
    * with the text supplied. This will parse the comma separated list
    * according to RFC 2616 section 2.1 and 4.2. The tokens can be extracted
    * using the <code>list</code> method, which provides them sorted and
    * trimmed.
    *
    * @param text this is the comma separated list to be parsed
    */
   public ListParser(String text) {
      this();
      this.parse(text);
   }

   /**
    * Constructor for the <code>ListParser</code>. This creates a parser
    * with the text supplied. This will parse the comma separated lists
    * according to RFC 2616 section 2.1 and 4.2. The tokens can be extracted
    * using the <code>list</code> method, which provides them sorted and
    * trimmed.
    *
    * @param list a list of comma separated lists to be parsed
    */
   public ListParser(List<String> list) {
      this();
      this.parse(list);
   }

   /**
    * This allows multiple header values to be represented as one single
    * comma separated list. RFC 2616 states that multiple message header
    * fields with the same field name may be present in a message if and
    * only if the entire field value for that header field is defined as a
    * comma separated list. This means that if there are multiple header
    * values with the same name they can be combined.
    * <p>
    * The results of the first value replace any previous results, the
    * results of each subsequent value are appended. The gather flag is
    * always reset when this method exits, even if parsing fails, so that a
    * later parse starts from an empty result list.
    *
    * @param list this is a list of header values to be combined
    */
   public void parse(List<String> list) {
      try {
         for (String value : list) {
            this.parse(value);
            this.build = true; // later values append rather than replace
         }
      } finally {
         this.build = false;
      }
   }

   /**
    * This provides the ordered list of values extracted from the comma
    * separated header value. The most preferred token is at the first
    * index of the list and the least preferred token is at the last index.
    * The list returned is the internal list of this parser, it is modified
    * by subsequent parsing.
    *
    * @return tokens parsed from the list ordered by preference
    */
   public List<T> list() {
      return this.list;
   }

   /**
    * This is used to remove the values from the priority queue and append
    * them to the result list. The values are removed in priority order so
    * that the most preferred value is appended first.
    */
   private void build() {
      while (!this.order.isEmpty()) {
         Entry entry = this.order.remove();
         T value = entry.getValue();

         this.list.add(value);
      }
   }

   /**
    * This ensures that tokens are taken from the comma separated list as
    * long as there are characters left to be examined within the source
    * text. Each token is extracted, trimmed and queued, once the source is
    * exhausted the queued tokens are moved to the result list in order.
    */
   @Override
   protected void parse() {
      while (this.off < this.count) {
         this.clear();
         this.value();
         this.save();
      }
      this.build();
   }

   /**
    * Initializes the parser so that tokens can be extracted from the list.
    * This ensures the write buffer is at least as long as the source text,
    * so that a single token as long as the source can be accommodated.
    * <p>
    * The result list is cleared unless several header values are being
    * gathered by <code>parse(List)</code>. The priority queue is always
    * emptied here.
    */
   @Override
   protected void init() {
      if (this.text.length < this.count) {
         this.text = new char[this.count];
      }
      if (!this.build) {
         this.list.clear();
      }
      this.pos = this.off = 0;
      this.order.clear();
   }

   /**
    * This is used to return the parser to a semi-initialized state. After
    * extracting a token from the list the buffer will have accumulated
    * characters, this ensures that characters previously written to the
    * buffer do not interfere with the next token extracted.
    * <p>
    * This also ensures the implicit qvalue is reset to the maximum long
    * value, so that the next token parsed without a qvalue will have the
    * highest priority.
    */
   private void clear() {
      this.qvalue = MAX_VALUE;
      this.pos = 0;
   }

   /**
    * This method will extract a token from a comma separated list and
    * write it to a buffer. This performs the extraction in such a way that
    * it can tolerate literals, parameters, and quality value parameters.
    * The only alteration made to the token by this method is the removal
    * of quality values, that is, parameters which have the name "q". Below
    * is an example of some of the lists that this can parse.
    *
    * <pre>
    *
    *    token; quantity=1;q=0.001, token; text="a, b, c, d";q=0
    *    image/gif, , image/jpeg, image/png;q=0.8, *
    *    token="\"a, b, c, d\", a, b, c, d", token="a";q=0.9,,
    *
    * </pre>
    *
    * This will only interpret a comma delimiter outside the quotes of a
    * literal. So if there are comma separated tokens that have quoted
    * strings, then commas within those quoted strings will not upset the
    * extraction of the token. A quote preceded by a backslash does not
    * terminate the quoted string.
    */
   private void value() {
      parse: while (this.off < this.count) {
         if (this.buf[this.off++] == '"') {
            this.text[this.pos++] = this.buf[this.off - 1]; // opening quote

            while (++this.off < this.count) {
               if (this.buf[this.off - 1] == '"') {
                  if (this.buf[this.off - 2] != '\\') {
                     break; // unescaped closing quote, copied below
                  }
               }
               this.text[this.pos++] = this.buf[this.off - 1];
            }
            if (this.off > this.count) {
               // The opening quote was the final character, so there is
               // nothing more to copy. Without this the offset would point
               // past the source and the copy below would read and write
               // outside of the valid data.
               this.off = this.count;
               break;
            }
         } else if (this.buf[this.off - 1] == ';') {
            // Look ahead over any spaces for a "q=" parameter.
            for (int seek = this.off; (seek + 1) < this.count;) {
               if (!this.space(this.buf[seek])) {
                  if (this.buf[seek] == 'q') {
                     if (this.buf[seek + 1] == '=') {
                        this.off = seek;
                        this.qvalue();
                        continue parse; // the ";q=..." text is dropped
                     }
                  }
                  break;
               }
               seek++;
            }
         }
         if (this.buf[this.off - 1] == ',') {
            break;
         }
         this.text[this.pos++] = this.buf[this.off - 1];
      }
   }

   /**
    * This method will trim whitespace from the extracted token and store
    * that token within the <code>PriorityQueue</code>. Leading and
    * trailing spaces are removed by adjusting the range handed to the
    * <code>create</code> method, so no intermediate string is required.
    * Empty tokens, and tokens for which <code>create</code> provides null,
    * are not saved.
    */
   private void save() {
      int size = this.pos;
      int start = 0;

      while (size > 0) {
         if (!this.space(this.text[size - 1])) {
            break;
         }
         size--;
      }
      while (start < this.pos) {
         if (this.space(this.text[start])) {
            start++;
            size--;
         } else {
            break;
         }
      }
      if (size > 0) {
         T value = this.create(this.text, start, size);

         if (value != null) {
            this.save(value);
         }
      }
   }

   /**
    * This stores the value in the <code>PriorityQueue</code>. If the
    * qvalue extracted from the header value is less than 0.001 then this
    * will not store the value. This ensures that client applications can
    * specify tokens that are unacceptable to them. The current size of the
    * queue is used as the secondary order, so values of equal quality keep
    * the order in which they were found.
    *
    * @param value this is the token to be enqueued into the queue
    */
   private void save(T value) {
      int size = this.order.size();

      if (this.qvalue > 0) {
         this.order.offer(new Entry(value, this.qvalue, size));
      }
   }

   /**
    * This is used to extract the qvalue parameter from the header. The
    * qvalue parameter is identified by a parameter with the name "q" and a
    * decimal number, typically in the range of 0.000 to 1.000.
    * <p>
    * The number is converted to a fixed point value in thousandths, so
    * that "1", "1.0" and "1.000" all yield 1000 and "0.8" yields 800. This
    * makes values comparable regardless of how many digits were written.
    * Fractional digits beyond the third are consumed but ignored, which
    * means a quality below 0.001 evaluates to zero. All digits and periods
    * following "q=" are consumed from the source.
    */
   private void qvalue() {
      if (this.skip("q=")) {
         boolean point = false;
         long fraction = 0;
         long whole = 0;
         int digits = 0;

         while (this.off < this.count) {
            char next = this.buf[this.off];

            if (next == '.') {
               point = true;
               this.off++;
               continue;
            }
            if (!this.digit(next)) {
               break;
            }
            int digit = next - '0';

            if (!point) {
               if (whole < LIMIT) { // saturate rather than overflow
                  whole = (whole * 10) + digit;
               }
            } else if (digits < PRECISION) {
               fraction = (fraction * 10) + digit;
               digits++;
            }
            this.off++;
         }
         while (digits++ < PRECISION) { // pad "0.8" out to 800
            fraction *= 10;
         }
         this.qvalue = (whole * SCALE) + fraction;
      }
   }

   /**
    * This creates a value object using the range of characters that have
    * been parsed as an item within the list of values. It is up to the
    * implementation to create a value to insert in to the list. A null
    * value will be ignored if returned.
    *
    * @param text this is the text buffer to acquire the value from
    * @param start the offset within the array to take characters
    * @param len this is the number of characters within the token
    *
    * @return the value to add to the list, or null to ignore the token
    */
   protected abstract T create(char[] text, int start, int len);

   /**
    * The <code>Entry</code> object provides a comparable object to insert
    * in to a priority queue. This will sort the value using the quality
    * value parameter parsed from the list. If there are values with the
    * same quality value then this will sort the values by a secondary
    * order parameter.
    */
   private class Entry implements Comparable<Entry> {

      /**
       * This is the value that is represented by this entry.
       */
      private final T value;

      /**
       * This is the priority value that is used to sort entries.
       */
      private final long priority;

      /**
       * This is the secondary order value used to sort entries.
       */
      private final int order;

      /**
       * Constructor for the <code>Entry</code> object. This is used to
       * create a comparable value that can be inserted in to a priority
       * queue and extracted in order of the priority value.
       *
       * @param value this is the value that is represented by this
       * @param priority this is the priority value for sorting
       * @param order this is the secondary priority value used
       */
      public Entry(T value, long priority, int order) {
         this.priority = priority;
         this.order = order;
         this.value = value;
      }

      /**
       * This acquires the value represented by this entry. This can be
       * used to place the value within a list as it is taken from the
       * priority queue.
       *
       * @return this returns the value represented by this
       */
      public T getValue() {
         return this.value;
      }

      /**
       * This is used to sort the entries within the priority queue. An
       * entry with a higher priority is ordered before an entry with a
       * lower priority. If the entries have the same priority then they
       * are sorted using the secondary order value, which is the
       * insertion index, lowest first.
       *
       * @param entry this is the entry to be compared to
       *
       * @return this returns the result of the entry comparison
       */
      @Override
      public int compareTo(Entry entry) {
         if (this.priority < entry.priority) {
            return 1;
         }
         if (this.priority > entry.priority) {
            return -1;
         }
         if (this.order < entry.order) {
            return -1;
         }
         if (this.order > entry.order) {
            return 1;
         }
         return 0;
      }
   }
}