/*
* SegmentConsumer.java February 2007
*
* Copyright (C) 2001, Niall Gallagher <niallg@users.sf.net>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
package org.simpleframework.http.message;
import static org.simpleframework.http.Protocol.ACCEPT_LANGUAGE;
import static org.simpleframework.http.Protocol.CONTENT_DISPOSITION;
import static org.simpleframework.http.Protocol.CONTENT_LENGTH;
import static org.simpleframework.http.Protocol.CONTENT_TYPE;
import static org.simpleframework.http.Protocol.COOKIE;
import static org.simpleframework.http.Protocol.EXPECT;
import static org.simpleframework.http.Protocol.TRANSFER_ENCODING;
import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.Locale;
import org.simpleframework.http.ContentDisposition;
import org.simpleframework.http.ContentType;
import org.simpleframework.http.Cookie;
import org.simpleframework.http.parse.ContentDispositionParser;
import org.simpleframework.http.parse.ContentTypeParser;
import org.simpleframework.http.parse.CookieParser;
import org.simpleframework.http.parse.LanguageParser;
/**
* The <code>SegmentConsumer</code> object provides a consumer that is used to
* consume a HTTP header. This will read all headers within a HTTP header
* message until the carriage return line feed empty line is encountered. Once
* all headers are consumed they are available using the case insensitive header
* name. This will remove leading and trailing whitespace from the names and
* values parsed.
*
* @author Niall Gallagher
*/
public class SegmentConsumer extends ArrayConsumer implements Segment {

    /**
     * The byte sequence CR LF CR LF that terminates a HTTP header block.
     */
    private static final byte[] TERMINAL = { 13, 10, 13, 10 };

    /**
     * The parsed <code>Content-Disposition</code> header, or null if none
     * has been consumed.
     */
    protected ContentDisposition disposition;

    /**
     * The parser holding the languages given by <code>Accept-Language</code>.
     */
    protected LanguageParser language;

    /**
     * The parser used for every <code>Cookie</code> header consumed.
     */
    protected CookieParser cookies;

    /**
     * The parsed <code>Content-Type</code> header, or null if none has
     * been consumed.
     */
    protected ContentType type;

    /**
     * The raw <code>Transfer-Encoding</code> header value, or null.
     */
    protected String encoding;

    /**
     * Stores every consumed header, keyed by the header name.
     */
    protected MessageHeader header;

    /**
     * During parsing this holds the most recently parsed header name.
     */
    protected String name;

    /**
     * During parsing this holds the most recently parsed header value.
     */
    protected String value;

    /**
     * This is true once an <code>Expect</code> header has been consumed.
     */
    protected boolean expect;

    /**
     * The body length taken from <code>Content-Length</code>, or -1 when
     * it is absent or could not be parsed.
     */
    protected long length;

    /**
     * The maximum number of bytes the HTTP header may occupy.
     */
    protected long limit;

    /**
     * The current read offset within the consumed header bytes.
     */
    protected int pos;

    /**
     * The number of bytes of the terminal sequence matched so far.
     */
    protected int scan;

    /**
     * Constructor for the <code>SegmentConsumer</code> object. This creates
     * a consumer with a default header size limit of 1048576 bytes.
     */
    public SegmentConsumer() {
        this(1048576);
    }

    /**
     * Constructor for the <code>SegmentConsumer</code> object. This creates
     * a consumer that parses a HTTP message header, delegating to dedicated
     * parsers for special headers such as the content type or cookies.
     *
     * @param limit
     *            this is the length limit for a HTTP header
     */
    public SegmentConsumer(int limit) {
        this.language = new LanguageParser();
        this.cookies = new CookieParser();
        this.header = new MessageHeader();
        this.limit = limit;
        this.length = -1;
    }

    /**
     * This determines whether the part this segment describes is a file.
     * The answer is delegated to the content disposition; when there is no
     * content disposition this is false.
     *
     * @return this returns true if the associated part is a file
     */
    @Override
    public boolean isFile() {
        if (this.disposition == null) {
            return false;
        }
        return this.disposition.isFile();
    }

    /**
     * This provides the name of the part as given by the content
     * disposition. When there is no content disposition this is null.
     *
     * @return this returns the name of the associated part
     */
    @Override
    public String getName() {
        if (this.disposition == null) {
            return null;
        }
        return this.disposition.getName();
    }

    /**
     * This provides the file name of the part as given by the content
     * disposition. When there is no content disposition this is null.
     *
     * @return this returns the file name of the associated part
     */
    @Override
    public String getFileName() {
        if (this.disposition == null) {
            return null;
        }
        return this.disposition.getFileName();
    }

    /**
     * This provides the parsed <code>Content-Type</code> header as a typed
     * object exposing the parts of that header.
     *
     * @return this returns the content type value if it exists
     */
    @Override
    public ContentType getContentType() {
        return this.type;
    }

    /**
     * This provides the length of the message body as declared by the
     * <code>Content-Length</code> header. If that header is missing, or its
     * value is not a number, this returns -1.
     *
     * @return the content length, or -1 if it cannot be determined
     */
    @Override
    public long getContentLength() {
        return this.length;
    }

    /**
     * This provides the value of the <code>Transfer-Encoding</code> header
     * exactly as it was consumed.
     *
     * @return this returns the transfer encoding value if it exists
     */
    @Override
    public String getTransferEncoding() {
        return this.encoding;
    }

    /**
     * This provides the parsed <code>Content-Disposition</code> header as a
     * typed object exposing the parts of that header.
     *
     * @return this returns the content disposition value if it exists
     */
    @Override
    public ContentDisposition getDisposition() {
        return this.disposition;
    }

    /**
     * This is used to acquire the locales from the
     * <code>Accept-Language</code> header, as listed by the language
     * parser. An empty list is returned if there is no language parser.
     *
     * @return this returns the locales preferred by the client
     */
    public List<Locale> getLocales() {
        if (this.language != null) {
            return this.language.list();
        }
        return Collections.emptyList();
    }

    /**
     * This provides the values of the HTTP message headers that have the
     * specified name. The lookup is delegated to the message header store.
     *
     * @param name
     *            the name of the headers that are to be retrieved
     *
     * @return the values the header store holds for the name
     */
    @Override
    public List<String> getValues(String name) {
        return this.header.getValues(name);
    }

    /**
     * This provides the value of the first message header that has the
     * specified name. The lookup is delegated to the message header store.
     *
     * @param name
     *            the HTTP message header to get the value from
     *
     * @return the value the header store holds for the name
     */
    @Override
    public String getValue(String name) {
        return this.header.getValue(name);
    }

    /**
     * This determines if the header requires the HTTP/1.1 continue
     * expectation, in which case the 100 status code should be sent to
     * prompt delivery of the message body.
     *
     * @return this returns true if a continue expectation exists
     */
    public boolean isExpectContinue() {
        return this.expect;
    }

    /**
     * This is used to expand the internal array. The expansion is refused
     * when the requested size is beyond the configured header limit.
     *
     * @param size
     *            this is the minimum size to expand the array to
     *
     * @throws IOException
     *             if the requested size exceeds the header limit
     */
    @Override
    protected void resize(int size) throws IOException {
        if (size > this.limit) {
            throw new IOException("Header has exceeded maximum size");
        }
        super.resize(size);
    }

    /**
     * This is invoked to process the consumed bytes; it parses every
     * header line they contain.
     */
    @Override
    protected void process() throws IOException {
        this.headers();
    }

    /**
     * This parses headers one at a time until the read offset reaches the
     * end of the consumed bytes, adding each one to the segment.
     */
    protected void headers() {
        while (this.pos < this.count) {
            this.header();
            this.add(this.name, this.value);
        }
    }

    /**
     * This parses a single header. Leading spaces are skipped, the name is
     * read up to the colon, spaces are skipped again, the value is read,
     * and finally any trailing whitespace and line terminators are
     * skipped. The results are left in the name and value fields.
     */
    private void header() {
        this.adjust();
        this.name();
        this.adjust();
        this.value();
        this.end();
    }

    /**
     * This adds the header to the segment. Headers of special interest are
     * first handed to a dedicated handler, after which every header,
     * special or not, is stored in the message header.
     *
     * @param name
     *            this is the name of the header to be added
     * @param value
     *            this is the value of the header to be added
     */
    protected void add(String name, String value) {
        if (this.equal(ACCEPT_LANGUAGE, name)) {
            this.language(value);
        } else if (this.equal(CONTENT_LENGTH, name)) {
            this.length(value);
        } else if (this.equal(CONTENT_TYPE, name)) {
            this.type(value);
        } else if (this.equal(CONTENT_DISPOSITION, name)) {
            this.disposition(value);
        } else if (this.equal(TRANSFER_ENCODING, name)) {
            this.encoding(value);
        } else if (this.equal(EXPECT, name)) {
            this.expect(value);
        } else if (this.equal(COOKIE, name)) {
            this.cookie(value);
        }
        this.header.addValue(name, value);
    }

    /**
     * This records that an <code>Expect</code> header is present. The
     * value is not examined; the expectation is always assumed to be a
     * continue expectation.
     *
     * @param value
     *            the value in the expect continue header
     */
    protected void expect(String value) {
        this.expect = true;
    }

    /**
     * This parses a cookie header and sets each cookie the parser yields
     * on the message header.
     *
     * @param value
     *            this is the value of the cookie to be parsed
     */
    protected void cookie(String value) {
        this.cookies.parse(value);
        for (Cookie cookie : this.cookies) {
            this.header.setCookie(cookie);
        }
    }

    /**
     * This parses the <code>Accept-Language</code> header value, replacing
     * the current language parser with one built from the value.
     *
     * @param value
     *            this is the value that is to be parsed
     */
    protected void language(String value) {
        this.language = new LanguageParser(value);
    }

    /**
     * This parses the <code>Content-Type</code> header value so that it is
     * available to the segment as a <code>ContentType</code> object.
     *
     * @param value
     *            this is the content type value to parse
     */
    protected void type(String value) {
        this.type = new ContentTypeParser(value);
    }

    /**
     * This parses the <code>Content-Disposition</code> header value so
     * that it is available to the segment as a
     * <code>ContentDisposition</code> object.
     *
     * @param value
     *            this is the content disposition value to parse
     */
    protected void disposition(String value) {
        this.disposition = new ContentDispositionParser(value);
    }

    /**
     * This stores the <code>Transfer-Encoding</code> header value without
     * any further parsing.
     *
     * @param value
     *            this is the value representing the encoding
     */
    protected void encoding(String value) {
        this.encoding = value;
    }

    /**
     * This parses the <code>Content-Length</code> header value. If the
     * value is null or is not a valid long then no exception is raised;
     * the length is set to -1 instead.
     *
     * @param value
     *            this is the header value of the content length
     */
    protected void length(String value) {
        try {
            this.length = Long.parseLong(value);
        } catch (NumberFormatException e) {
            /* also raised for a null value, treat both as unknown */
            this.length = -1;
        }
    }

    /**
     * This reads the header name. Every byte from the current offset up to
     * the next colon ':' forms the name; the colon itself is consumed but
     * is not part of the name. If no colon is found the name runs to the
     * end of the consumed bytes.
     */
    private void name() {
        Token token = new Token(this.pos, 0);

        while (this.pos < this.count) {
            if (this.array[this.pos] == ':') {
                this.pos++; /* step over the colon */
                break;
            }
            token.size++;
            this.pos++;
        }
        this.name = token.text();
    }

    /**
     * This reads the header value. A value may be folded over several
     * lines, where a line break that is followed by a space or tab
     * continues the value on the next line. Bytes of a fold, including the
     * line break, remain part of the value. Trailing spaces and tabs are
     * excluded because the token only grows when a non-space byte is seen.
     */
    private void value() {
        Token token = new Token(this.pos, 0);

        line: for (int mark = 0; this.pos < this.count;) {
            if (this.terminal(this.array[this.pos])) { /* CR or LF */
                for (int i = 0; this.pos < this.count; i++) {
                    if (this.array[this.pos++] == 10) { /* skip the LF */
                        /* look ahead only within the consumed bytes */
                        if (this.pos < this.count) {
                            if (this.space(this.array[this.pos])) {
                                mark += i + 1; /* account for bytes examined */
                                break; /* folding line */
                            }
                        }
                        break line; /* not a folding line */
                    }
                }
            } else {
                mark++;
                if (!this.space(this.array[this.pos])) {
                    token.size = mark; /* extend to the last non-space */
                }
                this.pos++;
            }
        }
        this.value = token.text();
    }

    /**
     * This advances the read offset past any run of spaces and tabs so
     * that the next byte read is not a space character.
     */
    protected void adjust() {
        while (this.pos < this.count) {
            if (!this.space(this.array[this.pos])) {
                break;
            }
            this.pos++;
        }
    }

    /**
     * This advances the read offset past any run of spaces, tabs, carriage
     * returns and line feeds. It is used to follow through to the end of a
     * header line.
     */
    protected void end() {
        while (this.pos < this.count) {
            if (!this.white(this.array[this.pos])) {
                break;
            }
            this.pos++;
        }
    }

    /**
     * This scans the consumed bytes for the terminal CR LF CR LF sequence.
     * When it is found the consumer is marked as done, the count is cut
     * back to the end of the terminal, the read offset is reset to zero,
     * and the number of bytes that followed the terminal is returned.
     *
     * @return this returns the number of excess bytes consumed
     */
    @Override
    protected int scan() {
        int length = this.count;

        while (this.pos < this.count) {
            byte octet = this.array[this.pos++];

            if (octet == TERMINAL[this.scan]) {
                this.scan++;
            } else if (octet == TERMINAL[0]) {
                /* a mismatching CR may itself begin the terminal */
                this.scan = 1;
            } else {
                this.scan = 0;
            }
            if (this.scan == TERMINAL.length) {
                this.done = true;
                this.count = this.pos;
                this.pos = 0;
                return length - this.count;
            }
        }
        return 0;
    }

    /**
     * This determines if two header names are equal, ignoring case, so
     * that the case insensitivity of HTTP header names is observed.
     *
     * @param name
     *            this is the name to compare the parsed token with
     * @param token
     *            this is the header name token to examine
     *
     * @return true if the header name token is equal to the name
     */
    protected boolean equal(String name, String token) {
        return name.equalsIgnoreCase(token);
    }

    /**
     * This identifies a byte as a space character, which is either a space
     * or a horizontal tab.
     *
     * @param octet
     *            the byte to determine whether it is a space
     *
     * @return true if it is a space character, false otherwise
     */
    protected boolean space(byte octet) {
        return (octet == ' ') || (octet == '\t');
    }

    /**
     * This identifies a byte as a terminal character, which is either a
     * carriage return or a line feed.
     *
     * @param octet
     *            the byte to determine whether it is a terminal
     *
     * @return true if it is a terminal character, false otherwise
     */
    protected boolean terminal(byte octet) {
        return (octet == 13) || (octet == 10);
    }

    /**
     * This identifies a byte as white space, which is a space, a tab, a
     * carriage return or a line feed.
     *
     * @param octet
     *            this is to be checked to see if it is white space
     *
     * @return true if the byte is white space, false otherwise
     */
    protected boolean white(byte octet) {
        switch (octet) {
        case ' ':
        case '\r':
        case '\n':
        case '\t':
            return true;
        default:
            return false;
        }
    }

    /**
     * This provides a string representation of the consumed header so that
     * it can be inspected when debugging. The bytes are decoded as UTF-8,
     * the same charset used for the parsed header names and values.
     *
     * @return this returns a string representation of the header
     */
    @Override
    public String toString() {
        return new Token(0, this.count).text();
    }

    /**
     * This tracks the boundaries of a token, as an offset and a size
     * within the consumed array, so that the range can be converted to a
     * string once it has been fully determined.
     */
    private class Token {

        /**
         * This is the number of bytes in the token.
         */
        public int size;

        /**
         * This is the start offset of the token within the array.
         */
        public int off;

        /**
         * Constructor for the <code>Token</code> object. This creates a
         * token tracking the given range of bytes.
         *
         * @param off
         *            the starting offset for the token range
         * @param size
         *            the number of bytes used for the token
         */
        public Token(int off, int size) {
            this.off = off;
            this.size = size;
        }

        /**
         * This converts the byte range to a string using UTF-8.
         *
         * @return the decoded string representing the token
         */
        public String text() {
            return this.text("UTF-8");
        }

        /**
         * This converts the byte range to a string using the specified
         * charset. If the charset is not supported this returns null.
         *
         * @param charset
         *            the name of the charset used to decode the bytes
         *
         * @return the decoded string, or null for an unsupported charset
         */
        public String text(String charset) {
            try {
                return new String(SegmentConsumer.this.array, this.off,
                        this.size, charset);
            } catch (java.io.UnsupportedEncodingException e) {
                return null;
            }
        }
    }
}