/* * SegmentConsumer.java February 2007 * * Copyright (C) 2001, Niall Gallagher <niallg@users.sf.net> * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or * implied. See the License for the specific language governing * permissions and limitations under the License. */ package org.simpleframework.http.message; import static org.simpleframework.http.Protocol.ACCEPT_LANGUAGE; import static org.simpleframework.http.Protocol.CONTENT_DISPOSITION; import static org.simpleframework.http.Protocol.CONTENT_LENGTH; import static org.simpleframework.http.Protocol.CONTENT_TYPE; import static org.simpleframework.http.Protocol.COOKIE; import static org.simpleframework.http.Protocol.EXPECT; import static org.simpleframework.http.Protocol.TRANSFER_ENCODING; import java.io.IOException; import java.util.Collections; import java.util.List; import java.util.Locale; import org.simpleframework.http.ContentDisposition; import org.simpleframework.http.ContentType; import org.simpleframework.http.Cookie; import org.simpleframework.http.parse.ContentDispositionParser; import org.simpleframework.http.parse.ContentTypeParser; import org.simpleframework.http.parse.CookieParser; import org.simpleframework.http.parse.LanguageParser; /** * The <code>SegmentConsumer</code> object provides a consumer that is used to * consume a HTTP header. This will read all headers within a HTTP header * message until the carriage return line feed empty line is encountered. Once * all headers are consumed they are available using the case insensitive header * name. This will remove leading and trailing whitespace from the names and * values parsed. * * @author Niall Gallagher */ public class SegmentConsumer extends ArrayConsumer implements Segment { /** * This is the terminal carriage return and line feed end line. */ private static final byte[] TERMINAL = { 13, 10, 13, 10 }; /** * This is used to represent the content disposition header. */ protected ContentDisposition disposition; /** * This is used to parse the languages accepted in the request. */ protected LanguageParser language; /** * This is used to parse the cookie headers that are consumed. */ protected CookieParser cookies; /** * This is used to parse the content type header consumed. */ protected ContentType type; /** * This represents the transfer encoding value of the body. */ protected String encoding; /** * This is used to store all consumed headers by the header name. */ protected MessageHeader header; /** * During parsing this is used to store the parsed header name, */ protected String name; /** * During parsing this is used to store the parsed header value. */ protected String value; /** * This is used to determine if there is a continue expected. */ protected boolean expect; /** * Represents the length of the body from the content length. */ protected long length; /** * This represents the length limit of the HTTP header cosumed. */ protected long limit; /** * This is used to track the read offset within the header. */ protected int pos; /** * This is used to track how much of the terminal is read. */ protected int scan; /** * Constructor for the <code>SegmentConsumer</code> object. This is used to * create a segment consumer used to consume and parse a HTTP message * header. This delegates parsing of headers if they represent special * headers, like content type or cookies. */ public SegmentConsumer() { this(1048576); } /** * Constructor for the <code>SegmentConsumer</code> object. This is used to * create a segment consumer used to consume and parse a HTTP message * header. This delegates parsing of headers if they represent special * headers, like content type or cookies. * * @param limit * this is the length limit for a HTTP header */ public SegmentConsumer(int limit) { this.language = new LanguageParser(); this.cookies = new CookieParser(); this.header = new MessageHeader(); this.limit = limit; this.length = -1; } /** * This method is used to determine the type of a part. Typically a part is * either a text parameter or a file. If this is true then the content * represented by the associated part is a file. * * @return this returns true if the associated part is a file */ @Override public boolean isFile() { if (this.disposition == null) return false; return this.disposition.isFile(); } /** * This method is used to acquire the name of the part. Typically this is * used when the part represents a text parameter rather than a file. * However, this can also be used with a file part. * * @return this returns the name of the associated part */ @Override public String getName() { if (this.disposition == null) return null; return this.disposition.getName(); } /** * This method is used to acquire the file name of the part. This is used * when the part represents a text parameter rather than a file. However, * this can also be used with a file part. * * @return this returns the file name of the associated part */ @Override public String getFileName() { if (this.disposition == null) return null; return this.disposition.getFileName(); } /** * This is a convenience method that can be used to determine the content * type of the message body. This will determine whether there is a * <code>Content-Type</code> header, if there is then this will parse that * header and represent it as a typed object which will expose the various * parts of the HTTP header. * * @return this returns the content type value if it exists */ @Override public ContentType getContentType() { return this.type; } /** * This is a convenience method that can be used to determine the length of * the message body. This will determine if there is a * <code>Content-Length</code> header, if it does then the length can be * determined, if not then this returns -1. * * @return the content length, or -1 if it cannot be determined */ @Override public long getContentLength() { return this.length; } /** * This is a convenience method that can be used to determine the content * type of the message body. This will determine whether there is a * <code>Transfer-Encoding</code> header, if there is then this will parse * that header and return the first token in the comma separated list of * values, which is the primary value. * * @return this returns the transfer encoding value if it exists */ @Override public String getTransferEncoding() { return this.encoding; } /** * This is a convenience method that can be used to determine the content * type of the message body. This will determine whether there is a * <code>Content-Disposition</code> header, if there is this will parse that * header and represent it as a typed object which will expose the various * parts of the HTTP header. * * @return this returns the content disposition value if it exists */ @Override public ContentDisposition getDisposition() { return this.disposition; } /** * This is used to acquire the locales from the request header. The locales * are provided in the <code>Accept-Language</code> header. This provides an * indication as to the languages that the client accepts. It provides the * locales in preference order. * * @return this returns the locales preferred by the client */ public List<Locale> getLocales() { if (this.language != null) return this.language.list(); return Collections.emptyList(); } /** * This can be used to get the values of HTTP message headers that have the * specified name. This is a convenience method that will present that * values as tokens extracted from the header. This has obvious performance * benefits as it avoids having to deal with <code>substring</code> and * <code>trim</code> calls. * <p> * The tokens returned by this method are ordered according to there HTTP * quality values, or "q" values, see RFC 2616 section 3.9. This also strips * out the quality parameter from tokens returned. So "image/html; q=0.9" * results in "image/html". If there are no "q" values present then order is * by appearance. * <p> * The result from this is either the trimmed header value, that is, the * header value with no leading or trailing whitespace or an array of * trimmed tokens ordered with the most preferred in the lower indexes, so * index 0 is has highest preference. * * @param name * the name of the headers that are to be retrieved * * @return ordered array of tokens extracted from the header(s) */ @Override public List<String> getValues(String name) { return this.header.getValues(name); } /** * This can be used to get the value of the first message header that has * the specified name. The value provided from this will be trimmed so there * is no need to modify the value, also if the header name specified refers * to a comma separated list of values the value returned is the first value * in that list. This returns null if theres no HTTP message header. * * @param name * the HTTP message header to get the value from * * @return this returns the value that the HTTP message header */ @Override public String getValue(String name) { return this.header.getValue(name); } /** * This is used to determine if the header represents one that requires the * HTTP/1.1 continue expectation. If the request does require this * expectation then it should be send the 100 status code which prompts * delivery of the message body. * * @return this returns true if a continue expectation exists */ public boolean isExpectContinue() { return this.expect; } /** * This method is used to add an additional chunk size to the internal * array. Resizing of the internal array is required as the consumed bytes * may exceed the initial size of the array. In such a scenario the array is * expanded the chunk size. * * @param size * this is the minimum size to expand the array to */ @Override protected void resize(int size) throws IOException { if (size > this.limit) throw new IOException("Header has exceeded maximum size"); super.resize(size); } /** * This is used to process the headers when the terminal token has been * fully read from the consumed bytes. Processing will extract all headers * from the HTTP header message and further parse those values if required. */ @Override protected void process() throws IOException { this.headers(); } /** * This is used to parse the headers from the consumed HTTP header and add * them to the segment. Once added they are available via the header name in * a case insensitive manner. If the header has a special value, that is, if * further information is required it will be extracted and exposed in the * segment interface. */ protected void headers() { while (this.pos < this.count) { this.header(); this.add(this.name, this.value); } } /** * This is used to parse a header from the consumed HTTP message and add * them to the segment. Once added it is available via the header name in a * case insensitive manner. If the header has a special value, that is, if * further information is required it will be extracted and exposed in the * segment interface. */ private void header() { this.adjust(); this.name(); this.adjust(); this.value(); this.end(); } /** * This is used to add the name and value specified as a special header * within the segment. Special headers are those where there are values of * interest to the segment. For instance the Content-Length, Content-Type, * and Cookie headers are parsed using an external parser to extract the * values. * * @param name * this is the name of the header to be added * @param value * this is the value of the header to be added */ protected void add(String name, String value) { if (this.equal(ACCEPT_LANGUAGE, name)) { this.language(value); } else if (this.equal(CONTENT_LENGTH, name)) { this.length(value); } else if (this.equal(CONTENT_TYPE, name)) { this.type(value); } else if (this.equal(CONTENT_DISPOSITION, name)) { this.disposition(value); } else if (this.equal(TRANSFER_ENCODING, name)) { this.encoding(value); } else if (this.equal(EXPECT, name)) { this.expect(value); } else if (this.equal(COOKIE, name)) { this.cookie(value); } this.header.addValue(name, value); } /** * This is used to determine if the expect continue header is present and * thus there is a requirement to send the continue status before the client * sends the request body. This will basically assume the expectation is * always continue. * * @param value * the value in the expect continue header */ protected void expect(String value) { this.expect = true; } /** * This will accept any cookie header and parse it such that all cookies * within it are converted to <code>Cookie</code> objects and made available * as typed objects. If the value can not be parsed this will not add the * cookie value. * * @param value * this is the value of the cookie to be parsed */ protected void cookie(String value) { this.cookies.parse(value); for (Cookie cookie : this.cookies) { this.header.setCookie(cookie); } } /** * This is used to parse the <code>Accept-Language</code> header value. This * allows the locales the client is interested in to be provided in * preference order and allows the client do alter and response based on the * locale the client has provided. * * @param value * this is the value that is to be parsed */ protected void language(String value) { this.language = new LanguageParser(value); } /** * This is used to parse the content type header header so that the MIME * type is available to the segment. This provides an instance of the * <code>ContentType</code> object to represent the content type header, * which exposes the charset value. * * @param value * this is the content type value to parse */ protected void type(String value) { this.type = new ContentTypeParser(value); } /** * This is used to parse the content disposition header header so that the * MIME type is available to the segment. This provides an instance of the * <code>Disposition<code> object to represent * the content disposition, this exposes the upload type. * * @param value * this is the content type value to parse */ protected void disposition(String value) { this.disposition = new ContentDispositionParser(value); } /** * This is used to store the transfer encoding header value. This is used to * determine the encoding of the body this segment represents. Typically * this will be the chunked encoding. * * @param value * this is the value representing the encoding */ protected void encoding(String value) { this.encoding = value; } /** * This is used to parse a provided header value for the content length. If * the string provided is not an integer value this will throw a number * format exception, by default length is -1. * * @param value * this is the header value of the content length */ protected void length(String value) { try { this.length = Long.parseLong(value); } catch (Exception e) { this.length = -1; } } /** * This updates the token for the header name. The name is parsed according * to the presence of a colon ':'. Once a colon character is encountered * then this header name is considered to be read from the buffer and is * used to key the value after the colon. */ private void name() { Token token = new Token(this.pos, 0); while (this.pos < this.count) { if (this.array[this.pos] == ':') { this.pos++; break; } token.size++; this.pos++; } this.name = token.text(); } /** * This is used to parse the HTTP header value. This will parse it in such a * way that the line can be folded over several lines see RFC 2616 for the * syntax of a folded line. The folded line is basically a way to wrap a * single HTTP header into several lines using a tab at the start of the * following line to indicate that the header flows onto the next line. */ private void value() { Token token = new Token(this.pos, 0); scan: for (int mark = 0; this.pos < this.count;) { if (this.terminal(this.array[this.pos])) { /* CR or LF */ for (int i = 0; this.pos < this.count; i++) { if (this.array[this.pos++] == 10) { /* skip the LF */ if (this.pos < this.array.length) { if (this.space(this.array[this.pos])) { mark += i + 1; /* account for bytes examined */ break; /* folding line */ } } break scan; /* not a folding line */ } } } else { if (!this.space(this.array[this.pos])) { token.size = ++mark; } else { mark++; } this.pos++; } } this.value = token.text(); } /** * This will update the offset variable so that the next read will be of a * non whitespace character. According to RFC 2616 a white space character * is a tab or a space. This will remove multiple occurrences of whitespace * characters until an non-whitespace character is encountered. */ protected void adjust() { while (this.pos < this.count) { if (!this.space(this.array[this.pos])) { break; } this.pos++; } } /** * This will update the offset variable so that the next read will be a non * whitespace character or terminal character. According to RFC 2616 a white * space character is a tab or a space. This will remove multiple * occurrences of whitespace characters until an non-whitespace character or * a non-terminal is encountered. This is basically used to follow through * to the end of a header line. */ protected void end() { while (this.pos < this.count) { if (!this.white(this.array[this.pos])) { break; } this.pos++; } } /** * This method is used to scan for the terminal token. It searches for the * token and returns the number of bytes in the buffer after the terminal * token. Returning the excess bytes allows the consumer to reset the bytes * within the consumer object. * * @return this returns the number of excess bytes consumed */ @Override protected int scan() { int length = this.count; while (this.pos < this.count) { if (this.array[this.pos++] != TERMINAL[this.scan++]) { this.scan = 0; } if (this.scan == TERMINAL.length) { this.done = true; this.count = this.pos; this.pos = 0; return length - this.count; } } return 0; } /** * This is used to determine if two header names are equal, this is done to * ensure that the case insensitivity of HTTP header names is observed. * Special headers are processed using this consumer and this is used to * ensure the correct header is always matched. * * @param name * this is the name to compare the parsed token with * @param token * this is the header name token to examine * * @return true of the header name token is equal to the name */ protected boolean equal(String name, String token) { return name.equalsIgnoreCase(token); } /** * This identifies a given ISO-8859-1 byte as a space character. A space is * either a space or a tab character in ISO-8859-1. * * @param octet * the byte to determine whether it is a space * * @return true if it is a space character, false otherwise */ protected boolean space(byte octet) { return (octet == ' ') || (octet == '\t'); } /** * This determines if an ISO-8859-1 byte is a terminal character. A terminal * character is a carriage return or a line feed character. * * @param octet * the byte to determine whether it is a terminal * * @return true if it is a terminal character, false otherwise */ protected boolean terminal(byte octet) { return (octet == 13) || (octet == 10); } /** * This is used to determine if a given ISO-8859-1 byte is a white space * character, such as a tab or space or a terminal character, such as a * carriage return or a new line. If it is, this will return true otherwise * it returns false. * * @param octet * this is to be checked to see if it is a space * * @return true if the byte is a space character, false otherwise */ protected boolean white(byte octet) { switch (octet) { case ' ': case '\r': case '\n': case '\t': return true; default: return false; } } /** * This is used to provide a string representation of the header read. * Providing a string representation of the header is used so that on * debugging the contents of the delivered header can be inspected in order * to determine a cause of error. * * @return this returns a string representation of the header */ @Override public String toString() { return new String(this.array, 0, this.count); } /** * This is used to track the boundaries of a token so that it can be * converted in to a usable string. This will track the length and offset * within the consumed array of the token. When the token is to be used it * can be converted in to a string. */ private class Token { /** * This is used to track the number of bytes within the array. */ public int size; /** * This is used to mark the start offset within the array. */ public int off; /** * Constructor for the <code>Token</code> object. This is used to create * a new token to track the range of bytes that will be used to create a * string representing the parsed value. * * @param off * the starting offset for the token range * @param size * the number of bytes used for the token */ public Token(int off, int size) { this.off = off; this.size = size; } /** * This is used to convert the byte range to a string. This will use * UTF-8 encoding for the string which is compatible with the HTTP * default header encoding of ISO-8859-1. * * @return the encoded string representing the token */ public String text() { return this.text("UTF-8"); } /** * This is used to convert the byte range to a string. This will use * specified encoding, if that encoding is not supported then this will * return null for the token value. * * @return the encoded string representing the token */ public String text(String charset) { try { return new String(SegmentConsumer.this.array, this.off, this.size, charset); } catch (IOException e) { return null; } } } }