package com.wangyin.ak47.pipes.http; import java.util.ArrayList; import java.util.List; import java.util.StringTokenizer; import com.wangyin.ak47.common.ByteUtil; import com.wangyin.ak47.common.Logger; import com.wangyin.ak47.core.Buffer; /** * 简单的Http协议解析器 * 无依赖第三方解析库 * 支持 chunked * * @author wyhubingyin * */ public class SimpleHttpParser { private static final Logger log = new Logger(SimpleHttpParser.class); // \n public static final byte LF = 10; // \r public static final byte CR = 13; // \r\n public static final byte[] CRLF_BYTE_ARRAY = new byte[]{CR,LF}; // \r\n string public static final String CRLF_STRING = "\r\n"; // 一行最大长度 private static final int MAX_HEADER_LINE_LENGTH_LIMIT = 4 * 1024; // startline + header 的最大字节数 private static final int MAX_HEADERS_BLOCK_SIZE_LIMIT = 8 * 1024; /** * Buffer →→→ HttpRequest * * @param buf Buffer * @return HttpRequest */ public static SimpleHttpRequest parseRequest(Buffer buf){ SimpleHttpMessage shm = parseMessage(buf); if( null == shm ){ return null; } return SimpleHttpRequest.valueOf(shm); } /** * Buffer →→→ HttpResponse * * @param buf Buffer * @return HttpResponse */ public static SimpleHttpResponse parseResponse(Buffer buf){ SimpleHttpMessage shm = parseMessage(buf); if( null == shm ){ return null; } return SimpleHttpResponse.valueOf(shm); } /** * Buffer →→→ HttpMessage * * @param buf Buffer * @return HttpMessage */ public static SimpleHttpMessage parseMessage(Buffer buf){ int beginIndex = buf.readerIndex(); int allReadableNum = buf.readableBytes(); int firstTryReadableNum = Math.min(allReadableNum, MAX_HEADERS_BLOCK_SIZE_LIMIT); byte[] firstTryBytes = new byte[firstTryReadableNum]; buf.readBytes(firstTryBytes); SimpleHttpMessage shm = new SimpleHttpMessage(); // readOffset int readOffset = 0; // parse startLine for( ; readOffset < firstTryBytes.length-1 && readOffset < MAX_HEADER_LINE_LENGTH_LIMIT; readOffset++ ){ if( firstTryBytes[readOffset] == CR && firstTryBytes[readOffset+1] == LF){ break; } } if( readOffset == MAX_HEADER_LINE_LENGTH_LIMIT ){ log.warn("decode HttpMessage fail. StartLine is too long."); buf.readerIndex(beginIndex); return null; } if( readOffset >= firstTryBytes.length-1 ){ log.debug("Need more read in parsing start line."); buf.readerIndex(beginIndex); return null; } String startLine = new String(firstTryBytes, 0, readOffset); // //下面的代码由于性能问题舍弃掉 // String[] startLineItems = startLine.split(" ",3); // if( startLineItems.length != 3 ){ // log.warn("Malformed first line."); // buf.readerIndex(beginIndex); // return null; // }else if( !startLineItems[2].startsWith("HTTP") && !startLineItems[0].startsWith("HTTP")){ // log.warn("Malformed first line, may NOT http."); // buf.readerIndex(beginIndex); // return null; // } StringTokenizer strToken = new StringTokenizer(startLine, " "); if( strToken.countTokens() < 3 ){ log.warn("Malformed first line: {}.", startLine); buf.readerIndex(beginIndex); return null; } shm.setStartLine(startLine); // parse headers readOffset = readOffset + 2; while( readOffset < firstTryBytes.length-1 ){ if( firstTryBytes[readOffset] == CR && firstTryBytes[readOffset+1] == LF ){ // header end break; } int lineBeginIndex = readOffset; int lineReadLen = 0; while( readOffset < firstTryBytes.length-1 && lineReadLen < MAX_HEADER_LINE_LENGTH_LIMIT ){ if( firstTryBytes[readOffset] == CR && firstTryBytes[readOffset+1] == LF ){ break; } readOffset++; lineReadLen++; } if( readOffset >= firstTryBytes.length-1 ){ log.debug("Need more read in parsing headers."); buf.readerIndex(beginIndex); return null; } if( lineReadLen == MAX_HEADER_LINE_LENGTH_LIMIT ){ log.error("decode HttpMessage fail. Header is too long."); buf.readerIndex(beginIndex); return null; } readOffset = readOffset + 2; String headerLine = new String(firstTryBytes, lineBeginIndex, lineReadLen); // // 由于split的性能问题,舍弃 // String[] headerLineItems = headerLine.split(":", 2); // if( headerLineItems.length != 2 ){ // log.warn("decode HttpMessage fail. Malformed header line."); // buf.readerIndex(beginIndex); // return null; // } // String skey = headerLineItems[0].trim().toLowerCase(); // String value = headerLineItems[1].trim(); StringTokenizer headerToken = new StringTokenizer(headerLine, ":"); if( headerToken.hasMoreTokens() ){ String key = headerToken.nextToken(); if( headerToken.hasMoreTokens() ){ String value = headerToken.nextToken().trim(); // key = StringUtil2.asciiToLowerCase(key); shm.addHeader(key, value); }else{ log.warn("HttpMessage decode fail. Malformed header line[{}].", headerLine); buf.readerIndex(beginIndex); return null; } }else{ log.warn("HttpMessage decode fail. Malformed header line[{}].", headerLine); buf.readerIndex(beginIndex); return null; } } if( readOffset >= firstTryBytes.length-1 ){ log.debug("Need more read in parsing headers."); buf.readerIndex(beginIndex); return null; } // parse content readOffset = readOffset + 2; String sContentLength = shm.getHeaderFirst("Content-Length"); String transferEncoding = shm.getHeaderFirst("Transfer-Encoding"); if( null != sContentLength ){ // check content-length int contentLength = Integer.parseInt(sContentLength); if( contentLength > 0 ){ if( readOffset + contentLength > allReadableNum ){ log.debug("Need more read in parsing content."); buf.readerIndex(beginIndex); return null; }else{ int diff = readOffset + contentLength - firstTryReadableNum; if( diff >= 0 ){ byte[] cntBytes = new byte[contentLength]; ByteUtil.copy(firstTryBytes, readOffset, cntBytes, 0, firstTryReadableNum - readOffset); buf.readBytes(cntBytes, firstTryReadableNum - readOffset, diff); shm.setContent(cntBytes); }else if( diff < 0 ){ byte[] cnt = new byte[contentLength]; ByteUtil.copy(firstTryBytes, readOffset, cnt, 0, contentLength); shm.setContent(cnt); buf.readerIndex(beginIndex + readOffset + contentLength); } } } }else if( transferEncoding != null && "chunked".equals(transferEncoding) ){ // read chunked // 如果一个HTTP消息(请求消息或应答消息)的Transfer-Encoding消息头的值为chunked,那么,消息体由数量未定的块组成,并以最后一个大小为0的块为结束。 // 每一个非空的块都以该块包含数据的字节数(字节数以十六进制表示)开始,跟随一个CRLF (回车及换行),然后是数据本身,最后块CRLF结束。在一些实现中,块大小和CRLF之间填充有白空格(0x20)。 // 最后一块是单行,由块大小(0),一些可选的填充白空格,以及CRLF。最后一块不再包含任何数据,但是可以发送可选的尾部,包括消息头字段。 // 消息最后以CRLF结尾。 // 示例: // HTTP/1.1 200 OK // Content-Type: text/plain // Transfer-Encoding: chunked // // 25 // This is the data in the first chunk // // 1C // and this is the second one // // 3 // con // 8 // sequence // 0 int diff = allReadableNum - firstTryReadableNum; byte[] cntBytes = null; if( diff > 0 ){ cntBytes = new byte[allReadableNum - readOffset]; ByteUtil.copy(firstTryBytes, readOffset, cntBytes, 0, firstTryReadableNum-readOffset); buf.readBytes(cntBytes, firstTryReadableNum-readOffset, diff); }else{ // diff == 0 cntBytes = new byte[firstTryReadableNum - readOffset]; ByteUtil.copy(firstTryBytes, readOffset, cntBytes, 0, firstTryReadableNum - readOffset); } int cntReadOffset = 0; List<byte[]> chunkedList = new ArrayList<byte[]>(); while( cntReadOffset < cntBytes.length-1 ){ // 读出块大小 int lineBeginIndex = cntReadOffset; int lineReadLen = 0; while( cntReadOffset < cntBytes.length-1 && lineReadLen < MAX_HEADER_LINE_LENGTH_LIMIT ){ if( cntBytes[cntReadOffset] == CR && cntBytes[cntReadOffset+1] == LF ){ break; } cntReadOffset++; lineReadLen++; } if( cntReadOffset >= cntBytes.length-1 ){ log.debug("Need more read in parsing chunked body."); buf.readerIndex(beginIndex); return null; } if( lineReadLen == MAX_HEADER_LINE_LENGTH_LIMIT ){ log.error("decode HttpMessage fail. chunked size is too long."); buf.readerIndex(beginIndex); return null; } cntReadOffset = cntReadOffset + 2; String chunkedSizeLine = new String(cntBytes, lineBeginIndex, lineReadLen); log.error("chunkedSizeLine: {}", chunkedSizeLine); int chunkedSize = Integer.parseInt(chunkedSizeLine, 16); if( chunkedSize == 0 ){ // last chunked break; } if( cntReadOffset + chunkedSize + 2 > cntBytes.length ){ log.debug("Need more read in parsing chunked body."); buf.readerIndex(beginIndex); return null; } byte[] chunked = ByteUtil.copyOf(cntBytes, cntReadOffset, chunkedSize); chunkedList.add(chunked); // drop crlf cntReadOffset = cntReadOffset + chunkedSize + 2; } if( cntReadOffset >= cntBytes.length-1 ){ log.debug("Need more read in parsing chunked body."); buf.readerIndex(beginIndex); return null; } // 拼装body byte[] content = ByteUtil.merge( chunkedList ); shm.setContent(content); }else{ // // 如果没有content-length,并且非chunked,这里肯定是一个错误的HttpMessage。 // // 这里采取高容错率做法,能读多少就读多少。可能不太好。 // log.error("Http response header does NOT has 'content-length' or 'chunked'. Just read as mush as possible of content."); // // int diff = allReadableNum - firstTryReadableNum; // byte[] cntBytes = null; // if( diff > 0 ){ // cntBytes = new byte[allReadableNum - readOffset]; // ByteUtil.copy(firstTryBytes, readOffset, cntBytes, 0, firstTryReadableNum-readOffset); // // buf.readBytes(cntBytes, firstTryReadableNum-readOffset, diff); // }else{ // diff == 0 // cntBytes = new byte[firstTryReadableNum - readOffset]; // ByteUtil.copy(firstTryBytes, readOffset, cntBytes, 0, firstTryReadableNum - readOffset); // } // // shm.setContent(cntBytes); // 如果没有content-length,并且非chunked,那么就认为只有header。 // 也就是说,啥也不做。 } // return httpmessage return shm; } }