/** * Copyright 2015 StreamSets Inc. * * Licensed under the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.streamsets.pipeline.lib.parser.log; import com.streamsets.pipeline.lib.parser.DataParserException; import com.streamsets.pipeline.lib.parser.shaded.org.apache.catalina.util.Strftime; import java.util.Locale; public class ApacheCustomLogHelper { private static final int PERCENT_STATE = 0; private static final int LITERAL_STATE = 1; private static final int NEXT_LAYOUT = 2; private static final int ERROR = -1; private ApacheCustomLogHelper() {} public static String translateApacheLayoutToGrok(String patternLayout) throws DataParserException { int state = NEXT_LAYOUT; StringBuilder regex = new StringBuilder(); StringBuilder partialRegex = new StringBuilder(); String argument = null; int index = 0; while (index < patternLayout.length()) { char c = patternLayout.charAt(index); switch (c) { case ' ': regex.append(partialRegex.toString()).append(" "); partialRegex.setLength(0); break; case '%': state = PERCENT_STATE; //read the argument string index++; StringBuilder sb = new StringBuilder(); if (index < patternLayout.length()) { if (patternLayout.charAt(index) == '{') { index++; while (index < patternLayout.length() && patternLayout.charAt(index) != '}') { //read the number of characters sb.append(patternLayout.charAt(index++)); } argument = sb.toString(); } else if (patternLayout.charAt(index) == '>' || patternLayout.charAt(index) == '<') { sb.append(patternLayout.charAt(index)); argument = sb.toString(); } } if(argument == null) { index--; } break; case '-': if (state == PERCENT_STATE) { //FIXME confirm that this is not allowed and throw exception throw new DataParserException(Errors.LOG_PARSER_02, patternLayout); } else if (state == NEXT_LAYOUT) { partialRegex.append(c); } else if (state == LITERAL_STATE) { throw new DataParserException(Errors.LOG_PARSER_02, patternLayout); } break; case '[': case ']': if (state == PERCENT_STATE) { throw new DataParserException(Errors.LOG_PARSER_02, patternLayout); } else if (state == NEXT_LAYOUT) { partialRegex.append("\\").append(c); } else if (state == LITERAL_STATE) { throw new DataParserException(Errors.LOG_PARSER_02, patternLayout); } break; case '!': case ',': case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': if (state == PERCENT_STATE) { //ignore } else if (state == NEXT_LAYOUT) { partialRegex.append(c); } else { throw new DataParserException(Errors.LOG_PARSER_02, patternLayout); } break; case '.': if (state == PERCENT_STATE) { //FIXME confirm that this is not allowed and throw exception throw new DataParserException(Errors.LOG_PARSER_02, patternLayout); } else if (state == NEXT_LAYOUT) { partialRegex.append("\\."); } else { throw new DataParserException(Errors.LOG_PARSER_02, patternLayout); } break; case 'a': state = checkStateAndAppend(partialRegex, state, "%{IP:" + ApacheAccessLogConstants.REMOTE_IP_ADDRESS + "}", c); break; case 'A': state = checkStateAndAppend(partialRegex, state, "%{IP:" + ApacheAccessLogConstants.LOCAL_IP_ADDRESS + "}", c); break; case 'B': state = checkStateAndAppend(partialRegex, state, "%{NUMBER:" + ApacheAccessLogConstants.BYTES_SENT + "}", c); break; case 'b': state = checkStateAndAppend(partialRegex, state, "(?:%{NUMBER:" + ApacheAccessLogConstants.BYTES_SENT + "}|-)", c); break; case 'C': //ignore the name of the cookie from the format string state = checkStateAndAppend(partialRegex, state, "%{DATA:cookieContent}", c); //reset argument after consuming or in this case after ignoring argument = null; break; case 'D': state = checkStateAndAppend(partialRegex, state, "%{NUMBER:" + ApacheAccessLogConstants.TIME_TO_SERVE_MICROSECONDS + "}", c); break; case 'e': //ignore argument state = checkStateAndAppend(partialRegex, state, "%{DATA:envContent}", c); //reset argument after consuming or in this case after ignoring argument = null; break; case 'f': state = checkStateAndAppend(partialRegex, state, "%{JAVAFILE:" + ApacheAccessLogConstants.FILENAME + "}", c); break; case 'h': state = checkStateAndAppend(partialRegex, state, "%{IPORHOST:" + ApacheAccessLogConstants.REMOTE_HOST + "}", c); break; case 'H': state = checkStateAndAppend(partialRegex, state, "HTTP/%{NUMBER:" + ApacheAccessLogConstants.HTTP_VERSION + "}", c); break; case 'i': //use argument /*if(argument != null && argument.equals("Referer")) { //(?<requestTime>" + datePattern + ")" state = checkStateAndAppend(partialRegex, state, "(?<" + ApacheAccessLogHelper.REFERER + ">[^\"]+)", c); } else if (argument != null && argument.equals("User-agent")) { state = checkStateAndAppend(partialRegex, state, "(?<" + ApacheAccessLogHelper.USER_AGENT + ">[^\"]+)", c); } else { state = checkStateAndAppend(partialRegex, state, "%{DATA:" + "headerContent" + "}", c); }*/ if(argument != null && argument.equals("Referer")) { //(?<requestTime>" + datePattern + ")" state = checkStateAndAppend(partialRegex, state, "%{DATA:" + ApacheAccessLogConstants.REFERER + "}", c); } else if (argument != null && argument.equals("User-agent")) { state = checkStateAndAppend(partialRegex, state, "%{DATA:" + ApacheAccessLogConstants.USER_AGENT + "}", c); } else { state = checkStateAndAppend(partialRegex, state, "%{DATA:" + "headerContent" + "}", c); } //reset argument after consuming argument = null; break; case 'k': state = checkStateAndAppend(partialRegex, state, "%{NUMBER:" + ApacheAccessLogConstants.KEEP_ALIVE + "}", c); break; case 'l': state = checkStateAndAppend(partialRegex, state, "%{USER:" + ApacheAccessLogConstants.LOG_NAME + "}", c); break; case 'm': state = checkStateAndAppend(partialRegex, state, "%{WORD:" + ApacheAccessLogConstants.REQUEST_METHOD + "}", c); break; case 'n': state = checkStateAndAppend(partialRegex, state, "%{DATA:note}", c); //reset argument after consuming or in this case after ignoring argument = null; break; case 'o': state = checkStateAndAppend(partialRegex, state, "%{DATA:contents}", c); //reset argument after consuming or in this case after ignoring argument = null; break; case 'p': //FIXME ignore argument if(argument != null) { if(!argument.equals("canonical") && !argument.equals("local") && !argument.equals("remote")) { throw new DataParserException(Errors.LOG_PARSER_02, patternLayout); } } state = checkStateAndAppend(partialRegex, state, "%{NUMBER:" + ApacheAccessLogConstants.CANONICAL_PORT + "}", c); //reset argument after consuming argument = null; break; case 'P': //FIXME ignore argument if(argument != null) { if(!argument.equals("pid") && !argument.equals("tid") && !argument.equals("hextid")) { throw new DataParserException(Errors.LOG_PARSER_02, patternLayout); } } state = checkStateAndAppend(partialRegex, state, "%{NUMBER:" + ApacheAccessLogConstants.CHILD_PID + "}", c); //reset argument after consuming argument = null; break; case 'q': state = checkStateAndAppend(partialRegex, state, "%{DATA:" + ApacheAccessLogConstants.QUERY_STRING + "}", c); break; case 'r': state = checkStateAndAppend(partialRegex, state, "%{DATA:" + ApacheAccessLogConstants.REQUEST + "}", c); break; case 'R': state = checkStateAndAppend(partialRegex, state, "%{DATA:" + ApacheAccessLogConstants.RESPONSE_HANDLER + "}", c); break; case 's': state = checkStateAndAppend(partialRegex, state, "%{NUMBER:" + ApacheAccessLogConstants.STATUS + "}", c); argument = null; break; case 't': state = checkStateAndAppend(partialRegex, state, getDatePatternFromArgument(argument), c); //reset argument after consuming argument = null; break; case 'T': state = checkStateAndAppend(partialRegex, state, "%{NUMBER:" + ApacheAccessLogConstants.TIME_TO_SERVE_REQUEST + "}", c); break; case 'u': state = checkStateAndAppend(partialRegex, state, "%{USER:" + ApacheAccessLogConstants.REMOTE_USER + "}", c); break; case 'U': state = checkStateAndAppend(partialRegex, state, "%{NOTSPACE:" + ApacheAccessLogConstants.URL_PATH + "}", c); break; case 'v': state = checkStateAndAppend(partialRegex, state, "%{HOST:" + ApacheAccessLogConstants.CANONICAL_SERVER_NAME + "}", c); break; case 'V': state = checkStateAndAppend(partialRegex, state, "%{HOST:" + ApacheAccessLogConstants.SERVER_NAME + "}", c); break; case 'X': state = checkStateAndAppend(partialRegex, state, "%{WORD:" + ApacheAccessLogConstants.CONNECTION_STATUS + "}", c); break; case 'I': state = checkStateAndAppend(partialRegex, state, "%{NUMBER:" + ApacheAccessLogConstants.BYTES_RECEIVED + "}", c); break; case 'O': state = checkStateAndAppend(partialRegex, state, "%{NUMBER:" + ApacheAccessLogConstants.BYTES_SENT + "}", c); break; default: if(state == PERCENT_STATE) { throw new DataParserException(Errors.LOG_PARSER_02, patternLayout); } partialRegex.append(c); } index++; } regex.append(partialRegex.toString()); return "^" /*begins with*/ + regex.toString().trim(); } private static String getArgument(String token, int index) { StringBuilder sb = new StringBuilder(); if (index < token.length()) { if (token.charAt(index) == '{') { index++; while (index < token.length() && token.charAt(index) != '}') { //read the number of characters sb.append(token.charAt(index++)); } return sb.toString(); } else if (token.charAt(index) == '>' || token.charAt(index) == '<') { sb.append(token.charAt(index)); return sb.toString(); } } return null; } public static String getDatePatternFromArgument(String dateArgument) { String datePattern; if(dateArgument == null || dateArgument.isEmpty()) { datePattern = getDefaultDatePattern(); } else { //convert the strftime(3) date layout to simple date format Strftime strftime = new Strftime(dateArgument, Locale.getDefault()); String simpleDateFormat = strftime.convertDateFormat(dateArgument); datePattern = Log4jHelper.getPatternFromDateArgument(simpleDateFormat); } return "(?<requestTime>" + datePattern + ")"; } public static String getDefaultDatePattern() { return "%{HTTPDATE}"; } private static int checkStateAndAppend(StringBuilder sb, int state, String pattern, char c) { if (state == PERCENT_STATE) { //encountered conversion character sb.append(pattern); state = NEXT_LAYOUT; } else if (state == NEXT_LAYOUT) { sb.append(c); } else { state = ERROR; } return state; } }