/** * Copyright 2015 StreamSets Inc. * * Licensed under the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.streamsets.pipeline.lib.parser.log; import com.streamsets.pipeline.lib.parser.DataParserException; public class Log4jHelper { private static final int PERCENT_STATE = 0; private static final int LITERAL_STATE = 1; private static final int NEXT_LAYOUT = 2; private static final int ERROR = -1; private static final String TRAILING_SPACE = "(?:\\s*)"; private Log4jHelper() {} public static String translateLog4jLayoutToGrok(String patternLayout) throws DataParserException { //remove trailing '%n's since the reader does not return the new line character while(patternLayout.endsWith("%n")) { patternLayout = patternLayout.substring(0, patternLayout.length()-2); } int state = NEXT_LAYOUT; StringBuilder regex = new StringBuilder(); StringBuilder partialRegex = new StringBuilder(); String argument; int index = 0; boolean leftPadWithSpace = false; boolean rightPadWithSpace = false; while (index < patternLayout.length()) { char c = patternLayout.charAt(index); switch (c) { case ' ': regex.append(partialRegex.toString()).append(" "); partialRegex.setLength(0); leftPadWithSpace = false; rightPadWithSpace = false; break; case '%': state = PERCENT_STATE; break; case '-': if (state == PERCENT_STATE) { rightPadWithSpace = true; } else if (state == NEXT_LAYOUT) { partialRegex.append(c); } else if (state == LITERAL_STATE) { throw new DataParserException(Errors.LOG_PARSER_02, patternLayout); } break; case '[': case ']': if (state == PERCENT_STATE) { throw new DataParserException(Errors.LOG_PARSER_02, patternLayout); } else if (state == NEXT_LAYOUT) { partialRegex.append("\\").append(c); } else if (state == LITERAL_STATE) { throw new DataParserException(Errors.LOG_PARSER_02, patternLayout); } break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': if (state == PERCENT_STATE) { if(!rightPadWithSpace && !leftPadWithSpace) { partialRegex.append(TRAILING_SPACE); leftPadWithSpace = true; } } else if (state == NEXT_LAYOUT) { partialRegex.append(c); } else { throw new DataParserException(Errors.LOG_PARSER_02, patternLayout); } break; case '.': if (state == PERCENT_STATE) { //no-op } else if (state == NEXT_LAYOUT) { partialRegex.append("\\."); } else { throw new DataParserException(Errors.LOG_PARSER_02, patternLayout); } break; case 'c' : index += ignoreArgument(patternLayout, index); state = checkStateAndAppend(partialRegex, state, "%{JAVACLASS:category}", c, rightPadWithSpace); break; case 'C' : index += ignoreArgument(patternLayout, index); state = checkStateAndAppend(partialRegex, state, "%{JAVACLASS:class}", c, rightPadWithSpace); break; case 'F' : index += ignoreArgument(patternLayout, index); state = checkStateAndAppend(partialRegex, state, "%{JAVAFILE:filename}", c, rightPadWithSpace); break; case 'l' : index += ignoreArgument(patternLayout, index); state = checkStateAndAppend(partialRegex, state, "%{JAVASTACKTRACEPART:location}", c, rightPadWithSpace); break; case 'L' : index += ignoreArgument(patternLayout, index); state = checkStateAndAppend(partialRegex, state, "%{NONNEGINT:line}", c, rightPadWithSpace); break; case 'm' : index += ignoreArgument(patternLayout, index); state = checkStateAndAppend(partialRegex, state, "%{GREEDYDATA:message}", c, rightPadWithSpace); break; case 'n' : index += ignoreArgument(patternLayout, index); state = checkStateAndAppend(partialRegex, state, "\\r?\\n", c, rightPadWithSpace); break; case 'M' : index += ignoreArgument(patternLayout, index); state = checkStateAndAppend(partialRegex, state, "%{WORD:method}", c, rightPadWithSpace); break; case 'p' : index += ignoreArgument(patternLayout, index); state = checkStateAndAppend(partialRegex, state, "%{LOGLEVEL:severity}", c, rightPadWithSpace); break; case 'r' : index += ignoreArgument(patternLayout, index); state = checkStateAndAppend(partialRegex, state, "%{INT:relativetime}", c, rightPadWithSpace); break; case 't' : index += ignoreArgument(patternLayout, index); state = checkStateAndAppend(partialRegex, state, "%{DATA:thread}", c, rightPadWithSpace); break; case 'x' : index += ignoreArgument(patternLayout, index); state = checkStateAndAppend(partialRegex, state, "%{DATA:ndc}?", c, rightPadWithSpace); break; case 'X' : argument = getArgument(patternLayout, ++index); if (null == argument) { state = checkStateAndAppend(partialRegex, state, getDefaultMDCPattern(), c, rightPadWithSpace); } else { index += argument.length() + 1 /* +1 for '}'*/; state = checkStateAndAppend(partialRegex, state, "%{DATA:" + argument + "}?", c, rightPadWithSpace); } break; case 'd' : argument = getArgument(patternLayout, ++index); if(argument != null) { index += argument.length() + 1 /* +1 for the '}'*/; } else { index--; } state = checkStateAndAppend(partialRegex, state, getDatePatternFromArgument(argument), c, rightPadWithSpace); break; default: if(state == PERCENT_STATE) { throw new DataParserException(Errors.LOG_PARSER_02, patternLayout); } partialRegex.append(c); } index++; } regex.append(partialRegex.toString()); return "^" /*begins with*/ + regex.toString().trim(); } private static int ignoreArgument(String token, int index) { String argument = getArgument(token, ++index); if(argument != null) { return argument.length() + 2 /*+1 for '{' and +1 for '}'*/; } return 0; } private static String getArgument(String token, int index) { if (index < token.length()) { if (token.charAt(index) == '{') { index++; StringBuilder sb = new StringBuilder(); while (index < token.length() && token.charAt(index) != '}') { //read the number of characters sb.append(token.charAt(index++)); } return sb.toString(); } } return null; } public static String getDatePatternFromArgument(String dateArgument) { String datePattern; if(dateArgument == null || dateArgument.isEmpty()) { datePattern = getDefaultDatePattern(); } else { switch (dateArgument) { case "ISO8601": datePattern = "%{TIMESTAMP_ISO8601}"; break; case "ABSOLUTE": datePattern = "%{HOUR}:%{MINUTE}:%{SECOND}"; break; case "DATE" : datePattern = "%{MONTHDAY} %{MONTH} %{YEAR} %{HOUR}:%{MINUTE}:%{SECOND}"; break; default: //custom date format, generate regex based on the date format datePattern = getPatternFromDateArgument(dateArgument); } } return "(?<timestamp>" + datePattern + ")"; } private static String getDefaultMDCPattern() { return "\\{(?<mdc>(?:\\{[^\\}]*,[^\\}]*\\})*)\\}"; } public static String getDefaultDatePattern() { return "%{TIMESTAMP_ISO8601}"; } public static String getPatternFromDateArgument(String dateFormat) { return ".{" + dateFormat.length() + "}"; } private static int checkStateAndAppend(StringBuilder sb, int state, String pattern, char c, boolean rightPadWithSpace) { if (state == PERCENT_STATE) { //encountered conversion character sb.append(pattern); if(rightPadWithSpace) { sb.append(TRAILING_SPACE); } state = NEXT_LAYOUT; } else if (state == NEXT_LAYOUT) { sb.append(c); } else { state = ERROR; } return state; } }