/** * Copyright 2015 StreamSets Inc. * * Licensed under the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.streamsets.pipeline.lib.parser.log; import com.streamsets.pipeline.lib.parser.DataParserException; import com.streamsets.pipeline.lib.parser.shaded.org.aicer.grok.dictionary.GrokDictionary; import com.streamsets.pipeline.lib.parser.shaded.org.aicer.grok.util.Grok; import org.junit.Assert; import org.junit.Test; import java.util.Map; public class TestApacheAccessLogHelper { private static final String COMMON_LOG_FORMAT = "%h %l %u [%t] \"%r\" %>s %b"; private static final String COMMON_LOG_FORMAT_LOG_LINE = "127.0.0.1 ss h [10/Oct/2000:13:55:36 -0700] \"GET /apache_pb.gif HTTP/1.0\" 200 2326"; private static final String NCSA_COMBINED_LOG_FORMAT = "%h %l %u [%t] \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\""; private static final String NCSA_COMBINED_LOG_FORMAT_LOG_LINE = "127.0.0.1 ss h [10/Oct/2000:13:55:36 -0700] \"GET /apache_pb.gif HTTP/1.0\" 200 2326" + " \"http://www.example.com/start.html\" \"Mozilla/4.08 [en] (Win98; I ;Nav)\""; private static final String SIMPLE_FORMAT = "%H %m [%t] %U"; private static final String SIMPLE_LOG_LINE = "HTTP/1.1 GET [12/Oct/2006:16:49:06 +0530] /index.php"; private static final String RFC_822_DATE = "%{%a, %d %b %y %T %z}t"; @Test public void testApacheFormatConversion() throws DataParserException { Assert.assertEquals( "^%{IPORHOST:remoteHost} %{USER:logName} %{USER:remoteUser} \\[(?<requestTime>%{HTTPDATE})\\] \"%{DATA:request}\" " + "%{NUMBER:status} (?:%{NUMBER:bytesSent}|-)", ApacheCustomLogHelper.translateApacheLayoutToGrok(COMMON_LOG_FORMAT)); Assert.assertEquals( "^%{IPORHOST:remoteHost} %{USER:logName} %{USER:remoteUser} \\[(?<requestTime>%{HTTPDATE})\\] \"%{DATA:request}\" " + "%{NUMBER:status} (?:%{NUMBER:bytesSent}|-) \"%{DATA:referer}\" \"%{DATA:userAgent}\"", ApacheCustomLogHelper.translateApacheLayoutToGrok(NCSA_COMBINED_LOG_FORMAT)); Assert.assertEquals( "^HTTP/%{NUMBER:httpversion} %{WORD:requestMethod} \\[(?<requestTime>%{HTTPDATE})\\] %{NOTSPACE:urlPath}", ApacheCustomLogHelper.translateApacheLayoutToGrok(SIMPLE_FORMAT)); } @Test public void testCustomPatternConversion1() throws DataParserException { GrokDictionary grokDictionary = createGrokDictionary(); Grok grok = grokDictionary.compileExpression(ApacheCustomLogHelper.translateApacheLayoutToGrok(COMMON_LOG_FORMAT)); Map<String, String> namedGroupToValuesMap = grok.extractNamedGroups(COMMON_LOG_FORMAT_LOG_LINE); Assert.assertTrue(namedGroupToValuesMap.containsKey("remoteHost")); Assert.assertEquals("127.0.0.1", namedGroupToValuesMap.get("remoteHost")); Assert.assertTrue(namedGroupToValuesMap.containsKey("logName")); Assert.assertEquals("ss", namedGroupToValuesMap.get("logName")); Assert.assertTrue(namedGroupToValuesMap.containsKey("remoteUser")); Assert.assertEquals("h", namedGroupToValuesMap.get("remoteUser")); Assert.assertTrue(namedGroupToValuesMap.containsKey("requestTime")); Assert.assertEquals("10/Oct/2000:13:55:36 -0700", namedGroupToValuesMap.get("requestTime")); Assert.assertTrue(namedGroupToValuesMap.containsKey("request")); Assert.assertEquals("GET /apache_pb.gif HTTP/1.0", namedGroupToValuesMap.get("request")); Assert.assertTrue(namedGroupToValuesMap.containsKey("status")); Assert.assertEquals("200", namedGroupToValuesMap.get("status")); Assert.assertTrue(namedGroupToValuesMap.containsKey("bytesSent")); Assert.assertEquals("2326", namedGroupToValuesMap.get("bytesSent")); } @Test public void testCustomPatternConversion2() throws DataParserException { GrokDictionary grokDictionary = createGrokDictionary(); Grok grok = grokDictionary.compileExpression( ApacheCustomLogHelper.translateApacheLayoutToGrok(NCSA_COMBINED_LOG_FORMAT)); Map<String, String> namedGroupToValuesMap = grok.extractNamedGroups(NCSA_COMBINED_LOG_FORMAT_LOG_LINE); Assert.assertTrue(namedGroupToValuesMap.containsKey("remoteHost")); Assert.assertEquals("127.0.0.1", namedGroupToValuesMap.get("remoteHost")); Assert.assertTrue(namedGroupToValuesMap.containsKey("logName")); Assert.assertEquals("ss", namedGroupToValuesMap.get("logName")); Assert.assertTrue(namedGroupToValuesMap.containsKey("remoteUser")); Assert.assertEquals("h", namedGroupToValuesMap.get("remoteUser")); Assert.assertTrue(namedGroupToValuesMap.containsKey("requestTime")); Assert.assertEquals("10/Oct/2000:13:55:36 -0700", namedGroupToValuesMap.get("requestTime")); Assert.assertTrue(namedGroupToValuesMap.containsKey("request")); Assert.assertEquals("GET /apache_pb.gif HTTP/1.0", namedGroupToValuesMap.get("request")); Assert.assertTrue(namedGroupToValuesMap.containsKey("status")); Assert.assertEquals("200", namedGroupToValuesMap.get("status")); Assert.assertTrue(namedGroupToValuesMap.containsKey("bytesSent")); Assert.assertEquals("2326", namedGroupToValuesMap.get("bytesSent")); Assert.assertTrue(namedGroupToValuesMap.containsKey("referer")); Assert.assertEquals("http://www.example.com/start.html", namedGroupToValuesMap.get("referer")); Assert.assertTrue(namedGroupToValuesMap.containsKey("userAgent")); Assert.assertEquals("Mozilla/4.08 [en] (Win98; I ;Nav)", namedGroupToValuesMap.get("userAgent")); } @Test public void testCustomPatternConversion3() throws DataParserException { GrokDictionary grokDictionary = createGrokDictionary(); Grok grok = grokDictionary.compileExpression(ApacheCustomLogHelper.translateApacheLayoutToGrok(SIMPLE_FORMAT)); Map<String, String> namedGroupToValuesMap = grok.extractNamedGroups(SIMPLE_LOG_LINE); Assert.assertTrue(namedGroupToValuesMap.containsKey("httpversion")); Assert.assertEquals("1.1", namedGroupToValuesMap.get("httpversion")); Assert.assertTrue(namedGroupToValuesMap.containsKey("requestMethod")); Assert.assertEquals("GET", namedGroupToValuesMap.get("requestMethod")); Assert.assertTrue(namedGroupToValuesMap.containsKey("requestTime")); Assert.assertEquals("12/Oct/2006:16:49:06 +0530", namedGroupToValuesMap.get("requestTime")); Assert.assertTrue(namedGroupToValuesMap.containsKey("urlPath")); Assert.assertEquals("/index.php", namedGroupToValuesMap.get("urlPath")); } private GrokDictionary createGrokDictionary() { GrokDictionary grokDictionary = new GrokDictionary(); //Add grok patterns and Java patterns by default grokDictionary.addDictionary(getClass().getClassLoader().getResourceAsStream(Constants.GROK_PATTERNS_FILE_NAME)); grokDictionary.addDictionary(getClass().getClassLoader().getResourceAsStream( Constants.GROK_JAVA_LOG_PATTERNS_FILE_NAME)); grokDictionary.addDictionary(getClass().getClassLoader().getResourceAsStream( Constants.GROK_LOG4J_LOG_PATTERNS_FILE_NAME)); grokDictionary.bind(); return grokDictionary; } }