/**
 * Copyright 2015 StreamSets Inc.
 *
 * Licensed under the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.streamsets.pipeline.lib.parser.log;

import com.streamsets.pipeline.api.OnRecordError;
import com.streamsets.pipeline.api.Record;
import com.streamsets.pipeline.api.Stage;
import com.streamsets.pipeline.config.LogMode;
import com.streamsets.pipeline.lib.parser.DataParser;
import com.streamsets.pipeline.lib.parser.DataParserException;
import com.streamsets.pipeline.lib.parser.DataParserFactory;
import com.streamsets.pipeline.lib.parser.DataParserFactoryBuilder;
import com.streamsets.pipeline.lib.parser.DataParserFormat;
import com.streamsets.pipeline.sdk.ContextInfoCreator;
import org.junit.Assert;
import org.junit.Test;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.Collections;

/**
 * Tests for the LOG data parser configured in {@link LogMode#GROK} mode with a custom
 * grok pattern and pattern definition.
 */
public class TestGrokParser {

  /** Sample input line; the tests below expect pid "3223" and timestamp "26 Feb 23:59:01" from it. */
  private static final String LOG_LINE = "[3223] 26 Feb 23:59:01 Background append only file rewriting started by pid " +
    "19383 [19383] 26 Feb 23:59:01 SYNC append only file rewrite performed ";

  /** Grok pattern definitions, one "NAME pattern" entry per line. */
  private static final String REGEX_DEFINITION =
    "REDISTIMESTAMP %{MONTHDAY} %{MONTH} %{TIME}\n" +
    "REDISLOG \\[%{POSINT:pid}\\] %{REDISTIMESTAMP:timestamp} .*";

  /** Top-level grok pattern handed to the parser factory. */
  private static final String REGEX = "%{REDISLOG}";

  /**
   * Creates the source context passed to the parser factory builder.
   *
   * @return a source context created through {@link ContextInfoCreator}
   */
  private Stage.Context getContext() {
    return ContextInfoCreator.createSourceContext("i", false, OnRecordError.TO_ERROR,
      Collections.<String>emptyList());
  }

  /**
   * Parses {@link #LOG_LINE} from offset 0 and checks the source id, the retained original
   * line, the extracted fields and the offset reported after the record is read.
   */
  @Test
  public void testParse() throws Exception {
    DataParser parser = getDataParser(LOG_LINE, 1000, 0);
    // Close in finally so a failing assertion does not leak the parser.
    try {
      Assert.assertEquals(0, Long.parseLong(parser.getOffset()));
      Record record = parser.parse();
      Assert.assertNotNull(record);

      Assert.assertEquals("id::0", record.getHeader().getSourceId());

      Assert.assertEquals(LOG_LINE, record.get().getValueAsMap().get("originalLine").getValueAsString());

      Assert.assertFalse(record.has("/truncated"));

      Assert.assertEquals(146, Long.parseLong(parser.getOffset()));

      Assert.assertTrue(record.has("/timestamp"));
      Assert.assertEquals("26 Feb 23:59:01", record.get("/timestamp").getValueAsString());

      Assert.assertTrue(record.has("/pid"));
      Assert.assertEquals("3223", record.get("/pid").getValueAsString());
    } finally {
      parser.close();
    }
  }

  /**
   * Starts reading at offset 6 (just past the leading "Hello\n"), checks the parsed record,
   * and then checks that a second parse returns {@code null} with the offset reported as -1.
   */
  @Test
  public void testParseWithOffset() throws Exception {
    DataParser parser = getDataParser("Hello\n" + LOG_LINE, 1000, 6);
    // Close in finally so a failing assertion does not leak the parser.
    try {
      Assert.assertEquals(6, Long.parseLong(parser.getOffset()));
      Record record = parser.parse();
      Assert.assertNotNull(record);

      Assert.assertEquals("id::6", record.getHeader().getSourceId());

      Assert.assertEquals(LOG_LINE, record.get().getValueAsMap().get("originalLine").getValueAsString());

      Assert.assertFalse(record.has("/truncated"));

      Assert.assertEquals(152, Long.parseLong(parser.getOffset()));

      Assert.assertTrue(record.has("/timestamp"));
      Assert.assertEquals("26 Feb 23:59:01", record.get("/timestamp").getValueAsString());

      Assert.assertTrue(record.has("/pid"));
      Assert.assertEquals("3223", record.get("/pid").getValueAsString());

      record = parser.parse();
      Assert.assertNull(record);

      Assert.assertEquals(-1, Long.parseLong(parser.getOffset()));
    } finally {
      parser.close();
    }
  }

  /**
   * Calling {@code parse()} after {@code close()} is expected to throw an {@link IOException}.
   */
  @Test(expected = IOException.class)
  public void testClose() throws Exception {
    DataParser parser = getDataParser("Hello\nByte", 1000, 0);
    parser.close();
    parser.parse();
  }

  /**
   * With the maximum data length set to 7, parsing {@link #LOG_LINE} is expected to throw
   * a {@link DataParserException}.
   */
  @Test(expected = DataParserException.class)
  public void testTruncate() throws Exception {
    DataParser parser = getDataParser(LOG_LINE, 7, 0);
    try {
      Assert.assertEquals(0, Long.parseLong(parser.getOffset()));
      parser.parse();
    } finally {
      parser.close();
    }
  }

  /**
   * Parsing a line that the configured grok pattern is not expected to match should throw
   * a {@link DataParserException}.
   */
  @Test(expected = DataParserException.class)
  public void testParseNonLogLine() throws Exception {
    DataParser parser = getDataParser(
      "127.0.0.1 ss h [10/Oct/2000:13:55:36 -0700] This is a log line that does not confirm to common log format",
      1000, 0);
    try {
      Assert.assertEquals(0, Long.parseLong(parser.getOffset()));
      parser.parse();
    } finally {
      parser.close();
    }
  }

  /**
   * Builds a GROK-mode LOG parser over the given text.
   *
   * @param logLine text to expose to the parser as an input stream
   * @param maxObjectLength value passed to {@code setMaxDataLen}
   * @param readerOffset initial offset handed to the parser, as a decimal string
   * @return a parser with id "id" reading from the in-memory stream
   * @throws DataParserException if the factory cannot create the parser
   */
  private DataParser getDataParser(String logLine, int maxObjectLength, int readerOffset)
    throws DataParserException {
    // Explicit charset: the tests assert byte offsets, so the encoding must not depend on the platform default.
    InputStream is = new ByteArrayInputStream(logLine.getBytes(StandardCharsets.UTF_8));
    DataParserFactoryBuilder dataParserFactoryBuilder =
      new DataParserFactoryBuilder(getContext(), DataParserFormat.LOG);
    DataParserFactory factory = dataParserFactoryBuilder
      .setMaxDataLen(maxObjectLength)
      .setMode(LogMode.GROK)
      .setOverRunLimit(1000)
      .setConfig(LogDataParserFactory.RETAIN_ORIGINAL_TEXT_KEY, true)
      .setConfig(LogDataParserFactory.GROK_PATTERN_KEY, REGEX)
      .setConfig(LogDataParserFactory.GROK_PATTERN_DEFINITION_KEY, REGEX_DEFINITION)
      .build();
    return factory.getParser("id", is, String.valueOf(readerOffset));
  }
}