/** * Copyright 2015 StreamSets Inc. * * Licensed under the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.streamsets.pipeline.lib.parser.log; import com.streamsets.pipeline.api.OnRecordError; import com.streamsets.pipeline.api.Record; import com.streamsets.pipeline.api.Stage; import com.streamsets.pipeline.api.ext.io.OverrunReader; import com.streamsets.pipeline.lib.parser.DataParser; import com.streamsets.pipeline.lib.parser.DataParserException; import com.streamsets.pipeline.lib.parser.StringBuilderPoolFactory; import com.streamsets.pipeline.sdk.ContextInfoCreator; import org.apache.commons.pool2.impl.GenericObjectPool; import org.apache.commons.pool2.impl.GenericObjectPoolConfig; import org.junit.Assert; import org.junit.Test; import java.io.IOException; import java.io.StringReader; import java.util.Collections; import java.util.HashMap; import java.util.Map; import java.util.regex.Pattern; public class TestRegexParser { private static final String LOG_LINE = "127.0.0.1 ss h [10/Oct/2000:13:55:36 -0700] \"GET /apache_pb.gif HTTP/1.0\" " + "200 2326 Hello"; private static final String REGEX = "^(\\S+) (\\S+) (\\S+) \\[([\\w:/]+\\s[+\\-]\\d{4})\\] \"(\\S+ \\S+ \\S+)\" (\\d{3}) (\\d+)"; private static final Map<String, Integer> FIELD_TO_GROUP_MAP = new HashMap<>(); static { FIELD_TO_GROUP_MAP.put("remoteHost", 1); FIELD_TO_GROUP_MAP.put("logName", 2); FIELD_TO_GROUP_MAP.put("remoteUser", 3); FIELD_TO_GROUP_MAP.put("requestTime", 4); FIELD_TO_GROUP_MAP.put("request", 5); FIELD_TO_GROUP_MAP.put("status", 6); FIELD_TO_GROUP_MAP.put("bytesSent", 7); } private Stage.Context getContext() { return ContextInfoCreator.createSourceContext("i", false, OnRecordError.TO_ERROR, Collections.<String>emptyList()); } @Test public void testParse() throws Exception { OverrunReader reader = new OverrunReader(new StringReader(LOG_LINE), 1000, true, false); DataParser parser = new RegexParser(getContext(), "id", reader, 0, 1000, true, Pattern.compile(REGEX), FIELD_TO_GROUP_MAP, getStringBuilderPool(), getStringBuilderPool()); Assert.assertEquals(0, Long.parseLong(parser.getOffset())); Record record = parser.parse(); Assert.assertNotNull(record); Assert.assertEquals("id::0", record.getHeader().getSourceId()); Assert.assertEquals(LOG_LINE, record.get().getValueAsMap().get("originalLine").getValueAsString()); Assert.assertFalse(record.has("/truncated")); Assert.assertEquals(88, Long.parseLong(parser.getOffset())); Assert.assertTrue(record.has("/remoteHost")); Assert.assertEquals("127.0.0.1", record.get("/remoteHost").getValueAsString()); Assert.assertTrue(record.has("/logName")); Assert.assertEquals("ss", record.get("/logName").getValueAsString()); Assert.assertTrue(record.has("/remoteUser")); Assert.assertEquals("h", record.get("/remoteUser").getValueAsString()); Assert.assertTrue(record.has("/requestTime")); Assert.assertEquals("10/Oct/2000:13:55:36 -0700", record.get("/requestTime").getValueAsString()); Assert.assertTrue(record.has("/request")); Assert.assertEquals("GET /apache_pb.gif HTTP/1.0", record.get("/request").getValueAsString()); Assert.assertTrue(record.has("/status")); Assert.assertEquals("200", record.get("/status").getValueAsString()); Assert.assertTrue(record.has("/bytesSent")); Assert.assertEquals("2326", record.get("/bytesSent").getValueAsString()); parser.close(); } @Test public void testParseWithOffset() throws Exception { OverrunReader reader = new OverrunReader(new StringReader( "Hello\n" + LOG_LINE), 1000, true, false); DataParser parser = new RegexParser(getContext(), "id", reader, 6, 1000, true, Pattern.compile(REGEX), FIELD_TO_GROUP_MAP, getStringBuilderPool(), getStringBuilderPool()); Assert.assertEquals(6, Long.parseLong(parser.getOffset())); Record record = parser.parse(); Assert.assertNotNull(record); Assert.assertEquals("id::6", record.getHeader().getSourceId()); Assert.assertEquals(LOG_LINE, record.get().getValueAsMap().get("originalLine").getValueAsString()); Assert.assertFalse(record.has("/truncated")); Assert.assertEquals(94, Long.parseLong(parser.getOffset())); Assert.assertTrue(record.has("/remoteHost")); Assert.assertEquals("127.0.0.1", record.get("/remoteHost").getValueAsString()); Assert.assertTrue(record.has("/logName")); Assert.assertEquals("ss", record.get("/logName").getValueAsString()); Assert.assertTrue(record.has("/remoteUser")); Assert.assertEquals("h", record.get("/remoteUser").getValueAsString()); Assert.assertTrue(record.has("/requestTime")); Assert.assertEquals("10/Oct/2000:13:55:36 -0700", record.get("/requestTime").getValueAsString()); Assert.assertTrue(record.has("/request")); Assert.assertEquals("GET /apache_pb.gif HTTP/1.0", record.get("/request").getValueAsString()); Assert.assertTrue(record.has("/status")); Assert.assertEquals("200", record.get("/status").getValueAsString()); Assert.assertTrue(record.has("/bytesSent")); Assert.assertEquals("2326", record.get("/bytesSent").getValueAsString()); record = parser.parse(); Assert.assertNull(record); Assert.assertEquals(-1, Long.parseLong(parser.getOffset())); parser.close(); } @Test(expected = IOException.class) public void testClose() throws Exception { OverrunReader reader = new OverrunReader(new StringReader("Hello\nByte"), 1000, true, false); DataParser parser = new RegexParser(getContext(), "id", reader, 0, 1000, false, Pattern.compile(REGEX), FIELD_TO_GROUP_MAP, getStringBuilderPool(), getStringBuilderPool()); parser.close(); parser.parse(); } @Test(expected = DataParserException.class) public void testTruncate() throws Exception { OverrunReader reader = new OverrunReader(new StringReader( LOG_LINE), 1000, true, false); DataParser parser = new RegexParser(getContext(), "id", reader, 0, 25, true, Pattern.compile(REGEX), FIELD_TO_GROUP_MAP, getStringBuilderPool(), getStringBuilderPool()); //cut short to 25 Assert.assertEquals(0, Long.parseLong(parser.getOffset())); try { parser.parse(); } finally { parser.close(); } } @Test(expected = DataParserException.class) public void testParseNonLogLine() throws Exception { OverrunReader reader = new OverrunReader(new StringReader( "127.0.0.1 ss h [10/Oct/2000:13:55:36 -0700] This is a log line that does not confirm to common log format"), 1000, true, false); DataParser parser = new RegexParser(getContext(), "id", reader, 0, 1000, true, Pattern.compile(REGEX), FIELD_TO_GROUP_MAP, getStringBuilderPool(), getStringBuilderPool()); Assert.assertEquals(0, Long.parseLong(parser.getOffset())); try { parser.parse(); } finally { parser.close(); } } private GenericObjectPool<StringBuilder> getStringBuilderPool() { GenericObjectPoolConfig stringBuilderPoolConfig = new GenericObjectPoolConfig(); stringBuilderPoolConfig.setMaxTotal(1); stringBuilderPoolConfig.setMinIdle(1); stringBuilderPoolConfig.setMaxIdle(1); stringBuilderPoolConfig.setBlockWhenExhausted(false); return new GenericObjectPool<>(new StringBuilderPoolFactory(1024), stringBuilderPoolConfig); } }