/**
* Copyright 2015 StreamSets Inc.
*
* Licensed under the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.streamsets.pipeline.lib.parser.text;
import com.streamsets.pipeline.api.OnRecordError;
import com.streamsets.pipeline.api.Record;
import com.streamsets.pipeline.api.Stage;
import com.streamsets.pipeline.api.ext.io.OverrunException;
import com.streamsets.pipeline.api.ext.io.OverrunReader;
import com.streamsets.pipeline.lib.parser.DataParser;
import com.streamsets.pipeline.lib.parser.StringBuilderPoolFactory;
import com.streamsets.pipeline.sdk.ContextInfoCreator;
import org.apache.commons.pool2.impl.GenericObjectPool;
import org.apache.commons.pool2.impl.GenericObjectPoolConfig;
import org.junit.Assert;
import org.junit.Test;
import java.io.IOException;
import java.io.StringReader;
import java.util.Collections;
public class TestTextCharDataParser {
@SuppressWarnings("unchecked")
private Stage.Context getContext() {
return ContextInfoCreator.createSourceContext("i", false, OnRecordError.TO_ERROR, Collections.EMPTY_LIST);
}
@Test
public void testParse() throws Exception {
OverrunReader reader = new OverrunReader(new StringReader("Hello\nBye"), 1000, true, false);
DataParser parser = new TextCharDataParser(
getContext(),
"id",
false,
false,
"",
false,
reader,
0,
1000,
"text",
"truncated",
getStringBuilderPool()
);
Assert.assertEquals(0, Long.parseLong(parser.getOffset()));
Record record = parser.parse();
Assert.assertNotNull(record);
Assert.assertEquals("id::0", record.getHeader().getSourceId());
Assert.assertEquals("Hello", record.get().getValueAsMap().get("text").getValueAsString());
Assert.assertFalse(record.has("/truncated"));
Assert.assertEquals(6, Long.parseLong(parser.getOffset()));
record = parser.parse();
Assert.assertNotNull(record);
Assert.assertEquals("id::6", record.getHeader().getSourceId());
Assert.assertEquals("Bye", record.get().getValueAsMap().get("text").getValueAsString());
Assert.assertFalse(record.has("/truncated"));
Assert.assertEquals(9, Long.parseLong(parser.getOffset()));
record = parser.parse();
Assert.assertNull(record);
Assert.assertEquals(-1, Long.parseLong(parser.getOffset()));
parser.close();
}
@Test
public void testParseWithOffset() throws Exception {
OverrunReader reader = new OverrunReader(new StringReader("Hello\nBye"), 1000, true, false);
DataParser parser = new TextCharDataParser(
getContext(),
"id",
false,
false,
"",
false,
reader,
6,
1000,
"text",
"truncated",
getStringBuilderPool()
);
Assert.assertEquals(6, Long.parseLong(parser.getOffset()));
Record record = parser.parse();
Assert.assertNotNull(record);
Assert.assertEquals("id::6", record.getHeader().getSourceId());
Assert.assertEquals("Bye", record.get().getValueAsMap().get("text").getValueAsString());
Assert.assertFalse(record.has("/truncated"));
Assert.assertEquals(9, Long.parseLong(parser.getOffset()));
record = parser.parse();
Assert.assertNull(record);
Assert.assertEquals(-1, Long.parseLong(parser.getOffset()));
parser.close();
}
@Test(expected = IOException.class)
public void testClose() throws Exception {
OverrunReader reader = new OverrunReader(new StringReader("Hello\nByte"), 1000, true, false);
DataParser parser = new TextCharDataParser(
getContext(),
"id",
false,
false,
"",
false,
reader,
0,
1000,
"text",
"truncated",
getStringBuilderPool()
);
parser.close();
parser.parse();
}
@Test
public void testTruncate() throws Exception {
OverrunReader reader = new OverrunReader(new StringReader("Hello\nBye"), 1000, true, false);
DataParser parser = new TextCharDataParser(
getContext(),
"id",
false,
false,
"",
false,
reader,
0,
3,
"text",
"truncated",
getStringBuilderPool()
);
Assert.assertEquals(0, Long.parseLong(parser.getOffset()));
Record record = parser.parse();
Assert.assertNotNull(record);
Assert.assertEquals("id::0", record.getHeader().getSourceId());
Assert.assertEquals("Hel", record.get().getValueAsMap().get("text").getValueAsString());
Assert.assertTrue(record.has("/truncated"));
Assert.assertEquals(6, Long.parseLong(parser.getOffset()));
record = parser.parse();
Assert.assertNotNull(record);
Assert.assertEquals("id::6", record.getHeader().getSourceId());
Assert.assertEquals("Bye", record.get().getValueAsMap().get("text").getValueAsString());
Assert.assertFalse(record.has("/truncated"));
Assert.assertEquals(9, Long.parseLong(parser.getOffset()));
record = parser.parse();
Assert.assertNull(record);
Assert.assertEquals(-1, Long.parseLong(parser.getOffset()));
parser.close();
}
private String createTextLines(int underLimitLength, int underLimitLines, int overLimitLength) {
StringBuilder sb = new StringBuilder(underLimitLength * underLimitLength + overLimitLength + underLimitLines + 1);
for (int line = 0; line < underLimitLines; line++) {
for (int len = 0; len < underLimitLength; len++) {
sb.append((char) (len % 28 + 65));
}
sb.append('\n');
}
for (int len = 0; len < overLimitLength; len++) {
sb.append((char) (len % 28 + 65));
}
sb.append('\n');
return sb.toString();
}
@Test(expected = OverrunException.class)
public void testOverrun() throws Exception {
OverrunReader reader = new OverrunReader(new StringReader(createTextLines(1000, 20, 5000)), 2 * 1000, true, false);
int lines = 0;
try (DataParser parser = new TextCharDataParser(
getContext(),
"id",
false,
false,
"",
false,
reader,
0,
3,
"text",
"truncated",
getStringBuilderPool()
)) {
// we read 20 lines under the limit then one over the limit
while (parser.parse() != null) {
lines++;
}
} finally {
Assert.assertEquals(20, lines);
}
}
@Test
public void testCollapseAllDefault() throws Exception {
OverrunReader reader = new OverrunReader(new StringReader("Hello\nBye"), 1000, true, false);
DataParser parser = new TextCharDataParser(
getContext(),
"id",
true,
false,
"",
false,
reader,
0,
100,
"text",
"truncated",
getStringBuilderPool()
);
Record record = parser.parse();
Assert.assertNotNull(record);
Assert.assertEquals("Hello\nBye\n", record.get().getValueAsMap().get("text").getValueAsString());
record = parser.parse();
Assert.assertNull(record);
parser.close();
}
@Test
public void testCollapseAllWithCustomDelimiter() throws Exception {
OverrunReader reader = new OverrunReader(new StringReader("Hello\nBye"), 1000, true, false);
DataParser parser = new TextCharDataParser(
getContext(),
"id",
true,
true,
"\r\n",
false,
reader,
0,
100,
"text",
"truncated",
getStringBuilderPool()
);
Record record = parser.parse();
Assert.assertNotNull(record);
Assert.assertEquals("Hello\nBye\n", record.get().getValueAsMap().get("text").getValueAsString());
record = parser.parse();
Assert.assertNull(record);
parser.close();
}
@Test
public void testCustomDelimiterForJson() throws Exception {
String record1 = "{\"menu\": {\n" +
" \"id\": \"file\",\n" +
" \"value\": \"Record1\",\n" +
" \"popup\": {\n" +
" \"menuitem\": [\n" +
" {\"value\": \"New\", \"onclick\": \"CreateNewDoc()\"},\n" +
" {\"value\": \"Open\", \"onclick\": \"OpenDoc()\"},\n" +
" {\"value\": \"Close\", \"onclick\": \"CloseDoc()\"}]}}}";
String record2 = "{\"menu\": {\n" +
" \"id\": \"file\",\n" +
" \"value\": \"Record2\",\n" +
" \"popup\": {\n" +
" \"menuitem\": [\n" +
" {\"value\": \"New\", \"onclick\": \"CreateNewDoc()\"},\n" +
" {\"value\": \"Open\", \"onclick\": \"OpenDoc()\"},\n" +
" {\"value\": \"Close\", \"onclick\": \"CloseDoc()\"}]}}}";
OverrunReader reader = new OverrunReader(new StringReader(record1 + "\n" +record2 + "\n"), 1000, true, false);
DataParser parser = new TextCharDataParser(
getContext(),
"id",
false,
true,
"}]}}}\n",
true,
reader, 0, 10000, "text", "truncated", getStringBuilderPool());
Record record = parser.parse();
Assert.assertNotNull(record);
Assert.assertEquals(record1 + "\n", record.get().getValueAsMap().get("text").getValueAsString());
record = parser.parse();
Assert.assertNotNull(record);
Assert.assertEquals(record2 + "\n", record.get().getValueAsMap().get("text").getValueAsString());
record = parser.parse();
Assert.assertNull(record);
parser.close();
}
private GenericObjectPool<StringBuilder> getStringBuilderPool() {
GenericObjectPoolConfig stringBuilderPoolConfig = new GenericObjectPoolConfig();
stringBuilderPoolConfig.setMaxTotal(1);
stringBuilderPoolConfig.setMinIdle(1);
stringBuilderPoolConfig.setMaxIdle(1);
stringBuilderPoolConfig.setBlockWhenExhausted(false);
return new GenericObjectPool<>(new StringBuilderPoolFactory(1024), stringBuilderPoolConfig);
}
}