/**
* Copyright 2015 StreamSets Inc.
*
* Licensed under the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.streamsets.pipeline.lib.parser.delimited;
import com.streamsets.pipeline.api.Field;
import com.streamsets.pipeline.api.OnRecordError;
import com.streamsets.pipeline.api.Record;
import com.streamsets.pipeline.api.Stage;
import com.streamsets.pipeline.config.CsvHeader;
import com.streamsets.pipeline.config.CsvRecordType;
import com.streamsets.pipeline.api.ext.io.OverrunReader;
import com.streamsets.pipeline.lib.parser.DataParser;
import com.streamsets.pipeline.lib.parser.RecoverableDataParserException;
import com.streamsets.pipeline.sdk.ContextInfoCreator;
import org.apache.commons.csv.CSVFormat;
import org.junit.Assert;
import org.junit.Test;
import java.io.IOException;
import java.io.StringReader;
import java.util.Collections;
import java.util.List;
/**
 * Unit tests for {@link DelimitedCharDataParser}.
 *
 * <p>The tests cover the three {@link CsvHeader} modes, the {@code LIST} and {@code LIST_MAP} record
 * layouts, parsers created with a non-zero reader offset, null-constant substitution, rows that carry
 * more columns than the header line, and the {@code \n}, {@code \r\n} and {@code \r} line terminators.
 *
 * <p>Every parser is closed in a {@code finally} block so that a failing assertion does not leak the
 * underlying reader.
 */
public class TestDelimitedCharDataParser {

  /** Header line plus one data line; the tests below expect the data line to start at offset 4. */
  private static final String TWO_LINES = "A,B\na,b";

  /** Header line plus two data lines; the tests below expect the last line to start at offset 8. */
  private static final String THREE_LINES = "A,B\na,b\ne,f";

  /**
   * Builds the stage context handed to every parser under test.
   *
   * @return a source context created by {@link ContextInfoCreator} with {@link OnRecordError#TO_ERROR}
   *     and an empty list as its last argument
   */
  private Stage.Context getContext() {
    // Typed empty list instead of the raw Collections.EMPTY_LIST to avoid an unchecked conversion.
    return ContextInfoCreator.createSourceContext("i", false, OnRecordError.TO_ERROR,
        Collections.<String>emptyList());
  }

  /**
   * Creates a parser over {@code data} without null-constant handling.
   *
   * @param data CSV text to parse
   * @param readerOffset offset argument passed to the parser constructor
   * @param header header mode
   * @param recordType layout of the produced records
   * @return the parser; the caller is responsible for closing it
   */
  private DataParser createParser(String data, int readerOffset, CsvHeader header, CsvRecordType recordType)
      throws Exception {
    return createParser(data, readerOffset, header, recordType, false, null);
  }

  /**
   * Creates a parser over {@code data}, wrapping it in an {@link OverrunReader} exactly as every test
   * in this class needs it: reader id {@code "id"}, {@link CSVFormat#DEFAULT}, and the fixed literals
   * {@code 0} and {@code -1} for the two remaining numeric constructor arguments.
   *
   * @param data CSV text to parse
   * @param readerOffset offset argument passed to the parser constructor
   * @param header header mode
   * @param recordType layout of the produced records
   * @param parseNull second-to-last constructor argument (null-constant switch)
   * @param nullConstant last constructor argument (the null-constant literal), may be {@code null}
   * @return the parser; the caller is responsible for closing it
   */
  private DataParser createParser(String data, int readerOffset, CsvHeader header, CsvRecordType recordType,
      boolean parseNull, String nullConstant) throws Exception {
    OverrunReader reader = new OverrunReader(new StringReader(data), 1000, true, false);
    return new DelimitedCharDataParser(getContext(), "id", reader, readerOffset, 0, CSVFormat.DEFAULT, header, -1,
        recordType, parseNull, nullConstant);
  }

  /** Parses the next record, asserting that it exists and carries the expected source id. */
  private static Record parseRecord(DataParser parser, String expectedSourceId) throws Exception {
    Record record = parser.parse();
    Assert.assertNotNull(record);
    Assert.assertEquals(expectedSourceId, record.getHeader().getSourceId());
    return record;
  }

  /** Reads entry {@code key} ("value" or "header") of the column at {@code index} of a LIST record. */
  private static String cell(Record record, int index, String key) {
    return record.get().getValueAsList().get(index).getValueAsMap().get(key).getValueAsString();
  }

  /** Asserts the value of a LIST column and that the column has no {@code header} entry. */
  private static void assertColumnWithoutHeader(Record record, int index, String expectedValue) {
    Assert.assertEquals(expectedValue, cell(record, index, "value"));
    Assert.assertFalse(record.has("[" + index + "]/header"));
  }

  /** Asserts both the value and the header name of a LIST column. */
  private static void assertColumnWithHeader(Record record, int index, String expectedValue,
      String expectedHeader) {
    Assert.assertEquals(expectedValue, cell(record, index, "value"));
    Assert.assertEquals(expectedHeader, cell(record, index, "header"));
  }

  /** Asserts the value stored under {@code key} of a LIST_MAP record. */
  private static void assertListMapColumn(Record record, String key, String expectedValue) {
    Assert.assertEquals(expectedValue, record.get().getValueAsListMap().get(key).getValueAsString());
  }

  /** Asserts that {@code fields} is non-null and holds exactly the expected string values, in order. */
  private static void assertFieldValues(List<Field> fields, String... expectedValues) {
    Assert.assertNotNull(fields);
    Assert.assertEquals(expectedValues.length, fields.size());
    for (int i = 0; i < expectedValues.length; i++) {
      Assert.assertEquals(expectedValues[i], fields.get(i).getValueAsString());
    }
  }

  /** Asserts that the parser has no more records and then reports offset {@code "-1"}. */
  private static void assertEndOfData(DataParser parser) throws Exception {
    Assert.assertNull(parser.parse());
    Assert.assertEquals("-1", parser.getOffset());
  }

  /** NO_HEADER + LIST: both lines are returned as data and no column has a header entry. */
  @Test
  public void testParseNoHeader() throws Exception {
    DataParser parser = createParser(TWO_LINES, 0, CsvHeader.NO_HEADER, CsvRecordType.LIST);
    try {
      Assert.assertEquals("0", parser.getOffset());
      Record record = parseRecord(parser, "id::0");
      assertColumnWithoutHeader(record, 0, "A");
      assertColumnWithoutHeader(record, 1, "B");
      Assert.assertEquals("4", parser.getOffset());
      record = parseRecord(parser, "id::4");
      assertColumnWithoutHeader(record, 0, "a");
      assertColumnWithoutHeader(record, 1, "b");
      Assert.assertEquals("7", parser.getOffset());
      assertEndOfData(parser);
    } finally {
      parser.close();
    }
  }

  /** NO_HEADER + LIST_MAP: both lines are returned as data, keyed by column position. */
  @Test
  public void testParseNoHeaderWithListMap() throws Exception {
    DataParser parser = createParser(TWO_LINES, 0, CsvHeader.NO_HEADER, CsvRecordType.LIST_MAP);
    try {
      Assert.assertEquals("0", parser.getOffset());
      Record record = parseRecord(parser, "id::0");
      assertListMapColumn(record, "0", "A");
      assertListMapColumn(record, "1", "B");
      Assert.assertEquals("4", parser.getOffset());
      record = parseRecord(parser, "id::4");
      assertListMapColumn(record, "0", "a");
      assertListMapColumn(record, "1", "b");
      Assert.assertEquals("7", parser.getOffset());
      assertEndOfData(parser);
    } finally {
      parser.close();
    }
  }

  /** IGNORE_HEADER + LIST: the first line is skipped and columns carry no header entry. */
  @Test
  public void testParseIgnoreHeader() throws Exception {
    DataParser parser = createParser(TWO_LINES, 0, CsvHeader.IGNORE_HEADER, CsvRecordType.LIST);
    try {
      Assert.assertEquals("4", parser.getOffset());
      Record record = parseRecord(parser, "id::4");
      assertColumnWithoutHeader(record, 0, "a");
      assertColumnWithoutHeader(record, 1, "b");
      Assert.assertEquals("7", parser.getOffset());
      assertEndOfData(parser);
    } finally {
      parser.close();
    }
  }

  /** IGNORE_HEADER + LIST_MAP: the first line is skipped and columns are keyed by position. */
  @Test
  public void testParseIgnoreHeaderWithListMap() throws Exception {
    DataParser parser = createParser(TWO_LINES, 0, CsvHeader.IGNORE_HEADER, CsvRecordType.LIST_MAP);
    try {
      Assert.assertEquals("4", parser.getOffset());
      Record record = parseRecord(parser, "id::4");
      assertListMapColumn(record, "0", "a");
      assertListMapColumn(record, "1", "b");
      Assert.assertEquals("7", parser.getOffset());
      assertEndOfData(parser);
    } finally {
      parser.close();
    }
  }

  /** WITH_HEADER + LIST: the first line supplies the header entry of every column. */
  @Test
  public void testParseWithHeader() throws Exception {
    DataParser parser = createParser(TWO_LINES, 0, CsvHeader.WITH_HEADER, CsvRecordType.LIST);
    try {
      Assert.assertEquals("4", parser.getOffset());
      Record record = parseRecord(parser, "id::4");
      assertColumnWithHeader(record, 0, "a", "A");
      assertColumnWithHeader(record, 1, "b", "B");
      Assert.assertEquals("7", parser.getOffset());
      assertEndOfData(parser);
    } finally {
      parser.close();
    }
  }

  /** WITH_HEADER + LIST_MAP: the first line supplies the map keys. */
  @Test
  public void testParseWithHeaderWithListMap() throws Exception {
    DataParser parser = createParser(TWO_LINES, 0, CsvHeader.WITH_HEADER, CsvRecordType.LIST_MAP);
    try {
      Assert.assertEquals("4", parser.getOffset());
      Record record = parseRecord(parser, "id::4");
      assertListMapColumn(record, "A", "a");
      assertListMapColumn(record, "B", "b");
      Assert.assertEquals("7", parser.getOffset());
      assertEndOfData(parser);
    } finally {
      parser.close();
    }
  }

  /** NO_HEADER with reader offset 4: parsing resumes at the second line. */
  @Test
  public void testParseNoHeaderWithOffset() throws Exception {
    DataParser parser = createParser(TWO_LINES, 4, CsvHeader.NO_HEADER, CsvRecordType.LIST);
    try {
      Assert.assertEquals("4", parser.getOffset());
      Record record = parseRecord(parser, "id::4");
      assertColumnWithoutHeader(record, 0, "a");
      assertColumnWithoutHeader(record, 1, "b");
      Assert.assertEquals("7", parser.getOffset());
      assertEndOfData(parser);
    } finally {
      parser.close();
    }
  }

  /** IGNORE_HEADER with reader offset 4: parsing resumes at the second line. */
  @Test
  public void testParseIgnoreHeaderWithOffset() throws Exception {
    DataParser parser = createParser(TWO_LINES, 4, CsvHeader.IGNORE_HEADER, CsvRecordType.LIST);
    try {
      Assert.assertEquals("4", parser.getOffset());
      Record record = parseRecord(parser, "id::4");
      assertColumnWithoutHeader(record, 0, "a");
      assertColumnWithoutHeader(record, 1, "b");
      Assert.assertEquals("7", parser.getOffset());
      assertEndOfData(parser);
    } finally {
      parser.close();
    }
  }

  /** IGNORE_HEADER with reader offset 8: parsing resumes at the third line. */
  @Test
  public void testParseIgnoreHeaderWithOffset2() throws Exception {
    DataParser parser = createParser(THREE_LINES, 8, CsvHeader.IGNORE_HEADER, CsvRecordType.LIST);
    try {
      Assert.assertEquals("8", parser.getOffset());
      Record record = parseRecord(parser, "id::8");
      assertColumnWithoutHeader(record, 0, "e");
      assertColumnWithoutHeader(record, 1, "f");
      Assert.assertEquals("11", parser.getOffset());
      assertEndOfData(parser);
    } finally {
      parser.close();
    }
  }

  /** WITH_HEADER with reader offset 8: the third line is parsed and still gets the header names. */
  @Test
  public void testParseWithHeaderWithOffset() throws Exception {
    DataParser parser = createParser(THREE_LINES, 8, CsvHeader.WITH_HEADER, CsvRecordType.LIST);
    try {
      Assert.assertEquals("8", parser.getOffset());
      Record record = parseRecord(parser, "id::8");
      assertColumnWithHeader(record, 0, "e", "A");
      assertColumnWithHeader(record, 1, "f", "B");
      Assert.assertEquals("11", parser.getOffset());
      assertEndOfData(parser);
    } finally {
      parser.close();
    }
  }

  /** Parsing after {@code close()} must fail with an {@link IOException}. */
  @Test(expected = IOException.class)
  public void testClose() throws Exception {
    DataParser parser = createParser(TWO_LINES, 0, CsvHeader.IGNORE_HEADER, CsvRecordType.LIST);
    parser.close();
    parser.parse();
  }

  /** With null-constant handling enabled, cells equal to {@code "null"} are returned as null values. */
  @Test
  public void testParseNullConstant() throws Exception {
    DataParser parser =
        createParser("A,null\nnull,B", 0, CsvHeader.NO_HEADER, CsvRecordType.LIST, true, "null");
    try {
      Assert.assertEquals("0", parser.getOffset());
      Record record = parser.parse();
      Assert.assertNotNull(record);
      Assert.assertEquals("A", cell(record, 0, "value"));
      Assert.assertNull(cell(record, 1, "value"));
      record = parser.parse();
      Assert.assertNotNull(record);
      Assert.assertNull(cell(record, 0, "value"));
      Assert.assertEquals("B", cell(record, 1, "value"));
      Assert.assertNull(parser.parse());
    } finally {
      parser.close();
    }
  }

  /**
   * A data row with more columns than the header line must raise a
   * {@link RecoverableDataParserException} whose unparsed record exposes the raw headers and columns.
   */
  @Test
  public void testMoreColumnsThenInHeader() throws Exception {
    DataParser parser = createParser("A,B\na,b,c", 0, CsvHeader.WITH_HEADER, CsvRecordType.LIST);
    try {
      Assert.assertEquals("4", parser.getOffset());
      try {
        parser.parse();
        Assert.fail("Expected exception while parsing!");
      } catch (RecoverableDataParserException ex) {
        Record unparsed = ex.getUnparsedRecord();
        Assert.assertNotNull(unparsed);
        Assert.assertTrue(unparsed.has("/columns"));
        Assert.assertTrue(unparsed.has("/headers"));
        assertFieldValues(unparsed.get("/headers").getValueAsList(), "A", "B");
        assertFieldValues(unparsed.get("/columns").getValueAsList(), "a", "b", "c");
      }
    } finally {
      // The parser was previously never closed in this test.
      parser.close();
    }
  }

  /** {@code \r\n} line terminator: the second line is expected at offset 5 and the end at offset 8. */
  @Test
  public void testClRfEndOfLines() throws Exception {
    DataParser parser = createParser("A,B\r\na,b", 0, CsvHeader.NO_HEADER, CsvRecordType.LIST);
    try {
      Assert.assertEquals("0", parser.getOffset());
      Record record = parseRecord(parser, "id::0");
      assertColumnWithoutHeader(record, 0, "A");
      assertColumnWithoutHeader(record, 1, "B");
      Assert.assertEquals("5", parser.getOffset());
      record = parseRecord(parser, "id::5");
      assertColumnWithoutHeader(record, 0, "a");
      assertColumnWithoutHeader(record, 1, "b");
      Assert.assertEquals("8", parser.getOffset());
      assertEndOfData(parser);
    } finally {
      parser.close();
    }
  }

  /** Bare {@code \r} line terminator: offsets match the {@code \n} case (4, then 7). */
  @Test
  public void testClEndOfLines() throws Exception {
    DataParser parser = createParser("A,B\ra,b", 0, CsvHeader.NO_HEADER, CsvRecordType.LIST);
    try {
      Assert.assertEquals("0", parser.getOffset());
      Record record = parseRecord(parser, "id::0");
      assertColumnWithoutHeader(record, 0, "A");
      assertColumnWithoutHeader(record, 1, "B");
      Assert.assertEquals("4", parser.getOffset());
      record = parseRecord(parser, "id::4");
      assertColumnWithoutHeader(record, 0, "a");
      assertColumnWithoutHeader(record, 1, "b");
      Assert.assertEquals("7", parser.getOffset());
      assertEndOfData(parser);
    } finally {
      parser.close();
    }
  }
}