/**
 * Copyright 2015 StreamSets Inc.
 *
 * Licensed under the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.streamsets.pipeline.lib.parser.json;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.io.Resources;
import com.streamsets.pipeline.api.OnRecordError;
import com.streamsets.pipeline.api.Record;
import com.streamsets.pipeline.api.Stage;
import com.streamsets.pipeline.config.Compression;
import com.streamsets.pipeline.config.JsonMode;
import com.streamsets.pipeline.lib.parser.DataParser;
import com.streamsets.pipeline.lib.parser.DataParserFactory;
import com.streamsets.pipeline.lib.parser.DataParserFactoryBuilder;
import com.streamsets.pipeline.lib.parser.DataParserFormat;
import com.streamsets.pipeline.sdk.ContextInfoCreator;
import com.streamsets.pipeline.sdk.DataCollectorServicesUtils;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;

import java.util.Collections;
import java.util.Map;

/**
 * Tests the JSON data parser against a compressed archive read from the classpath.
 *
 * <p>Both tests read {@code testArchive.tar.gz}, which is expected to contain the five files
 * {@code file-0.txt} to {@code file-4.txt}, each holding three JSON strings. After every record the
 * parser offset is decoded as JSON and its {@code fileName} and {@code fileOffset} entries are
 * checked.
 */
public class TestJsonDataParserWithCompression {

  private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

  /** Classpath resource holding the archive under test. */
  private static final String ARCHIVE_RESOURCE = "testArchive.tar.gz";

  /** Number of {@code file-N.txt} entries the archive is expected to contain. */
  private static final int FILES_IN_ARCHIVE = 5;

  /**
   * Expected records of every file in the archive, in read order. Each row is
   * {@code {first list element of the record, "fileOffset" reported after reading it}}.
   */
  private static final String[][] EXPECTED_RECORDS = {
      {"Hello", "10"},
      {"Hi", "17"},
      {"Bye", "24"},
  };

  private Stage.Context getContext() {
    return ContextInfoCreator.createSourceContext("i", false, OnRecordError.TO_ERROR, Collections.EMPTY_LIST);
  }

  @BeforeClass
  public static void setUpClass() {
    DataCollectorServicesUtils.loadDefaultServices();
  }

  /**
   * Reads the whole archive through a single parser instance and verifies every record and the
   * offset reported after it.
   */
  @Test
  public void testParseMultipleJson1() throws Exception {
    DataParserFactory factory = createFactory();
    DataParser parser = openParser(factory, "0");
    try {
      Assert.assertEquals(0, Long.parseLong(parser.getOffset()));

      // testArchive.tar.gz contains 5 files file-0.txt - file-4.txt each with 3 Json string
      for (int i = 0; i < FILES_IN_ARCHIVE; i++) {
        String fileName = "file-" + i + ".txt";
        for (String[] expected : EXPECTED_RECORDS) {
          assertNextRecord(parser, expected[0], fileName, expected[1]);
        }
      }

      // Done reading the archive. Next attempt to parse should return null and offset "-1"
      Assert.assertNull(parser.parse());
      Assert.assertEquals("-1", parser.getOffset());
    } finally {
      // Close even when an assertion above fails.
      parser.close();
    }
  }

  /**
   * Same expectations as {@link #testParseMultipleJson1()}, but the parser is recreated before
   * every read from the offset returned by the previous read. This mimics stop and restart.
   */
  @Test
  public void testParseMultipleJson2() throws Exception {
    DataParserFactory factory = createFactory();
    String offset = "0";

    DataParser parser = openParser(factory, offset);
    try {
      Assert.assertEquals(0, Long.parseLong(parser.getOffset()));
    } finally {
      parser.close();
    }

    // testArchive.tar.gz contains 5 files file-0.txt - file-4.txt each with 3 Json string
    for (int i = 0; i < FILES_IN_ARCHIVE; i++) {
      String fileName = "file-" + i + ".txt";
      for (String[] expected : EXPECTED_RECORDS) {
        parser = openParser(factory, offset);
        try {
          offset = assertNextRecord(parser, expected[0], fileName, expected[1]);
        } finally {
          // Every parser gets its own stream; close it before the next one is opened.
          parser.close();
        }
      }
    }

    // Done reading the archive. Next attempt to parse should return null and offset "-1"
    parser = openParser(factory, offset);
    try {
      Assert.assertNull(parser.parse());
      Assert.assertEquals("-1", parser.getOffset());
    } finally {
      parser.close();
    }
  }

  /**
   * Builds the factory shared by both tests: JSON format, multiple-objects mode, compressed
   * archive input, max data length 1000, only entries matching {@code *.txt}.
   */
  private DataParserFactory createFactory() throws Exception {
    return new DataParserFactoryBuilder(getContext(), DataParserFormat.JSON)
        .setMaxDataLen(1000)
        .setMode(JsonMode.MULTIPLE_OBJECTS)
        .setCompression(Compression.COMPRESSED_ARCHIVE)
        .setFilePatternInArchive("*.txt")
        .build();
  }

  /**
   * Opens a fresh stream on the test archive and creates a parser over it.
   *
   * @param factory factory used to create the parser
   * @param offset offset handed to the factory; {@code "0"} or a value returned by
   *     {@link DataParser#getOffset()}
   * @return a new parser that the caller must close
   */
  private static DataParser openParser(DataParserFactory factory, String offset) throws Exception {
    // NOTE(review): closing the parser is presumed to close this stream as well - confirm.
    return factory.getParser("id", Resources.getResource(ARCHIVE_RESOURCE).openStream(), offset);
  }

  /**
   * Parses the next record and checks both its content and the offset the parser reports
   * afterwards.
   *
   * @param parser parser to read from
   * @param expectedValue expected string value of the first element of the record's list
   * @param expectedFileName expected {@code fileName} entry of the JSON offset
   * @param expectedFileOffset expected {@code fileOffset} entry of the JSON offset
   * @return the raw offset string reported by the parser after the read
   */
  private static String assertNextRecord(
      DataParser parser,
      String expectedValue,
      String expectedFileName,
      String expectedFileOffset
  ) throws Exception {
    Record record = parser.parse();
    Assert.assertEquals(expectedValue, record.get().getValueAsList().get(0).getValueAsString());

    String offset = parser.getOffset();
    // The offset is a JSON document; only its "fileName" and "fileOffset" entries are checked.
    Map<?, ?> archiveInputOffset = OBJECT_MAPPER.readValue(offset, Map.class);
    Assert.assertNotNull(archiveInputOffset);
    Assert.assertEquals(expectedFileName, archiveInputOffset.get("fileName"));
    Assert.assertEquals(expectedFileOffset, archiveInputOffset.get("fileOffset"));
    return offset;
  }
}