/**
* Copyright 2015 StreamSets Inc.
*
* Licensed under the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.streamsets.pipeline.lib.parser;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.streamsets.pipeline.api.Record;
import com.streamsets.pipeline.config.Compression;
import com.streamsets.pipeline.sdk.DataCollectorServicesUtils;
import com.streamsets.pipeline.sdk.RecordCreator;
import org.apache.commons.compress.archivers.ArchiveEntry;
import org.apache.commons.compress.archivers.ArchiveOutputStream;
import org.apache.commons.compress.archivers.ArchiveStreamFactory;
import org.apache.commons.compress.compressors.CompressorOutputStream;
import org.apache.commons.compress.compressors.CompressorStreamFactory;
import org.apache.commons.io.IOUtils;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.SequenceInputStream;
import java.nio.charset.StandardCharsets;
import java.util.Map;
import java.util.UUID;
/**
 * Tests for {@link CompressionDataParser.CompressionInputBuilder}: verifies that inputs built for
 * single compressed files, concatenated compressed files and archives expose the expected
 * decompressed content, wrapped offsets, record ids and record header attributes.
 */
public class TestCompressionInputBuilder {

  private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

  /** Number of text files placed in the archive created by {@link #createArchive(File, String)}. */
  private static final int ARCHIVE_FILE_COUNT = 5;

  @BeforeClass
  public static void setUpClass() {
    DataCollectorServicesUtils.loadDefaultServices();
  }

  /**
   * Exercises {@link Compression#COMPRESSED_FILE} inputs for each compression format under test,
   * both as a single compressed stream and as two compressed streams concatenated together.
   */
  @Test
  public void testCompressionInput() throws Exception {
    testCompressedFile("gz");
    testCompressedFile("bzip2");
    testCompressedFile("xz");
    testCompressedFile("DEFLATE");
    testConcatenatedCompressedFile("gz");
    testConcatenatedCompressedFile("bzip2");
    testConcatenatedCompressedFile("xz");
  }

  /** Exercises {@link Compression#ARCHIVE} inputs using a TAR archive. */
  @Test
  public void testArchiveInput() throws Exception {
    testArchive(ArchiveStreamFactory.TAR);
  }

  /**
   * Compresses a single line with the given format and checks that the built input returns the
   * offset and record id unchanged and yields the original line when read.
   *
   * @param compressionType compressor name understood by {@link CompressorStreamFactory}
   */
  private void testCompressedFile(String compressionType) throws Exception {
    // write data into the stream using the specified compression
    ByteArrayOutputStream bOut = new ByteArrayOutputStream();
    try (CompressorOutputStream cOut =
        new CompressorStreamFactory().createCompressorOutputStream(compressionType, bOut)) {
      cOut.write("StreamSets".getBytes(StandardCharsets.UTF_8));
    }

    // create compression input
    CompressionDataParser.CompressionInputBuilder compressionInputBuilder =
        new CompressionDataParser.CompressionInputBuilder(
            Compression.COMPRESSED_FILE,
            null,
            new ByteArrayInputStream(bOut.toByteArray()),
            "0"
        );
    CompressionDataParser.CompressionInput input = compressionInputBuilder.build();

    // verify
    Assert.assertNotNull(input);
    Assert.assertEquals("myFile::4567", input.wrapOffset("myFile::4567"));
    Assert.assertEquals("myFile::4567", input.wrapRecordId("myFile::4567"));

    InputStream myFile = input.getNextInputStream();
    try (BufferedReader reader =
        new BufferedReader(new InputStreamReader(myFile, StandardCharsets.UTF_8))) {
      Assert.assertEquals("StreamSets", reader.readLine());
    }
  }

  /**
   * Compresses two lines as two independent compressed streams, concatenates the resulting bytes
   * and checks that both lines can be read back through a single built input.
   *
   * @param compressionType compressor name understood by {@link CompressorStreamFactory}
   */
  private void testConcatenatedCompressedFile(String compressionType) throws Exception {
    ByteArrayOutputStream bytes1 = new ByteArrayOutputStream();
    ByteArrayOutputStream bytes2 = new ByteArrayOutputStream();

    try (CompressorOutputStream compressed1 =
        new CompressorStreamFactory().createCompressorOutputStream(compressionType, bytes1)) {
      compressed1.write("line1\n".getBytes(StandardCharsets.UTF_8));
    }
    try (CompressorOutputStream compressed2 =
        new CompressorStreamFactory().createCompressorOutputStream(compressionType, bytes2)) {
      compressed2.write("line2".getBytes(StandardCharsets.UTF_8));
    }

    CompressionDataParser.CompressionInputBuilder compressionInputBuilder =
        new CompressionDataParser.CompressionInputBuilder(
            Compression.COMPRESSED_FILE,
            null,
            new SequenceInputStream(
                new ByteArrayInputStream(bytes1.toByteArray()),
                new ByteArrayInputStream(bytes2.toByteArray())
            ),
            "0"
        );
    CompressionDataParser.CompressionInput input = compressionInputBuilder.build();

    // verify
    Assert.assertNotNull(input);
    Assert.assertEquals("myFile::4567", input.wrapOffset("myFile::4567"));
    Assert.assertEquals("myFile::4567", input.wrapRecordId("myFile::4567"));

    InputStream myFile = input.getNextInputStream();
    try (BufferedReader reader =
        new BufferedReader(new InputStreamReader(myFile, StandardCharsets.UTF_8))) {
      Assert.assertEquals("line1", reader.readLine());
      Assert.assertEquals("line2", reader.readLine());
    }
  }

  /**
   * Builds an archive of the given type containing several text files, then checks the stream
   * position, wrapped offset, record headers and record id reported by the built input around the
   * read of the first archived file.
   *
   * @param archiveType archiver name understood by {@link ArchiveStreamFactory}
   */
  @SuppressWarnings("unchecked")
  private void testArchive(String archiveType) throws Exception {
    // create an archive with multiple files, files containing multiple objects
    File dir = new File("target", UUID.randomUUID().toString());
    // Fail here with a clear message instead of with a FileNotFoundException further down.
    Assert.assertTrue("Could not create test directory " + dir, dir.mkdirs());
    File archive = createArchive(dir, archiveType);

    // create compression input
    try (FileInputStream fileInputStream = new FileInputStream(archive)) {
      CompressionDataParser.CompressionInputBuilder compressionInputBuilder =
          new CompressionDataParser.CompressionInputBuilder(Compression.ARCHIVE, "*.txt", fileInputStream, "0");
      CompressionDataParser.CompressionInput input = compressionInputBuilder.build();

      // before reading
      Assert.assertNotNull(input);

      // The default wrapped offset before reading any file
      String wrappedOffset = "{\"fileName\": \"myfile\", \"fileOffset\":\"0\"}";
      Map<String, Object> archiveInputOffset = OBJECT_MAPPER.readValue(wrappedOffset, Map.class);
      Assert.assertNotNull(archiveInputOffset);
      Assert.assertEquals("myfile", archiveInputOffset.get("fileName"));
      Assert.assertEquals("0", archiveInputOffset.get("fileOffset"));
      Assert.assertEquals("0", input.getStreamPosition(wrappedOffset));

      // read and check wrapped offset; the reader stays open until every assertion that queries
      // the input has run, so closing it cannot interfere with them
      try (BufferedReader reader = new BufferedReader(
          new InputStreamReader(input.getNextInputStream(), StandardCharsets.UTF_8))) {
        Assert.assertEquals("StreamSets0", reader.readLine());

        wrappedOffset = input.wrapOffset("4567");
        archiveInputOffset = OBJECT_MAPPER.readValue(wrappedOffset, Map.class);
        Assert.assertNotNull(archiveInputOffset);
        Assert.assertEquals("file-0.txt", archiveInputOffset.get("fileName"));
        Assert.assertEquals("4567", archiveInputOffset.get("fileOffset"));

        checkHeader(input, "file-0.txt", input.getStreamPosition(wrappedOffset));

        String recordIdPattern = "myFile/file-0.txt";
        Assert.assertEquals(recordIdPattern, input.wrapRecordId("myFile"));
      }
    }
  }

  /**
   * Writes {@code file-0.txt} .. {@code file-4.txt} into {@code dir}, each containing
   * {@code "StreamSets" + index}, and packs them into {@code dir/myArchive}.
   *
   * @param dir existing directory that receives both the text files and the archive
   * @param archiveType archiver name understood by {@link ArchiveStreamFactory}
   * @return the archive file that was written
   */
  private File createArchive(File dir, String archiveType) throws Exception {
    File archive = new File(dir, "myArchive");
    try (OutputStream archiveOut = new FileOutputStream(archive);
         ArchiveOutputStream archiveOutputStream =
             new ArchiveStreamFactory().createArchiveOutputStream(archiveType, archiveOut)) {
      for (int i = 0; i < ARCHIVE_FILE_COUNT; i++) {
        String fileName = "file-" + i + ".txt";
        File inputFile = new File(dir, fileName);
        try (FileOutputStream fileOutputStream = new FileOutputStream(inputFile)) {
          IOUtils.write(("StreamSets" + i).getBytes(StandardCharsets.UTF_8), fileOutputStream);
        }

        ArchiveEntry archiveEntry = archiveOutputStream.createArchiveEntry(inputFile, fileName);
        archiveOutputStream.putArchiveEntry(archiveEntry);
        // Close the per-file input stream once copied rather than leaking one handle per entry.
        try (InputStream entryContent = new FileInputStream(inputFile)) {
          IOUtils.copy(entryContent, archiveOutputStream);
        }
        archiveOutputStream.closeArchiveEntry();
      }
      archiveOutputStream.finish();
    }
    return archive;
  }

  /**
   * Asks {@code input} to populate the headers of a fresh record and asserts that the
   * archive-related attributes are present and carry the expected file name and offset.
   *
   * @param input compression input whose {@code wrapRecordHeaders} is under test
   * @param fileName expected value of the file-name-inside-archive attribute
   * @param offset offset handed to {@code wrapRecordHeaders} and expected in the offset attribute
   */
  void checkHeader(CompressionDataParser.CompressionInput input, String fileName, String offset) throws Exception {
    Record record = RecordCreator.create();
    Record.Header header = record.getHeader();
    input.wrapRecordHeaders(header, offset);

    Assert.assertNotNull(
        header.getAttribute(CompressionDataParser.CompressionInputBuilder.ArchiveInput.FILE_PATH_INSIDE_ARCHIVE)
    );

    Assert.assertNotNull(
        header.getAttribute(CompressionDataParser.CompressionInputBuilder.ArchiveInput.FILE_NAME_INSIDE_ARCHIVE)
    );
    // JUnit's contract is assertEquals(expected, actual); keeping that order makes failure
    // messages report the right side as "expected".
    Assert.assertEquals(
        fileName,
        header.getAttribute(CompressionDataParser.CompressionInputBuilder.ArchiveInput.FILE_NAME_INSIDE_ARCHIVE)
    );

    Assert.assertNotNull(
        header.getAttribute(CompressionDataParser.CompressionInputBuilder.ArchiveInput.FILE_OFFSET_INSIDER_ARCHIVE)
    );
    Assert.assertEquals(
        offset,
        header.getAttribute(CompressionDataParser.CompressionInputBuilder.ArchiveInput.FILE_OFFSET_INSIDER_ARCHIVE)
    );
  }
}