/**
* Copyright 2015 StreamSets Inc.
*
* Licensed under the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.streamsets.pipeline.lib.io;
import com.google.common.collect.ImmutableList;
import com.streamsets.pipeline.lib.parser.shaded.com.google.code.regexp.Pattern;
import org.junit.Assert;
import org.junit.Test;
import org.mockito.Mockito;
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Queue;
public class TestMultiLineLiveFileReader {
private static LiveFileChunk toChunk(String ... lines) {
return toChunk(false, lines);
}
private static List<String> toLines(List<LiveFileChunk> chunks) {
List<String> lines = new ArrayList<>();
for (LiveFileChunk chunk : chunks) {
if (chunk != null) {
for (FileLine line : chunk.getLines()) {
lines.add(line.getText());
}
}
}
return lines;
}
private static LiveFileChunk toChunk(boolean truncated, String ... lines) {
byte[][] lineBytes = new byte[lines.length][];
int totalSize = 0;
for (int i = 0; i < lines.length; i++) {
lineBytes[i] = lines[i].getBytes();
totalSize += lineBytes[i].length;
}
byte[] buffer = new byte[totalSize];
int pos = 0;
for (int i = 0; i < lineBytes.length; i++) {
System.arraycopy(lineBytes[i], 0, buffer, pos, lineBytes[i].length);
pos += lineBytes[i].length;
}
return new LiveFileChunk("tag", Mockito.mock(LiveFile.class), StandardCharsets.UTF_8, buffer, 0, pos, truncated);
}
private static class MockLineLiveFileReader implements LiveFileReader {
private final Queue<LiveFileChunk> chunks;
private final boolean returnNulls;
private int nextCount;
private long offset;
public MockLineLiveFileReader(boolean returnNulls, LiveFileChunk ... chunks) {
this.chunks = new ArrayDeque<>(ImmutableList.copyOf(chunks));
this.returnNulls = returnNulls;
}
@Override
public LiveFile getLiveFile() {
return Mockito.mock(LiveFile.class);
}
@Override
public Charset getCharset() {
return StandardCharsets.UTF_8;
}
@Override
public long getOffset() {
return offset;
}
@Override
public boolean hasNext() throws IOException {
return !chunks.isEmpty();
}
@Override
public LiveFileChunk next(long waitMillis) throws IOException {
LiveFileChunk chunk = null;
if (!returnNulls || nextCount % 2 != 0) {
chunk =chunks.poll();
offset += chunk.getLength();
}
nextCount++;
return chunk;
}
@Override
public void close() throws IOException {
}
}
@Test
public void testMockChunks() {
LiveFileChunk chunk = toChunk("Hello\n");
Assert.assertEquals(1, chunk.getLines().size());
Assert.assertEquals("Hello\n", chunk.getLines().get(0).getText());
chunk = toChunk("Hello\n", "Bye\n");
Assert.assertEquals(2, chunk.getLines().size());
Assert.assertEquals("Hello\n", chunk.getLines().get(0).getText());
Assert.assertEquals(0, chunk.getLines().get(0).getOffset());
Assert.assertEquals(0, chunk.getLines().get(0).getFileOffset());
Assert.assertEquals("Hello\n".length(), chunk.getLines().get(0).getLength());
Assert.assertEquals("Bye\n", chunk.getLines().get(1).getText());
Assert.assertEquals("Hello\n".length(), chunk.getLines().get(1).getOffset());
Assert.assertEquals("Hello\n".length(), chunk.getLines().get(1).getFileOffset());
Assert.assertEquals("Bye\n".length(), chunk.getLines().get(1).getLength());
}
@Test
public void testChunksToLines() {
List<LiveFileChunk> chunks = Arrays.asList(toChunk("Hello\n", "Hola\n"), null, toChunk("Bye\n"));
Assert.assertEquals(ImmutableList.of("Hello\n", "Hola\n", "Bye\n"), toLines(chunks));
}
@Test
public void testMockLineLiveFileReader() throws Exception {
Assert.assertFalse(new MockLineLiveFileReader(false).hasNext());
Assert.assertEquals(0, new MockLineLiveFileReader(false).getOffset());
LiveFileChunk chunk1 = toChunk("Hello\n");
LiveFileReader reader = new MockLineLiveFileReader(false, chunk1);
Assert.assertTrue(reader.hasNext());
Assert.assertEquals(0, reader.getOffset());
Assert.assertEquals(chunk1, reader.next(0));
Assert.assertEquals(chunk1.getLength(), reader.getOffset());
Assert.assertFalse(reader.hasNext());
reader.close();
LiveFileChunk chunk2= toChunk("Bye\n");
reader = new MockLineLiveFileReader(true, chunk1, chunk2);
Assert.assertEquals(0, reader.getOffset());
Assert.assertTrue(reader.hasNext());
Assert.assertEquals(null, reader.next(0));
Assert.assertEquals(0, reader.getOffset());
Assert.assertTrue(reader.hasNext());
Assert.assertEquals(chunk1, reader.next(0));
Assert.assertEquals(chunk1.getLength(), reader.getOffset());
Assert.assertTrue(reader.hasNext());
Assert.assertEquals(null, reader.next(0));
Assert.assertEquals(chunk1.getLength(), reader.getOffset());
Assert.assertTrue(reader.hasNext());
Assert.assertEquals(chunk2, reader.next(0));
Assert.assertEquals(chunk1.getLength() + chunk2.getLength(), reader.getOffset());
Assert.assertFalse(reader.hasNext());
Assert.assertEquals(chunk1.getLength() + chunk2.getLength(), reader.getOffset());
// next() after EOF returns null
Assert.assertNull(reader.next(0));
reader.close();
}
private void testAllSingleLines(boolean returnNulls) throws Exception {
LiveFileChunk chunk1 = toChunk("Hello\n");
LiveFileChunk chunk2= toChunk("Bye\n");
List<LiveFileChunk> expected = ImmutableList.of(chunk1, chunk2);
List<LiveFileChunk> got = new ArrayList<>();
LiveFileReader reader = new MockLineLiveFileReader(returnNulls, chunk1, chunk2);
reader = new MultiLineLiveFileReader("t", reader, Pattern.compile(".*"));
while (reader.hasNext()) {
LiveFileChunk chunk = reader.next(0);
got.add(chunk);
}
reader.close();
Assert.assertEquals(toLines(expected), toLines(got));
}
@Test
public void testAllSingleLinesNotNulls() throws Exception {
testAllSingleLines(false);
}
@Test
public void testAllSingleLinesNulls() throws Exception {
testAllSingleLines(true);
}
private void testMultiLineWithinChunks(boolean returnNulls) throws Exception {
LiveFileChunk chunk1 = toChunk("A0\n", "A1\n", "B1\n", "A2\n");
LiveFileChunk chunk2= toChunk("A3\n", "B3\n");
List<LiveFileChunk> got = new ArrayList<>();
LiveFileReader reader = new MockLineLiveFileReader(returnNulls, chunk1, chunk2);
reader = new MultiLineLiveFileReader("t", reader, Pattern.compile("A.*"));
while (reader.hasNext()) {
LiveFileChunk chunk = reader.next(0);
got.add(chunk);
}
reader.close();
Assert.assertEquals(ImmutableList.of("A0\n", "A1\nB1\n", "A2\n", "A3\nB3\n"), toLines(got));
}
@Test
public void testMultiLineWithinChunksNotNulls() throws Exception {
testMultiLineWithinChunks(false);
}
@Test
public void testMultiLineWithinChunksNulls() throws Exception {
testMultiLineWithinChunks(true);
}
private void testMultiLineLastChunk(boolean returnNulls) throws Exception {
LiveFileChunk chunk1 = toChunk("A0\n", "A1\n", "B1\n", "A2\n", "B2\n");
List<LiveFileChunk> got = new ArrayList<>();
LiveFileReader reader = new MockLineLiveFileReader(returnNulls, chunk1);
reader = new MultiLineLiveFileReader("t", reader, Pattern.compile("A.*"));
while (reader.hasNext()) {
LiveFileChunk chunk = reader.next(0);
got.add(chunk);
}
reader.close();
Assert.assertEquals(ImmutableList.of("A0\n", "A1\nB1\n", "A2\nB2\n"), toLines(got));
}
@Test
public void testMultiLineLastChunkNotNulls() throws Exception {
testMultiLineLastChunk(false);
}
@Test
public void testMultiLineLastChunkNulls() throws Exception {
testMultiLineLastChunk(true);
}
private void testMultiLineAcrossChunks(boolean returnNulls) throws Exception {
LiveFileChunk chunk1 = toChunk("A0\n", "A1\n", "B1\n", "A2\n");
LiveFileChunk chunk2= toChunk("B2\n", "A3\n", "B3\n");
LiveFileChunk chunk3= toChunk("A4\n", "A5\n");
List<LiveFileChunk> got = new ArrayList<>();
LiveFileReader reader = new MockLineLiveFileReader(returnNulls, chunk1, chunk2, chunk3);
reader = new MultiLineLiveFileReader("t", reader, Pattern.compile("A.*"));
while (reader.hasNext()) {
LiveFileChunk chunk = reader.next(0);
got.add(chunk);
}
reader.close();
Assert.assertEquals(ImmutableList.of("A0\n", "A1\nB1\n", "A2\nB2\n", "A3\nB3\n", "A4\n", "A5\n"), toLines(got));
}
@Test
public void testMultiLineAcrossChunksNotNulls() throws Exception {
testMultiLineAcrossChunks(false);
}
@Test
public void testMultiLineAcrossChunksNulls() throws Exception {
testMultiLineAcrossChunks(true);
}
@Test
public void testOffset() throws Exception {
LiveFileChunk chunk1 = toChunk("A0\n", "A1\n", "B1\n", "A2\n");
LiveFileChunk chunk2= toChunk("B2\n", "A3\n", "B3\n");
LiveFileChunk chunk3= toChunk("A4\n", "A5\n");
List<LiveFileChunk> got = new ArrayList<>();
LiveFileReader reader = new MockLineLiveFileReader(false, chunk1, chunk2, chunk3);
reader = new MultiLineLiveFileReader("t", reader, Pattern.compile("A.*"));
Assert.assertEquals(0, reader.getOffset());
Assert.assertTrue(reader.hasNext());
LiveFileChunk chunk = reader.next(0); //A0 A1 B1
Assert.assertEquals(3 * 3, reader.getOffset());
Assert.assertTrue(reader.hasNext());
chunk = reader.next(0); //A2 B2
Assert.assertEquals(3 * 3 + 2 * 3, reader.getOffset());
Assert.assertTrue(reader.hasNext());
chunk = reader.next(0); //A3 B3 A4
Assert.assertEquals(3 * 3 + 2 * 3 + 3 * 3, reader.getOffset());
Assert.assertTrue(reader.hasNext());
chunk = reader.next(0); //A5
Assert.assertEquals(3 * 3 + 2 * 3 + 3 * 3 + 1 * 3, reader.getOffset());
Assert.assertFalse(reader.hasNext());
Assert.assertEquals(3 * 3 + 2 * 3 + 3 * 3 + 1 * 3, reader.getOffset());
reader.close();
}
}