/**
* Copyright 2015 StreamSets Inc.
*
* Licensed under the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.streamsets.pipeline.lib.io;
import com.streamsets.pipeline.config.FileRollMode;
import com.streamsets.pipeline.config.PostProcessingOptions;
import com.streamsets.pipeline.sdk.DataCollectorServicesUtils;
import org.junit.Assert;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import java.io.File;
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import java.util.UUID;
/**
 * Tests for {@link MultiFileReader}.
 *
 * <p>Every test works on files created under two freshly created scratch directories inside
 * {@code target/} (see {@link #setUp()}). Readers are always closed in a {@code finally} block so
 * that a failing assertion does not leave an open reader behind.
 */
public class TestMultiFileReader {
  private static final Charset UTF8 = StandardCharsets.UTF_8;

  /** Upper bound, in milliseconds, for polling loops that wait for the reader to produce data. */
  private static final long POLL_TIMEOUT_MILLIS = 10000;

  private File testDir1;
  private File testDir2;

  @BeforeClass
  public static void setUpClass() {
    DataCollectorServicesUtils.loadDefaultServices();
  }

  /** Creates two uniquely named scratch directories under {@code target/}. */
  @Before
  public void setUp() {
    testDir1 = new File("target", UUID.randomUUID().toString()).getAbsoluteFile();
    Assert.assertTrue(testDir1.mkdirs());
    testDir2 = new File("target", UUID.randomUUID().toString()).getAbsoluteFile();
    Assert.assertTrue(testDir2.mkdirs());
  }

  /**
   * Builds a reader using the constructor arguments shared by all tests except
   * {@link #testArchivingWithOffsetLagAndPendingFiles()}: UTF-8, {@code 1024}, {@code false},
   * {@code 0} and {@code false} for the 2nd, 3rd, 6th, 7th and 8th arguments.
   *
   * @param fileInfos the files to read
   * @param postProcessing what to do with a file once it has been fully read
   * @param archiveDir archive directory passed to the reader, may be {@code null}
   * @param inPreview last constructor argument; the {@code *InPreviewMode} tests pass {@code true}
   * @return the new reader; the caller is responsible for closing it
   */
  private MultiFileReader createReader(
      List<MultiFileInfo> fileInfos,
      PostProcessingOptions postProcessing,
      String archiveDir,
      boolean inPreview
  ) throws IOException {
    return new MultiFileReader(fileInfos, UTF8, 1024, postProcessing, archiveDir, false, 0, false, inPreview);
  }

  /** With no file on disk, {@code next} waits out its timeout and an offset entry is still reported. */
  @Test
  public void testEmptyDirectory() throws IOException {
    File file = new File(testDir1, "file.txt");
    MultiFileInfo di = new MultiFileInfo(null, file.getPath(), FileRollMode.REVERSE_COUNTER, "", "", "");
    MultiFileReader mdr = createReader(Arrays.asList(di), PostProcessingOptions.NONE, null, false);
    try {
      mdr.setOffsets(new HashMap<String, String>());
      long start = System.currentTimeMillis();
      Assert.assertNull(mdr.next(20));
      Assert.assertTrue(System.currentTimeMillis() - start >= 20);
      Assert.assertEquals(1, mdr.getOffsets().size());
      // The first argument is the failure message, not an expected value.
      Assert.assertNotNull("no offset entry for " + di.getFileKey(), mdr.getOffsets().get(di.getFileKey()));
    } finally {
      mdr.close();
    }
  }

  /** Reads one file, then reads the data appended to it, checking tag, text and offsets. */
  @Test
  public void testWithOneDirectory() throws IOException {
    File file = new File(testDir1, "file.txt");
    Files.write(file.toPath(), Arrays.asList("Hello"), UTF8);
    MultiFileInfo di = new MultiFileInfo("tag", file.getPath(), FileRollMode.REVERSE_COUNTER, "", "", "");
    MultiFileReader mdr = createReader(Arrays.asList(di), PostProcessingOptions.NONE, null, false);
    try {
      mdr.setOffsets(new HashMap<String, String>());
      long start = System.currentTimeMillis();
      LiveFileChunk chunk = mdr.next(1000);
      // data is already there, so the call must return before the timeout elapses
      Assert.assertTrue(System.currentTimeMillis() - start < 1000);
      Assert.assertNotNull(chunk);
      Assert.assertEquals("tag", chunk.getTag());
      Assert.assertEquals("Hello\n", chunk.getLines().get(0).getText());
      Assert.assertEquals(1, mdr.getOffsets().size());
      Assert.assertTrue(mdr.getOffsets().get(di.getFileKey()).startsWith("6"));
      Assert.assertTrue(mdr.getOffsets().get(di.getFileKey()).contains("file.txt"));
      Assert.assertNull(mdr.next(0));

      Files.write(file.toPath(), Arrays.asList("Bye"), UTF8, StandardOpenOption.APPEND);
      mdr.setOffsets(mdr.getOffsets());
      chunk = mdr.next(0);
      Assert.assertNotNull(chunk);
      Assert.assertEquals("Bye\n", chunk.getLines().get(0).getText());
      Assert.assertEquals(1, mdr.getOffsets().size());
      Assert.assertTrue(mdr.getOffsets().get(di.getFileKey()).startsWith("10"));
      Assert.assertTrue(mdr.getOffsets().get(di.getFileKey()).contains("file.txt"));
      Assert.assertNull(mdr.next(0));
    } finally {
      mdr.close();
    }
  }

  /** Registering the same file path twice must make the constructor fail with an IOException. */
  @Test(expected = IOException.class)
  public void testWithMultipleFilesInSameDirectoryWithSameName() throws Exception {
    File file1 = new File(testDir1, "f1.txt");
    Files.write(file1.toPath(), Arrays.asList("f1.0"), UTF8);
    MultiFileInfo di1 = new MultiFileInfo(null, file1.getPath(), FileRollMode.REVERSE_COUNTER, "", "", "");
    MultiFileInfo di2 = new MultiFileInfo(null, file1.getPath(), FileRollMode.REVERSE_COUNTER, "", "", "");
    new MultiFileReader(Arrays.asList(di1, di2), UTF8, 1024, PostProcessingOptions.NONE, null, false, 0, false, false);
  }

  /**
   * Reads two files alternately, then renames and recreates the second one, checking chunks,
   * offsets and the START/END file events reported along the way.
   */
  @Test
  public void testWithMultipleDirectories() throws Exception {
    File file1 = new File(testDir1, "f1.txt");
    File file2 = new File(testDir1, "f2.txt");
    Files.write(file1.toPath(), Arrays.asList("f1.0"), UTF8);
    Files.write(file2.toPath(), Arrays.asList("f2.00"), UTF8);
    MultiFileInfo di1 = new MultiFileInfo("tag1", file1.getPath(), FileRollMode.REVERSE_COUNTER, "", "", "");
    MultiFileInfo di2 = new MultiFileInfo("tag2", file2.getPath(), FileRollMode.REVERSE_COUNTER, "", "", "");
    MultiFileReader mdr = createReader(Arrays.asList(di1, di2), PostProcessingOptions.NONE, null, false);
    try {
      // just open the multidir, no file events
      Assert.assertTrue(mdr.getEvents().isEmpty());

      // reads first dir
      mdr.setOffsets(new HashMap<String, String>());
      // after setOffset there should be no file events
      Assert.assertTrue(mdr.getEvents().isEmpty());
      LiveFileChunk chunk = mdr.next(0);
      Assert.assertNotNull(chunk);
      Assert.assertEquals("tag1", chunk.getTag());
      Assert.assertEquals("f1.0\n", chunk.getLines().get(0).getText());
      Assert.assertEquals(2, mdr.getOffsets().size());
      Assert.assertTrue(mdr.getOffsets().get(di1.getFileKey()).startsWith("5"));
      Assert.assertTrue(mdr.getOffsets().get(di1.getFileKey()).contains("f1.txt"));
      Assert.assertTrue(mdr.getOffsets().get(di2.getFileKey()).isEmpty());

      // after first read we should get 1st file start event
      Assert.assertEquals(1, mdr.getEvents().size());
      LiveFile lf1 = new LiveFile(file1.toPath());
      Assert.assertEquals(new FileEvent(lf1, FileEvent.Action.START), mdr.getEvents().get(0));

      Files.write(file1.toPath(), Arrays.asList("f1.01"), UTF8, StandardOpenOption.APPEND);

      // reads second dir even if first dir has new data (round robin to avoid starvation)
      mdr.setOffsets(mdr.getOffsets());
      // after setOffset there should be no file events
      Assert.assertTrue(mdr.getEvents().isEmpty());
      chunk = mdr.next(0);

      // after first read we should get 2nd file start event
      Assert.assertEquals(1, mdr.getEvents().size());
      LiveFile lf2 = new LiveFile(file2.toPath());
      Assert.assertEquals(new FileEvent(lf2, FileEvent.Action.START), mdr.getEvents().get(0));

      Assert.assertNotNull(chunk);
      Assert.assertEquals("tag2", chunk.getTag());
      Assert.assertEquals("f2.00\n", chunk.getLines().get(0).getText());
      Assert.assertEquals(2, mdr.getOffsets().size());
      Assert.assertTrue(mdr.getOffsets().get(di1.getFileKey()).startsWith("5"));
      Assert.assertTrue(mdr.getOffsets().get(di1.getFileKey()).contains("f1.txt"));
      Assert.assertTrue(mdr.getOffsets().get(di2.getFileKey()).startsWith("6"));
      Assert.assertTrue(mdr.getOffsets().get(di2.getFileKey()).contains("f2.txt"));

      // reads first dir cause has data
      mdr.setOffsets(mdr.getOffsets());
      chunk = mdr.next(0);
      Assert.assertNotNull(chunk);
      Assert.assertEquals("tag1", chunk.getTag());
      Assert.assertEquals("f1.01\n", chunk.getLines().get(0).getText());

      // no data in any dir
      mdr.setOffsets(mdr.getOffsets());
      chunk = mdr.next(0);
      // no file events, we keep reading the same files
      Assert.assertTrue(mdr.getEvents().isEmpty());
      Assert.assertNull(chunk);

      Files.write(file2.toPath(), Arrays.asList("f2.01"), UTF8, StandardOpenOption.APPEND);

      // reads any dir with data
      mdr.setOffsets(mdr.getOffsets());
      chunk = mdr.next(0);
      // no file events, we keep reading the same files
      Assert.assertTrue(mdr.getEvents().isEmpty());
      Assert.assertNotNull(chunk);
      Assert.assertEquals("tag2", chunk.getTag());
      Assert.assertEquals("f2.01\n", chunk.getLines().get(0).getText());
      Assert.assertEquals(2, mdr.getOffsets().size());

      Files.write(file2.toPath(), Arrays.asList("f2.02"), UTF8, StandardOpenOption.APPEND);
      Files.move(file2.toPath(), Paths.get(file2 + ".1"));

      // lets sleep a bit more than the refresh interval in order to detect the rename
      Thread.sleep(SingleLineLiveFileReader.REFRESH_INTERVAL + 1);

      // reads rolled file from second dir
      mdr.setOffsets(mdr.getOffsets());
      chunk = mdr.next(0);

      // after first read we should get 1 file end event for the original lf2
      LiveFile oldLf2 = lf2.refresh(); // old because it is renamed
      Assert.assertEquals(1, mdr.getEvents().size());
      Assert.assertEquals(new FileEvent(oldLf2, FileEvent.Action.END), mdr.getEvents().get(0));

      Assert.assertNotNull(chunk);
      Assert.assertEquals("tag2", chunk.getTag());
      Assert.assertEquals("f2.02\n", chunk.getLines().get(0).getText());
      Assert.assertEquals(2, mdr.getOffsets().size());

      Files.write(file2.toPath(), Arrays.asList("f2.03"), UTF8, StandardOpenOption.CREATE);

      // reads live file from second dir
      mdr.setOffsets(mdr.getOffsets());
      chunk = mdr.next(0);

      // after first read we should get 1 file start event for the new lf2
      Assert.assertEquals(1, mdr.getEvents().size());
      Assert.assertEquals(
          new FileEvent(new LiveFile(file2.toPath()), FileEvent.Action.START),
          mdr.getEvents().get(0)
      );

      long start = System.currentTimeMillis();
      while (chunk == null && System.currentTimeMillis() - start < POLL_TIMEOUT_MILLIS) {
        // we need to do sleep for a bit to ensure data is flushed to the FS
        Thread.sleep(100);
        // reads live file from second dir
        mdr.setOffsets(mdr.getOffsets());
        chunk = mdr.next(0);
      }

      Assert.assertNotNull(chunk);
      Assert.assertEquals("tag2", chunk.getTag());
      Assert.assertFalse(chunk.getLines().isEmpty());
      Assert.assertEquals("f2.03\n", chunk.getLines().get(0).getText());
      Assert.assertEquals(2, mdr.getOffsets().size());

      Assert.assertNull(mdr.next(0));
    } finally {
      mdr.close();
    }
  }

  // log roll mode has a different live file strategy, so verifying things work there too
  /** With DELETE post-processing, an already rolled file is removed once it has been fully read. */
  @Test
  public void testPostProcessingDeleteLogRollMode() throws Exception {
    File file = new File(testDir1, "f1.txt");
    File file1 = new File(testDir1, "f1.txt.1");
    Files.write(file1.toPath(), Arrays.asList("f1"), UTF8);
    MultiFileInfo di1 = new MultiFileInfo(null, file.getPath(), FileRollMode.REVERSE_COUNTER, "", "", "");
    MultiFileReader mdr = createReader(Arrays.asList(di1), PostProcessingOptions.DELETE, null, false);
    try {
      Assert.assertTrue(file1.exists());

      mdr.setOffsets(new HashMap<String, String>());
      // read file content
      Assert.assertNotNull(mdr.next(0));
      // reach eof
      Assert.assertNull(mdr.next(0));

      Assert.assertFalse(file1.exists());
    } finally {
      mdr.close();
    }
  }

  /** With DELETE post-processing, a pattern-matched file is removed after a newer file appears. */
  @Test
  public void testPostProcessingDelete() throws Exception {
    File file1 = new File(testDir1, "f1.txt");
    File file2 = new File(testDir1, "f2.txt");
    Files.write(file1.toPath(), Arrays.asList("f1"), UTF8);
    MultiFileInfo di1 = new MultiFileInfo(
        null,
        new File(testDir1, "f${PATTERN}.txt").getPath(),
        FileRollMode.PATTERN,
        ".",
        "",
        ""
    );
    MultiFileReader mdr = createReader(Arrays.asList(di1), PostProcessingOptions.DELETE, null, false);
    try {
      Assert.assertTrue(file1.exists());

      mdr.setOffsets(new HashMap<String, String>());
      // read file content
      Assert.assertNotNull(mdr.next(0));

      // triggers a periodic 'roll'
      Files.createFile(file2.toPath());

      // sleeps to trigger a livefile refresh
      Thread.sleep(SingleLineLiveFileReader.REFRESH_INTERVAL * 2 + 1);

      // reach eof
      Assert.assertNull(mdr.next(0));

      Assert.assertFalse(file1.exists());
    } finally {
      mdr.close();
    }
  }

  /** With ARCHIVE post-processing, the fully read file is moved under the archive directory. */
  @Test
  public void testPostProcessingArchive() throws Exception {
    File file1 = new File(testDir1, "f1.txt");
    File file2 = new File(testDir1, "f2.txt");
    Files.write(file1.toPath(), Arrays.asList("f1"), UTF8);
    MultiFileInfo di1 = new MultiFileInfo(
        null,
        new File(testDir1, "f${PATTERN}.txt").getPath(),
        FileRollMode.PATTERN,
        ".",
        "",
        ""
    );
    MultiFileReader mdr =
        createReader(Arrays.asList(di1), PostProcessingOptions.ARCHIVE, testDir2.getAbsolutePath(), false);
    try {
      Assert.assertTrue(file1.exists());

      mdr.setOffsets(new HashMap<String, String>());
      // read file content
      Assert.assertNotNull(mdr.next(0));

      // triggers a periodic 'roll'
      Files.createFile(file2.toPath());

      // sleeps to trigger a livefile refresh
      Thread.sleep(SingleLineLiveFileReader.REFRESH_INTERVAL + 1);

      // reach eof
      Assert.assertNull(mdr.next(0));

      Assert.assertFalse(file1.exists());
      // the archived copy is expected at <archive dir> + <original absolute path>
      Path f1Archived = Paths.get(testDir2.getAbsolutePath(), file1.getPath());
      Assert.assertTrue(Files.exists(f1Archived));
    } finally {
      mdr.close();
    }
  }

  /** Same scenario as {@link #testPostProcessingDelete()} with the last flag set: file must survive. */
  @Test
  public void testPostProcessingDeleteInPreviewMode() throws Exception {
    File file1 = new File(testDir1, "f1.txt");
    File file2 = new File(testDir1, "f2.txt");
    Files.write(file1.toPath(), Arrays.asList("f1"), UTF8);
    MultiFileInfo di1 = new MultiFileInfo(
        null,
        new File(testDir1, "f${PATTERN}.txt").getPath(),
        FileRollMode.PATTERN,
        ".",
        "",
        ""
    );
    MultiFileReader mdr = createReader(Arrays.asList(di1), PostProcessingOptions.DELETE, null, true);
    try {
      Assert.assertTrue(file1.exists());

      mdr.setOffsets(new HashMap<String, String>());
      // read file content
      Assert.assertNotNull(mdr.next(0));

      // triggers a periodic 'roll'
      Files.createFile(file2.toPath());

      // sleeps to trigger a livefile refresh
      Thread.sleep(SingleLineLiveFileReader.REFRESH_INTERVAL * 2 + 1);

      // reach eof
      Assert.assertNull(mdr.next(0));

      Assert.assertTrue(file1.exists());
    } finally {
      mdr.close();
    }
  }

  /** Same scenario as {@link #testPostProcessingArchive()} with the last flag set: nothing is moved. */
  @Test
  public void testPostProcessingArchiveInPreviewMode() throws Exception {
    File file1 = new File(testDir1, "f1.txt");
    File file2 = new File(testDir1, "f2.txt");
    Files.write(file1.toPath(), Arrays.asList("f1"), UTF8);
    MultiFileInfo di1 = new MultiFileInfo(
        null,
        new File(testDir1, "f${PATTERN}.txt").getPath(),
        FileRollMode.PATTERN,
        ".",
        "",
        ""
    );
    MultiFileReader mdr =
        createReader(Arrays.asList(di1), PostProcessingOptions.ARCHIVE, testDir2.getAbsolutePath(), true);
    try {
      Assert.assertTrue(file1.exists());

      mdr.setOffsets(new HashMap<String, String>());
      // read file content
      Assert.assertNotNull(mdr.next(0));

      // triggers a periodic 'roll'
      Files.createFile(file2.toPath());

      // sleeps to trigger a livefile refresh
      Thread.sleep(SingleLineLiveFileReader.REFRESH_INTERVAL + 1);

      // reach eof
      Assert.assertNull(mdr.next(0));

      Assert.assertTrue(file1.exists());
      Path f1Archived = Paths.get(testDir2.getAbsolutePath(), file1.getPath());
      Assert.assertFalse(Files.exists(f1Archived));
    } finally {
      mdr.close();
    }
  }

  /**
   * Reads two files configured with a non-empty last {@code MultiFileInfo} argument
   * ({@code "^A.*"} / {@code "^B.*"}) until at least three distinct line texts have been collected.
   *
   * <p>The polling loop is bounded so that a reader regression fails the test instead of hanging it.
   */
  @Test
  public void testMultiLineFiles() throws Exception {
    File file1 = new File(testDir1, "f1.txt");
    File file2 = new File(testDir1, "f2.txt");
    Files.write(file1.toPath(), Arrays.asList("A1M1", "a1m1", "A1M2"), UTF8);
    Files.write(file2.toPath(), Arrays.asList("B2M1", "B2M2", "b2m2", "B3M3"), UTF8);
    MultiFileInfo di1 = new MultiFileInfo("tag1", file1.getPath(), FileRollMode.REVERSE_COUNTER, "", "", "^A.*");
    MultiFileInfo di2 = new MultiFileInfo("tag2", file2.getPath(), FileRollMode.REVERSE_COUNTER, "", "", "^B.*");
    MultiFileReader mdr = createReader(Arrays.asList(di1, di2), PostProcessingOptions.NONE, null, false);
    try {
      Set<String> lines = new HashSet<>();
      Map<String, String> offsets = new HashMap<>();
      long deadline = System.currentTimeMillis() + POLL_TIMEOUT_MILLIS;
      while (lines.size() < 3 && System.currentTimeMillis() < deadline) {
        mdr.setOffsets(offsets);
        LiveFileChunk chunk = mdr.next(0);
        if (chunk != null) {
          for (FileLine line : chunk.getLines()) {
            lines.add(line.getText());
          }
        }
        offsets = mdr.getOffsets();
      }
      Assert.assertTrue("expected at least 3 distinct lines but got " + lines, lines.size() >= 3);
    } finally {
      mdr.close();
    }
  }

  /**
   * Generates {@code numOfLines} lines of {@code lineLength} random lowercase ASCII letters, each
   * terminated by {@code '\n'}.
   */
  private static String getRandomData(int numOfLines, int lineLength) {
    final Random r = new Random();
    StringBuilder sb = new StringBuilder(numOfLines * (lineLength + 1));
    for (int i = 0; i < numOfLines; i++) {
      for (int j = 0; j < lineLength; j++) {
        // 26 letters, so that 'z' can be produced as well
        sb.append((char) ('a' + r.nextInt(26)));
      }
      sb.append("\n");
    }
    return sb.toString();
  }

  /**
   * Reads 8 files spread over 4 directories (one {@code MultiFileInfo} per directory) while calling
   * {@code getOffsetsLag} and {@code getPendingFiles} after every read; the whole run must finish
   * within the 2 second test timeout.
   */
  @Test(timeout = 2000)
  public void testArchivingWithOffsetLagAndPendingFiles() throws Exception {
    File testDir = new File("target", UUID.randomUUID().toString()).getAbsoluteFile();
    File archiveDir = new File(testDir, "archive").getAbsoluteFile();
    int totalNumOfFiles = 8;
    int numOfFilesPerDir = 2;
    int numOfLinesPerFile = 20;
    Assert.assertTrue(testDir.mkdirs());
    Assert.assertTrue(archiveDir.mkdirs());

    String data = getRandomData(numOfLinesPerFile, 50);

    // LinkedHashSet keeps the directories in creation order
    Set<String> dirs = new LinkedHashSet<>();
    File innerDir = null;
    int numOfFileForCurrentDir = 0;
    for (int i = 0; i < totalNumOfFiles; i++) {
      if (innerDir == null || numOfFileForCurrentDir == numOfFilesPerDir) {
        innerDir = new File(testDir, String.valueOf(i));
        Assert.assertTrue(innerDir.mkdirs());
        dirs.add(innerDir.getAbsolutePath());
        numOfFileForCurrentDir = 0;
      }
      String fileName = "file_" + i;
      // encode explicitly with the charset the reader is configured with
      Files.write(new File(innerDir, fileName).toPath(), data.getBytes(UTF8));
      numOfFileForCurrentDir++;
    }

    List<MultiFileInfo> multiFileInfos = new ArrayList<>();
    int tagIndex = 0;
    for (String dir : dirs) {
      multiFileInfos.add(
          new MultiFileInfo("tag" + tagIndex, dir + "/${PATTERN}", FileRollMode.PATTERN, ".*", "", "")
      );
      tagIndex++;
    }

    MultiFileReader mdr = new MultiFileReader(
        multiFileInfos,
        UTF8,
        1024,
        PostProcessingOptions.DELETE,
        archiveDir.getAbsolutePath(),
        true,
        1,
        false,
        false
    );
    try {
      Map<String, String> offsets = new HashMap<>();
      int numOfLinesProcessed = 0;
      int totalNumberOfLines = totalNumOfFiles * numOfLinesPerFile;
      do {
        mdr.setOffsets(offsets);
        LiveFileChunk chunk = mdr.next(0);
        if (chunk != null) {
          numOfLinesProcessed += chunk.getLines().size();
        }
        offsets = mdr.getOffsets();
        // results are ignored on purpose: the calls only have to complete without failing
        mdr.getOffsetsLag(offsets);
        mdr.getPendingFiles();
      } while (numOfLinesProcessed < totalNumberOfLines);
    } finally {
      mdr.close();
    }
  }
}