/**
* Copyright 2015 StreamSets Inc.
*
* Licensed under the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.streamsets.pipeline.lib.io;
import com.google.common.collect.ImmutableList;
import com.streamsets.pipeline.lib.parser.shaded.com.google.code.regexp.Pattern;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;
/**
* A <code>MultiLineLiveFileReader</code> is a reader that reads a file in 'tail -f' mode while keeping track
* of the current offset and detecting whether the file has been renamed.
* <p/>
* It tails multi line files (i.e. Log4j logs with stack traces, MySQL logs)
* <p/>
* It should be used wrapping a {@link SingleLineLiveFileReader}.
* <p/>
* All lines that do not match the main line pattern are collapsed onto the previous main line. The implementation
* works by looking ahead for the next main line.
* <p/>
* The assumption is that multi lines do not continue in other files. So when we reach EOF we flush the last
* accumulated multi line as a complete multi line.
*/
public class MultiLineLiveFileReader implements LiveFileReader {
  private final String tag;
  private final LiveFileReader reader;
  private final Pattern pattern;

  // text of the multi line currently being accumulated (main line plus any continuation lines seen so far)
  private final StringBuilder incompleteMultiLine;
  // offset reported for the multi line currently being accumulated
  private long incompleteMultiLineOffset;
  // true if any chunk that contributed to the multi line currently being accumulated was flagged as truncated
  private boolean incompleteMultiLineTruncated;

  /**
   * Creates a multi line reader.
   *
   * @param tag the tag set on the chunk that is produced when the last pending multi line is flushed at EOF.
   * @param reader The single line reader to use.
   * @param mainLinePattern the regex pattern that determines if a line is main line.
   */
  public MultiLineLiveFileReader(String tag, LiveFileReader reader, Pattern mainLinePattern) {
    this.tag = tag;
    this.reader = reader;
    this.pattern = mainLinePattern;
    incompleteMultiLine = new StringBuilder(2048);
  }

  /**
   * Returns the live file of the underlying reader.
   */
  @Override
  public LiveFile getLiveFile() {
    return reader.getLiveFile();
  }

  /**
   * Returns the charset of the underlying reader.
   */
  @Override
  public Charset getCharset() {
    return reader.getCharset();
  }

  /**
   * Returns the underlying reader offset moved back by the length of the multi line that is still pending, as
   * that is the logical position for the user of the multi line reader.
   */
  @Override
  public long getOffset() {
    // NOTE(review): the correction is measured in chars (StringBuilder.length()); this presumably only matches the
    // underlying offset unit for single-byte encodings and non-truncated lines -- confirm against the wrapped reader.
    return reader.getOffset() - incompleteMultiLine.length();
  }

  /**
   * Returns <code>true</code> if the underlying reader has more data or if there is a pending multi line.
   */
  @Override
  public boolean hasNext() throws IOException {
    // if the underlying reader is at EOF we still have to flush the pending multi line as a complete multi line,
    // so we also return true while there is pending text
    return reader.hasNext() || incompleteMultiLine.length() != 0;
  }

  /**
   * Returns the next chunk of complete multi lines.
   *
   * @param waitMillis wait time handed over to the underlying reader.
   * @return a chunk with complete multi lines, or <code>null</code> if no multi line could be completed with the
   * data currently available.
   * @throws IOException if the underlying reader fails.
   */
  @Override
  public LiveFileChunk next(long waitMillis) throws IOException {
    if (reader.hasNext()) {
      // get a new chunk from the underlying reader and compact it
      LiveFileChunk newChunk = reader.next(waitMillis);
      return (newChunk == null) ? null : resolveChunk(newChunk);
    }
    if (incompleteMultiLine.length() == 0) {
      return null;
    }
    // the underlying reader is at EOF, so the pending multi line is now known to be a complete multi line
    LiveFileChunk flushed = new LiveFileChunk(tag, reader.getLiveFile(), reader.getCharset(),
        ImmutableList.of(new FileLine(incompleteMultiLineOffset, incompleteMultiLine.toString())),
        incompleteMultiLineTruncated);
    incompleteMultiLine.setLength(0);
    incompleteMultiLineTruncated = false;
    return flushed;
  }

  /**
   * Finds the first main line in the chunk from the specified index position onwards. Lines are trimmed before
   * being matched against the main line pattern.
   *
   * @param chunk the chunk to scan.
   * @param startIdx index of the first line to look at.
   * @return the index of the first main line at or after <code>startIdx</code>, or <code>-1</code> if none.
   */
  int findNextMainLine(LiveFileChunk chunk, int startIdx) {
    List<FileLine> lines = chunk.getLines();
    for (int i = startIdx; i < lines.size(); i++) {
      if (pattern.matcher(lines.get(i).getText().trim()).matches()) {
        return i;
      }
    }
    return -1;
  }

  /**
   * Compacts all multi lines of the chunk into single lines. If there is a pending multi line from a previous
   * chunk, compaction starts from it. The text after the last main line of the chunk is kept as pending.
   *
   * @param chunk the chunk, as produced by the underlying reader, to compact.
   * @return a new chunk with all the multi lines that could be completed, or <code>null</code> if none could be
   * completed yet. The returned chunk is flagged as truncated if any chunk that contributed to its lines was
   * flagged as truncated.
   */
  LiveFileChunk resolveChunk(LiveFileChunk chunk) {
    List<FileLine> completeLines = new ArrayList<>();
    List<FileLine> chunkLines = chunk.getLines();
    boolean chunkTruncated = chunk.isTruncated();

    if (incompleteMultiLine.length() == 0) {
      // nothing pending, a new multi line starts with this chunk
      incompleteMultiLineOffset = chunk.getOffset();
      incompleteMultiLineTruncated = false;
    }
    incompleteMultiLineTruncated |= chunkTruncated;

    // truncation flag for the lines being emitted; kept apart from the pending flag because the pending flag is
    // re-seeded every time a multi line is completed
    boolean completeLinesTruncated = false;

    int pos = 0;
    int idx = findNextMainLine(chunk, pos);
    // while we have main lines we keep adding/compacting into the new chunk
    while (idx > -1) {
      // any lines up to the next main line belong to the previous main line
      for (int i = pos; i < idx; i++) {
        incompleteMultiLine.append(chunkLines.get(i).getText());
      }
      // if we have pending text, at this point it is a complete multi line: compact it and add it to the result
      if (incompleteMultiLine.length() != 0) {
        completeLines.add(new FileLine(incompleteMultiLineOffset, incompleteMultiLine.toString()));
        completeLinesTruncated |= incompleteMultiLineTruncated;
        // NOTE(review): advances by chars, not encoded bytes -- see getOffset()
        incompleteMultiLineOffset += incompleteMultiLine.length();
        // clear the pending text as we just used it to create a full line
        incompleteMultiLine.setLength(0);
        // whatever gets buffered from here on comes from this chunk only; truncation is only known per chunk,
        // so the pending flag conservatively restarts from this chunk's flag
        incompleteMultiLineTruncated = chunkTruncated;
      }
      // keep the current main line as pending as we still don't know if it is a complete multi line
      incompleteMultiLine.append(chunkLines.get(idx).getText());
      // find the next main line
      pos = idx + 1;
      idx = findNextMainLine(chunk, pos);
    }

    // the lines left after the last main line are kept as pending, to be completed with lines from the next chunk
    for (int i = pos; i < chunkLines.size(); i++) {
      incompleteMultiLine.append(chunkLines.get(i).getText());
    }

    if (completeLines.isEmpty()) {
      // didn't get a complete multi line yet, we keep storing lines but return a null chunk
      return null;
    }
    // create a new chunk with all complete multi lines
    return new LiveFileChunk(chunk.getTag(), chunk.getFile(), chunk.getCharset(), completeLines,
        completeLinesTruncated);
  }

  /**
   * Closes the underlying reader.
   */
  @Override
  public void close() throws IOException {
    reader.close();
  }
}