/** * Copyright 2015 StreamSets Inc. * * Licensed under the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.streamsets.pipeline.lib.io; import com.streamsets.pipeline.api.impl.Utils; import com.streamsets.pipeline.lib.util.ThreadUtil; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; import java.io.Reader; import java.nio.ByteBuffer; import java.nio.channels.SeekableByteChannel; import java.nio.charset.Charset; import java.nio.file.Files; import java.nio.file.StandardOpenOption; /** * A <code>SingleLiveFileReader</code> is a Reader that allows to read a file in a 'tail -f' mode while keeping track * of the current offset and detecting if the file has been renamed. * <p/> * It tails one line at the time. To tail log files with multi-line logs (i.e. Log4j logs with stack traces, MySQL logs) * use the {@link MultiLineLiveFileReader}. * <p/> * The <code>LiveFileReader</code> uses the <code>Iterator</code> pattern to get chunks of lines of the file. * <p/> * The <code>LiveFileReader</code> performs non-blocking read operations (using Java NIO). * <p/> * IMPORTANT: The offsets are always on bytes, we are working with charsets where CR and LF are always one byte * (ie UTF-8 or ASCII) * <p/> * IMPORTANT: The provided charset must encode LF and CR as '0x0A' and '0x0D' respectively. */ public class SingleLineLiveFileReader implements LiveFileReader { private static final Logger LOG = LoggerFactory.getLogger(SingleLineLiveFileReader.class); // we refresh the LiveFile every 500 msecs, to detect if it has been renamed static final long REFRESH_INTERVAL = Integer.parseInt(System.getProperty("LiveFileReader.refresh.ms", "500")); // we sleep for 10 millisec to yield CPU private static final long YIELD_INTERVAL = Integer.parseInt(System.getProperty("LiveFileReader.yield.ms", "10")); private final RollMode rollMode; private final LiveFile originalFile; private String tag; private LiveFile currentFile; private final Charset charset; private long offset; private boolean truncateMode; private final SeekableByteChannel channel; private final ByteBuffer buffer; private final byte[] chunkBytes; private boolean open; private long lastLiveFileRefresh; private int lastPosCheckedForEol; private boolean rolled; // negative offset means we are in truncate mode, we need to discard data until we find an EOL /** * Creates a <code>SingleLiveFileReader</code>. * * @param rollMode {@link RollMode} the file roll mode. * @param tag the file tag. * @param file {@link LiveFile} of the file to read. * @param charset {@link Charset} of the file, the returned {@link LiveFileChunk} will have a {@link Reader} * using this character set. * @param offset offset in bytes to start reading the file from. If the offset is a negative number its absolute value * will be used and from there data will be ignored until the first EOL. This allows handling offsets * of truncated lines. * @param maxLineLen the maximum line length including the EOL characters, if the length is exceeded, the line will * be truncated. * @throws IOException thrown if the file could not be opened or the specified offset is beyond the current file * length. * @throws IllegalArgumentException thrown if the the provided charset must encode LF and CR as '0x0A' and '0x0D' * respectively. */ public SingleLineLiveFileReader(RollMode rollMode, String tag, LiveFile file, Charset charset, long offset, int maxLineLen) throws IOException { Utils.checkNotNull(rollMode, "rollMode"); Utils.checkNotNull(file, "file"); Utils.checkNotNull(charset, "charset"); Utils.checkArgument(maxLineLen > 1, "maxLineLen must greater than 1"); validateCharset(charset, '\n', "\\n"); validateCharset(charset, '\r', "\\r"); this.rollMode = rollMode; this.tag = tag; this.originalFile = file; this.charset = charset; this.offset = Math.abs(offset); truncateMode = offset < 0; currentFile = originalFile.refresh(); if (!currentFile.equals(originalFile)) { LOG.debug("Original file '{}' refreshed to '{}'", file, currentFile); } channel = Files.newByteChannel(currentFile.getPath(), StandardOpenOption.READ); open = true; long actualSize; try { actualSize = channel.size(); } catch (IOException ex) { closeChannel(); throw ex; } if (offset > actualSize) { channel.close(); throw new IOException(Utils.format("File '{}', offset '{}' beyond file size '{}'", currentFile.getPath(), offset, actualSize)); } try { channel.position(this.offset); } catch (IOException ex) { closeChannel(); throw ex; } LOG.debug("File '{}', positioned at offset '{}'", currentFile, offset); buffer = ByteBuffer.allocate(maxLineLen); chunkBytes = new byte[maxLineLen]; lastPosCheckedForEol = 0; } private void closeChannel() { if (open) { try { open = false; channel.close(); } catch (IOException ex) { //NOP } } } private void validateCharset(Charset charset, char c, String cStr) { ByteBuffer bf = charset.encode("" + c); if (bf.limit() != 1) { throw new IllegalArgumentException(Utils.format("Charset '{}' does not encode character '{}' in one byte", charset, cStr)); } byte b = bf.get(); if (b != (byte)c) { throw new IllegalArgumentException(Utils.format("Charset '{}' does not encode character '{}' as '{}'", charset, cStr, c)); } } @Override public LiveFile getLiveFile() { return currentFile; } @Override public Charset getCharset() { return charset; } // offset will be negative if we are in truncate mode. @Override public long getOffset() { Utils.checkState(open, Utils.formatL("LiveFileReder for '{}' is not open", currentFile)); return (truncateMode) ? -offset : offset; } @Override public boolean hasNext() throws IOException { Utils.checkState(open, Utils.formatL("LiveFileReader for '{}' is not open", currentFile)); // the buffer is dirty, or the file is still live, or the channel pos is less than the file length return (buffer.position() > 0) || !isEof(); } @Override public LiveFileChunk next(long waitMillis) throws IOException { Utils.checkArgument(waitMillis >= 0, "waitMillis must equal or greater than zero"); Utils.checkState(open, Utils.formatL("LiveFileReader for '{}' is not open", currentFile)); LiveFileChunk liveFileChunk = null; long start = System.currentTimeMillis() + waitMillis; try { while (true) { if (!hasNext()) { break; } if (truncateMode) { if (LOG.isTraceEnabled()) { LOG.trace("File '{}' at offset '{} in fast forward mode", currentFile, channel.position()); } truncateMode = fastForward(); } if (!truncateMode) { liveFileChunk = readChunk(); if (LOG.isTraceEnabled()) { LOG.trace("File '{}' at offset '{} got chunk '{}'", currentFile, channel.position(), liveFileChunk != null); } if (liveFileChunk != null) { break; } } if (System.currentTimeMillis() - start >= 0) { if (LOG.isTraceEnabled()) { LOG.trace("File '{}' at offset '{} timed out while waiting for chunk", currentFile, channel.position()); } //wait timeout break; } //yielding CPU while in wait loop if (!ThreadUtil.sleep(YIELD_INTERVAL)) { LOG.trace("File '{}' at offset '{} interrupted while yielding CPU", currentFile, channel.position()); break; } } offset = channel.position() - buffer.position(); return liveFileChunk; } catch (IOException ex) { closeChannel(); throw ex; } } @Override public void close() throws IOException { if (open) { open = false; channel.close(); } } // IMPLEMENTATION // returns true if still in truncate mode, false otherwise private boolean fastForward() throws IOException { try { boolean stillTruncate; buffer.clear(); if (channel.read(buffer) > -1 || isEof()) { //set the buffer into read from mode buffer.flip(); //we have data, lets look for the first EOL in it. int firstEolIdx = findEndOfFirstLine(buffer); if (firstEolIdx > -1) { // set position to position after first EOL buffer.position(firstEolIdx + 1); // set the buffer back into write into mode keeping data after first EOL buffer.compact(); stillTruncate = false; offset = channel.position() - buffer.position(); } else { // no EOL yet // whatever was read will be discarded on next next() call stillTruncate = true; offset = channel.position(); } } else { // no data read // whatever was read will be discarded on next next() call stillTruncate = true; offset = channel.position(); } return stillTruncate; } catch (IOException ex) { closeChannel(); throw ex; } } private LiveFileChunk readChunk() throws IOException { try { LiveFileChunk liveFileChunk = null; if (channel.read(buffer) > 0 || buffer.limit() - buffer.position() > 0 || isEof()) { // we have data, set the buffer into read from mode buffer.flip(); // lets look for the last EOL in it int lastEolIdx = (isEof()) ? buffer.limit() : findEndOfLastLine(buffer); if (lastEolIdx > -1) { // we have an EOL in the buffer or we are at the end of the file int chunkSize = lastEolIdx - buffer.position(); buffer.get(chunkBytes, 0, chunkSize); // create reader with exactly the chunk liveFileChunk = new LiveFileChunk(tag, currentFile, charset, chunkBytes, offset, chunkSize, false); } else if (buffer.limit() == buffer.capacity()) { // buffer is full and we don't have an EOL, return truncated chunk and go into truncate mode. // we have an EOL in the buffer or we are at the end of the file int chunkSize = buffer.limit() - buffer.position(); buffer.get(chunkBytes, 0, chunkSize); // create reader with exactly the chunk liveFileChunk = new LiveFileChunk(tag, currentFile, charset, chunkBytes, offset, chunkSize, true); truncateMode = true; } else { // we don't have an EOL and the buffer is not full, no chunk in this read liveFileChunk = null; } // set the buffer back into write into mode with the leftover data buffer.compact(); // correcting next position in buffer scanned for EOL to reflect post compact() position. lastPosCheckedForEol = buffer.position(); } return liveFileChunk; } catch (IOException ex) { closeChannel(); throw ex; } } private boolean isEof() throws IOException { try { if (!rolled) { if (originalFile.equals(currentFile) && System.currentTimeMillis() - lastLiveFileRefresh > REFRESH_INTERVAL) { currentFile = originalFile.refresh(); if (!currentFile.equals(originalFile)) { LOG.debug("Original file '{}' refreshed to '{}'", originalFile, currentFile); } rolled = rollMode.isFileRolled(currentFile); lastLiveFileRefresh = System.currentTimeMillis(); } } return rolled && channel.position() >= channel.size(); } catch (IOException ex) { closeChannel(); throw ex; } } private int findEndOfLastLine(ByteBuffer buffer) { for (int i = buffer.limit() - 1; i > lastPosCheckedForEol; i--) { // as we are going backwards, this will handle \r\n EOLs as well without producing extra EOLs // and if a buffer ends in \r, the last line will be kept as incomplete until the next chunk. if (buffer.get(i) == '\n') { return i + 1; // including EOL character } } return -1; } private int findEndOfFirstLine(ByteBuffer buffer) { for (int i = buffer.position(); i < buffer.limit(); i++) { if (buffer.get(i) == '\n') { return i; } if (buffer.get(i) == '\r') { // handling \r\n EOLs, if the buffer ends exactly after \n, then the next buffer read will work because of the // \n detection and no extra EOLs will be produced. Also, note this method is used only in truncated mode // doing fastforward to the next line. if (i + 1 < buffer.limit() && buffer.get(i + 1) == '\n') { return i + 1; } return i; } } return -1; } }