/** * Copyright 2015 StreamSets Inc. * * Licensed under the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.streamsets.pipeline.lib.io; import com.streamsets.pipeline.api.impl.Utils; import com.streamsets.pipeline.config.PostProcessingOptions; import com.streamsets.pipeline.lib.parser.shaded.com.google.code.regexp.Pattern; import org.apache.commons.io.IOUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; import java.nio.charset.Charset; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; /** * The <code>FileContext</code> encapsulates all live information about a directory being scanned/read. */ public class FileContext { private static final Logger LOG = LoggerFactory.getLogger(FileContext.class); private final MultiFileInfo multiFileInfo; private final LiveDirectoryScanner scanner; private final Charset charset; private final int maxLineLength; private final PostProcessingOptions postProcessing; private final String archiveDir; private final FileEventPublisher eventPublisher; private LiveFile currentFile; private LiveFileReader reader; private LiveFile startingCurrentFileName; private long startingOffset; private RollMode rollMode; private final Path dir; private boolean open; private boolean inPreviewMode; public FileContext(MultiFileInfo multiFileInfo, Charset charset, int maxLineLength, PostProcessingOptions postProcessing, String archiveDir, FileEventPublisher eventPublisher, boolean inPreviewMode) throws IOException { open = true; this.multiFileInfo = multiFileInfo; this.charset = charset; this.maxLineLength = maxLineLength; this.postProcessing = postProcessing; this.archiveDir = archiveDir; this.eventPublisher = eventPublisher; Path fullPath = Paths.get(multiFileInfo.getFileFullPath()); dir = fullPath.getParent(); Path name = fullPath.getFileName(); rollMode = multiFileInfo.getFileRollMode().createRollMode(name.toString(), multiFileInfo.getPattern()); scanner = new LiveDirectoryScanner(dir.toString(), multiFileInfo.getFirstFile(), getRollMode()); this.inPreviewMode = inPreviewMode; } public String toString() { return Utils.format("FileContext[path={} rollMode={}]", multiFileInfo.getFileFullPath(), rollMode); } public long getPendingFiles() throws IOException{ return scanner.getPendingFiles(currentFile); } public boolean hasReader() { Utils.checkState(open, "FileContext is closed"); return reader != null; } // a file context is active while its parent directory exists. public boolean isActive() { return Files.exists(dir); } public void close() { if (open && reader != null) { open = false; try { reader.close(); } catch (IOException ex) { LOG.warn("Could not close '{}' file property: {}", reader.getLiveFile(), ex.toString(), ex); } finally { reader = null; } } } // prepares and gets the reader if available before a read. public LiveFileReader getReader() throws IOException { Utils.checkState(open, "FileContext is closed"); if (reader == null) { currentFile = getStartingCurrentFileName(); long fileOffset = getStartingOffset(); boolean needsToScan = currentFile == null || fileOffset == Long.MAX_VALUE; if (needsToScan) { if (currentFile != null) { // we need to refresh the file in case the name changed before scanning as the scanner does not refresh currentFile = currentFile.refresh(); } currentFile = scanner.scan(currentFile); fileOffset = 0; } if (currentFile != null) { reader = new SingleLineLiveFileReader(getRollMode(), getMultiFileInfo().getTag(), currentFile, charset, fileOffset, maxLineLength); if (!multiFileInfo.getMultiLineMainLinePatter().isEmpty()) { reader = new MultiLineLiveFileReader(getMultiFileInfo().getTag(), reader, Pattern.compile(multiFileInfo.getMultiLineMainLinePatter())); } if (fileOffset == 0) { // file start event eventPublisher.publish(new FileEvent(currentFile, FileEvent.Action.START)); } } } return reader; } // updates reader and offsets after a read. public void releaseReader(boolean inErrorDiscardReader) throws IOException { Utils.checkState(open, "FileContext is closed"); // update starting offsets for next invocation either cold (no reader) or hot (reader) boolean hasNext; try { hasNext = reader != null && reader.hasNext(); } catch (IOException ex) { IOUtils.closeQuietly(reader); reader = null; hasNext = false; } boolean doneWithFile = !hasNext || inErrorDiscardReader; if (doneWithFile) { IOUtils.closeQuietly(reader); reader = null; // Using Long.MAX_VALUE to signal we reach the end of the file and next iteration should get the next file. setStartingCurrentFileName(currentFile); setStartingOffset(Long.MAX_VALUE); // If we failed to open the file in first place, it will be null and hence we won't do anything with it. if(currentFile == null) { return; } // File end event LiveFile file = currentFile.refresh(); if (inErrorDiscardReader) { LOG.warn("Processing file '{}' produced an error, skipping '{}' post processing on that file", file, postProcessing); eventPublisher.publish(new FileEvent(file, FileEvent.Action.ERROR)); } else { eventPublisher.publish(new FileEvent(file, FileEvent.Action.END)); switch (postProcessing) { case NONE: LOG.debug("File '{}' processing completed, post processing action 'NONE'", file); break; case DELETE: if(!inPreviewMode) { try { Files.delete(file.getPath()); LOG.debug("File '{}' processing completed, post processing action 'DELETED'", file); } catch (IOException ex) { throw new IOException(Utils.format("Could not delete '{}': {}", file, ex.toString()), ex); } } break; case ARCHIVE: if(!inPreviewMode) { Path fileArchive = Paths.get(archiveDir, file.getPath().toString()); if (fileArchive == null) { throw new IOException("Could not find archive file"); } try { Files.createDirectories(fileArchive.getParent()); Files.move(file.getPath(), fileArchive); LOG.debug("File '{}' processing completed, post processing action 'ARCHIVED' as", file); } catch (IOException ex) { throw new IOException(Utils.format("Could not archive '{}': {}", file, ex.toString()), ex); } } break; } } } else { setStartingCurrentFileName(currentFile); setStartingOffset(getReader().getOffset()); } } public MultiFileInfo getMultiFileInfo() { return multiFileInfo; } public LiveFile getStartingCurrentFileName() { Utils.checkState(open, "FileContext is closed"); return startingCurrentFileName; } public long getStartingOffset() { Utils.checkState(open, "FileContext is closed"); return startingOffset; } public RollMode getRollMode() { Utils.checkState(open, "FileContext is closed"); return rollMode; } public void setStartingCurrentFileName(LiveFile startingCurrentFileName) { Utils.checkState(open, "FileContext is closed"); this.startingCurrentFileName = startingCurrentFileName; } public void setStartingOffset(long startingOffset) { Utils.checkState(open, "FileContext is closed"); this.startingOffset = startingOffset; } }