/** * Copyright 2015 StreamSets Inc. * * Licensed under the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.streamsets.pipeline.lib.io; import com.streamsets.pipeline.api.impl.Utils; import com.streamsets.pipeline.lib.util.ThreadUtil; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.File; import java.io.IOException; import java.nio.file.DirectoryStream; import java.nio.file.FileSystems; import java.nio.file.Files; import java.nio.file.NoSuchFileException; import java.nio.file.Path; import java.nio.file.PathMatcher; import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; import java.util.List; /** * The <code>LiveDirectoryScanner</code> scans a directory for the next file to process. * <p/> * There is a 'live' file, which is a file that is actively been written to it and there are 'rolled' files which * are not actively written to anymore (they are previous 'live' files). * <p/> * IMPORTANT: 'Rolled' files must have the 'live' file name as prefix. * <p/> * When asked for a file, the last processed file is passed (or <code>null</code> if none) and the scanner will * return the next 'rolled' file in order, if there are no more 'rolled' files it will return the 'live' file, * if there is not 'live' file it will return <code>null</code>. * <p/> * {@link LiveFile}s are used in order to handle the case of file renames. * <p/> * There are 3 possible orders for 'rolled' files, reverse counter, date and alphabetical. Reverse counter follows * Log4j <code>RollingFileAppender</code> file renaming handling. Date ordering supports 5 different precisions * following Log4j <code>DailyRollingFileAppender</code> file renaming handling. Alphabetical uses alphabetical * order of the postfix. */ public class LiveDirectoryScanner { private static final Logger LOG = LoggerFactory.getLogger(LiveDirectoryScanner.class); private final Path firstFile; private final String liveFileName; private final File dir; private final PathMatcher fileMatcher; private final RollMode rollMode; private final Comparator<Path> pathComparator; /** * Creates a <code>LiveDirectoryScanner</code> instance. * * @param dirName directory to scan. * @param firstFileName first 'rolled' file to look for if [@link #scan()} is invoked with <code>null</code>. * @param rollMode rolled files mode to use for ordering rolled files. * @throws IOException thrown if the scanner could not be created due to an IO error. */ public LiveDirectoryScanner(String dirName, String firstFileName, RollMode rollMode) throws IOException { Utils.checkNotNull(dirName, "dirName"); Utils.checkArgument(!dirName.isEmpty(), "dirName cannot be empty"); Utils.checkNotNull(rollMode, "rollMode"); dir = new File(dirName); if (!dir.exists()) { throw new IOException(Utils.format("Directory path '{}' does not exist", dir.getAbsolutePath())); } if (!dir.isDirectory()) { throw new IOException(Utils.format("Directory path '{}' is not a directory", dir.getAbsolutePath())); } this.rollMode = rollMode; // liveFileName needs to be massaged by roll mode this.liveFileName = rollMode.getLiveFileName(); // firstFileName needs to be verified by roll mode Utils.checkArgument(this.rollMode.isFirstAcceptable(firstFileName), Utils.formatL("firstFileName '{}' is not an acceptable file name", firstFileName)); this.firstFile = (firstFileName == null || firstFileName.isEmpty()) ? null : new File(dir, firstFileName).toPath(); pathComparator = this.rollMode.getComparator(); //TODO check if we need to escape liveFileName fileMatcher = FileSystems.getDefault().getPathMatcher(this.rollMode.getPattern()); } private class FileFilter implements DirectoryStream.Filter<Path> { private final Path firstFile; private final boolean includeFirstFileName; private final Comparator<Path> comparator; public FileFilter(Path firstFile, boolean includeFirstFileName, Comparator<Path> comparator) { this.firstFile = firstFile; this.includeFirstFileName = includeFirstFileName; this.comparator = comparator; } @Override public boolean accept(Path entry) throws IOException { boolean accept = false; if (fileMatcher.matches(entry.getFileName()) && Files.isRegularFile(entry)) { if (firstFile == null) { accept = true; } else { int compares = comparator.compare(entry, firstFile); accept = (compares == 0 && includeFirstFileName) || (compares > 0); } } return accept; } } // last == null means start from beginning /** * Scans the directory of for the next file. * * @param current the last 'rolled' file processed. Use <code>null</code> to look for the first one. The provided * file cannot be the 'live' file. * @return the next 'rolled' file in order, if there are no more 'rolled' files it will return the 'live' file, * if there is not 'live' file it will return <code>null</code>. * @throws IOException thrown if the directory could not be scanned. */ public LiveFile scan(LiveFile current) throws IOException { try { return scanInternal(current); } catch (NoSuchFileException ex) { // this could happen because there has been a file rotation/deletion after the search/filter/sort and before // the creation of the nen current. Lets sleep for 50ms and try again, if fails again give up. ThreadUtil.sleep(50); return scanInternal(current); } } /** * Scans the directory for number of files yet to be processed. * * @param current the last 'rolled' file processed. Use <code>null</code> to look for the first one. * @return the number of files yet to be processed. * @throws IOException thrown if the directory could not be scanned. */ public long getPendingFiles(LiveFile current) throws IOException{ //Current will not be acceptable for roll files (if active file is without a counter/date pattern) //and will be later renamed to a file with counter/date suffix, if that is the case we should //return 0 as number of pending files if (current == null || rollMode.isCurrentAcceptable(current.getPath().getFileName().toString())) { return findToBeProcessedMatchingFiles(current!=null? current.refresh() : null).size(); } return 0; } private LiveFile scanInternal(LiveFile current) throws IOException { Utils.checkArgument(current == null || rollMode.isCurrentAcceptable( current.getPath().getFileName().toString()), Utils.formatL("Current file '{}' is not acceptable for live file '{}' using '{}'", current, liveFileName, rollMode)); List<Path> matchingFiles = findToBeProcessedMatchingFiles(current); LOG.debug("Scanned '{}' matching files", matchingFiles.size()); if (matchingFiles.size() > 0) { // sort all matching files (they don't necessary come in order from the OS) // we sort them using the comparator of the NonLivePostfix Collections.sort(matchingFiles, pathComparator); // we found a rolled file, create it as such current = new LiveFile(matchingFiles.get(0)); } else { // we are not behind with rolled files, lets return the live file try { if (liveFileName != null) { current = new LiveFile(new File(dir, liveFileName).toPath()); } else { current = null; } } catch (NoSuchFileException ex) { // if the live file does not currently exists, return null as we cannot have a LiveFile without an iNode current = null; } } LOG.debug("Scan selected '{}' ", current); return current; } private List<Path> findToBeProcessedMatchingFiles(LiveFile current) throws IOException { FileFilter filter ; if (current == null) { // we don't have current file, // let scan from the configured first file and include the first file itself if found filter = new FileFilter(firstFile, true, pathComparator); } else { // we do have a current file, we need to find the next file filter = new FileFilter(current.getPath(), false, pathComparator); } List<Path> matchingFiles = new ArrayList<>(); try (DirectoryStream<Path> matches = Files.newDirectoryStream(dir.toPath(), filter)) { for (Path file : matches) { matchingFiles.add(file); } } return matchingFiles; } }