/** * Copyright 2015 StreamSets Inc. * * Licensed under the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.streamsets.pipeline.lib.io; import com.streamsets.pipeline.api.ext.DataCollectorServices; import com.streamsets.pipeline.api.ext.json.JsonMapper; import com.streamsets.pipeline.api.impl.Utils; import org.apache.commons.codec.binary.Base64; import org.apache.commons.io.IOUtils; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.nio.file.DirectoryStream; import java.nio.file.Files; import java.nio.file.NoSuchFileException; import java.nio.file.Path; import java.nio.file.Paths; import java.nio.file.attribute.BasicFileAttributes; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; import java.util.LinkedHashMap; import java.util.Map; /** * A <code>LiveFile</code> is a File reference that keeps track of its iNode and it can resync its name, * using the iNode as the anchor, in case of a rename. IMPORTANT: The rename must be within the same directory. * <p/> * <b>NOTE:</b> EXT4 filesystems reuse iNodes immediately, so if you delete a file and create a new file the iNode * of the old file will most likely be used for the new file. To be able to handle this case and detect a file has * been renamed (as opposed to deleted followed by a complete different file being created reusing the iNode) we * hash the head (1024 bytes) of the file (Brocks idea). * <p/> * The primary use case for this class is for handling log files which may be rotated (renamed) while the file is * being accessed. By keeping track of the iNode, it is possible to get intermittent access to the same file (i.e. * from an application that has been restarted). * <p/> * A <code>LiveFile</code> is immutable. */ public class LiveFile { private static final int HEAD_LEN = 1024; private final Path path; private final String headHash; private final int headLen; private final String iNode; /** * Creates a <code>LiveFile</code> given a {@link Path}. * * @param path the Path of the LiveFile. The file referred by the Path must exist. * @throws IOException thrown if the LiveFile does not exist. */ public LiveFile(Path path) throws IOException { Utils.checkNotNull(path, "path"); this.path = path.toAbsolutePath(); if (!this.path.toFile().isFile()) { throw new NoSuchFileException(Utils.format("Path '{}' is not a file", this.path)); } BasicFileAttributes attrs = Files.readAttributes(path, BasicFileAttributes.class); headLen = (int) Math.min(HEAD_LEN, attrs.size()); headHash = computeHash(path, headLen); iNode = attrs.fileKey().toString(); } private LiveFile(Path path, String inode, String headHash, int headLen) { this.path = path.toAbsolutePath(); iNode = inode; this.headHash = headHash; this.headLen = headLen; } String computeHash(Path path, int len) throws IOException { byte[] buffer = new byte[len]; try (InputStream is = new FileInputStream(path.toFile())) { IOUtils.readFully(is, buffer); } try { MessageDigest digest = MessageDigest.getInstance("MD5"); buffer = digest.digest(buffer); return Base64.encodeBase64String(buffer); } catch (NoSuchAlgorithmException ex) { throw new IOException(ex); } } /** * Returns the {@link Path} of the <code>LiveFile</code>. * * @return the {@link Path} of the <code>LiveFile</code>. */ public Path getPath() { return path; } /** * Returns the iNode of the <code>LiveFile</code>. * * @return the iNode of the <code>LiveFile</code>. */ public String getINode() { return iNode; } @Override public int hashCode() { return path.hashCode() + iNode.hashCode() + headHash.hashCode(); } @Override public boolean equals(Object obj) { if (obj == null) { return false; } if (this == obj) { return true; } if (obj instanceof LiveFile) { LiveFile other = (LiveFile) obj; return path.equals(other.path) && iNode.equals(other.iNode) && headHash.equals(other.headHash); } return false; } public String toString() { return String.format("LiveFile[path=%s, iNode=%s, headHash=%s]", path, iNode, headHash); } /** * Serializes the <code>LiveFile</code> as a string. * * @return the serialized string representation of the <code>LiveFile</code>. */ @SuppressWarnings("unchecked") public String serialize() { Map map = new LinkedHashMap(); map.put("path", path.toString()); map.put("headHash", headHash); map.put("headLen", headLen); map.put("inode", iNode); try { JsonMapper objectMapper = DataCollectorServices.instance().get(JsonMapper.SERVICE_KEY); return objectMapper.writeValueAsString(map); } catch (Exception ex) { throw new RuntimeException(Utils.format("Unexpected exception: {}", ex.toString()), ex); } } /** * Deserializes a string representation of a <code>LiveFile</code>. * <p/> * * @param str the string representation of a <code>LiveFile</code>. * @return the deserialized <code>LiveFile</code> * @throws IOException thrown if the string con not be deserialized into a <code>LiveFile</code>. */ public static LiveFile deserialize(String str) throws IOException { Utils.checkNotNull(str, "str"); try { JsonMapper objectMapper = DataCollectorServices.instance().get(JsonMapper.SERVICE_KEY); Map map = objectMapper.readValue(str, Map.class); Path path = Paths.get((String) map.get("path")); String headHash = (map.containsKey("headHash")) ? (String) map.get("headHash") : ""; int headLen = (map.containsKey("headLen")) ? (int) map.get("headLen") : 0; String inode = (String) map.get("inode"); return new LiveFile(path, inode, headHash, headLen); } catch (RuntimeException|IOException ex) { throw new IllegalArgumentException(Utils.format("Invalid LiveFile serialized string '{}': {}", str, ex.toString()), ex); } } /** * Refreshes the <code>LiveFile</code>, if the file was renamed, the path will have the new name. * * @return the refreshed file if the file has been renamed, or itself if the file has not been rename or the file * does not exist in the directory anymore. * @throws IOException thrown if the LiveFile could not be refreshed */ public LiveFile refresh() throws IOException { LiveFile refresh = this; boolean changed; try { BasicFileAttributes attrs = Files.readAttributes(path, BasicFileAttributes.class); String iNodeCurrent = attrs.fileKey().toString(); int headLenCurrent = (int) Math.min(headLen, attrs.size()); String headHashCurrent = computeHash(path, headLenCurrent); changed = !this.iNode.equals(iNodeCurrent) || !this.headHash.equals(headHashCurrent); } catch (NoSuchFileException ex) { changed = true; } if (changed) { try (DirectoryStream<Path> directoryStream = Files.newDirectoryStream(path.getParent())) { for (Path path : directoryStream) { BasicFileAttributes attrs = Files.readAttributes(path, BasicFileAttributes.class); String iNode = attrs.fileKey().toString(); int headLen = (int) Math.min(this.headLen, attrs.size()); String headHash = computeHash(path, headLen); if (iNode.equals(this.iNode) && headHash.equals(this.headHash)) { if (headLen == 0) { headLen = (int) Math.min(HEAD_LEN, attrs.size()); headHash = computeHash(path, headLen); } refresh = new LiveFile(path, iNode, headHash, headLen); break; } } } } return refresh; } }