/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package gobblin.data.management.retention.dataset;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.Properties;
import java.util.Set;
import lombok.Getter;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.slf4j.Logger;
import com.typesafe.config.Config;
import gobblin.data.management.trash.ProxiedTrash;
import gobblin.data.management.trash.TrashFactory;
import gobblin.data.management.version.FileSystemDatasetVersion;
import gobblin.dataset.FileSystemDataset;
import gobblin.util.PathUtils;
/**
 * A helper class to delete the {@link Path}s of a {@link FileSystemDatasetVersion}.
 *
 * <p>Supports the following job level settings:</p>
 * <ul>
 * <li> Simulate Mode - Only log paths to be deleted without deleting data by setting {@value #SIMULATE_KEY} to true.
 * <li> Skip Trash - Delete permanently by setting {@value #SKIP_TRASH_KEY} to true.
 * <li> Auto delete empty parent directories - By setting {@value #DELETE_EMPTY_DIRECTORIES_KEY} to true.
 * <li> Proxy as owner and delete - By setting {@value #DELETE_AS_OWNER_KEY} to true.
 * </ul>
 */
public class FsCleanableHelper {

  public static final String CONFIGURATION_KEY_PREFIX = "gobblin.retention.";
  public static final String SIMULATE_KEY = CONFIGURATION_KEY_PREFIX + "simulate";
  public static final String SIMULATE_DEFAULT = Boolean.toString(false);
  public static final String SKIP_TRASH_KEY = CONFIGURATION_KEY_PREFIX + "skip.trash";
  public static final String SKIP_TRASH_DEFAULT = Boolean.toString(false);
  public static final String DELETE_EMPTY_DIRECTORIES_KEY = CONFIGURATION_KEY_PREFIX + "delete.empty.directories";
  public static final String DELETE_EMPTY_DIRECTORIES_DEFAULT = Boolean.toString(true);
  public static final String DELETE_AS_OWNER_KEY = CONFIGURATION_KEY_PREFIX + "delete.as.owner";
  public static final String DELETE_AS_OWNER_DEFAULT = Boolean.toString(true);

  protected final FileSystem fs;
  @Getter
  protected final ProxiedTrash trash;
  protected final boolean simulate;
  protected final boolean skipTrash;
  protected final boolean deleteEmptyDirectories;
  protected final boolean deleteAsOwner;
  protected final Logger log;

  /**
   * Creates a helper with every setting passed explicitly.
   *
   * @param fs {@link FileSystem} on which paths are checked and deleted.
   * @param properties base properties handed to {@link TrashFactory}; they are copied, so the caller's instance
   *        is never modified.
   * @param simulate if true, {@link TrashFactory#SIMULATE} is set on the copied properties and empty directory
   *        cleanup is skipped.
   * @param skipTrash if true, {@link TrashFactory#SKIP_TRASH} is set on the copied properties.
   * @param deleteEmptyDirectories if true, {@link #cleanEmptyDirectories(Set, FileSystemDataset)} removes empty
   *        parent directories.
   * @param deleteAsOwner if true, paths are deleted through {@link ProxiedTrash#moveToTrashAsOwner(Path)}.
   * @param log logger used for all messages emitted by this helper.
   * @throws IOException if the trash cannot be created.
   */
  public FsCleanableHelper(FileSystem fs, Properties properties, boolean simulate, boolean skipTrash,
      boolean deleteEmptyDirectories, boolean deleteAsOwner, Logger log) throws IOException {
    this.log = log;
    this.fs = fs;
    this.simulate = simulate;
    this.skipTrash = skipTrash;
    this.deleteEmptyDirectories = deleteEmptyDirectories;
    this.deleteAsOwner = deleteAsOwner;

    // Work on a copy so that the trash specific overrides below do not leak into the caller's properties.
    Properties thisProperties = new Properties();
    thisProperties.putAll(properties);
    if (this.simulate) {
      thisProperties.setProperty(TrashFactory.SIMULATE, Boolean.toString(true));
    }
    if (this.skipTrash) {
      thisProperties.setProperty(TrashFactory.SKIP_TRASH, Boolean.toString(true));
    }
    this.trash = TrashFactory.createProxiedTrash(this.fs, thisProperties);
  }

  /**
   * Creates a helper whose settings are read from <code>props</code> using {@link #SIMULATE_KEY},
   * {@link #SKIP_TRASH_KEY}, {@link #DELETE_EMPTY_DIRECTORIES_KEY} and {@link #DELETE_AS_OWNER_KEY}, falling back
   * to the matching <code>*_DEFAULT</code> constant when a key is absent.
   *
   * @param fs {@link FileSystem} on which paths are checked and deleted.
   * @param props job properties to read the settings from; also used as base properties for the trash.
   * @param config currently not read by this constructor; kept so that existing callers keep compiling.
   * @param log logger used for all messages emitted by this helper.
   * @throws IOException if the trash cannot be created.
   */
  public FsCleanableHelper(final FileSystem fs, final Properties props, Config config, Logger log) throws IOException {
    this(fs, props,
        Boolean.parseBoolean(props.getProperty(SIMULATE_KEY, SIMULATE_DEFAULT)),
        Boolean.parseBoolean(props.getProperty(SKIP_TRASH_KEY, SKIP_TRASH_DEFAULT)),
        Boolean.parseBoolean(props.getProperty(DELETE_EMPTY_DIRECTORIES_KEY, DELETE_EMPTY_DIRECTORIES_DEFAULT)),
        Boolean.parseBoolean(props.getProperty(DELETE_AS_OWNER_KEY, DELETE_AS_OWNER_DEFAULT)),
        log);
  }

  /**
   * Delete a single {@link FileSystemDatasetVersion}. The parent of every {@link Path} that was successfully
   * deleted is added to <code>possiblyEmptyDirectories</code>. The caller needs to call
   * {@link #cleanEmptyDirectories(Set, FileSystemDataset)} afterwards to delete empty parent directories, if any.
   *
   * <p>Paths that do not exist are skipped. Deletion is attempted for every path of the version even when an
   * earlier one fails; the failure is reported once all paths have been tried.</p>
   *
   * @param versionToDelete version whose paths are deleted.
   * @param possiblyEmptyDirectories output set collecting the parents of the deleted paths.
   * @throws IOException if the file system cannot be accessed or if at least one path could not be deleted.
   */
  public void clean(final FileSystemDatasetVersion versionToDelete, final Set<Path> possiblyEmptyDirectories)
      throws IOException {
    log.info("Deleting dataset version {}", versionToDelete);

    Set<Path> pathsToDelete = versionToDelete.getPaths();
    log.info("Deleting paths: {}", Arrays.toString(pathsToDelete.toArray()));

    boolean deletedAllPaths = true;
    for (Path path : pathsToDelete) {
      if (!this.fs.exists(path)) {
        log.info("Path {} in dataset version {} does not exist", path, versionToDelete);
        continue;
      }

      boolean successfullyDeleted =
          this.deleteAsOwner ? this.trash.moveToTrashAsOwner(path) : this.trash.moveToTrash(path);
      if (successfullyDeleted) {
        // A root path has no parent; never hand a null directory to the empty directory cleanup.
        Path parent = path.getParent();
        if (parent != null) {
          possiblyEmptyDirectories.add(parent);
        }
      } else {
        log.error("Failed to delete path {} in dataset version {}", path, versionToDelete);
        deletedAllPaths = false;
      }
    }

    // This method returns nothing, so an exception is the only way for a caller to learn about a partial failure.
    if (!deletedAllPaths) {
      throw new IOException("Failed to delete some paths in dataset version " + versionToDelete);
    }
  }

  /**
   * Delete all {@link FileSystemDatasetVersion}s in <code>deletableVersions</code> and also delete any empty
   * parent directories.
   *
   * @param deletableVersions versions to delete; when empty, a warning is logged and nothing else happens.
   * @param fsDataset dataset to which the versions belong.
   * @throws IOException if deleting a version or cleaning up empty directories fails.
   */
  public void clean(final Collection<? extends FileSystemDatasetVersion> deletableVersions,
      final FileSystemDataset fsDataset) throws IOException {
    if (deletableVersions.isEmpty()) {
      log.warn("No deletable dataset version can be found. Ignoring.");
      return;
    }

    Set<Path> possiblyEmptyDirectories = new HashSet<>();
    for (FileSystemDatasetVersion fsdv : deletableVersions) {
      clean(fsdv, possiblyEmptyDirectories);
    }
    cleanEmptyDirectories(possiblyEmptyDirectories, fsDataset);
  }

  /**
   * Deletes any empty paths in <code>possiblyEmptyDirectories</code> all the way up to the
   * {@link FileSystemDataset#datasetRoot()}. Does nothing when empty directory deletion is disabled or when
   * running in simulate mode.
   *
   * @param possiblyEmptyDirectories directories that may have become empty.
   * @param fsDataset dataset whose root is passed to {@link PathUtils} as the upper limit of the cleanup.
   * @throws IOException if the cleanup fails.
   */
  public void cleanEmptyDirectories(final Set<Path> possiblyEmptyDirectories, final FileSystemDataset fsDataset)
      throws IOException {
    if (this.deleteEmptyDirectories && !this.simulate) {
      for (Path parentDirectory : possiblyEmptyDirectories) {
        PathUtils.deleteEmptyParentDirectories(this.fs, fsDataset.datasetRoot(), parentDirectory);
      }
    }
  }
}