/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package gobblin.metastore.util;
import java.io.Closeable;
import java.io.FileInputStream;
import java.io.IOException;
import java.net.URI;
import java.util.List;
import java.util.Locale;
import java.util.Properties;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.joda.time.DateTime;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.base.Optional;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.common.io.Closer;
import com.google.common.io.Files;
import gobblin.configuration.ConfigurationKeys;
import gobblin.metastore.nameParser.GuidDatasetUrnStateStoreNameParser;
import gobblin.util.ExecutorsUtils;
/**
 * A utility class for cleaning up old state store files created by {@link gobblin.metastore.FsStateStore}
 * based on a configured retention.
 *
 * <p>
 *   Every entry directly under the state store root directory is handed to a worker thread. The worker
 *   lists the state store files in that directory (see {@link StateStoreFileFilter}) and deletes those
 *   whose modification time plus the retention lies in the past.
 * </p>
 *
 * <p>
 *   Configuration properties read by this class:
 * </p>
 * <ul>
 *   <li>{@link ConfigurationKeys#STATE_STORE_ROOT_DIR_KEY}: required, the state store root directory.</li>
 *   <li>{@link ConfigurationKeys#STATE_STORE_FS_URI_KEY}: optional, defaults to
 *       {@link ConfigurationKeys#LOCAL_FS_URI}.</li>
 *   <li><code>state.store.retention</code>: optional, defaults to 7.</li>
 *   <li><code>state.store.retention.timeunit</code>: optional, case-insensitive name of a {@link TimeUnit},
 *       defaults to DAYS.</li>
 *   <li><code>state.store.cleaner.executor.threads</code>: optional, defaults to 50.</li>
 * </ul>
 *
 * @deprecated Please use Gobblin-retention instead: http://gobblin.readthedocs.io/en/latest/data-management/Gobblin-Retention/.
 *
 * @author Yinan Li
 */
public class StateStoreCleaner implements Closeable {

  private static final Logger LOGGER = LoggerFactory.getLogger(StateStoreCleaner.class);

  private static final String STATE_STORE_CLEANER_RETENTION_KEY = "state.store.retention";
  private static final String DEFAULT_STATE_STORE_CLEANER_RETENTION = "7";
  private static final String STATE_STORE_CLEANER_RETENTION_TIMEUNIT_KEY = "state.store.retention.timeunit";
  private static final String DEFAULT_STATE_STORE_CLEANER_RETENTION_TIMEUNIT = TimeUnit.DAYS.toString();
  private static final String STATE_STORE_CLEANER_EXECUTOR_THREADS_KEY = "state.store.cleaner.executor.threads";
  private static final String DEFAULT_STATE_STORE_CLEANER_EXECUTOR_THREADS = "50";

  private final Path stateStoreRootDir;
  private final long retention;
  private final TimeUnit retentionTimeUnit;
  private final ExecutorService cleanerRunnerExecutor;
  private final FileSystem fs;

  /**
   * Create a new cleaner from the given configuration.
   *
   * @param properties configuration properties; must define {@link ConfigurationKeys#STATE_STORE_ROOT_DIR_KEY}
   * @throws IOException if the {@link FileSystem} cannot be obtained
   * @throws IllegalArgumentException if the state store root directory is not configured, or if the
   *         configured retention time unit is not the name of a {@link TimeUnit}
   * @throws NumberFormatException if the configured retention or thread count is not a valid number
   */
  public StateStoreCleaner(Properties properties)
      throws IOException {
    // Validate the value returned by getProperty rather than using containsKey: getProperty also consults
    // defaults chained to the Properties object, and returns null for non-String values.
    String rootDir = properties.getProperty(ConfigurationKeys.STATE_STORE_ROOT_DIR_KEY);
    Preconditions.checkArgument(rootDir != null,
        "Missing configuration property for the state store root directory: "
            + ConfigurationKeys.STATE_STORE_ROOT_DIR_KEY);
    this.stateStoreRootDir = new Path(rootDir);

    this.retention = Long.parseLong(
        properties.getProperty(STATE_STORE_CLEANER_RETENTION_KEY, DEFAULT_STATE_STORE_CLEANER_RETENTION));

    // Locale.ROOT keeps the upper-casing independent of the JVM default locale (e.g. the Turkish dotted
    // capital I would otherwise make TimeUnit.valueOf fail for names containing 'i').
    this.retentionTimeUnit = TimeUnit.valueOf(properties
        .getProperty(STATE_STORE_CLEANER_RETENTION_TIMEUNIT_KEY, DEFAULT_STATE_STORE_CLEANER_RETENTION_TIMEUNIT)
        .toUpperCase(Locale.ROOT));

    int numThreads = Integer.parseInt(properties
        .getProperty(STATE_STORE_CLEANER_EXECUTOR_THREADS_KEY, DEFAULT_STATE_STORE_CLEANER_EXECUTOR_THREADS));
    this.cleanerRunnerExecutor = Executors.newFixedThreadPool(numThreads,
        ExecutorsUtils.newThreadFactory(Optional.of(LOGGER), Optional.of("StateStoreCleaner")));

    URI fsUri =
        URI.create(properties.getProperty(ConfigurationKeys.STATE_STORE_FS_URI_KEY, ConfigurationKeys.LOCAL_FS_URI));
    this.fs = FileSystem.get(fsUri, new Configuration());
  }

  /**
   * Run the cleaner.
   *
   * <p>
   *   Submits one {@link CleanerRunner} per entry under the state store root directory, waits for all of
   *   them to finish and then shuts down the internal executor. Because the executor is shut down on
   *   successful completion, an instance is intended to be run once.
   * </p>
   *
   * @throws IOException if listing the state store root directory fails
   * @throws ExecutionException if a cleaner task failed, or if the calling thread was interrupted while
   *         waiting for the tasks (in which case the thread's interrupt status is restored)
   */
  public void run()
      throws IOException, ExecutionException {
    FileStatus[] stateStoreDirs = this.fs.listStatus(this.stateStoreRootDir);
    if (stateStoreDirs == null || stateStoreDirs.length == 0) {
      LOGGER.warn("The state store root directory does not exist or is empty");
      return;
    }

    List<Future<?>> futures = Lists.newArrayList();
    for (FileStatus stateStoreDir : stateStoreDirs) {
      futures.add(this.cleanerRunnerExecutor
          .submit(new CleanerRunner(this.fs, stateStoreDir.getPath(), this.retention, this.retentionTimeUnit)));
    }

    for (Future<?> future : futures) {
      try {
        future.get();
      } catch (InterruptedException e) {
        // Restore the interrupt status so that code further up the stack can still observe the interruption
        Thread.currentThread().interrupt();
        throw new ExecutionException("Thread interrupted", e);
      }
    }

    ExecutorsUtils.shutdownExecutorService(this.cleanerRunnerExecutor, Optional.of(LOGGER), 60, TimeUnit.SECONDS);
  }

  /**
   * Initiate a shutdown of the internal executor. This does not wait for already submitted tasks to finish.
   */
  @Override
  public void close()
      throws IOException {
    this.cleanerRunnerExecutor.shutdown();
  }

  /**
   * A {@link PathFilter} that accepts state store files: files with a <code>jst</code> or <code>tst</code>
   * extension (case-insensitive), and state meta files as defined by {@link #isStateMetaFile(String)}.
   */
  private static class StateStoreFileFilter implements PathFilter {

    @Override
    public boolean accept(Path path) {
      String fileName = path.getName();
      String extension = Files.getFileExtension(fileName);
      return isStateMetaFile(fileName) || extension.equalsIgnoreCase("jst") || extension.equalsIgnoreCase("tst");
    }

    /**
     * Check whether the file name starts with, but is not equal to, the V1 dataset-URN-name-map file name.
     *
     * @param fileName name of the file (without parent directories)
     * @return <code>true</code> if the name has the map file name as a proper prefix
     */
    boolean isStateMetaFile(String fileName) {
      String nameMapFile = GuidDatasetUrnStateStoreNameParser.StateStoreNameVersion.V1.getDatasetUrnNameMapFile();
      return fileName.startsWith(nameMapFile) && !fileName.equals(nameMapFile);
    }
  }

  /**
   * A {@link Runnable} that cleans up a single state store directory. I/O failures are logged and not
   * propagated, so a failure in one directory does not fail the whole run.
   */
  private static class CleanerRunner implements Runnable {

    private final FileSystem fs;
    private final Path stateStoreDir;
    private final long retention;
    private final TimeUnit retentionTimeUnit;

    CleanerRunner(FileSystem fs, Path stateStoreDir, long retention, TimeUnit retentionTimeUnit) {
      this.fs = fs;
      this.stateStoreDir = stateStoreDir;
      this.retention = retention;
      this.retentionTimeUnit = retentionTimeUnit;
    }

    @Override
    public void run() {
      try {
        FileStatus[] stateStoreFiles = this.fs.listStatus(this.stateStoreDir, new StateStoreFileFilter());
        if (stateStoreFiles == null || stateStoreFiles.length == 0) {
          LOGGER.warn("No state store files found in directory: {}", this.stateStoreDir);
          return;
        }

        LOGGER.info("Cleaning up state store directory: {}", this.stateStoreDir);
        for (FileStatus file : stateStoreFiles) {
          // Non-recursive delete; a false return value is reported but does not stop the loop
          if (shouldCleanUp(file) && !this.fs.delete(file.getPath(), false)) {
            LOGGER.error("Failed to delete state store file: {}", file.getPath());
          }
        }
      } catch (IOException ioe) {
        LOGGER.error("Failed to run state store cleaner for directory: " + this.stateStoreDir, ioe);
      }
    }

    /**
     * Decide whether a file is past its retention.
     *
     * @param file status of the candidate file
     * @return <code>true</code> if the file's modification time plus the retention is before the current time
     */
    private boolean shouldCleanUp(FileStatus file) {
      DateTime now = new DateTime();
      DateTime modificationDateTime = new DateTime(file.getModificationTime());
      long retentionInMills = this.retentionTimeUnit.toMillis(this.retention);
      return modificationDateTime.plus(retentionInMills).isBefore(now);
    }
  }

  /**
   * Command line entry point: loads the properties file given as the only argument and runs the cleaner.
   *
   * @param args exactly one element, the path of the configuration file
   * @throws IOException if the configuration cannot be read or the cleaner fails
   */
  public static void main(String[] args)
      throws IOException {
    if (args.length != 1) {
      System.err.println("Usage: " + StateStoreCleaner.class.getSimpleName() + " <configuration file>");
      System.exit(1);
    }

    Closer closer = Closer.create();
    try {
      Properties properties = new Properties();
      properties.load(closer.register(new FileInputStream(args[0])));
      closer.register(new StateStoreCleaner(properties)).run();
    } catch (Throwable t) {
      throw closer.rethrow(t);
    } finally {
      closer.close();
    }
  }
}