/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package gobblin.data.management.retention.dataset;
import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.util.List;
import java.util.Properties;
import java.util.Set;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.base.Preconditions;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Lists;
import com.typesafe.config.Config;
import com.typesafe.config.ConfigFactory;
import com.typesafe.config.ConfigRenderOptions;
import gobblin.data.management.policy.SelectNothingPolicy;
import gobblin.data.management.policy.VersionSelectionPolicy;
import gobblin.data.management.retention.action.MultiAccessControlAction.MultiAccessControlActionFactory;
import gobblin.data.management.retention.action.RetentionAction;
import gobblin.data.management.retention.action.RetentionAction.RetentionActionFactory;
import gobblin.data.management.retention.dataset.MultiVersionCleanableDatasetBase.VersionFinderAndPolicy.VersionFinderAndPolicyBuilder;
import gobblin.data.management.retention.policy.RetentionPolicy;
import gobblin.data.management.version.FileSystemDatasetVersion;
import gobblin.data.management.version.finder.VersionFinder;
import gobblin.util.ConfigUtils;
import gobblin.util.reflection.GobblinConstructorUtils;
/**
* {@link CleanableDatasetBase} that instantiates {@link VersionFinder} and {@link RetentionPolicy} from classes read
* from an input {@link java.util.Properties}.
*
* <p>
* The class of {@link VersionFinder} should be under key {@link #VERSION_FINDER_CLASS_KEY}, while the class of
* {@link RetentionPolicy} should be under key {@link #RETENTION_POLICY_CLASS_KEY}.
* </p>
*/
public class ConfigurableCleanableDataset<T extends FileSystemDatasetVersion>
extends MultiVersionCleanableDatasetBase<T> {
public static final String RETENTION_CONFIGURATION_KEY = "gobblin.retention";
public static final String CONFIGURATION_KEY_PREFIX = RETENTION_CONFIGURATION_KEY + ".";
public static final String VERSION_FINDER_CLASS_KEY = CONFIGURATION_KEY_PREFIX + "version.finder.class";
public static final String RETENTION_POLICY_CLASS_KEY = CONFIGURATION_KEY_PREFIX + "retention.policy.class";
public static final String SELECTION_POLICY_CLASS_KEY = CONFIGURATION_KEY_PREFIX + "selection.policy.class";
/**
* This key is used if the dataset contains multiple partition each with its own version finder and selection policy.
*
* gobblin.retention.dataset.partitions is a list of version finder and policies.
*
* E.g.
* <pre>
*
* gobblin.retention {
* partitions : [
* {
* selection {
* policy.class = data.management.policy.SelectBeforeTimeBasedPolicy
* timeBased.lookbackTime = 5d
* }
* version : {
* finder.class=gobblin.data.management.version.finder.DateTimeDatasetVersionFinder
* pattern="hourly/*/"
* }
* },
* {
* selection {
* policy.class = data.management.policy.SelectBeforeTimeBasedPolicy
* timeBased.lookbackTime = 20d
* }
* version : {
* finder.class=gobblin.data.management.version.finder.DateTimeDatasetVersionFinder
* pattern="daily/*/"
* }
* }
* ]
* }
*
*
* </pre>
*/
public static final String DATASET_PARTITIONS_LIST_KEY = CONFIGURATION_KEY_PREFIX + "dataset.partitions";
private final Path datasetRoot;
private final List<VersionFinderAndPolicy<T>> versionFindersAndPolicies;
/**
* A set of all available {@link RetentionActionFactory}s
*/
private static final Set<Class<? extends RetentionActionFactory>> RETENTION_ACTION_TYPES;
static {
RETENTION_ACTION_TYPES = ImmutableSet.<Class<? extends RetentionActionFactory>>of(MultiAccessControlActionFactory.class);
}
/**
* Creates a new ConfigurableCleanableDataset configured through gobblin-config-management. The constructor expects
* {@link #VERSION_FINDER_CLASS_KEY} and {@link #RETENTION_POLICY_CLASS_KEY} to be available in the
* <code>config</code> passed.
*/
public ConfigurableCleanableDataset(FileSystem fs, Properties jobProps, Path datasetRoot, Config config, Logger log)
throws IOException {
super(fs, jobProps, config, log);
this.datasetRoot = datasetRoot;
this.versionFindersAndPolicies = Lists.newArrayList();
if (config.hasPath(VERSION_FINDER_CLASS_KEY) && config.hasPath(RETENTION_POLICY_CLASS_KEY)) {
initWithRetentionPolicy(config, jobProps, RETENTION_POLICY_CLASS_KEY, VERSION_FINDER_CLASS_KEY);
} else if (config.hasPath(VERSION_FINDER_CLASS_KEY)) {
initWithSelectionPolicy(config.getConfig(RETENTION_CONFIGURATION_KEY), jobProps);
} else if (config.hasPath(DATASET_PARTITIONS_LIST_KEY)) {
List<? extends Config> versionAndPolicies = config.getConfigList(DATASET_PARTITIONS_LIST_KEY);
for (Config versionAndPolicy : versionAndPolicies) {
initWithSelectionPolicy(versionAndPolicy, jobProps);
}
} else {
throw new IllegalArgumentException(
String.format("Either set version finder at %s and retention policy at %s or set partitions at %s",
VERSION_FINDER_CLASS_KEY, RETENTION_POLICY_CLASS_KEY, DATASET_PARTITIONS_LIST_KEY));
}
}
public ConfigurableCleanableDataset(FileSystem fs, Properties props, Path datasetRoot) throws IOException {
this(fs, props, datasetRoot, LoggerFactory.getLogger(ConfigurableCleanableDataset.class));
}
public ConfigurableCleanableDataset(FileSystem fs, Properties props, Path datasetRoot, Logger log)
throws IOException {
this(fs, props, datasetRoot, ConfigFactory.parseProperties(props), log);
}
@Override
public Path datasetRoot() {
return this.datasetRoot;
}
@Override
public List<VersionFinderAndPolicy<T>> getVersionFindersAndPolicies() {
return this.versionFindersAndPolicies;
}
private void initWithRetentionPolicy(Config config, Properties jobProps, String retentionPolicyKey,
String versionFinderKey) {
this.versionFindersAndPolicies
.add(new VersionFinderAndPolicy<>(createRetentionPolicy(config.getString(retentionPolicyKey), config, jobProps),
createVersionFinder(config.getString(versionFinderKey), config, jobProps)));
}
private void initWithSelectionPolicy(Config config, Properties jobProps) {
String selectionPolicyKey = StringUtils.substringAfter(SELECTION_POLICY_CLASS_KEY, CONFIGURATION_KEY_PREFIX);
String versionFinderKey = StringUtils.substringAfter(VERSION_FINDER_CLASS_KEY, CONFIGURATION_KEY_PREFIX);
Preconditions.checkArgument(
config.hasPath(versionFinderKey),
String.format("Version finder class is required at %s in config %s", versionFinderKey,
config.root().render(ConfigRenderOptions.concise())));
VersionFinderAndPolicyBuilder<T> builder = VersionFinderAndPolicy.builder();
builder.versionFinder(createVersionFinder(config.getString(versionFinderKey), config, jobProps));
if (config.hasPath(selectionPolicyKey)) {
builder.versionSelectionPolicy(createSelectionPolicy(
ConfigUtils.getString(config, selectionPolicyKey, SelectNothingPolicy.class.getName()), config, jobProps));
}
for (Class<? extends RetentionActionFactory> factoryClass : RETENTION_ACTION_TYPES) {
try {
RetentionActionFactory factory = factoryClass.newInstance();
if (factory.canCreateWithConfig(config)) {
builder.retentionAction((RetentionAction) factory.createRetentionAction(config, this.fs,
ConfigUtils.propertiesToConfig(jobProps)));
}
} catch (InstantiationException | IllegalAccessException e) {
Throwables.propagate(e);
}
}
this.versionFindersAndPolicies.add(builder.build());
}
@SuppressWarnings("unchecked")
private VersionFinder<? extends T> createVersionFinder(String className, Config config, Properties jobProps) {
try {
return (VersionFinder<? extends T>) GobblinConstructorUtils.invokeFirstConstructor(Class.forName(className),
ImmutableList.<Object> of(this.fs, config), ImmutableList.<Object> of(this.fs, jobProps));
} catch (NoSuchMethodException | IllegalAccessException | InvocationTargetException | InstantiationException
| ClassNotFoundException e) {
throw new IllegalArgumentException(e);
}
}
@SuppressWarnings("unchecked")
private RetentionPolicy<T> createRetentionPolicy(String className, Config config, Properties jobProps) {
try {
return (RetentionPolicy<T>) GobblinConstructorUtils.invokeFirstConstructor(Class.forName(className),
ImmutableList.<Object> of(config), ImmutableList.<Object> of(config, jobProps),
ImmutableList.<Object> of(jobProps));
} catch (NoSuchMethodException | IllegalAccessException | InvocationTargetException | InstantiationException
| ClassNotFoundException e) {
throw new IllegalArgumentException(e);
}
}
@SuppressWarnings("unchecked")
private VersionSelectionPolicy<T> createSelectionPolicy(String className, Config config, Properties jobProps) {
try {
this.log.debug(String.format("Configuring selection policy %s for %s with %s", className, this.datasetRoot,
config.root().render(ConfigRenderOptions.concise())));
return (VersionSelectionPolicy<T>) GobblinConstructorUtils.invokeFirstConstructor(Class.forName(className),
ImmutableList.<Object> of(config), ImmutableList.<Object> of(config, jobProps),
ImmutableList.<Object> of(jobProps));
} catch (NoSuchMethodException | IllegalAccessException | InvocationTargetException | InstantiationException
| ClassNotFoundException e) {
throw new IllegalArgumentException(e);
}
}
}