/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package gobblin.data.management.policy;
import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Properties;
import java.util.Set;
import javax.annotation.Nullable;
import lombok.ToString;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Function;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import com.typesafe.config.Config;
import com.typesafe.config.ConfigFactory;
import gobblin.data.management.retention.policy.CombineRetentionPolicy;
import gobblin.data.management.version.DatasetVersion;
import gobblin.util.reflection.GobblinConstructorUtils;
/**
 * Implementation of {@link gobblin.data.management.policy.VersionSelectionPolicy} that allows combining different
 * policies through a union or intersect operation. It will combine the selected sets from each sub-policy using the
 * specified operation.
 *
 * <p>
 * For example, if there are five versions of a dataset, a, b, c, d, e, and policy1 would select versions a, b, while
 * policy2 would select versions b, c, then {@link CombineSelectionPolicy} will select versions a, b, c if the
 * operation is UNION, or only version b if the operation is INTERSECT.
 * </p>
 *
 * <p>
 * When built from configuration, {@link CombineSelectionPolicy} reads the following keys:
 * </p>
 * <ul>
 *   <li>{@code selection.combine.policy.classes} ({@link #VERSION_SELECTION_POLICIES_PREFIX}): list of the class
 *   names of the policies to combine. At least one class must be listed.</li>
 *   <li>{@code selection.combine.operation} ({@link #VERSION_SELECTION_COMBINE_OPERATION}): operation used to
 *   combine the selected sets. Can be UNION or INTERSECT (case-insensitive).</li>
 * </ul>
 * <p>
 * Additionally, any configuration necessary for the combined policies must be specified.
 * </p>
 *
 * @see CombineRetentionPolicy
 */
@ToString
public class CombineSelectionPolicy implements VersionSelectionPolicy<DatasetVersion> {

  /** Config key holding the list of class names of the policies to combine. */
  public static final String VERSION_SELECTION_POLICIES_PREFIX = "selection.combine.policy.classes";

  /** Config key holding the name of the {@link CombineOperation} to apply. */
  public static final String VERSION_SELECTION_COMBINE_OPERATION = "selection.combine.operation";

  /** Set operation used to merge the versions selected by each sub-policy. */
  public enum CombineOperation {
    INTERSECT,
    UNION
  }

  private final List<VersionSelectionPolicy<DatasetVersion>> selectionPolicies;
  private final CombineOperation combineOperation;

  /**
   * Creates a policy from already-instantiated sub-policies.
   *
   * @param selectionPolicies policies whose selections are combined; stored as given (no copy is made)
   * @param combineOperation operation used to combine the selections
   */
  public CombineSelectionPolicy(List<VersionSelectionPolicy<DatasetVersion>> selectionPolicies,
      CombineOperation combineOperation) {
    this.combineOperation = combineOperation;
    this.selectionPolicies = selectionPolicies;
  }

  /**
   * Creates a policy by reflectively instantiating every class listed under
   * {@link #VERSION_SELECTION_POLICIES_PREFIX} and reading the operation from
   * {@link #VERSION_SELECTION_COMBINE_OPERATION}.
   *
   * @param config configuration holding the two keys above; also offered to each sub-policy constructor
   * @param jobProps job properties, offered to each sub-policy constructor as the alternative argument list
   * @throws IOException if the configured list of policy classes is empty
   * @throws IllegalArgumentException if the policy list key is missing, a policy class cannot be loaded or
   *         instantiated, or the configured operation is not a {@link CombineOperation} name
   */
  @SuppressWarnings("unchecked")
  public CombineSelectionPolicy(Config config, Properties jobProps) throws IOException {
    Preconditions.checkArgument(config.hasPath(VERSION_SELECTION_POLICIES_PREFIX),
        "Selection policies to combine not specified; missing config key " + VERSION_SELECTION_POLICIES_PREFIX);

    ImmutableList.Builder<VersionSelectionPolicy<DatasetVersion>> builder = ImmutableList.builder();
    for (String combineClassName : config.getStringList(VERSION_SELECTION_POLICIES_PREFIX)) {
      try {
        // Two candidate argument lists are offered: (config) first, then (jobProps).
        builder.add((VersionSelectionPolicy<DatasetVersion>) GobblinConstructorUtils.invokeFirstConstructor(
            Class.forName(combineClassName), ImmutableList.<Object> of(config), ImmutableList.<Object> of(jobProps)));
      } catch (NoSuchMethodException | IllegalAccessException | InvocationTargetException | InstantiationException
          | ClassNotFoundException e) {
        // Name the offending class so a misconfiguration can be located without reading the stack trace.
        throw new IllegalArgumentException("Could not instantiate selection policy " + combineClassName, e);
      }
    }
    this.selectionPolicies = builder.build();
    if (this.selectionPolicies.isEmpty()) {
      throw new IOException("No selection policies specified for " + CombineSelectionPolicy.class.getCanonicalName());
    }

    // Locale.ROOT keeps the upper-casing locale-independent (e.g. "intersect" under a Turkish default locale
    // would otherwise become "İNTERSECT" and fail the enum lookup).
    this.combineOperation =
        CombineOperation.valueOf(config.getString(VERSION_SELECTION_COMBINE_OPERATION).toUpperCase(Locale.ROOT));
  }

  /**
   * Creates a policy from {@link Properties}, which are used both as the parsed {@link Config} and as the job
   * properties handed to sub-policies.
   *
   * @param props properties containing the combine configuration
   * @throws IOException if the configured list of policy classes is empty
   */
  public CombineSelectionPolicy(Properties props) throws IOException {
    this(ConfigFactory.parseProperties(props), props);
  }

  /**
   * Returns the most specific common superclass for the {@link #versionClass} of each embedded policy.
   */
  @Override
  public Class<? extends DatasetVersion> versionClass() {
    Class<? extends DatasetVersion> klazz = this.selectionPolicies.get(0).versionClass();
    // The first policy seeds the result; fold in the remaining ones.
    for (VersionSelectionPolicy<? extends DatasetVersion> policy
        : this.selectionPolicies.subList(1, this.selectionPolicies.size())) {
      klazz = commonSuperclass(klazz, policy.versionClass());
    }
    return klazz;
  }

  /**
   * Asks every sub-policy for its selection out of {@code allVersions} and combines the results with the
   * configured {@link CombineOperation}.
   *
   * @param allVersions all versions of the dataset, passed unchanged to every sub-policy
   * @return the union or intersection of the versions selected by the sub-policies
   */
  @Override
  public Collection<DatasetVersion> listSelectedVersions(final List<DatasetVersion> allVersions) {
    // Lists.newArrayList forces the lazy transform, so each sub-policy is evaluated exactly once.
    List<Set<DatasetVersion>> candidateSelectedVersions = Lists.newArrayList(Iterables
        .transform(this.selectionPolicies, new Function<VersionSelectionPolicy<DatasetVersion>, Set<DatasetVersion>>() {
          @Nullable
          @Override
          public Set<DatasetVersion> apply(VersionSelectionPolicy<DatasetVersion> input) {
            return Sets.newHashSet(input.listSelectedVersions(allVersions));
          }
        }));

    switch (this.combineOperation) {
      case INTERSECT:
        return intersectDatasetVersions(candidateSelectedVersions);
      case UNION:
        return unionDatasetVersions(candidateSelectedVersions);
      default:
        throw new RuntimeException("Combine operation " + this.combineOperation + " not recognized.");
    }
  }

  /**
   * Finds the closest class in the superclass chain of {@code classA} (starting with {@code classA} itself) that
   * {@code classB} is assignable to.
   *
   * @param classA first version class; its superclass chain is the one that is walked
   * @param classB second version class
   * @return that class if it is a {@link DatasetVersion} type; otherwise {@link DatasetVersion}, which covers the
   *         cases where the chain ends without a match or only meets at a non-{@link DatasetVersion} type
   */
  @VisibleForTesting
  @SuppressWarnings("unchecked")
  public static Class<? extends DatasetVersion> commonSuperclass(Class<? extends DatasetVersion> classA,
      Class<? extends DatasetVersion> classB) {

    if (classA.isAssignableFrom(classB)) {
      // a is a supertype of b, so return class of a
      return classA;
    }

    // a is not a supertype of b. Either b is a supertype of a, or they are not in the same branch:
    // find the closest superclass of a that is also a supertype of b.
    // getSuperclass() returns null for interfaces (and for Object), so the walk must stop on null.
    Class<?> klazz = classA;
    while (klazz != null && !klazz.isAssignableFrom(classB)) {
      klazz = klazz.getSuperclass();
    }

    if (klazz != null && DatasetVersion.class.isAssignableFrom(klazz)) {
      return (Class<? extends DatasetVersion>) klazz;
    }

    // Reached when the chain ran out (classA is an interface) or the only shared class is not a
    // DatasetVersion (e.g. Object): fall back to the root version type.
    return DatasetVersion.class;
  }

  /**
   * Intersects all given sets.
   *
   * @param sets sets to intersect
   * @return a new empty set if {@code sets} is empty, the single set itself if there is only one, otherwise a
   *         Guava {@link Sets#intersection} view over the inputs
   */
  private static Set<DatasetVersion> intersectDatasetVersions(Collection<Set<DatasetVersion>> sets) {
    if (sets.isEmpty()) {
      return Sets.newHashSet();
    }
    Iterator<Set<DatasetVersion>> it = sets.iterator();
    Set<DatasetVersion> outputSet = it.next();
    while (it.hasNext()) {
      outputSet = Sets.intersection(outputSet, it.next());
    }
    return outputSet;
  }

  /**
   * Unions all given sets.
   *
   * @param sets sets to union
   * @return a new empty set if {@code sets} is empty, the single set itself if there is only one, otherwise a
   *         Guava {@link Sets#union} view over the inputs
   */
  private static Set<DatasetVersion> unionDatasetVersions(Collection<Set<DatasetVersion>> sets) {
    if (sets.isEmpty()) {
      return Sets.newHashSet();
    }
    Iterator<Set<DatasetVersion>> it = sets.iterator();
    Set<DatasetVersion> outputSet = it.next();
    while (it.hasNext()) {
      outputSet = Sets.union(outputSet, it.next());
    }
    return outputSet;
  }
}