/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package gobblin.data.management.retention.profile;
import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Collection;
import java.util.List;
import java.util.Properties;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.reflect.ConstructorUtils;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import com.google.common.base.Splitter;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import com.typesafe.config.Config;
import gobblin.config.client.ConfigClient;
import gobblin.config.client.ConfigClientCache;
import gobblin.config.client.api.ConfigStoreFactoryDoesNotExistsException;
import gobblin.config.client.api.VersionStabilityPolicy;
import gobblin.config.store.api.ConfigStoreCreationException;
import gobblin.config.store.api.VersionDoesNotExistException;
import gobblin.dataset.Dataset;
import gobblin.dataset.DatasetsFinder;
import gobblin.util.reflection.GobblinConstructorUtils;
/**
* A DatasetFinder that instantiates multiple DatasetFinders. {@link #findDatasets()} will return a union of all the
* datasets found by each datasetFinder
* <p>
* Subclasses will specify the dataset finder class key name to instantiate. If {@link #datasetFinderClassKey()} is set
* in jobProps, a single datasetFinder is created. Otherwise {@link #datasetFinderImportedByKey()} is used to find all
* the importedBy {@link URI}s from gobblin config management. The {@link Config} for each {@link URI} should have a
* {@link #datasetFinderClassKey()} set.
* </p>
*
*/
@Slf4j
public abstract class MultiDatasetFinder implements DatasetsFinder<Dataset> {
private static final Splitter TAGS_SPLITTER = Splitter.on(",").omitEmptyStrings().trimResults();
protected abstract String datasetFinderClassKey();
protected abstract String datasetFinderImportedByKey();
List<DatasetsFinder<Dataset>> datasetFinders;
protected final Properties jobProps;
@SuppressWarnings({ "rawtypes", "unchecked" })
public MultiDatasetFinder(FileSystem fs, Properties jobProps) {
this.jobProps = jobProps;
try {
this.datasetFinders = Lists.newArrayList();
if (jobProps.containsKey(datasetFinderClassKey())) {
try {
log.info(String.format("Instantiating datasetfinder %s ", jobProps.getProperty(datasetFinderClassKey())));
this.datasetFinders.add((DatasetsFinder) ConstructorUtils.invokeConstructor(
Class.forName(jobProps.getProperty(datasetFinderClassKey())), fs, jobProps));
} catch (NoSuchMethodException | IllegalAccessException | InvocationTargetException | InstantiationException
| ClassNotFoundException e) {
log.error(
String.format("Retention ignored could not instantiate datasetfinder %s.",
jobProps.getProperty(datasetFinderClassKey())), e);
Throwables.propagate(e);
}
} else if (jobProps.containsKey(datasetFinderImportedByKey())) {
log.info("Instatiating dataset finders using tag " + jobProps.getProperty(datasetFinderImportedByKey()));
ConfigClient client = ConfigClientCache.getClient(VersionStabilityPolicy.STRONG_LOCAL_STABILITY);
Collection<URI> importedBys = Lists.newArrayList();
for (String tag : TAGS_SPLITTER.split(jobProps.getProperty(datasetFinderImportedByKey()))) {
log.info("Looking for datasets that import tag " + tag);
importedBys.addAll(client.getImportedBy(new URI(tag), false));
}
for (URI importedBy : importedBys) {
Config datasetClassConfig = client.getConfig(importedBy);
try {
this.datasetFinders.add((DatasetsFinder) GobblinConstructorUtils.invokeFirstConstructor(
Class.forName(datasetClassConfig.getString(datasetFinderClassKey())), ImmutableList.of(fs, jobProps,
datasetClassConfig), ImmutableList.of(fs, jobProps)));
log.info(String.format("Instantiated datasetfinder %s for %s.",
datasetClassConfig.getString(datasetFinderClassKey()), importedBy));
} catch (InstantiationException | IllegalAccessException | IllegalArgumentException
| InvocationTargetException | NoSuchMethodException | SecurityException | ClassNotFoundException e) {
log.error(String.format("Retention ignored for %s. Could not instantiate datasetfinder %s.", importedBy,
datasetClassConfig.getString(datasetFinderClassKey())), e);
Throwables.propagate(e);
}
}
} else {
log.warn(String.format(
"NO DATASET_FINDERS FOUND. Either specify dataset finder class at %s or specify the imported tags at %s",
datasetFinderClassKey(), datasetFinderImportedByKey()));
}
} catch (IllegalArgumentException | VersionDoesNotExistException | ConfigStoreFactoryDoesNotExistsException
| ConfigStoreCreationException | URISyntaxException e) {
Throwables.propagate(e);
}
}
@Override
public List<Dataset> findDatasets() throws IOException {
List<Dataset> datasets = Lists.newArrayList();
for (DatasetsFinder<Dataset> df : this.datasetFinders) {
datasets.addAll(df.findDatasets());
}
return datasets;
}
@Override
public Path commonDatasetRoot() {
throw new UnsupportedOperationException("There is no common dataset root for MultiDatasetFinder");
}
}