/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package gobblin.data.management.version.finder;
import java.io.IOException;
import java.util.Collection;
import java.util.List;
import lombok.extern.slf4j.Slf4j;
import org.apache.hadoop.hive.metastore.IMetaStoreClient;
import org.apache.hadoop.hive.ql.metadata.Partition;
import com.google.common.base.Function;
import com.google.common.base.Optional;
import com.google.common.base.Predicates;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import gobblin.data.management.copy.hive.HiveDataset;
import gobblin.data.management.copy.hive.HiveUtils;
import gobblin.data.management.version.DatasetVersion;
import gobblin.data.management.version.HiveDatasetVersion;
import gobblin.dataset.Dataset;
import gobblin.util.AutoReturnableObject;
/**
* An abstract {@link VersionFinder} to create {@link HiveDatasetVersion}s for all {@link Partition}s of a {@link HiveDataset}.
* Calls {@link #getDatasetVersion(Partition)} for every {@link Partition} found.
*/
@Slf4j
public abstract class AbstractHiveDatasetVersionFinder implements VersionFinder<HiveDatasetVersion> {
@Override
public Class<? extends DatasetVersion> versionClass() {
return HiveDatasetVersion.class;
}
/**
* Create {@link HiveDatasetVersion}s for all {@link Partition}s of a {@link HiveDataset}.
* Calls {@link #getDatasetVersion(Partition)} for every {@link Partition} found.
* <p>
* Note: If an exception occurs while processing a partition, that partition will be ignored in the returned collection
* </p>
*
* @throws IllegalArgumentException if <code>dataset</code> is not a {@link HiveDataset}. Or if {@link HiveDataset#getTable()}
* is not partitioned.
*/
@Override
public Collection<HiveDatasetVersion> findDatasetVersions(Dataset dataset) throws IOException {
if (!(dataset instanceof HiveDataset)) {
throw new IllegalArgumentException("HiveDatasetVersionFinder is only compatible with HiveDataset");
}
final HiveDataset hiveDataset = (HiveDataset) dataset;
if (!HiveUtils.isPartitioned(hiveDataset.getTable())) {
throw new IllegalArgumentException("HiveDatasetVersionFinder is only compatible with partitioned hive tables");
}
try (AutoReturnableObject<IMetaStoreClient> client = hiveDataset.getClientPool().getClient()) {
List<Partition> partitions = HiveUtils.getPartitions(client.get(), hiveDataset.getTable(), Optional.<String> absent());
return Lists.newArrayList(Iterables.filter(Iterables.transform(partitions, new Function<Partition, HiveDatasetVersion>() {
@Override
public HiveDatasetVersion apply(Partition partition) {
try {
return getDatasetVersion(partition);
} catch (Throwable e) {
log.warn(String.format("Failed to get DatasetVersion %s. Skipping.", partition.getCompleteName()), e);
return null;
}
}
}), Predicates.notNull()));
}
}
/**
*
* Create a {@link HiveDatasetVersion} for the {@link Partition}
* @param partition for which a {@link HiveDatasetVersion} is created
*/
protected abstract HiveDatasetVersion getDatasetVersion(Partition partition);
}