/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package gobblin.compliance;
import java.io.IOException;
import java.security.PrivilegedExceptionAction;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.security.UserGroupInformation;
import com.google.common.base.Optional;
import com.google.common.base.Preconditions;
import lombok.extern.slf4j.Slf4j;
import gobblin.compliance.purger.HivePurgerQueryTemplate;
import gobblin.compliance.retention.HivePartitionRetentionVersion;
import gobblin.compliance.utils.ProxyUtils;
import gobblin.configuration.State;
import gobblin.data.management.copy.hive.HiveDataset;
import gobblin.data.management.copy.hive.HiveDatasetFinder;
import gobblin.dataset.Dataset;
/**
* A version finder class to find {@link HivePartitionVersion}s.
*
* @author adsharma
*/
@Slf4j
public class HivePartitionVersionFinder
    implements gobblin.data.management.version.finder.VersionFinder<HivePartitionVersion> {
  protected final FileSystem fs;
  protected final State state;
  protected List<String> patterns;
  // Owner of the most recently inspected dataset; currently only recorded, not read back.
  private Optional<String> owner = Optional.absent();

  public HivePartitionVersionFinder(FileSystem fs, State state, List<String> patterns) {
    this.fs = fs;
    // Defensive copy so later mutation of the caller's State cannot affect this finder.
    this.state = new State(state);
    this.patterns = patterns;
  }

  @Override
  public Class<HivePartitionVersion> versionClass() {
    return HivePartitionVersion.class;
  }

  /**
   * Will find all the versions of the {@link HivePartitionDataset}.
   *
   * For a dataset with table name table1, corresponding versions table will be
   * table1_backup_timestamp or table1_staging_timestamp or table1_trash_timestamp
   *
   * Based on pattern, a type of version will be selected eg. backup or trash or staging
   *
   * If a Hive version's table contains no Partitions, it will be dropped.
   *
   * @param dataset dataset whose versions are requested; non-{@link HivePartitionDataset}
   *                inputs yield an empty collection.
   * @return the versions found for this dataset only.
   * @throws IOException if the underlying Hive lookup or query execution fails.
   */
  @Override
  public Collection<HivePartitionVersion> findDatasetVersions(Dataset dataset)
      throws IOException {
    if (!(dataset instanceof HivePartitionDataset)) {
      return new ArrayList<>();
    }
    HivePartitionDataset hivePartitionDataset = (HivePartitionDataset) dataset;
    this.owner = hivePartitionDataset.getOwner();
    Preconditions.checkArgument(!this.patterns.isEmpty(),
        "No patterns to find versions for the dataset " + dataset.datasetURN());
    return findVersions(hivePartitionDataset.getName(), hivePartitionDataset.datasetURN());
  }

  /**
   * Finds the versions whose partition name matches {@code name}, scoped to the tables
   * selected by {@link ComplianceConfigurationKeys#HIVE_VERSIONS_WHITELIST}.
   */
  private List<HivePartitionVersion> findVersions(String name, String urn)
      throws IOException {
    State state = new State(this.state);
    Preconditions.checkArgument(this.state.contains(ComplianceConfigurationKeys.HIVE_VERSIONS_WHITELIST),
        "Missing required property " + ComplianceConfigurationKeys.HIVE_VERSIONS_WHITELIST);
    state.setProp(ComplianceConfigurationKeys.HIVE_DATASET_WHITELIST,
        this.state.getProp(ComplianceConfigurationKeys.HIVE_VERSIONS_WHITELIST));
    // BUG FIX: collect into a fresh list per call. The previous implementation appended to a
    // shared instance field that was never cleared, so versions of every previously processed
    // dataset leaked into the result of later calls.
    List<HivePartitionVersion> versions = new ArrayList<>();
    setVersions(name, state, versions);
    log.info("Found " + versions.size() + " versions for the dataset " + urn);
    return versions;
  }

  /**
   * Adds all partitions of {@code hiveDataset} whose name matches {@code name} to
   * {@code versions}. If the dataset has no partitions at all and
   * {@link ComplianceConfigurationKeys#SHOULD_DROP_EMPTY_TABLES} is enabled, the empty
   * version table is dropped instead.
   */
  private void addPartitionsToVersions(List<HivePartitionVersion> versions, String name, HiveDataset hiveDataset,
      List<Partition> partitions)
      throws IOException {
    if (partitions.isEmpty()) {
      if (Boolean.parseBoolean(this.state.getProp(ComplianceConfigurationKeys.SHOULD_DROP_EMPTY_TABLES,
          ComplianceConfigurationKeys.DEFAULT_SHOULD_DROP_EMPTY_TABLES))) {
        executeDropTableQuery(hiveDataset);
      }
      return;
    }
    for (Partition partition : partitions) {
      if (partition.getName().equalsIgnoreCase(name)) {
        versions.add(new HivePartitionRetentionVersion(partition));
      }
    }
  }

  /**
   * Drops the table backing {@code hiveDataset}, proxying as the table owner when one is set.
   *
   * @throws IOException wrapping any {@link SQLException} raised by the query executor.
   */
  private void executeDropTableQuery(HiveDataset hiveDataset)
      throws IOException {
    String dbName = hiveDataset.getTable().getDbName();
    String tableName = hiveDataset.getTable().getTableName();
    Optional<String> datasetOwner = Optional.fromNullable(hiveDataset.getTable().getOwner());
    // try-with-resources guarantees the proxy query executor is closed even on failure.
    try (HiveProxyQueryExecutor hiveProxyQueryExecutor = ProxyUtils
        .getQueryExecutor(new State(this.state), datasetOwner)) {
      hiveProxyQueryExecutor.executeQuery(HivePurgerQueryTemplate.getDropTableQuery(dbName, tableName), datasetOwner);
    } catch (SQLException e) {
      throw new IOException(e);
    }
  }

  /**
   * Populates {@code versions} with matching partitions of every whitelisted Hive table whose
   * name contains one of {@link #patterns}, running the lookup as the login user.
   */
  private void setVersions(final String name, final State state, final List<HivePartitionVersion> versions)
      throws IOException {
    try {
      UserGroupInformation loginUser = UserGroupInformation.getLoginUser();
      loginUser.doAs(new PrivilegedExceptionAction<Void>() {
        @Override
        public Void run()
            throws IOException {
          HiveDatasetFinder finder = new HiveDatasetFinder(fs, state.getProperties());
          for (HiveDataset hiveDataset : finder.findDatasets()) {
            List<Partition> partitions = hiveDataset.getPartitionsFromDataset();
            for (String pattern : patterns) {
              if (hiveDataset.getTable().getTableName().contains(pattern)) {
                addPartitionsToVersions(versions, name, hiveDataset, partitions);
                // BUG FIX: stop after the first matching pattern. Without this break, a table
                // matching several patterns was added (and its empty table possibly dropped)
                // once per matching pattern, producing duplicate versions.
                break;
              }
            }
          }
          return null;
        }
      });
      // An IOException thrown inside run() propagates from doAs() unchanged; no need to
      // catch and re-wrap it as the previous implementation did.
    } catch (InterruptedException e) {
      // Restore the interrupt flag so callers further up the stack can observe it.
      Thread.currentThread().interrupt();
      throw new IOException(e);
    }
  }
}