/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package gobblin.hive.metastore;

import java.io.IOException;
import java.util.Arrays;
import java.util.List;

import lombok.extern.slf4j.Slf4j;

import org.apache.commons.pool2.impl.GenericObjectPoolConfig;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.metastore.IMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.AlreadyExistsException;
import org.apache.hadoop.hive.metastore.api.Database;
import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.thrift.TException;
import org.joda.time.DateTime;

import com.google.common.base.Optional;
import com.google.common.base.Preconditions;
import com.google.common.base.Throwables;
import com.google.common.primitives.Ints;

import gobblin.annotation.Alpha;
import gobblin.configuration.State;
import gobblin.hive.HiveLock;
import gobblin.hive.HiveMetastoreClientPool;
import gobblin.hive.HivePartition;
import gobblin.hive.HiveRegProps;
import gobblin.hive.HiveRegister;
import gobblin.hive.HiveRegistrationUnit.Column;
import gobblin.hive.HiveTable;
import gobblin.hive.spec.HiveSpec;
import gobblin.metrics.GobblinMetrics;
import gobblin.metrics.GobblinMetricsRegistry;
import gobblin.metrics.MetricContext;
import gobblin.metrics.event.EventSubmitter;
import gobblin.util.AutoCloseableLock;
import gobblin.util.AutoReturnableObject;
/**
* An implementation of {@link HiveRegister} that uses {@link IMetaStoreClient} for Hive registration.
*
* <p>
* An instance of this class is constructed with a {@link State} object or obtained via
* {@link HiveRegister#get(State)}. Property {@link HiveRegProps#HIVE_DB_ROOT_DIR} is required for registering
* a table or a partition if the database does not exist.
* </p>
*
* <p>
* The {@link #register(HiveSpec)} method is asynchronous and returns immediately. Registration is performed in a
* thread pool whose size is controlled by {@link HiveRegProps#HIVE_REGISTER_THREADS}.
* </p>
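 *
 * <p>
 *   A minimal usage sketch (the database and table names below are illustrative; exception handling is omitted):
 * </p>
 * <pre>{@code
 * State state = ...;  // job state; must contain HiveRegProps.HIVE_DB_ROOT_DIR if the database may not exist
 * HiveRegister register = HiveRegister.get(state);
 * register.createDbIfNotExists("example_db");
 * boolean exists = register.existsTable("example_db", "example_table");
 * }</pre>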
*
* @author Ziyang Liu
*/
@Slf4j
@Alpha
public class HiveMetaStoreBasedRegister extends HiveRegister {
private final HiveMetastoreClientPool clientPool;
private final HiveLock locks = new HiveLock();
private final EventSubmitter eventSubmitter;
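  /**
   * @param state a {@link State} object providing the configuration properties for this register
   * @param metastoreURI optional URI of the Hive metastore to connect to, passed through to the
   *                     {@link HiveMetastoreClientPool}
   */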
public HiveMetaStoreBasedRegister(State state, Optional<String> metastoreURI) throws IOException {
super(state);
GenericObjectPoolConfig config = new GenericObjectPoolConfig();
config.setMaxTotal(this.props.getNumThreads());
config.setMaxIdle(this.props.getNumThreads());
this.clientPool = HiveMetastoreClientPool.get(this.props.getProperties(), metastoreURI);
MetricContext metricContext =
GobblinMetricsRegistry.getInstance().getMetricContext(state, HiveMetaStoreBasedRegister.class, GobblinMetrics.getCustomTagsFromState(state));
this.eventSubmitter = new EventSubmitter.Builder(metricContext, "gobblin.hive.HiveMetaStoreBasedRegister").build();
}
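  /**
   * Registers the given {@link HiveSpec}: creates the database if needed, creates or alters the table, and,
   * if the spec contains a partition, adds or alters that partition.
   */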
@Override
protected void registerPath(HiveSpec spec) throws IOException {
try (AutoReturnableObject<IMetaStoreClient> client = this.clientPool.getClient()) {
Table table = HiveMetaStoreUtils.getTable(spec.getTable());
createDbIfNotExists(client.get(), table.getDbName());
createOrAlterTable(client.get(), table, spec);
Optional<HivePartition> partition = spec.getPartition();
if (partition.isPresent()) {
addOrAlterPartition(client.get(), table, HiveMetaStoreUtils.getPartition(partition.get()), spec);
}
HiveMetaStoreEventHelper.submitSuccessfulPathRegistration(eventSubmitter, spec);
} catch (TException e) {
HiveMetaStoreEventHelper.submitFailedPathRegistration(eventSubmitter, spec, e);
throw new IOException(e);
}
}
@Override
public boolean createDbIfNotExists(String dbName) throws IOException {
try (AutoReturnableObject<IMetaStoreClient> client = this.clientPool.getClient()) {
return createDbIfNotExists(client.get(), dbName);
}
}
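  /**
   * Creates the database {@code dbName} under a database-level lock.
   *
   * @return {@code true} if the database was created, {@code false} if it already existed
   */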
private boolean createDbIfNotExists(IMetaStoreClient client, String dbName) throws IOException {
Database db = new Database();
db.setName(dbName);
try (AutoCloseableLock lock = this.locks.getDbLock(dbName)) {
try {
client.getDatabase(db.getName());
return false;
} catch (NoSuchObjectException nsoe) {
// proceed with create
} catch (TException te) {
throw new IOException(te);
}
Preconditions.checkState(this.hiveDbRootDir.isPresent(),
"Missing required property " + HiveRegProps.HIVE_DB_ROOT_DIR);
db.setLocationUri(new Path(this.hiveDbRootDir.get(), dbName + HIVE_DB_EXTENSION).toString());
try {
client.createDatabase(db);
log.info("Created database " + dbName);
HiveMetaStoreEventHelper.submitSuccessfulDBCreation(this.eventSubmitter, dbName);
return true;
} catch (AlreadyExistsException e) {
return false;
} catch (TException e) {
HiveMetaStoreEventHelper.submitFailedDBCreation(this.eventSubmitter, dbName, e);
throw new IOException("Unable to create Hive database " + dbName, e);
}
}
}
@Override
public boolean createTableIfNotExists(HiveTable table) throws IOException {
try (AutoReturnableObject<IMetaStoreClient> client = this.clientPool.getClient();
AutoCloseableLock lock = this.locks.getTableLock(table.getDbName(), table.getTableName())) {
return createTableIfNotExists(client.get(), HiveMetaStoreUtils.getTable(table), table);
}
}
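  /**
   * Adds the given partition to the given table under a table-level lock.
   *
   * @return {@code true} if the partition was added, {@code false} if it already existed
   */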
@Override
public boolean addPartitionIfNotExists(HiveTable table, HivePartition partition) throws IOException {
try (AutoReturnableObject<IMetaStoreClient> client = this.clientPool.getClient();
AutoCloseableLock lock = this.locks.getTableLock(table.getDbName(), table.getTableName())) {
try {
client.get().getPartition(table.getDbName(), table.getTableName(), partition.getValues());
return false;
} catch (NoSuchObjectException e) {
        client.get().add_partition(getPartitionWithCreateTimeNow(HiveMetaStoreUtils.getPartition(partition)));
HiveMetaStoreEventHelper.submitSuccessfulPartitionAdd(this.eventSubmitter, table, partition);
return true;
}
} catch (TException e) {
HiveMetaStoreEventHelper.submitFailedPartitionAdd(this.eventSubmitter, table, partition, e);
throw new IOException(String.format("Unable to add partition %s in table %s in db %s", partition.getValues(),
table.getTableName(), table.getDbName()), e);
}
}
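  /**
   * Creates the given {@link Table} under a table-level lock.
   *
   * @return {@code true} if the table was created, {@code false} if it already existed
   */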
private boolean createTableIfNotExists(IMetaStoreClient client, Table table, HiveTable hiveTable) throws IOException {
String dbName = table.getDbName();
String tableName = table.getTableName();
try (AutoCloseableLock lock = this.locks.getTableLock(dbName, tableName)) {
if (client.tableExists(table.getDbName(), table.getTableName())) {
return false;
}
client.createTable(getTableWithCreateTimeNow(table));
log.info(String.format("Created Hive table %s in db %s", tableName, dbName));
HiveMetaStoreEventHelper.submitSuccessfulTableCreation(this.eventSubmitter, hiveTable);
return true;
} catch (TException e) {
HiveMetaStoreEventHelper.submitFailedTableCreation(eventSubmitter, hiveTable, e);
throw new IOException(String.format("Error in creating or altering Hive table %s in db %s", table.getTableName(),
table.getDbName()), e);
}
}
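  /**
   * Attempts to create the given table; if it already exists, alters the existing table only when
   * {@code needToUpdateTable} reports that it differs from the table in the given {@link HiveSpec}.
   */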
private void createOrAlterTable(IMetaStoreClient client, Table table, HiveSpec spec) throws TException {
String dbName = table.getDbName();
String tableName = table.getTableName();
try (AutoCloseableLock lock = this.locks.getTableLock(dbName, tableName)) {
try {
client.createTable(getTableWithCreateTimeNow(table));
log.info(String.format("Created Hive table %s in db %s", tableName, dbName));
} catch (AlreadyExistsException e) {
log.info("Table {} already exists in db {}.", tableName, dbName);
try {
HiveTable existingTable = HiveMetaStoreUtils.getHiveTable(client.getTable(dbName, tableName));
if (needToUpdateTable(existingTable, spec.getTable())) {
client.alter_table(dbName, tableName, getTableWithCreateTime(table, existingTable));
log.info(String.format("updated Hive table %s in db %s", tableName, dbName));
}
} catch (TException e2) {
          log.error(
              String.format("Unable to create or alter Hive table %s in db %s: %s", tableName, dbName, e2.getMessage()),
              e2);
throw e2;
}
} catch (TException e) {
        log.error(
            String.format("Unable to create Hive table %s in db %s: %s", tableName, dbName, e.getMessage()), e);
throw e;
}
}
}
@Override
public boolean existsTable(String dbName, String tableName) throws IOException {
try (AutoReturnableObject<IMetaStoreClient> client = this.clientPool.getClient()) {
return client.get().tableExists(dbName, tableName);
} catch (TException e) {
throw new IOException(String.format("Unable to check existence of table %s in db %s", tableName, dbName), e);
}
}
@Override
public boolean existsPartition(String dbName, String tableName, List<Column> partitionKeys,
List<String> partitionValues) throws IOException {
try (AutoReturnableObject<IMetaStoreClient> client = this.clientPool.getClient()) {
client.get().getPartition(dbName, tableName, partitionValues);
return true;
} catch (NoSuchObjectException e) {
return false;
} catch (TException e) {
throw new IOException(String.format("Unable to check existence of partition %s in table %s in db %s",
partitionValues, tableName, dbName), e);
}
}
@Override
public void dropTableIfExists(String dbName, String tableName) throws IOException {
try (AutoReturnableObject<IMetaStoreClient> client = this.clientPool.getClient()) {
if (client.get().tableExists(dbName, tableName)) {
client.get().dropTable(dbName, tableName);
HiveMetaStoreEventHelper.submitSuccessfulTableDrop(eventSubmitter, dbName, tableName);
log.info("Dropped table " + tableName + " in db " + dbName);
}
} catch (TException e) {
HiveMetaStoreEventHelper.submitFailedTableDrop(eventSubmitter, dbName, tableName, e);
throw new IOException(String.format("Unable to deregister table %s in db %s", tableName, dbName), e);
}
}
@Override
public void dropPartitionIfExists(String dbName, String tableName, List<Column> partitionKeys,
List<String> partitionValues) throws IOException {
try (AutoReturnableObject<IMetaStoreClient> client = this.clientPool.getClient()) {
client.get().dropPartition(dbName, tableName, partitionValues, false);
HiveMetaStoreEventHelper.submitSuccessfulPartitionDrop(eventSubmitter, dbName, tableName, partitionValues);
log.info("Dropped partition " + partitionValues + " in table " + tableName + " in db " + dbName);
} catch (NoSuchObjectException e) {
// Partition does not exist. Nothing to do
} catch (TException e) {
HiveMetaStoreEventHelper.submitFailedPartitionDrop(eventSubmitter, dbName, tableName, partitionValues, e);
throw new IOException(String.format("Unable to check existence of Hive partition %s in table %s in db %s",
partitionValues, tableName, dbName), e);
}
}
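  /**
   * Attempts to add the given partition under a partition-level lock; if the add fails (typically because the
   * partition already exists), alters the existing partition only when {@code needToUpdatePartition} reports
   * that it differs from the partition in the given {@link HiveSpec}.
   */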
private void addOrAlterPartition(IMetaStoreClient client, Table table, Partition partition, HiveSpec spec)
throws TException {
Preconditions.checkArgument(table.getPartitionKeysSize() == partition.getValues().size(),
String.format("Partition key size is %s but partition value size is %s", table.getPartitionKeys().size(),
partition.getValues().size()));
try (AutoCloseableLock lock =
this.locks.getPartitionLock(table.getDbName(), table.getTableName(), partition.getValues())) {
try {
client.add_partition(getPartitionWithCreateTimeNow(partition));
log.info(String.format("Added partition %s to table %s with location %s", stringifyPartition(partition),
table.getTableName(), partition.getSd().getLocation()));
} catch (TException e) {
try {
HivePartition existingPartition = HiveMetaStoreUtils
.getHivePartition(client.getPartition(table.getDbName(), table.getTableName(), partition.getValues()));
if (needToUpdatePartition(existingPartition, spec.getPartition().get())) {
log.info(String.format("Partition update required. ExistingPartition %s, newPartition %s",
stringifyPartition(existingPartition), stringifyPartition(spec.getPartition().get())));
Partition newPartition = getPartitionWithCreateTime(partition, existingPartition);
log.info(String.format("Altering partition %s", newPartition));
client.alter_partition(table.getDbName(), table.getTableName(), newPartition);
log.info(String.format("Updated partition %s in table %s with location %s", stringifyPartition(newPartition),
table.getTableName(), partition.getSd().getLocation()));
} else {
log.info(String.format("Partition %s in table %s with location %s already exists and no need to update",
stringifyPartition(partition), table.getTableName(), partition.getSd().getLocation()));
}
} catch (Throwable e2) {
          log.error(String.format(
              "Unable to add or alter partition %s in table %s with location %s: %s",
              stringifyPartitionVerbose(partition), table.getTableName(), partition.getSd().getLocation(),
              e2.getMessage()), e2);
throw e2;
}
}
}
}
private static String stringifyPartition(Partition partition) {
if (log.isDebugEnabled()) {
return stringifyPartitionVerbose(partition);
}
return Arrays.toString(partition.getValues().toArray());
}
private static String stringifyPartition(HivePartition partition) {
return partition.toString();
}
private static String stringifyPartitionVerbose(Partition partition) {
return partition.toString();
}
@Override
public Optional<HiveTable> getTable(String dbName, String tableName) throws IOException {
try (AutoReturnableObject<IMetaStoreClient> client = this.clientPool.getClient()) {
return Optional.of(HiveMetaStoreUtils.getHiveTable(client.get().getTable(dbName, tableName)));
} catch (NoSuchObjectException e) {
return Optional.<HiveTable> absent();
} catch (TException e) {
throw new IOException("Unable to get table " + tableName + " in db " + dbName, e);
}
}
@Override
public Optional<HivePartition> getPartition(String dbName, String tableName, List<Column> partitionKeys,
List<String> partitionValues) throws IOException {
try (AutoReturnableObject<IMetaStoreClient> client = this.clientPool.getClient()) {
return Optional
.of(HiveMetaStoreUtils.getHivePartition(client.get().getPartition(dbName, tableName, partitionValues)));
} catch (NoSuchObjectException e) {
return Optional.<HivePartition> absent();
} catch (TException e) {
throw new IOException(
"Unable to get partition " + partitionValues + " from table " + tableName + " in db " + dbName, e);
}
}
@Override
public void alterTable(HiveTable table) throws IOException {
try (AutoReturnableObject<IMetaStoreClient> client = this.clientPool.getClient()) {
if (!client.get().tableExists(table.getDbName(), table.getTableName())) {
throw new IOException("Table " + table.getTableName() + " in db " + table.getDbName() + " does not exist");
}
client.get().alter_table(table.getDbName(), table.getTableName(),
getTableWithCreateTimeNow(HiveMetaStoreUtils.getTable(table)));
HiveMetaStoreEventHelper.submitSuccessfulTableAlter(eventSubmitter, table);
} catch (TException e) {
HiveMetaStoreEventHelper.submitFailedTableAlter(eventSubmitter, table, e);
throw new IOException("Unable to alter table " + table.getTableName() + " in db " + table.getDbName(), e);
}
}
@Override
public void alterPartition(HiveTable table, HivePartition partition) throws IOException {
try (AutoReturnableObject<IMetaStoreClient> client = this.clientPool.getClient()) {
client.get().alter_partition(table.getDbName(), table.getTableName(),
getPartitionWithCreateTimeNow(HiveMetaStoreUtils.getPartition(partition)));
HiveMetaStoreEventHelper.submitSuccessfulPartitionAlter(eventSubmitter, table, partition);
} catch (TException e) {
HiveMetaStoreEventHelper.submitFailedPartitionAlter(eventSubmitter, table, partition, e);
throw new IOException(String.format("Unable to alter partition %s in table %s in db %s", partition.getValues(),
table.getTableName(), table.getDbName()), e);
}
}
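  /**
   * Sets the partition create time to the current time (in epoch seconds) if not already set.
   */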
private Partition getPartitionWithCreateTimeNow(Partition partition) {
return getPartitionWithCreateTime(partition, Ints.checkedCast(DateTime.now().getMillis() / 1000));
}
private Partition getPartitionWithCreateTime(Partition partition, HivePartition referencePartition) {
return getPartitionWithCreateTime(partition,
Ints.checkedCast(referencePartition.getCreateTime().or(DateTime.now().getMillis() / 1000)));
}
/**
* Sets create time if not already set.
*/
private Partition getPartitionWithCreateTime(Partition partition, int createTime) {
if (partition.isSetCreateTime() && partition.getCreateTime() > 0) {
return partition;
}
Partition actualPartition = partition.deepCopy();
actualPartition.setCreateTime(createTime);
return actualPartition;
}
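  /**
   * Sets the table create time to the current time (in epoch seconds) if not already set.
   */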
  private Table getTableWithCreateTimeNow(Table table) {
    return getTableWithCreateTime(table, Ints.checkedCast(DateTime.now().getMillis() / 1000));
  }
  private Table getTableWithCreateTime(Table table, HiveTable referenceTable) {
    return getTableWithCreateTime(table,
        Ints.checkedCast(referenceTable.getCreateTime().or(DateTime.now().getMillis() / 1000)));
  }
  /**
   * Sets create time if not already set.
   */
  private Table getTableWithCreateTime(Table table, int createTime) {
    if (table.isSetCreateTime() && table.getCreateTime() > 0) {
      return table;
    }
    Table actualTable = table.deepCopy();
    actualTable.setCreateTime(createTime);
    return actualTable;
  }
}