/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package gobblin.hive;
import java.io.Closeable;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import org.apache.commons.lang3.reflect.ConstructorUtils;
import com.google.common.base.Optional;
import com.google.common.base.Predicate;
import com.google.common.base.Throwables;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.util.concurrent.ListenableFuture;
import com.google.common.util.concurrent.ListeningExecutorService;
import gobblin.annotation.Alpha;
import gobblin.configuration.State;
import gobblin.hive.HiveRegistrationUnit.Column;
import gobblin.hive.spec.HiveSpec;
import gobblin.hive.spec.HiveSpecWithPostActivities;
import gobblin.hive.spec.HiveSpecWithPreActivities;
import gobblin.hive.spec.HiveSpecWithPredicates;
import gobblin.hive.spec.activity.Activity;
import gobblin.util.ExecutorsUtils;
import gobblin.util.executors.ScalingThreadPoolExecutor;
import lombok.Getter;
import lombok.extern.slf4j.Slf4j;
/**
* A class for registering Hive tables and partitions.
*
* @author Ziyang Liu
*/
@Slf4j
@Alpha
public abstract class HiveRegister implements Closeable {
// Property naming the HiveRegister implementation class; read by the static get(State, Optional) factory.
public static final String HIVE_REGISTER_TYPE = "hive.register.type";
// Implementation class name used when HIVE_REGISTER_TYPE is not set.
public static final String DEFAULT_HIVE_REGISTER_TYPE = "gobblin.hive.metastore.HiveMetaStoreBasedRegister";
// Property naming the comparator class instantiated by getTableComparator().
public static final String HIVE_TABLE_COMPARATOR_TYPE = "hive.table.comparator.type";
// Comparator class name used when HIVE_TABLE_COMPARATOR_TYPE is not set.
public static final String DEFAULT_HIVE_TABLE_COMPARATOR_TYPE = HiveTableComparator.class.getName();
// Property naming the comparator class instantiated by getPartitionComparator().
public static final String HIVE_PARTITION_COMPARATOR_TYPE = "hive.partition.comparator.type";
// Comparator class name used when HIVE_PARTITION_COMPARATOR_TYPE is not set.
public static final String DEFAULT_HIVE_PARTITION_COMPARATOR_TYPE = HivePartitionComparator.class.getName();
// Not referenced in this class; presumably the directory suffix of a Hive database, for use by subclasses.
protected static final String HIVE_DB_EXTENSION = ".db";
// Registration properties built from the State passed to the constructor (Lombok generates getProps()).
@Getter
protected final HiveRegProps props;
// Value of props.getDbRootDir(), captured at construction time.
protected final Optional<String> hiveDbRootDir;
// Executor on which register(HiveSpec) runs its tasks; shut down in close().
protected final ListeningExecutorService executor;
// Futures of submitted registrations keyed by spec id (see getSpecId); close() waits on every entry.
protected final Map<String, Future<Void>> futures = Maps.newConcurrentMap();
protected HiveRegister(State state) {
this.props = new HiveRegProps(state);
this.hiveDbRootDir = this.props.getDbRootDir();
this.executor = ExecutorsUtils.loggingDecorator(
ScalingThreadPoolExecutor.newScalingThreadPool(0, this.props.getNumThreads(), TimeUnit.SECONDS.toMillis(10),
ExecutorsUtils.newThreadFactory(Optional.of(log), Optional.of(getClass().getSimpleName()))));
}
/**
 * Register a table or partition given a {@link HiveSpec}. The work is submitted to this register's executor,
 * so this method does not wait for the registration to complete.
 *
 * <p>
 * The submitted task evaluates the {@link Predicate}s of the spec (if it has any) and skips the spec when one
 * of them returns false. Otherwise it executes the pre-{@link Activity}s, calls {@link #registerPath(HiveSpec)}
 * (which subclasses implement), and then executes the post-{@link Activity}s.
 * </p>
 *
 * <p>
 * The returned future is also recorded under the spec's id so that {@link #close()} can wait for it.
 * </p>
 *
 * @param spec the {@link HiveSpec} to be registered.
 * @return a {@link ListenableFuture} for the process of registering the given {@link HiveSpec}.
 */
public ListenableFuture<Void> register(final HiveSpec spec) {
  final Callable<Void> registrationTask = new Callable<Void>() {
    @Override
    public Void call() throws Exception {
      // Predicates gate the whole registration, including the pre- and post-activities.
      if (spec instanceof HiveSpecWithPredicates) {
        boolean predicatesHold = evaluatePredicates((HiveSpecWithPredicates) spec);
        if (!predicatesHold) {
          log.info("Skipping " + spec + " since predicates return false");
          return null;
        }
      }

      if (spec instanceof HiveSpecWithPreActivities) {
        HiveSpecWithPreActivities withPre = (HiveSpecWithPreActivities) spec;
        for (Activity preActivity : withPre.getPreActivities()) {
          preActivity.execute(HiveRegister.this);
        }
      }

      registerPath(spec);

      if (spec instanceof HiveSpecWithPostActivities) {
        HiveSpecWithPostActivities withPost = (HiveSpecWithPostActivities) spec;
        for (Activity postActivity : withPost.getPostActivities()) {
          postActivity.execute(HiveRegister.this);
        }
      }
      return null;
    }
  };

  ListenableFuture<Void> pending = this.executor.submit(registrationTask);
  this.futures.put(getSpecId(spec), pending);
  return pending;
}
/**
 * Build the key under which the future of a {@link HiveSpec} is stored in {@link #futures}.
 *
 * @param spec the spec being registered.
 * @return {@code db.table} for a spec without a partition, or {@code db.table@[v1, v2, ...]} where the bracketed
 * part is the array string of the partition values.
 */
private String getSpecId(HiveSpec spec) {
  Optional<HivePartition> maybePartition = spec.getPartition();
  String dbName = spec.getTable().getDbName();
  String tableName = spec.getTable().getTableName();

  if (!maybePartition.isPresent()) {
    return String.format("%s.%s", dbName, tableName);
  }

  Object[] partitionValues = maybePartition.get().getValues().toArray();
  return String.format("%s.%s@%s", dbName, tableName, Arrays.toString(partitionValues));
}
/**
 * Apply each {@link Predicate} of the given spec to this register, stopping at the first one that fails.
 *
 * @param spec a spec carrying predicates.
 * @return true if every predicate returns true (or there are none); false as soon as one returns false.
 */
private boolean evaluatePredicates(HiveSpecWithPredicates spec) {
  boolean allSatisfied = true;
  for (Predicate<HiveRegister> predicate : spec.getPredicates()) {
    if (!predicate.apply(this)) {
      // Stop here so that the remaining predicates are not evaluated.
      allSatisfied = false;
      break;
    }
  }
  return allSatisfied;
}
/**
 * Register the path specified in the given {@link HiveSpec}.
 *
 * <p>
 * This method should not evaluate {@link Predicate}s or execute {@link Activity}s associated with
 * the {@link HiveSpec}, since these are done in {@link #register(HiveSpec)}, which calls this method
 * between the pre- and post-activities.
 * </p>
 *
 * @param spec the {@link HiveSpec} to be registered.
 * @throws IOException if the registration fails; the exact conditions are defined by the implementing subclass.
 */
protected abstract void registerPath(HiveSpec spec) throws IOException;
/**
 * Create a Hive database if it does not exist.
 *
 * @param dbName the name of the database to be created.
 * @return true if the db is successfully created; false if the db already exists.
 * @throws IOException if the operation fails; the exact conditions are defined by the implementing subclass.
 */
public abstract boolean createDbIfNotExists(String dbName) throws IOException;
/**
 * Create a Hive table if it does not exist.
 *
 * <p>
 * {@link #createOrAlterTable(HiveTable)} relies on the return value: a false result makes it alter the table.
 * </p>
 *
 * @param table a {@link HiveTable} to be created.
 * @return true if the table is successfully created; false if the table already exists.
 * @throws IOException if the operation fails; the exact conditions are defined by the implementing subclass.
 */
public abstract boolean createTableIfNotExists(HiveTable table) throws IOException;
/**
 * Add a Hive partition to a table if it does not exist.
 *
 * <p>
 * {@link #addOrAlterPartition(HiveTable, HivePartition)} relies on the return value: a false result makes it
 * alter the partition.
 * </p>
 *
 * @param table the {@link HiveTable} to which the partition should be added.
 * @param partition a {@link HivePartition} to be added.
 * @return true if the partition is successfully added; false if the partition already exists.
 * @throws IOException if the operation fails; the exact conditions are defined by the implementing subclass.
 */
public abstract boolean addPartitionIfNotExists(HiveTable table, HivePartition partition) throws IOException;
/**
 * Determine whether a Hive table exists.
 *
 * @param dbName the database name
 * @param tableName the table name
 * @return true if the table exists, false otherwise.
 * @throws IOException if the check fails; the exact conditions are defined by the implementing subclass.
 */
public abstract boolean existsTable(String dbName, String tableName) throws IOException;
/**
 * Determine whether a Hive partition exists.
 *
 * @param dbName the database name
 * @param tableName the table name
 * @param partitionKeys a list of {@link Column}s representing the key of the partition
 * @param partitionValues a list of Strings representing the value of the partition
 * @return true if the partition exists, false otherwise.
 * @throws IOException if the check fails; the exact conditions are defined by the implementing subclass.
 */
public abstract boolean existsPartition(String dbName, String tableName, List<Column> partitionKeys,
List<String> partitionValues) throws IOException;
/**
 * Drop a table if it exists.
 *
 * @param dbName the database name
 * @param tableName the table name
 * @throws IOException if the operation fails; the exact conditions are defined by the implementing subclass.
 */
public abstract void dropTableIfExists(String dbName, String tableName) throws IOException;
/**
 * Drop a partition if it exists.
 *
 * @param dbName the database name
 * @param tableName the table name
 * @param partitionKeys a list of {@link Column}s representing the key of the partition
 * @param partitionValues a list of Strings representing the value of the partition
 * @throws IOException if the operation fails; the exact conditions are defined by the implementing subclass.
 */
public abstract void dropPartitionIfExists(String dbName, String tableName, List<Column> partitionKeys,
List<String> partitionValues) throws IOException;
/**
 * Get a {@link HiveTable} using the given db name and table name.
 *
 * @param dbName the database name
 * @param tableName the table name
 * @return an {@link Optional} of {@link HiveTable} if the table exists, otherwise {@link Optional#absent()}.
 * @throws IOException if the lookup fails; the exact conditions are defined by the implementing subclass.
 */
public abstract Optional<HiveTable> getTable(String dbName, String tableName) throws IOException;
/**
 * Get a {@link HivePartition} using the given db name, table name, partition keys and partition values.
 *
 * @param dbName the database name
 * @param tableName the table name
 * @param partitionKeys a list of {@link Column}s representing the key of the partition
 * @param partitionValues a list of Strings representing the value of the partition
 * @return an {@link Optional} of {@link HivePartition} if the partition exists, otherwise {@link Optional#absent()}.
 * @throws IOException if the lookup fails; the exact conditions are defined by the implementing subclass.
 */
public abstract Optional<HivePartition> getPartition(String dbName, String tableName, List<Column> partitionKeys,
List<String> partitionValues) throws IOException;
/**
 * Alter the given {@link HiveTable}. An Exception should be thrown if the table does not exist.
 *
 * @param table a {@link HiveTable} to which the existing table should be updated.
 * @throws IOException if the operation fails; the exact conditions are defined by the implementing subclass.
 */
public abstract void alterTable(HiveTable table) throws IOException;
/**
 * Alter the given {@link HivePartition}. An Exception should be thrown if the partition does not exist.
 *
 * @param table the {@link HiveTable} to which the partition belongs.
 * @param partition a {@link HivePartition} to which the existing partition should be updated.
 * @throws IOException if the operation fails; the exact conditions are defined by the implementing subclass.
 */
public abstract void alterPartition(HiveTable table, HivePartition partition) throws IOException;
/**
 * Create the table if it does not exist; otherwise alter the existing table to match it.
 *
 * <p>
 * Creation is attempted first via {@link #createTableIfNotExists(HiveTable)}; {@link #alterTable(HiveTable)}
 * is only called when that attempt reports that the table was not created.
 * </p>
 *
 * @param table a {@link HiveTable} to be created or altered
 * @throws IOException if either the creation attempt or the alteration throws it.
 */
public void createOrAlterTable(HiveTable table) throws IOException {
  boolean created = createTableIfNotExists(table);
  if (created) {
    return;
  }
  alterTable(table);
}
/**
 * Add the partition to the table if it does not exist; otherwise alter the existing partition to match it.
 *
 * <p>
 * Addition is attempted first via {@link #addPartitionIfNotExists(HiveTable, HivePartition)};
 * {@link #alterPartition(HiveTable, HivePartition)} is only called when that attempt reports that the
 * partition was not added.
 * </p>
 *
 * @param table the {@link HiveTable} to which the partition belongs.
 * @param partition a {@link HivePartition} to be added, or to which the existing partition should be updated.
 * @throws IOException if either the addition attempt or the alteration throws it.
 */
public void addOrAlterPartition(HiveTable table, HivePartition partition) throws IOException {
  boolean added = addPartitionIfNotExists(table, partition);
  if (added) {
    return;
  }
  alterPartition(table, partition);
}
/**
 * Reflectively instantiate the table comparator configured under {@link #HIVE_TABLE_COMPARATOR_TYPE}
 * (falling back to {@link #DEFAULT_HIVE_TABLE_COMPARATOR_TYPE}), passing both tables to its constructor.
 *
 * @param existingTable the first constructor argument of the comparator.
 * @param newTable the second constructor argument of the comparator.
 * @return the instantiated comparator.
 * @throws RuntimeException (via {@link Throwables#propagate(Throwable)}) if the class cannot be loaded or
 * instantiated; the failure is logged first.
 */
protected HiveRegistrationUnitComparator<?> getTableComparator(HiveTable existingTable, HiveTable newTable) {
  try {
    String comparatorType = this.props.getProp(HIVE_TABLE_COMPARATOR_TYPE, DEFAULT_HIVE_TABLE_COMPARATOR_TYPE);
    Class<?> comparatorClass = Class.forName(comparatorType);
    Object comparator = ConstructorUtils.invokeConstructor(comparatorClass, existingTable, newTable);
    return (HiveRegistrationUnitComparator<?>) comparator;
  } catch (ReflectiveOperationException roe) {
    log.error("Unable to instantiate Hive table comparator", roe);
    throw Throwables.propagate(roe);
  }
}
/**
 * Run every comparison of the configured table comparator on the two tables and return its result.
 *
 * @param existingTable the table passed first to the comparator.
 * @param newTable the table passed second to the comparator.
 * @return the comparator's {@code result()} after {@code compareAll()}; presumably true when the tables
 * differ and an update is needed (confirm against the comparator implementation).
 */
protected boolean needToUpdateTable(HiveTable existingTable, HiveTable newTable) {
  HiveRegistrationUnitComparator<?> tableComparator = getTableComparator(existingTable, newTable);
  return tableComparator.compareAll().result();
}
/**
 * Reflectively instantiate the partition comparator configured under {@link #HIVE_PARTITION_COMPARATOR_TYPE}
 * (falling back to {@link #DEFAULT_HIVE_PARTITION_COMPARATOR_TYPE}), passing both partitions to its constructor.
 *
 * @param existingPartition the first constructor argument of the comparator.
 * @param newPartition the second constructor argument of the comparator.
 * @return the instantiated comparator.
 * @throws RuntimeException (via {@link Throwables#propagate(Throwable)}) if the class cannot be loaded or
 * instantiated; the failure is logged first.
 */
protected HiveRegistrationUnitComparator<?> getPartitionComparator(HivePartition existingPartition,
    HivePartition newPartition) {
  try {
    String comparatorType =
        this.props.getProp(HIVE_PARTITION_COMPARATOR_TYPE, DEFAULT_HIVE_PARTITION_COMPARATOR_TYPE);
    Class<?> comparatorClass = Class.forName(comparatorType);
    Object comparator = ConstructorUtils.invokeConstructor(comparatorClass, existingPartition, newPartition);
    return (HiveRegistrationUnitComparator<?>) comparator;
  } catch (ReflectiveOperationException roe) {
    log.error("Unable to instantiate Hive partition comparator", roe);
    throw Throwables.propagate(roe);
  }
}
/**
 * Run every comparison of the configured partition comparator on the two partitions and return its result.
 *
 * @param existingPartition the partition passed first to the comparator.
 * @param newPartition the partition passed second to the comparator.
 * @return the comparator's {@code result()} after {@code compareAll()}; presumably true when the partitions
 * differ and an update is needed (confirm against the comparator implementation).
 */
protected boolean needToUpdatePartition(HivePartition existingPartition, HivePartition newPartition) {
  HiveRegistrationUnitComparator<?> partitionComparator = getPartitionComparator(existingPartition, newPartition);
  return partitionComparator.compareAll().result();
}
/**
 * Wait for all registrations submitted via {@link #register(HiveSpec)} to finish, then shut down the executor.
 *
 * <p>
 * Waiting stops at the first registration that failed or at the first interruption; the executor is shut
 * down in every case. If the calling thread was interrupted while waiting, its interrupt status is restored
 * before this method returns so that callers can still observe the interruption.
 * </p>
 *
 * @throws IOException if any registration failed (the cause is the failure of the registration task) or if
 * the calling thread was interrupted while waiting.
 */
@Override
public void close() throws IOException {
  boolean interrupted = false;
  try {
    for (Map.Entry<String, Future<Void>> entry : this.futures.entrySet()) {
      try {
        entry.getValue().get();
      } catch (ExecutionException ee) {
        // Unwrap so the IOException's cause is the task's own failure rather than the wrapper.
        throw new IOException("Failed to finish registration for " + entry.getKey(), ee.getCause());
      }
    }
  } catch (InterruptedException e) {
    // Future.get() cleared the interrupt status when it threw; remember to restore it below.
    interrupted = true;
    throw new IOException(e);
  } finally {
    try {
      ExecutorsUtils.shutdownExecutorService(this.executor, Optional.of(log));
    } finally {
      // Restore the flag only after the shutdown so that the shutdown itself behaves as it did before.
      if (interrupted) {
        Thread.currentThread().interrupt();
      }
    }
  }
}
/**
 * Get an instance of {@link HiveRegister} without specifying a metastore URI.
 *
 * <p>
 * Equivalent to calling {@link #get(State, Optional)} with {@link Optional#absent()}.
 * </p>
 *
 * @param props A {@link State} object. To get a specific implementation of {@link HiveRegister},
 * specify property {@link #HIVE_REGISTER_TYPE} as the class name. Otherwise, {@link #DEFAULT_HIVE_REGISTER_TYPE}
 * will be returned. This {@link State} object is also used to instantiate the {@link HiveRegister} object.
 * @return the instantiated {@link HiveRegister}.
 */
public static HiveRegister get(State props) {
  Optional<String> noMetastoreUri = Optional.<String> absent();
  return get(props, noMetastoreUri);
}
/**
 * Get an instance of {@link HiveRegister}, choosing the implementation class from the given {@link State}.
 *
 * @param props A {@link State} object. To get a specific implementation of {@link HiveRegister},
 * specify property {@link #HIVE_REGISTER_TYPE} as the class name. Otherwise, {@link #DEFAULT_HIVE_REGISTER_TYPE}
 * will be returned. This {@link State} object is also used to instantiate the {@link HiveRegister} object.
 * @param metastoreURI an optional metastore URI, passed unchanged to the implementation's constructor.
 * @return the instantiated {@link HiveRegister}.
 */
public static HiveRegister get(State props, Optional<String> metastoreURI) {
  String registerType = props.getProp(HIVE_REGISTER_TYPE, DEFAULT_HIVE_REGISTER_TYPE);
  return get(registerType, props, metastoreURI);
}
/**
 * Get an instance of {@link HiveRegister} by reflectively invoking a constructor of the named class with
 * {@code (props, metastoreURI)} as arguments.
 *
 * @param hiveRegisterType The name of a class that implements {@link HiveRegister}.
 * @param props A {@link State} object used to instantiate the {@link HiveRegister} object.
 * @param metastoreURI an optional metastore URI, passed as the second constructor argument.
 * @return the instantiated {@link HiveRegister}.
 * @throws RuntimeException (via {@link Throwables#propagate(Throwable)}) if the class cannot be loaded or
 * instantiated.
 */
public static HiveRegister get(String hiveRegisterType, State props, Optional<String> metastoreURI) {
  try {
    Class<?> registerClass = Class.forName(hiveRegisterType);
    Object instance = ConstructorUtils.invokeConstructor(registerClass, props, metastoreURI);
    return (HiveRegister) instance;
  } catch (ReflectiveOperationException roe) {
    throw Throwables.propagate(roe);
  }
}
}