/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package gobblin.metastore;
import com.google.common.base.Predicate;
import com.google.common.base.Strings;
import com.google.common.collect.Lists;
import com.typesafe.config.Config;
import gobblin.configuration.ConfigurationKeys;
import gobblin.configuration.State;
import gobblin.password.PasswordManager;
import gobblin.util.ConfigUtils;
import gobblin.util.io.StreamUtils;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutput;
import java.io.DataOutputStream;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.sql.Blob;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
import javax.sql.DataSource;
import org.apache.commons.dbcp.BasicDataSource;
import org.apache.hadoop.io.Text;
/**
* An implementation of {@link StateStore} backed by MySQL.
*
* <p>
*
* This implementation stores serialized {@link State}s as a blob in the database in the Sequence file format.
* The database table row is keyed by the store name and table name.
* State keys are state IDs (see {@link State#getId()}), and values are objects of {@link State} or
* any of its extensions. Keys will be empty strings if state IDs are not set
* (i.e., {@link State#getId()} returns <em>null</em>). In this case, the
* {@link MysqlStateStore#get(String, String, String)} method may not work.
* </p>
*
* @param <T> state object type
**/
public class MysqlStateStore<T extends State> implements StateStore<T> {
// Class of the state objects to be put into the store
private final Class<T> stateClass;
private final DataSource dataSource;
private final boolean compressedValues;
private static final String UPSERT_JOB_STATE_TEMPLATE =
"INSERT INTO $TABLE$ (store_name, table_name, state) VALUES(?,?,?)"
+ " ON DUPLICATE KEY UPDATE state = values(state)";
private static final String SELECT_JOB_STATE_TEMPLATE =
"SELECT state FROM $TABLE$ WHERE store_name = ? and table_name = ?";
private static final String SELECT_JOB_STATE_WITH_LIKE_TEMPLATE =
"SELECT state FROM $TABLE$ WHERE store_name = ? and table_name like ?";
private static final String SELECT_JOB_STATE_EXISTS_TEMPLATE =
"SELECT 1 FROM $TABLE$ WHERE store_name = ? and table_name = ?";
private static final String SELECT_JOB_STATE_NAMES_TEMPLATE =
"SELECT table_name FROM $TABLE$ WHERE store_name = ?";
private static final String DELETE_JOB_STORE_TEMPLATE =
"DELETE FROM $TABLE$ WHERE store_name = ?";
private static final String DELETE_JOB_STATE_TEMPLATE =
"DELETE FROM $TABLE$ WHERE store_name = ? AND table_name = ?";
private static final String CLONE_JOB_STATE_TEMPLATE =
"INSERT INTO $TABLE$(store_name, table_name, state)"
+ " (SELECT store_name, ?, state FROM $TABLE$ s WHERE"
+ " store_name = ? AND table_name = ?)"
+ " ON DUPLICATE KEY UPDATE state = s.state";
// MySQL key length limit is 767 bytes
private static final String CREATE_JOB_STATE_TABLE_TEMPLATE =
"CREATE TABLE IF NOT EXISTS $TABLE$ (store_name varchar(100) CHARACTER SET latin1 not null,"
+ "table_name varchar(667) CHARACTER SET latin1 not null,"
+ " modified_time timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,"
+ " state mediumblob, primary key(store_name, table_name))";
private final String UPSERT_JOB_STATE_SQL;
private final String SELECT_JOB_STATE_SQL;
private final String SELECT_JOB_STATE_WITH_LIKE_SQL;
private final String SELECT_JOB_STATE_EXISTS_SQL;
private final String SELECT_JOB_STATE_NAMES_SQL;
private final String DELETE_JOB_STORE_SQL;
private final String DELETE_JOB_STATE_SQL;
private final String CLONE_JOB_STATE_SQL;
/**
* Manages the persistence and retrieval of {@link State} in a MySQL database
* @param dataSource the {@link DataSource} object for connecting to MySQL
* @param stateStoreTableName the table for storing the state in rows keyed by two levels (store_name, table_name)
* @param compressedValues should values be compressed for storage?
* @param stateClass class of the {@link State}s stored in this state store
* @throws IOException
*/
public MysqlStateStore(DataSource dataSource, String stateStoreTableName, boolean compressedValues,
Class<T> stateClass) throws IOException {
this.dataSource = dataSource;
this.stateClass = stateClass;
this.compressedValues = compressedValues;
UPSERT_JOB_STATE_SQL = UPSERT_JOB_STATE_TEMPLATE.replace("$TABLE$", stateStoreTableName);
SELECT_JOB_STATE_SQL = SELECT_JOB_STATE_TEMPLATE.replace("$TABLE$", stateStoreTableName);
SELECT_JOB_STATE_WITH_LIKE_SQL = SELECT_JOB_STATE_WITH_LIKE_TEMPLATE.replace("$TABLE$", stateStoreTableName);
SELECT_JOB_STATE_EXISTS_SQL = SELECT_JOB_STATE_EXISTS_TEMPLATE.replace("$TABLE$", stateStoreTableName);
SELECT_JOB_STATE_NAMES_SQL = SELECT_JOB_STATE_NAMES_TEMPLATE.replace("$TABLE$", stateStoreTableName);
DELETE_JOB_STORE_SQL = DELETE_JOB_STORE_TEMPLATE.replace("$TABLE$", stateStoreTableName);
DELETE_JOB_STATE_SQL = DELETE_JOB_STATE_TEMPLATE.replace("$TABLE$", stateStoreTableName);
CLONE_JOB_STATE_SQL = CLONE_JOB_STATE_TEMPLATE.replace("$TABLE$", stateStoreTableName);
// create table if it does not exist
String createJobTable = CREATE_JOB_STATE_TABLE_TEMPLATE.replace("$TABLE$", stateStoreTableName);
try (Connection connection = dataSource.getConnection();
PreparedStatement createStatement = connection.prepareStatement(createJobTable)) {
createStatement.executeUpdate();
} catch (SQLException e) {
throw new IOException("Failure creation table " + stateStoreTableName, e);
}
}
/**
* creates a new {@link BasicDataSource}
* @param config the properties used for datasource instantiation
* @return
*/
public static BasicDataSource newDataSource(Config config) {
BasicDataSource basicDataSource = new BasicDataSource();
PasswordManager passwordManager = PasswordManager.getInstance(ConfigUtils.configToProperties(config));
basicDataSource.setDriverClassName(ConfigUtils.getString(config, ConfigurationKeys.STATE_STORE_DB_JDBC_DRIVER_KEY,
ConfigurationKeys.DEFAULT_STATE_STORE_DB_JDBC_DRIVER));
// MySQL server can timeout a connection so need to validate connections before use
basicDataSource.setValidationQuery("select 1");
basicDataSource.setTestOnBorrow(true);
basicDataSource.setDefaultAutoCommit(false);
basicDataSource.setTimeBetweenEvictionRunsMillis(60000);
basicDataSource.setUrl(config.getString(ConfigurationKeys.STATE_STORE_DB_URL_KEY));
basicDataSource.setUsername(passwordManager.readPassword(
config.getString(ConfigurationKeys.STATE_STORE_DB_USER_KEY)));
basicDataSource.setPassword(passwordManager.readPassword(
config.getString(ConfigurationKeys.STATE_STORE_DB_PASSWORD_KEY)));
basicDataSource.setMinEvictableIdleTimeMillis(
ConfigUtils.getLong(config, ConfigurationKeys.STATE_STORE_DB_CONN_MIN_EVICTABLE_IDLE_TIME_KEY,
ConfigurationKeys.DEFAULT_STATE_STORE_DB_CONN_MIN_EVICTABLE_IDLE_TIME));
return basicDataSource;
}
@Override
public boolean create(String storeName) throws IOException {
/* nothing to do since state will be stored as a new row in a DB table that has been validated */
return true;
}
@Override
public boolean create(String storeName, String tableName) throws IOException {
if (exists(storeName, tableName)) {
throw new IOException(String.format("State already exists for storeName %s tableName %s", storeName, tableName));
}
return true;
}
@Override
public boolean exists(String storeName, String tableName) throws IOException {
try (Connection connection = dataSource.getConnection();
PreparedStatement queryStatement = connection.prepareStatement(SELECT_JOB_STATE_EXISTS_SQL)) {
int index = 0;
queryStatement.setString(++index, storeName);
queryStatement.setString(++index, tableName);
try (ResultSet rs = queryStatement.executeQuery()) {
if (rs.next()) {
return true;
} else {
return false;
}
}
} catch (SQLException e) {
throw new IOException("Failure checking existence of storeName " + storeName + " tableName " + tableName, e);
}
}
/**
* Serializes the state to the {@link DataOutput}
* @param dataOutput output target receiving the serialized data
* @param state the state to serialize
* @throws IOException
*/
private void addStateToDataOutputStream(DataOutput dataOutput, T state) throws IOException {
new Text(Strings.nullToEmpty(state.getId())).write(dataOutput);
state.write(dataOutput);
}
@Override
public void put(String storeName, String tableName, T state) throws IOException {
putAll(storeName, tableName, Collections.singleton(state));
}
@Override
public void putAll(String storeName, String tableName, Collection<T> states) throws IOException {
try (Connection connection = dataSource.getConnection();
PreparedStatement insertStatement = connection.prepareStatement(UPSERT_JOB_STATE_SQL);
ByteArrayOutputStream byteArrayOs = new ByteArrayOutputStream();
OutputStream os = compressedValues ? new GZIPOutputStream(byteArrayOs) : byteArrayOs;
DataOutputStream dataOutput = new DataOutputStream(os)) {
int index = 0;
insertStatement.setString(++index, storeName);
insertStatement.setString(++index, tableName);
for (T state : states) {
addStateToDataOutputStream(dataOutput, state);
}
dataOutput.close();
insertStatement.setBlob(++index, new ByteArrayInputStream(byteArrayOs.toByteArray()));
insertStatement.executeUpdate();
connection.commit();
} catch (SQLException e) {
throw new IOException("Failure storing state to store " + storeName + " table " + tableName, e);
}
}
@Override
public T get(String storeName, String tableName, String stateId) throws IOException {
try (Connection connection = dataSource.getConnection();
PreparedStatement queryStatement = connection.prepareStatement(SELECT_JOB_STATE_SQL)) {
int index = 0;
queryStatement.setString(++index, storeName);
queryStatement.setString(++index, tableName);
try (ResultSet rs = queryStatement.executeQuery()) {
if (rs.next()) {
Blob blob = rs.getBlob(1);
Text key = new Text();
try (InputStream is = StreamUtils.isCompressed(blob.getBytes(1, 2)) ?
new GZIPInputStream(blob.getBinaryStream()) : blob.getBinaryStream();
DataInputStream dis = new DataInputStream(is)){
// keep deserializing while we have data
while (dis.available() > 0) {
T state = this.stateClass.newInstance();
key.readFields(dis);
state.readFields(dis);
if (key.toString().equals(stateId)) {
return state;
}
}
} catch (EOFException e) {
// no more data. GZIPInputStream.available() doesn't return 0 until after EOF.
}
}
}
} catch (RuntimeException e) {
throw e;
}catch (Exception e) {
throw new IOException("failure retrieving state from storeName " + storeName + " tableName " + tableName, e);
}
return null;
}
protected List<T> getAll(String storeName, String tableName, boolean useLike) throws IOException {
List<T> states = Lists.newArrayList();
try (Connection connection = dataSource.getConnection();
PreparedStatement queryStatement = connection.prepareStatement(useLike ?
SELECT_JOB_STATE_WITH_LIKE_SQL : SELECT_JOB_STATE_SQL)) {
queryStatement.setString(1, storeName);
queryStatement.setString(2, tableName);
try (ResultSet rs = queryStatement.executeQuery()) {
while (rs.next()) {
Blob blob = rs.getBlob(1);
Text key = new Text();
try (InputStream is = StreamUtils.isCompressed(blob.getBytes(1, 2)) ?
new GZIPInputStream(blob.getBinaryStream()) : blob.getBinaryStream();
DataInputStream dis = new DataInputStream(is)) {
// keep deserializing while we have data
while (dis.available() > 0) {
T state = this.stateClass.newInstance();
key.readString(dis);
state.readFields(dis);
states.add(state);
}
} catch (EOFException e) {
// no more data. GZIPInputStream.available() doesn't return 0 until after EOF.
}
}
}
} catch (RuntimeException re) {
throw re;
} catch (Exception e) {
throw new IOException("failure retrieving state from storeName " + storeName + " tableName " + tableName, e);
}
return states;
}
@Override
public List<T> getAll(String storeName, String tableName) throws IOException {
return getAll(storeName, tableName, false);
}
@Override
public List<T> getAll(String storeName) throws IOException {
return getAll(storeName, "%", true);
}
@Override
public List<String> getTableNames(String storeName, Predicate<String> predicate) throws IOException {
List<String> names = Lists.newArrayList();
try (Connection connection = dataSource.getConnection();
PreparedStatement queryStatement = connection.prepareStatement(SELECT_JOB_STATE_NAMES_SQL)) {
queryStatement.setString(1, storeName);
try (ResultSet rs = queryStatement.executeQuery()) {
while (rs.next()) {
String name = rs.getString(1);
if (predicate.apply(name)) {
names.add(name);
}
}
}
} catch (SQLException e) {
throw new IOException(String.format("Could not query table names for store %s", storeName), e);
}
return names;
}
@Override
public void createAlias(String storeName, String original, String alias) throws IOException {
if (!exists(storeName, original)) {
throw new IOException(String.format("State does not exist for table %s", original));
}
try (Connection connection = dataSource.getConnection();
PreparedStatement cloneStatement = connection.prepareStatement(CLONE_JOB_STATE_SQL)) {
int index = 0;
cloneStatement.setString(++index, alias);
cloneStatement.setString(++index, storeName);
cloneStatement.setString(++index, original);
cloneStatement.executeUpdate();
connection.commit();
} catch (SQLException e) {
throw new IOException(String.format("Failure creating alias for store %s original %s", storeName, original), e);
}
}
@Override
public void delete(String storeName, String tableName) throws IOException {
try (Connection connection = dataSource.getConnection();
PreparedStatement deleteStatement = connection.prepareStatement(DELETE_JOB_STATE_SQL)) {
int index = 0;
deleteStatement.setString(++index, storeName);
deleteStatement.setString(++index, tableName);
deleteStatement.executeUpdate();
connection.commit();
} catch (SQLException e) {
throw new IOException("failure deleting storeName " + storeName + " tableName " + tableName, e);
}
}
@Override
public void delete(String storeName) throws IOException {
try (Connection connection = dataSource.getConnection();
PreparedStatement deleteStatement = connection.prepareStatement(DELETE_JOB_STORE_SQL)) {
deleteStatement.setString(1, storeName);
deleteStatement.executeUpdate();
connection.commit();
} catch (SQLException e) {
throw new IOException("failure deleting storeName " + storeName, e);
}
}
}