/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package gobblin.publisher;

import java.io.IOException;
import java.sql.Connection;
import java.sql.SQLException;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Set;

import javax.sql.DataSource;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;

import gobblin.configuration.ConfigurationKeys;
import gobblin.configuration.State;
import gobblin.configuration.WorkUnitState;
import gobblin.source.extractor.JobCommitPolicy;
import gobblin.util.ForkOperatorUtils;
import gobblin.util.jdbc.DataSourceBuilder;
import gobblin.writer.commands.JdbcWriterCommands;
import gobblin.writer.commands.JdbcWriterCommandsFactory;

/**
 * Publishes data into a JDBC-compatible RDBMS. Expects that all data has already been loaded into
 * the staging table(s) by the writer.
 */
public class JdbcPublisher extends DataPublisher {
public static final String JDBC_PUBLISHER_PREFIX = "jdbc.publisher.";
public static final String JDBC_PUBLISHER_DATABASE_NAME = JDBC_PUBLISHER_PREFIX + "database_name";
public static final String JDBC_PUBLISHER_FINAL_TABLE_NAME = JDBC_PUBLISHER_PREFIX + "table_name";
public static final String JDBC_PUBLISHER_REPLACE_FINAL_TABLE = JDBC_PUBLISHER_PREFIX + "replace_table";
public static final String JDBC_PUBLISHER_USERNAME = JDBC_PUBLISHER_PREFIX + "username";
public static final String JDBC_PUBLISHER_PASSWORD = JDBC_PUBLISHER_PREFIX + "password";
public static final String JDBC_PUBLISHER_ENCRYPTION_KEY_LOC = JDBC_PUBLISHER_PREFIX + "encrypt_key_loc";
public static final String JDBC_PUBLISHER_URL = JDBC_PUBLISHER_PREFIX + "url";
public static final String JDBC_PUBLISHER_TIMEOUT = JDBC_PUBLISHER_PREFIX + "timeout";
public static final String JDBC_PUBLISHER_DRIVER = JDBC_PUBLISHER_PREFIX + "driver";
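// A minimal sketch of the publisher-related job configuration, for illustration only. The driver,
// URL, credentials, and table names below are assumed placeholder values, not defaults:
//
//   jdbc.publisher.driver=com.mysql.jdbc.Driver
//   jdbc.publisher.url=jdbc:mysql://localhost:3306
//   jdbc.publisher.database_name=destination_db
//   jdbc.publisher.table_name=destination_table
//   jdbc.publisher.username=gobblin
//   jdbc.publisher.password=secret
//   jdbc.publisher.replace_table=false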
private static final Logger LOG = LoggerFactory.getLogger(JdbcPublisher.class);
private final JdbcWriterCommandsFactory jdbcWriterCommandsFactory;

/**
 * Expects all data to be in the staging table, ready to be published. To validate this, the constructor
 * checks that the job commit policy is COMMIT_ON_FULL_SUCCESS and that PUBLISH_DATA_AT_JOB_LEVEL is enabled.
 * @param state
 * @param jdbcWriterCommandsFactory
 */
@VisibleForTesting
public JdbcPublisher(State state, JdbcWriterCommandsFactory jdbcWriterCommandsFactory) {
super(state);
this.jdbcWriterCommandsFactory = jdbcWriterCommandsFactory;
validate(getState());
}

public JdbcPublisher(State state) {
this(state, new JdbcWriterCommandsFactory());
}

/**
 * @param state
 * @throws IllegalArgumentException if the job commit policy is not COMMIT_ON_FULL_SUCCESS or data is
 *         not published at the job level
 */
private void validate(State state) {
JobCommitPolicy jobCommitPolicy = JobCommitPolicy.getCommitPolicy(state.getProperties());
if (JobCommitPolicy.COMMIT_ON_FULL_SUCCESS != jobCommitPolicy) {
throw new IllegalArgumentException(this.getClass().getSimpleName()
+ " won't publish because the data is already committed at the task level. Job commit policy: " + jobCommitPolicy);
}
if (!state.getPropAsBoolean(ConfigurationKeys.PUBLISH_DATA_AT_JOB_LEVEL,
ConfigurationKeys.DEFAULT_PUBLISH_DATA_AT_JOB_LEVEL)) {
throw new IllegalArgumentException(this.getClass().getSimpleName() + " won't publish because "
+ ConfigurationKeys.PUBLISH_DATA_AT_JOB_LEVEL + " is set to false");
}
}
@VisibleForTesting
public Connection createConnection() {
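// Build a DataSource capped at a single connection: the publisher runs the entire publish in one
// transaction on one connection, so no more than one is needed.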
DataSource dataSource = DataSourceBuilder.builder().url(this.state.getProp(JDBC_PUBLISHER_URL))
.driver(this.state.getProp(JDBC_PUBLISHER_DRIVER)).userName(this.state.getProp(JDBC_PUBLISHER_USERNAME))
.passWord(this.state.getProp(JDBC_PUBLISHER_PASSWORD))
.cryptoKeyLocation(this.state.getProp(JDBC_PUBLISHER_ENCRYPTION_KEY_LOC)).maxActiveConnections(1)
.maxIdleConnections(1).state(this.state).build();
try {
return dataSource.getConnection();
} catch (SQLException e) {
throw new RuntimeException(e);
}
}
@Override
public void close() throws IOException {}
@Override
public void initialize() throws IOException {}

/**
 * 1. Truncates the destination table if requested.
 * 2. Moves data from the staging table(s) to the destination table.
 * 3. Updates the work unit states to COMMITTED.
 *
 * TODO: Investigate running this in parallel. When publishing in parallel was tested, deleting all rows
 * from the destination table locked it, so the table-copying threads waited until the transaction lock
 * timed out and an exception was thrown (MySQL). Is there a way to avoid this?
 *
 * {@inheritDoc}
 * @see gobblin.publisher.DataPublisher#publishData(java.util.Collection)
 */
@Override
public void publishData(Collection<? extends WorkUnitState> states) throws IOException {
LOG.info("Start publishing data");
int branches = this.state.getPropAsInt(ConfigurationKeys.FORK_BRANCHES_KEY, 1);
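// Tracks destination tables that have already been truncated, so that when multiple branches share
// the same destination table it is only emptied once.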
Set<String> emptiedDestTables = Sets.newHashSet();
final Connection conn = createConnection();
final JdbcWriterCommands commands = this.jdbcWriterCommandsFactory.newInstance(this.state, conn);
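// JdbcWriterCommands supplies the RDBMS-specific SQL (e.g. delete-all and table copy) used below.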
try {
conn.setAutoCommit(false);
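// Publish each fork branch inside the same transaction; a failure in any branch rolls back all of them.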
for (int i = 0; i < branches; i++) {
final String destinationTable = this.state
.getProp(ForkOperatorUtils.getPropertyNameForBranch(JDBC_PUBLISHER_FINAL_TABLE_NAME, branches, i));
final String databaseName =
this.state.getProp(ForkOperatorUtils.getPropertyNameForBranch(JDBC_PUBLISHER_DATABASE_NAME, branches, i));
Preconditions.checkNotNull(destinationTable,
"Missing destination table. Check the property " + JDBC_PUBLISHER_FINAL_TABLE_NAME);
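// Step 1: truncate the destination table if jdbc.publisher.replace_table is enabled for this branch.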
if (this.state.getPropAsBoolean(
ForkOperatorUtils.getPropertyNameForBranch(JDBC_PUBLISHER_REPLACE_FINAL_TABLE, branches, i), false)
&& !emptiedDestTables.contains(destinationTable)) {
LOG.info("Deleting table " + destinationTable);
commands.deleteAll(databaseName, destinationTable);
emptiedDestTables.add(destinationTable);
}
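// Step 2: copy the contents of each staging table into the destination table.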
Map<String, List<WorkUnitState>> stagingTables = getStagingTables(states, branches, i);
for (Map.Entry<String, List<WorkUnitState>> entry : stagingTables.entrySet()) {
String stagingTable = entry.getKey();
LOG.info("Copying data from staging table " + stagingTable + " into destination table " + destinationTable);
commands.copyTable(databaseName, stagingTable, destinationTable);
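// Step 3: mark the work units that fed this staging table as committed.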
for (WorkUnitState workUnitState : entry.getValue()) {
workUnitState.setWorkingState(WorkUnitState.WorkingState.COMMITTED);
}
}
}
LOG.info("Commit publish data");
conn.commit();
} catch (Exception e) {
try {
LOG.error("Failed publishing. Rolling back.");
conn.rollback();
} catch (SQLException se) {
LOG.error("Failed rolling back.", se);
}
throw new RuntimeException("Failed publishing", e);
} finally {
try {
conn.close();
} catch (SQLException e) {
throw new RuntimeException(e);
}
}
}
private static Map<String, List<WorkUnitState>> getStagingTables(Collection<? extends WorkUnitState> states,
int branches, int i) {
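// Group the work unit states by the (branch-scoped) staging table they wrote to.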
Map<String, List<WorkUnitState>> stagingTables = Maps.newHashMap();
for (WorkUnitState workUnitState : states) {
String stagingTableKey =
ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.WRITER_STAGING_TABLE, branches, i);
String stagingTable = Preconditions.checkNotNull(workUnitState.getProp(stagingTableKey));
List<WorkUnitState> existing = stagingTables.get(stagingTable);
if (existing == null) {
existing = Lists.newArrayList();
stagingTables.put(stagingTable, existing);
}
existing.add(workUnitState);
}
return stagingTables;
}
@Override
public void publishMetadata(Collection<? extends WorkUnitState> states) throws IOException {}
}