/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.datastax.drivers.jdbc.pool.cassandra.connection;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.datastax.drivers.jdbc.pool.cassandra.connection.CassandraClientMonitor.Counter;
import com.datastax.drivers.jdbc.pool.cassandra.exceptions.HectorException;
import com.datastax.drivers.jdbc.pool.cassandra.jdbc.CassandraConnectionHandle;
import com.datastax.drivers.jdbc.pool.cassandra.service.ExceptionsTranslator;
import com.datastax.drivers.jdbc.pool.cassandra.service.ExceptionsTranslatorImpl;
import com.datastax.drivers.jdbc.pool.cassandra.service.FailoverPolicy;
import com.datastax.drivers.jdbc.pool.cassandra.service.JmxMonitor;
import com.datastax.drivers.jdbc.pool.cassandra.service.Operation;
import com.datastax.drivers.jdbc.pool.cassandra.service.OperationType;
public class HConnectionManager {
private static final Logger log = LoggerFactory.getLogger(HConnectionManager.class);
private final ConcurrentMap<CassandraHost,HClientPool> hostPools;
private final ConcurrentMap<CassandraHost,HClientPool> suspendedHostPools;
private final Collection<HClientPool> hostPoolValues;
private final String clusterName;
private final LoadBalancingPolicy loadBalancingPolicy;
private final CassandraHostConfigurator cassandraHostConfigurator;
private final CassandraClientMonitor monitor;
final ExceptionsTranslator exceptionsTranslator;
private CassandraHostRetryService cassandraHostRetryService;
private NodeAutoDiscoverService nodeAutoDiscoverService;
private HostTimeoutTracker hostTimeoutTracker;
private HOpTimer timer;
private FailoverPolicy failoverPolicy;
public HConnectionManager(String clusterName, CassandraHostConfigurator cassandraHostConfigurator) {
loadBalancingPolicy = cassandraHostConfigurator.getLoadBalancingPolicy();
hostPools = new ConcurrentHashMap<CassandraHost, HClientPool>();
suspendedHostPools = new ConcurrentHashMap<CassandraHost, HClientPool>();
this.clusterName = clusterName;
if ( cassandraHostConfigurator.getRetryDownedHosts() ) {
cassandraHostRetryService = new CassandraHostRetryService(this, cassandraHostConfigurator);
}
for ( CassandraHost host : cassandraHostConfigurator.buildCassandraHosts()) {
try {
HClientPool hcp = loadBalancingPolicy.createConnection(host);
hostPools.put(host,hcp);
} catch (SQLException e) {
log.error("Could not start connection pool for host {}", host);
if ( cassandraHostRetryService != null ) {
cassandraHostRetryService.add(host);
}
}
}
if ( cassandraHostConfigurator.getUseHostTimeoutTracker() ) {
hostTimeoutTracker = new HostTimeoutTracker(this, cassandraHostConfigurator);
}
monitor = JmxMonitor.getInstance().getCassandraMonitor(this);
exceptionsTranslator = new ExceptionsTranslatorImpl();
this.cassandraHostConfigurator = cassandraHostConfigurator;
hostPoolValues = hostPools.values();
/*
if ( cassandraHostConfigurator.getAutoDiscoverHosts() ) {
nodeAutoDiscoverService = new NodeAutoDiscoverService(this, cassandraHostConfigurator);
if ( cassandraHostConfigurator.getRunAutoDiscoveryAtStartup() ) {
nodeAutoDiscoverService.doAddNodes();
}
}
*/
timer = cassandraHostConfigurator.getOpTimer();
failoverPolicy = cassandraHostConfigurator.getFailoverPolicy();
}
/**
* Returns true if the host was successfully added. In any sort of failure exceptions are
* caught and logged, returning false.
* @param cassandraHost
* @return
*/
public boolean addCassandraHost(CassandraHost cassandraHost) {
if ( !getHosts().contains(cassandraHost) ) {
HClientPool pool = null;
try {
cassandraHostConfigurator.applyConfig(cassandraHost);
pool = cassandraHostConfigurator.getLoadBalancingPolicy().createConnection(cassandraHost);
hostPools.putIfAbsent(cassandraHost, pool);
log.info("Added host {} to pool", cassandraHost.getName());
return true;
} catch (SQLException ex) {
log.error("General exception host to HConnectionManager: " + cassandraHost, ex);
}
} else {
log.info("Host already existed for pool {}", cassandraHost.getName());
}
return false;
}
/**
* Remove the {@link CassandraHost} from the pool, bypassing retry service. This
* would be called on a host that is known to be going away. Gracefully shuts down
* the underlying connections via {@link HClientPool#shutdown()}. This method
* will also:
* <ul>
* <li>shutdown pools in the suspended state, removing them from the underlying
* suspended map.</li>
* <li>remove hosts from {@link CassandraHostRetryService} if contained therein</li></ul>
*
* @param cassandraHost
*/
public boolean removeCassandraHost(CassandraHost cassandraHost) {
boolean removed = getHosts().contains(cassandraHost);
if ( removed ) {
HClientPool pool = hostPools.remove(cassandraHost);
if ( pool == null ) {
log.info("removeCassandraHost looking for host {} in suspendedHostPools", cassandraHost);
pool = suspendedHostPools.remove(cassandraHost);
}
if ( pool != null ) {
pool.shutdown();
} else {
removed = false;
log.info("removeCassandraHost attempt miss for CassandraHost {} May have been beaten by another thread?", cassandraHost);
}
} else if ( cassandraHostRetryService != null && cassandraHostRetryService.contains(cassandraHost)) {
log.info("Host {} not in active pools, but found in retry service.", cassandraHost);
removed = cassandraHostRetryService.remove(cassandraHost);
} else {
log.info("Remove requested on a host that was not found in active or disabled pools: {}", cassandraHost);
}
log.info("Remove status for CassandraHost pool {} was {}", cassandraHost, removed);
return removed;
}
/**
* Remove the {@link HClientPool} referenced by the {@link CassandraHost} from
* the active host pools. This does not shut down the pool, only removes it as a candidate from
* future operations.
* @param cassandraHost
* @return true if the operation was successful.
*/
public boolean suspendCassandraHost(CassandraHost cassandraHost) {
HClientPool pool = hostPools.remove(cassandraHost);
boolean removed = pool != null;
if ( removed ) {
suspendedHostPools.put(cassandraHost, pool);
}
log.info("Suspend operation status was {} for CassandraHost {}", removed, cassandraHost);
return removed;
}
/**
* The opposite of suspendCassandraHost, places the pool back into selection
* @param cassandraHost
* @return true if this operation was successful. A no-op returning false
* if there was no such host in the underlying suspendedHostPool map.
*/
public boolean unsuspendCassandraHost(CassandraHost cassandraHost) {
HClientPool pool = suspendedHostPools.remove(cassandraHost);
boolean readded = pool != null;
if ( readded ) {
boolean alreadyThere = hostPools.putIfAbsent(cassandraHost, pool) != null;
if ( alreadyThere ) {
log.error("Unsuspend called on a pool that was already active for CassandraHost {}", cassandraHost);
pool.shutdown();
}
}
log.info("UN-Suspend operation status was {} for CassandraHost {}", readded, cassandraHost);
return readded;
}
/**
* Returns a Set of {@link CassandraHost} which are in the suspended status
* @return
*/
public Set<CassandraHost> getSuspendedCassandraHosts() {
return suspendedHostPools.keySet();
}
public Set<CassandraHost> getHosts() {
return Collections.unmodifiableSet(hostPools.keySet());
}
public List<String> getStatusPerPool() {
List<String> stats = new ArrayList<String>();
for (HClientPool clientPool : hostPools.values()) {
stats.add(clientPool.getStatusAsString());
}
return stats;
}
/**
* Borrow a client using the failover mechanism.
*/
public CassandraConnectionHandle borrowClient() throws SQLException {
Operation<CassandraConnectionHandle> op = new Operation<CassandraConnectionHandle>(OperationType.BORROW_CLIENT) {
@Override
public CassandraConnectionHandle execute(CassandraConnectionHandle connection) throws SQLException {
return connection;
}
@Override
public void prepareForFailover(CassandraConnectionHandle connection) throws SQLException {
// NO-OP
}
};
this.operateWithFailover(op);
op.getResult().setManager(this);
return op.getResult();
}
public void operateWithFailover(Operation<?> op) throws SQLException {
final Object timerToken = timer.start();
int retries = Math.min(failoverPolicy.numRetries, hostPools.size());
HClientPool pool = null;
boolean success = false;
boolean retryable = false;
boolean firstTime = true;
CassandraConnectionHandle currentConnection = op.getConnection();
Set<CassandraHost> excludeHosts = new HashSet<CassandraHost>();
while ( !success ) {
try {
// Let's not borrow a connection the first time for regular operation since JDBC approach starts by
// the client acquiring a connection. Except for when we are only acquiring a connection through the
// failover mechanism OperationType.BORROW_CLIENT).
if (op.operationType == OperationType.BORROW_CLIENT || !firstTime) {
// Try a new host/connection
pool = getClientFromLBPolicy(excludeHosts);
currentConnection = (CassandraConnectionHandle) pool.borrowClient();
currentConnection.setManager(this);
// Set the new connection
op.setConnection(currentConnection);
}
if (!firstTime)
op.prepareForFailover(currentConnection);
firstTime = false;
op.executeAndSetResult(currentConnection);
success = true;
timer.stop(timerToken, op.stopWatchTagName, true);
break;
} catch (Exception ex) {
if ( exceptionsTranslator.isUnrecoverable(ex)) {
// break out on HUnavailableException as well since we can no longer satisfy the CL
throw (SQLException) ex;
} else if (exceptionsTranslator.hasTimedout(ex)) {
// DO NOT decrement retries, we will be keep retrying on timeouts until it comes back
// if HLT.checkTimeout(cassandraHost): suspendHost(cassandraHost);
doTimeoutCheck(pool.getCassandraHost());
retryable = true;
monitor.incCounter(Counter.RECOVERABLE_TIMED_OUT_EXCEPTIONS);
currentConnection.close();
// TODO timecheck on how long we've been waiting on timeouts here
// suggestion per user moores on hector-users
} else if (exceptionsTranslator.isATransportError(ex)) {
// client can be null in this situation
if ( currentConnection != null ) {
currentConnection.close();
}
markHostAsDown(pool.getCassandraHost());
excludeHosts.add(pool.getCassandraHost());
retryable = true;
monitor.incCounter(Counter.RECOVERABLE_TRANSPORT_EXCEPTIONS);
} else if (exceptionsTranslator.isPoolExhausted(ex)) {
retryable = true;
if ( hostPools.size() == 1 ) {
throw new SQLException(ex);
}
monitor.incCounter(Counter.POOL_EXHAUSTED);
excludeHosts.add(pool.getCassandraHost());
} else {
// something strange happened. Added here as suggested by sbridges.
// I think this gives a sane way to future-proof against any API additions
// that we don't add in time.
retryable = false;
}
if ( retries <= 0 || retryable == false) {
if (ex instanceof SQLException)
throw (SQLException) ex;
else
throw new SQLException(ex);
}
log.warn("Could not fullfill request on this host {}", pool.getCassandraHost());
log.warn("Exception: ", ex);
monitor.incCounter(Counter.SKIP_HOST_SUCCESS);
sleepBetweenHostSkips(failoverPolicy);
} finally {
--retries;
if ( !success ) {
monitor.incCounter(op.failCounter);
timer.stop(timerToken, op.stopWatchTagName, false);
}
releaseClient(currentConnection);
}
}
}
public HOpTimer getTimer() {
return timer;
}
public void setTimer(HOpTimer timer) {
this.timer = timer;
}
/**
* Use the HostTimeoutCheck and initiate a suspend if and only if
* we are configured for such AND there is more than one operating host pool
* @param cassandraHost
*/
private void doTimeoutCheck(CassandraHost cassandraHost) {
if ( hostTimeoutTracker != null && hostPools.size() > 1) {
if (hostTimeoutTracker.checkTimeout(cassandraHost) ) {
suspendCassandraHost(cassandraHost);
}
}
}
/**
* Sleeps for the specified time as determined by sleepBetweenHostsMilli.
* In many cases failing over to other hosts is done b/c the cluster is too busy, so the sleep b/w
* hosts may help reduce load on the cluster.
*/
private void sleepBetweenHostSkips(FailoverPolicy failoverPolicy) {
if (failoverPolicy.sleepBetweenHostsMilli > 0) {
if ( log.isDebugEnabled() ) {
log.debug("Will sleep for {} millisec", failoverPolicy.sleepBetweenHostsMilli);
}
try {
Thread.sleep(failoverPolicy.sleepBetweenHostsMilli);
} catch (InterruptedException e) {
log.warn("Sleep between hosts interrupted", e);
}
}
}
private HClientPool getClientFromLBPolicy(Set<CassandraHost> excludeHosts) {
if ( hostPools.isEmpty() ) {
throw new HectorException("All host pools marked down. Retry burden pushed out to client.");
}
return loadBalancingPolicy.getPool(hostPoolValues, excludeHosts);
}
public void releaseClient(CassandraConnectionHandle connectionHandle) throws SQLException {
if (connectionHandle == null ) return;
if (connectionHandle.isClosed) return;
HClientPool pool = hostPools.get(connectionHandle.getCassandraHost());
if ( pool == null ) {
pool = suspendedHostPools.get(connectionHandle.getCassandraHost());
}
if ( pool != null ) {
pool.releaseClient(connectionHandle);
} else {
log.info("Client {} released to inactive or dead pool. Closing.", connectionHandle.getCassandraHost());
closeQuietly(connectionHandle);
}
connectionHandle.isClosed = true;
}
private void closeQuietly(CassandraConnectionHandle connectionHandle) {
try {
connectionHandle.getInternalConnection().close();
} catch (SQLException e) {
log.info("Unexpected error while closing the connection: " + connectionHandle.getCassandraHost());
}
}
void markHostAsDown(CassandraHost cassandraHost) {
log.error("MARK HOST AS DOWN TRIGGERED for host {}", cassandraHost.getName());
HClientPool pool = hostPools.remove(cassandraHost);
if ( pool != null ) {
log.error("Pool state on shutdown: {}", pool.getStatusAsString());
pool.shutdown();
if ( cassandraHostRetryService != null )
cassandraHostRetryService.add(cassandraHost);
}
}
public Set<CassandraHost> getDownedHosts() {
return cassandraHostRetryService.getDownedHosts();
}
public Collection<HClientPool> getActivePools() {
return Collections.unmodifiableCollection(hostPools.values());
}
public String getClusterName() {
return clusterName;
}
public void shutdown() {
log.info("Shutdown called on HConnectionManager");
if ( cassandraHostRetryService != null )
cassandraHostRetryService.shutdown();
if ( nodeAutoDiscoverService != null )
nodeAutoDiscoverService.shutdown();
if ( hostTimeoutTracker != null )
hostTimeoutTracker.shutdown();
for (HClientPool pool : hostPools.values()) {
try {
pool.shutdown();
} catch (IllegalArgumentException iae) {
log.error("Out of order in HConnectionManager shutdown()?: {}", iae.getMessage());
}
}
}
}