/*
 * Copyright 2002-2011 the original author or authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.springframework.integration.cluster;

import java.util.Date;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.springframework.context.SmartLifecycle;
import org.springframework.dao.DuplicateKeyException;
import org.springframework.dao.EmptyResultDataAccessException;
import org.springframework.transaction.annotation.Transactional;

import com.springsource.insight.annotation.InsightEndPoint;

/**
 * Implementation of {@link ClusterControl} using a row in a database
 * table as a global lock across nodes. Ensures that one and only one
 * node is active; allows exactly one non-master node to take over
 * if necessary.
 *
 * @author Gary Russell
 *
 */
public class ClusterControlImpl implements ClusterControl, SmartLifecycle {

    private final Log logger = LogFactory.getLog(getClass());

    private final ControlBusGateway controlBusGateway;

    private final HeartbeatGateway<Heartbeat> heartbeatGateway;

    private final ClusterStatusRepository clusterStatusRepository;

    /** Key of this node's status row: the raw id (single source) or {@code id:member}. */
    private final String applicationId;

    /** The application id exactly as passed to the constructor; used for the *All operations. */
    private final String application;

    private final boolean singleSource;

    private final String adapterName;

    private final String member;

    private volatile long monitorInterval;

    private volatile long keepaliveInterval;

    private volatile boolean ranOnce;

    private volatile Date lastMonitor = new Date(0);

    private volatile Date lastMessage = new Date(0);

    private volatile long lastUpdate;

    private volatile Date lastHeartbeat = new Date(0);

    private volatile boolean master;

    private volatile boolean running;

    private volatile boolean pausing;

    private volatile boolean paused;

    /** Atomic because {@code volatile int} with {@code ++} can lose concurrent increments. */
    private final AtomicInteger messageCount = new AtomicInteger();

    /**
     * Creates the controller.
     *
     * @param applicationId the application id; for a non-single-source application
     * the status row key becomes {@code applicationId + ":" + member}
     * @param singleSource true if only one member may run the adapter at a time
     * @param member the name of this member
     * @param adapterName the name of the adapter started/stopped through the control bus
     * @param monitorInterval interval (ms) used by the thresholds applied to another master
     * @param keepaliveInterval interval (ms) used by the thresholds applied to this node
     * and by the fast path in {@link #verifyStatus(boolean)}
     * @param controlBusGateway gateway that receives the start/stop commands
     * @param heartbeatGateway gateway that receives heartbeats
     * @param clusterStatusRepository repository holding the status row
     */
    public ClusterControlImpl(String applicationId, boolean singleSource, String member,
            String adapterName, long monitorInterval, long keepaliveInterval,
            ControlBusGateway controlBusGateway, HeartbeatGateway<Heartbeat> heartbeatGateway,
            ClusterStatusRepository clusterStatusRepository) {
        this.singleSource = singleSource;
        this.member = member;
        this.application = applicationId;
        if (singleSource) {
            this.applicationId = applicationId;
        }
        else {
            this.applicationId = applicationId + ":" + member;
        }
        this.adapterName = adapterName;
        this.monitorInterval = monitorInterval;
        this.keepaliveInterval = keepaliveInterval;
        this.controlBusGateway = controlBusGateway;
        this.heartbeatGateway = heartbeatGateway;
        this.clusterStatusRepository = clusterStatusRepository;
    }

    /**
     * Checks whether this node may process, and records the activity in the status row.
     * <p>
     * When not paused, not a heartbeat, and the last single-source update is more recent
     * than the keepalive interval, the repository is not consulted at all.
     *
     * @param heartbeat true when invoked for a heartbeat rather than a regular message
     * @return false if the status is paused, if (single source) this member is not the
     * current master, or if (single source) the check itself failed; true otherwise
     */
    @Transactional
    public boolean verifyStatus(boolean heartbeat) {
        long now = System.currentTimeMillis();
        ClusterStatus clusterStatus = null;
        if (!this.paused && !heartbeat && now - this.lastUpdate < this.keepaliveInterval) {
            this.messageCount.incrementAndGet();
            return true;
        }
        try {
            clusterStatus = this.lockClusterStatus();
            if (checkPaused(clusterStatus)) {
                return false;
            }
            if (this.singleSource) {
                boolean result = updateLastProcessedOrStop(clusterStatus);
                this.lastUpdate = now;
                if (!heartbeat && result) {
                    this.messageCount.incrementAndGet();
                }
                return result;
            }
            else {
                this.updateLastProcessed(clusterStatus);
                this.messageCount.incrementAndGet();
                return true;
            }
        }
        catch (Exception e) {
            if (this.singleSource) {
                // cannot prove we are still the master, so stop rather than risk two active nodes
                logger.error("Cluster Control Failure; emergency stopping adapter", e);
                this.stopInbound();
                this.master = false;
                return false;
            }
            else {
                logger.error("Cluster Control Exception", e);
                return true;
            }
        }
        finally {
            // use one local so both fields receive the same instance
            Date stamp = new Date(now);
            this.lastMessage = stamp;
            if (heartbeat) {
                this.lastHeartbeat = stamp;
            }
            if (clusterStatus != null) {
                this.clusterStatusRepository.unlock(this.applicationId);
            }
        }
    }

    /**
     * If the status row says PAUSED, stops the adapter and records the paused state.
     *
     * @param clusterStatus the (locked) status row
     * @return true if the status is paused
     */
    private boolean checkPaused(ClusterStatus clusterStatus) {
        if (!ClusterStatus.STATUS_PAUSED.equals(clusterStatus.getStatus())) {
            return false;
        }
        if (this.singleSource) {
            logger.warn("Application is PAUSED " + clusterStatus.toString());
        }
        else {
            logger.warn("Instance is PAUSED " + clusterStatus.toString());
        }
        this.stopInbound();
        this.paused = true;
        this.pausing = false;
        return true;
    }

    /**
     * Updates the last-processed time if this member is the current master;
     * otherwise stops the adapter.
     *
     * @param clusterStatus the (locked) status row
     * @return true if this member is the current master
     */
    private boolean updateLastProcessedOrStop(ClusterStatus clusterStatus) {
        if (this.member.equals(clusterStatus.getCurrentMaster())) {
            String pendingUsurper = clusterStatus.getPendingUsurper();
            if (logger.isDebugEnabled() && pendingUsurper != null && !"".equals(pendingUsurper)) {
                logger.debug("Removing pending usurper " + pendingUsurper);
            }
            updateLastProcessed(clusterStatus);
            return true;
        }
        if (logger.isDebugEnabled()) {
            logger.debug("Current master is " + clusterStatus.getCurrentMaster());
        }
        this.stopInbound();
        return false;
    }

    /**
     * Stamps the row with the current time, clears any pending usurper and
     * hands the row to the repository.
     *
     * @param clusterStatus the (locked) status row
     */
    private void updateLastProcessed(ClusterStatus clusterStatus) {
        clusterStatus.setLastProcessed(new Date());
        clusterStatus.setPendingUsurper("");
        this.clusterStatusRepository.updateLastProcessed(clusterStatus);
        logger.trace("Updated Last Processed");
    }

    /**
     * Sends a new {@link Heartbeat} through the heartbeat gateway.
     */
    public void sendHeartbeat() {
        Heartbeat heartbeat = new Heartbeat();
        if (logger.isDebugEnabled()) {
            logger.debug("Sending heartbeat " + heartbeat);
        }
        this.heartbeatGateway.sendHeartbeat(heartbeat);
    }

    /**
     * Sends a {@code stop()} command for the adapter through the control bus gateway.
     */
    public void stopInbound() {
        logger.debug("Stopping adapter");
        this.controlBusGateway.sendCommand("@'" + this.adapterName + "'.stop()");
    }

    /**
     * Sends a {@code start()} command for the adapter through the control bus gateway.
     *
     * @throws IllegalStateException if this is a single-source application and
     * this member is not the master
     */
    public void startInbound() {
        if (this.singleSource && !this.master) {
            throw new IllegalStateException("Cannot start adapter - single source application "
                    + "and this is not the master - "
                    + this.clusterStatusRepository.find(this.applicationId));
        }
        this.startInboundInternal();
    }

    /**
     * Sends the start command without the master check.
     */
    private void startInboundInternal() {
        logger.debug("Starting adapter");
        this.controlBusGateway.sendCommand("@'" + this.adapterName + "'.start()");
    }

    /**
     * Periodic monitor. Does nothing unless {@link #isRunning()}. Otherwise locks the
     * status row; if it is paused the adapter is stopped and the method returns. For a
     * single-source application the master checks run; for a multi-source application
     * the adapter is restarted if this node was previously paused. The row is unlocked
     * on exit.
     */
    @Transactional(timeout=30)
    @InsightEndPoint
    public void doMonitor() {
        if (!this.isRunning()) {
            return;
        }
        ClusterStatus clusterStatus = null;
        try {
            clusterStatus = lockClusterStatus();
            if (checkPaused(clusterStatus)) {
                return;
            }
            if (this.singleSource) {
                monitorSingleSource(clusterStatus);
            }
            else if (this.paused) {
                this.doResume(clusterStatus);
            }
            this.paused = false;
        }
        finally {
            if (clusterStatus != null) {
                this.clusterStatusRepository.unlock(this.applicationId);
            }
        }
    }

    /**
     * Logs the resumption and restarts the adapter.
     *
     * @param clusterStatus the (locked) status row, used for logging only
     */
    private void doResume(ClusterStatus clusterStatus) {
        if (this.singleSource) {
            logger.info("Application RESUMED " + clusterStatus.toString());
        }
        else {
            logger.info("Instance RESUMED " + clusterStatus.toString());
        }
        this.startInboundInternal();
    }

    /**
     * Ensures at most one instance is active.
     * <p>
     * On the first invocation the adapter is started if nobody is master (this member
     * then claims mastership on the row) or if this member already is the master.
     * On every invocation the member then checks either its own health (master) or
     * the master's health (non-master).
     *
     * @param clusterStatus the (locked) status row
     */
    private void monitorSingleSource(ClusterStatus clusterStatus) {
        if (!this.ranOnce) {
            this.ranOnce = true;
            String initialMaster = clusterStatus.getCurrentMaster();
            if ("".equals(initialMaster)) {
                // nobody is master, preemptive acquisition
                clusterStatus.setCurrentMaster(this.member);
                this.startInboundInternal();
            }
            else if (this.member.equals(initialMaster)) {
                this.startInboundInternal();
            }
            // if we're the master, start the clock so
            // we don't immediately think we're delinquent
            this.updateLastProcessedOrStop(clusterStatus);
        }
        // Read the master only now: the first-run block above may have just claimed
        // mastership, and a value captured earlier would wrongly treat this member as
        // a non-master for the rest of this pass.
        // NOTE(review): whether updateLastProcessed() also persists the newly claimed
        // master is not visible from this class - confirm against the repository.
        String currentMaster = clusterStatus.getCurrentMaster();
        Date now = new Date();
        if (this.member.equals(currentMaster)) {
            this.master = true;
            checkMyHealth(clusterStatus, now);
        }
        else {
            if (this.master) {
                logger.error("Master status has been lost to " + currentMaster);
                this.stopInbound();
            }
            this.master = false;
            checkMasterHealth(clusterStatus, now);
        }
        this.lastMonitor = now;
    }

    /**
     * Locks and returns the status row for this application id, creating the row
     * first if the repository reports that none exists.
     *
     * @return the locked status row
     */
    private ClusterStatus lockClusterStatus() {
        ClusterStatus clusterStatus = null;
        try {
            clusterStatus = this.clusterStatusRepository.lock(this.applicationId);
        }
        catch (EmptyResultDataAccessException ignored) {
            // no row yet - it is created below
        }
        if (clusterStatus == null) {
            try {
                this.clusterStatusRepository.create(new ClusterStatus(this.applicationId, this.member));
            }
            catch (DuplicateKeyException e) {
                // someone else created the row between our lock attempt and our create
                logger.info("Lost race to become first master");
            }
            clusterStatus = this.clusterStatusRepository.lock(this.applicationId);
        }
        return clusterStatus;
    }

    /**
     * Run by a non-master. If the master has processed within one monitor interval
     * nothing happens. If it has not processed for more than two monitor intervals:
     * <ul>
     * <li>when this member is already the pending usurper and its usurp timestamp is
     * older than one monitor interval, it takes over as master, starts the adapter
     * and sends a heartbeat;</li>
     * <li>when there is no pending usurper, or the pending usurper's timestamp is older
     * than three monitor intervals, this member registers itself as pending usurper;</li>
     * <li>otherwise another usurper is in progress and nothing is done.</li>
     * </ul>
     *
     * @param clusterStatus the (locked) status row
     * @param now the time of this monitor pass
     */
    private void checkMasterHealth(ClusterStatus clusterStatus, Date now) {
        long lastProcessed = clusterStatus.getLastProcessed().getTime();
        if (lastProcessed >= calcThreshold(now, this.monitorInterval, 1)) {
            if (logger.isDebugEnabled()) {
                logger.debug("Master (" + clusterStatus.getCurrentMaster() + ") processing OK");
            }
            return;
        }
        if (lastProcessed >= calcThreshold(now, this.monitorInterval, 2)) {
            if (logger.isDebugEnabled()) {
                logger.debug("Master (" + clusterStatus.getCurrentMaster() + ") not processed for "
                        + ((now.getTime() - lastProcessed) / 1000) + " seconds");
            }
            return;
        }
        if (logger.isDebugEnabled()) {
            logger.debug("Master (" + clusterStatus.getCurrentMaster() + ") not processed for "
                    + ((now.getTime() - lastProcessed) / 1000)
                    + " seconds - considering acquisition");
        }
        String pendingUsurper = clusterStatus.getPendingUsurper();
        if (this.member.equals(pendingUsurper)) {
            if (clusterStatus.getUsurpTimestamp().getTime() < calcThreshold(now, this.monitorInterval, 1)) {
                logger.warn("Taking over as cluster master " + clusterStatus);
                clusterStatus.setPendingUsurper("");
                clusterStatus.setUsurpTimestamp(now);
                clusterStatus.setCurrentMaster(this.member);
                this.clusterStatusRepository.updateMaster(clusterStatus);
                this.master = true;
                this.startInboundInternal();
                this.sendHeartbeat();
                logger.warn("Taken over as cluster master and sent heartbeat " + clusterStatus);
            }
        }
        else if (pendingUsurper == null || pendingUsurper.equals("")
                || clusterStatus.getUsurpTimestamp().getTime() < calcThreshold(now, this.monitorInterval, 3)) {
            if (logger.isDebugEnabled()) {
                logger.debug("Beginning usurp " + clusterStatus);
            }
            clusterStatus.setPendingUsurper(this.member);
            clusterStatus.setUsurpTimestamp(now);
            this.clusterStatusRepository.updateUsurper(clusterStatus);
        }
        else if (logger.isDebugEnabled()) {
            logger.debug("Another usurper in process of acquisition " + clusterStatus);
        }
    }

    /**
     * Run by the master. If it has processed within one keepalive interval nothing
     * happens; within three, a heartbeat is sent; otherwise the adapter is stopped and,
     * if a fresh read of the row still names this member as master, the master is
     * cleared on the row.
     *
     * @param clusterStatus the (locked) status row
     * @param now the time of this monitor pass
     */
    private void checkMyHealth(ClusterStatus clusterStatus, Date now) {
        long lastProcessed = clusterStatus.getLastProcessed().getTime();
        if (lastProcessed >= calcThreshold(now, this.keepaliveInterval, 1)) {
            if (logger.isDebugEnabled()) {
                logger.debug("Master (me: " + clusterStatus.getCurrentMaster() + ") processing OK");
            }
            return;
        }
        if (lastProcessed >= calcThreshold(now, this.keepaliveInterval, 3)) {
            if (logger.isDebugEnabled()) {
                logger.debug("Master (me: " + clusterStatus.getCurrentMaster() + ") not processed for "
                        + ((now.getTime() - lastProcessed) / 1000)
                        + " seconds - sending heartbeat");
            }
            this.sendHeartbeat();
            return;
        }
        // missed 3 intervals - shut down
        logger.error("Master (me: " + clusterStatus.getCurrentMaster() + ") not processed for "
                + ((now.getTime() - lastProcessed) / 1000)
                + " seconds - relinquishing mastership");
        this.stopInbound();
        this.master = false;
        ClusterStatus newClusterStatus = this.clusterStatusRepository.find(this.applicationId);
        if (this.member.equals(newClusterStatus.getCurrentMaster())) {
            clusterStatus.setCurrentMaster("");
            clusterStatus.setUsurpTimestamp(new Date());
            this.clusterStatusRepository.updateMaster(clusterStatus);
        }
    }

    /**
     * @return {@code now} minus {@code multiplier} intervals, in epoch milliseconds
     */
    private long calcThreshold(Date now, long interval, int multiplier) {
        return now.getTime() - interval * multiplier;
    }

    /**
     * Logs a failed operation together with its cause and produces the text returned
     * to the caller.
     *
     * @param operation short name of the operation, used in the log message
     * @param t the failure
     * @return the failure's message, or its {@code toString()} when it has no message
     */
    private String describeFailure(String operation, Throwable t) {
        // pass the Throwable as the cause so that the stack trace is logged
        logger.error(operation + " failed", t);
        String message = t.getMessage();
        return (message != null) ? message : t.toString();
    }

    /**
     * @return the number of messages counted by {@link #verifyStatus(boolean)}
     */
    public int getMessageCount() {
        return this.messageCount.get();
    }

    /**
     * @return the time since last monitor in seconds
     */
    public float getTimeSinceLastMonitor() {
        return ((float) (System.currentTimeMillis() - this.lastMonitor.getTime())) / 1000;
    }

    /**
     * @return the time since last message in seconds
     */
    public float getTimeSinceLastMessage() {
        return ((float) (System.currentTimeMillis() - this.lastMessage.getTime())) / 1000;
    }

    /**
     * @return the time since last heartbeat in seconds
     */
    public float getTimeSinceHearbeat() {
        return ((float) (System.currentTimeMillis() - this.lastHeartbeat.getTime())) / 1000;
    }

    /**
     * @return true if this instance is the master
     */
    public boolean isMaster() {
        return this.master;
    }

    /**
     * @return true if this is a single-source application
     */
    public boolean isSingleSource() {
        return this.singleSource;
    }

    /**
     * @return the key of this node's status row
     */
    public String getApplicationId() {
        return this.applicationId;
    }

    /**
     * Marks the controller as running so that {@link #doMonitor()} does its work.
     */
    public void start() {
        this.running = true;
    }

    /**
     * Marks the controller as not running and stops the adapter.
     */
    public void stop() {
        this.running = false;
        this.stopInbound();
    }

    public boolean isRunning() {
        return this.running;
    }

    public int getPhase() {
        return Integer.MAX_VALUE;
    }

    public boolean isAutoStartup() {
        return true;
    }

    /**
     * Stops synchronously, then runs the callback.
     */
    public void stop(Runnable callback) {
        this.stop();
        callback.run();
    }

    /**
     * @return true if this node has observed the PAUSED status and stopped its adapter
     */
    public boolean isPaused() {
        return this.paused;
    }

    /**
     * Stops the adapter and marks the status row PAUSED.
     *
     * @return a description of the outcome, or the failure text if the update failed
     */
    @Transactional
    public String pause() {
        if (this.pausing) {
            return "Already pausing";
        }
        if (this.paused) {
            return "Already paused";
        }
        this.pausing = true;
        this.stopInbound();
        ClusterStatus clusterStatus = null;
        try {
            clusterStatus = lockClusterStatus();
            clusterStatus.setStatus(ClusterStatus.STATUS_PAUSED);
            this.clusterStatusRepository.updateLastProcessed(clusterStatus);
            if (this.singleSource) {
                logger.info("Pausing application");
                return "Pausing application";
            }
            else {
                logger.info("Pausing instance");
                return "Pausing instance";
            }
        }
        catch (Throwable t) {
            this.pausing = false;
            return describeFailure("pause", t);
        }
        finally {
            // release the lock exactly as verifyStatus() and doMonitor() do
            if (clusterStatus != null) {
                this.clusterStatusRepository.unlock(this.applicationId);
            }
        }
    }

    /**
     * Marks the status row RUNNING. Only acts if this node is currently paused.
     *
     * @return a description of the outcome, or the failure text if the update failed
     */
    @Transactional
    public String resume() {
        if (!this.paused) {
            return "Not paused";
        }
        ClusterStatus clusterStatus = null;
        try {
            clusterStatus = lockClusterStatus();
            clusterStatus.setStatus(ClusterStatus.STATUS_RUNNING);
            this.clusterStatusRepository.updateLastProcessed(clusterStatus);
            if (this.singleSource) {
                logger.info("Resuming application");
                return "Resuming application";
            }
            else {
                logger.info("Resuming instance");
                return "Resuming instance";
            }
        }
        catch (Throwable t) {
            return describeFailure("resume", t);
        }
        finally {
            // release the lock exactly as verifyStatus() and doMonitor() do
            if (clusterStatus != null) {
                this.clusterStatusRepository.unlock(this.applicationId);
            }
        }
    }

    /**
     * @return the status row as text, followed by this node's single-source and
     * paused flags; or the failure text if the row could not be read
     */
    public String obtainApplicationStatus() {
        try {
            ClusterStatus clusterStatus = this.clusterStatusRepository.find(this.applicationId);
            return clusterStatus.toString()
                    + " single-source:" + this.singleSource
                    + " paused:" + this.paused;
        }
        catch (Throwable t) {
            return describeFailure("obtainApplicationStatus", t);
        }
    }

    /**
     * Sets the PAUSED status through the repository's bulk update for the
     * application. Not applicable to single-source applications.
     *
     * @return a description of the outcome, or the failure text if the update failed
     */
    public String pauseAll() {
        if (this.singleSource) {
            return "Single source application - use pause()";
        }
        try {
            int n = this.clusterStatusRepository.updateStatusAll(this.application,
                    ClusterStatus.STATUS_PAUSED);
            return "All instances will be paused (" + n + " rows updated)";
        }
        catch (Throwable t) {
            return describeFailure("pauseAll", t);
        }
    }

    /**
     * Sets the RUNNING status through the repository's bulk update for the
     * application. Not applicable to single-source applications.
     *
     * @return a description of the outcome, or the failure text if the update failed
     */
    public String resumeAll() {
        if (this.singleSource) {
            return "Single source application - use resume()";
        }
        try {
            int n = this.clusterStatusRepository.updateStatusAll(this.application,
                    ClusterStatus.STATUS_RUNNING);
            return "All instances will be resumed (" + n + " rows updated)";
        }
        catch (Throwable t) {
            return describeFailure("resumeAll", t);
        }
    }

}