package com.zillabyte.motherbrain.flow.heartbeats; import java.util.concurrent.Callable; import java.util.concurrent.ExecutorService; import java.util.concurrent.Future; import java.util.concurrent.ScheduledFuture; import com.zillabyte.motherbrain.flow.error.strategies.FakeLocalException; import com.zillabyte.motherbrain.flow.operations.Operation; import com.zillabyte.motherbrain.flow.operations.OperationException; import com.zillabyte.motherbrain.universe.Config; import com.zillabyte.motherbrain.utils.Log4jWrapper; import com.zillabyte.motherbrain.utils.MeteredLog; import com.zillabyte.motherbrain.utils.Utils; public class Heartbeat { public final static Long DEFAULT_POLL_INTERVAL_MS = 1000L * 10; public final static Long DEFAULT_KILL_INTERVAL_MS = 1000L * 35; final long DEFAULT_HEARTBEAT_INTERVAL_MS = Config.getOrDefault("heartbeat.poll.interval", DEFAULT_POLL_INTERVAL_MS); final long HEARTBEAT_KILL_MS = Config.getOrDefault("heartbeat.kill.interval", DEFAULT_KILL_INTERVAL_MS); private Operation _op; private long _tickInterval; private ScheduledFuture<?> _timer; private ScheduledFuture<?> _errorTimer; private Log4jWrapper _log; private long _ticks = 0L; private long _lastHeartbeat = System.currentTimeMillis(); private Exception _unhandledException = null; private Future<Void> _heavyHeartbeat; private ExecutorService _executor; /*** * * @param op */ public Heartbeat(Operation op, long tickInterval, ExecutorService exec) { this(op, exec); _tickInterval = tickInterval; } /*** * * @param op */ public Heartbeat(Operation op, ExecutorService exec) { _op = op; _log = new Log4jWrapper(Heartbeat.class, _op); _tickInterval = DEFAULT_HEARTBEAT_INTERVAL_MS; _executor = exec; } /** * @throws HeartbeatException * * */ public void start() throws HeartbeatException { // Sanity check _log.debug("Starting heartbeat..."); if (_timer != null) throw new HeartbeatException("timer already exists"); if (_errorTimer != null) throw new HeartbeatException("error timer already exists"); if (_tickInterval >= HEARTBEAT_KILL_MS) throw new IllegalStateException("tick interval cannot be larger than kill interval"); // Start polling... _timer = Utils.timerDedicated(_tickInterval, new Runnable() { @Override public void run() { try { tick(); } catch(Throwable t) { _log.error("uncaught heartbeat exception: " + t); } } }); _errorTimer = Utils.timerDedicated(_tickInterval, new Runnable() { @Override public void run() { errorTick(); } }); } /*** * */ public void shutdown() { _log.info(_op.instanceName() + " Shutting down heartbeat..."); _errorTimer.cancel(true); _heavyHeartbeat.cancel(true); _timer.cancel(true); } /*** * * @return * @throws HeartbeatException */ protected String getOperationState() throws HeartbeatException { return _op.getState(); } /*** * * @return */ public Exception maybeGetHeartbeatException() { if (this._timer.isDone() || this._timer.isCancelled()) { this._unhandledException = new HeartbeatException("Internal heartbeat timer is done/cancelled"); MeteredLog.info(_log, _op.instanceName() + " Heartbeat error: " + this._unhandledException); return this._unhandledException; } if (this._unhandledException != null) { // We encountered an exception... _log.error(_op.instanceName() + " Heartbeat error: " + this._unhandledException); return this._unhandledException; } else { // We've missed the heartbeat deadline ourselves? if (_lastHeartbeat + HEARTBEAT_KILL_MS < System.currentTimeMillis()) { this._unhandledException = new HeartbeatException("Internal heartbeat miss"); _log.error(_op.instanceName() + " Heartbeat error: " + this._unhandledException); return this._unhandledException; } } return null; } /** * @throws HeartbeatException * @throws InterruptedException * * */ public synchronized void tick() { try { // Init _ticks++; _lastHeartbeat = System.currentTimeMillis(); handleHeartbeat(); // Maybe exeute a heavy-heartbeat, which is basically chunks of code that can potentially // take a long time, so we don't block the main heartbeat. if (_heavyHeartbeat == null || _heavyHeartbeat.isDone()) { if (_heavyHeartbeat != null) _heavyHeartbeat.get(); // propagate exceptions _heavyHeartbeat = _executor.submit(new Callable<Void>() { @Override public Void call() throws Exception { handleActivityCheck(); _op.handleStats_ThreadUnsafe(); _op.handleCoordination_ThreadUnsafe(); _op.handlePostHeartbeat_ThreadUnsafe(); return null; } }); } else { _log.info("skipping heavy heartbeat because it's still executing..."); } } catch(Exception e) { // Inform user... e.printStackTrace(); _log.error("Ironic heartbeat error: " + e); _op.logger().logError(e); // Propagate error back to the operation... This will get rethrown on next iteration... _unhandledException = e; } } private void debug(String string) { // System.err.println(this._op.instanceName() + ": " + string); } public synchronized void errorTick() { // Init String state; try { state = getOperationState(); switch (state) { case "ERROR": // fall through case "KILLED": // fall through case "KILLING": // fall through return; default: _op.heartbeatErrorCheck_ThreadUnsafe(); } } catch (OperationException | InterruptedException | FakeLocalException | HeartbeatException e) { _log.warn("heartbeat exception in heartbeat error checking thread " + e); } } /** * @throws HeartbeatException * * */ private void handleActivityCheck() throws HeartbeatException { try { _op.handleActivityCheck_ThreadUnsafe(); } catch (Exception e) { throw new HeartbeatException(e); } } /** * @throws HeartbeatException ** * */ protected void handleHeartbeat() throws HeartbeatException { try { _op.sendMessageToFlow_ThreadUnsafe("state", getOperationState()); } catch (Exception e) { throw new HeartbeatException(e); } } /*** * * @param operation * @return * @throws HeartbeatException */ public static Heartbeat create(Operation operation, ExecutorService exec) throws HeartbeatException { Heartbeat hb = new Heartbeat(operation, exec); hb.start(); return hb; } /*** * * @return */ public boolean isRunning() { return _timer != null && !_timer.isDone(); } public long getTicks() { return _ticks; } public long getLastHeartbeat() { return _lastHeartbeat; } }