package com.zillabyte.motherbrain.flow.operations; import java.io.IOException; import java.io.Serializable; import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.NoSuchElementException; import java.util.Set; import java.util.concurrent.Callable; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicInteger; import net.sf.json.JSONObject; import org.apache.commons.lang.SerializationUtils; import org.codehaus.plexus.util.ExceptionUtils; import org.eclipse.jdt.annotation.NonNullByDefault; import org.javatuples.Triplet; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.collect.Sets; import com.google.monitoring.runtime.instrumentation.common.com.google.common.collect.LinkedListMultimap; import com.zillabyte.motherbrain.benchmarking.Benchmark; import com.zillabyte.motherbrain.coordination.CoordinationException; import com.zillabyte.motherbrain.coordination.Lock; import com.zillabyte.motherbrain.coordination.MessageHandler; import com.zillabyte.motherbrain.coordination.Watcher; import com.zillabyte.motherbrain.flow.Fields; import com.zillabyte.motherbrain.flow.Flow; import com.zillabyte.motherbrain.flow.StateMachineException; import com.zillabyte.motherbrain.flow.collectors.OutputCollector; import com.zillabyte.motherbrain.flow.collectors.coordinated.CoordinatedOutputCollector; import com.zillabyte.motherbrain.flow.config.FlowConfig; import com.zillabyte.motherbrain.flow.config.OperationConfig; import com.zillabyte.motherbrain.flow.config.UserConfig; import com.zillabyte.motherbrain.flow.error.strategies.FakeLocalException; import com.zillabyte.motherbrain.flow.error.strategies.OperationErrorStrategy; import 
com.zillabyte.motherbrain.flow.graph.Connection;
import com.zillabyte.motherbrain.flow.heartbeats.Heartbeat;
import com.zillabyte.motherbrain.flow.heartbeats.HeartbeatException;
import com.zillabyte.motherbrain.flow.operations.decorators.EmitDecorator;
import com.zillabyte.motherbrain.flow.operations.multilang.MultiLangException;
import com.zillabyte.motherbrain.metrics.Metrics;
import com.zillabyte.motherbrain.relational.DefaultStreamException;
import com.zillabyte.motherbrain.universe.Config;
import com.zillabyte.motherbrain.universe.S3Exception;
import com.zillabyte.motherbrain.universe.Universe;
import com.zillabyte.motherbrain.utils.DateHelper;
import com.zillabyte.motherbrain.utils.JSONUtil;
import com.zillabyte.motherbrain.utils.Log4jWrapper;
import com.zillabyte.motherbrain.utils.SerializableMonitor;
import com.zillabyte.motherbrain.utils.Utils;
import com.zillabyte.motherbrain.utils.backoff.ExponentialBackoffTicker;

/***
 * Base class for a single operation (node) in a flow graph.  Concrete
 * subclasses identify themselves via the abstract {@code type()} (the code
 * below special-cases "source" and "sink") and implement the state machine
 * via the abstract {@code transitionToState(..)} / {@code getState()}.
 *
 * Serializable: instances are cloned/shipped via SerializationUtils (see
 * clone()); fields that must not travel are marked {@code transient}.
 */
public abstract class Operation implements Serializable {

  private static final long serialVersionUID = -7294936970361954239L;

  /** Idle-detection trigger period fallback (10s) when no config override exists. */
  public static final long IDLE_TRIGGER_PERIOD_DEFAULT = 1000L * 10;
  /** Activity timeout fallback (15 min) when no config/operation override exists. */
  public static final long ACTIVE_OPERATION_TIMEOUT_DEFAULT = 1000L * 60 * 15;
  public static final String COMPONENT_CARRY_FIELD_PREFIX = "_CARRY_FIELD_";
  /** Operation types that do not map input tuples 1:1 onto output tuples. */
  public static final HashSet<String> NONLINEAR_OPS = Sets.newHashSet("group_by", "join");
  /** Key-path segment under operationStateKey() used by the keyed state accessors. */
  public static final String SOURCE_STATE_KEY_PREFIX = "/__state/";
  public static final Integer DEFAULT_MAX_PARALLELISM = 20;
  public static final long HEARTBEAT_DEATH_SLEEP = 1000L;

  private Log4jWrapper _log = Log4jWrapper.create(Operation.class, this);
  // Wall-clock of the last markBegin/EndActivity() call; drives idle detection.
  private long _lastActivity = System.currentTimeMillis();
  private final String _userGivenName;
  // Optional dotted prefix for namespaceName(); empty string means "no prefix".
  private String _namespacePrefix = "";
  // Assigned during prepare (elsewhere in this file); null until then.
  private Integer _instanceIndex = null;
  // Number of threads currently inside markBeginActivity()/markEndActivity().
  private AtomicInteger _inOperationCounter = new AtomicInteger();
  private int _maxParallelism = 20;
  private int _targetParallelism = 1;
  private boolean _parallelismOverriden = false;
  protected Flow _containerFlow = null;
  private Map<String, String> _extraInfo;
  private int _emitCount;
  private int _loopCalls;
  private int _consumedCount;
  private OperationConfig _operationConfig;
  // Timeouts below are read once from Config in the constructor.
  private Long _idleTriggerPeriod;
  protected Long _activityTimeout;
  private Long _transactional_msg_timeout;
  private Long _initial_stage_timeout_ms;
  private Long _pre_prepare_stage_timeout_ms;
  private Long _prepare_stage_timeout_ms;
  protected transient ExecutorService _executor = null;
  // Guards all outbound messages to the flow; serializable so clones keep a monitor.
  private SerializableMonitor _messageMonitor = new SerializableMonitor();
  protected LinkedListMultimap<String, EmitDecorator> _emitDecorators = LinkedListMultimap.create();
  protected boolean _mergeIOFields = false;
  // No-op until the real logger is installed during prepare.
  protected OperationLogger _operationLogger = new OperationLogger.noOp();
  protected ExponentialBackoffTicker _ipcLogBackoff = new ExponentialBackoffTicker( 10000/* 100 */);
  protected transient Heartbeat _heartbeat = null;
  protected transient OperationErrorStrategy _errorStrategy = null;
  protected int _loopErrors = 0;
  protected OperationSleeper _sleeper = new OperationSleeper();
  // Declared output stream -> fields downstream operations expect on it.
  protected Map<String, Fields> _expectedFields = new HashMap<>();
  protected transient OutputCollector _collector;
  private Fields _incomingRouteByFields = null;
  private Integer _actualParallelism = null;
  // Caches computed by parseFlowGraph(); null until it runs.
  private Set<String> _opsFromSelfToSelf;
  private Set<String> _opsFromSourcesToSelf;
  private Set<String> _opsFromSelfToSinks;
  private Set<String> _adjacentUpStreamNonLoopOps;
  private Set<String> _adjacentDownStreamOps;
  private Watcher _flowCommandWatcher = null;

  /***
   * Builds the operation, resolving all stage/activity timeouts from Config
   * (with the defaults above), honoring a per-operation "timeout" override
   * from opConfig, and applying any persisted snapshot for resumption.
   *
   * @param name user-given operation name (becomes operationId())
   * @param opConfig per-operation configuration; may carry a "timeout" duration
   */
  public Operation(String name, OperationConfig opConfig) {
    _userGivenName = name;
    _operationConfig = opConfig;
    _idleTriggerPeriod = Config.getOrDefault("operation.idle.trigger.period", IDLE_TRIGGER_PERIOD_DEFAULT).longValue();
    _transactional_msg_timeout = Config.getOrDefault("operation.state.transaction.timeout", 1000 * 30L);
    _initial_stage_timeout_ms = Config.getOrDefault("operation.initial.stage.timeout", 1000 * 60L * 2);
    _pre_prepare_stage_timeout_ms = Config.getOrDefault("operation.pre_prepare.stage.timeout", 1000 * 60L * 2);
    _prepare_stage_timeout_ms = Config.getOrDefault("operation.prepare.stage.timeout", 1000 * 60 * 10L);
    _extraInfo = Maps.newHashMap();
    // Overrides? A per-operation "timeout" (parsed as a duration) beats the
    // global activity timeout, but only when it parses to a positive value.
    _activityTimeout = Config.getOrDefault("operation.activity.timeout", ACTIVE_OPERATION_TIMEOUT_DEFAULT).longValue();
    if (opConfig.containsKey("timeout")) {
      Long period = DateHelper.parseDuration(opConfig.get("timeout", ""));
      if (period != null && period > 0) {
        _activityTimeout = period;
        _log.info("using custom timeout of: " + _activityTimeout);
      }
    }
    // Apply run related snapshots (best-effort; errors are logged, not thrown).
    applySnapshotIfExists_ThreadUnsafe();
  }

  /** Convenience constructor with an empty OperationConfig. */
  public Operation(String name) {
    this(name, OperationConfig.createEmpty());
  }

  /***
   * Precomputes reachability sets over the top flow's graph: operations on
   * paths self->self (loops), sources->self, self->sinks, plus directly
   * adjacent upstream (non-loopback) and downstream operation names.
   * Requires setContainerFlow(..) to have been called first.
   */
  public void parseFlowGraph() {
    _opsFromSelfToSelf = Sets.newHashSet();
    _opsFromSourcesToSelf = Sets.newHashSet();
    _opsFromSelfToSinks = Sets.newHashSet();
    _adjacentDownStreamOps = Sets.newHashSet();
    _adjacentUpStreamNonLoopOps = Sets.newHashSet();
    if (!type().equalsIgnoreCase("source")) {
      _opsFromSelfToSelf = getTopFlow().graph().operationsBetween(this, this);
      for (Operation o : getTopFlow().graph().sources()) {
        _opsFromSourcesToSelf.addAll(getTopFlow().graph().operationsBetween(o, this));
      }
    }
    if (!type().equalsIgnoreCase("sink")) {
      for (Operation o : getTopFlow().graph().sinks()) {
        _opsFromSelfToSinks.addAll(getTopFlow().graph().operationsBetween(this, o));
      }
      _opsFromSelfToSinks.remove(this.namespaceName()); // exclude self
    }
    for(Connection c : getTopFlow().graph().connectionsFrom(this)) {
      _adjacentDownStreamOps.add(c.dest().namespaceName());
    }
    for (Connection c : getTopFlow().graph().connectionsTo(this)) {
      if (!c.loopBack()) _adjacentUpStreamNonLoopOps.add(c.source().namespaceName());
    }
  }

  // Accessors for the sets computed by parseFlowGraph(); null before it runs.
  public Set<String> opsFromSourcesToSelf() { return _opsFromSourcesToSelf; }
  public Set<String> opsFromSelfToSelf() { return _opsFromSelfToSelf; }
  public Set<String> opsFromSelfToSinks() { return _opsFromSelfToSinks; }
  public Set<String>
adjacentUpStreamNonLoopOps() { return _adjacentUpStreamNonLoopOps; }
  public Set<String> adjacentDownStreamOps() { return _adjacentDownStreamOps; }

  /**
   * Collects a stats map (heartbeat, activity, error and collector counters)
   * and sends it to the flow under the "stats" command.  Not thread-safe:
   * reads collector queue counts without synchronization.
   */
  public final void handleStats_ThreadUnsafe() throws InterruptedException, CoordinationException {
    // Init
    Map<String, Object> stats = Maps.newHashMap();
    if (_heartbeat != null) stats.put("last_heartbeat", _heartbeat.getLastHeartbeat());
    // Stats... numeric values are stringified; "pressure_state" is the one boolean.
    stats.put("last_activity", Long.toString(_lastActivity));
    stats.put("errors", Long.toString(getErrorCount()));
    if (_collector != null) {
      stats.put("consumed", Long.toString(_collector.getConsumeCount()));
      stats.put("emitted", Long.toString(_collector.getEmitCount()));
      stats.put("parallelism", Integer.toString(getTargetParallelism()));
      stats.put("acks", Long.toString(_collector.getAckCount()));
      stats.put("fails", Long.toString(_collector.getFailCount()));
      stats.put("coord_emits", Long.toString(_collector.getCoordEmitCount()));
      stats.put("coord_consumed", Long.toString(_collector.getCoordConsumeCount()));
      if (_collector instanceof CoordinatedOutputCollector) {
        CoordinatedOutputCollector asCoord = (CoordinatedOutputCollector)_collector;
        // Triplet order: (unacked, local queue, remote queue).
        Triplet<Integer, Integer, Integer> triplet = asCoord.getUnackedAndLocalQueueAndRemoteQueeuCount_ThreadUnsafe();
        stats.put("unacked", Integer.toString(triplet.getValue0()));
        stats.put("queued_local", Integer.toString(triplet.getValue1()));
        stats.put("queued_remote", Integer.toString(triplet.getValue2()));
        stats.put("pressure_state", asCoord.inPressureState());
      }
    }
    // Other (user defined) stats...
    updateMiscStats(stats);
    // Done
    this.sendMessageToFlow_ThreadUnsafe("stats", stats);
  }

  /** Delegates periodic coordination checks to the collector, if present. */
  public void handleCoordination_ThreadUnsafe() {
    if (this._collector != null) {
      this._collector.handleChecks();
    }
  }

  /**
   * Fires handleIdleDetected() when no activity has been marked for at least
   * _idleTriggerPeriod ms AND no thread is currently inside the operation.
   */
  public void handleActivityCheck_ThreadUnsafe() throws InterruptedException, OperationException {
    // _log.debug(instanceName()+" -- In handle activity check: "+_lastActivity+" "+_inOperation);
    if (_lastActivity + this._idleTriggerPeriod < System.currentTimeMillis() && _inOperationCounter.get() == 0) {
      handleIdleDetected();
    }
  }

  // Naming scheme: operationId == user-given name; namespaceName adds the
  // optional dotted prefix; instanceName appends the instance index
  // (note: index is null before prepare, yielding "name.null").
  public final String operationId() { return userGivenName(); }
  public final Integer instanceIndex() { return _instanceIndex; }
  public final String instanceName() { return namespaceName() + "." + instanceIndex(); }
  public final String userGivenName() { return _userGivenName; }
  public final String namespaceName() {
    if (_namespacePrefix.equals("")) {
      return _userGivenName;
    }
    return _namespacePrefix + "." + _userGivenName;
  }
  public final String namespaceOperationId() {
    if (_namespacePrefix.equals("")) {
      return operationId();
    }
    return _namespacePrefix + "." + operationId();
  }

  /****
   * Rethrows any error that has been caught by other threads: heartbeat
   * exceptions and dead heartbeats are routed to the error strategy; a
   * pending fatal error is rethrown wrapped in an OperationException.
   */
  public void heartbeatErrorCheck_ThreadUnsafe() throws InterruptedException, OperationException, FakeLocalException {
    // Init
    Throwable rethrow = null;
    // Heartbeat error?
    rethrow = _heartbeat.maybeGetHeartbeatException();
    if (rethrow != null) {
      _errorStrategy.handleHeartbeatDeath();
      // throw new OperationException(this, rethrow);
    }
    // Is heartbeat even running?
    if (_heartbeat.isRunning() == false) {
      _errorStrategy.handleHeartbeatDeath();
      // throw new OperationException("heartbeat is not running");
    }
    // Do we have any fatal errors?
    rethrow = _errorStrategy.maybeGetFatalError();
    if (rethrow != null) {
      throw new OperationException(this, rethrow);
    }
  }

  /** Universe-wide metrics sink. */
  protected final static Metrics metrics() { return Universe.instance().metrics(); }

  /**
   * Override me!
* @throws OperationException */
  public void prepare() throws MultiLangException, InterruptedException, OperationException {
  }

  /**
   * Override me! Runs before prepare() during handlePrepare.
   */
  public void prePrepare() throws InterruptedException, OperationException {
  }

  /**
   * Override me! Runs after the prepare stages complete.
   */
  public void postPrepare() throws InterruptedException, OperationException {
  }

  /** Registers an emit decorator for the given stream and binds it to this operation. */
  public void addEmitDecorator(String stream, EmitDecorator dec) {
    _emitDecorators.put(stream, dec);
    dec.setOperation(this);
  }

  public Collection<EmitDecorator> emitDecorators(String stream) {
    return _emitDecorators.get(stream);
  }

  public boolean inPressureState() {
    // This is our pressure valve. We sometimes need to let Storm catch up and
    // process all the tuples we're emitting. To do this, we allow for up to N
    // unacked tuples at a time. Once that threshold has been hit, we wait for
    // all tuples to ack (or fail) before proceeding.
    return _collector.inPressureState();
  }

  public OperationLogger logger() { return _operationLogger; }
  public Heartbeat getHeartbeat() { return _heartbeat; }

  // Loop-iteration counters. NOTE(review): the long overload narrows its
  // argument into an int counter via the compound assignment.
  public void incLoop() { _loopCalls++; }
  public void incLoop(long l) { _loopCalls += l; }
  public long getLoopCalls() { return _loopCalls; }

  /***
   * Returns a unique string suitable for using with global locks. Note that
   * this uses the Flow#run_id, which helps us avoid the situation where
   * previous locks of a flow die or otherwise don't get unlocked.
   *
   * @return
   */
  protected String lockPrefix() {
    return topFlowId() + "/cycle_" + getTopFlow().getVersion();
  }

  protected String workerHost() { return Utils.getHost(); }

  // Emit/consume counters.
  // NOTE(review): incEmit() returns the PRE-increment value (post-increment
  // expression) while incEmit(long) returns the POST-add value — an
  // asymmetry callers may depend on; confirm before changing.
  protected long incEmit(long l) { return _emitCount += l; }
  protected long incEmit() { return _emitCount++; }
  protected long getEmitCount() { return _emitCount; }
  protected long incConsumed() { return _consumedCount++; }
  protected long getConsumedCount() { return _consumedCount; }

  protected long getErrorCount() { return this._errorStrategy.getErrorCount(); }
  protected OperationErrorStrategy getErrorStrategy() { return this._errorStrategy; }
  protected Operation setConfig(OperationConfig config) { this._operationConfig = config; return this; }
  protected OperationConfig getConfig() { return this._operationConfig; }
  public OutputCollector getOutputCollector() { return _collector; }
  public Map<String, String> extraInfo() { return _extraInfo; }
  public void addExtraInfo(String key, String value){ _extraInfo.put(key, value); }
  public OperationSleeper getSleeper() { return _sleeper; }
  public final void setExtraInfo(Map<String, String> extraInfo) { _extraInfo = extraInfo; }

  /**
   * Deep-copies via serialization; the clone's expected fields are cleared
   * and its container flow detached (caller must setContainerFlow again).
   */
  @Override
  public Operation clone() {
    Operation op = (Operation) SerializationUtils.clone(this);
    op._expectedFields.clear();
    op._containerFlow = null;
    return op;
  }

  public void setContainerFlow(Flow f) { this._containerFlow = f; }

  /**
   * Sends host/instance/type/name identification to the flow under the
   * "info" command (transactionally).
   */
  protected void reportInfo() throws OperationException, InterruptedException {
    Map<String, Object> basicInfo = Maps.newHashMap();
    if (_extraInfo != null) basicInfo.putAll(_extraInfo);
    final String host = workerHost();
    if (host != null) {
      basicInfo.put("host", host);
    }
    final Integer instanceIndex = this.instanceIndex();
    if (instanceIndex != null) {
      basicInfo.put("instance_index", this.instanceIndex());
    }
    basicInfo.put("instance_name", this.instanceName());
    basicInfo.put("type", this.type());
    basicInfo.put("name", this.namespaceName());
    basicInfo.put("operation_id", this.namespaceOperationId());
    try {
sendTransactionalMessageToFlow("info", basicInfo);
    } catch (TimeoutException | CoordinationException e) {
      throw new OperationException(this, e);
    }
  }

  /**
   * Non-throwing error sweep: routes heartbeat deaths to the error strategy
   * and logs (rather than propagates) any fatal error found.
   */
  protected void errorCheck() {
    // Init
    Throwable rethrow = null;
    try {
      // Do we have unhandled heartbeat exceptions?
      if (_heartbeat.maybeGetHeartbeatException() != null) {
        _log.info("Error check picked up heartbeat error");
        getErrorStrategy().handleHeartbeatDeath();
      }
      // Do we have any fatal errors?
      rethrow = getErrorStrategy().maybeGetFatalError();
      if (rethrow != null) {
        throw new OperationException(this, rethrow);
      }
    } catch (OperationException | FakeLocalException e) {
      _log.warn("Error occurred during error check in " + instanceName() + ": " + e.getMessage());
    }
  }

  /** Announces a state change to the flow, transactionally or fire-and-forget. */
  protected void notifyOfNewState(String newState, boolean transactional) throws TimeoutException, CoordinationException {
    if (transactional) {
      sendTransactionalMessageToFlow("state", newState);
    } else {
      sendMessageToFlow_ThreadUnsafe("state", newState);
    }
  }

  /***
   * Marks a thread entering the operation: refreshes the activity timestamp
   * and increments the in-operation counter (pairs with markEndActivity()).
   */
  public final void markBeginActivity() {
    // _log.debug(instanceName() + " markBeginActivity: " + // _inOperationCounter);
    _lastActivity = System.currentTimeMillis();
    _inOperationCounter.incrementAndGet();
  }

  /***
   * Marks a thread leaving the operation; throws if calls are unbalanced.
   */
  public final void markEndActivity() {
    // _log.debug(instanceName() + " markEndActivity: " + _inOperationCounter);
    _lastActivity = System.currentTimeMillis();
    if (_inOperationCounter.decrementAndGet() < 0)
      throw new IllegalStateException(
          "a markEndActivity() looks like it was called without a corresponding markBeginActivity()");
  }

  /**
   * Override me!
   */
  public void cleanup() throws MultiLangException, OperationException, InterruptedException {
    /* noop */
  }

  /***
   * REACTOR SAFE.  Tears the operation down in order: transition to KILLING
   * (skipped while ERRORING/ERROR), stop command watchers, run subclass
   * cleanup(), stop the heartbeat, transition to KILLED, then kill the
   * executor.  Each step is best-effort; failures are printed and the
   * remaining steps still run.
   *
   * @throws TimeoutException
   * @throws CoordinationException
   * @throws StateMachineException
   */
  public synchronized final void handleCleanup() {
    // Sanity... already (being) killed — nothing to do.
    if (inState("KILLING", "KILLED")) return;
    // INIT
    _log.info(instanceName() + " cleaning up: " + this.toString());
    if (!inState("ERROR", "ERRORING")) {
      try {
        transitionToState("KILLING");
      } catch (StateMachineException | CoordinationException | TimeoutException e) {
        e.printStackTrace();
      }
    }
    // Stop listeners...
    try {
      stopWatchingFlowCommands();
    } catch (CoordinationException e) {
      e.printStackTrace();
    }
    try {
      cleanup();
    } catch (OperationException | InterruptedException e) {
      e.printStackTrace();
    }
    if (_heartbeat != null) _heartbeat.shutdown();
    // Done
    if (!inState("ERROR", "ERRORING")) {
      try {
        transitionToState("KILLED");
      } catch (StateMachineException | CoordinationException | TimeoutException e) {
        e.printStackTrace();
      }
    }
    // Stop threads related to this operation...
    if (_executor != null) {
      _log.info("stopping executor...");
      _executor.shutdownNow();
    }
  }

  /***
   * Sends a message to the flow instance. REACTOR SAFE.  Serialized on
   * _messageMonitor so concurrent senders do not interleave.
   */
  public void sendMessageToFlow_ThreadUnsafe(final String command, final Object payload) throws CoordinationException {
    synchronized (_messageMonitor) {
      Universe
          .instance()
          .state()
          .sendMessage(flowStateKey(), OperationMessage.create(Operation.this, command, payload));
    }
  }

  /** As above, but waits for acknowledgement up to _transactional_msg_timeout. */
  protected void sendTransactionalMessageToFlow(String command, Object payload) throws TimeoutException, CoordinationException {
    synchronized (_messageMonitor) {
      Universe
          .instance()
          .state()
          .sendTransactionalMessage(_executor, this.flowStateKey(), OperationMessage.create(this, command, payload), _transactional_msg_timeout);
    }
  }

  /** Override me! Hook for subclasses to add entries to the stats map. */
  protected void updateMiscStats(Map<String, Object> stats) {
  }

  /** Override me! Called when the idle condition is detected. */
  protected synchronized void handleIdleDetected() throws InterruptedException, OperationException {
  }

  public void handlePostHeartbeat_ThreadUnsafe() {
    // called after a heartbeat. use for testing.
} public void onSetExpectedFields() throws OperationException { if (_incomingRouteByFields != null) { for (Connection conn : this.prevConnections()) { for (String field : _incomingRouteByFields) { conn.source().addExpectedFields(conn.streamName(), new Fields(field)); } } } } public void onThisBatchCompleted(final Object batchId) { // Shall we 'fast-idle'? We only do this for non-rpcs that have no // loopbacks. In the case of // an RPC, the operation state should only idle when there truly hasn't been // any activity. The // no-loopback condition is a bit of a hack for now because loopback'ed // flows don't really know // when they're fully done processing. TODO: remove this condition if/when // we figure out how // to make loopback-flows signal total completion of a batch. if (!this.getTopFlow().getFlowConfig().get("rpc", false) && this.getTopFlow().graph().hasLoopbacks() == false) { try { synchronized (this) { handleIdleDetected(); if (this.inState("EMITTING", "ACTIVE")) transitionToState("IDLE"); } } catch (StateMachineException | CoordinationException | TimeoutException | InterruptedException | OperationException e) { // Not the end of the world if this errors.... Do nothing. _log.error("error in preemptive idle: " + ExceptionUtils.getFullStackTrace(e)); } } } public String flowStateKey() { return "flows/" + this.topFlowId() + "/cycle_" + getTopFlow().getVersion(); } public String operationStateKey() { return flowStateKey() + "/operations/" + this.namespaceName(); } public String instanceStateKey() { return operationStateKey() + "/instances/" + instanceName(); } public String topFlowId() { final Flow flow = getTopFlow(); if (flow == null) { throw new NullPointerException("flow has not been set! " + this.toString()); } return flow.getId(); } public Flow getTopFlow() { if (this._containerFlow == null) { throw new NullPointerException("setContainerFlow(..) has not been called! 
" + this.instanceName()); } return _containerFlow.getTopFlow(); } public Flow getContainerFlow() { return this._containerFlow; } public abstract String type(); public void handleFatalError(Throwable e) throws OperationException, FakeLocalException { _operationLogger.writeLog("FATAL ERROR:", OperationLogger.LogPriority.ERROR); _operationLogger.logError((Exception) e); this._errorStrategy.handleFatalError(e); } public void handleLoopError(Throwable e) throws OperationException, FakeLocalException { _operationLogger.writeLog("LOOP ERROR/WARNING:", OperationLogger.LogPriority.ERROR); _operationLogger.logError((Exception) e); this._errorStrategy.handleLoopError(e); } public void reportError(final Throwable e) throws InterruptedException, CoordinationException { this.sendMessageToFlow_ThreadUnsafe("errors", e); } public abstract void transitionToState(String s, boolean transactional) throws StateMachineException, TimeoutException, CoordinationException; public void transitionToState(String s) throws StateMachineException, CoordinationException, TimeoutException { transitionToState(s, false); } public Collection<Operation> prevNonLoopOperations() { return this.getTopFlow().graph().nonLoopOperationsTo(this); } /** * @throws OperationException */ public Operation prevNonLoopOperation() throws OperationException { final Iterator<Operation> iter = this.prevNonLoopOperations().iterator(); final Operation op = iter.next(); assert (op != null); if (iter.hasNext()) { throw (OperationException) new OperationException(this).setAllMessages("Did not expect more than one prev operation for " + this.instanceName() + ". 
Prev ops: " + this.prevNonLoopOperations()); } return op; } public Collection<Operation> prevOperations() { return this.getTopFlow().graph().operationsTo(this); } public Collection<Operation> nextOperations() { return this.getTopFlow().graph().operationsFrom(this); } public Collection<Operation> nextNonLoopOperations() { return this.getTopFlow().graph().nonLoopOperationsFrom(this); } public Collection<Connection> nextNonLoopConnections() { return this.getTopFlow().graph().nonLoopConnectionsFrom(this); } public Connection nextNonLoopConnection() throws OperationException { final Iterator<Connection> iter = this.nextNonLoopConnections().iterator(); final Connection conn = iter.next(); assert (conn != null); if (iter.hasNext()) { throw (OperationException) new OperationException(this).setAllMessages("Did not expect more than one next connection for op: "+namespaceName()+". Next conns: " + this.nextNonLoopConnections()); } return conn; } public Connection prevNonLoopConnection() throws OperationException { final Iterator<Connection> iter = this.prevNonLoopConnections().iterator(); final Connection conn = iter.next(); assert (conn != null); if (iter.hasNext()) { throw (OperationException) new OperationException(this).setAllMessages("Did not expect more than one prev connection for op: "+namespaceName()+". 
Prev conns: " + this.prevNonLoopConnections()); } return conn; } public Collection<Connection> prevNonLoopConnections() { return getTopFlow().graph().nonLoopConnectionsTo(this); } public Collection<Connection> prevConnections() { return getTopFlow().graph().connectionsTo(this.namespaceName()); } public Collection<Connection> nextConnections() { return getTopFlow().graph().connectionsFrom(this.namespaceName()); } public Operation nextNonLoopOperation() throws OperationException { final Iterator<Operation> iter = this.nextNonLoopOperations().iterator(); final Operation op = iter.next(); assert (op != null); if (iter.hasNext()) { throw (OperationException) new OperationException(this).setAllMessages("Did not expect more than one next operation for " + this.instanceName()+". Next ops: "+this.nextNonLoopOperations()); } return op; } public abstract String getState(); public boolean inState(String... states) { for (String s : states) { if (s.equalsIgnoreCase(getState())) { return true; } } return false; } /*** * */ public void addExpectedFields(final String stream, final Fields fields) throws OperationException { if (outputStreams().contains(stream) == false) { throw (OperationException) new OperationException(this).setAllMessages("The stream " + stream + " has not been declared. Declared: " + outputStreams().toString()); } if (_expectedFields.containsKey(stream) == false) { _expectedFields.put(stream, new Fields()); } _expectedFields.get(stream).addAll(fields); } /*** * */ public Fields getExpectedFields(String stream) { final Fields fields = _expectedFields.get(stream); return fields == null ? 
new Fields() : fields;
  }

  /** Streams this operation emits, per the flow graph. */
  public List<String> outputStreams() {
    return this.getTopFlow().graph().streamsFrom(this);
  }

  /**
   * The single declared output stream; throws DefaultStreamException when
   * there are zero or multiple streams (caller must then name one explicitly).
   */
  public String defaultStream() throws DefaultStreamException {
    try {
      final Iterator<String> iter = outputStreams().iterator();
      final String stream = iter.next();
      assert (stream != null);
      if (iter.hasNext()) {
        throw new DefaultStreamException();
      }
      return stream;
    } catch (NoSuchElementException | DefaultStreamException e) {
      throw (DefaultStreamException) new DefaultStreamException(e)
          .setUserMessage("You must explicitly declare the stream to emit to. Expected: " + outputStreams().toString());
    }
  }

  /** Override me! Called after stream declaration is finalized. */
  public void onFinalizeDeclare() throws OperationException, InterruptedException {
  }

  // Parallelism: target is capped by the configured max ("parallelism" key).
  public int getMaxParallelism() { return this.getLocalConfig().get("parallelism", DEFAULT_MAX_PARALLELISM); }
  public Operation setMaxParallelism(int v) { this.getLocalConfig().set("parallelism", v); return this; }
  public int getTargetParallelism() { return Math.min(_targetParallelism, this.getMaxParallelism()); }
  public Operation setTargetParallelism(int v) { _targetParallelism = v; return this; }
  public boolean getParallelismOverriden() { return _parallelismOverriden; }
  public Operation setParallelismOverriden(boolean v) { _parallelismOverriden = v; return this; }

  /** Create a snapshot for suspension. Override me; empty means "nothing to save". */
  public JSONObject createSnapshot() {
    return new JSONObject();
  };

  /** Apply a snapshot for resumption. Override me. */
  public void applySnapshot(JSONObject snapshot) {
  }

  /**
   * Best-effort (3 retries): loads a snapshot from DFS, applies it, then
   * deletes it.  Failures are logged to the operation logger, never thrown.
   */
  private void applySnapshotIfExists_ThreadUnsafe() {
    // Apply a snapshot if it exists
    _log.info("checking for snapshot...");
    try {
      Utils.retryUnchecked(3, new Callable<Void>() {
        @Override
        public Void call() throws Exception {
          try {
            String snapshotJSON = Universe.instance().dfsService().readFileAsString(s3SnapshotKey());
            if (snapshotJSON != null) {
              _operationLogger.writeLog("Found snapshot, applying...", OperationLogger.LogPriority.SYSTEM);
              JSONObject snapshot = JSONUtil.parseObj(snapshotJSON);
              applySnapshot(snapshot);
              _log.info("applied snapshot");
              // Delete old snapshot
              Universe.instance().dfsService().deleteFile(s3SnapshotKey());
            }
          } catch (IOException e) {
            throw (OperationException) new OperationException(Operation.this, e).setAllMessages("An error occurred while handling operation snapshot.");
          }
          return null;
        }
      });
    } catch (Exception e) {
      _operationLogger.writeLog("Error applying snapshot: " + e.getMessage(), OperationLogger.LogPriority.ERROR);
    }
  }

  /** DFS key where this instance's snapshot is stored. */
  private String s3SnapshotKey() {
    return Utils.prefixKey("flows/" + this.getContainerFlow().getId() + "/snapshots/" + instanceName());
  }

  /***
   * Pause protocol: PAUSING -> drain queues -> upload snapshot -> PAUSED.
   * State transitions are skipped while in ERROR; transition failures are
   * logged and the pause continues.
   *
   * @throws OperationException
   */
  public void handlePause() throws OperationException {
    // transition to PAUSING
    try {
      if (!getState().equalsIgnoreCase("ERROR")) transitionToState("PAUSING");
    } catch (StateMachineException | CoordinationException | TimeoutException e) {
      _operationLogger.writeLog("An error occurred while transitioning to PAUSING: " + e.getMessage(), OperationLogger.LogPriority.ERROR);
    }
    // Spin on queue for emitting
    // TODO add timeout
    while (getQueueCount() > 0) {
      Utils.sleep(1000);
    }
    // upload snapshot (3 retries)
    _operationLogger.writeLog("Creating snapshot", OperationLogger.LogPriority.SYSTEM);
    Utils.retryUnchecked(3, new Callable<Void>() {
      @Override
      public Void call() throws OperationException {
        JSONObject snapshot = createSnapshot();
        if (!snapshot.isEmpty()) {
          try {
            _operationLogger.writeLog("created snapshot, uploading to S3... at " + s3SnapshotKey(), OperationLogger.LogPriority.SYSTEM);
            Universe.instance().dfsService().writeFile(s3SnapshotKey(), snapshot.toString());
            _operationLogger.writeLog("Uploaded snapshot ", OperationLogger.LogPriority.SYSTEM);
          } catch (IOException | S3Exception | InterruptedException e) {
            throw (OperationException) new OperationException(Operation.this, e).setAllMessages("An error occured during snapshot creation.");
          }
        }
        return null;
      }
    });
    // transition to PAUSED
    try {
      if (!getState().equalsIgnoreCase("ERROR")) transitionToState("PAUSED");
    } catch (StateMachineException | CoordinationException | TimeoutException e) {
      _operationLogger.writeLog("An error occurred while transitioning to PAUSED: " + e.getMessage(), OperationLogger.LogPriority.ERROR);
    }
  }

  /** Sum of unacked + locally queued + remotely queued tuples (0 for non-coordinated collectors). */
  private int getQueueCount(){
    if (_collector instanceof CoordinatedOutputCollector) {
      Triplet<Integer, Integer, Integer> queues = ((CoordinatedOutputCollector) _collector)
          .getUnackedAndLocalQueueAndRemoteQueeuCount_ThreadUnsafe();
      int unacked = queues.getValue0();
      int local = queues.getValue1();
      int remote = queues.getValue2();
      _operationLogger.writeLog("Queue Sizes = Unacked: " + unacked + " Local: " + local + " Remote: " + remote, OperationLogger.LogPriority.SYSTEM);
      return unacked + local + remote;
    }
    return 0;
  }

  /***
   * Resume protocol: re-apply any snapshot, then transition to EMITTING
   * (skipped while in ERROR; transition failures only logged).
   *
   * @throws OperationException
   */
  public void handleResume() throws OperationException {
    _operationLogger.writeLog("Resuming...", OperationLogger.LogPriority.SYSTEM);
    // apply snapshot
    applySnapshotIfExists_ThreadUnsafe();
    // resume the operation
    try {
      if (!getState().equalsIgnoreCase("ERROR")) transitionToState("EMITTING");
    } catch (StateMachineException | CoordinationException | TimeoutException e) {
      _log.warn("An error occured while trying to resume " + e.getMessage());
    }
  }

  /***
   * Known aliases this operation is emitting. Override me; empty by default.
   */
  @SuppressWarnings("unchecked")
  public Map<String, String> getAliases() throws OperationException, InterruptedException {
    return Collections.EMPTY_MAP;
  }

  public boolean isAlive() {
    return true;
  }

  // Keyed per-operation state, stored under operationStateKey() + "/__state/".
  /***
   * Stores a value.
   */
  @NonNullByDefault
  public void setState(String key, Object val) throws InterruptedException, CoordinationException {
    Universe.instance().state()
        .setState(operationStateKey() + SOURCE_STATE_KEY_PREFIX + key, val);
  }

  /***
   * Reads a value, falling back to def when absent.
   */
  public final <T> T getState(String key, T def) throws InterruptedException, CoordinationException {
    return Universe.instance().state()
        .getState(operationStateKey() + SOURCE_STATE_KEY_PREFIX + key, def);
  }

  /****
   * Reads a value with no default.
   */
  public final <T> T getState(String key) throws InterruptedException, CoordinationException {
    return Universe.instance().state()
        .getState(operationStateKey() + SOURCE_STATE_KEY_PREFIX + key);
  }

  /***
   * Override me! Called as a batch is completing.
   */
  public void onBatchCompleting(final Object batchId) throws OperationException {
    // Do nothing by default
  }

  /***
   * Called by OutputCollector to see if okay to complete the current batch.
   * Aggregators override.
   */
  public boolean permissionToCompleteBatch(Object batchId) {
    return true;
  }

  /**
   * Wrapper around prepare()...
*/ public final void handlePrepare(final OutputCollector collector) throws InterruptedException, OperationException, OperationDeadException { try { try { // INIT _collector = collector; markBeginActivity(); _errorStrategy = Universe.instance().errorStrategyFactory() .createOperationStrategy(Operation.this); /************************************************** ** INITIAL STAGE ********************************* **************************************************/ Utils.executeWithin(_initial_stage_timeout_ms, new Callable<Void>() { @Override public Void call() throws OperationException, InterruptedException, CoordinationException, TimeoutException, HeartbeatException { // Init Benchmark.markBegin("operation.prepare.initial"); _log.info("operation instance is starting prepare(): " + instanceName()); Lock lock = Universe.instance().state().lock("flow_instance_index_" + lockPrefix()); if (lock == null) throw new NullPointerException("null lock?: " + Universe.instance().state()); // Create an executor for hearts & state services _executor = Utils.createPrefixedExecutorPool("flow-" + topFlowId() + "-operation-" + instanceName()); try { // Get the next instance id.. final Integer lastInstanceIndex = Universe.instance().state().getState(operationStateKey() + "/last_instance", 0); final Integer nextInstanceIndex = lastInstanceIndex.intValue() + 1; Universe .instance() .state() .setState(operationStateKey() + "/last_instance", nextInstanceIndex); _instanceIndex = nextInstanceIndex; } finally { lock.release(); } _log.info("using instance id: " + instanceName()); // Prepare the logger... Tell the state store where the logger islocated.. 
FlowConfig flowConfig = getTopFlow().getFlowConfig(); _operationLogger = Universe.instance().loggerFactory().logger(topFlowId(), instanceName(), flowConfig.getAuthToken(), flowConfig.getEmail()); // Start the heartbeat _heartbeat = Heartbeat.create(Operation.this, _executor); reportInfo(); // report to flow that we are alive // Start logging to the user... _log.info("we are a : " + type()); _operationLogger.writeLog("Starting new operation instance", OperationLogger.LogPriority.STARTUP); // Handle settings pubsubs _log.info("registering pubsubs"); // Create the error strategy _emitCount = 0; _consumedCount = 0; // Starting State watchForFlowCommands(); Benchmark.markEnd("operation.prepare.initial"); return null; } }); /************************************************** ** PRE_PREPARE STAGE ***************************** **************************************************/ Utils.executeWithin(_pre_prepare_stage_timeout_ms, new Callable<Void>() { @Override public Void call() throws OperationException, InterruptedException { Benchmark.markBegin("operation.prepare.pre_prepare"); _log.info("beginning pre-prepare stage..."); prePrepare(); _log.info("done with pre-prepare"); Benchmark.markEnd("operation.prepare.pre_prepare"); return null; } }); /************************************************** ** PREPARE STAGE ***************************** **************************************************/ Utils.executeWithin(_prepare_stage_timeout_ms, new Callable<Void>() { @Override public Void call() throws MultiLangException, OperationException, InterruptedException { _log.info("begin prepare stage"); Benchmark.markBegin("operation.prepare.actual"); prepare(); _log.info("done with prepare stage"); Benchmark.markEnd("operation.prepare.actual"); return null; } }); } catch (ExecutionException e) { handleFatalError(e); } catch (TimeoutException e) { handleFatalError(new OperationException(this, e).setUserMessage("Prepare timeout exceeded.")); } try { if (this.getState().equals("ERROR")) 
{ _operationLogger.writeLog("Error detected during prepare phase", OperationLogger.LogPriority.ERROR); } else { // Success postPrepare(); _operationLogger.writeLog("Prepare complete", OperationLogger.LogPriority.STARTUP); } } catch (Exception e) { handleFatalError(e); } } catch (FakeLocalException e) { e.printAndWait(); } finally { markEndActivity(); } } /*** * * Functions calling reactor functions directly. * */ private final void watchForFlowCommands() throws OperationException, InterruptedException { // _log.info("before watchForFlowCommands"); try { _flowCommandWatcher = Universe .instance() .state() .watchForMessage(_executor, flowStateKey() + "/operation_commands", new MessageHandler() { @Override public final void handleNewMessage(String key, Object command) throws OperationException, InterruptedException { // Let subclasses handle it... _log.info(Operation.this.instanceName() + " received operation_command: " + command); // System.err.println("foo"); try { handleFlowCommand((String) command); } catch (Exception e) { throw (OperationException) new OperationException(Operation.this, e).setAllMessages("An error occurred while handling the flow command: "+command+"."); } } }); } catch (CoordinationException e) { throw new OperationException(this, e); } } private void stopWatchingFlowCommands() throws CoordinationException { if (_flowCommandWatcher != null) { _flowCommandWatcher.unsubscribe(); _flowCommandWatcher = null; } } /*** * * @param command * @throws Exception */ protected void handleFlowCommand(String command) throws Exception { if (command.equalsIgnoreCase("die")) { _operationLogger.writeLog("Received command to shut down.", OperationLogger.LogPriority.RUN); handleCleanup(); } else if (command.equalsIgnoreCase("report")) { _log.info("recieved report command"); reportInfo(); sendMessageToFlow_ThreadUnsafe("state", this.getState()); } else if (command.equalsIgnoreCase("pause")) { // Pause the operation handlePause(); } else if 
(command.equalsIgnoreCase("resume")) { // Resume the operation handleResume(); } } public boolean getOperationShouldMerge() { return this._mergeIOFields; } public void setOperationShouldMerge(boolean b) { this._mergeIOFields = b; } public void addNamespacePrefix(String prefix) { if (_namespacePrefix == null || _namespacePrefix.equals("")) { _namespacePrefix = prefix; } else { _namespacePrefix = prefix + "." + _namespacePrefix; } } public String namespacePrefix() { return this._namespacePrefix; } @Override public String toString() { return "[" + this.type() + ":" + this.instanceName() + "]"; } public String prefixifyStreamName(String stream) { // NOTE: overwritten in ComponentOutput if (namespacePrefix() != null && !namespacePrefix().isEmpty()) { return namespacePrefix() + "." + stream; } else { return stream; } } /*** * Returns a sorted list of parent flows, from the top to the bottom * * @return */ public List<Flow> getParentFlowsTopDown() { Flow container = this._containerFlow; List<Flow> ret = Lists.newLinkedList(); while (container != null) { ret.add(container); container = container.getParentFlow(); } return Lists.reverse(ret); } public void mergeNewConfig(UserConfig config) { this._operationConfig.setAll(config); } public OperationConfig getMergedConfig() { // Get's the fully merged config OperationConfig conf = new OperationConfig(_operationConfig); for (Flow flow : getParentFlowsTopDown()) { conf.setAll(flow.getFlowConfig()); } return conf; } public OperationConfig getLocalConfig() { return this._operationConfig; } public void setIncomingRouteByFields(List<String> fields) { _incomingRouteByFields = new Fields(fields); } public void setIncomingRouteByFields(Fields fields) { _incomingRouteByFields = fields; } public Fields getIncomingRouteByFields() { return _incomingRouteByFields; } public boolean hasIncomingRouteByFields() { return _incomingRouteByFields != null; } public void setActualParallelism(int parallelism) { _actualParallelism = parallelism; } 
/***
 * Returns the parallelism that was actually assigned to this operation.
 *
 * @throws IllegalStateException if setActualParallelism has not been called yet
 */
public int getActualParallelism() {
  final Integer assigned = _actualParallelism;
  if (assigned == null) {
    throw new IllegalStateException("actualParallelism hasn't been set yet.");
  }
  return assigned;
}

/***
 * True when more than one instance of this operation is running.
 */
public boolean hasSiblingInstances() {
  return getActualParallelism() > 1;
}

/***
 * Returns the configured activity timeout for this operation.
 */
public Long getActivityTimeout() {
  return _activityTimeout;
}

/***
 * Hook invoked when back-pressure is detected; logs and lets throttling kick in.
 */
public void onEnterPressureState() {
  logger().info("Tuples are backing up. Slowing down...");
}

/***
 * Hook invoked when back-pressure clears; logs that normal speed resumes.
 */
public void onLeavePressureState() {
  logger().info("Tuple backlog cleared. Speeding up...");
}

}