package com.zillabyte.motherbrain.flow; import java.io.Serializable; import org.apache.commons.lang.NotImplementedException; import org.apache.log4j.Logger; import com.zillabyte.motherbrain.coordination.CoordinationException; import com.zillabyte.motherbrain.flow.error.strategies.FlowErrorStrategy; import com.zillabyte.motherbrain.flow.operations.Operation; import com.zillabyte.motherbrain.flow.operations.OperationException; import com.zillabyte.motherbrain.universe.Universe; import com.zillabyte.motherbrain.utils.Utils; public class FlowStateCoordinator implements Serializable { /** * */ private static final long serialVersionUID = 8973237210512561403L; protected App _flow; protected FlowErrorStrategy _errorStrategy; static final Logger _log = Utils.getLogger(FlowStateCoordinator.class); public FlowStateCoordinator(App flow) { _flow = flow; _errorStrategy = Universe.instance().errorStrategyFactory().createFlowStrategy(); } /*** * Update the FlowState based on current state of other datums. e.g. this is called * anytime anything interesting happens that might make us change FlowState * @throws StateMachineException * @throws FlowException * @throws InterruptedException * @throws OperationException * @throws CoordinationException */ public FlowState maybeGetNewFlowState(final FlowInstanceSetBuilder builder, final FlowState currentState) throws StateMachineException, FlowException, InterruptedException, OperationException, CoordinationException { /* * Do we have any errors in our operations? */ if (currentState != FlowState.ERROR && _errorStrategy.shouldTransitionToFlowError(builder)) { return FlowState.ERROR; } switch (currentState) { /* * We don't recover from death or ERRORs (the only way is to re-register). * * Similarly, INITIAL, STARTED and WAITING_FOR_NEXT_CYCLE require explicit * user requests before they may be transitioned from. */ case RETIRING: // fall through case RETIRED: // fall through case ERRORING: // fall through case ERROR: // fall through case KILLING: // fall through case KILLED: // fall through case INITIAL: // fall through case PAUSED: // fall through case STARTED: // fall through case WAITING_FOR_NEXT_CYCLE: break; case RECOVERING: // Special recovery states... throw new NotImplementedException(); case PAUSING: /* * The only way we can go from PAUSING to PAUSED is if all operations report in * and say they've finished their pausing sequence(i.e. they are in PAUSED) */ if (builder.assertAtLeastOneInstanceAliveFromEachOperation().withAliveHeartbeats().notInState("ERROR").allInState("PAUSED")) { // Yes: everybody is started, so we can transition STARTING to STARTED return FlowState.PAUSED; } break; case STARTING: /* * The only way we can go from STARTING to STARTED is if all operations report in * and say they've finished their start up sequence (i.e. they are in STARTED). */ // Are all operations online? for(Operation op : _flow.getOperations()) { if(builder.ofOperation(op).size() != op.getActualParallelism()) return FlowState.STARTING; } // Is there at least one alive for each operation and is everybody in the STARTED state? if (builder.assertAtLeastOneInstanceAliveFromEachOperation().withAliveHeartbeats().notInState("ERROR").allInState("STARTED", "IDLE", "CONSUMING_IDLE")) { // Yes: everybody is started, so we can transition STARTING to STARTED return FlowState.STARTED; } break; case CYCLE_COMPLETE: // Tell the sources that they may enter WAITING_FOR_NEXT_CYCLE, which will allow // them to start a new batch when we enter FlowState.RUNNING again if (builder.sources().assertAtLeastOneInstanceAliveFromEachOperation().withAliveHeartbeats().notInState("ERROR").allInState("WAITING_FOR_NEXT_CYCLE")) { // All operations are have consumed everything possible return FlowState.WAITING_FOR_NEXT_CYCLE; } else { sendCycleAcknowledged(); } break; case IDLE: // Only RPCs should be in idle, if the source goes back to EMITTING, then the flow should go back to RUNNING. if (builder.sources().assertAtLeastOneInstanceAliveFromEachOperation().anyInState("EMITTING") || builder.nonSources().assertAtLeastOneInstanceAliveFromEachOperation().anyInState("ACTIVE", "CONSUMING", "EMITTING")) { return FlowState.RUNNING; } break; case RUNNING: // Detect when all of the tuples are out of the system if (builder.sources().assertAtLeastOneInstanceAliveFromEachOperation().withAliveHeartbeats().notInState("ERROR").allInState("EMIT_COMPLETE", "EMIT_COMPLETE_ACKED", "IDLE")) { // If all non-sources are done processing, we can get out of RUNNING state if (builder.nonSources().assertAtLeastOneInstanceAliveFromEachOperation().withAliveHeartbeats().notInState("ERROR").allInState("IDLE", "EMITTING_DONE") ) { if (builder.sources().assertAtLeastOneInstanceAliveFromEachOperation().withAliveHeartbeats().notInState("ERROR").allInState("IDLE")) { // Sources are in IDLE => RPC, we idle the flow in this case return FlowState.IDLE; } else { // builder.debugStates(); // Otherwise we're a regular app, and we complete the cycle return FlowState.CYCLE_COMPLETE; } } } break; default: _log.error("Current flow state is invalid: " + currentState); break; } return currentState; } /*** * For testing * @param string * @throws CoordinationException */ protected void sendCycleAcknowledged() throws CoordinationException { Universe.instance().state().sendMessage(_flow.flowStateKey() + "/operation_commands", "cycle_acknowledged"); } }