package com.zillabyte.motherbrain.flow.operations.multilang.builder;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Callable;
import java.util.concurrent.TimeoutException;
import net.sf.json.JSONObject;
import org.apache.commons.lang.mutable.MutableInt;
import org.apache.log4j.Logger;
import com.google.common.collect.HashMultiset;
import com.google.common.collect.Maps;
import com.google.common.collect.Multiset;
import com.google.monitoring.runtime.instrumentation.common.com.google.common.collect.Iterators;
import com.zillabyte.motherbrain.benchmarking.Benchmark;
import com.zillabyte.motherbrain.container.ContainerEnvironmentHelper;
import com.zillabyte.motherbrain.container.ContainerException;
import com.zillabyte.motherbrain.container.ContainerFactory;
import com.zillabyte.motherbrain.container.ContainerWrapper;
import com.zillabyte.motherbrain.flow.App;
import com.zillabyte.motherbrain.flow.Component;
import com.zillabyte.motherbrain.flow.Flow;
import com.zillabyte.motherbrain.flow.FlowCompilationException;
import com.zillabyte.motherbrain.flow.buffer.SinkToBuffer;
import com.zillabyte.motherbrain.flow.buffer.SourceFromBuffer;
import com.zillabyte.motherbrain.flow.components.ComponentInput;
import com.zillabyte.motherbrain.flow.components.ComponentOutput;
import com.zillabyte.motherbrain.flow.components.builtin.BuiltinComponents;
import com.zillabyte.motherbrain.flow.config.FlowConfig;
import com.zillabyte.motherbrain.flow.graph.Connection;
import com.zillabyte.motherbrain.flow.graph.FlowGraph;
import com.zillabyte.motherbrain.flow.operations.Join;
import com.zillabyte.motherbrain.flow.operations.Operation;
import com.zillabyte.motherbrain.flow.operations.OperationLogger;
import com.zillabyte.motherbrain.flow.operations.builtin.Count;
import com.zillabyte.motherbrain.flow.operations.builtin.RateLimiter;
import com.zillabyte.motherbrain.flow.operations.builtin.Unique;
import com.zillabyte.motherbrain.flow.operations.decorators.EmitDecorator;
import com.zillabyte.motherbrain.flow.operations.decorators.RemoveFields;
import com.zillabyte.motherbrain.flow.operations.decorators.RenameFields;
import com.zillabyte.motherbrain.flow.operations.decorators.RetainFields;
import com.zillabyte.motherbrain.flow.operations.multilang.MultiLangProcess;
import com.zillabyte.motherbrain.flow.operations.multilang.MultiLangProcessException;
import com.zillabyte.motherbrain.flow.operations.multilang.operations.LocalComponent;
import com.zillabyte.motherbrain.flow.operations.multilang.operations.MultiLangAggregator;
import com.zillabyte.motherbrain.flow.operations.multilang.operations.MultiLangRunEach;
import com.zillabyte.motherbrain.flow.operations.multilang.operations.MultiLangRunSource;
import com.zillabyte.motherbrain.flow.operations.multilang.operations.MultilangClumper;
import com.zillabyte.motherbrain.universe.Config;
import com.zillabyte.motherbrain.universe.Universe;
import com.zillabyte.motherbrain.utils.JSONUtil;
import com.zillabyte.motherbrain.utils.Utils;
import com.zillabyte.motherbrain.utils.VersionComparer;
@SuppressWarnings("unchecked")
public class MultilangFlowCompiler {
// Timeout passed to getNextMessage(...) and waitForExit(...) while running the "info" CLI command.
// Defaults to 1000L * 15 -- presumably milliseconds (15 s); TODO confirm the unit.
public final Long CLI_INFO_TIMEOUT = Config.getOrDefault("builder.cli.info.timeout", 1000L * 15);
// Configured timeout for the "prep" CLI command (default 1000L * 60 * 5).
// NOTE(review): not read anywhere in this class; handlePrep calls waitForExit() without a timeout.
public final Long CLI_PREP_TIMEOUT = Config.getOrDefault("builder.cli.prep.timeout", 1000L * 60 * 5);
// Lowest "multilang_version" accepted by handleEarlySanityChecks.
public final String MINIMUM_REQUIRED_VERSION = Config.getOrDefault("builder.cli.min.version", "0.9.48");
// Slot budget that setParallelism divides by the expected node count to get the default per-operation parallelism.
public final static int MAX_SLOTS_PER_FLOW = Config.getOrDefault("builder.max.slots.per.flow", 30);
// Class-wide log4j logger.
private final static Logger _log = Logger.getLogger(MultilangFlowCompiler.class);
// Builds nested flows for "component" nodes when the environment is not local.
private FlowFetcher _fetcher = null;
// Validates the assembled flow at the end of buildFlowFromSettings.
private FlowValidator _validator = new FlowValidator();
// NOTE(review): not referenced by any method in this class.
private ContainerFactory _containerFactory = Universe.instance().containerFactory();
// Base configuration; override configs are merged onto it via mergeWith(...).
private FlowConfig _flowConfig;
// Container used to build the CLI commands and handed to the multilang operations.
private ContainerWrapper _container;
// Logger attached to the compiled flow and to the CLI processes.
private OperationLogger _logger;
// Per-component-name counters; only touched by getPrefixFor.
private Map<String, MutableInt> _prefixes = Maps.newHashMap();
// How many times each component id has been injected into a parent graph; feeds the injection prefix.
private Multiset<String> _componentCounts = HashMultiset.create();
/**
 * Creates a compiler wired to the given collaborators.
 *
 * @param fetcher        used to build nested flows for "component" nodes
 * @param baseFlowConfig base configuration that override configs are merged onto
 * @param container      container used to run CLI commands and multilang operations
 * @param logger         logger attached to compiled flows and CLI processes
 */
public MultilangFlowCompiler(FlowFetcher fetcher, FlowConfig baseFlowConfig, ContainerWrapper container, OperationLogger logger) {
  this._logger = logger;
  this._container = container;
  this._flowConfig = baseFlowConfig;
  this._fetcher = fetcher;
}
/**
 * Compiles the flow identified by {@code flowId}.
 *
 * <ol>
 *   <li>Runs the "prep" CLI command, but only when the environment reports {@code isTestOrProd()}.</li>
 *   <li>Runs the "info" CLI command and parses its response.</li>
 *   <li>Builds the flow from that response and stores the response as the flow's meta.</li>
 * </ol>
 *
 * @param flowId         id of the flow to compile
 * @param overrideConfig configuration overrides forwarded to prep and to flow construction
 * @return the compiled flow
 * @throws FlowCompilationException if the "info" response is missing or the flow cannot be built
 * @throws ContainerException declared by the settings retrieval step
 */
public Flow compileFlow(String flowId, JSONObject overrideConfig) throws FlowCompilationException, ContainerException {
  // Step 1: "zillabyte prep" (test/prod environments only)
  final boolean needsPrep = Universe.instance().env().isTestOrProd();
  if (needsPrep) {
    handlePrep(flowId, overrideConfig);
  }

  // Step 2: "zillabyte info"
  final JSONObject settings = handleGettingSettings(flowId);
  if (settings != null) {
    // Step 3: build the flow and keep the raw settings around as its meta
    final Flow compiled = buildFlowFromSettings(flowId, settings, overrideConfig);
    compiled.setMeta(settings);
    return compiled;
  }

  throw (FlowCompilationException) new FlowCompilationException().setAllMessages("Unable to retrieve 'zillabyte info' response.").adviseRetry();
}
/**
 * Builds a flow from the parsed "info" settings.
 *
 * <p>Creates an {@link App} when {@code flow_type} is "app" (the default) and a {@link Component}
 * otherwise, instantiates one operation (or nested flow) per entry of {@code nodes}, wires them
 * according to {@code arcs}, folds placeholder operations into their neighbours, assigns
 * parallelism and finally validates the result.
 *
 * <p>If {@code overrideConfig} contains a "nodes" key, it replaces the "nodes" entry of
 * {@code zbInfo} in place before the operations are created.
 *
 * @param flowId         id of the flow being built; also the fallback name
 * @param zbInfo         parsed "info" response; may be mutated (see above)
 * @param overrideConfig configuration overrides; {@code null} is treated as "no overrides"
 * @return the assembled and validated flow
 * @throws FlowCompilationException on version mismatch, unknown node types, dangling or unnamed
 *         arcs, unresolvable placeholders, or validation failure
 */
protected Flow buildFlowFromSettings(String flowId, JSONObject zbInfo, JSONObject overrideConfig) throws FlowCompilationException {
  // A null override used to blow up at the log statement below; treat it as an empty override.
  final JSONObject overrides = (overrideConfig == null) ? new JSONObject() : overrideConfig;

  // Init
  String flowType = zbInfo.optString("flow_type", "app");
  String name = zbInfo.optString("name", flowId);
  handleEarlySanityChecks(zbInfo);

  // Build the base flow
  Flow flow;
  if (flowType.equalsIgnoreCase("app")) {
    flow = new App(flowId, name, (FlowConfig)_flowConfig.mergeWith(overrides));
  } else {
    flow = new Component(flowId, name, (FlowConfig)_flowConfig.mergeWith(overrides));
  }
  flow.setLogger(this._logger);

  _log.info("zbInfo contains " + zbInfo.toString());
  _log.info("overrideConfig contains " + overrides.toString());

  // Update node configurations
  if (overrides.containsKey("nodes")) {
    zbInfo.put("nodes", overrides.get("nodes"));
  }

  // Build the operations, keyed by node name
  Map<String, Object> operationMap = Maps.newHashMap();
  for (JSONObject nodeSettings : (List<JSONObject>)zbInfo.getJSONArray("nodes")) {
    Object operation = createOperationFromJSON(flowId, nodeSettings);
    operationMap.put(nodeSettings.getString("name"), operation);
  }

  // Tell every operation / nested flow which flow contains it
  for (Object o : operationMap.values()) {
    if (o instanceof Operation) {
      ((Operation)o).setContainerFlow(flow);
    } else if (o instanceof Flow) {
      ((Flow) o).setParentFlow(flow);
    }
  }

  FlowGraph flowGraph = flow.graph();
  connectArcs(zbInfo, operationMap, flowGraph);
  removePlaceHolders(flowGraph);

  setParallelism(flow);
  _validator.validate(flow);
  return flow;
}

/** Validates every entry of "arcs" and connects the named origin and destination in the graph. */
private void connectArcs(JSONObject zbInfo, Map<String, Object> operationMap, FlowGraph flowGraph) throws FlowCompilationException {
  for (JSONObject arc : (List<JSONObject>)zbInfo.getJSONArray("arcs")) {
    // optString yields "" (never null) for a missing key, so emptiness is what signals "missing".
    String originName = arc.optString("origin");
    String destName = arc.optString("dest");
    String arcName = arc.optString("name");
    // Presence of the key alone marks a loop back, whatever its value.
    // NOTE(review): {"loop_back": false} is therefore treated as a loop back -- confirm this is intended.
    Boolean loopBack = arc.containsKey("loop_back");
    Integer maxIter = arc.optInt("max_iterations", Connection.DEFAULT_MAX_ITER);

    // Sanity
    if (!operationMap.containsKey(originName))
      throw (FlowCompilationException) new FlowCompilationException().setAllMessages("Could not find operation with name: '" + originName + "'");
    if (!operationMap.containsKey(destName))
      throw (FlowCompilationException) new FlowCompilationException().setAllMessages("Could not find operation with name: '" + destName + "'");
    if (arcName == null || arcName.isEmpty())
      throw (FlowCompilationException) new FlowCompilationException().setAllMessages("Connection did not have a name: '" + arc + "'");

    Object origin = operationMap.get(originName);
    Object dest = operationMap.get(destName);

    if (loopBack && dest instanceof Operation) {
      if (((Operation) dest).type().equalsIgnoreCase("source"))
        throw (FlowCompilationException) new FlowCompilationException().setAllMessages("Cannot loop back to a source");
    }

    handleConnectingNodes(origin, dest, arcName, loopBack, maxIter, flowGraph);
  }
}

/**
 * Repeatedly sweeps the graph, folding each placeholder into a neighbouring operation and plucking
 * it, until a sweep finds no emit-decorator placeholder that is still fed by another placeholder.
 */
private void removePlaceHolders(FlowGraph flowGraph) throws FlowCompilationException {
  // NOTE(review): Iterators resolves to the class repackaged under
  // com.google.monitoring.runtime.instrumentation (see the imports); presumably Guava's own
  // com.google.common.collect.Iterators was intended -- confirm.
  boolean converged;
  do {
    converged = true;
    boolean progressed = false;
    for (PlaceHolderOperation ph : flowGraph.getByType(PlaceHolderOperation.class)) {
      Object payload = ph.getObject();
      if (payload instanceof EmitDecorator) {
        EmitDecorator ed = (EmitDecorator) payload;
        Connection conn = Iterators.getOnlyElement(flowGraph.connectionsTo(ph).iterator());
        Operation origin = conn.source();
        if (origin instanceof PlaceHolderOperation) {
          // Upstream is still a placeholder; retry on the next sweep.
          converged = false;
        } else {
          // This is a real operation: move the decorator onto it and drop the placeholder.
          origin.addEmitDecorator(conn.streamName(), ed);
          flowGraph.pluck(ph);
          progressed = true;
        }
      } else if (payload instanceof RouteBy) {
        RouteBy rb = (RouteBy) payload;
        // Called only for its check that exactly one operation feeds the placeholder.
        Iterators.getOnlyElement(flowGraph.operationsTo(ph).iterator());
        Operation dest = Iterators.getOnlyElement(flowGraph.operationsFrom(ph).iterator());
        flowGraph.pluck(ph);
        dest.setIncomingRouteByFields(rb.getFields());
        progressed = true;
      } else {
        throw new IllegalStateException("Unexpected placeholder payload: " + (payload == null ? "null" : payload.getClass().getName()));
      }
    }
    // A sweep that plucked nothing leaves the graph unchanged, so another sweep would loop forever.
    if (!converged && !progressed) {
      throw (FlowCompilationException) new FlowCompilationException().setAllMessages("Could not resolve placeholder operations: they are only connected to each other.");
    }
  } while (!converged);
}
/**
 * Rejects settings whose "multilang_version" (treated as "0.0.0" when absent) does not pass
 * {@code VersionComparer.isAtLeast} against {@code MINIMUM_REQUIRED_VERSION}.
 *
 * @param zbInfo parsed "info" response
 * @throws FlowCompilationException if the version check fails
 */
private void handleEarlySanityChecks(JSONObject zbInfo) throws FlowCompilationException {
  final String reportedVersion = zbInfo.optString("multilang_version", "0.0.0");
  if (VersionComparer.isAtLeast(reportedVersion, MINIMUM_REQUIRED_VERSION)) {
    return;
  }
  final String complaint = "The flow is built with an older version of Zillabyte (" + reportedVersion + ") . Please upgrade dependencies.";
  throw (FlowCompilationException) new FlowCompilationException().setAllMessages(complaint);
}
/**
 * Assigns a target parallelism to every operation of the flow.
 *
 * <p>Operations without an explicit override receive {@code MAX_SLOTS_PER_FLOW} divided by the
 * expected node count (never less than 1). Every operation's target is then capped at its own
 * maximum parallelism.
 *
 * @param flow the flow whose operations are configured
 * @throws FlowCompilationException if the flow expects zero nodes
 */
private void setParallelism(Flow flow) throws FlowCompilationException {
  final int nodeCount = flow.getExpectedNumberOfNodes();
  _log.info("There are " + nodeCount + " nodes expected for this flow. (" + flow.getId() + ")");
  if (nodeCount == 0) {
    throw (FlowCompilationException) new FlowCompilationException().setAllMessages("The flow has no nodes");
  }

  // Integer division may yield 0 for large flows, hence the floor of 1.
  final int defaultSlots = Math.max(MAX_SLOTS_PER_FLOW / nodeCount, 1);

  for (Operation op : flow.getOperations()) {
    final boolean overridden = op.getParallelismOverriden();
    if (!overridden) {
      op.setTargetParallelism(defaultSlots);
    }
    final int capped = Math.min(op.getTargetParallelism(), op.getMaxParallelism());
    op.setTargetParallelism(capped);
  }
}
/**
 * Connects {@code origin} to {@code dest} in {@code flowGraph}.
 *
 * <p>Each endpoint may be an {@link Operation} or a {@link Component}. A component endpoint is
 * first injected into the graph (if it has not been already) and is then represented by its single
 * output (as origin) or single input (as destination).
 *
 * @param origin    upstream endpoint
 * @param dest      downstream endpoint
 * @param arcName   name given to the connection
 * @param loopBack  whether the connection is a loop back
 * @param maxIter   iteration limit forwarded to the graph
 * @param flowGraph graph receiving the connection
 * @throws FlowCompilationException if either endpoint is neither an operation nor a component
 */
protected void handleConnectingNodes(Object origin, Object dest, String arcName, Boolean loopBack, Integer maxIter, FlowGraph flowGraph) throws FlowCompilationException {
  if (origin instanceof Operation && dest instanceof Operation) {
    // Case: operation -> operation
    flowGraph.connect((Operation)origin, (Operation)dest, arcName, loopBack, maxIter);

  } else if (origin instanceof Operation && dest instanceof Component) {
    // Case: operation -> component
    Operation from = (Operation)origin;
    Component to = (Component)dest;
    maybeInjectComponent(flowGraph, to);
    flowGraph.connect(from, to.getOneInput(), arcName, loopBack, maxIter);

  } else if (origin instanceof Component && dest instanceof Component) {
    // Case: component -> component
    Component from = (Component)origin;
    Component to = (Component)dest;
    // Inject BOTH sides. The origin was previously skipped here, so it only ended up in the graph
    // if some other arc happened to be processed first. The call does nothing once injected.
    maybeInjectComponent(flowGraph, from);
    maybeInjectComponent(flowGraph, to);
    flowGraph.connect(from.getOneOutput(), to.getOneInput(), arcName, loopBack, maxIter);

  } else if (origin instanceof Component && dest instanceof Operation) {
    // Case: component -> operation
    Component from = (Component)origin;
    Operation to = (Operation)dest;
    maybeInjectComponent(flowGraph, from);
    flowGraph.connect(from.getOneOutput(), to, arcName, loopBack, maxIter);

  } else {
    // Either endpoint may be null here (instanceof is false for null), so describe them defensively.
    String originType = (origin == null) ? "null" : origin.getClass().getName();
    String destType = (dest == null) ? "null" : dest.getClass().getName();
    throw (FlowCompilationException) new FlowCompilationException().setAllMessages("Unknown graph case. The origin is a "+originType+" while the destination is a "+destType+".");
  }
}
/**
 * Injects the component's graph into {@code flowGraph} unless the graph already contains any of
 * the component's operations. After injection the component is pointed at {@code flowGraph} and
 * the per-id injection count is incremented.
 *
 * <p>The injection prefix is {@code <component name>.<times this id was injected before>}.
 *
 * @param flowGraph graph to inject into
 * @param c         component to inject
 * @throws FlowCompilationException declared for the injection step
 */
private void maybeInjectComponent(FlowGraph flowGraph, Component c) throws FlowCompilationException {
  final boolean alreadyPresent = flowGraph.containsAny(c.graph().allOperations());
  if (alreadyPresent) {
    // Already handled by an earlier arc (e.g. the source-from-component case).
    return;
  }

  final int priorInjections = _componentCounts.count(c.getId());
  final String prefix = c.getName() + "." + priorInjections;
  flowGraph.inject(prefix, c.graph());
  c.setGraph(flowGraph);
  _componentCounts.add(c.getId());
}
/**
 * Returns a prefix for the component based on its name: the bare name on first use, then
 * {@code name-2}, {@code name-3}, ... on subsequent uses.
 *
 * @param c component to produce a prefix for
 * @return the prefix
 */
private String getPrefixFor(Component c) {
  final String componentName = c.getName();
  final MutableInt uses = _prefixes.get(componentName);
  if (uses == null) {
    // First sighting: start the counter at 1 and hand back the plain name.
    _prefixes.put(componentName, new MutableInt(1));
    return componentName;
  }
  uses.increment();
  return componentName + "-" + uses.intValue();
}
/**
 * Runs the "info" CLI command inside the container for the given flow and parses the first
 * message it emits as JSON. The whole attempt is wrapped in {@code Utils.retry}.
 *
 * @param flowId id of the flow whose directory the command runs in
 * @return the parsed "info" response
 * @throws FlowCompilationException on timeout, container or process errors, or interruption
 * @throws ContainerException declared for compatibility; container errors raised while running
 *         the command are reported as {@link FlowCompilationException}
 */
protected JSONObject handleGettingSettings(final String flowId) throws FlowCompilationException, ContainerException {
  Benchmark.markBegin("multilang.container.zillabyte_info");
  try {
    // The retry here is mostly for tests, which can have hiccups when run in parallel...
    return Utils.retry(new Callable<JSONObject>() {
      @Override
      public JSONObject call() throws Exception {
        try {
          // Execute the command...
          MultiLangProcess proc = _container.buildCommand()
              .withEnvironment(ContainerEnvironmentHelper.getCLIEnvironment(_flowConfig))
              .withCLICommand("info")
              .withSockets()
              .inFlowDirectory(flowId)
              .createProcess()
              .addLogListener(_logger)
              .addStdioLogListeners()
              .start();
          String message = proc.getNextMessage(CLI_INFO_TIMEOUT);
          proc.waitForExit(CLI_INFO_TIMEOUT);
          // Parse the results, finish..
          return JSONUtil.parseObj(message);
        } catch (TimeoutException ex) {
          throw (FlowCompilationException)new FlowCompilationException(ex).setAllMessages("Timeout retrieving flow meta information.").adviseRetry();
        } catch (ContainerException e) {
          throw (FlowCompilationException)new FlowCompilationException(e).setAllMessages("Error initializing flow container.").adviseRetry();
        } catch (InterruptedException e) {
          // Restore the interrupt flag so code further up the stack can still observe it.
          Thread.currentThread().interrupt();
          throw (FlowCompilationException)new FlowCompilationException(e).setAllMessages("Flow compilation interrupted.").adviseRetry();
        } catch (MultiLangProcessException e) {
          throw new FlowCompilationException(e);
        }
      }
    });
  } catch (Exception e) {
    // Do not re-wrap an exception that already carries user messages and retry advice;
    // wrapping it again would bury those details one level down in the cause chain.
    if (e instanceof FlowCompilationException) {
      throw (FlowCompilationException) e;
    }
    if (e.getCause() instanceof FlowCompilationException) {
      throw (FlowCompilationException) e.getCause();
    }
    throw new FlowCompilationException(e);
  } finally {
    Benchmark.markEnd("multilang.container.zillabyte_info");
  }
}
/**
 * Runs the "prep" CLI command inside the container for the given flow and waits for it to exit.
 *
 * <p>The merged flow configuration is passed to the process as JSON in the
 * {@code ZILLABYTE_PARAMS} environment variable, and the current environment name is passed via
 * {@code --mode}.
 *
 * @param flowId         id of the flow whose directory the command runs in
 * @param overrideConfig overrides merged onto the base flow configuration
 * @throws FlowCompilationException if the wait is interrupted or the container/process fails
 */
protected void handlePrep(String flowId, Map<String, Object> overrideConfig) throws FlowCompilationException {
  try {
    // Benchmark
    Benchmark.markBegin("multilang.container.zillabyte_prep");
    // Execute
    // NOTE(review): waitForExit() is called without a timeout, so CLI_PREP_TIMEOUT is never
    // applied and a hung "prep" blocks indefinitely -- confirm whether the timed overload
    // was intended here.
    _container.buildCommand()
        .withEnvironment(ContainerEnvironmentHelper.getCLIEnvironment(this._flowConfig))
        .withEnvironment("ZILLABYTE_PARAMS", _flowConfig.mergeWith(overrideConfig).toJSON().toString())
        .withCLICommand("prep", "--mode", Universe.instance().env().toString())
        .inFlowDirectory(flowId)
        .withoutSockets()
        .createProcess()
        .addLogListener(_logger)
        .start()
        .waitForExit();
  } catch (InterruptedException e) {
    // Restore the interrupt flag so code further up the stack can still observe it.
    Thread.currentThread().interrupt();
    throw (FlowCompilationException)new FlowCompilationException(e).setUserMessage("Interrupted");
  } catch (ContainerException | MultiLangProcessException e) {
    throw (FlowCompilationException)new FlowCompilationException(e).setUserMessage("Error with app container");
  } finally {
    Benchmark.markEnd("multilang.container.zillabyte_prep");
  }
}
/**
 * Creates the operation (or nested flow) described by one entry of the "nodes" array.
 *
 * <p>The node's "type" selects what is built. Three paths return early and therefore skip the
 * "parallelism" handling at the end:
 * <ul>
 *   <li>a "source" with a "relation" or "matches" key returns a {@link SourceFromBuffer};</li>
 *   <li>a "component" in a local environment returns a builtin component or a
 *       {@link LocalComponent};</li>
 *   <li>a "component" elsewhere returns the nested flow built by the fetcher.</li>
 * </ul>
 * For every other type, a "parallelism" key sets the target parallelism and marks it overridden.
 *
 * @param flowId id of the flow that owns the node
 * @param node   settings for the node; must contain "type"
 * @return an {@link Operation}, or a flow/component for "component" nodes
 * @throws FlowCompilationException for unknown types, nested apps, or container failures
 */
protected Object createOperationFromJSON(String flowId, JSONObject node) throws FlowCompilationException {
  try {
    // Locale.ROOT keeps the lower-casing stable; under e.g. a Turkish default locale "SINK"
    // would otherwise become "s\u0131nk" and match no case label.
    String nodeType = node.getString("type").toLowerCase(java.util.Locale.ROOT);
    Operation operation;

    // Build the operation
    switch(nodeType) {
    case "source":
      if (node.containsKey("relation") || node.containsKey("matches")) {
        // SourceFromRelation has 'relation' or 'matches'
        String query = node.containsKey("relation") ? node.getJSONObject("relation").getString("query") : node.getString("matches");
        Integer version = configOf(node).optInt("version", -1);
        // The one true source.
        return new SourceFromBuffer(node.getString("name"), query, version, flowId, this._flowConfig.getAuthToken());
      } else {
        // Custom sources don't have a query..
        operation = new MultiLangRunSource(node, _container);
      }
      break;
    case "each":
    case "filter":
      operation = new MultiLangRunEach(node, _container);
      break;
    case "group_by":
      operation = new MultiLangAggregator(node, _container);
      break;
    case "rename":
      operation = new PlaceHolderOperation(new RenameFields(node));
      break;
    case "retain":
      operation = new PlaceHolderOperation(new RetainFields(node));
      break;
    case "remove":
      operation = new PlaceHolderOperation(new RemoveFields(node));
      break;
    case "unique":
      operation = new Unique(node);
      break;
    case "clump":
      operation = new MultilangClumper(node, _container);
      break;
    case "count":
      operation = new Count(node);
      break;
    case "join":
      operation = new Join(node);
      break;
    case "component":
      if (Universe.instance().env().isLocal()) {
        // TODO: move this logic to InPlaceFlowBuilder
        if (BuiltinComponents.exists(node.optString("id", ""))) {
          return BuiltinComponents.create(node.optString("id"), FlowConfig.createFromJSON(configOf(node)));
        } else {
          return new LocalComponent(node);
        }
      } else {
        // Recursively build a new flow...
        String compName = node.getString("id");
        JSONObject config = configOf(node);
        _log.info("recursively building flow: " + compName + " with config: " + config);
        Flow subFlow = _fetcher.buildFlow(compName, config);
        if (subFlow instanceof App)
          throw (FlowCompilationException) new FlowCompilationException().setAllMessages("Only components may be nested.");
        return subFlow;
      }
    case "sink":
      operation = new SinkToBuffer(node, _flowConfig);
      break;
    case "input":
      operation = new ComponentInput(node, _flowConfig);
      break;
    case "output":
      operation = new ComponentOutput(node, _flowConfig);
      break;
    case "route_by":
      operation = new PlaceHolderOperation(new RouteBy(node));
      break;
    case "rate_limit":
      operation = new RateLimiter(node);
      break;
    default:
      throw (FlowCompilationException) new FlowCompilationException().setAllMessages("Unknown operation type: " + nodeType+".");
    }

    // Set Target Parallelism
    if (node.containsKey("parallelism")) {
      operation.setTargetParallelism(node.getInt("parallelism"));
      operation.setParallelismOverriden(true);
    }
    return operation;

  } catch (InterruptedException e) {
    // Restore the interrupt flag so code further up the stack can still observe it.
    Thread.currentThread().interrupt();
    throw new FlowCompilationException(e);
  } catch (ContainerException e) {
    throw new FlowCompilationException(e);
  }
}

/** Returns the node's "config" object, or an empty object when it is absent or not a JSON object. */
private static JSONObject configOf(JSONObject node) {
  JSONObject config = node.optJSONObject("config");
  return (config == null) ? new JSONObject() : config;
}
}