package com.zillabyte.motherbrain.flow.operations;

import java.util.Iterator;

import net.sf.json.JSONObject;

import org.apache.commons.lang.NotImplementedException;

import com.zillabyte.motherbrain.flow.Fields;
import com.zillabyte.motherbrain.flow.MapTuple;
import com.zillabyte.motherbrain.flow.aggregation.AggregationException;
import com.zillabyte.motherbrain.flow.aggregation.AggregationKey;
import com.zillabyte.motherbrain.flow.aggregation.AggregationStoreWrapper;
import com.zillabyte.motherbrain.flow.collectors.OutputCollector;
import com.zillabyte.motherbrain.flow.collectors.coordinated.BatchedTuple;
import com.zillabyte.motherbrain.flow.config.OperationConfig;
import com.zillabyte.motherbrain.flow.error.strategies.FakeLocalException;
import com.zillabyte.motherbrain.flow.graph.Connection;
import com.zillabyte.motherbrain.relational.MissingFieldException;
import com.zillabyte.motherbrain.top.MotherbrainException;
import com.zillabyte.motherbrain.universe.Universe;
import com.zillabyte.motherbrain.utils.Log4jWrapper;
import com.zillabyte.motherbrain.utils.Utils;

/***
 * Joins two upstream streams (lhs and rhs) on their respective group fields.
 * Incoming tuples are buffered per side in an aggregation store; when a batch
 * is emitted, the buffered groups are joined according to the JoinType.
 */
public class Join extends AggregationOperation {

  private static final long serialVersionUID = -2683557258333764627L;

  private JoinType _joinType = JoinType.INNER;
  private Fields _lhsGroupFields;
  private Fields _rhsGroupFields;
  private String _lhsStream;
  private String _rhsStream;
  private AggregationStoreWrapper _lhsStore;
  private AggregationStoreWrapper _rhsStore;
  private Log4jWrapper _log = Log4jWrapper.create(Join.class, this);

  public Join(final String name, String lhsStream, Fields lhsGroupFields, String rhsStream, Fields rhsGroupFields, JoinType joinType, OperationConfig config) {
    super(name, config);
    _lhsStream = lhsStream;
    _rhsStream = rhsStream;
    _lhsGroupFields = lhsGroupFields;
    _rhsGroupFields = rhsGroupFields;
    _lhsStore = Universe.instance().aggregationStoreFactory().getStore(this, "lhs");
    _rhsStore = Universe.instance().aggregationStoreFactory().getStore(this, "rhs");
    _joinType = joinType;
  }

  public Join(JSONObject node) {
    this(
        node.getString("name"),
        node.getString("lhs_stream"),
        new Fields(node.getString("lhs_fields")),
        node.getString("rhs_stream"),
        new Fields(node.getString("rhs_fields")),
        JoinType.valueOf(node.getString("join_type").toUpperCase()),
        OperationConfig.createEmpty()
    );
  }
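  /***
   * Illustrative shape of the node object consumed by Join(JSONObject) above.
   * The keys mirror the getString() calls in that constructor; the values here
   * are made up:
   *
   *   {
   *     "name":       "join_1",
   *     "lhs_stream": "users",
   *     "lhs_fields": "user_id",
   *     "rhs_stream": "orders",
   *     "rhs_fields": "user_id",
   *     "join_type":  "left"
   *   }
   */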
  /***
   * The operation type identifier.
   */
  @Override
  public String type() {
    return "join";
  }

  /***
   * Sets the lhs stream name.
   *
   * @param streamName
   */
  public void setLhsStreamName(String streamName) {
    _lhsStream = streamName;
  }

  /***
   * Sets the rhs stream name.
   *
   * @param streamName
   */
  public void setRhsStreamName(String streamName) {
    _rhsStream = streamName;
  }

  /***
   * Iterates the buffered groups for this batch and joins each one.
   *
   * @throws InterruptedException
   * @throws OperationException
   * @throws OperationDeadException
   */
  @Override
  public void handleEmit(Object batch, Integer aggStoreKey) throws InterruptedException, OperationException, OperationDeadException {
    try {

      // Init
      _log.info("Aggregating all groups..");
      final Iterator<AggregationKey> iter;

      // Which side we iterate keys from depends on the join type...
      switch(this._joinType) {
      case INNER: /* fall through */
      case LEFT:
        iter = this._lhsStore.keyIterator(iterationStoreKeyPrefix(batch, aggStoreKey));
        break;
      case RIGHT:
        iter = this._rhsStore.keyIterator(iterationStoreKeyPrefix(batch, aggStoreKey));
        break;
      case OUTER:
        // See performJoin() below
        throw (OperationException) new OperationException(this, new NotImplementedException()).setAllMessages("Outer join is not yet implemented.");
      default:
        throw (OperationException) new OperationException(this).setAllMessages("Unknown join type: " + this._joinType);
      }

      // Join each group...
      while(iter.hasNext()) {
        AggregationKey key = iter.next();
        performJoin(iterationStoreKeyPrefix(batch, aggStoreKey), key);
      }

      // Done
      _log.info("Aggregating all groups done.");

    } catch(AggregationException e) {
      throw new OperationException(this, e);
    }
  }

  /***
   * Merges the lhs and rhs tuples into a single output tuple.
   *
   * @param lhs
   * @param rhs
   */
  private final MapTuple joinTuples(MapTuple lhs, MapTuple rhs) {

    // Create the new tuple
    MapTuple joinedTuple = new MapTuple();
    if (lhs != null) joinedTuple.values().putAll(lhs.values());
    if (rhs != null) joinedTuple.values().putAll(rhs.values());

    // Because we're dealing with LEFT, RIGHT, and OUTER joins, it's possible
    // to have NULLs; to allow the rest of the system to function as expected, we
    // cheat here a bit and add any missing fields to the map
    if (_joinType != JoinType.INNER) {
      if (this._expectedFields.size() > 0) {
        Fields expected = this._expectedFields.values().iterator().next();
        for(final String s : expected) {
          /*
           * We should never insert null values into the _expectedFields hash.
           */
          assert (s != null);
          if (!joinedTuple.containsValueKey(s)) {
            joinedTuple.put(s, null);
          }
        }
      }
    }

    // Batched tuples? Preserve the batch bookkeeping of the incoming tuple.
    MapTuple inc = lhs != null ? lhs : rhs;
    if (inc instanceof BatchedTuple) {
      BatchedTuple bt = (BatchedTuple) inc;
      joinedTuple = new BatchedTuple(joinedTuple, bt.getId(), bt.batchId());
    }

    return joinedTuple;
  }
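  /***
   * Worked example (illustrative field names, assuming "b" is among the
   * expected output fields): with lhs/rhs group fields of ["id"], lhs tuples
   * {id:1, a:"x"}, {id:2, a:"y"} and a single rhs tuple {id:1, b:"z"}:
   *
   *   INNER emits {id:1, a:"x", b:"z"}
   *   LEFT  emits {id:1, a:"x", b:"z"} and {id:2, a:"y", b:null}
   *   RIGHT emits {id:1, a:"x", b:"z"}
   *
   * The null in the LEFT case comes from the padding loop in joinTuples()
   * above.
   */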
  @Override
  public void onSetExpectedFields() throws OperationException {
    final String lhsStream = this.lhsPrevConnection().streamName();
    final String rhsStream = this.rhsPrevConnection().streamName();
    /*
     * Bug in nullness annotation derivations, this is true for sure.
     */
    assert (lhsStream != null);
    assert (rhsStream != null);
    this.lhsPrevOperation().addExpectedFields(lhsStream, this._lhsGroupFields);
    this.rhsPrevOperation().addExpectedFields(rhsStream, this._rhsGroupFields);
    super.onSetExpectedFields();
  }

  /***
   * Joins the lhs and rhs groups for a single aggregation key.
   *
   * @param key
   * @throws InterruptedException
   * @throws OperationException
   * @throws OperationDeadException
   * @throws AggregationException
   */
  private synchronized void performJoin(String fullBatchName, AggregationKey key) throws InterruptedException, OperationException, OperationDeadException, AggregationException {
    try {

      // Init
      _log.info("beginning join for: " + key);

      try {

        // Init
        markBeginActivity();

        // What join are we doing?
        if (_joinType == JoinType.LEFT) {

          Iterator<MapTuple> lhsIter = _lhsStore.getGroupIterator(fullBatchName, key);
          while(lhsIter.hasNext()) {

            // Pressure sanity...
            if (inPressureState()) {
              // Spin wait while we allow pressure to die down. Note: this is not the main thread.
              Utils.sleep(100L);
              continue;
            }

            // Init.
            Iterator<MapTuple> rhsIter = _rhsStore.getGroupIterator(fullBatchName, key);
            MapTuple lhsTuple = lhsIter.next();

            // If nothing on the other side, then just emit the lhs tuple
            if (!rhsIter.hasNext()) {
              _collector.emit(joinTuples(lhsTuple, null));
            } else {
              while(rhsIter.hasNext()) {
                _collector.emit(joinTuples(lhsTuple, rhsIter.next()));
              }
            }
          }

        } else if (_joinType == JoinType.RIGHT) {

          // Same as LEFT join, but swap sides
          Iterator<MapTuple> rhsIter = _rhsStore.getGroupIterator(fullBatchName, key);
          while(rhsIter.hasNext()) {

            // Pressure sanity...
            if (inPressureState()) {
              // Spin wait while we allow pressure to die down. Note: this is not the main thread.
              Utils.sleep(100L);
              continue;
            }

            // Init.
            Iterator<MapTuple> lhsIter = _lhsStore.getGroupIterator(fullBatchName, key);
            MapTuple rhsTuple = rhsIter.next();

            // If nothing on the other side, then just emit the rhs tuple
            if (!lhsIter.hasNext()) {
              _collector.emit(joinTuples(null, rhsTuple));
            } else {
              while(lhsIter.hasNext()) {
                _collector.emit(joinTuples(lhsIter.next(), rhsTuple));
              }
            }
          }

        } else if (_joinType == JoinType.INNER) {

          // Only emit tuples that match on both sides
          Iterator<MapTuple> lhsIter = _lhsStore.getGroupIterator(fullBatchName, key);
          while(lhsIter.hasNext()) {

            // Pressure sanity...
            if (inPressureState()) {
              // Spin wait while we allow pressure to die down. Note: this is not the main thread.
              Utils.sleep(100L);
              continue;
            }

            // Init.
            MapTuple lhsTuple = lhsIter.next();
            Iterator<MapTuple> rhsIter = _rhsStore.getGroupIterator(fullBatchName, key);

            // Iterate every possibility...
            while(rhsIter.hasNext()) {
              _collector.emit(joinTuples(lhsTuple, rhsIter.next()));
            }
          }

        } else if (_joinType == JoinType.OUTER) {

          throw new NotImplementedException("TODO");
          // The outer join isn't necessarily complex, but 'done is better than perfect', and I'd
          // rather come back to it when we have use cases demanding it. Future notes:
          // - Just iterate the lhs side and copy the LEFT join code above
          // - Then (nested) iterate the rhs side and pretty much copy the RIGHT join code above as well.

        }

      } catch (MotherbrainException ex) {
        handleLoopError(ex);
      } catch(Throwable e) {
        handleFatalError(e);
      } finally {
        // Tell the store we can release its state
        _lhsStore.deleteGroup(fullBatchName, key);
        _rhsStore.deleteGroup(fullBatchName, key);
        markEndActivity();
      }

    } catch(FakeLocalException e) {
      e.printAndWait();
    }
  }
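  /***
   * Sketch of the missing OUTER case, following the "future notes" in
   * performJoin() above. Untested and intentionally left as a comment; it
   * assumes handleEmit() would iterate the union of the lhs and rhs key sets.
   * Per key:
   *
   *   // lhs non-empty, rhs non-empty -> emit the cross product (INNER logic)
   *   // lhs non-empty, rhs empty     -> emit joinTuples(lhsTuple, null) for each lhs tuple
   *   // lhs empty,     rhs non-empty -> emit joinTuples(null, rhsTuple) for each rhs tuple
   *
   * The first two cases are exactly the LEFT branch; the third is the
   * unmatched half of the RIGHT branch.
   */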
  /***
   * Buffers an incoming tuple on the side that matches its source stream.
   *
   * @throws InterruptedException
   * @throws OperationException
   * @throws MissingFieldException
   * @throws AggregationException
   */
  @Override
  public void handleConsume(Object batch, MapTuple tuple, String sourceStream, OutputCollector c) throws InterruptedException, MotherbrainException {

    // Determine what stream we're working with...
    if (sourceStream.equalsIgnoreCase(this._lhsStream)) {

      // LHS
      if (_outerAggregateLogBackoff.tick()) {
        _operationLogger.writeLog("[sampled #" + _outerAggregateLogBackoff.counter() + "] receiving LHS join tuple: " + tuple, OperationLogger.LogPriority.IPC);
      }
      this._lhsStore.addToGroup(storeKeyPrefix(batch), this.getKey(this._lhsGroupFields, tuple), tuple);

    } else if (sourceStream.equalsIgnoreCase(this._rhsStream)) {

      // RHS
      if (_outerAggregateLogBackoff.tick()) {
        _operationLogger.writeLog("[sampled #" + _outerAggregateLogBackoff.counter() + "] receiving RHS join tuple: " + tuple, OperationLogger.LogPriority.IPC);
      }
      this._rhsStore.addToGroup(storeKeyPrefix(batch), this.getKey(this._rhsGroupFields, tuple), tuple);

    } else {
      throw new OperationException(this, "unknown stream: " + sourceStream).setUserMessage("The stream '" + sourceStream + "' does not exist for the operation '" + namespaceName() + "'.");
    }
  }

  public Fields lhsJoinFields() {
    return _lhsGroupFields;
  }

  public Fields rhsJoinFields() {
    return _rhsGroupFields;
  }

  public Operation lhsPrevOperation() {
    return lhsPrevConnection().source();
  }

  public Operation rhsPrevOperation() {
    return rhsPrevConnection().source();
  }

  public Connection lhsPrevConnection() {
    for(Connection c : this.getTopFlow().graph().nonLoopConnectionsTo(this)) {
      if (c.streamName().equals(this._lhsStream)) {
        return c;
      }
    }
    throw new IllegalStateException("cannot find lhs join stream: " + this._lhsStream);
  }

  public Connection rhsPrevConnection() {
    for(Connection c : this.getTopFlow().graph().nonLoopConnectionsTo(this)) {
      if (c.streamName().equals(this._rhsStream)) {
        return c;
      }
    }
    throw new IllegalStateException("cannot find rhs join stream: " + this._rhsStream);
  }

  @Override
  public Operation prevNonLoopOperation() throws OperationException {
    throw new OperationException(this, "the caller must special-case for joins");
  }

}