package com.linkedin.parseq.batching;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.linkedin.parseq.Context;
import com.linkedin.parseq.Task;
import com.linkedin.parseq.batching.BatchImpl.BatchBuilder;
import com.linkedin.parseq.batching.BatchImpl.BatchEntry;
import com.linkedin.parseq.batching.BatchImpl.BatchPromise;
import com.linkedin.parseq.internal.ContextImpl;
import com.linkedin.parseq.internal.PlanContext;
import com.linkedin.parseq.promise.CountDownPromiseListener;
import com.linkedin.parseq.promise.PromiseListener;
import com.linkedin.parseq.promise.Promises;
import com.linkedin.parseq.promise.SettablePromise;
import com.linkedin.parseq.trace.Relationship;
import com.linkedin.parseq.trace.ShallowTraceBuilder;
import com.linkedin.parseq.trace.TraceBuilder;
/**
* {@code BatchingStrategy} helps build "batching clients" in ParSeq. "Client" means an object that given {@code K key}
* provides a task that returns {@code T value}. "Batching" means that it can group together keys to resolve values
* in batches. The benefit of this approach is that batching happens transparently in the background and user's code
* does not have to deal with logic needed to implement batching.
* <p>
* Example of a batching client might be ParSeq client for a key-value store that provides batch get operation. For
* the sake of simplicity of the example we are using dummy, synchronous key-value store interface:
* <blockquote><pre>
* interface KVStore {
* String get(Long key);
* Map{@code <Long, String>} batchGet(Collection{@code <Long>} keys);
* }
* </pre></blockquote>
*
* We can then implement a {@code BatchingStrategy} in the following way:
* <blockquote><pre>
* public static class BatchingKVStoreClient extends BatchingStrategy{@code <Integer, Long, String>} {
* private final KVStore _store;
* public BatchingKVStoreClient(KVStore store) {
* _store = store;
* }
*
* {@code @Override}
* public void executeBatch(Integer group, Batch{@code <Long, String>} batch) {
* Map{@code <Long, String>} batchResult = _store.batchGet(batch.keys());
* batch.foreach((key, promise) {@code ->} promise.done(batchResult.get(key)));
* }
*
* {@code @Override}
* public Integer classify(Long entry) {
* return 0;
* }
* }
* </pre></blockquote>
*
* In the above example there is an assumption that all keys can be grouped together. This is why method {@code classify()}
* trivially returns a constant {@code 0}. In practice {@code classify()} returns a group for a key. Keys that have
* the same group will be batched together.
* <p>
* The interaction between ParSeq and {@code BatchingStrategy} is the following:
* <ol>
* <li>{@code batchable(String desc, K key)} is invoked to create Task instance</li>
* <li>Plan is started by {@code Engine.run()}</li>
* <li>When Task returned by {@code batchable(String desc, K key)} is started, the key {@code K} is remembered by a {@code BatchingStrategy}</li>
* <li>When Plan can't make immediate progress {@code BatchingStrategy} will be invoked to run batchable operations:
* <ol>
* <li>Every {@code K key} is classified using {@code classify(K key)} method</li>
* <li>Keys, together with their corresponding Promises, are batched together based on {@code G group} returned by previous step</li>
* <li>Method {@code executeBatch(G group, Batch<K, T> batch)} is invoked for every batch</li>
* </ol>
* {@code executeBatch(G group, Batch<K, T> batch)} invocations are executed
* in the context of their own Task instances with description given by {@code getBatchName(G group, Batch<K, T> batch)}.
* Implementation of {@code BatchingStrategy} has to be fast because it is executed sequentially with respect to tasks belonging
* to the plan. It means that no other task will be executed until {@code BatchingStrategy} completes. Typically {@code classify(K key)}
* is a synchronous and fast operation whilst {@code executeBatch(G group, Batch<K, T> batch)} returns quickly and completes
* promises asynchronously.
* </li>
* </ol>
*
* @author Jaroslaw Odzga (jodzga@linkedin.com)
*
* @param <G> Type of a Group
* @param <K> Type of a Key
* @param <T> Type of a Value
*
* @see SimpleBatchingStrategy
* @see TaskBatchingStrategy
*/
public abstract class BatchingStrategy<G, K, T> {
/** Maximum batch size returned by {@link #maxBatchSizeForGroup(Object)} unless it is overridden. */
public static final int DEFAULT_MAX_BATCH_SIZE = 1024;
private static final Logger LOGGER = LoggerFactory.getLogger(BatchingStrategy.class);
/** Key size returned by {@link #keySize(Object, Object)} unless it is overridden. */
private static final int DEFAULT_KEY_SIZE = 1;
// Batches under construction, keyed by plan id. Entries are created by batchable tasks when they run
// and removed by handleBatch(PlanContext) for the plan being handled.
private final ConcurrentMap<Long, GroupBatchBuilder> _batches =
new ConcurrentHashMap<>();
// Updated with the size of a batch every time a task is created for that batch (see taskForBatch).
private final BatchSizeMetric _batchSizeMetric = new BatchSizeMetric();
// Passed to every BatchBuilder created by this strategy; the builder decides what gets recorded in it.
private final BatchAggregationTimeMetric _batchAggregationTimeMetric = new BatchAggregationTimeMetric();
/**
 * Creates a task that resolves the value for a single key while letting this strategy group the
 * lookup together with other keys that belong to the same plan.
 * <p>
 * When the returned task runs, the key is classified and registered with the batch builder of the
 * current plan. If the registration hands back a batch, that batch is executed immediately as part of
 * this task; otherwise the key stays registered until the pending batches of the plan are handled.
 *
 * @param desc description of the task
 * @param key key
 * @return Task that returns value for a single key allowing this strategy to batch operations
 */
public Task<T> batchable(final String desc, final K key) {
  final Task<T> task = Task.async(desc, ctx -> {
    final BatchPromise<T> promise = new BatchPromise<>();
    // one GroupBatchBuilder per plan, created lazily by the first batchable task of that plan
    final GroupBatchBuilder planBatches =
        _batches.computeIfAbsent(ctx.getPlanId(), id -> new GroupBatchBuilder());
    final G group = classify(key);
    final Batch<K, T> readyBatch = planBatches.add(group, key, ctx.getShallowTraceBuilder(), promise);
    if (readyBatch == null) {
      // nothing to execute yet, the promise is completed once the batch holding the key runs
      return promise;
    }
    try {
      ctx.run(taskForBatch(group, readyBatch, true));
    } catch (Throwable t) {
      // it does not matter whether some promises have already been completed,
      // what matters is that all remaining promises get failed
      readyBatch.failAll(t);
    }
    return promise;
  });
  task.getShallowTraceBuilder().setTaskType("batched");
  return task;
}
/**
 * Convenience overload of {@link #batchable(String, Object)} that derives the task description
 * from the key's {@code toString()}.
 *
 * @param key key
 * @return Task that returns value for a single key allowing this strategy to batch operations
 */
public Task<T> batchable(final K key) {
  final String desc = "batchableTaskForKey: " + key.toString();
  return batchable(desc, key);
}
/**
 * Creates the task that executes one batch. The batch size metric is updated and, when debug logging
 * is enabled, the batch content is logged at the moment this method is called, i.e. before the
 * returned task runs.
 * <p>
 * When run, the task adds a trace relationship between itself and every batched entry, wires the
 * promises of the entries to its own promise and then invokes {@code executeBatchWithContext}.
 * The task is marked as system hidden in the trace.
 *
 * @param group group the batch was built for
 * @param batch batch to execute
 * @param hasParent {@code true} when the task is started from a batchable task through {@code ctx.run()};
 *          in that case every entry gets a {@code POTENTIAL_CHILD_OF} relationship, otherwise the first
 *          entry trace gets {@code CHILD_OF} and the remaining ones {@code POTENTIAL_CHILD_OF}
 * @return task executing the batch
 */
private Task<?> taskForBatch(final G group, final Batch<K, T> batch, final boolean hasParent) {
  _batchSizeMetric.record(batch.batchSize());
  if (LOGGER.isDebugEnabled()) {
    LOGGER.debug(debugInfo(group, batch));
  }
  final String batchName = getBatchName(group, batch);
  return Task.async(batchName, ctx -> {
    final SettablePromise<T> batchDone = Promises.settable();
    // sized by the number of keys; presumably settles batchDone once that many entry promises complete
    final PromiseListener<T> completionCounter =
        new CountDownPromiseListener<T>(batch.keySize(), batchDone, null);
    final TraceBuilder traces = ctx.getTraceBuilder();
    boolean parentAssigned = false;
    for (BatchEntry<T> entry : batch.values()) {
      for (ShallowTraceBuilder entryTrace : entry.getShallowTraceBuilders()) {
        // at most one CHILD_OF relationship per batch, and only if the batch task has no parent
        final boolean becomesParent = !hasParent && !parentAssigned;
        final Relationship relationship =
            becomesParent ? Relationship.CHILD_OF : Relationship.POTENTIAL_CHILD_OF;
        traces.addRelationship(relationship, ctx.getShallowTraceBuilder(), entryTrace);
        if (becomesParent) {
          parentAssigned = true;
        }
      }
      final BatchPromise<T> entryPromise = entry.getPromise();
      entryPromise.getInternal().addListener(completionCounter);
      // entry promises are triggered only after the promise of the whole batch task is resolved
      batchDone.addListener(ignored -> entryPromise.trigger());
    }
    try {
      executeBatchWithContext(group, batch, ctx);
    } catch (Throwable t) {
      batch.failAll(t);
    }
    ctx.getShallowTraceBuilder().setSystemHidden(true);
    return batchDone;
  });
}
/**
 * Runs a batch in a fresh context created directly on top of the plan context, i.e. without
 * a parent task. Any failure while creating or starting the batch task fails the batch.
 *
 * @param planContext context of the plan the batch belongs to
 * @param group group the batch was built for
 * @param batch batch to execute
 */
private void runBatch(final PlanContext planContext, G group, final Batch<K, T> batch) {
  try {
    final Task<?> batchTask = taskForBatch(group, batch, false);
    new ContextImpl(planContext, batchTask).runTask();
  } catch (Throwable t) {
    // it does not matter whether some promises have already been completed,
    // what matters is that all remaining promises get failed
    batch.failAll(t);
  }
}
/**
 * Executes all batches that are pending for the given plan. The pending batches are detached from
 * this strategy first, then every group's builder is built and run as a separate batch.
 *
 * @param planContext context of the plan whose pending batches should be executed
 */
void handleBatch(final PlanContext planContext) {
  final GroupBatchBuilder pending = _batches.remove(planContext.getId());
  if (pending == null) {
    // no batchable task has registered a key for this plan
    return;
  }
  pending.batches().forEach((group, groupBuilder) -> runBatch(planContext, group, groupBuilder.build()));
}
/**
 * Renders a multi-line, human readable description of a batch for debug logging: the group
 * followed by one line per key.
 *
 * @param group group the batch was built for
 * @param batch batch to describe
 * @return textual description of the group and the keys of the batch
 */
private String debugInfo(G group, Batch<K, T> batch) {
  final StringBuilder text = new StringBuilder("\n");
  text.append("group: ");
  text.append(group);
  text.append("\n");
  text.append("batch keys: \n");
  for (K key : batch.keys()) {
    text.append(" ").append(key).append("\n");
  }
  return text.toString();
}
/**
* Returns the metric that this strategy updates with the size of every batch it creates a task for.
* @return batch size metric of this strategy
*/
public BatchSizeMetric getBatchSizeMetric() {
return _batchSizeMetric;
}
/**
* Returns the metric instance that this strategy passes to every batch builder it creates.
* @return batch aggregation time metric of this strategy
*/
public BatchAggregationTimeMetric getBatchAggregationTimeMetric() {
return _batchAggregationTimeMetric;
}
/**
* This method will be called for every {@code Batch}.
* Implementation of this method must make sure that all {@code SettablePromise} contained in the {@code Batch}
* will eventually be resolved - typically asynchronously. Failing to eventually resolve any
* of the promises may lead to a plan that never completes, i.e. appears to hang, and may lead to
* a memory leak.
* <p>
* If this method throws, the exception is caught by the task executing the batch and passed to
* {@code batch.failAll()}.
* @param group group that represents the batch
* @param batch batch contains collection of {@code SettablePromise} that eventually need to be resolved - typically asynchronously
*/
public abstract void executeBatch(G group, Batch<K, T> batch);
/**
* Variant of {@link #executeBatch(Object, Batch)} that additionally receives the {@code Context} of the
* task executing the batch. This is the method that the batch task actually invokes; the default
* implementation ignores the context and delegates to {@code executeBatch(group, batch)}.
* @param group group that represents the batch
* @param batch batch to be executed
* @param ctx context of the task that executes the batch
*/
protected void executeBatchWithContext(G group, Batch<K, T> batch, Context ctx) {
executeBatch(group, batch);
}
/**
* Classify the {@code K Key} and by doing so assign it to a {@code G group}.
* If two keys are classified by the same group then they will belong to the same {@code Batch}.
* The returned group is used as a key of a {@code HashMap}, so two groups are considered the same
* according to their {@code equals()} and {@code hashCode()}.
* This method is invoked once for a key, from within the task returned by {@code batchable()}.
* This method needs to be thread safe.
* @param key key to be classified
* @return Group that represents a batch the key will belong to
*/
public abstract G classify(K key);
/**
* Overriding this method allows specifying maximum batch size for a given group.
* Default value is {@value #DEFAULT_MAX_BATCH_SIZE}.
* The value is read every time a new batch builder is created for the group.
* @param group group for which maximum batch size needs to be decided
* @return maximum batch size for a given group
*/
public int maxBatchSizeForGroup(G group) {
return DEFAULT_MAX_BATCH_SIZE;
}
/**
* Overriding this method allows specifying size of the key for a given group.
* Default value is 1. This method is used when calculating batch size and making sure
* that it does not exceed max batch size for a group.
* @param group group the key has been classified to
* @param key key whose size needs to be decided
* @return size of the key
* @see #maxBatchSizeForGroup(Object)
*/
public int keySize(G group, K key) {
return DEFAULT_KEY_SIZE;
}
/**
* Overriding this method allows providing custom name for a batch. Name will appear in the
* ParSeq trace as a description of the task that executes the batch.
* Default name has the form {@code batch(keys: <batch.keySize()>, size: <batch.batchSize()>)}.
* @param group group to be described
* @param batch batch to be described
* @return name for the batch and group
*/
public String getBatchName(G group, Batch<K, T> batch) {
return "batch(keys: " + batch.keySize() + ", size: " + batch.batchSize() + ")";
}
/**
 * Keeps, for a single plan, the batch builder that currently accepts keys for each group.
 */
private class GroupBatchBuilder {

  private final Map<G, BatchBuilder<K, T>> _batchesByGroup = new HashMap<>();

  /**
   * Adds a new entry to the batch of the given group.
   *
   * @param group group the key has been classified to
   * @param key key to be added
   * @param traceBuilder trace of the task that requested the key
   * @param promise promise to be completed with the value for the key
   * @return a batch that should be executed right away, or {@code null} if the entry has been
   *         added to a batch that is still not full
   */
  Batch<K, T> add(G group, K key, ShallowTraceBuilder traceBuilder, BatchPromise<T> promise) {
    final int size = keySize(group, key);
    final BatchBuilder<K, T> current = _batchesByGroup.computeIfAbsent(group, unused -> emptyBuilder(group));
    // invariant: current is not full - max batch size is >= 1 and a builder is removed
    // from the map right after the entry that makes it full has been added
    if (current.add(key, traceBuilder, promise, size)) {
      if (!current.isFull()) {
        return null;
      }
      _batchesByGroup.remove(group);
      return current.build();
    }

    // the entry did not fit into the current builder, so it gets a builder of its own;
    // first add to an empty builder is always successful as per builder contract
    final BatchBuilder<K, T> overflow = emptyBuilder(group);
    overflow.add(key, traceBuilder, promise, size);
    if (overflow.isFull() || current.batchSize() <= overflow.batchSize()) {
      // the new builder is executed, the current one keeps collecting entries
      return overflow.build();
    }
    // the current builder is the larger one: execute it and let the new one collect entries
    _batchesByGroup.put(group, overflow);
    return current.build();
  }

  /**
   * @return builders that are still collecting entries, by group
   */
  Map<G, BatchBuilder<K, T>> batches() {
    return _batchesByGroup;
  }

  /** Creates an empty builder configured with the max batch size of the given group. */
  private BatchBuilder<K, T> emptyBuilder(G group) {
    return new BatchBuilder<>(maxBatchSizeForGroup(group), _batchAggregationTimeMetric);
  }
}
}