package org.rakam.kume.service.ringmap;
import org.rakam.kume.Cluster;
import org.rakam.kume.KryoSerializable;
import org.rakam.kume.MigrationListener;
import org.rakam.kume.service.PausableService;
import org.rakam.kume.transport.OperationContext;
import org.rakam.kume.transport.Request;
import org.rakam.kume.util.ConsistentHashRing;
import org.rakam.kume.Member;
import org.rakam.kume.MembershipListener;
import org.rakam.kume.ServiceContext;
import org.rakam.kume.util.FutureUtil.MultipleFutureListener;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.lang.reflect.Array;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicReference;
import java.util.function.BiFunction;
import java.util.function.Supplier;
import static org.rakam.kume.util.ConsistentHashRing.hash;
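/**
 * A distributed map service that partitions its entries over the cluster with a
 * {@link ConsistentHashRing}. Keys are hashed onto the ring, each token range
 * (bucket) is replicated to {@code replicationFactor} members, and the local
 * member only materializes the buckets it owns. When membership changes, the
 * ring is rebuilt and the affected entries are migrated between members.
 */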
public abstract class AbstractRingMap<C extends AbstractRingMap, M extends Map, K, V> extends PausableService<C>
implements MembershipListener {
    private static final Logger LOGGER = LoggerFactory.getLogger(AbstractRingMap.class);
private final MapMergePolicy<V> mergePolicy;
private final int replicationFactor;
private final Supplier<M> mapSupplier;
protected M[] map;
protected int[] bucketIds;
private ConsistentHashRing ring;
    private static final Random random = new Random();
private final int bucketCount;
protected final Member localMember;
    private final List<MigrationListener> migrationListeners = new LinkedList<>();
Map<ConsistentHashRing.TokenRange, Map<K, V>> dataWaitingForMigration = new HashMap<>();
public AbstractRingMap(ServiceContext<C> serviceContext, Supplier<M> mapSupplier, MapMergePolicy<V> mergePolicy, int bucketCount, int replicationFactor) {
super(serviceContext);
this.mergePolicy = mergePolicy;
this.replicationFactor = replicationFactor;
this.mapSupplier = mapSupplier;
this.bucketCount = bucketCount;
Cluster cluster = getContext().getCluster();
cluster.addMembershipListener(this);
localMember = cluster.getLocalMember();
// if we're the master node and initializing the service, then it's a new service.
if (cluster.getMaster().equals(localMember)) {
ConsistentHashRing newRing = new ConsistentHashRing(cluster.getMembers(), bucketCount, replicationFactor);
ring = newRing;
bucketIds = createBucketForRing(newRing);
map = createEmptyMap(ring);
} else {
CompletableFuture<ConsistentHashRing> ringFuture = getContext().ask(cluster.getMaster(), (service, ctx0) -> ctx0.reply(service.getRing()));
            ConsistentHashRing remoteRing = ringFuture.join();
            setRing(remoteRing);
}
}
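    /**
     * Returns the ids of the buckets in {@code ring} owned by the local member,
     * sorted ascending so that {@code Arrays.binarySearch(bucketIds, bucketId)}
     * can translate a bucket id into an index of {@code map}.
     */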
protected int[] createBucketForRing(ConsistentHashRing ring) {
return ring.getBuckets().entrySet().stream()
.filter(entry -> entry.getValue().contains(localMember))
.mapToInt(entry -> entry.getKey().id).sorted().toArray();
}
public int getBucketCount() {
return bucketCount;
}
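    /**
     * Allocates one backing map per locally owned bucket. Because {@code M} is
     * erased at runtime, the array is created reflectively from the concrete
     * class of an existing bucket or of a freshly supplied sample instance.
     */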
protected M[] createEmptyMap(ConsistentHashRing ring) {
Member localMember = getContext().getCluster().getLocalMember();
int count = (int) ring.getBuckets().entrySet().stream()
.filter(x -> x.getValue().contains(localMember)).count();
Class clazz;
M sample = null;
if (map != null) {
clazz = map[0].getClass();
} else {
sample = mapSupplier.get();
clazz = sample.getClass();
}
// hacky way to create generic array
M[] arr = (M[]) Array.newInstance(clazz, count);
int i = 0;
if (sample != null) {
i = 1;
arr[0] = sample;
}
for (; i < count; ++i) {
arr[i] = mapSupplier.get();
}
return arr;
}
    public void logOwnedBuckets() {
        StringBuilder str = new StringBuilder();
        str.append("ownedBuckets[").append(map.length).append("]: ");
        int total = 0;
        for (Map m : map) {
            str.append('[').append(m.size()).append(']');
            total += m.size();
        }
        str.append(" = ").append(total);
        LOGGER.debug(str.toString());
    }
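    /**
     * Called by the cluster when a new member joins. Adds the member to the ring
     * and blocks until the resulting migrations complete, so ring changes are
     * applied one at a time.
     */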
@Override
public synchronized void memberAdded(Member member) {
if (ring.getMembers().contains(member)) {
            // we already joined this cluster by fetching the ring from an existing node
return;
}
LOGGER.debug("Adding member {} to existing cluster of {} nodes.", member, getContext().getCluster().getMembers().size());
ConsistentHashRing newRing = ring.addNode(member);
changeRing(newRing).join();
}
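    /**
     * Adopts {@code newRing} wholesale, as done when joining an existing cluster:
     * for every range this member owns in the new ring, a current owner is asked
     * for the range's entries, which are installed into freshly created buckets.
     */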
private synchronized CompletableFuture<Void> setRing(ConsistentHashRing newRing) {
M[] newMap = createEmptyMap(newRing);
int[] newBucketIds = createBucketForRing(newRing);
ArrayList<CompletableFuture> migrations = new ArrayList<>();
for (Map.Entry<ConsistentHashRing.TokenRange, List<Member>> entry : newRing.getBuckets().entrySet()) {
ConsistentHashRing.TokenRange range = entry.getKey();
List<Member> members = entry.getValue();
if (members.contains(localMember)) {
                Member ownerMember = members.get(members.indexOf(localMember) % members.size());
LOGGER.debug("asking entries [{}, {}] from {}", range.start, range.end, ownerMember);
CompletableFuture<Map<K, V>> ask = getContext().ask(ownerMember, new ChangeRingRequest(range.start, range.end));
CompletableFuture<Void> f = ask
.thenAccept(data -> {
newMap[Arrays.binarySearch(newBucketIds, range.id)].putAll(data);
if (!ownerMember.equals(localMember))
LOGGER.debug("{} elements in token[{} - {}] moved from {} to {}", data.size(), range.start, range.end, ownerMember, localMember);
});
migrations.add(f);
}
}
if (migrations.size() > 0) {
migrationListeners.forEach(l -> getContext().eventLoop().execute(() -> l.migrationStart(localMember)));
}
// resume when all migrations completed
return CompletableFuture.allOf(migrations.toArray(new CompletableFuture[migrations.size()]))
.thenRun(() -> {
LOGGER.debug("{} migration completed. New ring has {} buckets in member {}",
migrations.size(), newRing.getBuckets().size(), localMember);
synchronized (getContext()) {
bucketIds = newBucketIds;
map = newMap;
ring = newRing;
}
migrationListeners.forEach(l -> getContext().eventLoop().execute(() -> l.migrationEnd(localMember)));
logOwnedBuckets();
});
}
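    /**
     * Migrates from the current ring to {@code newRing} incrementally. Each
     * new-ring range this member owns may overlap several old-ring buckets, so
     * the range is split at the old bucket boundaries and each sub-range is
     * fetched from a member that owned it in the old ring (a local copy when we
     * already hold it). Buckets we no longer own are parked in
     * {@code dataWaitingForMigration} until their new owner pulls them.
     */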
private CompletableFuture<Void> changeRing(ConsistentHashRing newRing) {
ConsistentHashRing oldRing = ring;
M[] newMap = createEmptyMap(newRing);
int[] newBucketIds = createBucketForRing(newRing);
ArrayList<CompletableFuture> migrations = new ArrayList<>();
for (Map.Entry<ConsistentHashRing.TokenRange, List<Member>> entry : newRing.getBuckets().entrySet()) {
ConsistentHashRing.TokenRange range = entry.getKey();
List<Member> members = entry.getValue();
int start = oldRing.findBucketIdFromToken(range.start);
int end = oldRing.findBucketIdFromToken(range.end - 1);
            if (end < start) end += oldRing.getBucketCount();
if (members.contains(localMember)) {
long cursor = range.start;
                for (int i = start; i <= end; i++) {
                    // wrap around the ring; end may exceed the bucket count when the range wraps.
                    // a separate loop variable keeps the loop bounds intact
                    int bucketId = i % oldRing.getBucketCount();
ConsistentHashRing.Bucket oldBucket = oldRing.getBucket(bucketId);
List<Member> oldBucketMembers = oldBucket.members;
Member ownerMember;
if (oldBucketMembers.contains(localMember)) {
ownerMember = localMember;
} else {
int index = members.indexOf(localMember) % oldBucketMembers.size();
ownerMember = oldBucketMembers.get(index);
}
                    long queryStartToken = cursor;
                    long nextBucketToken = oldRing.getBucket(bucketId + 1).token;
                    // the sub-range is inclusive: stop at whichever comes first, the end
                    // of this new-ring range or the start of the next old-ring bucket
                    long queryEndToken = Math.min(nextBucketToken, range.end) - 1;
                    // the next sub-range starts right after this inclusive end
                    cursor = queryEndToken + 1;
boolean isLocalOp = ownerMember.equals(localMember);
if (!isLocalOp)
LOGGER.debug("asking entries [{}, {}] from {}", queryStartToken, ownerMember);
else
LOGGER.trace("asking entries [{}, {}] from {}", queryStartToken, ownerMember);
CompletableFuture<Map<K, V>> ask = getContext().ask(ownerMember, new ChangeRingRequest(queryStartToken, queryEndToken, oldRing));
CompletableFuture<Void> f = ask
.thenAccept(data -> {
int startBucket = newRing.findBucketIdFromToken(queryStartToken);
int nextBucket = newRing.findBucketIdFromToken(queryEndToken);
if (startBucket == nextBucket) {
newMap[Arrays.binarySearch(newBucketIds, startBucket)].putAll(data);
} else {
data.forEach((key, value) -> {
int i = Arrays.binarySearch(newBucketIds, newRing.findBucketIdFromToken(ConsistentHashRing.hash(key)));
if (i >= 0) {
Map<K, V> partition = newMap[i];
partition.put(key, value);
}
});
}
if (!ownerMember.equals(localMember))
LOGGER.debug("{} elements in token[{} - {}] moved from {} to {}", data.size(), queryStartToken, queryEndToken, ownerMember, localMember);
});
migrations.add(f);
}
            } else {
                // we don't remove the old entries here: the new owner will request
                // them via a migration request and drop them afterwards, which
                // avoids the need for consensus between nodes when changing the ring.
                for (int i = start; i <= end; i++) {
                    int bucketId = i % oldRing.getBucketCount();
                    // skip old buckets this member never owned, and merge the rest so
                    // multiple old buckets covering the same range don't overwrite each other
                    if (Arrays.binarySearch(bucketIds, bucketId) < 0)
                        continue;
                    dataWaitingForMigration
                            .computeIfAbsent(range, r -> new HashMap<>())
                            .putAll(getBucket(bucketId));
                }
            }
}
if (migrations.size() > 0) {
migrationListeners.forEach(l -> getContext().eventLoop().execute(() -> l.migrationStart(localMember)));
}
// resume when all migrations completed
return CompletableFuture.allOf(migrations.toArray(new CompletableFuture[migrations.size()]))
.thenRun(() -> {
LOGGER.debug("{} migration completed. New ring has {} buckets in member {}",
migrations.size(), newRing.getBuckets().size(), localMember);
synchronized (getContext()) {
bucketIds = newBucketIds;
map = newMap;
ring = newRing;
}
migrationListeners.forEach(l -> getContext().eventLoop().execute(() -> l.migrationEnd(localMember)));
logOwnedBuckets();
});
}
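    /**
     * Returns the backing map of a locally owned bucket. Throws
     * {@link IllegalArgumentException} when the bucket isn't owned by this member.
     */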
protected Map<K, V> getBucket(int bucketId) {
return map[getPartitionId(bucketId)];
}
protected int getPartitionId(int bucketId) {
int i = Arrays.binarySearch(bucketIds, bucketId);
if (i < 0)
            throw new IllegalArgumentException("bucket " + bucketId + " is not owned by this member");
return i;
}
@Override
public synchronized void memberRemoved(Member member) {
if (isPaused()) {
addQueueIfPaused(() -> memberRemoved(member));
} else {
changeRing(ring.removeNode(member));
}
}
@Override
public void clusterMerged(Set<Member> newMembers) {
}
public void onClose() {
Arrays.stream(map).forEach(x -> x.clear());
}
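    /**
     * Puts every entry of {@code fromMap}, routing each one to the replicas of
     * its bucket. An illustrative call, assuming a concrete subclass instantiated
     * as {@code AbstractRingMap<?, ?, String, Integer> ringMap} (the names here
     * are hypothetical):
     * <pre>{@code
     * Map<String, Integer> batch = new HashMap<>();
     * batch.put("a", 1);
     * batch.put("b", 2);
     * ringMap.putAll(batch).join(); // completes when each entry reached a majority of its replicas
     * }</pre>
     */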
    public CompletableFuture<Void> putAll(Map<K, V> fromMap) {
        // route each entry through put(), which writes it to the owning bucket's
        // replicas and completes once a majority of them acknowledge it
        CompletableFuture[] futures = fromMap.entrySet().stream()
                .map(entry -> put(entry.getKey(), entry.getValue()))
                .toArray(CompletableFuture[]::new);
        return CompletableFuture.allOf(futures);
    }
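    /**
     * Writes a single entry to every replica of the key's bucket and completes
     * once a majority ({@code N/2 + 1}) of the replicas have acknowledged it;
     * the local replica, if any, is written synchronously.
     */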
public CompletableFuture<Void> put(K key, V val) {
int bucketId = ring.findBucketIdFromToken(ConsistentHashRing.hash(key));
ConsistentHashRing.Bucket bucket = ring.getBucket(bucketId);
MultipleFutureListener listener = new MultipleFutureListener((bucket.members.size() / 2) + 1);
for (Member next : bucket.members) {
if (next.equals(localMember)) {
putLocal(key, val);
listener.increment();
} else {
listener.listen(getContext().ask(next, new PutMapOperation(key, val)));
}
}
return listener.get();
}
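    /**
     * Reads a key from the local bucket when this member is a replica, otherwise
     * from one randomly chosen replica. The read is not quorum-based, so it may
     * observe a value that a majority write hasn't reached yet.
     */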
public CompletableFuture<V> get(K key) {
int bucketId = ring.findBucketId(key);
ConsistentHashRing.Bucket bucket = ring.getBucket(bucketId);
ArrayList<Member> members = bucket.members;
if (members.contains(localMember)) {
return CompletableFuture.completedFuture(getBucket(bucketId).get(key));
}
return getContext().ask(members.get(random.nextInt(members.size())),
new GetRequest(this, key));
}
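    /**
     * Reads the key from every replica of its bucket and folds the replies
     * through the {@link MapMergePolicy}, returning the merged value; useful
     * when replicas may have diverged and a single-replica read isn't enough.
     */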
    public CompletableFuture<V> syncAndGet(K key) {
int bucketId = ring.findBucketId(key);
ConsistentHashRing.Bucket bucket = ring.getBucket(bucketId);
AtomicReference<V> merged = new AtomicReference<>();
CompletableFuture<Void>[] res = new CompletableFuture[bucket.members.size()];
for (int i = 0; i < bucket.members.size(); i++) {
CompletableFuture<V> ask = getContext().ask(bucket.members.get(i), new GetRequest(this, key));
res[i] = ask.thenAccept(x -> {
V v = merged.get();
if (v == null) {
merged.set(x);
} else {
merged.set(mergePolicy.merge(v, x));
}
});
}
return CompletableFuture.allOf(res).thenApply(x -> merged.get());
}
    public int getLocalSize() {
        return Arrays.stream(map).mapToInt(Map::size).sum();
    }
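    /**
     * Asks every member for its local entry count. Counts are per member, so an
     * entry replicated to N members contributes to N of the reported sizes.
     */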
    public CompletableFuture<Map<Member, Integer>> size() {
        Request<C, Integer> req = (service, ctx) -> ctx.reply(service.getLocalSize());
        Map<Member, CompletableFuture<Integer>> resultMap = getContext().askAllMembers(req);
        Map<Member, Integer> m = new ConcurrentHashMap<>(getContext().getCluster().getMembers().size());
        m.put(localMember, getLocalSize());
        // collect every member's reply and complete once all of them have answered
        CompletableFuture[] futures = resultMap.entrySet().stream()
                .map(e -> e.getValue().thenAccept(size -> m.put(e.getKey(), size)))
                .toArray(CompletableFuture[]::new);
        return CompletableFuture.allOf(futures).thenApply(x -> m);
    }
public ConsistentHashRing getRing() {
return ring;
}
public int getReplicationFactor() {
return replicationFactor;
}
public void listenMigrations(MigrationListener migrationListener) {
migrationListeners.add(migrationListener);
}
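    /**
     * Runs {@code execute} against the current value of {@code key} on the first
     * replica of its bucket, writing the value back only when the
     * {@link Modifiable} wrapper reports a change. An illustrative read-only
     * call, assuming String values (the key name is hypothetical):
     * <pre>{@code
     * CompletableFuture<Integer> length = ringMap.execute("someKey",
     *         (k, modifiable) -> modifiable.value() == null ? 0 : modifiable.value().length());
     * }</pre>
     */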
public <R> CompletableFuture<R> execute(K key, BiFunction<K, Modifiable<V>, R> execute) {
int bucketId = ring.findBucketId(key);
ConsistentHashRing.Bucket bucket = ring.getBucket(bucketId);
ArrayList<Member> members = bucket.members;
return getContext().ask(members.get(0), (service, ctx) -> {
Map<K, V> partition = service.getBucket(service.getRing().findBucketId(key));
Modifiable<V> vModifiable = new Modifiable<>(partition.get(key));
R apply = execute.apply(key, vModifiable);
            if (vModifiable.changed()) {
partition.put(key, vModifiable.value());
}
ctx.reply(apply);
});
}
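    /**
     * The request {@link #put} sends to remote replicas; registered with Kryo
     * (id 10) so it can travel over the wire.
     */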
@KryoSerializable(id=10)
public static class PutMapOperation implements Request<AbstractRingMap, Void> {
Object key;
Object value;
public PutMapOperation(Object key, Object value) {
this.key = key;
this.value = value;
}
@Override
public void run(AbstractRingMap service, OperationContext ctx) {
service.putLocal(key, value);
ctx.reply(null);
}
}
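    /**
     * Applies a put to the locally owned bucket of the key's token, logging and
     * discarding the request when this member doesn't own that token.
     */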
    protected void putLocal(K key, V value) {
        int bucketId = ring.findBucketId(key);
        // getBucket() throws rather than returning null, so check ownership explicitly
        int partitionIndex = Arrays.binarySearch(bucketIds, bucketId);
        if (partitionIndex < 0) {
            LOGGER.error("Discarded put request for key {} because this node doesn't own that token.", key);
        } else {
            map[partitionIndex].put(key, value);
        }
    }
}