/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.cassandra.utils.concurrent;
import java.util.concurrent.atomic.AtomicIntegerFieldUpdater;
/**
* <p>A class for providing synchronization between producers and consumers that do not
* communicate directly with each other, but where the consumers need to process their
* work in contiguous batches. In particular this is useful for both CommitLog and Memtable
* where the producers (writing threads) are modifying a structure that the consumer
* (flush executor) only batch syncs, but needs to know what 'position' the work is at
* for co-ordination with other processes,
*
* <p>The typical usage is something like:
* <pre>
public final class ExampleShared
{
final OpOrder order = new OpOrder();
volatile SharedState state;
static class SharedState
{
volatile Barrier barrier;
// ...
}
public void consume()
{
SharedState state = this.state;
state.setReplacement(new State())
state.doSomethingToPrepareForBarrier();
state.barrier = order.newBarrier();
// seal() MUST be called after newBarrier() else barrier.isAfter()
// will always return true, and barrier.await() will fail
state.barrier.issue();
// wait for all producer work started prior to the barrier to complete
state.barrier.await();
// change the shared state to its replacement, as the current state will no longer be used by producers
this.state = state.getReplacement();
state.doSomethingWithExclusiveAccess();
}
public void produce()
{
try (Group opGroup = order.start())
{
SharedState s = state;
while (s.barrier != null && !s.barrier.isAfter(opGroup))
s = s.getReplacement();
s.doProduceWork();
}
}
}
* </pre>
*/
public class OpOrder
{
/**
* Constant that when an Ordered.running is equal to, indicates the Ordered is complete
*/
private static final int FINISHED = -1;
/**
* A linked list starting with the most recent Ordered object, i.e. the one we should start new operations from,
* with (prev) links to any incomplete Ordered instances, and (next) links to any potential future Ordered instances.
* Once all operations started against an Ordered instance and its ancestors have been finished the next instance
* will unlink this one
*/
private volatile Group current = new Group();
/**
* Start an operation against this OpOrder.
* Once the operation is completed Ordered.close() MUST be called EXACTLY once for this operation.
*
* @return the Ordered instance that manages this OpOrder
*/
public Group start()
{
while (true)
{
Group current = this.current;
if (current.register())
return current;
}
}
/**
* Creates a new barrier. The barrier is only a placeholder until barrier.issue() is called on it,
* after which all new operations will start against a new Group that will not be accepted
* by barrier.isAfter(), and barrier.await() will return only once all operations started prior to the issue
* have completed.
*
* @return
*/
public Barrier newBarrier()
{
return new Barrier();
}
public Group getCurrent()
{
return current;
}
public void awaitNewBarrier()
{
Barrier barrier = newBarrier();
barrier.issue();
barrier.await();
}
/**
* Represents a group of identically ordered operations, i.e. all operations started in the interval between
* two barrier issuances. For each register() call this is returned, close() must be called exactly once.
* It should be treated like taking a lock().
*/
public static final class Group implements Comparable<Group>, AutoCloseable
{
/**
* In general this class goes through the following stages:
* 1) LIVE: many calls to register() and close()
* 2) FINISHING: a call to expire() (after a barrier issue), means calls to register() will now fail,
* and we are now 'in the past' (new operations will be started against a new Ordered)
* 3) FINISHED: once the last close() is called, this Ordered is done. We call unlink().
* 4) ZOMBIE: all our operations are finished, but some operations against an earlier Ordered are still
* running, or tidying up, so unlink() fails to remove us
* 5) COMPLETE: all operations started on or before us are FINISHED (and COMPLETE), so we are unlinked
* <p/>
* Another parallel states is ISBLOCKING:
* <p/>
* isBlocking => a barrier that is waiting on us (either directly, or via a future Ordered) is blocking general
* progress. This state is entered by calling Barrier.markBlocking(). If the running operations are blocked
* on a Signal that is also registered with the isBlockingSignal (probably through isSafeBlockingSignal)
* then they will be notified that they are blocking forward progress, and may take action to avoid that.
*/
private volatile Group prev, next;
private final long id; // monotonically increasing id for compareTo()
private volatile int running = 0; // number of operations currently running. < 0 means we're expired, and the count of tasks still running is -(running + 1)
private volatile boolean isBlocking; // indicates running operations are blocking future barriers
private final WaitQueue isBlockingSignal = new WaitQueue(); // signal to wait on to indicate isBlocking is true
private final WaitQueue waiting = new WaitQueue(); // signal to wait on for completion
static final AtomicIntegerFieldUpdater<Group> runningUpdater = AtomicIntegerFieldUpdater.newUpdater(Group.class, "running");
// constructs first instance only
private Group()
{
this.id = 0;
}
private Group(Group prev)
{
this.id = prev.id + 1;
this.prev = prev;
}
// prevents any further operations starting against this Ordered instance
// if there are no running operations, calls unlink; otherwise, we let the last op to close call it.
// this means issue() won't have to block for ops to finish.
private void expire()
{
while (true)
{
int current = running;
if (current < 0)
throw new IllegalStateException();
if (runningUpdater.compareAndSet(this, current, -1 - current))
{
// if we're already finished (no running ops), unlink ourselves
if (current == 0)
unlink();
return;
}
}
}
// attempts to start an operation against this Ordered instance, and returns true if successful.
private boolean register()
{
while (true)
{
int current = running;
if (current < 0)
return false;
if (runningUpdater.compareAndSet(this, current, current + 1))
return true;
}
}
/**
* To be called exactly once for each register() call this object is returned for, indicating the operation
* is complete
*/
public void close()
{
while (true)
{
int current = running;
if (current < 0)
{
if (runningUpdater.compareAndSet(this, current, current + 1))
{
if (current + 1 == FINISHED)
{
// if we're now finished, unlink ourselves
unlink();
}
return;
}
}
else if (runningUpdater.compareAndSet(this, current, current - 1))
{
return;
}
}
}
/**
* called once we know all operations started against this Ordered have completed,
* however we do not know if operations against its ancestors have completed, or
* if its descendants have completed ahead of it, so we attempt to create the longest
* chain from the oldest still linked Ordered. If we can't reach the oldest through
* an unbroken chain of completed Ordered, we abort, and leave the still completing
* ancestor to tidy up.
*/
private void unlink()
{
// walk back in time to find the start of the list
Group start = this;
while (true)
{
Group prev = start.prev;
if (prev == null)
break;
// if we haven't finished this Ordered yet abort and let it clean up when it's done
if (prev.running != FINISHED)
return;
start = prev;
}
// now walk forwards in time, in case we finished up late
Group end = this.next;
while (end.running == FINISHED)
end = end.next;
// now walk from first to last, unlinking the prev pointer and waking up any blocking threads
while (start != end)
{
Group next = start.next;
next.prev = null;
start.waiting.signalAll();
start = next;
}
}
/**
* @return true if a barrier we are behind is, or may be, blocking general progress,
* so we should try more aggressively to progress
*/
public boolean isBlocking()
{
return isBlocking;
}
/**
* register to be signalled when a barrier waiting on us is, or maybe, blocking general progress,
* so we should try more aggressively to progress
*/
public WaitQueue.Signal isBlockingSignal()
{
return isBlockingSignal.register();
}
/**
* wrap the provided signal to also be signalled if the operation gets marked blocking
*/
public WaitQueue.Signal isBlockingSignal(WaitQueue.Signal signal)
{
return WaitQueue.any(signal, isBlockingSignal());
}
public int compareTo(Group that)
{
// we deliberately use subtraction, as opposed to Long.compareTo() as we care about ordering
// not which is the smaller value, so this permits wrapping in the unlikely event we exhaust the long space
long c = this.id - that.id;
if (c > 0)
return 1;
else if (c < 0)
return -1;
else
return 0;
}
}
/**
* This class represents a synchronisation point providing ordering guarantees on operations started
* against the enclosing OpOrder. When issue() is called upon it (may only happen once per Barrier), the
* Barrier atomically partitions new operations from those already running (by expiring the current Group),
* and activates its isAfter() method
* which indicates if an operation was started before or after this partition. It offers methods to
* determine, or block until, all prior operations have finished, and a means to indicate to those operations
* that they are blocking forward progress. See {@link OpOrder} for idiomatic usage.
*/
public final class Barrier
{
// this Barrier was issued after all Group operations started against orderOnOrBefore
private volatile Group orderOnOrBefore;
/**
* @return true if @param group was started prior to the issuing of the barrier.
*
* (Until issue is called, always returns true, but if you rely on this behavior you are probably
* Doing It Wrong.)
*/
public boolean isAfter(Group group)
{
if (orderOnOrBefore == null)
return true;
// we subtract to permit wrapping round the full range of Long - so we only need to ensure
// there are never Long.MAX_VALUE * 2 total Group objects in existence at any one timem which will
// take care of itself
return orderOnOrBefore.id - group.id >= 0;
}
/**
* Issues (seals) the barrier, meaning no new operations may be issued against it, and expires the current
* Group. Must be called before await() for isAfter() to be properly synchronised.
*/
public void issue()
{
if (orderOnOrBefore != null)
throw new IllegalStateException("Can only call issue() once on each Barrier");
final Group current;
synchronized (OpOrder.this)
{
current = OpOrder.this.current;
orderOnOrBefore = current;
OpOrder.this.current = current.next = new Group(current);
}
current.expire();
}
/**
* Mark all prior operations as blocking, potentially signalling them to more aggressively make progress
*/
public void markBlocking()
{
Group current = orderOnOrBefore;
while (current != null)
{
current.isBlocking = true;
current.isBlockingSignal.signalAll();
current = current.prev;
}
}
/**
* Register to be signalled once allPriorOpsAreFinished() or allPriorOpsAreFinishedOrSafe() may return true
*/
public WaitQueue.Signal register()
{
return orderOnOrBefore.waiting.register();
}
/**
* @return true if all operations started prior to barrier.issue() have completed
*/
public boolean allPriorOpsAreFinished()
{
Group current = orderOnOrBefore;
if (current == null)
throw new IllegalStateException("This barrier needs to have issue() called on it before prior operations can complete");
if (current.next.prev == null)
return true;
return false;
}
/**
* wait for all operations started prior to issuing the barrier to complete
*/
public void await()
{
while (!allPriorOpsAreFinished())
{
WaitQueue.Signal signal = register();
if (allPriorOpsAreFinished())
{
signal.cancel();
return;
}
else
signal.awaitUninterruptibly();
}
assert orderOnOrBefore.running == FINISHED;
}
/**
* returns the Group we are waiting on - any Group with .compareTo(getSyncPoint()) <= 0
* must complete before await() returns
*/
public Group getSyncPoint()
{
return orderOnOrBefore;
}
}
}