/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package gobblin.util.limiter;
import java.io.Closeable;
import java.io.IOException;
import java.util.Collection;
import java.util.Comparator;
import java.util.Iterator;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.Semaphore;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.locks.Condition;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
import com.codahale.metrics.Histogram;
import com.codahale.metrics.Timer;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Optional;
import com.google.common.base.Preconditions;
import com.google.common.base.Strings;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterables;
import com.google.common.collect.Ordering;
import com.google.common.collect.TreeMultimap;
import com.linkedin.common.callback.Callback;
import com.linkedin.data.template.GetMode;
import com.linkedin.restli.client.Response;
import com.linkedin.restli.client.RestLiResponseException;
import com.linkedin.restli.common.HttpStatus;
import gobblin.metrics.MetricContext;
import gobblin.restli.throttling.PermitAllocation;
import gobblin.restli.throttling.PermitRequest;
import gobblin.util.ExecutorsUtils;
import gobblin.util.NoopCloseable;
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
import javax.annotation.Nullable;
import javax.annotation.concurrent.NotThreadSafe;
import lombok.AccessLevel;
import lombok.Builder;
import lombok.Getter;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
/**
* An object that requests batches of permits from an external throttling server. It tries to hide the latency of doing
* external permit requests by requesting them in batches and preemptively requesting permits before the current ones
* are exhausted.
*/
@Slf4j
class BatchedPermitsRequester {
/** Name of the timer, looked up in the metric context, used to time requests sent through the request sender. */
public static final String REST_REQUEST_TIMER = "limiter.restli.restRequestTimer";
/** Name of the histogram, looked up in the metric context, that records the number of permits in each request. */
public static final String REST_REQUEST_PERMITS_HISTOGRAM = "limiter.restli.restRequestPermitsHistogram";
/** These status codes are considered non-retriable. */
public static final ImmutableSet<Integer> NON_RETRIABLE_ERRORS = ImmutableSet.of(HttpStatus.S_403_FORBIDDEN.getCode(),
HttpStatus.S_422_UNPROCESSABLE_ENTITY.getCode());
/** Target interval, in milliseconds, between external requests. Used when no positive target is supplied. */
public static final long DEFAULT_TARGET_MILLIS_BETWEEN_REQUESTS = 10000;
/** Number of consecutive failed requests after which the failure is handled as non-retriable. */
protected static final int MAX_RETRIES = 5;
/** Time, in milliseconds, during which retries are blocked after a non-retriable failure. */
private static final long RETRY_DELAY_ON_NON_RETRIABLE_EXCEPTION = 60000; // 1 minute
/** Depletion rate (permits per millisecond) reported by a batch when no rate can be measured yet. */
private static final double MAX_DEPLETION_RATE = 1e20;
/** A depletion-based request is capped at this multiple of the initial size of the current batch. */
public static final int MAX_GROWTH_REQUEST = 2;
/** Single-threaded scheduler used to trigger a new permit request once a retry block has elapsed. */
private static final ScheduledExecutorService SCHEDULE_EXECUTOR_SERVICE =
Executors.newScheduledThreadPool(1, ExecutorsUtils.newDaemonThreadFactory(Optional.of(log),
Optional.of(BatchedPermitsRequester.class.getName() + "-schedule-%d")));
/** Permit batches received from the server that have not been consumed or purged yet. */
@Getter(AccessLevel.PROTECTED) @VisibleForTesting
private final PermitBatchContainer permitBatchContainer;
/** Held while callers take permits and while server callbacks are processed. */
private final Lock lock;
/** Signalled when new permits arrive and when a non-retriable failure happens, so waiting callers re-check. */
private final Condition newPermitsAvailable;
/** Single-permit semaphore intended to allow only one in-flight request at a time. */
private final Semaphore requestSemaphore;
/** Template request carrying the resource and requestor identifier; copied for every request sent. */
private final PermitRequest basePermitRequest;
/** Sends permit requests and reports the outcome through a callback. */
private final RequestSender requestSender;
/** Timer for requests, or null when no metric context was provided. */
private final Timer restRequestTimer;
/** Histogram of requested permits, or null when no metric context was provided. */
private final Histogram restRequestHistogram;
/** Number of failed requests since the last successful one. */
private volatile int retries = 0;
/** Tracks until when new requests are blocked. */
private final RetryStatus retryStatus;
/** Permits asked for by callers of getPermits that have not been satisfied yet. */
private final SynchronizedAverager permitsOutstanding;
/** Target interval, in milliseconds, between external requests. */
private final long targetMillisBetweenRequests;
@Builder
private BatchedPermitsRequester(String resourceId, String requestorIdentifier,
long targetMillisBetweenRequests, RequestSender requestSender, MetricContext metricContext) {
Preconditions.checkArgument(!Strings.isNullOrEmpty(resourceId), "Must provide a resource id.");
Preconditions.checkArgument(!Strings.isNullOrEmpty(requestorIdentifier), "Must provide a requestor identifier.");
this.permitBatchContainer = new PermitBatchContainer();
this.lock = new ReentrantLock();
this.newPermitsAvailable = this.lock.newCondition();
/** Ensures there is only one in-flight request at a time. */
this.requestSemaphore = new Semaphore(1);
/** Number of not-yet-satisfied permits. */
this.permitsOutstanding = new SynchronizedAverager();
this.targetMillisBetweenRequests = targetMillisBetweenRequests > 0 ? targetMillisBetweenRequests :
DEFAULT_TARGET_MILLIS_BETWEEN_REQUESTS;
this.requestSender = requestSender;
this.retryStatus = new RetryStatus();
this.basePermitRequest = new PermitRequest();
this.basePermitRequest.setResource(resourceId);
this.basePermitRequest.setRequestorIdentifier(requestorIdentifier);
this.restRequestTimer = metricContext == null ? null : metricContext.timer(REST_REQUEST_TIMER);
this.restRequestHistogram = metricContext == null ? null : metricContext.histogram(REST_REQUEST_PERMITS_HISTOGRAM);
}
/**
 * Try to get a number of permits from this requester, blocking until they are available or until retries against
 * the server are blocked for too long.
 *
 * <p>The request is registered as outstanding while the caller waits so that it is taken into account when sizing
 * the next server request. The registration is removed on every exit path (success, failure or interruption);
 * otherwise abandoned requests would inflate all later permit requests.
 *
 * @param permits number of permits to take; values {@code <= 0} succeed immediately.
 * @return true if permits were obtained successfully, false if they could not be obtained and new requests are
 *         blocked for more than 10 seconds.
 * @throws InterruptedException if the thread is interrupted while waiting for permits.
 */
public boolean getPermits(long permits) throws InterruptedException {
  if (permits <= 0) {
    return true;
  }
  this.permitsOutstanding.addEntryWithWeight(permits);
  this.lock.lock();
  try {
    while (true) {
      if (this.permitBatchContainer.tryTake(permits)) {
        return true;
      }
      // Give up only if requests to the server will stay blocked for more than 10 seconds.
      if (!this.retryStatus.canRetryWithinMillis(10000)) {
        return false;
      }
      maybeSendNewPermitRequest();
      // Woken up by the allocation callback, either because permits arrived or because of a non-retriable failure.
      // NOTE(review): this wait has no timeout, so it relies on a later callback signalling the condition.
      this.newPermitsAvailable.await();
    }
  } finally {
    try {
      // This caller is no longer waiting, whatever the outcome. Done while still holding the lock so that request
      // sizing never observes permits that were already handed out as outstanding.
      this.permitsOutstanding.removeEntryWithWeight(permits);
    } finally {
      this.lock.unlock();
    }
  }
}
/**
 * Send a new permit request to the server if no other request is in flight, retries are not currently blocked,
 * and {@link #computeNextPermitRequest()} determines that permits are needed.
 *
 * <p>The request semaphore is acquired here and normally released by the {@link AllocationCallback} once the
 * server answers. If no request ends up being handed to the sender (nothing to request, retries blocked, or an
 * exception is thrown) the semaphore is released here, so that a failure cannot block all future requests.
 */
private void maybeSendNewPermitRequest() {
  if (!this.requestSemaphore.tryAcquire()) {
    // Another request is already in flight.
    return;
  }
  boolean requestSent = false;
  try {
    if (!this.retryStatus.canRetryNow()) {
      return;
    }
    long permits = computeNextPermitRequest();
    if (permits <= 0) {
      return;
    }
    PermitRequest permitRequest = this.basePermitRequest.copy();
    permitRequest.setPermits(permits);
    permitRequest.setMinPermits((long) this.permitsOutstanding.getAverageWeightOrZero());
    if (this.restRequestHistogram != null) {
      this.restRequestHistogram.update(permits);
    }
    log.debug("Sending permit request {}", permitRequest);
    Closeable timerContext = this.restRequestTimer == null ? NoopCloseable.INSTANCE : this.restRequestTimer.time();
    this.requestSender.sendRequest(permitRequest, new AllocationCallback(timerContext));
    // From here on the callback owns the semaphore.
    // NOTE(review): assumes a sender that throws from sendRequest has not also completed the callback.
    requestSent = true;
  } catch (CloneNotSupportedException cnse) {
    // This should never happen.
    throw new RuntimeException(cnse);
  } finally {
    if (!requestSent) {
      this.requestSemaphore.release();
    }
  }
}
/**
 * Computes the size of the next permit request.
 *
 * <p>The result is at least the number of outstanding permits that the stored batches cannot satisfy. In
 * addition, when at most one batch is stored and it has 20% or less of its permits left, the request is grown to
 * the number of permits expected to be used during the target interval between requests, based on the depletion
 * rate of that batch and capped at {@link #MAX_GROWTH_REQUEST} times the batch's initial size.
 *
 * <p>The batches are read while holding the container's monitor: the container mutates them in synchronized
 * methods, and this method can be reached from the retry scheduler thread, which does not hold the requester lock.
 *
 * @return the number of permits we should request in the next request; 0 if no request is needed.
 */
private long computeNextPermitRequest() {
  synchronized (this.permitBatchContainer) {
    long unsatisfiablePermits =
        this.permitsOutstanding.getTotalWeight() - this.permitBatchContainer.totalAvailablePermits;
    long candidatePermits = Math.max(0L, unsatisfiablePermits);

    if (this.permitBatchContainer.batches.size() > 1) {
      // If there are multiple batches in the queue, don't create a new request
      return candidatePermits;
    }

    PermitBatch firstBatch = Iterables.getFirst(this.permitBatchContainer.batches.values(), null);
    if (firstBatch == null) {
      return candidatePermits;
    }

    // If the current batch has more than 20% permits left, don't create a new request
    if ((double) firstBatch.getPermits() / firstBatch.getInitialPermits() > 0.2) {
      return candidatePermits;
    }

    double averageDepletionRate = firstBatch.getAverageDepletionRate();
    long candidatePermitsByDepletion =
        Math.min((long) (averageDepletionRate * this.targetMillisBetweenRequests),
            MAX_GROWTH_REQUEST * firstBatch.getInitialPermits());
    return Math.max(candidatePermits, candidatePermitsByDepletion);
  }
}
/**
 * Callback for Rest request. Processes the server's answer to a permit request while holding the requester lock,
 * and releases the request semaphore exactly once per request so that the next request can be sent.
 */
@RequiredArgsConstructor
private class AllocationCallback implements Callback<Response<PermitAllocation>> {
  /** Timer context of the request this callback belongs to; closed when the callback has been processed. */
  private final Closeable timerContext;

  /**
   * Handles a failed request. Non-retriable failures, and too many consecutive failures, block retries and wake
   * up all waiting threads; any other failure triggers an immediate retry.
   */
  @Override
  public void onError(Throwable exc) {
    BatchedPermitsRequester.this.lock.lock();
    try {
      // Each non-retriable branch returns right away: nonRetriableFail already released the semaphore, and
      // falling through would release it again, allowing several requests to be in flight at once.
      if (exc instanceof RequestSender.NonRetriableException) {
        nonRetriableFail(exc, "Encountered non retriable error. ");
        return;
      }
      if (exc instanceof RestLiResponseException) {
        int errorCode = ((RestLiResponseException) exc).getStatus();
        if (NON_RETRIABLE_ERRORS.contains(errorCode)) {
          nonRetriableFail(exc, "Encountered non retriable error. HTTP response code: " + errorCode);
          return;
        }
      }
      BatchedPermitsRequester.this.retries++;
      if (BatchedPermitsRequester.this.retries >= MAX_RETRIES) {
        nonRetriableFail(exc, "Too many failures trying to communicate with throttling service.");
      } else {
        BatchedPermitsRequester.this.requestSemaphore.release();
        // retry
        maybeSendNewPermitRequest();
      }
    } catch (Throwable t) {
      log.error("Error on batched permits container.", t);
    } finally {
      BatchedPermitsRequester.this.lock.unlock();
      closeTimerContext();
    }
  }

  /**
   * Handles a successful request: resets the failure count, applies the retry delay requested by the server (if
   * any), stores the allocated permits and wakes up waiting threads when permits were granted.
   */
  @Override
  public void onSuccess(Response<PermitAllocation> result) {
    BatchedPermitsRequester.this.retries = 0;
    BatchedPermitsRequester.this.lock.lock();
    try {
      PermitAllocation allocation = result.getEntity();
      log.debug("Received permit allocation {}", allocation);
      Long retryDelay = allocation.getMinRetryDelayMillis(GetMode.NULL);
      if (retryDelay != null) {
        BatchedPermitsRequester.this.retryStatus.blockRetries(retryDelay, null);
      }
      boolean permitsGranted = allocation.getPermits() > 0;
      if (permitsGranted) {
        BatchedPermitsRequester.this.permitBatchContainer.addPermitAllocation(allocation);
      }
      BatchedPermitsRequester.this.requestSemaphore.release();
      if (permitsGranted) {
        BatchedPermitsRequester.this.newPermitsAvailable.signalAll();
      }
    } finally {
      closeTimerContext();
      BatchedPermitsRequester.this.lock.unlock();
    }
  }

  /**
   * Blocks retries for {@link #RETRY_DELAY_ON_NON_RETRIABLE_EXCEPTION} milliseconds, releases the request
   * semaphore and wakes up all waiting threads. Must be called at most once per callback invocation.
   */
  private void nonRetriableFail(Throwable exc, String msg) {
    BatchedPermitsRequester.this.retryStatus.blockRetries(RETRY_DELAY_ON_NON_RETRIABLE_EXCEPTION, exc);
    BatchedPermitsRequester.this.requestSemaphore.release();
    log.error(msg, exc);
    // Wake up all threads so they can return false
    BatchedPermitsRequester.this.newPermitsAvailable.signalAll();
  }

  /** Closes the timer context, ignoring failures: timing is best effort and must not affect permit handling. */
  private void closeTimerContext() {
    try {
      this.timerContext.close();
    } catch (IOException ignored) {
      // Do nothing
    }
  }
}
/**
 * A batch of permits obtained from the server, together with the usage information needed to estimate how fast
 * its permits are being consumed.
 */
@NotThreadSafe
@Getter
private static class PermitBatch {
  /** Source of the keys used to tell batches apart. */
  private static final AtomicLong NEXT_KEY = new AtomicLong(0);

  /** Permits remaining in this batch. */
  private volatile long permits;
  private final long expiration;
  private final long autoIncrementKey;
  /** Permits this batch was created with. */
  private final long initialPermits;
  /** Time of the first use of this batch, or 0 if it has not been used. */
  private long firstUseTime;
  /** Time at which the batch ran out of permits, or 0 if it still has permits. */
  private long lastPermitUsedTime;
  /** Number of times permits were taken from this batch. */
  private int permitRequests;

  PermitBatch(long permits, long expiration) {
    this.autoIncrementKey = NEXT_KEY.getAndIncrement();
    this.expiration = expiration;
    this.initialPermits = permits;
    this.permits = permits;
  }

  /**
   * Use this number of permits. (Note, this does not check that there are enough permits).
   */
  private void decrementPermits(long value) {
    boolean neverUsed = this.firstUseTime == 0;
    if (neverUsed) {
      this.firstUseTime = System.currentTimeMillis();
    }
    this.permitRequests += 1;
    long remaining = this.permits - value;
    this.permits = remaining;
    if (remaining <= 0) {
      this.lastPermitUsedTime = System.currentTimeMillis();
    }
  }

  /**
   * Get the average rate, in permits per millisecond, at which permits in this batch have been used. Returns
   * {@link #MAX_DEPLETION_RATE} when the batch was never used or no time has elapsed since its first use.
   */
  private double getAverageDepletionRate() {
    if (this.firstUseTime == 0) {
      return MAX_DEPLETION_RATE;
    }
    long endTime;
    if (this.lastPermitUsedTime > 0) {
      endTime = this.lastPermitUsedTime;
    } else {
      endTime = System.currentTimeMillis();
    }
    if (endTime <= this.firstUseTime) {
      return MAX_DEPLETION_RATE;
    }
    long usedPermits = this.initialPermits - this.permits;
    return (double) usedPermits / (endTime - this.firstUseTime);
  }
}
/**
 * A container for {@link PermitBatch}es obtained from the server. Batches are keyed by expiration time, and
 * batches with the same expiration are ordered by creation key.
 */
static class PermitBatchContainer {
  private final TreeMultimap<Long, PermitBatch> batches =
      TreeMultimap.create(Ordering.natural(), new Comparator<PermitBatch>() {
        @Override
        public int compare(PermitBatch left, PermitBatch right) {
          return Long.compare(left.autoIncrementKey, right.autoIncrementKey);
        }
      });

  /** Sum of the permits remaining in all stored batches. */
  @Getter
  private volatile long totalAvailablePermits = 0;

  /**
   * Takes the given number of permits from the stored, non-expired batches, consuming the batches in order.
   * @return false, without taking anything, if there are not enough permits available.
   */
  private synchronized boolean tryTake(long permits) {
    purgeExpiredBatches();
    if (permits > this.totalAvailablePermits) {
      return false;
    }
    this.totalAvailablePermits -= permits;

    long stillNeeded = permits;
    for (Iterator<PermitBatch> cursor = this.batches.values().iterator(); cursor.hasNext(); ) {
      PermitBatch batch = cursor.next();
      long inBatch = batch.getPermits();
      if (inBatch >= stillNeeded) {
        batch.decrementPermits(stillNeeded);
        return true;
      }
      // The batch is fully consumed: take all of it and move on to the next one.
      stillNeeded -= inBatch;
      cursor.remove();
    }
    // This can only happen if totalAvailablePermits is not in sync with the actual batches
    throw new RuntimeException("Total permits was unsynced! This is an error in code.");
  }

  /** Print the state of the container. Useful for debugging. */
  private synchronized void printState(String prefix) {
    StringBuilder state = new StringBuilder(prefix);
    state.append("->")
        .append("BatchedPermitsRequester state (").append(hashCode()).append("): ")
        .append("TotalPermits: ").append(this.totalAvailablePermits).append(" ")
        .append("Batches(").append(this.batches.size()).append("): ");
    for (PermitBatch batch : this.batches.values()) {
      state.append(batch.getPermits()).append(",");
    }
    log.info(state.toString());
  }

  /** Removes all batches whose expiration time is before the current time. */
  private synchronized void purgeExpiredBatches() {
    long now = System.currentTimeMillis();
    purgeBatches(this.batches.asMap().subMap(Long.MIN_VALUE, now).values().iterator());
  }

  /** Removes all batches. */
  private synchronized void purgeAll() {
    purgeBatches(this.batches.asMap().values().iterator());
  }

  /** Removes every group of batches produced by the iterator, deducting their permits from the total. */
  private void purgeBatches(Iterator<Collection<PermitBatch>> iterator) {
    while (iterator.hasNext()) {
      for (PermitBatch batch : iterator.next()) {
        this.totalAvailablePermits -= batch.getPermits();
      }
      iterator.remove();
    }
  }

  /** Stores the permits of a server allocation as a new batch. */
  private synchronized void addPermitAllocation(PermitAllocation allocation) {
    PermitBatch batch = new PermitBatch(allocation.getPermits(), allocation.getExpiration());
    this.batches.put(allocation.getExpiration(), batch);
    this.totalAvailablePermits += allocation.getPermits();
  }
}
/**
 * Keeps a count of entries and the sum of their weights. Updates are synchronized; the plain getters read the
 * volatile fields directly.
 */
private static class SynchronizedAverager {
  /** Sum of the weights of all current entries. */
  private volatile long weight;
  /** Number of current entries. */
  private volatile long entries;

  /** Adds one entry with the given weight. */
  @SuppressFBWarnings(value = "VO_VOLATILE_INCREMENT", justification = "All methods updating volatile variables are synchronized")
  public synchronized void addEntryWithWeight(long weight) {
    this.entries = this.entries + 1;
    this.weight = this.weight + weight;
  }

  /**
   * Removes one entry with the given weight.
   * @throws IllegalStateException if there are no entries to remove.
   */
  @SuppressFBWarnings(value = "VO_VOLATILE_INCREMENT", justification = "All methods updating volatile variables are synchronized")
  public synchronized void removeEntryWithWeight(long weight) {
    if (this.entries == 0) {
      throw new IllegalStateException("Cannot have a negative number of entries.");
    }
    this.entries = this.entries - 1;
    this.weight = this.weight - weight;
  }

  /** @return the average weight per entry, or 0 if there are no entries. */
  public synchronized double getAverageWeightOrZero() {
    return this.entries == 0 ? 0 : (double) this.weight / this.entries;
  }

  /** @return the sum of the weights of all current entries. */
  public long getTotalWeight() {
    return this.weight;
  }

  /** @return the number of current entries. */
  public long getNumEntries() {
    return this.entries;
  }
}
/**
 * Stores the retry state of a {@link BatchedPermitsRequester}, e.g. whether it can keep retrying.
 */
private class RetryStatus {
  /** Time, in epoch milliseconds, from which new requests are allowed again. */
  private long retryAt;
  /** Failure that caused the current block, if any. */
  @Nullable private Throwable exception;

  /** @return true if a request may be sent right now. */
  public boolean canRetryNow() {
    return canRetryWithinMillis(0);
  }

  /** @return true if requests are, or will become, allowed within the given number of milliseconds. */
  public boolean canRetryWithinMillis(long millis) {
    long horizon = System.currentTimeMillis() + millis;
    return horizon >= this.retryAt;
  }

  /**
   * Blocks new requests for the given number of milliseconds and schedules an attempt to send a request once the
   * block has elapsed.
   */
  public void blockRetries(long millis, Throwable exception) {
    this.exception = exception;
    this.retryAt = System.currentTimeMillis() + millis;
    Runnable resumeRequests = new Runnable() {
      @Override
      public void run() {
        maybeSendNewPermitRequest();
      }
    };
    SCHEDULE_EXECUTOR_SERVICE.schedule(resumeRequests, millis, TimeUnit.MILLISECONDS);
  }
}
/**
 * Discards every permit batch currently stored in this requester, regardless of expiration.
 */
@VisibleForTesting
public void clearAllStoredPermits() {
  PermitBatchContainer container = this.getPermitBatchContainer();
  container.purgeAll();
}
}