/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package gobblin.restli.throttling;
import java.util.concurrent.TimeUnit;
import com.google.common.annotations.VisibleForTesting;
import lombok.Getter;
import lombok.extern.slf4j.Slf4j;
/**
 * A wrapper around a {@link TokenBucket} that returns a different number of tokens following an internal heuristic.
 *
 * <p>The heuristic is as follows:
 * <ul>
 *   <li>The calling process specifies an ideal and a minimum number of tokens it requires, as well as a timeout.</li>
 *   <li>If there is a large number of tokens stored (i.e. underutilization), this class may return more than the
 *       requested ideal number of tokens (1/2 of the stored tokens). This reduces unnecessary slowdown when there
 *       is no contention.</li>
 *   <li>The object computes a target timeout equal to the minimum time needed to fulfill the minimum requested
 *       permits (according to the configured qps) plus a {@link #baseTimeout}, capped by the caller's timeout.</li>
 *   <li>The object will return as many permits as it can using that timeout, bounded by the minimum and desired
 *       number of permits.</li>
 * </ul>
 */
@Slf4j
public class DynamicTokenBucket {

  /** The underlying bucket all token requests are delegated to. */
  @VisibleForTesting
  @Getter
  private final TokenBucket tokenBucket;

  /** Extra time, in milliseconds, granted on top of the time needed to generate the minimum requested permits. */
  private final long baseTimeout;

  /**
   * @param qps the average qps desired.
   * @param fullRequestTimeoutMillis max time to fully satisfy a token request. This is generally a small timeout, on
   *                                 the order of the network latency (e.g. ~100 ms).
   * @param maxBucketSizeMillis maximum number of unused tokens that can be stored during under-utilization time,
   *                            expressed in milliseconds worth of tokens. At the configured rate this corresponds to
   *                            roughly qps * maxBucketSizeMillis / 1000 tokens (the exact capacity is determined by
   *                            {@link TokenBucket}).
   */
  DynamicTokenBucket(long qps, long fullRequestTimeoutMillis, long maxBucketSizeMillis) {
    this.tokenBucket = new TokenBucket(qps, maxBucketSizeMillis);
    this.baseTimeout = fullRequestTimeoutMillis;
  }

  /**
   * Request tokens.
   *
   * <p>If the calling thread is interrupted while acquiring tokens, this method returns 0 and restores the thread's
   * interrupt status so that callers can still observe the interruption.
   *
   * @param requestedPermits the ideal number of tokens to acquire.
   * @param minPermits the minimum number of tokens useful for the calling process. If this many tokens cannot be
   *                   acquired, the method will return 0 instead.
   * @param timeoutMillis the maximum time the calling process is willing to wait for tokens.
   * @return the number of allocated tokens: half of the stored tokens when that exceeds {@code requestedPermits},
   *         otherwise {@code requestedPermits} (possibly halved one or more times), {@code minPermits}, or 0 if no
   *         tokens could be acquired.
   */
  public long getPermits(long requestedPermits, long minPermits, long timeoutMillis) {
    try {
      // Under-utilization: hand out half of the stored tokens immediately if that is more than was asked for.
      long storedTokens = this.tokenBucket.getStoredTokens();
      long eagerTokens = storedTokens / 2;
      if (eagerTokens > requestedPermits && this.tokenBucket.getTokens(eagerTokens, 0, TimeUnit.MILLISECONDS)) {
        return eagerTokens;
      }

      // If even the minimum cannot be generated within the caller's timeout, give up right away.
      long millisToSatisfyMinPermits = (long) (minPermits / this.tokenBucket.getTokensPerMilli());
      if (millisToSatisfyMinPermits > timeoutMillis) {
        return 0;
      }
      long allowedTimeout = Math.min(millisToSatisfyMinPermits + this.baseTimeout, timeoutMillis);

      // Try the ideal amount first, halving the request on each failure until it drops to the minimum.
      long candidatePermits = requestedPermits;
      while (candidatePermits > minPermits) {
        if (this.tokenBucket.getTokens(candidatePermits, allowedTimeout, TimeUnit.MILLISECONDS)) {
          return candidatePermits;
        }
        candidatePermits /= 2;
      }

      // Last resort: the minimum number of permits that is still useful to the caller.
      if (this.tokenBucket.getTokens(minPermits, allowedTimeout, TimeUnit.MILLISECONDS)) {
        return minPermits;
      }
    } catch (InterruptedException ie) {
      // Restore the interrupt status so callers can react to it, then fall back to returning 0.
      Thread.currentThread().interrupt();
    }
    return 0;
  }
}