/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.cassandra.locator;
import java.lang.management.ManagementFactory;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.TimeUnit;
import javax.management.MBeanServer;
import javax.management.ObjectName;
import org.apache.cassandra.config.DatabaseDescriptor;
import org.apache.cassandra.net.MessagingService;
import org.apache.cassandra.service.StorageService;
import org.apache.cassandra.utils.FBUtilities;
import com.yammer.metrics.stats.ExponentiallyDecayingSample;
/**
* A dynamic snitch that sorts endpoints by latency with an adapted phi failure detector
*/
public class DynamicEndpointSnitch extends AbstractEndpointSnitch implements ILatencySubscriber, DynamicEndpointSnitchMBean
{
private static final double ALPHA = 0.75; // set to 0.75 to make EDS more biased to towards the newer values
private static final int WINDOW_SIZE = 100;
private int UPDATE_INTERVAL_IN_MS = DatabaseDescriptor.getDynamicUpdateInterval();
private int RESET_INTERVAL_IN_MS = DatabaseDescriptor.getDynamicResetInterval();
private double BADNESS_THRESHOLD = DatabaseDescriptor.getDynamicBadnessThreshold();
private String mbeanName;
private boolean registered = false;
private final ConcurrentHashMap<InetAddress, Double> scores = new ConcurrentHashMap<InetAddress, Double>();
private final ConcurrentHashMap<InetAddress, Long> lastReceived = new ConcurrentHashMap<InetAddress, Long>();
private final ConcurrentHashMap<InetAddress, ExponentiallyDecayingSample> samples = new ConcurrentHashMap<InetAddress, ExponentiallyDecayingSample>();
public final IEndpointSnitch subsnitch;
public DynamicEndpointSnitch(IEndpointSnitch snitch)
{
this(snitch, null);
}
public DynamicEndpointSnitch(IEndpointSnitch snitch, String instance)
{
mbeanName = "org.apache.cassandra.db:type=DynamicEndpointSnitch";
if (instance != null)
mbeanName += ",instance=" + instance;
subsnitch = snitch;
Runnable update = new Runnable()
{
public void run()
{
updateScores();
}
};
Runnable reset = new Runnable()
{
public void run()
{
// we do this so that a host considered bad has a chance to recover, otherwise would we never try
// to read from it, which would cause its score to never change
reset();
}
};
StorageService.scheduledTasks.scheduleWithFixedDelay(update, UPDATE_INTERVAL_IN_MS, UPDATE_INTERVAL_IN_MS, TimeUnit.MILLISECONDS);
StorageService.scheduledTasks.scheduleWithFixedDelay(reset, RESET_INTERVAL_IN_MS, RESET_INTERVAL_IN_MS, TimeUnit.MILLISECONDS);
registerMBean();
}
private void registerMBean()
{
MBeanServer mbs = ManagementFactory.getPlatformMBeanServer();
try
{
mbs.registerMBean(this, new ObjectName(mbeanName));
}
catch (Exception e)
{
throw new RuntimeException(e);
}
}
public void unregisterMBean()
{
MBeanServer mbs = ManagementFactory.getPlatformMBeanServer();
try
{
mbs.unregisterMBean(new ObjectName(mbeanName));
}
catch (Exception e)
{
throw new RuntimeException(e);
}
}
@Override
public void gossiperStarting()
{
subsnitch.gossiperStarting();
}
public String getRack(InetAddress endpoint)
{
return subsnitch.getRack(endpoint);
}
public String getDatacenter(InetAddress endpoint)
{
return subsnitch.getDatacenter(endpoint);
}
public List<InetAddress> getSortedListByProximity(final InetAddress address, Collection<InetAddress> addresses)
{
List<InetAddress> list = new ArrayList<InetAddress>(addresses);
sortByProximity(address, list);
return list;
}
@Override
public void sortByProximity(final InetAddress address, List<InetAddress> addresses)
{
assert address.equals(FBUtilities.getBroadcastAddress()); // we only know about ourself
if (BADNESS_THRESHOLD == 0)
{
sortByProximityWithScore(address, addresses);
}
else
{
sortByProximityWithBadness(address, addresses);
}
}
private void sortByProximityWithScore(final InetAddress address, List<InetAddress> addresses)
{
super.sortByProximity(address, addresses);
}
private void sortByProximityWithBadness(final InetAddress address, List<InetAddress> addresses)
{
if (addresses.size() < 2)
return;
subsnitch.sortByProximity(address, addresses);
Double first = scores.get(addresses.get(0));
if (first == null)
return;
for (InetAddress addr : addresses)
{
Double next = scores.get(addr);
if (next == null)
return;
if ((first - next) / first > BADNESS_THRESHOLD)
{
sortByProximityWithScore(address, addresses);
return;
}
}
}
public int compareEndpoints(InetAddress target, InetAddress a1, InetAddress a2)
{
Double scored1 = scores.get(a1);
Double scored2 = scores.get(a2);
if (scored1 == null)
{
scored1 = 0.0;
receiveTiming(a1, 0);
}
if (scored2 == null)
{
scored2 = 0.0;
receiveTiming(a2, 0);
}
if (scored1.equals(scored2))
return subsnitch.compareEndpoints(target, a1, a2);
if (scored1 < scored2)
return -1;
else
return 1;
}
public void receiveTiming(InetAddress host, long latency) // this is cheap
{
lastReceived.put(host, System.nanoTime());
ExponentiallyDecayingSample sample = samples.get(host);
if (sample == null)
{
ExponentiallyDecayingSample maybeNewSample = new ExponentiallyDecayingSample(WINDOW_SIZE, ALPHA);
sample = samples.putIfAbsent(host, maybeNewSample);
if (sample == null)
sample = maybeNewSample;
}
sample.update(latency);
}
private void updateScores() // this is expensive
{
if (!StorageService.instance.isInitialized())
return;
if (!registered)
{
if (MessagingService.instance() != null)
{
MessagingService.instance().register(this);
registered = true;
}
}
double maxLatency = 1;
// We're going to weight the latency for each host against the worst one we see, to
// arrive at sort of a 'badness percentage' for them. First, find the worst for each:
for (Map.Entry<InetAddress, ExponentiallyDecayingSample> entry : samples.entrySet())
{
double mean = entry.getValue().getSnapshot().getMedian();
if (mean > maxLatency)
maxLatency = mean;
}
// now make another pass to do the weighting based on the maximums we found before
for (Map.Entry<InetAddress, ExponentiallyDecayingSample> entry: samples.entrySet())
{
double score = entry.getValue().getSnapshot().getMedian() / maxLatency;
// finally, add the severity without any weighting, since hosts scale this relative to their own load and the size of the task causing the severity.
// "Severity" is basically a measure of compaction activity (CASSANDRA-3722).
score += StorageService.instance.getSeverity(entry.getKey());
// lowest score (least amount of badness) wins.
scores.put(entry.getKey(), score);
}
}
private void reset()
{
for (ExponentiallyDecayingSample sample : samples.values())
sample.clear();
}
public Map<InetAddress, Double> getScores()
{
return scores;
}
public int getUpdateInterval()
{
return UPDATE_INTERVAL_IN_MS;
}
public int getResetInterval()
{
return RESET_INTERVAL_IN_MS;
}
public double getBadnessThreshold()
{
return BADNESS_THRESHOLD;
}
public String getSubsnitchClassName()
{
return subsnitch.getClass().getName();
}
public List<Double> dumpTimings(String hostname) throws UnknownHostException
{
InetAddress host = InetAddress.getByName(hostname);
ArrayList<Double> timings = new ArrayList<Double>();
ExponentiallyDecayingSample sample = samples.get(host);
if (sample != null)
{
for (double time: sample.getSnapshot().getValues())
timings.add(time);
}
return timings;
}
public void setSeverity(double severity)
{
StorageService.instance.reportSeverity(severity);
}
public double getSeverity()
{
return StorageService.instance.getSeverity(FBUtilities.getBroadcastAddress());
}
public boolean isWorthMergingForRangeQuery(List<InetAddress> merged, List<InetAddress> l1, List<InetAddress> l2)
{
if (!subsnitch.isWorthMergingForRangeQuery(merged, l1, l2))
return false;
// Make sure we return the subsnitch decision (i.e true if we're here) if we lack too much scores
double maxMerged = maxScore(merged);
double maxL1 = maxScore(l1);
double maxL2 = maxScore(l2);
if (maxMerged < 0 || maxL1 < 0 || maxL2 < 0)
return true;
return maxMerged < maxL1 + maxL2;
}
// Return the max score for the endpoint in the provided list, or -1.0 if no node have a score.
private double maxScore(List<InetAddress> endpoints)
{
double maxScore = -1.0;
for (InetAddress endpoint : endpoints)
{
Double score = scores.get(endpoint);
if (score == null)
continue;
if (score > maxScore)
maxScore = score;
}
return maxScore;
}
}