// Copyright (c) 2006 Dustin Sallings <dustin@spy.net>
package net.spy.memcached;
import java.io.IOException;
import java.net.InetSocketAddress;
import java.net.SocketAddress;
import java.nio.ByteBuffer;
import java.nio.channels.SelectionKey;
import java.nio.channels.Selector;
import java.nio.channels.SocketChannel;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.IdentityHashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.CountDownLatch;
import net.spy.SpyObject;
import net.spy.memcached.ops.Operation;
import net.spy.memcached.ops.OperationState;
/**
* Connection to a cluster of memcached servers.
*/
public final class MemcachedConnection extends SpyObject {
// The number of consecutive empty selects we'll allow before assuming we
// may have missed an event and walking every registered key by hand in
// handleIO(). Getting here generally indicates a bug, but we check it
// nonetheless.
private static final int DOUBLE_CHECK_EMPTY = 256;
// The number of consecutive empty selects we'll allow before blowing up.
// It's too easy to write a bug that causes the IO loop to spin
// uncontrollably. This helps find those bugs and often works around them.
// The limit is enforced with an assert, so it only applies when
// assertions are enabled.
private static final int EXCESSIVE_EMPTY = 0x1000000;
// Maximum amount of time, in milliseconds, to wait between reconnect
// attempts.
private static final long MAX_DELAY = 30000;
// Set to true by shutdown(). Once set, handleIO() refuses to run and
// queueReconnect() becomes a no-op.
private volatile boolean shutDown=false;
// If true, get optimization will collapse multiple sequential get ops.
// Passed to MemcachedNode.fillWriteBuffer() whenever writes are handled.
private boolean optimizeGets=true;
// The selector every node's channel is registered with.
private Selector selector=null;
// Maps keys to nodes and enumerates all nodes of this connection.
private final NodeLocator locator;
// Number of consecutive select() calls that returned no selected keys;
// reset to zero whenever a select yields at least one key.
private int emptySelects=0;
// addedQueue tracks the nodes for which operations have recently been
// queued. Callers of addOperation() and friends offer nodes here, and
// handleInputQueue() drains it.
private final ConcurrentLinkedQueue<MemcachedNode> addedQueue;
// reconnectQueue contains the nodes that need to be reconnected.
// The key is the time (in System.currentTimeMillis() terms) at which the
// node becomes eligible for a reconnect attempt.
private final SortedMap<Long, MemcachedNode> reconnectQueue;
/**
 * Construct a memcached connection.
 *
 * Opens the selector, then opens one non-blocking channel per address,
 * starts connecting it and registers it with the selector. Channels that
 * connect immediately are registered with no interest ops; the others are
 * registered for OP_CONNECT and finished later from handleIO().
 *
 * If anything fails part-way through, every channel opened so far and the
 * selector are closed before the exception propagates, so a failed
 * construction does not leak sockets or the selector.
 *
 * @param bufSize the buffer size handed to the factory for each node
 * @param f the factory that creates the nodes and the node locator
 * @param a the addresses of the servers to connect to
 *
 * @throws IOException if a connection attempt fails early
 */
public MemcachedConnection(int bufSize, ConnectionFactory f,
        List<InetSocketAddress> a)
    throws IOException {
    reconnectQueue=new TreeMap<Long, MemcachedNode>();
    addedQueue=new ConcurrentLinkedQueue<MemcachedNode>();
    selector=Selector.open();
    // Every channel we open, so they can be released if setup fails.
    final List<SocketChannel> opened=
        new ArrayList<SocketChannel>(a.size());
    NodeLocator createdLocator=null;
    boolean success=false;
    try {
        List<MemcachedNode> connections=
            new ArrayList<MemcachedNode>(a.size());
        for(SocketAddress sa : a) {
            SocketChannel ch=SocketChannel.open();
            opened.add(ch);
            ch.configureBlocking(false);
            MemcachedNode qa=f.createMemcachedNode(sa, ch, bufSize);
            int ops=0;
            if(ch.connect(sa)) {
                getLogger().info("Connected to %s immediately", qa);
                qa.connected();
                assert ch.isConnected();
            } else {
                getLogger().info("Added %s to connect queue", qa);
                ops=SelectionKey.OP_CONNECT;
            }
            qa.setSk(ch.register(selector, ops, qa));
            assert ch.isConnected()
                || qa.getSk().interestOps() == SelectionKey.OP_CONNECT
                : "Not connected, and not wanting to connect";
            connections.add(qa);
        }
        createdLocator=f.createLocator(connections);
        success=true;
    } finally {
        if(!success) {
            // Best-effort cleanup; the original failure is what the
            // caller needs to see, so close errors are only logged.
            for(SocketChannel ch : opened) {
                try {
                    ch.close();
                } catch(IOException e) {
                    getLogger().warn(
                        "IOException trying to close a socket", e);
                }
            }
            try {
                selector.close();
            } catch(IOException e) {
                getLogger().warn(
                    "IOException trying to close the selector", e);
            }
        }
    }
    locator=createdLocator;
}
/**
 * Turn get optimization on or off.
 *
 * The optimization is on by default; while it is on, multiple sequential
 * gets are collapsed into one.
 *
 * @param to true to enable the optimization, false to disable it
 */
public void setGetOptimization(boolean to) {
    this.optimizeGets=to;
}
// Sanity check meant to be called from an assert: for every node with a
// valid selection key, assert that the key's interest ops match what the
// node's state calls for. Always returns true; a mismatch surfaces as an
// AssertionError from one of the inner asserts.
private boolean selectorsMakeSense() {
    for(MemcachedNode node : locator.getAll()) {
        final SelectionKey key=node.getSk();
        if(key == null || !key.isValid()) {
            // Nothing registered (or already cancelled): nothing to check.
            continue;
        }
        if(!node.getChannel().isConnected()) {
            // A channel that is still connecting must be waiting for
            // exactly the connect event.
            final int connectOps=key.interestOps();
            assert connectOps == SelectionKey.OP_CONNECT
                : "Not connected, and not watching for connect: "
                + connectOps;
            continue;
        }
        final int actual=key.interestOps();
        int wanted=0;
        if(node.hasReadOp()) {
            wanted |= SelectionKey.OP_READ;
        }
        if(node.hasWriteOp()) {
            wanted |= SelectionKey.OP_WRITE;
        }
        if(node.getBytesRemainingToWrite() > 0) {
            wanted |= SelectionKey.OP_WRITE;
        }
        assert actual == wanted : "Invalid ops: "
            + node + ", expected " + wanted + ", got " + actual;
    }
    getLogger().debug("Checked the selectors.");
    return true;
}
/**
 * MemcachedClient calls this method to handle IO over the connections.
 *
 * One pass processes newly queued operations, performs a single select
 * (bounded by the time until the next scheduled reconnect, or unbounded
 * when no reconnect is pending), services every selected key and finally
 * attempts any reconnects that are due.
 *
 * @throws IOException if the connection has been shut down, or if the
 *         select or a reconnect attempt fails
 */
@SuppressWarnings("unchecked")
public void handleIO() throws IOException {
    if(shutDown) {
        throw new IOException("No IO while shut down");
    }
    // Deal with everything that has been added but may not be marked
    // writable yet.
    handleInputQueue();
    getLogger().debug("Done dealing with queue.");
    // A timeout of 0 makes select() block until a key is ready or the
    // selector is woken up.
    long delay=0;
    if(!reconnectQueue.isEmpty()) {
        final long currentTime=System.currentTimeMillis();
        final long nextReconnect=reconnectQueue.firstKey();
        // Never pass 0 here: that would mean "block indefinitely".
        delay=Math.max(nextReconnect-currentTime, 1);
    }
    getLogger().debug("Selecting with delay of %sms", delay);
    assert selectorsMakeSense() : "Selectors don't make sense.";
    final int selected=selector.select(delay);
    final Set<SelectionKey> readyKeys=selector.selectedKeys();
    if(!readyKeys.isEmpty() || shutDown) {
        getLogger().debug("Selected %d, selected %d keys",
            selected, readyKeys.size());
        emptySelects=0;
        for(Iterator<SelectionKey> it=readyKeys.iterator();
            it.hasNext();) {
            handleIO(it.next());
        }
        readyKeys.clear();
    } else {
        // Note: Thread.interrupted() also clears the thread's interrupt
        // status, and it is evaluated regardless of the log level.
        getLogger().debug("No selectors ready, interrupted: "
            + Thread.interrupted());
        emptySelects++;
        if(emptySelects > DOUBLE_CHECK_EMPTY) {
            doubleCheckSelectionKeys();
        }
    }
    if(!(shutDown || reconnectQueue.isEmpty())) {
        attemptReconnects();
    }
}
// After too many empty selects, inspect every registered key by hand:
// keys with a ready op get their IO handled, all other nodes are queued
// for reconnect.
private void doubleCheckSelectionKeys() {
    for(SelectionKey sk : selector.keys()) {
        getLogger().info("%s has %s, interested in %s",
            sk, sk.readyOps(), sk.interestOps());
        if(sk.readyOps() == 0) {
            queueReconnect((MemcachedNode)sk.attachment());
        } else {
            getLogger().info("%s has a ready op, handling IO", sk);
            handleIO(sk);
        }
    }
    assert emptySelects < EXCESSIVE_EMPTY
        : "Too many empty selects";
}
// Handle any requests that have been made against the client.
//
// Drains addedQueue, and for each distinct node copies its input queue,
// pushes out pending writes when the node is active and already has a
// current write op, and fixes up its selector interest ops. Nodes that
// are not active are put back on addedQueue so they are looked at again
// on a later pass.
private void handleInputQueue() {
    if(addedQueue.isEmpty()) {
        return;
    }
    getLogger().debug("Handling queue");
    // Nodes that could not be serviced yet and must be re-queued.
    Collection<MemcachedNode> toAdd=new HashSet<MemcachedNode>();
    // Transfer the queue into a hashset. There are very likely more
    // additions than there are nodes.
    Collection<MemcachedNode> todo=new HashSet<MemcachedNode>();
    // poll() returns null once the queue is empty, so no exception is
    // needed to detect the end of the queue.
    for(MemcachedNode qa=addedQueue.poll(); qa != null;
        qa=addedQueue.poll()) {
        todo.add(qa);
    }
    // Now process the queue.
    for(MemcachedNode qa : todo) {
        boolean readyForIO=false;
        if(qa.isActive()) {
            if(qa.getCurrentWriteOp() != null) {
                readyForIO=true;
                getLogger().debug("Handling queued write %s", qa);
            }
        } else {
            toAdd.add(qa);
        }
        qa.copyInputQueue();
        if(readyForIO) {
            try {
                if(qa.getWbuf().hasRemaining()) {
                    handleWrites(qa.getSk(), qa);
                }
            } catch(IOException e) {
                getLogger().warn("Exception handling write", e);
                queueReconnect(qa);
            }
        }
        qa.fixupOps();
    }
    addedQueue.addAll(toAdd);
}
// Handle IO for one selection key: finish a pending connect, or service
// reads and then writes. Any exception raised while doing so causes the
// node to be queued for reconnect. The node's interest ops are fixed up
// afterwards in every case.
private void handleIO(SelectionKey sk) {
    final MemcachedNode node=(MemcachedNode)sk.attachment();
    try {
        getLogger().debug(
            "Handling IO for: %s (r=%s, w=%s, c=%s, op=%s)",
            sk, sk.isReadable(), sk.isWritable(),
            sk.isConnectable(), sk.attachment());
        if(!sk.isConnectable()) {
            if(sk.isReadable()) {
                handleReads(sk, node);
            }
            if(sk.isWritable()) {
                handleWrites(sk, node);
            }
        } else {
            getLogger().info("Connection state changed for %s", sk);
            final SocketChannel ch=node.getChannel();
            final boolean finished=ch.finishConnect();
            if(!finished) {
                assert !ch.isConnected() : "connected";
            } else {
                assert ch.isConnected() : "Not connected.";
                node.connected();
                // Have the node looked at on the next input queue pass.
                addedQueue.offer(node);
                if(node.getWbuf().hasRemaining()) {
                    handleWrites(sk, node);
                }
            }
        }
    } catch(Exception e) {
        // Various errors occur on Linux that wind up here. However, any
        // particular error processing an item should simply cause us to
        // reconnect to the server.
        getLogger().info("Reconnecting due to exception on %s", node, e);
        queueReconnect(node);
    }
    node.fixupOps();
}
// Push pending bytes to the node: refill the write buffer, then keep
// writing and refilling for as long as bytes remain and the previous
// write made progress.
private void handleWrites(SelectionKey sk, MemcachedNode qa)
    throws IOException {
    qa.fillWriteBuffer(optimizeGets);
    while(qa.getBytesRemainingToWrite() > 0) {
        final int written=qa.writeSome();
        qa.fillWriteBuffer(optimizeGets);
        if(written <= 0) {
            // Nothing went out this round; stop for now.
            break;
        }
    }
}
// Read everything currently available on the node's channel and feed it
// to the node's read operations in order, moving on to the next operation
// each time the current one completes.
//
// Throws IOException when the channel reports end-of-stream (the peer
// closed the connection); handleIO(SelectionKey) turns any exception
// into a reconnect of the node.
private void handleReads(SelectionKey sk, MemcachedNode qa)
    throws IOException {
    Operation currentOp = qa.getCurrentReadOp();
    ByteBuffer rbuf=qa.getRbuf();
    final SocketChannel channel = qa.getChannel();
    int read=channel.read(rbuf);
    while(read > 0) {
        getLogger().debug("Read %d bytes", read);
        rbuf.flip();
        while(rbuf.remaining() > 0) {
            assert currentOp != null : "No read operation";
            currentOp.readFromBuffer(rbuf);
            if(currentOp.getState() == OperationState.COMPLETE) {
                getLogger().debug(
                    "Completed read op: %s and giving the next %d bytes",
                    currentOp, rbuf.remaining());
                Operation op=qa.removeCurrentReadOp();
                assert op == currentOp
                    : "Expected to pop " + currentOp + " got " + op;
                currentOp=qa.getCurrentReadOp();
            }
        }
        rbuf.clear();
        read=channel.read(rbuf);
    }
    if(read < 0) {
        // read() returns -1 at end-of-stream. Without this check the
        // closed connection would go unnoticed: the key stays readable
        // and the node is never reconnected.
        throw new IOException(
            "Disconnected unexpectedly from " + qa + ", will reconnect.");
    }
}
// Make a debug string out of the first `size` bytes of the given buffer
// (counted from index 0, independent of the buffer's position and limit).
//
// ASCII letters, digits and whitespace are emitted as-is; every other
// byte is emitted as a two-digit hex escape such as \x0d or \xff.
// The buffer's own position, limit and mark are left untouched, and both
// heap and direct buffers are supported. `size` is clamped to the
// buffer's capacity.
static String dbgBuffer(ByteBuffer b, int size) {
    // Work on a duplicate so absolute reads are not restricted by the
    // caller's limit and the caller's state is never modified.
    final ByteBuffer view=b.duplicate();
    view.clear();
    final int count=Math.min(size, view.capacity());
    final StringBuilder sb=new StringBuilder();
    for(int i=0; i<count; i++) {
        // Mask before widening: a plain (char) cast sign-extends bytes
        // >= 0x80 into U+FF80..U+FFFF, many of which count as letters.
        final int value=view.get(i) & 0xff;
        final char ch=(char)value;
        if(value < 0x80
            && (Character.isWhitespace(ch)
                || Character.isLetterOrDigit(ch))) {
            sb.append(ch);
        } else {
            sb.append("\\x");
            if(value < 0x10) {
                // Fixed width keeps "\x01" + "2" distinct from "\x12".
                sb.append('0');
            }
            sb.append(Integer.toHexString(value));
        }
    }
    return sb.toString();
}
// Tear down the node's current connection and schedule a reconnect
// attempt. Does nothing once shutdown() has been called.
//
// The selection key is cancelled, the socket closed, the node's channel
// cleared, and the node is placed on reconnectQueue keyed by the time at
// which it becomes eligible for a reconnect attempt.
private void queueReconnect(MemcachedNode qa) {
    if(!shutDown) {
        getLogger().warn("Closing, and reopening %s, attempt %d.",
            qa, qa.getReconnectCount());
        if(qa.getSk() != null) {
            qa.getSk().cancel();
            assert !qa.getSk().isValid() : "Cancelled selection key is valid";
        }
        qa.reconnecting();
        try {
            if(qa.getChannel() != null && qa.getChannel().socket() != null) {
                qa.getChannel().socket().close();
            } else {
                getLogger().info("The channel or socket was null for %s",
                    qa);
            }
        } catch(IOException e) {
            getLogger().warn("IOException trying to close a socket", e);
        }
        qa.setChannel(null);
        // Quadratic backoff capped at MAX_DELAY: (100 * attempts) squared.
        // '^' is XOR in Java, so this has to be an explicit multiplication.
        // The comparison against MAX_DELAY first keeps the square from
        // overflowing for very large attempt counts.
        final long base=100L*qa.getReconnectCount();
        final long delay=base >= MAX_DELAY
            ? MAX_DELAY : Math.min(base*base, MAX_DELAY);
        // The queue is keyed by time, so two nodes scheduled for the same
        // millisecond would overwrite each other. Nudge the time forward
        // until the slot is free.
        long when=System.currentTimeMillis() + delay;
        while(reconnectQueue.containsKey(when)) {
            when++;
        }
        reconnectQueue.put(when, qa);
        // Need to do a little queue management.
        qa.setupResend();
    }
}
// Try to reconnect every node whose scheduled reconnect time has passed.
//
// Each eligible entry is removed from reconnectQueue; a node that shows
// up more than once is only reconnected once. A new non-blocking channel
// is opened, connected and registered with the selector. If that fails
// with an IOException, the half-opened channel is closed and the node is
// queued for another attempt instead of being dropped, and the remaining
// nodes are still processed.
private void attemptReconnects() throws IOException {
    final long now=System.currentTimeMillis();
    final Map<MemcachedNode, Boolean> seen=
        new IdentityHashMap<MemcachedNode, Boolean>();
    // Nodes whose attempt failed. They are re-queued after the loop,
    // because reconnectQueue must not be modified while it is iterated.
    final List<MemcachedNode> failed=new ArrayList<MemcachedNode>();
    for(Iterator<MemcachedNode> i=
        reconnectQueue.headMap(now).values().iterator(); i.hasNext();) {
        final MemcachedNode qa=i.next();
        i.remove();
        if(seen.containsKey(qa)) {
            getLogger().debug("Skipping duplicate reconnect request for %s",
                qa);
            continue;
        }
        seen.put(qa, Boolean.TRUE);
        getLogger().info("Reconnecting %s", qa);
        SocketChannel ch=null;
        boolean registered=false;
        try {
            ch=SocketChannel.open();
            ch.configureBlocking(false);
            int ops=0;
            if(ch.connect(qa.getSocketAddress())) {
                getLogger().info("Immediately reconnected to %s", qa);
                assert ch.isConnected();
            } else {
                ops=SelectionKey.OP_CONNECT;
            }
            qa.registerChannel(ch, ch.register(selector, ops, qa));
            registered=true;
            assert qa.getChannel() == ch : "Channel was lost.";
        } catch(IOException e) {
            getLogger().warn("Reconnect to %s failed, will retry", qa, e);
            failed.add(qa);
        } finally {
            if(!registered && ch != null) {
                // Don't leak the descriptor of a channel nobody owns.
                try {
                    ch.close();
                } catch(IOException e) {
                    getLogger().warn(
                        "IOException trying to close a socket", e);
                }
            }
        }
    }
    // Requeue any fast-failed connects.
    for(MemcachedNode qa : failed) {
        queueReconnect(qa);
    }
}
/**
 * Get the node locator used by this connection.
 *
 * @return the locator created by the connection factory at construction
 */
NodeLocator getLocator() {
    return this.locator;
}
/**
 * Add an operation to the node responsible for the given key.
 *
 * The key's primary node is used when it is active. Otherwise the first
 * active node in the key's sequence is used, and if none is active the
 * operation is queued on the primary node to wait for it to come back.
 *
 * @param key the key the operation applies to
 * @param o the operation
 */
public void addOperation(final String key, final Operation o) {
    final MemcachedNode primary=locator.getPrimary(key);
    MemcachedNode target=null;
    if(primary.isActive()) {
        target=primary;
    } else {
        // Look for another node in sequence that is ready.
        final Iterator<MemcachedNode> candidates=locator.getSequence(key);
        while(target == null && candidates.hasNext()) {
            final MemcachedNode candidate=candidates.next();
            if(candidate.isActive()) {
                target=candidate;
            }
        }
        // No active node at all: fall back to the primary and wait for
        // it to come back online.
        if(target == null) {
            target=primary;
        }
    }
    assert target != null : "No node found for key " + key;
    addOperation(target, o);
}
/**
 * Add an operation to a specific node.
 *
 * The operation is initialized and handed to the node, the node is put on
 * the added queue, and the selector is woken up so the IO loop notices.
 *
 * @param node the node to send the operation to
 * @param o the operation
 */
public void addOperation(final MemcachedNode node, final Operation o) {
    o.initialize();
    node.addOp(o);
    addedQueue.offer(node);
    final Selector woken=selector.wakeup();
    assert woken == selector : "Wakeup returned the wrong selector.";
    getLogger().debug("Added %s to %s", o, node);
}
/**
 * Add one operation per node in a single batch.
 *
 * Each operation is initialized and handed to its node, and the node is
 * put on the added queue. The selector is woken up once, after the whole
 * batch has been queued.
 *
 * @param ops the operations to add, keyed by the node to send each to
 */
public void addOperations(final Map<MemcachedNode, Operation> ops) {
    final Iterator<Map.Entry<MemcachedNode, Operation>> entries=
        ops.entrySet().iterator();
    while(entries.hasNext()) {
        final Map.Entry<MemcachedNode, Operation> entry=entries.next();
        final MemcachedNode target=entry.getKey();
        final Operation operation=entry.getValue();
        operation.initialize();
        target.addOp(operation);
        addedQueue.offer(target);
    }
    final Selector woken=selector.wakeup();
    assert woken == selector : "Wakeup returned the wrong selector.";
}
/**
 * Broadcast an operation to all nodes.
 *
 * The factory is asked for one operation per node; each is initialized
 * and handed to its node, and the selector is woken up once at the end.
 *
 * @param of the factory that creates the operation for each node
 * @return a latch whose initial count is the number of nodes; it is
 *         passed to the factory for every operation it creates
 */
public CountDownLatch broadcastOperation(final BroadcastOpFactory of) {
    final int nodeCount=locator.getAll().size();
    final CountDownLatch latch=new CountDownLatch(nodeCount);
    for(MemcachedNode target : locator.getAll()) {
        final Operation created=of.newOp(target, latch);
        created.initialize();
        target.addOp(created);
        addedQueue.offer(target);
    }
    final Selector woken=selector.wakeup();
    assert woken == selector : "Wakeup returned the wrong selector.";
    return latch;
}
/**
 * Shut down all of the connections.
 *
 * Marks the connection as shut down, wakes the selector, closes every
 * node's channel and finally closes the selector. A failure to close one
 * channel no longer stops the others (or the selector) from being
 * closed; the first failure is rethrown once everything has been
 * attempted.
 *
 * @throws IOException if closing a channel or the selector failed
 */
public void shutdown() throws IOException {
    shutDown=true;
    Selector s=selector.wakeup();
    assert s == selector : "Wakeup returned the wrong selector.";
    // First close failure, rethrown after everything has been attempted.
    IOException failure=null;
    for(MemcachedNode qa : locator.getAll()) {
        if(qa.getChannel() != null) {
            try {
                qa.getChannel().close();
            } catch(IOException e) {
                getLogger().warn("IOException trying to close a socket", e);
                if(failure == null) {
                    failure=e;
                }
            }
            qa.setSk(null);
            if(qa.getBytesRemainingToWrite() > 0) {
                getLogger().warn(
                    "Shut down with %d bytes remaining to write",
                    qa.getBytesRemainingToWrite());
            }
            getLogger().debug("Shut down channel %s", qa.getChannel());
        }
    }
    try {
        selector.close();
        getLogger().debug("Shut down selector %s", selector);
    } catch(IOException e) {
        if(failure == null) {
            failure=e;
        } else {
            // An earlier failure is already going to be reported.
            getLogger().warn("IOException trying to close the selector", e);
        }
    }
    if(failure != null) {
        throw failure;
    }
}
/**
 * Describe this connection as the space-separated list of the socket
 * addresses of all of its nodes, wrapped in braces.
 */
@Override
public String toString() {
    final StringBuilder text=new StringBuilder("{MemcachedConnection to");
    for(MemcachedNode node : locator.getAll()) {
        text.append(" ").append(node.getSocketAddress());
    }
    return text.append("}").toString();
}
}