package ch.usi.da.smr;
/* 
 * Copyright (c) 2013 Università della Svizzera italiana (USI)
 * 
 * This file is part of URingPaxos.
 *
 * URingPaxos is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * URingPaxos is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with URingPaxos.  If not, see <http://www.gnu.org/licenses/>.
 */

import java.io.IOException;
import java.net.DatagramPacket;
import java.net.DatagramSocket;
import java.net.InetAddress;
import java.net.SocketException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;

import org.apache.log4j.Logger;
import org.apache.thrift.transport.TTransportException;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.WatchedEvent;
import org.apache.zookeeper.Watcher;
import org.apache.zookeeper.Watcher.Event.EventType;
import org.apache.zookeeper.ZooDefs.Ids;
import org.apache.zookeeper.ZooKeeper;

import ch.usi.da.paxos.api.PaxosRole;
import ch.usi.da.paxos.lab.DummyWatcher;
import ch.usi.da.paxos.ring.RingDescription;
import ch.usi.da.smr.message.Command;
import ch.usi.da.smr.message.Message;
import ch.usi.da.smr.transport.ABListener;
import ch.usi.da.smr.transport.ABSender;
import ch.usi.da.smr.transport.RawABListener;
import ch.usi.da.smr.transport.ThriftABListener;
import ch.usi.da.smr.transport.ThriftABSender;
import ch.usi.da.smr.transport.UDPListener;

/**
 * Name: PartitionManager<br>
 * Description: Keeps a local view of the partitions stored below
 * <code>/smr/partitions</code> in ZooKeeper, maps keys to partitions and rings
 * by hash, hands out atomic broadcast senders/listeners and exchanges UDP
 * signals with the replicas of the partitions.<br>
 * 
 * Creation date: Aug 28, 2013<br>
 * $Id$
 * 
 * @author Samuel Benz benz@geoid.ch
 */
public class PartitionManager implements Watcher {

	private final static Logger logger = Logger.getLogger(PartitionManager.class);

	private final String zoo_host;

	private ZooKeeper zoo;

	private final String prefix = "/smr";

	private final String path = prefix + "/partitions";

	// partition token (parsed as base-16 int) -> first ring of that partition
	private final SortedMap<Integer, Integer> circle = new TreeMap<Integer, Integer>();

	private final List<Partition> partitions = new ArrayList<Partition>();

	// cache of senders, keyed by "<ring>-<clientID>"
	private final Map<String,ABSender> proposers = new HashMap<String,ABSender>();

	// partition token (-1 for the "all" node) -> rings to rotate over
	private final Map<Integer,List<Integer>> loadbalancer = new HashMap<Integer,List<Integer>>();

	private final int signalPort = 40444;

	private DatagramSocket signalSender;

	private UDPListener signalReceiver;

	// Outstanding signals. Both maps are only accessed while holding the
	// monitor of 'signals', since receive() is expected to be invoked from the
	// signal receiver thread started in register().
	private final Map<Command,CountDownLatch> signals = new HashMap<Command,CountDownLatch>();

	private final Map<Command,List<String>> wait = new HashMap<Command,List<String>>();

	// 1-based round-robin cursor into a load balancer ring list
	int position = 1;

	private int global_ring = 16;

	private Replica replica = null;

	// ring -> node id used as clientID when sending; null with the one-arg constructor
	private final Map<Integer,Integer> connectMap;

	/**
	 * Creates a manager without a ring-to-client map. Such an instance cannot
	 * be used with {@link #sendPartition(int, Message)} or
	 * {@link #sendRing(int, Message)}.
	 * 
	 * @param zoo_host the ZooKeeper connect string
	 */
	public PartitionManager(String zoo_host) {
		this.zoo_host = zoo_host;
		this.connectMap = null;
	}

	/**
	 * @param zoo_host the ZooKeeper connect string
	 * @param connectMap maps a ring id to the id that is passed as clientID to
	 *        {@link #getThriftABSender(int, int)} when sending to that ring
	 */
	public PartitionManager(String zoo_host,Map<Integer,Integer> connectMap) {
		this.zoo_host = zoo_host;
		this.connectMap = connectMap;
	}

	/**
	 * Connects to ZooKeeper, creates the partition path and the default
	 * "all" node if missing, reads the current partitions and load balancer
	 * rings and opens the UDP socket used to send signals.
	 * 
	 * @throws KeeperException on ZooKeeper errors while creating the path
	 * @throws InterruptedException if a ZooKeeper call is interrupted
	 * @throws IOException if the ZooKeeper client cannot be created
	 */
	public void init() throws KeeperException, InterruptedException, IOException {
		logger.info("Init PartitionManager");
		zoo = new ZooKeeper(zoo_host,3000,new DummyWatcher());
		zoo.register(this);

		// create path
		String p = "";
		for(String s : path.split("/")){
			if(s.length() > 0){
				p = p + "/" + s;
				if(zoo.exists(p,false) == null){
					zoo.create(p,null,Ids.OPEN_ACL_UNSAFE,CreateMode.PERSISTENT);
				}
			}
		}

		// set default global ring = 16
		if(zoo.exists(path + "/all",false) == null){
			zoo.create(path + "/all","16".getBytes(),Ids.OPEN_ACL_UNSAFE,CreateMode.PERSISTENT);
		}

		readPartitions();
		readLBRings();

		try {
			signalSender = new DatagramSocket();
		} catch (SocketException e) {
			// best effort as before: singal() checks for a missing sender
			logger.error("Could not open signal sender socket!",e);
		}
	}

	/**
	 * Registers a replica for the partition identified by token and starts the
	 * UDP signal receiver (once per manager).
	 * 
	 * @param replicaID the id of the replica; used as name of the ephemeral node
	 * @param ringID the ring id stored as data of the partition node
	 * @param ip the address stored as data of the replica node
	 * @param token the partition token (name of the partition node)
	 * @return the partition with the given token or null if it is not known
	 *         after re-reading the partitions
	 */
	public Partition register(int replicaID, int ringID, InetAddress ip, String token){
		// Start the receiver only once: a second listener on the same fixed
		// port could not be bound anyway.
		if(signalReceiver == null){
			try {
				signalReceiver = new UDPListener(signalPort);
				signalReceiver.registerReceiver(this);
				Thread t = new Thread(signalReceiver);
				t.setName("SignalReceiver");
				t.start();
			} catch (SocketException e) {
				logger.error("Error staritng signal receiver!",e);
			}
		}
		boolean interrupted = false;
		try {
			String partitionPath = path + "/" + token;
			byte[] ringData = Integer.toString(ringID).getBytes();
			if(zoo.exists(partitionPath,false) == null){
				zoo.create(partitionPath,ringData,Ids.OPEN_ACL_UNSAFE,CreateMode.PERSISTENT);
			}else{
				zoo.setData(partitionPath,ringData,-1);
			}
			byte[] data = ip.getHostAddress().getBytes();
			zoo.create(partitionPath + "/" + Integer.toString(replicaID),data,Ids.OPEN_ACL_UNSAFE,CreateMode.EPHEMERAL);
		} catch (KeeperException e) {
			logger.error(e);
		} catch (InterruptedException e) {
			interrupted = true;
		}
		readPartitions();
		if(interrupted){
			// restore the flag only after the best-effort read above
			Thread.currentThread().interrupt();
		}
		for(Partition p : partitions){
			if(p.getID().equals(token)){
				return p;
			}
		}
		return null;
	}

	/**
	 * Deletes the replica node and then tries to delete the partition node.
	 * Failures are not propagated; if the first delete fails the second one is
	 * not attempted.
	 * 
	 * @param replicaID the id of the replica to remove
	 * @param token the partition token
	 */
	public void deregister(int replicaID, String token){
		try {
			zoo.delete(path + "/" + token + "/" + Integer.toString(replicaID),-1);
			zoo.delete(path + "/" + token,-1);
		} catch (KeeperException | InterruptedException e) {
			restoreInterrupt(e);
			logger.debug("Deregister of replica " + replicaID + " at partition " + token + " incomplete: " + e);
		}
	}

	/**
	 * @return a read-only view of the currently known partitions
	 */
	public List<Partition> getPartitions(){
		return Collections.unmodifiableList(partitions);
	}

	/**
	 * Reads the data of all replica nodes below a partition (register() stores
	 * the host address there) and sets a child watch on the partition node.
	 * 
	 * @param token the partition token
	 * @return the data of every replica node as string; the entries read so
	 *         far if an error occurs
	 */
	public List<String> getReplicas(String token){
		List<String> replicas = new ArrayList<String>();
		try {
			List<String> ls = zoo.getChildren(path + "/" + token, true);
			for(String s : ls){
				replicas.add(new String(zoo.getData(path + "/" + token + "/" + s, false, null)));
			}
		} catch (Exception e) {
			logger.error(e);
		}
		return replicas;
	}

	/**
	 * @param token the partition token
	 * @return the names of the replica nodes below the partition; an empty
	 *         list if they cannot be read
	 */
	public List<String> getReplicaIDs(String token){
		try {
			return zoo.getChildren(path + "/" + token, true);
		} catch (Exception e) {
			logger.error(e);
		}
		return new ArrayList<String>();
	}

	/**
	 * @return a read-only view of the token to ring mapping
	 */
	public SortedMap<Integer, Integer> getCircle(){
		return Collections.unmodifiableSortedMap(circle);
	}

	/**
	 * @return the first ring id stored at the "all" node (16 until read)
	 */
	public int getGlobalRing(){
		return global_ring;
	}

	/**
	 * @param key the key to look up
	 * @return the ring of the partition responsible for the key or -1 if no
	 *         partition is known
	 */
	public int getRing(String key){
		return getRing(MurmurHash.hash32(key));
	}

	private int getRing(int hash){
		if (circle.isEmpty()) {
			return -1;
		}
		return circle.get(locateToken(hash));
	}

	/**
	 * @param key the key to look up
	 * @return the token of the partition responsible for the key or -1 if no
	 *         partition is known
	 */
	public int getPartition(String key){
		int hash = MurmurHash.hash32(key);
		if (circle.isEmpty()) {
			return -1; // the "all" partition
		}
		return locateToken(hash);
	}

	/**
	 * Finds the first token that is greater or equal to hash, wrapping around
	 * to the smallest token. Must only be called with a non-empty circle.
	 */
	private int locateToken(int hash){
		if (circle.containsKey(hash)) {
			return hash;
		}
		SortedMap<Integer, Integer> tailMap = circle.tailMap(hash);
		return tailMap.isEmpty() ? circle.firstKey() : tailMap.firstKey();
	}

	/**
	 * Creates a listener that takes part as learner in the given ring.
	 * 
	 * @param ring the ring to learn from
	 * @param replicaID the id passed to the listener
	 * @return the new listener
	 * @throws IOException declared for the listener construction
	 * @throws KeeperException declared for the listener construction
	 * @throws InterruptedException if interrupted while sleeping
	 */
	public ABListener getRawABListener(int ring, int replicaID) throws IOException, KeeperException, InterruptedException {
		List<PaxosRole> role = new ArrayList<PaxosRole>();
		role.add(PaxosRole.Learner);
		List<RingDescription> rings = new ArrayList<RingDescription>();
		rings.add(new RingDescription(ring,role));
		// disabled for dynamic subscription
		/*if(getGlobalRing() > 0){
			rings.add(new RingDescription(getGlobalRing(),role));
		}*/
		logger.debug("Create RawABListener " + rings);
		Thread.sleep(1000); // wait until PartitionManger is ready
		return new RawABListener(replicaID,zoo_host,rings);
	}

	/**
	 * Creates a Thrift listener connected to port 9090+replicaID of the host
	 * stored for the node in the ring topology (127.0.0.1 if the lookup fails).
	 * 
	 * @param ring the ring of the partition
	 * @param replicaID the node id of the replica
	 * @return the new listener
	 * @throws TTransportException declared for the listener construction
	 */
	public ABListener getThriftABListener(int ring, int replicaID) throws TTransportException {
		String host = "127.0.0.1";
		try {
			host = readNodeHost(ring,replicaID);
			// Sanity check: is replicaID learner in ring(partition) && global_ring
			boolean learnsRing = zoo.exists("/ringpaxos/topology" + ring + "/learners/" + replicaID,false) != null;
			if(learnsRing && zoo.exists("/ringpaxos/topology" + global_ring + "/learners/" + replicaID,false) != null){
				logger.debug("ABListener check for ring " + ring + " and " + global_ring + ": OK!");
			}else{
				logger.warn("ABListener check for ring " + ring + " and " + global_ring + ": Fail!");
			}
		} catch (KeeperException | InterruptedException e) {
			restoreInterrupt(e);
			logger.error("ABListener lookup for ring " + ring + " failed",e);
		}
		logger.debug("ThriftABListener host: " + host + ":" + (9090+replicaID));
		return new ThriftABListener(host,9090+replicaID);
	}

	/*public ABSender getRawABSender(int ring, int clientID) throws IOException, KeeperException, InterruptedException {
		if(proposers.containsKey(ring + "-" + clientID)){
			return proposers.get(ring + "-" + clientID);
		}else{
			List<PaxosRole> role = new ArrayList<PaxosRole>();
			role.add(PaxosRole.Proposer);
			List<RingDescription> rings = new ArrayList<RingDescription>();
			rings.add(new RingDescription(ring, role));
			logger.debug("RawABSender " + rings);
			ABSender proposer = new RawABSender(clientID, zoo_host, rings);
			proposers.put(ring + "-" + clientID, proposer);
			return proposer;
		}
	}*/

	/**
	 * Returns the cached sender for ring and clientID or creates one connected
	 * to port 9080+clientID of the host stored for the node in the ring
	 * topology (127.0.0.1 if the lookup fails).
	 * 
	 * @param ring the ring to send to
	 * @param clientID the node id of the proposer
	 * @return the sender
	 * @throws TTransportException declared for the sender construction
	 */
	public ABSender getThriftABSender(int ring, int clientID) throws TTransportException {
		String cacheKey = ring + "-" + clientID;
		ABSender cached = proposers.get(cacheKey);
		if(cached != null || proposers.containsKey(cacheKey)){
			return cached;
		}
		String host = "127.0.0.1";
		try {
			host = readNodeHost(ring,clientID);
			if(zoo.exists("/ringpaxos/topology" + ring + "/proposers/" + clientID,false) != null){
				logger.debug("ABSender check for ring " + ring + ": OK!");
			}else{
				logger.warn("ABSender check for ring " + ring + ": Fail!");
			}
		} catch (KeeperException | InterruptedException e) {
			restoreInterrupt(e);
			logger.error("ABSender lookup for ring " + ring + " failed",e);
		}
		logger.debug("ThriftABSender host: " + host + ":" + (9080+clientID));
		ABSender proposer = new ThriftABSender(host,9080+clientID);
		proposers.put(cacheKey, proposer);
		return proposer;
	}

	/**
	 * Reads the data of a node in the ring topology and strips everything
	 * starting at the first ';'.
	 */
	private String readNodeHost(int ring, int nodeID) throws KeeperException, InterruptedException {
		String data = new String(zoo.getData("/ringpaxos/topology" + ring + "/nodes/" + nodeID,false, null));
		return data.replaceAll("(;.*)","");
	}

	/**
	 * Re-asserts the interrupt flag if the handled exception was an
	 * InterruptedException.
	 */
	private static void restoreInterrupt(Exception e){
		if(e instanceof InterruptedException){
			Thread.currentThread().interrupt();
		}
	}

	/**
	 * Resolves the client id configured for a ring.
	 * 
	 * @throws IllegalStateException if no connect map was given or the ring is
	 *         not contained in it
	 */
	private int clientFor(int ring){
		if(connectMap == null){
			throw new IllegalStateException("PartitionManager was created without connectMap; can not send to ring " + ring);
		}
		Integer client = connectMap.get(ring);
		if(client == null){
			throw new IllegalStateException("No connection configured for ring " + ring);
		}
		return client;
	}

	/**
	 * Broadcasts the message on the next ring (round-robin) of the partition.
	 * 
	 * @param partition the partition token (-1 for the "all" entry)
	 * @param m the message to broadcast
	 * @throws Exception if the partition or the ring connection is unknown
	 *         ({@link IllegalStateException}) or the broadcast fails
	 */
	public synchronized void sendPartition(int partition,Message m) throws Exception {
		List<Integer> rings = loadbalancer.get(partition);
		if(rings == null || rings.isEmpty()){
			throw new IllegalStateException("No rings known for partition " + partition);
		}
		// The cursor is shared by all partitions; it may point past the end of
		// this partition's (shorter) ring list.
		if(position < 1 || position > rings.size()){
			position = 1;
		}
		int ring = rings.get(position-1);
		position++;
		if(position > rings.size()){
			position = 1;
		}
		ABSender sender = getThriftABSender(ring,clientFor(ring));
		sender.abroadcast(m);
	}

	/**
	 * Broadcasts the message on the given ring.
	 * 
	 * @param ring the ring to send to
	 * @param m the message to broadcast
	 * @throws Exception if the ring connection is unknown
	 *         ({@link IllegalStateException}) or the broadcast fails
	 */
	public synchronized void sendRing(int ring,Message m) throws Exception {
		ABSender sender = getThriftABSender(ring,clientFor(ring));
		sender.abroadcast(m);
	}

	/**
	 * ZooKeeper callback: re-reads partitions and load balancer rings when
	 * children below the partition path change, and the load balancer rings
	 * when node data changes.
	 */
	@Override
	public void process(WatchedEvent event) {
		String changed = event.getPath();
		if(changed == null || !changed.startsWith(path)){
			return;
		}
		if(event.getType() == EventType.NodeChildrenChanged){
			readPartitions();
			readLBRings();
		}
		if(event.getType() == EventType.NodeDataChanged){
			readLBRings();
		}
	}

	/**
	 * @param replica the replica whose partition is updated (if its token
	 *        matches) whenever the partitions are re-read
	 */
	public void registerPartitionChangeNotifier(Replica replica) {
		this.replica = replica;
	}

	/**
	 * Reads the comma separated ring list of every partition node into the
	 * load balancer map and resets the round-robin cursor. Watches are set on
	 * the children of the path and on the data of every node.
	 */
	private synchronized void readLBRings(){
		try {
			List<String> ls = zoo.getChildren(path, true);
			for(String s : ls){
				int part = -1;
				if(!s.equals("all")){
					part = Integer.parseInt(s,16);
				}
				String rstring = new String(zoo.getData(path + "/" + s, true, null));
				List<Integer> rings = new ArrayList<Integer>();
				for(String r : rstring.split(",")){
					rings.add(Integer.parseInt(r));
				}
				loadbalancer.put(part, rings);
			}
			position = 1;
			logger.info("PartitionManger changed LB: " + loadbalancer);
		} catch (Exception e) {
			logger.error(e);
		}
	}

	/**
	 * Rebuilds the circle and the partition list from ZooKeeper. The first
	 * ring of the "all" node becomes the global ring; every other node name is
	 * parsed as base-16 token. A registered replica whose token matches gets
	 * its new partition set.
	 */
	private synchronized void readPartitions(){
		partitions.clear();
		circle.clear();
		try {
			List<String> ls = zoo.getChildren(path, true);
			for(String s : ls){
				String rstring = new String(zoo.getData(path + "/" + s, true, null));
				int ring = Integer.parseInt(rstring.split(",")[0]);
				if(s.equals("all")){
					// the data of "/all" was just read above
					global_ring = ring;
				}else{
					circle.put(Integer.parseInt(s,16),ring);
				}
			}
			for(Entry<Integer, Integer> e : circle.entrySet()){
				String id = Integer.toString(e.getKey(),16);
				int high = e.getKey();
				// the range starts after the preceding token (wrapping around)
				SortedMap<Integer, Integer> headMap = circle.headMap(high);
				int low = headMap.isEmpty() ? circle.lastKey() : headMap.lastKey();
				Partition p = new Partition(id,e.getValue(),low+1,high);
				partitions.add(p);
				if(replica != null && replica.token.equals(id)){
					replica.setPartition(p);
				}
			}
		} catch (Exception e) {
			logger.error(e);
		}
	}

	public static void main(String args[]) throws KeeperException, InterruptedException, IOException{
		final int replicaID = 1;
		PartitionManager partitions = new PartitionManager("127.0.0.1:2181");
		partitions.init();

		// two partitions
		partitions.register(replicaID,1,InetAddress.getLocalHost(),"0");
		partitions.register(replicaID,2,InetAddress.getLocalHost(),"7FFFFFFF");

		// four partitions
		//partitions.register(replicaID,3,InetAddress.getLocalHost(),"3FFFFFFF");
		//partitions.register(replicaID,4,InetAddress.getLocalHost(),"-3FFFFFFF");

		// three partitions
		//partitions.register(replicaID,1,InetAddress.getLocalHost(),"0");
		//partitions.register(replicaID,2,InetAddress.getLocalHost(),"55555554");
		//partitions.register(replicaID,2,InetAddress.getLocalHost(),"-55555554");

		List<String> ps = new ArrayList<String>();
		for(Partition p : partitions.getPartitions()){
			ps.add(p.getID());
			System.out.println(p + " size:" + (p.getHigh()-p.getLow()));
		}

		/*Thread.sleep(1000);
		partitions.deregister(replicaID,"0");
		Thread.sleep(1000);
		System.out.println("---------------------------------------------");
		for(Partition p : partitions.getPartitions()){
			System.out.println(p);
		}*/

		//System.err.println(partitions.getRing(1));

		for(String p : ps){
			partitions.deregister(replicaID,p);
		}
	}

	/**
	 * Sends the command as UDP message to every replica of every partition and
	 * registers a latch that {@link #waitSignal(Command)} waits on. The latch
	 * counts the tokens listed after the first ';' in the command value or, if
	 * there are none, all known partitions.
	 * 
	 * @param token the sender token placed in the message
	 * @param c the command to signal
	 */
	public void singal(String token, Command c) {
		//TODO: set only subset
		List<String> await = new ArrayList<String>();
		String[] tokens = new String(c.getValue()).split(";");
		if(tokens.length > 1){
			for(int i=1;i<tokens.length;i++){
				await.add(tokens[i]);
			}
		}else{
			for(Partition p : getPartitions()){
				await.add(p.getID());
			}
		}
		logger.info("Getrange wait for partitions: " + await);
		synchronized(signals){
			wait.put(c,await);
			signals.put(c, new CountDownLatch(await.size()));
		}
		if(signalSender == null){
			// init() could not open the socket (or was not called)
			logger.error("Signal sender not available; can not signal " + await);
			return;
		}
		List<Command> cmds = new ArrayList<Command>();
		cmds.add(c);
		Message m = new Message(c.getID(),token,"",cmds);
		for(Partition p : getPartitions()){
			for(String replica : getReplicas(p.getID())){
				try {
					InetAddress address = InetAddress.getByName(replica);
					byte[] buffer = Message.toByteArray(m);
					DatagramPacket packet = new DatagramPacket(buffer,0,buffer.length,address,signalPort);
					signalSender.send(packet);
				} catch (IOException e) {
					logger.error("Could not send signal to " + replica,e);
				}
			}
		}
	}

	/**
	 * Waits up to one second until all awaited partitions answered the signal
	 * of the command. The bookkeeping for the command is removed afterwards in
	 * every case.
	 * 
	 * @param c the command passed to {@link #singal(String, Command)}
	 * @return true if all answers arrived in time; false on timeout, on
	 *         interruption or if no signal is outstanding for the command
	 */
	public boolean waitSignal(Command c) {
		CountDownLatch latch;
		synchronized(signals){
			latch = signals.get(c);
		}
		if(latch == null){
			return false;
		}
		boolean ret = false;
		try {
			// wait outside the monitor so that receive() can count down
			ret = latch.await(1,TimeUnit.SECONDS);
		} catch (InterruptedException e) {
			Thread.currentThread().interrupt();
		} finally {
			synchronized(signals){
				signals.remove(c);
				wait.remove(c);
			}
		}
		return ret;
	}

	/**
	 * Handles a received signal message: if its first command is outstanding
	 * and the sender is still awaited, the sender is removed and the latch is
	 * counted down.
	 * 
	 * @param m the received message
	 */
	public void receive(Message m) {
		if(m == null || m.getCommands() == null || m.getCommands().isEmpty()){
			return;
		}
		Command c = m.getCommands().get(0);
		if(c == null){
			return;
		}
		synchronized(signals){
			CountDownLatch latch = signals.get(c);
			List<String> pending = wait.get(c);
			if(latch == null || pending == null){
				return;
			}
			if(pending.contains(m.getFrom())){
				pending.remove(m.getFrom());
				latch.countDown();
				if(pending.isEmpty()){
					wait.remove(c);
				}
			}
		}
	}

}