/*
 * Copyright 2007-2010 Sun Microsystems, Inc.
 *
 * This file is part of Project Darkstar Server.
 *
 * Project Darkstar Server is free software: you can redistribute it
 * and/or modify it under the terms of the GNU General Public License
 * version 2 as published by the Free Software Foundation and
 * distributed hereunder to you.
 *
 * Project Darkstar Server is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 * --
 */

package com.sun.sgs.impl.service.nodemap.affinity;

import com.sun.sgs.impl.service.nodemap.affinity.graph.AffinityGraphBuilder;
import com.sun.sgs.impl.service.nodemap.affinity.graph.LabelVertex;
import com.sun.sgs.impl.service.nodemap.affinity.graph.WeightedEdge;
import com.sun.sgs.impl.sharedutil.LoggerWrapper;
import com.sun.sgs.impl.sharedutil.PropertiesWrapper;
import com.sun.sgs.impl.util.NamedThreadFactory;
import edu.uci.ics.jung.graph.UndirectedGraph;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Random;
import java.util.Set;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * Base class holding the pieces of the label propagation algorithm (LPA)
 * that are shared by the single node and the distributed implementations.
 * <p>
 * The following property is supported:
 * <p>
 * <dl style="margin-left: 1em">
 *
 * <dt> <i>Property:</i> <code><b>
 *   com.sun.sgs.impl.service.nodemap.affinity.numThreads
 *  </b></code><br>
 *  <i>Default:</i>
 *    {@code 4}
 * <br>
 *
 * <dd style="padding-top: .5em">The number of threads to use while running
 *     the algorithm.  Set to {@code 1} to run single-threaded.
 * <p>
 * </dl>
 * The logger for the affinity group finding system is named
 * {@value #PROP_NAME}.
 * <p>
 * Logging levels:
 * <ul>
 * <li>Level.FINEST gives a trace of the algorithm (very verbose and slow).
 * <li>Level.FINER shows the final labeled graph.
 * <li>Level.FINE reports any errors or unexpected conditions encountered
 *     during the run.
 * </ul>
 */
public abstract class AbstractLPA extends BasicState {
    /** The base name for our properties; also the name of our logger. */
    protected static final String PROP_NAME =
            "com.sun.sgs.impl.service.nodemap.affinity";

    /**
     * The logger, which is shared between graph builders and group finders.
     */
    protected static final LoggerWrapper logger =
            new LoggerWrapper(Logger.getLogger(PROP_NAME));

    /** The name of the property giving the number of threads to use. */
    public static final String NUM_THREADS_PROPERTY =
            PROP_NAME + ".numThreads";

    /** The number of threads used when the property is not set. */
    public static final int DEFAULT_NUM_THREADS = 4;

    /** The id of the local node. */
    protected final long localNodeId;

    /** Source of randomness, used to break ties between labels. */
    protected final Random ran = new Random();

    /**
     * The executor for running tasks in parallel, or {@code null} when
     * running single-threaded.
     */
    // TBD: use taskScheduler?
    protected final ExecutorService executor;

    /** The number of threads this algorithm should use. */
    protected final int numThreads;

    /** The number of iterations required for the last run. */
    protected int iterations;

    /**
     * The graph in which we're finding communities.  Some graph builders
     * hand us a live graph, so we have to be able to handle changes.
     */
    protected volatile UndirectedGraph<LabelVertex, WeightedEdge> graph;

    /**
     * The vertices of interest, grabbed once at the start of the algorithm.
     * This could change so we update for each run, but for now it is
     * easiest to leave this list fixed.
     */
    protected volatile List<LabelVertex> vertices;

    /**
     * Creates the shared state for a label propagation algorithm instance.
     * A fixed thread pool is created only when more than one thread is
     * requested; otherwise {@link #executor} is {@code null}.
     *
     * @param nodeId the local node ID
     * @param properties the properties for configuring this service
     *
     * @throws IllegalArgumentException if {@code numThreads} is
     *       less than {@code 1}
     * @throws Exception if any other error occurs
     */
    public AbstractLPA(long nodeId, Properties properties)
        throws Exception
    {
        localNodeId = nodeId;

        PropertiesWrapper wrappedProps = new PropertiesWrapper(properties);
        numThreads = wrappedProps.getIntProperty(
            NUM_THREADS_PROPERTY, DEFAULT_NUM_THREADS, 1, 65535);

        // A pool is only worthwhile when we actually run in parallel.
        executor = (numThreads > 1)
            ? Executors.newFixedThreadPool(numThreads,
                                           new NamedThreadFactory("LPA"))
            : null;

        logger.log(Level.CONFIG,
                   "Creating LPA with properties:" +
                   "\n " + NUM_THREADS_PROPERTY + "=" + numThreads);
    }

    /**
     * Prepares this object for one run of the algorithm: fetches the graph
     * from the builder, snapshots its vertices, and then lets the concrete
     * algorithm do its own setup.
     *
     * @param builder the graph producer
     */
    protected void initializeLPARun(AffinityGraphBuilder builder) {
        logger.log(Level.FINEST, "{0}: initializing LPA run", localNodeId);

        // Grab the graph and a snapshot of the vertices.
        // Most graph builders return a pointer to the "live" graph,
        // but the BipartiteGraphBuilder constructs the graph on the fly
        // with each call, which is why graph cannot simply be a final field.
        // Additionally, getAffinityGraph should only be called ONCE per
        // algorithm run, or we'll lose the labels when the graph is rebuilt.
        graph = builder.getAffinityGraph();
        assert (graph != null);

        // The set of vertices we iterate over is fixed (e.g. we don't
        // consider new vertices as we process this graph).  If processing
        // takes a long time, or if we use a more dynamic work queue, we'll
        // want to revisit this.
        // Note that there is no guarantee that the set of vertices represents
        // different identities on each node (we could be unlucky and have
        // an identity move to a new node while each node takes this snapshot).
        // There is no guarantee that, in a given set of affinity groups, each
        // identity exists in only one group.
        Collection<LabelVertex> graphVertices = graph.getVertices();
        List<LabelVertex> snapshot = (graphVertices == null)
            ? new ArrayList<LabelVertex>()
            : new ArrayList<LabelVertex>(graphVertices);
        vertices = snapshot;

        // Initialize algorithm-specific info
        doOtherInitialization();

        logger.log(Level.FINEST,
                   "{0}: finished initializing LPA run", localNodeId);
    }

    /**
     * Hook for algorithm specific setup; invoked at the end of
     * {@link #initializeLPARun}, after the graph and vertex snapshot have
     * been captured.
     */
    protected abstract void doOtherInitialization();

    /**
     * Relabels {@code vertex} with the label that carries the most weight
     * among its neighbors, choosing at random when several labels tie.
     *
     * @param vertex a vertex in the graph
     * @param self {@code true} if the vertex should keep its own label
     *        whenever that label is among the highest ones
     * @return {@code true} if {@code vertex}'s label is changed, {@code false}
     *        if it is not changed
     */
    protected boolean setMostFrequentLabel(LabelVertex vertex, boolean self) {
        List<Integer> candidates = getMaxCountLabels(vertex);

        // Nothing to do when no neighbors were found, or when we are allowed
        // to keep our current label and it is already one of the best.
        if (candidates.isEmpty() ||
            (self && candidates.contains(vertex.getLabel())))
        {
            return false;
        }

        // Otherwise, choose one of the best labels at random.
        int pick = ran.nextInt(candidates.size());
        vertex.setLabel(candidates.get(pick));
        logger.log(Level.FINEST, "{0} : Returning true: vertex is now {1}",
                   localNodeId, vertex);
        return true;
    }

    /**
     * Computes, for the neighbors of {@code vertex} in the current graph,
     * the labels whose summed edge weights are the largest.
     *
     * @param vertex the vertex whose neighbors labels will be examined
     * @return an unmodifiable list of labels with the highest counts; empty
     *         if the vertex has no incident edges in the graph
     */
    private List<Integer> getMaxCountLabels(LabelVertex vertex) {
        Collection<WeightedEdge> incident = graph.getIncidentEdges(vertex);
        if (incident == null) {
            // JUNG returns null if vertex is not present; this can occur
            // if our graph was pruned while the algorithm is running
            return Collections.emptyList();
        }

        // Label -> accumulated weight of the neighbors using that label.
        // We assume there are no parallel edges, but edges have weights.
        Map<Integer, Long> labelMap =
                new HashMap<Integer, Long>(incident.size());
        StringBuilder logSB = new StringBuilder();   // for logging
        long maxCount = -1L;

        for (WeightedEdge edge : incident) {
            LabelVertex neighbor = graph.getOpposite(vertex, edge);
            Integer label = neighbor.getLabel();
            Long previous = labelMap.get(label);
            long total = (previous == null) ? 0L : previous.longValue();

            if (logger.isLoggable(Level.FINEST)) {
                logSB.append(neighbor)
                     .append("(")
                     .append(edge.getWeight())
                     .append(") ");
            }

            total += edge.getWeight();
            labelMap.put(label, total);
            // Track the running maximum so we need no second pass for it.
            maxCount = Math.max(maxCount, total);
        }

        // Allow algorithms a shot at updating the labelMap.  In particular,
        // the distributed algorithm needs to update information based on
        // cache eviction data.
        maxCount = Math.max(maxCount,
                            doOtherNeighbors(vertex, labelMap, logSB));

        if (logger.isLoggable(Level.FINEST)) {
            logger.log(Level.FINEST, "{0}: Neighbors of {1} : {2}",
                       localNodeId, vertex, logSB.toString());
        }

        // Collect every label that reached the maximum.
        List<Integer> winners = new ArrayList<Integer>();
        for (Map.Entry<Integer, Long> entry : labelMap.entrySet()) {
            if (entry.getValue().longValue() == maxCount) {
                winners.add(entry.getKey());
            }
        }
        return Collections.unmodifiableList(winners);
    }

    /**
     * Hook letting a particular algorithm fold any additional neighbors it
     * knows about into the label map.
     *
     * @param vertex the vertex whose neighbors labels will be examined
     * @param labelMap a map of labels to counts of neighbors using that label
     * @param logSB a StringBuilder for gathering log info about neighbors
     * @return the highest number of times a particular label is used among the
     *         other neighbors, or {@code -1L} if there are no other neighbors.
     */
    protected abstract long doOtherNeighbors(LabelVertex vertex,
                                             Map<Integer, Long> labelMap,
                                             StringBuilder logSB);

    /**
     * Builds affinity groups from the given vertices by placing all vertices
     * that share a label into one group, whose id is that common label.
     * Every returned group carries the generation number {@code gen}.
     * As an optimization, the labels of the vertices can be reset to their
     * initial setting in the same pass.
     * <p>
     * @param vertices the vertices that we gather groups from
     * @param reinitialize if {@code true}, reinitialize the labels
     * @param gen the generation number
     * @return an unmodifiable set of affinity groups found in the graph
     */
    protected static Set<AffinityGroup> gatherGroups(
            List<LabelVertex> vertices, boolean reinitialize, long gen)
    {
        assert (vertices != null);

        // All nodes with the same label are in the same community.
        Map<Integer, AffinitySet> groupsByLabel =
                new HashMap<Integer, AffinitySet>();
        for (LabelVertex vertex : vertices) {
            int label = vertex.getLabel();
            AffinitySet group = groupsByLabel.get(label);
            if (group != null) {
                group.addIdentity(vertex.getIdentity());
            } else {
                // First vertex seen with this label starts a new group.
                group = new AffinitySet(label, gen, vertex.getIdentity());
                groupsByLabel.put(label, group);
            }

            if (reinitialize) {
                // At the end of an algorithm run, we save a pass through
                // all vertices in the graph if we reinitialize the vertices
                // while we gather the final groups.
                vertex.initializeLabel();
            }
        }

        return Collections.unmodifiableSet(
                new HashSet<AffinityGroup>(groupsByLabel.values()));
    }
}