package net.sf.egonet.network; import java.lang.reflect.Method; import java.util.Collection; import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.collect.Sets; public class Statistics<N> { Network<N> network; public Statistics(Network<N> network) { this.network = network; } /* * Density is the number of connections in the actual * network divided by the number of possible connections * for networks with that number of nodes. */ public Double density() { if(density == null) { int nodes = network.getNodes().size(); int edges = network.getEdges().size(); int possibleEdges = 0; for(int i = 0; i < nodes; i++) { // possibleEdges = additorial(nodes) = (nodes-1)+(nodes-2)+...+1+0 possibleEdges += i; } density = possibleEdges < 1 ? 0.0 : ((double) edges) / possibleEdges; } return density; } private Double density; public Set<Set<N>> cliques() { if(cliques == null) { java.util.Map<N,Set<N>> netAsMap = Maps.newHashMap(); for(N node : network.getNodes()) { netAsMap.put(node,network.connections(node)); } cliques = (Set<Set<N>>) (new clj.graph.Core().cliques(netAsMap)); } return cliques; } private Set<Set<N>> cliques = null; private void initComponents() { if(components == null) { isolates = Sets.newHashSet(); dyads = Sets.newHashSet(); components = Sets.newHashSet(); Set<N> nodes = network.getNodes(); for(N seed : nodes) { Boolean seedRepresentsNewComponent = true; for(Set<N> component : components) { if(component.contains(seed)) { seedRepresentsNewComponent = false; } } if(seedRepresentsNewComponent) { Set<N> component = new HashSet<N>(); for(N node : nodes) { if(network.distance(seed, node) != null) { component.add(node); } } components.add(Collections.unmodifiableSet(component)); if(component.size() < 2) { isolates.add(seed); } else if(component.size() < 3) { dyads.add(Collections.unmodifiableSet(component)); } } } } } private Set<N> isolates; private Set<Set<N>> dyads; private Set<Set<N>> components; public Set<N> isolates() { initComponents(); return isolates; } public Set<Set<N>> dyads() { initComponents(); return dyads; } public Set<Set<N>> components() { initComponents(); return components; } /* * For each of these properties x, define two methods: * 1) Double xCentrality(N node) * 2) Double xCentralityMaxDifference(Integer nodes) * * Where xMaxCentralityDifference gives the maximum possible * sum of centrality differences for that many nodes. That * maximum is typically realized for a star network, in which * one node connects to all others but none of the others connect * to each other. */ public static String[] centralityProperties = {"degree","closeness","betweenness","eigenvector"}; // Implement the following with reflection. See deprecated *betweenness* methods for examples. public Double centrality(String property, N node) { try { Method centralityMethod = Statistics.class.getDeclaredMethod( property+"Centrality", new Class[]{Object.class}); // Would be node.getClass() except generics erased at runtime. return (Double) centralityMethod.invoke(this, node); } catch (Exception ex) { throw new RuntimeException("Unable to determine "+property+"Centrality for "+node,ex); } } public Double centralityMean(String property) { Double total = 0.0; for(N node : network.getNodes()) { total += centrality(property,node); } return network.getNodes().size() < 1 ? 0.0 : total / network.getNodes().size(); } public Double maxCentrality(String property) { N node = maxCentralityNode(property); return node == null ? 0.0 : centrality(property,node); } public N maxCentralityNode(String property) { Double maxValue = null; N maxNode = null; for(N node : network.getNodes()) { if(maxValue == null || centrality(property,node) > maxValue) { maxValue = centrality(property,node); maxNode = node; } } return maxNode; } public Double centralization(String property) { Double maximumCentrality = maxCentrality(property); Double totalCentralityDifference = 0.0; Set<N> nodes = network.getNodes(); for(N node : nodes) { totalCentralityDifference += maximumCentrality - centrality(property,node); } Integer n = nodes.size(); return n < 3 ? 0.0 : totalCentralityDifference / centralityMaxDifference(property, n); } public static Double centralityMaxDifference(String property, Integer nodes) { try { Method centralityMethod = Statistics.class.getDeclaredMethod( property+"CentralityMaxDifference", new Class[]{Integer.class}); return (Double) centralityMethod.invoke(null, nodes); } catch (Exception ex) { throw new RuntimeException("Unable to determine "+property+"CentralityMaxDifference for "+nodes+" nodes.",ex); } } /* * Degree centrality is the number of direct connections * to a node divided by the number of possible direct * connections to a node in a network of that size. */ public Double degreeCentrality(N node) { Integer nodes = network.getNodes().size(); return nodes < 2 ? 0.0 : network.connections(node).size() * 1.0 / (nodes-1); } public static Double degreeCentralityMaxDifference(Integer nodes) { return nodes < 3 ? null : (nodes-1) * (1 - 1.0/(nodes-1)); } /* * For fully connected network, closeness is the reciprocal * of the average distance to other nodes. For disconnected * networks, it is the closeness within a component multiplied * by the portion of other nodes that are in that component. */ public Double closenessCentrality(N node) { if(! nodeToCloseness.containsKey(node)) { Integer reachable = 0; Integer totalDistance = 0; Set<N> nodes = network.getNodes(); for(N n : nodes) { Integer distance = network.distance(node, n); if(distance != null && distance > 0) { reachable++; totalDistance += distance; } } if(reachable < 1) { return 0.0; } Double averageDistance = totalDistance*1.0/reachable; nodeToCloseness.put(node, reachable / (averageDistance * (nodes.size()-1))); } return nodeToCloseness.get(node); } private Map<N,Double> nodeToCloseness = Maps.newHashMap(); public static Double closenessCentralityMaxDifference(Integer nodes) { return nodes < 3 ? null : (nodes-2) * (nodes-1) / (2*nodes - 3.0); } /* * Sum over pairs of nodes a,b (such that none of a,b,n are equal) * of the number of shortest paths from a to b that pass through * n divided by the total number of shortest paths from a to b. * Disconnected networks are addressed by choosing that 0/0 => 0. */ public Double betweennessCentrality(N node) { if(! nodeToBetweenness.containsKey(node)) { List<N> nodes = Lists.newArrayList(network.getNodes()); Double result = 0.0; for(Integer i = 0; i < nodes.size(); i++) { N node1 = nodes.get(i); for(Integer j = i+1; j < nodes.size(); j++) { N node2 = nodes.get(j); if(! (node.equals(node1) || node.equals(node2))) { result += portionOfShortestPathsBetweenAandBthroughN(node1, node2, node); } } } nodeToBetweenness.put(node, nodes.size() < 3 ? 0.0 : result * 2 / (nodes.size()-1) / (nodes.size()-2)); } return nodeToBetweenness.get(node); } private Map<N,Double> nodeToBetweenness = Maps.newHashMap(); public static Double betweennessCentralityMaxDifference(Integer nodes) { return nodes < 3 ? null : (nodes-1)*(nodes-1)*(nodes-2) / 2.0; } private Double portionOfShortestPathsBetweenAandBthroughN(N a, N b, N n) { Integer totalDistance = network.distance(a, b); Integer distance1 = network.distance(a, n); Integer distance2 = network.distance(b, n); if(totalDistance == null || distance1 == null || ! totalDistance.equals(distance1+distance2)) { return 0.0; } Integer totalPaths = numberOfShortestPaths(a,b); Integer inclusivePaths = numberOfShortestPaths(a,n)*numberOfShortestPaths(b,n); return inclusivePaths * 1.0 / totalPaths; } private Integer numberOfShortestPaths(N a, N b) { Integer distance = network.distance(a,b); if(distance == null) { return 0; } if(distance < 1) { return 1; } Integer paths = 0; for(N n : network.connections(a)) { if(network.distance(n, b) < distance) { paths += numberOfShortestPaths(n,b); } } return paths; } /* * The eigenvector centrality of a node is proportional to the sum * of the eigenvector centralities of its neighbors. I compute * the eigenvector centrality iteratively, using closeness as an * initial guess. */ public Double eigenvectorCentrality(N n) { if(eigenvectorCentralities == null) { Integer tries = (network.getNodes().size()+5)*(network.getNodes().size()+5); Map<N,Double> guess = initialEigenvectorGuess(); while(true) { Map<N,Double> nextGuess = nextEigenvectorGuess(guess); if(change(guess,nextGuess) < tinyNum || tries < 0) { eigenvectorCentralities = nextGuess; return eigenvectorCentrality(n); } guess = nextGuess; tries--; } } return eigenvectorCentralities.get(n) < Math.sqrt(tinyNum) ? 0.0 : eigenvectorCentralities.get(n); } private Map<N,Double> eigenvectorCentralities = null; public static Double eigenvectorCentralityMaxDifference(Integer nodes) { return nodes < 3 ? null : (nodes-1)*Math.sqrt(0.5) - Math.sqrt(0.5*(nodes-1)); } private Map<N,Double> nextEigenvectorGuess(Map<N,Double> guess) { Map<N,Double> results = Maps.newHashMap(); for(N node : guess.keySet()) { Double result = 0.0; for(N neighbor : network.connections(node)) { result += guess.get(neighbor); } results.put(node, result); } return normalize(results); } private Double change(Map<N,Double> vec1, Map<N,Double> vec2) { Double total = 0.0; for(N node : vec1.keySet()) { total += Math.abs(vec1.get(node) - vec2.get(node)); } return total; } private Double tinyNum = 0.0000001; private Map<N,Double> normalize(Map<N,Double> vec) { Double magnitudeSquared = 0.0; for(Double component : vec.values()) { magnitudeSquared += component * component; } Double magnitude = Math.sqrt(magnitudeSquared); Double factor = 1 / (magnitude < tinyNum ? tinyNum : magnitude); Map<N,Double> normalized = Maps.newHashMap(); for(N node : vec.keySet()) { normalized.put(node, vec.get(node)*factor); } return normalized; } private Map<N,Double> initialEigenvectorGuess() { Map<N,Double> guess = Maps.newHashMap(); for(N node : network.getNodes()) { guess.put(node, closenessCentrality(node)); } return guess; } }