/* * CCVisu is a tool for visual graph clustering * and general force-directed graph layout. * This file is part of CCVisu. * * Copyright (C) 2005-2012 Dirk Beyer * * CCVisu is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License * as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * CCVisu is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with CCVisu; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * * Please find the GNU Lesser General Public License in file * license_lgpl.txt or http://www.gnu.org/licenses/lgpl.txt * * Dirk Beyer (firstname.lastname@uni-passau.de) * University of Passau, Bavaria, Germany */ package org.sosy_lab.ccvisu.measuring; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Map.Entry; import org.apache.commons.math.util.MathUtils; import org.sosy_lab.ccvisu.graph.GraphData; import org.sosy_lab.ccvisu.graph.GraphEdge; import org.sosy_lab.ccvisu.graph.GraphVertex; import org.sosy_lab.ccvisu.graph.Group; import org.sosy_lab.ccvisu.graph.Group.GroupKind; public class ClusterQuality { /** * Calculate the cut of a group/cluster. * @param group The group/cluster for that the cut should be calculated. * @return The calculated cut. */ public static int cutOfGroup (Group group, GraphData graphData) { int cut = 0; List<GraphVertex> groupNodes = group.getNodes(); for (GraphVertex vertex : group.getNodes()) { for (GraphEdge edge : graphData.getEdges()) { if (edge.getSource() == vertex) { if (!groupNodes.contains(edge.getTarget())) { cut++; } } if (edge.getTarget() == vertex) { if (!groupNodes.contains(edge.getSource())) { cut++; } } } } return cut; } /** * Cut of the graphs clustering. * @return cut. */ public static int cutOfClustering(GraphData graph) { int cut = 0; for (int i = graph.getGroups().size()-1; i > -1; i--) { Group group = (Group) graph.getGroups().get(i); if (group.getKind() == GroupKind.CLUSTER) { cut += ClusterQuality.cutOfGroup(group, graph); } } return new Float (cut / (double) 2).intValue(); } /** * Calculate the cut between two clusters. * @param graphData * @param group1 * @param group2 * @return */ public static int cutBetweenClusters (GraphData graphData, Group group1, Group group2) { int cut = 0; List<GraphVertex> group1Nodes = group1.getNodes(); List<GraphVertex> group2Nodes = group2.getNodes(); for (GraphVertex group1Vertex : group1Nodes) { for (GraphEdge edge : graphData.getEdges()) { if (edge.getSource() == group1Vertex) { if (group2Nodes.contains(edge.getTarget())) { cut++; } } if (edge.getTarget() == group1Vertex) { if (group2Nodes.contains(edge.getSource())) { cut++; } } } } return cut; } /** * Calculate the Modularity measure as described by Newman and Girvan 2004 in * "Finding and evaluating community structure in network" * more on Modularity can be found in Brandes et al. 2007 in * "On Modularity Clustering" * @param graph * @return */ public static double modularityOfClustering (GraphData graph) { double result = 0.0d; Map<Group, ClusterMeasures> clusterMeasures = ClusterQuality.calulateClusterMeasures(graph); double edgesOfGraph = graph.getEdges().size(); for (Entry<Group, ClusterMeasures> entry : clusterMeasures.entrySet()) { Group group = entry.getKey(); ClusterMeasures measures = entry.getValue(); if (group.getKind() == GroupKind.CLUSTER) { double q1 = measures.countOfIntraEdges / edgesOfGraph; double q2 = measures.degreeOfCluster / (2.0d * edgesOfGraph); result += (q1 - (q2 * q2)); } } return result; } public static class ClusterMeasures { double sumOfInterEdgeWeight = 0.d; // between different clusters double sumOfIntraEdgeWeight = 0.d; // inside a cluster int countOfIntraEdges = 0; // inside a cluster int countOfInterEdges = 0; // between different clusters int degreeOfCluster = 0; // = deg(V_x), where V_x is the set of Nodes in the Cluster. double clusterFactor = 0.d; // as described by Mitchel and Mancoridis 2002 public int cutOfCluster() { return this.countOfInterEdges; } } /** * @param graph * @return MQ */ public static Map<Group, ClusterMeasures> calulateClusterMeasures (GraphData graph) { HashMap<Group, ClusterMeasures> result = new HashMap<Group, ClusterMeasures>(); for (GraphEdge edge : graph.getEdges()) { for (int i = graph.getGroups().size()-1; i > -1; i--) { Group group = (Group) graph.getGroups().get(i); if (group.getKind() == GroupKind.CLUSTER) { ClusterMeasures clusterMeasures = result.get(group); if (clusterMeasures == null) { clusterMeasures = new ClusterMeasures(); result.put(group, clusterMeasures); } List<GraphVertex> groupNodes = group.getNodes(); if (edge.getSource() != edge.getTarget()) { boolean containsSource = groupNodes.contains(edge.getSource()); boolean containsTarget = groupNodes.contains(edge.getTarget()); double edgeWeight = edge.getWeight(); if (edgeWeight == 0) { edgeWeight = 1; } if (containsSource && containsTarget) { clusterMeasures.countOfIntraEdges +=1; clusterMeasures.sumOfIntraEdgeWeight += edgeWeight; } else if (containsTarget || containsSource) { clusterMeasures.countOfInterEdges += 1; clusterMeasures.sumOfInterEdgeWeight += edgeWeight; } } } } } for (int i = graph.getGroups().size()-1; i > -1; i--) { Group group = (Group) graph.getGroups().get(i); if (group.getKind() == GroupKind.CLUSTER) { ClusterMeasures clusterMeasures = result.get(group); double sumWeightInside = clusterMeasures.sumOfIntraEdgeWeight; double sumWeightBetween = clusterMeasures.sumOfInterEdgeWeight; // Cluster-Factor as described by Mitchel and Mancoridis 2002 clusterMeasures.clusterFactor = (2 * sumWeightInside) / ((2 * sumWeightInside) + sumWeightBetween); for (GraphVertex vertex : group.getNodes()) { clusterMeasures.degreeOfCluster += vertex.getDegree(); } } } return result; } /** * Calculate the edge-normalized cut for the graph by summing up the * edge-normalied cuts of the included clusters. The edge-normalized cut is described * in by Noack 2004 "Energy-Based CLustering of Gaphs with Nonuniform Degrees" * @param graphData * @return */ public static double edgeNormalizedCutOfClustering (GraphData graphData) { double result = 0; int degreeOfGraph = 0; Map<Group, ClusterMeasures> clusterMeasures = ClusterQuality.calulateClusterMeasures(graphData); for (GraphVertex vertex : graphData.getVertices()) { degreeOfGraph += vertex.getDegree(); } for (Entry<Group, ClusterMeasures> entry : clusterMeasures.entrySet()) { Group group = entry.getKey(); ClusterMeasures measures = entry.getValue(); if (group.getKind() == GroupKind.CLUSTER) { int divisor = measures.degreeOfCluster * (degreeOfGraph - measures.degreeOfCluster); double edgeNormaliedCutOfGroup = 0; if (divisor > 0) { edgeNormaliedCutOfGroup = measures.countOfInterEdges / (double) divisor; } result += edgeNormaliedCutOfGroup; } } return result; } /** * Calculate the edge-normalized cut between two clusters. * The edge-normalized cut is described in by Noack 2004 * "Energy-Based CLustering of Graphs with Nonuniform Degrees" */ public static double edgeNormalizedCutBetween (GraphData graphDate, Group group1, Group group2) { Map<Group, ClusterMeasures> clusterMeasures = ClusterQuality.calulateClusterMeasures(graphDate); ClusterMeasures group1Measures = clusterMeasures.get(group1); ClusterMeasures group2Measures = clusterMeasures.get(group2); double cutBetween = ClusterQuality.cutBetweenClusters(graphDate, group1, group2); double divisor = group1Measures.degreeOfCluster * group2Measures.degreeOfCluster; if (divisor > 0) { return cutBetween / divisor; } else { return 0; } } public static double edgeNormalizedCutOfClusteringV2 (GraphData graphData) { double result = 0; for (int i = 0; i < graphData.getGroups().size(); i++) { Group groupA = (Group) graphData.getGroups().get(i); if (groupA.getKind() == GroupKind.CLUSTER) { for (int j = i+1; j < graphData.getGroups().size(); j++) { Group groupB = (Group) graphData.getGroups().get(j); if (groupB.getKind() == GroupKind.CLUSTER) { double encBetweenAandB = ClusterQuality.edgeNormalizedCutBetween(graphData, groupA, groupB); result += encBetweenAandB; } } } } return result; } /** * Calculate the density measure as described by Delling et al. 2007 in * "How to Evaluate Clustering Techniques" * @param graphData * @return Density */ public static double densityOfGraphClustering (GraphData graphData) { double intraClusterDensity = 0; double interClusterSparsity = 0; int graphInterClusterEdgeCount = 0; long graphBinCoeffSum = 0; Map<Group, ClusterMeasures> clusterMeasures = ClusterQuality.calulateClusterMeasures (graphData); for (Entry<Group, ClusterMeasures> entry : clusterMeasures.entrySet()) { Group group = entry.getKey(); ClusterMeasures measures = entry.getValue(); if (group.getKind() == GroupKind.CLUSTER) { long clusterBinCoeff = MathUtils.binomialCoefficient(group.getNodes().size(), 2); graphInterClusterEdgeCount += measures.countOfInterEdges; intraClusterDensity += measures.countOfIntraEdges / (double) clusterBinCoeff; graphBinCoeffSum += clusterBinCoeff; } } intraClusterDensity = intraClusterDensity / graphData.getVertices().size(); interClusterSparsity = 1 - (graphInterClusterEdgeCount / ((double) (MathUtils.binomialCoefficient(graphData.getVertices().size(), 2) - graphBinCoeffSum))); return (intraClusterDensity / 2) + (interClusterSparsity / 2); } /** * Calculate the modularization quality as described by Mitchel and Mancoridis 2002 in * "Using Heuristic Search Techniques to Extract * Design Abstractions from Source Code" */ public static double modularizationQualityOfGraph (GraphData graphData) { double result = 0; Map<Group, ClusterMeasures> clusterMeasures = ClusterQuality.calulateClusterMeasures (graphData); for (Entry<Group, ClusterMeasures> entry : clusterMeasures.entrySet()) { Group group = entry.getKey(); ClusterMeasures measures = entry.getValue(); if (group.getKind() == GroupKind.CLUSTER) { result += measures.clusterFactor; } } return result; } }