Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/networkx/algorithms/community/quality.py: 28%
75 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-10-20 07:00 +0000
« prev ^ index » next coverage.py v7.3.2, created at 2023-10-20 07:00 +0000
1"""Functions for measuring the quality of a partition (into
2communities).
4"""
6from itertools import combinations
8import networkx as nx
9from networkx import NetworkXError
10from networkx.algorithms.community.community_utils import is_partition
11from networkx.utils.decorators import argmap
13__all__ = ["modularity", "partition_quality"]
16class NotAPartition(NetworkXError):
17 """Raised if a given collection is not a partition."""
19 def __init__(self, G, collection):
20 msg = f"{collection} is not a valid partition of the graph {G}"
21 super().__init__(msg)
24def _require_partition(G, partition):
25 """Decorator to check that a valid partition is input to a function
27 Raises :exc:`networkx.NetworkXError` if the partition is not valid.
29 This decorator should be used on functions whose first two arguments
30 are a graph and a partition of the nodes of that graph (in that
31 order)::
33 >>> @require_partition
34 ... def foo(G, partition):
35 ... print("partition is valid!")
36 ...
37 >>> G = nx.complete_graph(5)
38 >>> partition = [{0, 1}, {2, 3}, {4}]
39 >>> foo(G, partition)
40 partition is valid!
41 >>> partition = [{0}, {2, 3}, {4}]
42 >>> foo(G, partition)
43 Traceback (most recent call last):
44 ...
45 networkx.exception.NetworkXError: `partition` is not a valid partition of the nodes of G
46 >>> partition = [{0, 1}, {1, 2, 3}, {4}]
47 >>> foo(G, partition)
48 Traceback (most recent call last):
49 ...
50 networkx.exception.NetworkXError: `partition` is not a valid partition of the nodes of G
52 """
53 if is_partition(G, partition):
54 return G, partition
55 raise nx.NetworkXError("`partition` is not a valid partition of the nodes of G")
58require_partition = argmap(_require_partition, (0, 1))
61@nx._dispatch
62def intra_community_edges(G, partition):
63 """Returns the number of intra-community edges for a partition of `G`.
65 Parameters
66 ----------
67 G : NetworkX graph.
69 partition : iterable of sets of nodes
70 This must be a partition of the nodes of `G`.
72 The "intra-community edges" are those edges joining a pair of nodes
73 in the same block of the partition.
75 """
76 return sum(G.subgraph(block).size() for block in partition)
79@nx._dispatch
80def inter_community_edges(G, partition):
81 """Returns the number of inter-community edges for a partition of `G`.
82 according to the given
83 partition of the nodes of `G`.
85 Parameters
86 ----------
87 G : NetworkX graph.
89 partition : iterable of sets of nodes
90 This must be a partition of the nodes of `G`.
92 The *inter-community edges* are those edges joining a pair of nodes
93 in different blocks of the partition.
95 Implementation note: this function creates an intermediate graph
96 that may require the same amount of memory as that of `G`.
98 """
99 # Alternate implementation that does not require constructing a new
100 # graph object (but does require constructing an affiliation
101 # dictionary):
102 #
103 # aff = dict(chain.from_iterable(((v, block) for v in block)
104 # for block in partition))
105 # return sum(1 for u, v in G.edges() if aff[u] != aff[v])
106 #
107 MG = nx.MultiDiGraph if G.is_directed() else nx.MultiGraph
108 return nx.quotient_graph(G, partition, create_using=MG).size()
111@nx._dispatch
112def inter_community_non_edges(G, partition):
113 """Returns the number of inter-community non-edges according to the
114 given partition of the nodes of `G`.
116 Parameters
117 ----------
118 G : NetworkX graph.
120 partition : iterable of sets of nodes
121 This must be a partition of the nodes of `G`.
123 A *non-edge* is a pair of nodes (undirected if `G` is undirected)
124 that are not adjacent in `G`. The *inter-community non-edges* are
125 those non-edges on a pair of nodes in different blocks of the
126 partition.
128 Implementation note: this function creates two intermediate graphs,
129 which may require up to twice the amount of memory as required to
130 store `G`.
132 """
133 # Alternate implementation that does not require constructing two
134 # new graph objects (but does require constructing an affiliation
135 # dictionary):
136 #
137 # aff = dict(chain.from_iterable(((v, block) for v in block)
138 # for block in partition))
139 # return sum(1 for u, v in nx.non_edges(G) if aff[u] != aff[v])
140 #
141 return inter_community_edges(nx.complement(G), partition)
144@nx._dispatch(edge_attrs="weight")
145def modularity(G, communities, weight="weight", resolution=1):
146 r"""Returns the modularity of the given partition of the graph.
148 Modularity is defined in [1]_ as
150 .. math::
151 Q = \frac{1}{2m} \sum_{ij} \left( A_{ij} - \gamma\frac{k_ik_j}{2m}\right)
152 \delta(c_i,c_j)
154 where $m$ is the number of edges (or sum of all edge weights as in [5]_),
155 $A$ is the adjacency matrix of `G`, $k_i$ is the (weighted) degree of $i$,
156 $\gamma$ is the resolution parameter, and $\delta(c_i, c_j)$ is 1 if $i$ and
157 $j$ are in the same community else 0.
159 According to [2]_ (and verified by some algebra) this can be reduced to
161 .. math::
162 Q = \sum_{c=1}^{n}
163 \left[ \frac{L_c}{m} - \gamma\left( \frac{k_c}{2m} \right) ^2 \right]
165 where the sum iterates over all communities $c$, $m$ is the number of edges,
166 $L_c$ is the number of intra-community links for community $c$,
167 $k_c$ is the sum of degrees of the nodes in community $c$,
168 and $\gamma$ is the resolution parameter.
170 The resolution parameter sets an arbitrary tradeoff between intra-group
171 edges and inter-group edges. More complex grouping patterns can be
172 discovered by analyzing the same network with multiple values of gamma
173 and then combining the results [3]_. That said, it is very common to
174 simply use gamma=1. More on the choice of gamma is in [4]_.
176 The second formula is the one actually used in calculation of the modularity.
177 For directed graphs the second formula replaces $k_c$ with $k^{in}_c k^{out}_c$.
179 Parameters
180 ----------
181 G : NetworkX Graph
183 communities : list or iterable of set of nodes
184 These node sets must represent a partition of G's nodes.
186 weight : string or None, optional (default="weight")
187 The edge attribute that holds the numerical value used
188 as a weight. If None or an edge does not have that attribute,
189 then that edge has weight 1.
191 resolution : float (default=1)
192 If resolution is less than 1, modularity favors larger communities.
193 Greater than 1 favors smaller communities.
195 Returns
196 -------
197 Q : float
198 The modularity of the partition.
200 Raises
201 ------
202 NotAPartition
203 If `communities` is not a partition of the nodes of `G`.
205 Examples
206 --------
207 >>> G = nx.barbell_graph(3, 0)
208 >>> nx.community.modularity(G, [{0, 1, 2}, {3, 4, 5}])
209 0.35714285714285715
210 >>> nx.community.modularity(G, nx.community.label_propagation_communities(G))
211 0.35714285714285715
213 References
214 ----------
215 .. [1] M. E. J. Newman "Networks: An Introduction", page 224.
216 Oxford University Press, 2011.
217 .. [2] Clauset, Aaron, Mark EJ Newman, and Cristopher Moore.
218 "Finding community structure in very large networks."
219 Phys. Rev. E 70.6 (2004). <https://arxiv.org/abs/cond-mat/0408187>
220 .. [3] Reichardt and Bornholdt "Statistical Mechanics of Community Detection"
221 Phys. Rev. E 74, 016110, 2006. https://doi.org/10.1103/PhysRevE.74.016110
222 .. [4] M. E. J. Newman, "Equivalence between modularity optimization and
223 maximum likelihood methods for community detection"
224 Phys. Rev. E 94, 052315, 2016. https://doi.org/10.1103/PhysRevE.94.052315
225 .. [5] Blondel, V.D. et al. "Fast unfolding of communities in large
226 networks" J. Stat. Mech 10008, 1-12 (2008).
227 https://doi.org/10.1088/1742-5468/2008/10/P10008
228 """
229 if not isinstance(communities, list):
230 communities = list(communities)
231 if not is_partition(G, communities):
232 raise NotAPartition(G, communities)
234 directed = G.is_directed()
235 if directed:
236 out_degree = dict(G.out_degree(weight=weight))
237 in_degree = dict(G.in_degree(weight=weight))
238 m = sum(out_degree.values())
239 norm = 1 / m**2
240 else:
241 out_degree = in_degree = dict(G.degree(weight=weight))
242 deg_sum = sum(out_degree.values())
243 m = deg_sum / 2
244 norm = 1 / deg_sum**2
246 def community_contribution(community):
247 comm = set(community)
248 L_c = sum(wt for u, v, wt in G.edges(comm, data=weight, default=1) if v in comm)
250 out_degree_sum = sum(out_degree[u] for u in comm)
251 in_degree_sum = sum(in_degree[u] for u in comm) if directed else out_degree_sum
253 return L_c / m - resolution * out_degree_sum * in_degree_sum * norm
255 return sum(map(community_contribution, communities))
258@require_partition
259@nx._dispatch
260def partition_quality(G, partition):
261 """Returns the coverage and performance of a partition of G.
263 The *coverage* of a partition is the ratio of the number of
264 intra-community edges to the total number of edges in the graph.
266 The *performance* of a partition is the number of
267 intra-community edges plus inter-community non-edges divided by the total
268 number of potential edges.
270 This algorithm has complexity $O(C^2 + L)$ where C is the number of communities and L is the number of links.
272 Parameters
273 ----------
274 G : NetworkX graph
276 partition : sequence
277 Partition of the nodes of `G`, represented as a sequence of
278 sets of nodes (blocks). Each block of the partition represents a
279 community.
281 Returns
282 -------
283 (float, float)
284 The (coverage, performance) tuple of the partition, as defined above.
286 Raises
287 ------
288 NetworkXError
289 If `partition` is not a valid partition of the nodes of `G`.
291 Notes
292 -----
293 If `G` is a multigraph;
294 - for coverage, the multiplicity of edges is counted
295 - for performance, the result is -1 (total number of possible edges is not defined)
297 References
298 ----------
299 .. [1] Santo Fortunato.
300 "Community Detection in Graphs".
301 *Physical Reports*, Volume 486, Issue 3--5 pp. 75--174
302 <https://arxiv.org/abs/0906.0612>
303 """
305 node_community = {}
306 for i, community in enumerate(partition):
307 for node in community:
308 node_community[node] = i
310 # `performance` is not defined for multigraphs
311 if not G.is_multigraph():
312 # Iterate over the communities, quadratic, to calculate `possible_inter_community_edges`
313 possible_inter_community_edges = sum(
314 len(p1) * len(p2) for p1, p2 in combinations(partition, 2)
315 )
317 if G.is_directed():
318 possible_inter_community_edges *= 2
319 else:
320 possible_inter_community_edges = 0
322 # Compute the number of edges in the complete graph -- `n` nodes,
323 # directed or undirected, depending on `G`
324 n = len(G)
325 total_pairs = n * (n - 1)
326 if not G.is_directed():
327 total_pairs //= 2
329 intra_community_edges = 0
330 inter_community_non_edges = possible_inter_community_edges
332 # Iterate over the links to count `intra_community_edges` and `inter_community_non_edges`
333 for e in G.edges():
334 if node_community[e[0]] == node_community[e[1]]:
335 intra_community_edges += 1
336 else:
337 inter_community_non_edges -= 1
339 coverage = intra_community_edges / len(G.edges)
341 if G.is_multigraph():
342 performance = -1.0
343 else:
344 performance = (intra_community_edges + inter_community_non_edges) / total_pairs
346 return coverage, performance