Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/networkx/algorithms/cluster.py: 17%
155 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-10-20 07:00 +0000
« prev ^ index » next coverage.py v7.3.2, created at 2023-10-20 07:00 +0000
1"""Algorithms to characterize the number of triangles in a graph."""
3from collections import Counter
4from itertools import chain, combinations
6import networkx as nx
7from networkx.utils import not_implemented_for
9__all__ = [
10 "triangles",
11 "average_clustering",
12 "clustering",
13 "transitivity",
14 "square_clustering",
15 "generalized_degree",
16]
19@not_implemented_for("directed")
20@nx._dispatch
21def triangles(G, nodes=None):
22 """Compute the number of triangles.
24 Finds the number of triangles that include a node as one vertex.
26 Parameters
27 ----------
28 G : graph
29 A networkx graph
31 nodes : node, iterable of nodes, or None (default=None)
32 If a singleton node, return the number of triangles for that node.
33 If an iterable, compute the number of triangles for each of those nodes.
34 If `None` (the default) compute the number of triangles for all nodes in `G`.
36 Returns
37 -------
38 out : dict or int
39 If `nodes` is a container of nodes, returns number of triangles keyed by node (dict).
40 If `nodes` is a specific node, returns number of triangles for the node (int).
42 Examples
43 --------
44 >>> G = nx.complete_graph(5)
45 >>> print(nx.triangles(G, 0))
46 6
47 >>> print(nx.triangles(G))
48 {0: 6, 1: 6, 2: 6, 3: 6, 4: 6}
49 >>> print(list(nx.triangles(G, [0, 1]).values()))
50 [6, 6]
52 Notes
53 -----
54 Self loops are ignored.
56 """
57 if nodes is not None:
58 # If `nodes` represents a single node, return only its number of triangles
59 if nodes in G:
60 return next(_triangles_and_degree_iter(G, nodes))[2] // 2
62 # if `nodes` is a container of nodes, then return a
63 # dictionary mapping node to number of triangles.
64 return {v: t // 2 for v, d, t, _ in _triangles_and_degree_iter(G, nodes)}
66 # if nodes is None, then compute triangles for the complete graph
68 # dict used to avoid visiting the same nodes twice
69 # this allows calculating/counting each triangle only once
70 later_neighbors = {}
72 # iterate over the nodes in a graph
73 for node, neighbors in G.adjacency():
74 later_neighbors[node] = {
75 n for n in neighbors if n not in later_neighbors and n is not node
76 }
78 # instantiate Counter for each node to include isolated nodes
79 # add 1 to the count if a nodes neighbor's neighbor is also a neighbor
80 triangle_counts = Counter(dict.fromkeys(G, 0))
81 for node1, neighbors in later_neighbors.items():
82 for node2 in neighbors:
83 third_nodes = neighbors & later_neighbors[node2]
84 m = len(third_nodes)
85 triangle_counts[node1] += m
86 triangle_counts[node2] += m
87 triangle_counts.update(third_nodes)
89 return dict(triangle_counts)
92@not_implemented_for("multigraph")
93def _triangles_and_degree_iter(G, nodes=None):
94 """Return an iterator of (node, degree, triangles, generalized degree).
96 This double counts triangles so you may want to divide by 2.
97 See degree(), triangles() and generalized_degree() for definitions
98 and details.
100 """
101 if nodes is None:
102 nodes_nbrs = G.adj.items()
103 else:
104 nodes_nbrs = ((n, G[n]) for n in G.nbunch_iter(nodes))
106 for v, v_nbrs in nodes_nbrs:
107 vs = set(v_nbrs) - {v}
108 gen_degree = Counter(len(vs & (set(G[w]) - {w})) for w in vs)
109 ntriangles = sum(k * val for k, val in gen_degree.items())
110 yield (v, len(vs), ntriangles, gen_degree)
113@not_implemented_for("multigraph")
114def _weighted_triangles_and_degree_iter(G, nodes=None, weight="weight"):
115 """Return an iterator of (node, degree, weighted_triangles).
117 Used for weighted clustering.
118 Note: this returns the geometric average weight of edges in the triangle.
119 Also, each triangle is counted twice (each direction).
120 So you may want to divide by 2.
122 """
123 import numpy as np
125 if weight is None or G.number_of_edges() == 0:
126 max_weight = 1
127 else:
128 max_weight = max(d.get(weight, 1) for u, v, d in G.edges(data=True))
129 if nodes is None:
130 nodes_nbrs = G.adj.items()
131 else:
132 nodes_nbrs = ((n, G[n]) for n in G.nbunch_iter(nodes))
134 def wt(u, v):
135 return G[u][v].get(weight, 1) / max_weight
137 for i, nbrs in nodes_nbrs:
138 inbrs = set(nbrs) - {i}
139 weighted_triangles = 0
140 seen = set()
141 for j in inbrs:
142 seen.add(j)
143 # This avoids counting twice -- we double at the end.
144 jnbrs = set(G[j]) - seen
145 # Only compute the edge weight once, before the inner inner
146 # loop.
147 wij = wt(i, j)
148 weighted_triangles += sum(
149 np.cbrt([(wij * wt(j, k) * wt(k, i)) for k in inbrs & jnbrs])
150 )
151 yield (i, len(inbrs), 2 * weighted_triangles)
154@not_implemented_for("multigraph")
155def _directed_triangles_and_degree_iter(G, nodes=None):
156 """Return an iterator of
157 (node, total_degree, reciprocal_degree, directed_triangles).
159 Used for directed clustering.
160 Note that unlike `_triangles_and_degree_iter()`, this function counts
161 directed triangles so does not count triangles twice.
163 """
164 nodes_nbrs = ((n, G._pred[n], G._succ[n]) for n in G.nbunch_iter(nodes))
166 for i, preds, succs in nodes_nbrs:
167 ipreds = set(preds) - {i}
168 isuccs = set(succs) - {i}
170 directed_triangles = 0
171 for j in chain(ipreds, isuccs):
172 jpreds = set(G._pred[j]) - {j}
173 jsuccs = set(G._succ[j]) - {j}
174 directed_triangles += sum(
175 1
176 for k in chain(
177 (ipreds & jpreds),
178 (ipreds & jsuccs),
179 (isuccs & jpreds),
180 (isuccs & jsuccs),
181 )
182 )
183 dtotal = len(ipreds) + len(isuccs)
184 dbidirectional = len(ipreds & isuccs)
185 yield (i, dtotal, dbidirectional, directed_triangles)
188@not_implemented_for("multigraph")
189def _directed_weighted_triangles_and_degree_iter(G, nodes=None, weight="weight"):
190 """Return an iterator of
191 (node, total_degree, reciprocal_degree, directed_weighted_triangles).
193 Used for directed weighted clustering.
194 Note that unlike `_weighted_triangles_and_degree_iter()`, this function counts
195 directed triangles so does not count triangles twice.
197 """
198 import numpy as np
200 if weight is None or G.number_of_edges() == 0:
201 max_weight = 1
202 else:
203 max_weight = max(d.get(weight, 1) for u, v, d in G.edges(data=True))
205 nodes_nbrs = ((n, G._pred[n], G._succ[n]) for n in G.nbunch_iter(nodes))
207 def wt(u, v):
208 return G[u][v].get(weight, 1) / max_weight
210 for i, preds, succs in nodes_nbrs:
211 ipreds = set(preds) - {i}
212 isuccs = set(succs) - {i}
214 directed_triangles = 0
215 for j in ipreds:
216 jpreds = set(G._pred[j]) - {j}
217 jsuccs = set(G._succ[j]) - {j}
218 directed_triangles += sum(
219 np.cbrt([(wt(j, i) * wt(k, i) * wt(k, j)) for k in ipreds & jpreds])
220 )
221 directed_triangles += sum(
222 np.cbrt([(wt(j, i) * wt(k, i) * wt(j, k)) for k in ipreds & jsuccs])
223 )
224 directed_triangles += sum(
225 np.cbrt([(wt(j, i) * wt(i, k) * wt(k, j)) for k in isuccs & jpreds])
226 )
227 directed_triangles += sum(
228 np.cbrt([(wt(j, i) * wt(i, k) * wt(j, k)) for k in isuccs & jsuccs])
229 )
231 for j in isuccs:
232 jpreds = set(G._pred[j]) - {j}
233 jsuccs = set(G._succ[j]) - {j}
234 directed_triangles += sum(
235 np.cbrt([(wt(i, j) * wt(k, i) * wt(k, j)) for k in ipreds & jpreds])
236 )
237 directed_triangles += sum(
238 np.cbrt([(wt(i, j) * wt(k, i) * wt(j, k)) for k in ipreds & jsuccs])
239 )
240 directed_triangles += sum(
241 np.cbrt([(wt(i, j) * wt(i, k) * wt(k, j)) for k in isuccs & jpreds])
242 )
243 directed_triangles += sum(
244 np.cbrt([(wt(i, j) * wt(i, k) * wt(j, k)) for k in isuccs & jsuccs])
245 )
247 dtotal = len(ipreds) + len(isuccs)
248 dbidirectional = len(ipreds & isuccs)
249 yield (i, dtotal, dbidirectional, directed_triangles)
252@nx._dispatch(edge_attrs="weight")
253def average_clustering(G, nodes=None, weight=None, count_zeros=True):
254 r"""Compute the average clustering coefficient for the graph G.
256 The clustering coefficient for the graph is the average,
258 .. math::
260 C = \frac{1}{n}\sum_{v \in G} c_v,
262 where :math:`n` is the number of nodes in `G`.
264 Parameters
265 ----------
266 G : graph
268 nodes : container of nodes, optional (default=all nodes in G)
269 Compute average clustering for nodes in this container.
271 weight : string or None, optional (default=None)
272 The edge attribute that holds the numerical value used as a weight.
273 If None, then each edge has weight 1.
275 count_zeros : bool
276 If False include only the nodes with nonzero clustering in the average.
278 Returns
279 -------
280 avg : float
281 Average clustering
283 Examples
284 --------
285 >>> G = nx.complete_graph(5)
286 >>> print(nx.average_clustering(G))
287 1.0
289 Notes
290 -----
291 This is a space saving routine; it might be faster
292 to use the clustering function to get a list and then take the average.
294 Self loops are ignored.
296 References
297 ----------
298 .. [1] Generalizations of the clustering coefficient to weighted
299 complex networks by J. Saramäki, M. Kivelä, J.-P. Onnela,
300 K. Kaski, and J. Kertész, Physical Review E, 75 027105 (2007).
301 http://jponnela.com/web_documents/a9.pdf
302 .. [2] Marcus Kaiser, Mean clustering coefficients: the role of isolated
303 nodes and leafs on clustering measures for small-world networks.
304 https://arxiv.org/abs/0802.2512
305 """
306 c = clustering(G, nodes, weight=weight).values()
307 if not count_zeros:
308 c = [v for v in c if abs(v) > 0]
309 return sum(c) / len(c)
312@nx._dispatch(edge_attrs="weight")
313def clustering(G, nodes=None, weight=None):
314 r"""Compute the clustering coefficient for nodes.
316 For unweighted graphs, the clustering of a node :math:`u`
317 is the fraction of possible triangles through that node that exist,
319 .. math::
321 c_u = \frac{2 T(u)}{deg(u)(deg(u)-1)},
323 where :math:`T(u)` is the number of triangles through node :math:`u` and
324 :math:`deg(u)` is the degree of :math:`u`.
326 For weighted graphs, there are several ways to define clustering [1]_.
327 the one used here is defined
328 as the geometric average of the subgraph edge weights [2]_,
330 .. math::
332 c_u = \frac{1}{deg(u)(deg(u)-1))}
333 \sum_{vw} (\hat{w}_{uv} \hat{w}_{uw} \hat{w}_{vw})^{1/3}.
335 The edge weights :math:`\hat{w}_{uv}` are normalized by the maximum weight
336 in the network :math:`\hat{w}_{uv} = w_{uv}/\max(w)`.
338 The value of :math:`c_u` is assigned to 0 if :math:`deg(u) < 2`.
340 Additionally, this weighted definition has been generalized to support negative edge weights [3]_.
342 For directed graphs, the clustering is similarly defined as the fraction
343 of all possible directed triangles or geometric average of the subgraph
344 edge weights for unweighted and weighted directed graph respectively [4]_.
346 .. math::
348 c_u = \frac{T(u)}{2(deg^{tot}(u)(deg^{tot}(u)-1) - 2deg^{\leftrightarrow}(u))},
350 where :math:`T(u)` is the number of directed triangles through node
351 :math:`u`, :math:`deg^{tot}(u)` is the sum of in degree and out degree of
352 :math:`u` and :math:`deg^{\leftrightarrow}(u)` is the reciprocal degree of
353 :math:`u`.
356 Parameters
357 ----------
358 G : graph
360 nodes : node, iterable of nodes, or None (default=None)
361 If a singleton node, return the number of triangles for that node.
362 If an iterable, compute the number of triangles for each of those nodes.
363 If `None` (the default) compute the number of triangles for all nodes in `G`.
365 weight : string or None, optional (default=None)
366 The edge attribute that holds the numerical value used as a weight.
367 If None, then each edge has weight 1.
369 Returns
370 -------
371 out : float, or dictionary
372 Clustering coefficient at specified nodes
374 Examples
375 --------
376 >>> G = nx.complete_graph(5)
377 >>> print(nx.clustering(G, 0))
378 1.0
379 >>> print(nx.clustering(G))
380 {0: 1.0, 1: 1.0, 2: 1.0, 3: 1.0, 4: 1.0}
382 Notes
383 -----
384 Self loops are ignored.
386 References
387 ----------
388 .. [1] Generalizations of the clustering coefficient to weighted
389 complex networks by J. Saramäki, M. Kivelä, J.-P. Onnela,
390 K. Kaski, and J. Kertész, Physical Review E, 75 027105 (2007).
391 http://jponnela.com/web_documents/a9.pdf
392 .. [2] Intensity and coherence of motifs in weighted complex
393 networks by J. P. Onnela, J. Saramäki, J. Kertész, and K. Kaski,
394 Physical Review E, 71(6), 065103 (2005).
395 .. [3] Generalization of Clustering Coefficients to Signed Correlation Networks
396 by G. Costantini and M. Perugini, PloS one, 9(2), e88669 (2014).
397 .. [4] Clustering in complex directed networks by G. Fagiolo,
398 Physical Review E, 76(2), 026107 (2007).
399 """
400 if G.is_directed():
401 if weight is not None:
402 td_iter = _directed_weighted_triangles_and_degree_iter(G, nodes, weight)
403 clusterc = {
404 v: 0 if t == 0 else t / ((dt * (dt - 1) - 2 * db) * 2)
405 for v, dt, db, t in td_iter
406 }
407 else:
408 td_iter = _directed_triangles_and_degree_iter(G, nodes)
409 clusterc = {
410 v: 0 if t == 0 else t / ((dt * (dt - 1) - 2 * db) * 2)
411 for v, dt, db, t in td_iter
412 }
413 else:
414 # The formula 2*T/(d*(d-1)) from docs is t/(d*(d-1)) here b/c t==2*T
415 if weight is not None:
416 td_iter = _weighted_triangles_and_degree_iter(G, nodes, weight)
417 clusterc = {v: 0 if t == 0 else t / (d * (d - 1)) for v, d, t in td_iter}
418 else:
419 td_iter = _triangles_and_degree_iter(G, nodes)
420 clusterc = {v: 0 if t == 0 else t / (d * (d - 1)) for v, d, t, _ in td_iter}
421 if nodes in G:
422 # Return the value of the sole entry in the dictionary.
423 return clusterc[nodes]
424 return clusterc
427@nx._dispatch
428def transitivity(G):
429 r"""Compute graph transitivity, the fraction of all possible triangles
430 present in G.
432 Possible triangles are identified by the number of "triads"
433 (two edges with a shared vertex).
435 The transitivity is
437 .. math::
439 T = 3\frac{\#triangles}{\#triads}.
441 Parameters
442 ----------
443 G : graph
445 Returns
446 -------
447 out : float
448 Transitivity
450 Examples
451 --------
452 >>> G = nx.complete_graph(5)
453 >>> print(nx.transitivity(G))
454 1.0
455 """
456 triangles_contri = [
457 (t, d * (d - 1)) for v, d, t, _ in _triangles_and_degree_iter(G)
458 ]
459 # If the graph is empty
460 if len(triangles_contri) == 0:
461 return 0
462 triangles, contri = map(sum, zip(*triangles_contri))
463 return 0 if triangles == 0 else triangles / contri
466@nx._dispatch
467def square_clustering(G, nodes=None):
468 r"""Compute the squares clustering coefficient for nodes.
470 For each node return the fraction of possible squares that exist at
471 the node [1]_
473 .. math::
474 C_4(v) = \frac{ \sum_{u=1}^{k_v}
475 \sum_{w=u+1}^{k_v} q_v(u,w) }{ \sum_{u=1}^{k_v}
476 \sum_{w=u+1}^{k_v} [a_v(u,w) + q_v(u,w)]},
478 where :math:`q_v(u,w)` are the number of common neighbors of :math:`u` and
479 :math:`w` other than :math:`v` (ie squares), and :math:`a_v(u,w) = (k_u -
480 (1+q_v(u,w)+\theta_{uv})) + (k_w - (1+q_v(u,w)+\theta_{uw}))`, where
481 :math:`\theta_{uw} = 1` if :math:`u` and :math:`w` are connected and 0
482 otherwise. [2]_
484 Parameters
485 ----------
486 G : graph
488 nodes : container of nodes, optional (default=all nodes in G)
489 Compute clustering for nodes in this container.
491 Returns
492 -------
493 c4 : dictionary
494 A dictionary keyed by node with the square clustering coefficient value.
496 Examples
497 --------
498 >>> G = nx.complete_graph(5)
499 >>> print(nx.square_clustering(G, 0))
500 1.0
501 >>> print(nx.square_clustering(G))
502 {0: 1.0, 1: 1.0, 2: 1.0, 3: 1.0, 4: 1.0}
504 Notes
505 -----
506 While :math:`C_3(v)` (triangle clustering) gives the probability that
507 two neighbors of node v are connected with each other, :math:`C_4(v)` is
508 the probability that two neighbors of node v share a common
509 neighbor different from v. This algorithm can be applied to both
510 bipartite and unipartite networks.
512 References
513 ----------
514 .. [1] Pedro G. Lind, Marta C. González, and Hans J. Herrmann. 2005
515 Cycles and clustering in bipartite networks.
516 Physical Review E (72) 056127.
517 .. [2] Zhang, Peng et al. Clustering Coefficient and Community Structure of
518 Bipartite Networks. Physica A: Statistical Mechanics and its Applications 387.27 (2008): 6869–6875.
519 https://arxiv.org/abs/0710.0117v1
520 """
521 if nodes is None:
522 node_iter = G
523 else:
524 node_iter = G.nbunch_iter(nodes)
525 clustering = {}
526 for v in node_iter:
527 clustering[v] = 0
528 potential = 0
529 for u, w in combinations(G[v], 2):
530 squares = len((set(G[u]) & set(G[w])) - {v})
531 clustering[v] += squares
532 degm = squares + 1
533 if w in G[u]:
534 degm += 1
535 potential += (len(G[u]) - degm) + (len(G[w]) - degm) + squares
536 if potential > 0:
537 clustering[v] /= potential
538 if nodes in G:
539 # Return the value of the sole entry in the dictionary.
540 return clustering[nodes]
541 return clustering
544@not_implemented_for("directed")
545@nx._dispatch
546def generalized_degree(G, nodes=None):
547 r"""Compute the generalized degree for nodes.
549 For each node, the generalized degree shows how many edges of given
550 triangle multiplicity the node is connected to. The triangle multiplicity
551 of an edge is the number of triangles an edge participates in. The
552 generalized degree of node :math:`i` can be written as a vector
553 :math:`\mathbf{k}_i=(k_i^{(0)}, \dotsc, k_i^{(N-2)})` where
554 :math:`k_i^{(j)}` is the number of edges attached to node :math:`i` that
555 participate in :math:`j` triangles.
557 Parameters
558 ----------
559 G : graph
561 nodes : container of nodes, optional (default=all nodes in G)
562 Compute the generalized degree for nodes in this container.
564 Returns
565 -------
566 out : Counter, or dictionary of Counters
567 Generalized degree of specified nodes. The Counter is keyed by edge
568 triangle multiplicity.
570 Examples
571 --------
572 >>> G = nx.complete_graph(5)
573 >>> print(nx.generalized_degree(G, 0))
574 Counter({3: 4})
575 >>> print(nx.generalized_degree(G))
576 {0: Counter({3: 4}), 1: Counter({3: 4}), 2: Counter({3: 4}), 3: Counter({3: 4}), 4: Counter({3: 4})}
578 To recover the number of triangles attached to a node:
580 >>> k1 = nx.generalized_degree(G, 0)
581 >>> sum([k * v for k, v in k1.items()]) / 2 == nx.triangles(G, 0)
582 True
584 Notes
585 -----
586 In a network of N nodes, the highest triangle multiplicity an edge can have
587 is N-2.
589 The return value does not include a `zero` entry if no edges of a
590 particular triangle multiplicity are present.
592 The number of triangles node :math:`i` is attached to can be recovered from
593 the generalized degree :math:`\mathbf{k}_i=(k_i^{(0)}, \dotsc,
594 k_i^{(N-2)})` by :math:`(k_i^{(1)}+2k_i^{(2)}+\dotsc +(N-2)k_i^{(N-2)})/2`.
596 References
597 ----------
598 .. [1] Networks with arbitrary edge multiplicities by V. Zlatić,
599 D. Garlaschelli and G. Caldarelli, EPL (Europhysics Letters),
600 Volume 97, Number 2 (2012).
601 https://iopscience.iop.org/article/10.1209/0295-5075/97/28005
602 """
603 if nodes in G:
604 return next(_triangles_and_degree_iter(G, nodes))[3]
605 return {v: gd for v, d, t, gd in _triangles_and_degree_iter(G, nodes)}