Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/networkx/algorithms/cluster.py: 17%

155 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-10-20 07:00 +0000

1"""Algorithms to characterize the number of triangles in a graph.""" 

2 

3from collections import Counter 

4from itertools import chain, combinations 

5 

6import networkx as nx 

7from networkx.utils import not_implemented_for 

8 

9__all__ = [ 

10 "triangles", 

11 "average_clustering", 

12 "clustering", 

13 "transitivity", 

14 "square_clustering", 

15 "generalized_degree", 

16] 

17 

18 

19@not_implemented_for("directed") 

20@nx._dispatch 

21def triangles(G, nodes=None): 

22 """Compute the number of triangles. 

23 

24 Finds the number of triangles that include a node as one vertex. 

25 

26 Parameters 

27 ---------- 

28 G : graph 

29 A networkx graph 

30 

31 nodes : node, iterable of nodes, or None (default=None) 

32 If a singleton node, return the number of triangles for that node. 

33 If an iterable, compute the number of triangles for each of those nodes. 

34 If `None` (the default) compute the number of triangles for all nodes in `G`. 

35 

36 Returns 

37 ------- 

38 out : dict or int 

39 If `nodes` is a container of nodes, returns number of triangles keyed by node (dict). 

40 If `nodes` is a specific node, returns number of triangles for the node (int). 

41 

42 Examples 

43 -------- 

44 >>> G = nx.complete_graph(5) 

45 >>> print(nx.triangles(G, 0)) 

46 6 

47 >>> print(nx.triangles(G)) 

48 {0: 6, 1: 6, 2: 6, 3: 6, 4: 6} 

49 >>> print(list(nx.triangles(G, [0, 1]).values())) 

50 [6, 6] 

51 

52 Notes 

53 ----- 

54 Self loops are ignored. 

55 

56 """ 

57 if nodes is not None: 

58 # If `nodes` represents a single node, return only its number of triangles 

59 if nodes in G: 

60 return next(_triangles_and_degree_iter(G, nodes))[2] // 2 

61 

62 # if `nodes` is a container of nodes, then return a 

63 # dictionary mapping node to number of triangles. 

64 return {v: t // 2 for v, d, t, _ in _triangles_and_degree_iter(G, nodes)} 

65 

66 # if nodes is None, then compute triangles for the complete graph 

67 

68 # dict used to avoid visiting the same nodes twice 

69 # this allows calculating/counting each triangle only once 

70 later_neighbors = {} 

71 

72 # iterate over the nodes in a graph 

73 for node, neighbors in G.adjacency(): 

74 later_neighbors[node] = { 

75 n for n in neighbors if n not in later_neighbors and n is not node 

76 } 

77 

78 # instantiate Counter for each node to include isolated nodes 

79 # add 1 to the count if a nodes neighbor's neighbor is also a neighbor 

80 triangle_counts = Counter(dict.fromkeys(G, 0)) 

81 for node1, neighbors in later_neighbors.items(): 

82 for node2 in neighbors: 

83 third_nodes = neighbors & later_neighbors[node2] 

84 m = len(third_nodes) 

85 triangle_counts[node1] += m 

86 triangle_counts[node2] += m 

87 triangle_counts.update(third_nodes) 

88 

89 return dict(triangle_counts) 

90 

91 

92@not_implemented_for("multigraph") 

93def _triangles_and_degree_iter(G, nodes=None): 

94 """Return an iterator of (node, degree, triangles, generalized degree). 

95 

96 This double counts triangles so you may want to divide by 2. 

97 See degree(), triangles() and generalized_degree() for definitions 

98 and details. 

99 

100 """ 

101 if nodes is None: 

102 nodes_nbrs = G.adj.items() 

103 else: 

104 nodes_nbrs = ((n, G[n]) for n in G.nbunch_iter(nodes)) 

105 

106 for v, v_nbrs in nodes_nbrs: 

107 vs = set(v_nbrs) - {v} 

108 gen_degree = Counter(len(vs & (set(G[w]) - {w})) for w in vs) 

109 ntriangles = sum(k * val for k, val in gen_degree.items()) 

110 yield (v, len(vs), ntriangles, gen_degree) 

111 

112 

113@not_implemented_for("multigraph") 

114def _weighted_triangles_and_degree_iter(G, nodes=None, weight="weight"): 

115 """Return an iterator of (node, degree, weighted_triangles). 

116 

117 Used for weighted clustering. 

118 Note: this returns the geometric average weight of edges in the triangle. 

119 Also, each triangle is counted twice (each direction). 

120 So you may want to divide by 2. 

121 

122 """ 

123 import numpy as np 

124 

125 if weight is None or G.number_of_edges() == 0: 

126 max_weight = 1 

127 else: 

128 max_weight = max(d.get(weight, 1) for u, v, d in G.edges(data=True)) 

129 if nodes is None: 

130 nodes_nbrs = G.adj.items() 

131 else: 

132 nodes_nbrs = ((n, G[n]) for n in G.nbunch_iter(nodes)) 

133 

134 def wt(u, v): 

135 return G[u][v].get(weight, 1) / max_weight 

136 

137 for i, nbrs in nodes_nbrs: 

138 inbrs = set(nbrs) - {i} 

139 weighted_triangles = 0 

140 seen = set() 

141 for j in inbrs: 

142 seen.add(j) 

143 # This avoids counting twice -- we double at the end. 

144 jnbrs = set(G[j]) - seen 

145 # Only compute the edge weight once, before the inner inner 

146 # loop. 

147 wij = wt(i, j) 

148 weighted_triangles += sum( 

149 np.cbrt([(wij * wt(j, k) * wt(k, i)) for k in inbrs & jnbrs]) 

150 ) 

151 yield (i, len(inbrs), 2 * weighted_triangles) 

152 

153 

154@not_implemented_for("multigraph") 

155def _directed_triangles_and_degree_iter(G, nodes=None): 

156 """Return an iterator of 

157 (node, total_degree, reciprocal_degree, directed_triangles). 

158 

159 Used for directed clustering. 

160 Note that unlike `_triangles_and_degree_iter()`, this function counts 

161 directed triangles so does not count triangles twice. 

162 

163 """ 

164 nodes_nbrs = ((n, G._pred[n], G._succ[n]) for n in G.nbunch_iter(nodes)) 

165 

166 for i, preds, succs in nodes_nbrs: 

167 ipreds = set(preds) - {i} 

168 isuccs = set(succs) - {i} 

169 

170 directed_triangles = 0 

171 for j in chain(ipreds, isuccs): 

172 jpreds = set(G._pred[j]) - {j} 

173 jsuccs = set(G._succ[j]) - {j} 

174 directed_triangles += sum( 

175 1 

176 for k in chain( 

177 (ipreds & jpreds), 

178 (ipreds & jsuccs), 

179 (isuccs & jpreds), 

180 (isuccs & jsuccs), 

181 ) 

182 ) 

183 dtotal = len(ipreds) + len(isuccs) 

184 dbidirectional = len(ipreds & isuccs) 

185 yield (i, dtotal, dbidirectional, directed_triangles) 

186 

187 

188@not_implemented_for("multigraph") 

189def _directed_weighted_triangles_and_degree_iter(G, nodes=None, weight="weight"): 

190 """Return an iterator of 

191 (node, total_degree, reciprocal_degree, directed_weighted_triangles). 

192 

193 Used for directed weighted clustering. 

194 Note that unlike `_weighted_triangles_and_degree_iter()`, this function counts 

195 directed triangles so does not count triangles twice. 

196 

197 """ 

198 import numpy as np 

199 

200 if weight is None or G.number_of_edges() == 0: 

201 max_weight = 1 

202 else: 

203 max_weight = max(d.get(weight, 1) for u, v, d in G.edges(data=True)) 

204 

205 nodes_nbrs = ((n, G._pred[n], G._succ[n]) for n in G.nbunch_iter(nodes)) 

206 

207 def wt(u, v): 

208 return G[u][v].get(weight, 1) / max_weight 

209 

210 for i, preds, succs in nodes_nbrs: 

211 ipreds = set(preds) - {i} 

212 isuccs = set(succs) - {i} 

213 

214 directed_triangles = 0 

215 for j in ipreds: 

216 jpreds = set(G._pred[j]) - {j} 

217 jsuccs = set(G._succ[j]) - {j} 

218 directed_triangles += sum( 

219 np.cbrt([(wt(j, i) * wt(k, i) * wt(k, j)) for k in ipreds & jpreds]) 

220 ) 

221 directed_triangles += sum( 

222 np.cbrt([(wt(j, i) * wt(k, i) * wt(j, k)) for k in ipreds & jsuccs]) 

223 ) 

224 directed_triangles += sum( 

225 np.cbrt([(wt(j, i) * wt(i, k) * wt(k, j)) for k in isuccs & jpreds]) 

226 ) 

227 directed_triangles += sum( 

228 np.cbrt([(wt(j, i) * wt(i, k) * wt(j, k)) for k in isuccs & jsuccs]) 

229 ) 

230 

231 for j in isuccs: 

232 jpreds = set(G._pred[j]) - {j} 

233 jsuccs = set(G._succ[j]) - {j} 

234 directed_triangles += sum( 

235 np.cbrt([(wt(i, j) * wt(k, i) * wt(k, j)) for k in ipreds & jpreds]) 

236 ) 

237 directed_triangles += sum( 

238 np.cbrt([(wt(i, j) * wt(k, i) * wt(j, k)) for k in ipreds & jsuccs]) 

239 ) 

240 directed_triangles += sum( 

241 np.cbrt([(wt(i, j) * wt(i, k) * wt(k, j)) for k in isuccs & jpreds]) 

242 ) 

243 directed_triangles += sum( 

244 np.cbrt([(wt(i, j) * wt(i, k) * wt(j, k)) for k in isuccs & jsuccs]) 

245 ) 

246 

247 dtotal = len(ipreds) + len(isuccs) 

248 dbidirectional = len(ipreds & isuccs) 

249 yield (i, dtotal, dbidirectional, directed_triangles) 

250 

251 

252@nx._dispatch(edge_attrs="weight") 

253def average_clustering(G, nodes=None, weight=None, count_zeros=True): 

254 r"""Compute the average clustering coefficient for the graph G. 

255 

256 The clustering coefficient for the graph is the average, 

257 

258 .. math:: 

259 

260 C = \frac{1}{n}\sum_{v \in G} c_v, 

261 

262 where :math:`n` is the number of nodes in `G`. 

263 

264 Parameters 

265 ---------- 

266 G : graph 

267 

268 nodes : container of nodes, optional (default=all nodes in G) 

269 Compute average clustering for nodes in this container. 

270 

271 weight : string or None, optional (default=None) 

272 The edge attribute that holds the numerical value used as a weight. 

273 If None, then each edge has weight 1. 

274 

275 count_zeros : bool 

276 If False include only the nodes with nonzero clustering in the average. 

277 

278 Returns 

279 ------- 

280 avg : float 

281 Average clustering 

282 

283 Examples 

284 -------- 

285 >>> G = nx.complete_graph(5) 

286 >>> print(nx.average_clustering(G)) 

287 1.0 

288 

289 Notes 

290 ----- 

291 This is a space saving routine; it might be faster 

292 to use the clustering function to get a list and then take the average. 

293 

294 Self loops are ignored. 

295 

296 References 

297 ---------- 

298 .. [1] Generalizations of the clustering coefficient to weighted 

299 complex networks by J. Saramäki, M. Kivelä, J.-P. Onnela, 

300 K. Kaski, and J. Kertész, Physical Review E, 75 027105 (2007). 

301 http://jponnela.com/web_documents/a9.pdf 

302 .. [2] Marcus Kaiser, Mean clustering coefficients: the role of isolated 

303 nodes and leafs on clustering measures for small-world networks. 

304 https://arxiv.org/abs/0802.2512 

305 """ 

306 c = clustering(G, nodes, weight=weight).values() 

307 if not count_zeros: 

308 c = [v for v in c if abs(v) > 0] 

309 return sum(c) / len(c) 

310 

311 

312@nx._dispatch(edge_attrs="weight") 

313def clustering(G, nodes=None, weight=None): 

314 r"""Compute the clustering coefficient for nodes. 

315 

316 For unweighted graphs, the clustering of a node :math:`u` 

317 is the fraction of possible triangles through that node that exist, 

318 

319 .. math:: 

320 

321 c_u = \frac{2 T(u)}{deg(u)(deg(u)-1)}, 

322 

323 where :math:`T(u)` is the number of triangles through node :math:`u` and 

324 :math:`deg(u)` is the degree of :math:`u`. 

325 

326 For weighted graphs, there are several ways to define clustering [1]_. 

327 the one used here is defined 

328 as the geometric average of the subgraph edge weights [2]_, 

329 

330 .. math:: 

331 

332 c_u = \frac{1}{deg(u)(deg(u)-1))} 

333 \sum_{vw} (\hat{w}_{uv} \hat{w}_{uw} \hat{w}_{vw})^{1/3}. 

334 

335 The edge weights :math:`\hat{w}_{uv}` are normalized by the maximum weight 

336 in the network :math:`\hat{w}_{uv} = w_{uv}/\max(w)`. 

337 

338 The value of :math:`c_u` is assigned to 0 if :math:`deg(u) < 2`. 

339 

340 Additionally, this weighted definition has been generalized to support negative edge weights [3]_. 

341 

342 For directed graphs, the clustering is similarly defined as the fraction 

343 of all possible directed triangles or geometric average of the subgraph 

344 edge weights for unweighted and weighted directed graph respectively [4]_. 

345 

346 .. math:: 

347 

348 c_u = \frac{T(u)}{2(deg^{tot}(u)(deg^{tot}(u)-1) - 2deg^{\leftrightarrow}(u))}, 

349 

350 where :math:`T(u)` is the number of directed triangles through node 

351 :math:`u`, :math:`deg^{tot}(u)` is the sum of in degree and out degree of 

352 :math:`u` and :math:`deg^{\leftrightarrow}(u)` is the reciprocal degree of 

353 :math:`u`. 

354 

355 

356 Parameters 

357 ---------- 

358 G : graph 

359 

360 nodes : node, iterable of nodes, or None (default=None) 

361 If a singleton node, return the number of triangles for that node. 

362 If an iterable, compute the number of triangles for each of those nodes. 

363 If `None` (the default) compute the number of triangles for all nodes in `G`. 

364 

365 weight : string or None, optional (default=None) 

366 The edge attribute that holds the numerical value used as a weight. 

367 If None, then each edge has weight 1. 

368 

369 Returns 

370 ------- 

371 out : float, or dictionary 

372 Clustering coefficient at specified nodes 

373 

374 Examples 

375 -------- 

376 >>> G = nx.complete_graph(5) 

377 >>> print(nx.clustering(G, 0)) 

378 1.0 

379 >>> print(nx.clustering(G)) 

380 {0: 1.0, 1: 1.0, 2: 1.0, 3: 1.0, 4: 1.0} 

381 

382 Notes 

383 ----- 

384 Self loops are ignored. 

385 

386 References 

387 ---------- 

388 .. [1] Generalizations of the clustering coefficient to weighted 

389 complex networks by J. Saramäki, M. Kivelä, J.-P. Onnela, 

390 K. Kaski, and J. Kertész, Physical Review E, 75 027105 (2007). 

391 http://jponnela.com/web_documents/a9.pdf 

392 .. [2] Intensity and coherence of motifs in weighted complex 

393 networks by J. P. Onnela, J. Saramäki, J. Kertész, and K. Kaski, 

394 Physical Review E, 71(6), 065103 (2005). 

395 .. [3] Generalization of Clustering Coefficients to Signed Correlation Networks 

396 by G. Costantini and M. Perugini, PloS one, 9(2), e88669 (2014). 

397 .. [4] Clustering in complex directed networks by G. Fagiolo, 

398 Physical Review E, 76(2), 026107 (2007). 

399 """ 

400 if G.is_directed(): 

401 if weight is not None: 

402 td_iter = _directed_weighted_triangles_and_degree_iter(G, nodes, weight) 

403 clusterc = { 

404 v: 0 if t == 0 else t / ((dt * (dt - 1) - 2 * db) * 2) 

405 for v, dt, db, t in td_iter 

406 } 

407 else: 

408 td_iter = _directed_triangles_and_degree_iter(G, nodes) 

409 clusterc = { 

410 v: 0 if t == 0 else t / ((dt * (dt - 1) - 2 * db) * 2) 

411 for v, dt, db, t in td_iter 

412 } 

413 else: 

414 # The formula 2*T/(d*(d-1)) from docs is t/(d*(d-1)) here b/c t==2*T 

415 if weight is not None: 

416 td_iter = _weighted_triangles_and_degree_iter(G, nodes, weight) 

417 clusterc = {v: 0 if t == 0 else t / (d * (d - 1)) for v, d, t in td_iter} 

418 else: 

419 td_iter = _triangles_and_degree_iter(G, nodes) 

420 clusterc = {v: 0 if t == 0 else t / (d * (d - 1)) for v, d, t, _ in td_iter} 

421 if nodes in G: 

422 # Return the value of the sole entry in the dictionary. 

423 return clusterc[nodes] 

424 return clusterc 

425 

426 

427@nx._dispatch 

428def transitivity(G): 

429 r"""Compute graph transitivity, the fraction of all possible triangles 

430 present in G. 

431 

432 Possible triangles are identified by the number of "triads" 

433 (two edges with a shared vertex). 

434 

435 The transitivity is 

436 

437 .. math:: 

438 

439 T = 3\frac{\#triangles}{\#triads}. 

440 

441 Parameters 

442 ---------- 

443 G : graph 

444 

445 Returns 

446 ------- 

447 out : float 

448 Transitivity 

449 

450 Examples 

451 -------- 

452 >>> G = nx.complete_graph(5) 

453 >>> print(nx.transitivity(G)) 

454 1.0 

455 """ 

456 triangles_contri = [ 

457 (t, d * (d - 1)) for v, d, t, _ in _triangles_and_degree_iter(G) 

458 ] 

459 # If the graph is empty 

460 if len(triangles_contri) == 0: 

461 return 0 

462 triangles, contri = map(sum, zip(*triangles_contri)) 

463 return 0 if triangles == 0 else triangles / contri 

464 

465 

466@nx._dispatch 

467def square_clustering(G, nodes=None): 

468 r"""Compute the squares clustering coefficient for nodes. 

469 

470 For each node return the fraction of possible squares that exist at 

471 the node [1]_ 

472 

473 .. math:: 

474 C_4(v) = \frac{ \sum_{u=1}^{k_v} 

475 \sum_{w=u+1}^{k_v} q_v(u,w) }{ \sum_{u=1}^{k_v} 

476 \sum_{w=u+1}^{k_v} [a_v(u,w) + q_v(u,w)]}, 

477 

478 where :math:`q_v(u,w)` are the number of common neighbors of :math:`u` and 

479 :math:`w` other than :math:`v` (ie squares), and :math:`a_v(u,w) = (k_u - 

480 (1+q_v(u,w)+\theta_{uv})) + (k_w - (1+q_v(u,w)+\theta_{uw}))`, where 

481 :math:`\theta_{uw} = 1` if :math:`u` and :math:`w` are connected and 0 

482 otherwise. [2]_ 

483 

484 Parameters 

485 ---------- 

486 G : graph 

487 

488 nodes : container of nodes, optional (default=all nodes in G) 

489 Compute clustering for nodes in this container. 

490 

491 Returns 

492 ------- 

493 c4 : dictionary 

494 A dictionary keyed by node with the square clustering coefficient value. 

495 

496 Examples 

497 -------- 

498 >>> G = nx.complete_graph(5) 

499 >>> print(nx.square_clustering(G, 0)) 

500 1.0 

501 >>> print(nx.square_clustering(G)) 

502 {0: 1.0, 1: 1.0, 2: 1.0, 3: 1.0, 4: 1.0} 

503 

504 Notes 

505 ----- 

506 While :math:`C_3(v)` (triangle clustering) gives the probability that 

507 two neighbors of node v are connected with each other, :math:`C_4(v)` is 

508 the probability that two neighbors of node v share a common 

509 neighbor different from v. This algorithm can be applied to both 

510 bipartite and unipartite networks. 

511 

512 References 

513 ---------- 

514 .. [1] Pedro G. Lind, Marta C. González, and Hans J. Herrmann. 2005 

515 Cycles and clustering in bipartite networks. 

516 Physical Review E (72) 056127. 

517 .. [2] Zhang, Peng et al. Clustering Coefficient and Community Structure of 

518 Bipartite Networks. Physica A: Statistical Mechanics and its Applications 387.27 (2008): 6869–6875. 

519 https://arxiv.org/abs/0710.0117v1 

520 """ 

521 if nodes is None: 

522 node_iter = G 

523 else: 

524 node_iter = G.nbunch_iter(nodes) 

525 clustering = {} 

526 for v in node_iter: 

527 clustering[v] = 0 

528 potential = 0 

529 for u, w in combinations(G[v], 2): 

530 squares = len((set(G[u]) & set(G[w])) - {v}) 

531 clustering[v] += squares 

532 degm = squares + 1 

533 if w in G[u]: 

534 degm += 1 

535 potential += (len(G[u]) - degm) + (len(G[w]) - degm) + squares 

536 if potential > 0: 

537 clustering[v] /= potential 

538 if nodes in G: 

539 # Return the value of the sole entry in the dictionary. 

540 return clustering[nodes] 

541 return clustering 

542 

543 

544@not_implemented_for("directed") 

545@nx._dispatch 

546def generalized_degree(G, nodes=None): 

547 r"""Compute the generalized degree for nodes. 

548 

549 For each node, the generalized degree shows how many edges of given 

550 triangle multiplicity the node is connected to. The triangle multiplicity 

551 of an edge is the number of triangles an edge participates in. The 

552 generalized degree of node :math:`i` can be written as a vector 

553 :math:`\mathbf{k}_i=(k_i^{(0)}, \dotsc, k_i^{(N-2)})` where 

554 :math:`k_i^{(j)}` is the number of edges attached to node :math:`i` that 

555 participate in :math:`j` triangles. 

556 

557 Parameters 

558 ---------- 

559 G : graph 

560 

561 nodes : container of nodes, optional (default=all nodes in G) 

562 Compute the generalized degree for nodes in this container. 

563 

564 Returns 

565 ------- 

566 out : Counter, or dictionary of Counters 

567 Generalized degree of specified nodes. The Counter is keyed by edge 

568 triangle multiplicity. 

569 

570 Examples 

571 -------- 

572 >>> G = nx.complete_graph(5) 

573 >>> print(nx.generalized_degree(G, 0)) 

574 Counter({3: 4}) 

575 >>> print(nx.generalized_degree(G)) 

576 {0: Counter({3: 4}), 1: Counter({3: 4}), 2: Counter({3: 4}), 3: Counter({3: 4}), 4: Counter({3: 4})} 

577 

578 To recover the number of triangles attached to a node: 

579 

580 >>> k1 = nx.generalized_degree(G, 0) 

581 >>> sum([k * v for k, v in k1.items()]) / 2 == nx.triangles(G, 0) 

582 True 

583 

584 Notes 

585 ----- 

586 In a network of N nodes, the highest triangle multiplicity an edge can have 

587 is N-2. 

588 

589 The return value does not include a `zero` entry if no edges of a 

590 particular triangle multiplicity are present. 

591 

592 The number of triangles node :math:`i` is attached to can be recovered from 

593 the generalized degree :math:`\mathbf{k}_i=(k_i^{(0)}, \dotsc, 

594 k_i^{(N-2)})` by :math:`(k_i^{(1)}+2k_i^{(2)}+\dotsc +(N-2)k_i^{(N-2)})/2`. 

595 

596 References 

597 ---------- 

598 .. [1] Networks with arbitrary edge multiplicities by V. Zlatić, 

599 D. Garlaschelli and G. Caldarelli, EPL (Europhysics Letters), 

600 Volume 97, Number 2 (2012). 

601 https://iopscience.iop.org/article/10.1209/0295-5075/97/28005 

602 """ 

603 if nodes in G: 

604 return next(_triangles_and_degree_iter(G, nodes))[3] 

605 return {v: gd for v, d, t, gd in _triangles_and_degree_iter(G, nodes)}