Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/networkx/algorithms/link_prediction.py: 38%

101 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-10-20 07:00 +0000

1""" 

2Link prediction algorithms. 

3""" 

4 

5 

6from math import log 

7 

8import networkx as nx 

9from networkx.utils import not_implemented_for 

10 

11__all__ = [ 

12 "resource_allocation_index", 

13 "jaccard_coefficient", 

14 "adamic_adar_index", 

15 "preferential_attachment", 

16 "cn_soundarajan_hopcroft", 

17 "ra_index_soundarajan_hopcroft", 

18 "within_inter_cluster", 

19 "common_neighbor_centrality", 

20] 

21 

22 

23def _apply_prediction(G, func, ebunch=None): 

24 """Applies the given function to each edge in the specified iterable 

25 of edges. 

26 

27 `G` is an instance of :class:`networkx.Graph`. 

28 

29 `func` is a function on two inputs, each of which is a node in the 

30 graph. The function can return anything, but it should return a 

31 value representing a prediction of the likelihood of a "link" 

32 joining the two nodes. 

33 

34 `ebunch` is an iterable of pairs of nodes. If not specified, all 

35 non-edges in the graph `G` will be used. 

36 

37 """ 

38 if ebunch is None: 

39 ebunch = nx.non_edges(G) 

40 return ((u, v, func(u, v)) for u, v in ebunch) 

41 

42 

43@not_implemented_for("directed") 

44@not_implemented_for("multigraph") 

45@nx._dispatch 

46def resource_allocation_index(G, ebunch=None): 

47 r"""Compute the resource allocation index of all node pairs in ebunch. 

48 

49 Resource allocation index of `u` and `v` is defined as 

50 

51 .. math:: 

52 

53 \sum_{w \in \Gamma(u) \cap \Gamma(v)} \frac{1}{|\Gamma(w)|} 

54 

55 where $\Gamma(u)$ denotes the set of neighbors of $u$. 

56 

57 Parameters 

58 ---------- 

59 G : graph 

60 A NetworkX undirected graph. 

61 

62 ebunch : iterable of node pairs, optional (default = None) 

63 Resource allocation index will be computed for each pair of 

64 nodes given in the iterable. The pairs must be given as 

65 2-tuples (u, v) where u and v are nodes in the graph. If ebunch 

66 is None then all nonexistent edges in the graph will be used. 

67 Default value: None. 

68 

69 Returns 

70 ------- 

71 piter : iterator 

72 An iterator of 3-tuples in the form (u, v, p) where (u, v) is a 

73 pair of nodes and p is their resource allocation index. 

74 

75 Examples 

76 -------- 

77 >>> G = nx.complete_graph(5) 

78 >>> preds = nx.resource_allocation_index(G, [(0, 1), (2, 3)]) 

79 >>> for u, v, p in preds: 

80 ... print(f"({u}, {v}) -> {p:.8f}") 

81 (0, 1) -> 0.75000000 

82 (2, 3) -> 0.75000000 

83 

84 References 

85 ---------- 

86 .. [1] T. Zhou, L. Lu, Y.-C. Zhang. 

87 Predicting missing links via local information. 

88 Eur. Phys. J. B 71 (2009) 623. 

89 https://arxiv.org/pdf/0901.0553.pdf 

90 """ 

91 

92 def predict(u, v): 

93 return sum(1 / G.degree(w) for w in nx.common_neighbors(G, u, v)) 

94 

95 return _apply_prediction(G, predict, ebunch) 

96 

97 

98@not_implemented_for("directed") 

99@not_implemented_for("multigraph") 

100@nx._dispatch 

101def jaccard_coefficient(G, ebunch=None): 

102 r"""Compute the Jaccard coefficient of all node pairs in ebunch. 

103 

104 Jaccard coefficient of nodes `u` and `v` is defined as 

105 

106 .. math:: 

107 

108 \frac{|\Gamma(u) \cap \Gamma(v)|}{|\Gamma(u) \cup \Gamma(v)|} 

109 

110 where $\Gamma(u)$ denotes the set of neighbors of $u$. 

111 

112 Parameters 

113 ---------- 

114 G : graph 

115 A NetworkX undirected graph. 

116 

117 ebunch : iterable of node pairs, optional (default = None) 

118 Jaccard coefficient will be computed for each pair of nodes 

119 given in the iterable. The pairs must be given as 2-tuples 

120 (u, v) where u and v are nodes in the graph. If ebunch is None 

121 then all nonexistent edges in the graph will be used. 

122 Default value: None. 

123 

124 Returns 

125 ------- 

126 piter : iterator 

127 An iterator of 3-tuples in the form (u, v, p) where (u, v) is a 

128 pair of nodes and p is their Jaccard coefficient. 

129 

130 Examples 

131 -------- 

132 >>> G = nx.complete_graph(5) 

133 >>> preds = nx.jaccard_coefficient(G, [(0, 1), (2, 3)]) 

134 >>> for u, v, p in preds: 

135 ... print(f"({u}, {v}) -> {p:.8f}") 

136 (0, 1) -> 0.60000000 

137 (2, 3) -> 0.60000000 

138 

139 References 

140 ---------- 

141 .. [1] D. Liben-Nowell, J. Kleinberg. 

142 The Link Prediction Problem for Social Networks (2004). 

143 http://www.cs.cornell.edu/home/kleinber/link-pred.pdf 

144 """ 

145 

146 def predict(u, v): 

147 union_size = len(set(G[u]) | set(G[v])) 

148 if union_size == 0: 

149 return 0 

150 return len(list(nx.common_neighbors(G, u, v))) / union_size 

151 

152 return _apply_prediction(G, predict, ebunch) 

153 

154 

155@not_implemented_for("directed") 

156@not_implemented_for("multigraph") 

157@nx._dispatch 

158def adamic_adar_index(G, ebunch=None): 

159 r"""Compute the Adamic-Adar index of all node pairs in ebunch. 

160 

161 Adamic-Adar index of `u` and `v` is defined as 

162 

163 .. math:: 

164 

165 \sum_{w \in \Gamma(u) \cap \Gamma(v)} \frac{1}{\log |\Gamma(w)|} 

166 

167 where $\Gamma(u)$ denotes the set of neighbors of $u$. 

168 This index leads to zero-division for nodes only connected via self-loops. 

169 It is intended to be used when no self-loops are present. 

170 

171 Parameters 

172 ---------- 

173 G : graph 

174 NetworkX undirected graph. 

175 

176 ebunch : iterable of node pairs, optional (default = None) 

177 Adamic-Adar index will be computed for each pair of nodes given 

178 in the iterable. The pairs must be given as 2-tuples (u, v) 

179 where u and v are nodes in the graph. If ebunch is None then all 

180 nonexistent edges in the graph will be used. 

181 Default value: None. 

182 

183 Returns 

184 ------- 

185 piter : iterator 

186 An iterator of 3-tuples in the form (u, v, p) where (u, v) is a 

187 pair of nodes and p is their Adamic-Adar index. 

188 

189 Examples 

190 -------- 

191 >>> G = nx.complete_graph(5) 

192 >>> preds = nx.adamic_adar_index(G, [(0, 1), (2, 3)]) 

193 >>> for u, v, p in preds: 

194 ... print(f"({u}, {v}) -> {p:.8f}") 

195 (0, 1) -> 2.16404256 

196 (2, 3) -> 2.16404256 

197 

198 References 

199 ---------- 

200 .. [1] D. Liben-Nowell, J. Kleinberg. 

201 The Link Prediction Problem for Social Networks (2004). 

202 http://www.cs.cornell.edu/home/kleinber/link-pred.pdf 

203 """ 

204 

205 def predict(u, v): 

206 return sum(1 / log(G.degree(w)) for w in nx.common_neighbors(G, u, v)) 

207 

208 return _apply_prediction(G, predict, ebunch) 

209 

210 

211@not_implemented_for("directed") 

212@not_implemented_for("multigraph") 

213@nx._dispatch 

214def common_neighbor_centrality(G, ebunch=None, alpha=0.8): 

215 r"""Return the CCPA score for each pair of nodes. 

216 

217 Compute the Common Neighbor and Centrality based Parameterized Algorithm(CCPA) 

218 score of all node pairs in ebunch. 

219 

220 CCPA score of `u` and `v` is defined as 

221 

222 .. math:: 

223 

224 \alpha \cdot (|\Gamma (u){\cap }^{}\Gamma (v)|)+(1-\alpha )\cdot \frac{N}{{d}_{uv}} 

225 

226 where $\Gamma(u)$ denotes the set of neighbors of $u$, $\Gamma(v)$ denotes the 

227 set of neighbors of $v$, $\alpha$ is parameter varies between [0,1], $N$ denotes 

228 total number of nodes in the Graph and ${d}_{uv}$ denotes shortest distance 

229 between $u$ and $v$. 

230 

231 This algorithm is based on two vital properties of nodes, namely the number 

232 of common neighbors and their centrality. Common neighbor refers to the common 

233 nodes between two nodes. Centrality refers to the prestige that a node enjoys 

234 in a network. 

235 

236 .. seealso:: 

237 

238 :func:`common_neighbors` 

239 

240 Parameters 

241 ---------- 

242 G : graph 

243 NetworkX undirected graph. 

244 

245 ebunch : iterable of node pairs, optional (default = None) 

246 Preferential attachment score will be computed for each pair of 

247 nodes given in the iterable. The pairs must be given as 

248 2-tuples (u, v) where u and v are nodes in the graph. If ebunch 

249 is None then all nonexistent edges in the graph will be used. 

250 Default value: None. 

251 

252 alpha : Parameter defined for participation of Common Neighbor 

253 and Centrality Algorithm share. Values for alpha should 

254 normally be between 0 and 1. Default value set to 0.8 

255 because author found better performance at 0.8 for all the 

256 dataset. 

257 Default value: 0.8 

258 

259 

260 Returns 

261 ------- 

262 piter : iterator 

263 An iterator of 3-tuples in the form (u, v, p) where (u, v) is a 

264 pair of nodes and p is their Common Neighbor and Centrality based 

265 Parameterized Algorithm(CCPA) score. 

266 

267 Examples 

268 -------- 

269 >>> G = nx.complete_graph(5) 

270 >>> preds = nx.common_neighbor_centrality(G, [(0, 1), (2, 3)]) 

271 >>> for u, v, p in preds: 

272 ... print(f"({u}, {v}) -> {p}") 

273 (0, 1) -> 3.4000000000000004 

274 (2, 3) -> 3.4000000000000004 

275 

276 References 

277 ---------- 

278 .. [1] Ahmad, I., Akhtar, M.U., Noor, S. et al. 

279 Missing Link Prediction using Common Neighbor and Centrality based Parameterized Algorithm. 

280 Sci Rep 10, 364 (2020). 

281 https://doi.org/10.1038/s41598-019-57304-y 

282 """ 

283 

284 # When alpha == 1, the CCPA score simplifies to the number of common neighbors. 

285 if alpha == 1: 

286 

287 def predict(u, v): 

288 if u == v: 

289 raise nx.NetworkXAlgorithmError("Self links are not supported") 

290 

291 return sum(1 for _ in nx.common_neighbors(G, u, v)) 

292 

293 else: 

294 spl = dict(nx.shortest_path_length(G)) 

295 inf = float("inf") 

296 

297 def predict(u, v): 

298 if u == v: 

299 raise nx.NetworkXAlgorithmError("Self links are not supported") 

300 path_len = spl[u].get(v, inf) 

301 

302 return alpha * sum(1 for _ in nx.common_neighbors(G, u, v)) + ( 

303 1 - alpha 

304 ) * (G.number_of_nodes() / path_len) 

305 

306 return _apply_prediction(G, predict, ebunch) 

307 

308 

309@not_implemented_for("directed") 

310@not_implemented_for("multigraph") 

311@nx._dispatch 

312def preferential_attachment(G, ebunch=None): 

313 r"""Compute the preferential attachment score of all node pairs in ebunch. 

314 

315 Preferential attachment score of `u` and `v` is defined as 

316 

317 .. math:: 

318 

319 |\Gamma(u)| |\Gamma(v)| 

320 

321 where $\Gamma(u)$ denotes the set of neighbors of $u$. 

322 

323 Parameters 

324 ---------- 

325 G : graph 

326 NetworkX undirected graph. 

327 

328 ebunch : iterable of node pairs, optional (default = None) 

329 Preferential attachment score will be computed for each pair of 

330 nodes given in the iterable. The pairs must be given as 

331 2-tuples (u, v) where u and v are nodes in the graph. If ebunch 

332 is None then all nonexistent edges in the graph will be used. 

333 Default value: None. 

334 

335 Returns 

336 ------- 

337 piter : iterator 

338 An iterator of 3-tuples in the form (u, v, p) where (u, v) is a 

339 pair of nodes and p is their preferential attachment score. 

340 

341 Examples 

342 -------- 

343 >>> G = nx.complete_graph(5) 

344 >>> preds = nx.preferential_attachment(G, [(0, 1), (2, 3)]) 

345 >>> for u, v, p in preds: 

346 ... print(f"({u}, {v}) -> {p}") 

347 (0, 1) -> 16 

348 (2, 3) -> 16 

349 

350 References 

351 ---------- 

352 .. [1] D. Liben-Nowell, J. Kleinberg. 

353 The Link Prediction Problem for Social Networks (2004). 

354 http://www.cs.cornell.edu/home/kleinber/link-pred.pdf 

355 """ 

356 

357 def predict(u, v): 

358 return G.degree(u) * G.degree(v) 

359 

360 return _apply_prediction(G, predict, ebunch) 

361 

362 

363@not_implemented_for("directed") 

364@not_implemented_for("multigraph") 

365@nx._dispatch(node_attrs="community") 

366def cn_soundarajan_hopcroft(G, ebunch=None, community="community"): 

367 r"""Count the number of common neighbors of all node pairs in ebunch 

368 using community information. 

369 

370 For two nodes $u$ and $v$, this function computes the number of 

371 common neighbors and bonus one for each common neighbor belonging to 

372 the same community as $u$ and $v$. Mathematically, 

373 

374 .. math:: 

375 

376 |\Gamma(u) \cap \Gamma(v)| + \sum_{w \in \Gamma(u) \cap \Gamma(v)} f(w) 

377 

378 where $f(w)$ equals 1 if $w$ belongs to the same community as $u$ 

379 and $v$ or 0 otherwise and $\Gamma(u)$ denotes the set of 

380 neighbors of $u$. 

381 

382 Parameters 

383 ---------- 

384 G : graph 

385 A NetworkX undirected graph. 

386 

387 ebunch : iterable of node pairs, optional (default = None) 

388 The score will be computed for each pair of nodes given in the 

389 iterable. The pairs must be given as 2-tuples (u, v) where u 

390 and v are nodes in the graph. If ebunch is None then all 

391 nonexistent edges in the graph will be used. 

392 Default value: None. 

393 

394 community : string, optional (default = 'community') 

395 Nodes attribute name containing the community information. 

396 G[u][community] identifies which community u belongs to. Each 

397 node belongs to at most one community. Default value: 'community'. 

398 

399 Returns 

400 ------- 

401 piter : iterator 

402 An iterator of 3-tuples in the form (u, v, p) where (u, v) is a 

403 pair of nodes and p is their score. 

404 

405 Examples 

406 -------- 

407 >>> G = nx.path_graph(3) 

408 >>> G.nodes[0]["community"] = 0 

409 >>> G.nodes[1]["community"] = 0 

410 >>> G.nodes[2]["community"] = 0 

411 >>> preds = nx.cn_soundarajan_hopcroft(G, [(0, 2)]) 

412 >>> for u, v, p in preds: 

413 ... print(f"({u}, {v}) -> {p}") 

414 (0, 2) -> 2 

415 

416 References 

417 ---------- 

418 .. [1] Sucheta Soundarajan and John Hopcroft. 

419 Using community information to improve the precision of link 

420 prediction methods. 

421 In Proceedings of the 21st international conference companion on 

422 World Wide Web (WWW '12 Companion). ACM, New York, NY, USA, 607-608. 

423 http://doi.acm.org/10.1145/2187980.2188150 

424 """ 

425 

426 def predict(u, v): 

427 Cu = _community(G, u, community) 

428 Cv = _community(G, v, community) 

429 cnbors = list(nx.common_neighbors(G, u, v)) 

430 neighbors = ( 

431 sum(_community(G, w, community) == Cu for w in cnbors) if Cu == Cv else 0 

432 ) 

433 return len(cnbors) + neighbors 

434 

435 return _apply_prediction(G, predict, ebunch) 

436 

437 

438@not_implemented_for("directed") 

439@not_implemented_for("multigraph") 

440@nx._dispatch(node_attrs="community") 

441def ra_index_soundarajan_hopcroft(G, ebunch=None, community="community"): 

442 r"""Compute the resource allocation index of all node pairs in 

443 ebunch using community information. 

444 

445 For two nodes $u$ and $v$, this function computes the resource 

446 allocation index considering only common neighbors belonging to the 

447 same community as $u$ and $v$. Mathematically, 

448 

449 .. math:: 

450 

451 \sum_{w \in \Gamma(u) \cap \Gamma(v)} \frac{f(w)}{|\Gamma(w)|} 

452 

453 where $f(w)$ equals 1 if $w$ belongs to the same community as $u$ 

454 and $v$ or 0 otherwise and $\Gamma(u)$ denotes the set of 

455 neighbors of $u$. 

456 

457 Parameters 

458 ---------- 

459 G : graph 

460 A NetworkX undirected graph. 

461 

462 ebunch : iterable of node pairs, optional (default = None) 

463 The score will be computed for each pair of nodes given in the 

464 iterable. The pairs must be given as 2-tuples (u, v) where u 

465 and v are nodes in the graph. If ebunch is None then all 

466 nonexistent edges in the graph will be used. 

467 Default value: None. 

468 

469 community : string, optional (default = 'community') 

470 Nodes attribute name containing the community information. 

471 G[u][community] identifies which community u belongs to. Each 

472 node belongs to at most one community. Default value: 'community'. 

473 

474 Returns 

475 ------- 

476 piter : iterator 

477 An iterator of 3-tuples in the form (u, v, p) where (u, v) is a 

478 pair of nodes and p is their score. 

479 

480 Examples 

481 -------- 

482 >>> G = nx.Graph() 

483 >>> G.add_edges_from([(0, 1), (0, 2), (1, 3), (2, 3)]) 

484 >>> G.nodes[0]["community"] = 0 

485 >>> G.nodes[1]["community"] = 0 

486 >>> G.nodes[2]["community"] = 1 

487 >>> G.nodes[3]["community"] = 0 

488 >>> preds = nx.ra_index_soundarajan_hopcroft(G, [(0, 3)]) 

489 >>> for u, v, p in preds: 

490 ... print(f"({u}, {v}) -> {p:.8f}") 

491 (0, 3) -> 0.50000000 

492 

493 References 

494 ---------- 

495 .. [1] Sucheta Soundarajan and John Hopcroft. 

496 Using community information to improve the precision of link 

497 prediction methods. 

498 In Proceedings of the 21st international conference companion on 

499 World Wide Web (WWW '12 Companion). ACM, New York, NY, USA, 607-608. 

500 http://doi.acm.org/10.1145/2187980.2188150 

501 """ 

502 

503 def predict(u, v): 

504 Cu = _community(G, u, community) 

505 Cv = _community(G, v, community) 

506 if Cu != Cv: 

507 return 0 

508 cnbors = nx.common_neighbors(G, u, v) 

509 return sum(1 / G.degree(w) for w in cnbors if _community(G, w, community) == Cu) 

510 

511 return _apply_prediction(G, predict, ebunch) 

512 

513 

514@not_implemented_for("directed") 

515@not_implemented_for("multigraph") 

516@nx._dispatch(node_attrs="community") 

517def within_inter_cluster(G, ebunch=None, delta=0.001, community="community"): 

518 """Compute the ratio of within- and inter-cluster common neighbors 

519 of all node pairs in ebunch. 

520 

521 For two nodes `u` and `v`, if a common neighbor `w` belongs to the 

522 same community as them, `w` is considered as within-cluster common 

523 neighbor of `u` and `v`. Otherwise, it is considered as 

524 inter-cluster common neighbor of `u` and `v`. The ratio between the 

525 size of the set of within- and inter-cluster common neighbors is 

526 defined as the WIC measure. [1]_ 

527 

528 Parameters 

529 ---------- 

530 G : graph 

531 A NetworkX undirected graph. 

532 

533 ebunch : iterable of node pairs, optional (default = None) 

534 The WIC measure will be computed for each pair of nodes given in 

535 the iterable. The pairs must be given as 2-tuples (u, v) where 

536 u and v are nodes in the graph. If ebunch is None then all 

537 nonexistent edges in the graph will be used. 

538 Default value: None. 

539 

540 delta : float, optional (default = 0.001) 

541 Value to prevent division by zero in case there is no 

542 inter-cluster common neighbor between two nodes. See [1]_ for 

543 details. Default value: 0.001. 

544 

545 community : string, optional (default = 'community') 

546 Nodes attribute name containing the community information. 

547 G[u][community] identifies which community u belongs to. Each 

548 node belongs to at most one community. Default value: 'community'. 

549 

550 Returns 

551 ------- 

552 piter : iterator 

553 An iterator of 3-tuples in the form (u, v, p) where (u, v) is a 

554 pair of nodes and p is their WIC measure. 

555 

556 Examples 

557 -------- 

558 >>> G = nx.Graph() 

559 >>> G.add_edges_from([(0, 1), (0, 2), (0, 3), (1, 4), (2, 4), (3, 4)]) 

560 >>> G.nodes[0]["community"] = 0 

561 >>> G.nodes[1]["community"] = 1 

562 >>> G.nodes[2]["community"] = 0 

563 >>> G.nodes[3]["community"] = 0 

564 >>> G.nodes[4]["community"] = 0 

565 >>> preds = nx.within_inter_cluster(G, [(0, 4)]) 

566 >>> for u, v, p in preds: 

567 ... print(f"({u}, {v}) -> {p:.8f}") 

568 (0, 4) -> 1.99800200 

569 >>> preds = nx.within_inter_cluster(G, [(0, 4)], delta=0.5) 

570 >>> for u, v, p in preds: 

571 ... print(f"({u}, {v}) -> {p:.8f}") 

572 (0, 4) -> 1.33333333 

573 

574 References 

575 ---------- 

576 .. [1] Jorge Carlos Valverde-Rebaza and Alneu de Andrade Lopes. 

577 Link prediction in complex networks based on cluster information. 

578 In Proceedings of the 21st Brazilian conference on Advances in 

579 Artificial Intelligence (SBIA'12) 

580 https://doi.org/10.1007/978-3-642-34459-6_10 

581 """ 

582 if delta <= 0: 

583 raise nx.NetworkXAlgorithmError("Delta must be greater than zero") 

584 

585 def predict(u, v): 

586 Cu = _community(G, u, community) 

587 Cv = _community(G, v, community) 

588 if Cu != Cv: 

589 return 0 

590 cnbors = set(nx.common_neighbors(G, u, v)) 

591 within = {w for w in cnbors if _community(G, w, community) == Cu} 

592 inter = cnbors - within 

593 return len(within) / (len(inter) + delta) 

594 

595 return _apply_prediction(G, predict, ebunch) 

596 

597 

598def _community(G, u, community): 

599 """Get the community of the given node.""" 

600 node_u = G.nodes[u] 

601 try: 

602 return node_u[community] 

603 except KeyError as err: 

604 raise nx.NetworkXAlgorithmError("No community information") from err