1"""
2Link prediction algorithms.
3"""
4
5from math import log
6
7import networkx as nx
8from networkx.utils import not_implemented_for
9
10__all__ = [
11 "resource_allocation_index",
12 "jaccard_coefficient",
13 "adamic_adar_index",
14 "preferential_attachment",
15 "cn_soundarajan_hopcroft",
16 "ra_index_soundarajan_hopcroft",
17 "within_inter_cluster",
18 "common_neighbor_centrality",
19]
20
21
22def _apply_prediction(G, func, ebunch=None):
23 """Applies the given function to each edge in the specified iterable
24 of edges.
25
26 `G` is an instance of :class:`networkx.Graph`.
27
28 `func` is a function on two inputs, each of which is a node in the
29 graph. The function can return anything, but it should return a
30 value representing a prediction of the likelihood of a "link"
31 joining the two nodes.
32
33 `ebunch` is an iterable of pairs of nodes. If not specified, all
34 non-edges in the graph `G` will be used.
35
36 """
37 if ebunch is None:
38 ebunch = nx.non_edges(G)
39 else:
40 for u, v in ebunch:
41 if u not in G:
42 raise nx.NodeNotFound(f"Node {u} not in G.")
43 if v not in G:
44 raise nx.NodeNotFound(f"Node {v} not in G.")
45 return ((u, v, func(u, v)) for u, v in ebunch)
46
47
48@not_implemented_for("directed")
49@not_implemented_for("multigraph")
50@nx._dispatchable
51def resource_allocation_index(G, ebunch=None):
52 r"""Compute the resource allocation index of all node pairs in ebunch.
53
54 Resource allocation index of `u` and `v` is defined as
55
56 .. math::
57
58 \sum_{w \in \Gamma(u) \cap \Gamma(v)} \frac{1}{|\Gamma(w)|}
59
60 where $\Gamma(u)$ denotes the set of neighbors of $u$.
61
62 Parameters
63 ----------
64 G : graph
65 A NetworkX undirected graph.
66
67 ebunch : iterable of node pairs, optional (default = None)
68 Resource allocation index will be computed for each pair of
69 nodes given in the iterable. The pairs must be given as
70 2-tuples (u, v) where u and v are nodes in the graph. If ebunch
71 is None then all nonexistent edges in the graph will be used.
72 Default value: None.
73
74 Returns
75 -------
76 piter : iterator
77 An iterator of 3-tuples in the form (u, v, p) where (u, v) is a
78 pair of nodes and p is their resource allocation index.
79
80 Raises
81 ------
82 NetworkXNotImplemented
83 If `G` is a `DiGraph`, a `Multigraph` or a `MultiDiGraph`.
84
85 NodeNotFound
86 If `ebunch` has a node that is not in `G`.
87
88 Examples
89 --------
90 >>> G = nx.complete_graph(5)
91 >>> preds = nx.resource_allocation_index(G, [(0, 1), (2, 3)])
92 >>> for u, v, p in preds:
93 ... print(f"({u}, {v}) -> {p:.8f}")
94 (0, 1) -> 0.75000000
95 (2, 3) -> 0.75000000
96
97 References
98 ----------
99 .. [1] T. Zhou, L. Lu, Y.-C. Zhang.
100 Predicting missing links via local information.
101 Eur. Phys. J. B 71 (2009) 623.
102 https://arxiv.org/pdf/0901.0553.pdf
103 """
104
105 def predict(u, v):
106 return sum(1 / G.degree(w) for w in nx.common_neighbors(G, u, v))
107
108 return _apply_prediction(G, predict, ebunch)
109
110
111@not_implemented_for("directed")
112@not_implemented_for("multigraph")
113@nx._dispatchable
114def jaccard_coefficient(G, ebunch=None):
115 r"""Compute the Jaccard coefficient of all node pairs in ebunch.
116
117 Jaccard coefficient of nodes `u` and `v` is defined as
118
119 .. math::
120
121 \frac{|\Gamma(u) \cap \Gamma(v)|}{|\Gamma(u) \cup \Gamma(v)|}
122
123 where $\Gamma(u)$ denotes the set of neighbors of $u$.
124
125 Parameters
126 ----------
127 G : graph
128 A NetworkX undirected graph.
129
130 ebunch : iterable of node pairs, optional (default = None)
131 Jaccard coefficient will be computed for each pair of nodes
132 given in the iterable. The pairs must be given as 2-tuples
133 (u, v) where u and v are nodes in the graph. If ebunch is None
134 then all nonexistent edges in the graph will be used.
135 Default value: None.
136
137 Returns
138 -------
139 piter : iterator
140 An iterator of 3-tuples in the form (u, v, p) where (u, v) is a
141 pair of nodes and p is their Jaccard coefficient.
142
143 Raises
144 ------
145 NetworkXNotImplemented
146 If `G` is a `DiGraph`, a `Multigraph` or a `MultiDiGraph`.
147
148 NodeNotFound
149 If `ebunch` has a node that is not in `G`.
150
151 Examples
152 --------
153 >>> G = nx.complete_graph(5)
154 >>> preds = nx.jaccard_coefficient(G, [(0, 1), (2, 3)])
155 >>> for u, v, p in preds:
156 ... print(f"({u}, {v}) -> {p:.8f}")
157 (0, 1) -> 0.60000000
158 (2, 3) -> 0.60000000
159
160 References
161 ----------
162 .. [1] D. Liben-Nowell, J. Kleinberg.
163 The Link Prediction Problem for Social Networks (2004).
164 http://www.cs.cornell.edu/home/kleinber/link-pred.pdf
165 """
166
167 def predict(u, v):
168 union_size = len(set(G[u]) | set(G[v]))
169 if union_size == 0:
170 return 0
171 return len(nx.common_neighbors(G, u, v)) / union_size
172
173 return _apply_prediction(G, predict, ebunch)
174
175
176@not_implemented_for("directed")
177@not_implemented_for("multigraph")
178@nx._dispatchable
179def adamic_adar_index(G, ebunch=None):
180 r"""Compute the Adamic-Adar index of all node pairs in ebunch.
181
182 Adamic-Adar index of `u` and `v` is defined as
183
184 .. math::
185
186 \sum_{w \in \Gamma(u) \cap \Gamma(v)} \frac{1}{\log |\Gamma(w)|}
187
188 where $\Gamma(u)$ denotes the set of neighbors of $u$.
189 This index leads to zero-division for nodes only connected via self-loops.
190 It is intended to be used when no self-loops are present.
191
192 Parameters
193 ----------
194 G : graph
195 NetworkX undirected graph.
196
197 ebunch : iterable of node pairs, optional (default = None)
198 Adamic-Adar index will be computed for each pair of nodes given
199 in the iterable. The pairs must be given as 2-tuples (u, v)
200 where u and v are nodes in the graph. If ebunch is None then all
201 nonexistent edges in the graph will be used.
202 Default value: None.
203
204 Returns
205 -------
206 piter : iterator
207 An iterator of 3-tuples in the form (u, v, p) where (u, v) is a
208 pair of nodes and p is their Adamic-Adar index.
209
210 Raises
211 ------
212 NetworkXNotImplemented
213 If `G` is a `DiGraph`, a `Multigraph` or a `MultiDiGraph`.
214
215 NodeNotFound
216 If `ebunch` has a node that is not in `G`.
217
218 Examples
219 --------
220 >>> G = nx.complete_graph(5)
221 >>> preds = nx.adamic_adar_index(G, [(0, 1), (2, 3)])
222 >>> for u, v, p in preds:
223 ... print(f"({u}, {v}) -> {p:.8f}")
224 (0, 1) -> 2.16404256
225 (2, 3) -> 2.16404256
226
227 References
228 ----------
229 .. [1] D. Liben-Nowell, J. Kleinberg.
230 The Link Prediction Problem for Social Networks (2004).
231 http://www.cs.cornell.edu/home/kleinber/link-pred.pdf
232 """
233
234 def predict(u, v):
235 return sum(1 / log(G.degree(w)) for w in nx.common_neighbors(G, u, v))
236
237 return _apply_prediction(G, predict, ebunch)
238
239
240@not_implemented_for("directed")
241@not_implemented_for("multigraph")
242@nx._dispatchable
243def common_neighbor_centrality(G, ebunch=None, alpha=0.8):
244 r"""Return the CCPA score for each pair of nodes.
245
246 Compute the Common Neighbor and Centrality based Parameterized Algorithm(CCPA)
247 score of all node pairs in ebunch.
248
249 CCPA score of `u` and `v` is defined as
250
251 .. math::
252
253 \alpha \cdot (|\Gamma (u){\cap }^{}\Gamma (v)|)+(1-\alpha )\cdot \frac{N}{{d}_{uv}}
254
255 where $\Gamma(u)$ denotes the set of neighbors of $u$, $\Gamma(v)$ denotes the
256 set of neighbors of $v$, $\alpha$ is parameter varies between [0,1], $N$ denotes
257 total number of nodes in the Graph and ${d}_{uv}$ denotes shortest distance
258 between $u$ and $v$.
259
260 This algorithm is based on two vital properties of nodes, namely the number
261 of common neighbors and their centrality. Common neighbor refers to the common
262 nodes between two nodes. Centrality refers to the prestige that a node enjoys
263 in a network.
264
265 .. seealso::
266
267 :func:`common_neighbors`
268
269 Parameters
270 ----------
271 G : graph
272 NetworkX undirected graph.
273
274 ebunch : iterable of node pairs, optional (default = None)
275 Preferential attachment score will be computed for each pair of
276 nodes given in the iterable. The pairs must be given as
277 2-tuples (u, v) where u and v are nodes in the graph. If ebunch
278 is None then all nonexistent edges in the graph will be used.
279 Default value: None.
280
281 alpha : Parameter defined for participation of Common Neighbor
282 and Centrality Algorithm share. Values for alpha should
283 normally be between 0 and 1. Default value set to 0.8
284 because author found better performance at 0.8 for all the
285 dataset.
286 Default value: 0.8
287
288
289 Returns
290 -------
291 piter : iterator
292 An iterator of 3-tuples in the form (u, v, p) where (u, v) is a
293 pair of nodes and p is their Common Neighbor and Centrality based
294 Parameterized Algorithm(CCPA) score.
295
296 Raises
297 ------
298 NetworkXNotImplemented
299 If `G` is a `DiGraph`, a `Multigraph` or a `MultiDiGraph`.
300
301 NetworkXAlgorithmError
302 If self loops exist in `ebunch` or in `G` (if `ebunch` is `None`).
303
304 NodeNotFound
305 If `ebunch` has a node that is not in `G`.
306
307 Examples
308 --------
309 >>> G = nx.complete_graph(5)
310 >>> preds = nx.common_neighbor_centrality(G, [(0, 1), (2, 3)])
311 >>> for u, v, p in preds:
312 ... print(f"({u}, {v}) -> {p}")
313 (0, 1) -> 3.4000000000000004
314 (2, 3) -> 3.4000000000000004
315
316 References
317 ----------
318 .. [1] Ahmad, I., Akhtar, M.U., Noor, S. et al.
319 Missing Link Prediction using Common Neighbor and Centrality based Parameterized Algorithm.
320 Sci Rep 10, 364 (2020).
321 https://doi.org/10.1038/s41598-019-57304-y
322 """
323
324 # When alpha == 1, the CCPA score simplifies to the number of common neighbors.
325 if alpha == 1:
326
327 def predict(u, v):
328 if u == v:
329 raise nx.NetworkXAlgorithmError("Self loops are not supported")
330
331 return len(nx.common_neighbors(G, u, v))
332
333 else:
334 spl = dict(nx.shortest_path_length(G))
335 inf = float("inf")
336
337 def predict(u, v):
338 if u == v:
339 raise nx.NetworkXAlgorithmError("Self loops are not supported")
340 path_len = spl[u].get(v, inf)
341
342 n_nbrs = len(nx.common_neighbors(G, u, v))
343 return alpha * n_nbrs + (1 - alpha) * len(G) / path_len
344
345 return _apply_prediction(G, predict, ebunch)
346
347
348@not_implemented_for("directed")
349@not_implemented_for("multigraph")
350@nx._dispatchable
351def preferential_attachment(G, ebunch=None):
352 r"""Compute the preferential attachment score of all node pairs in ebunch.
353
354 Preferential attachment score of `u` and `v` is defined as
355
356 .. math::
357
358 |\Gamma(u)| |\Gamma(v)|
359
360 where $\Gamma(u)$ denotes the set of neighbors of $u$.
361
362 Parameters
363 ----------
364 G : graph
365 NetworkX undirected graph.
366
367 ebunch : iterable of node pairs, optional (default = None)
368 Preferential attachment score will be computed for each pair of
369 nodes given in the iterable. The pairs must be given as
370 2-tuples (u, v) where u and v are nodes in the graph. If ebunch
371 is None then all nonexistent edges in the graph will be used.
372 Default value: None.
373
374 Returns
375 -------
376 piter : iterator
377 An iterator of 3-tuples in the form (u, v, p) where (u, v) is a
378 pair of nodes and p is their preferential attachment score.
379
380 Raises
381 ------
382 NetworkXNotImplemented
383 If `G` is a `DiGraph`, a `Multigraph` or a `MultiDiGraph`.
384
385 NodeNotFound
386 If `ebunch` has a node that is not in `G`.
387
388 Examples
389 --------
390 >>> G = nx.complete_graph(5)
391 >>> preds = nx.preferential_attachment(G, [(0, 1), (2, 3)])
392 >>> for u, v, p in preds:
393 ... print(f"({u}, {v}) -> {p}")
394 (0, 1) -> 16
395 (2, 3) -> 16
396
397 References
398 ----------
399 .. [1] D. Liben-Nowell, J. Kleinberg.
400 The Link Prediction Problem for Social Networks (2004).
401 http://www.cs.cornell.edu/home/kleinber/link-pred.pdf
402 """
403
404 def predict(u, v):
405 return G.degree(u) * G.degree(v)
406
407 return _apply_prediction(G, predict, ebunch)
408
409
410@not_implemented_for("directed")
411@not_implemented_for("multigraph")
412@nx._dispatchable(node_attrs="community")
413def cn_soundarajan_hopcroft(G, ebunch=None, community="community"):
414 r"""Count the number of common neighbors of all node pairs in ebunch
415 using community information.
416
417 For two nodes $u$ and $v$, this function computes the number of
418 common neighbors and bonus one for each common neighbor belonging to
419 the same community as $u$ and $v$. Mathematically,
420
421 .. math::
422
423 |\Gamma(u) \cap \Gamma(v)| + \sum_{w \in \Gamma(u) \cap \Gamma(v)} f(w)
424
425 where $f(w)$ equals 1 if $w$ belongs to the same community as $u$
426 and $v$ or 0 otherwise and $\Gamma(u)$ denotes the set of
427 neighbors of $u$.
428
429 Parameters
430 ----------
431 G : graph
432 A NetworkX undirected graph.
433
434 ebunch : iterable of node pairs, optional (default = None)
435 The score will be computed for each pair of nodes given in the
436 iterable. The pairs must be given as 2-tuples (u, v) where u
437 and v are nodes in the graph. If ebunch is None then all
438 nonexistent edges in the graph will be used.
439 Default value: None.
440
441 community : string, optional (default = 'community')
442 Nodes attribute name containing the community information.
443 G[u][community] identifies which community u belongs to. Each
444 node belongs to at most one community. Default value: 'community'.
445
446 Returns
447 -------
448 piter : iterator
449 An iterator of 3-tuples in the form (u, v, p) where (u, v) is a
450 pair of nodes and p is their score.
451
452 Raises
453 ------
454 NetworkXNotImplemented
455 If `G` is a `DiGraph`, a `Multigraph` or a `MultiDiGraph`.
456
457 NetworkXAlgorithmError
458 If no community information is available for a node in `ebunch` or in `G` (if `ebunch` is `None`).
459
460 NodeNotFound
461 If `ebunch` has a node that is not in `G`.
462
463 Examples
464 --------
465 >>> G = nx.path_graph(3)
466 >>> G.nodes[0]["community"] = 0
467 >>> G.nodes[1]["community"] = 0
468 >>> G.nodes[2]["community"] = 0
469 >>> preds = nx.cn_soundarajan_hopcroft(G, [(0, 2)])
470 >>> for u, v, p in preds:
471 ... print(f"({u}, {v}) -> {p}")
472 (0, 2) -> 2
473
474 References
475 ----------
476 .. [1] Sucheta Soundarajan and John Hopcroft.
477 Using community information to improve the precision of link
478 prediction methods.
479 In Proceedings of the 21st international conference companion on
480 World Wide Web (WWW '12 Companion). ACM, New York, NY, USA, 607-608.
481 http://doi.acm.org/10.1145/2187980.2188150
482 """
483
484 def predict(u, v):
485 Cu = _community(G, u, community)
486 Cv = _community(G, v, community)
487 cnbors = nx.common_neighbors(G, u, v)
488 neighbors = (
489 sum(_community(G, w, community) == Cu for w in cnbors) if Cu == Cv else 0
490 )
491 return len(cnbors) + neighbors
492
493 return _apply_prediction(G, predict, ebunch)
494
495
496@not_implemented_for("directed")
497@not_implemented_for("multigraph")
498@nx._dispatchable(node_attrs="community")
499def ra_index_soundarajan_hopcroft(G, ebunch=None, community="community"):
500 r"""Compute the resource allocation index of all node pairs in
501 ebunch using community information.
502
503 For two nodes $u$ and $v$, this function computes the resource
504 allocation index considering only common neighbors belonging to the
505 same community as $u$ and $v$. Mathematically,
506
507 .. math::
508
509 \sum_{w \in \Gamma(u) \cap \Gamma(v)} \frac{f(w)}{|\Gamma(w)|}
510
511 where $f(w)$ equals 1 if $w$ belongs to the same community as $u$
512 and $v$ or 0 otherwise and $\Gamma(u)$ denotes the set of
513 neighbors of $u$.
514
515 Parameters
516 ----------
517 G : graph
518 A NetworkX undirected graph.
519
520 ebunch : iterable of node pairs, optional (default = None)
521 The score will be computed for each pair of nodes given in the
522 iterable. The pairs must be given as 2-tuples (u, v) where u
523 and v are nodes in the graph. If ebunch is None then all
524 nonexistent edges in the graph will be used.
525 Default value: None.
526
527 community : string, optional (default = 'community')
528 Nodes attribute name containing the community information.
529 G[u][community] identifies which community u belongs to. Each
530 node belongs to at most one community. Default value: 'community'.
531
532 Returns
533 -------
534 piter : iterator
535 An iterator of 3-tuples in the form (u, v, p) where (u, v) is a
536 pair of nodes and p is their score.
537
538 Raises
539 ------
540 NetworkXNotImplemented
541 If `G` is a `DiGraph`, a `Multigraph` or a `MultiDiGraph`.
542
543 NetworkXAlgorithmError
544 If no community information is available for a node in `ebunch` or in `G` (if `ebunch` is `None`).
545
546 NodeNotFound
547 If `ebunch` has a node that is not in `G`.
548
549 Examples
550 --------
551 >>> G = nx.Graph()
552 >>> G.add_edges_from([(0, 1), (0, 2), (1, 3), (2, 3)])
553 >>> G.nodes[0]["community"] = 0
554 >>> G.nodes[1]["community"] = 0
555 >>> G.nodes[2]["community"] = 1
556 >>> G.nodes[3]["community"] = 0
557 >>> preds = nx.ra_index_soundarajan_hopcroft(G, [(0, 3)])
558 >>> for u, v, p in preds:
559 ... print(f"({u}, {v}) -> {p:.8f}")
560 (0, 3) -> 0.50000000
561
562 References
563 ----------
564 .. [1] Sucheta Soundarajan and John Hopcroft.
565 Using community information to improve the precision of link
566 prediction methods.
567 In Proceedings of the 21st international conference companion on
568 World Wide Web (WWW '12 Companion). ACM, New York, NY, USA, 607-608.
569 http://doi.acm.org/10.1145/2187980.2188150
570 """
571
572 def predict(u, v):
573 Cu = _community(G, u, community)
574 Cv = _community(G, v, community)
575 if Cu != Cv:
576 return 0
577 cnbors = nx.common_neighbors(G, u, v)
578 return sum(1 / G.degree(w) for w in cnbors if _community(G, w, community) == Cu)
579
580 return _apply_prediction(G, predict, ebunch)
581
582
583@not_implemented_for("directed")
584@not_implemented_for("multigraph")
585@nx._dispatchable(node_attrs="community")
586def within_inter_cluster(G, ebunch=None, delta=0.001, community="community"):
587 """Compute the ratio of within- and inter-cluster common neighbors
588 of all node pairs in ebunch.
589
590 For two nodes `u` and `v`, if a common neighbor `w` belongs to the
591 same community as them, `w` is considered as within-cluster common
592 neighbor of `u` and `v`. Otherwise, it is considered as
593 inter-cluster common neighbor of `u` and `v`. The ratio between the
594 size of the set of within- and inter-cluster common neighbors is
595 defined as the WIC measure. [1]_
596
597 Parameters
598 ----------
599 G : graph
600 A NetworkX undirected graph.
601
602 ebunch : iterable of node pairs, optional (default = None)
603 The WIC measure will be computed for each pair of nodes given in
604 the iterable. The pairs must be given as 2-tuples (u, v) where
605 u and v are nodes in the graph. If ebunch is None then all
606 nonexistent edges in the graph will be used.
607 Default value: None.
608
609 delta : float, optional (default = 0.001)
610 Value to prevent division by zero in case there is no
611 inter-cluster common neighbor between two nodes. See [1]_ for
612 details. Default value: 0.001.
613
614 community : string, optional (default = 'community')
615 Nodes attribute name containing the community information.
616 G[u][community] identifies which community u belongs to. Each
617 node belongs to at most one community. Default value: 'community'.
618
619 Returns
620 -------
621 piter : iterator
622 An iterator of 3-tuples in the form (u, v, p) where (u, v) is a
623 pair of nodes and p is their WIC measure.
624
625 Raises
626 ------
627 NetworkXNotImplemented
628 If `G` is a `DiGraph`, a `Multigraph` or a `MultiDiGraph`.
629
630 NetworkXAlgorithmError
631 - If `delta` is less than or equal to zero.
632 - If no community information is available for a node in `ebunch` or in `G` (if `ebunch` is `None`).
633
634 NodeNotFound
635 If `ebunch` has a node that is not in `G`.
636
637 Examples
638 --------
639 >>> G = nx.Graph()
640 >>> G.add_edges_from([(0, 1), (0, 2), (0, 3), (1, 4), (2, 4), (3, 4)])
641 >>> G.nodes[0]["community"] = 0
642 >>> G.nodes[1]["community"] = 1
643 >>> G.nodes[2]["community"] = 0
644 >>> G.nodes[3]["community"] = 0
645 >>> G.nodes[4]["community"] = 0
646 >>> preds = nx.within_inter_cluster(G, [(0, 4)])
647 >>> for u, v, p in preds:
648 ... print(f"({u}, {v}) -> {p:.8f}")
649 (0, 4) -> 1.99800200
650 >>> preds = nx.within_inter_cluster(G, [(0, 4)], delta=0.5)
651 >>> for u, v, p in preds:
652 ... print(f"({u}, {v}) -> {p:.8f}")
653 (0, 4) -> 1.33333333
654
655 References
656 ----------
657 .. [1] Jorge Carlos Valverde-Rebaza and Alneu de Andrade Lopes.
658 Link prediction in complex networks based on cluster information.
659 In Proceedings of the 21st Brazilian conference on Advances in
660 Artificial Intelligence (SBIA'12)
661 https://doi.org/10.1007/978-3-642-34459-6_10
662 """
663 if delta <= 0:
664 raise nx.NetworkXAlgorithmError("Delta must be greater than zero")
665
666 def predict(u, v):
667 Cu = _community(G, u, community)
668 Cv = _community(G, v, community)
669 if Cu != Cv:
670 return 0
671 cnbors = nx.common_neighbors(G, u, v)
672 within = {w for w in cnbors if _community(G, w, community) == Cu}
673 inter = cnbors - within
674 return len(within) / (len(inter) + delta)
675
676 return _apply_prediction(G, predict, ebunch)
677
678
679def _community(G, u, community):
680 """Get the community of the given node."""
681 node_u = G.nodes[u]
682 try:
683 return node_u[community]
684 except KeyError as err:
685 raise nx.NetworkXAlgorithmError(
686 f"No community information available for Node {u}"
687 ) from err