Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/networkx/algorithms/link_prediction.py: 38%
101 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-10-20 07:00 +0000
« prev ^ index » next coverage.py v7.3.2, created at 2023-10-20 07:00 +0000
1"""
2Link prediction algorithms.
3"""
6from math import log
8import networkx as nx
9from networkx.utils import not_implemented_for
11__all__ = [
12 "resource_allocation_index",
13 "jaccard_coefficient",
14 "adamic_adar_index",
15 "preferential_attachment",
16 "cn_soundarajan_hopcroft",
17 "ra_index_soundarajan_hopcroft",
18 "within_inter_cluster",
19 "common_neighbor_centrality",
20]
23def _apply_prediction(G, func, ebunch=None):
24 """Applies the given function to each edge in the specified iterable
25 of edges.
27 `G` is an instance of :class:`networkx.Graph`.
29 `func` is a function on two inputs, each of which is a node in the
30 graph. The function can return anything, but it should return a
31 value representing a prediction of the likelihood of a "link"
32 joining the two nodes.
34 `ebunch` is an iterable of pairs of nodes. If not specified, all
35 non-edges in the graph `G` will be used.
37 """
38 if ebunch is None:
39 ebunch = nx.non_edges(G)
40 return ((u, v, func(u, v)) for u, v in ebunch)
43@not_implemented_for("directed")
44@not_implemented_for("multigraph")
45@nx._dispatch
46def resource_allocation_index(G, ebunch=None):
47 r"""Compute the resource allocation index of all node pairs in ebunch.
49 Resource allocation index of `u` and `v` is defined as
51 .. math::
53 \sum_{w \in \Gamma(u) \cap \Gamma(v)} \frac{1}{|\Gamma(w)|}
55 where $\Gamma(u)$ denotes the set of neighbors of $u$.
57 Parameters
58 ----------
59 G : graph
60 A NetworkX undirected graph.
62 ebunch : iterable of node pairs, optional (default = None)
63 Resource allocation index will be computed for each pair of
64 nodes given in the iterable. The pairs must be given as
65 2-tuples (u, v) where u and v are nodes in the graph. If ebunch
66 is None then all nonexistent edges in the graph will be used.
67 Default value: None.
69 Returns
70 -------
71 piter : iterator
72 An iterator of 3-tuples in the form (u, v, p) where (u, v) is a
73 pair of nodes and p is their resource allocation index.
75 Examples
76 --------
77 >>> G = nx.complete_graph(5)
78 >>> preds = nx.resource_allocation_index(G, [(0, 1), (2, 3)])
79 >>> for u, v, p in preds:
80 ... print(f"({u}, {v}) -> {p:.8f}")
81 (0, 1) -> 0.75000000
82 (2, 3) -> 0.75000000
84 References
85 ----------
86 .. [1] T. Zhou, L. Lu, Y.-C. Zhang.
87 Predicting missing links via local information.
88 Eur. Phys. J. B 71 (2009) 623.
89 https://arxiv.org/pdf/0901.0553.pdf
90 """
92 def predict(u, v):
93 return sum(1 / G.degree(w) for w in nx.common_neighbors(G, u, v))
95 return _apply_prediction(G, predict, ebunch)
98@not_implemented_for("directed")
99@not_implemented_for("multigraph")
100@nx._dispatch
101def jaccard_coefficient(G, ebunch=None):
102 r"""Compute the Jaccard coefficient of all node pairs in ebunch.
104 Jaccard coefficient of nodes `u` and `v` is defined as
106 .. math::
108 \frac{|\Gamma(u) \cap \Gamma(v)|}{|\Gamma(u) \cup \Gamma(v)|}
110 where $\Gamma(u)$ denotes the set of neighbors of $u$.
112 Parameters
113 ----------
114 G : graph
115 A NetworkX undirected graph.
117 ebunch : iterable of node pairs, optional (default = None)
118 Jaccard coefficient will be computed for each pair of nodes
119 given in the iterable. The pairs must be given as 2-tuples
120 (u, v) where u and v are nodes in the graph. If ebunch is None
121 then all nonexistent edges in the graph will be used.
122 Default value: None.
124 Returns
125 -------
126 piter : iterator
127 An iterator of 3-tuples in the form (u, v, p) where (u, v) is a
128 pair of nodes and p is their Jaccard coefficient.
130 Examples
131 --------
132 >>> G = nx.complete_graph(5)
133 >>> preds = nx.jaccard_coefficient(G, [(0, 1), (2, 3)])
134 >>> for u, v, p in preds:
135 ... print(f"({u}, {v}) -> {p:.8f}")
136 (0, 1) -> 0.60000000
137 (2, 3) -> 0.60000000
139 References
140 ----------
141 .. [1] D. Liben-Nowell, J. Kleinberg.
142 The Link Prediction Problem for Social Networks (2004).
143 http://www.cs.cornell.edu/home/kleinber/link-pred.pdf
144 """
146 def predict(u, v):
147 union_size = len(set(G[u]) | set(G[v]))
148 if union_size == 0:
149 return 0
150 return len(list(nx.common_neighbors(G, u, v))) / union_size
152 return _apply_prediction(G, predict, ebunch)
155@not_implemented_for("directed")
156@not_implemented_for("multigraph")
157@nx._dispatch
158def adamic_adar_index(G, ebunch=None):
159 r"""Compute the Adamic-Adar index of all node pairs in ebunch.
161 Adamic-Adar index of `u` and `v` is defined as
163 .. math::
165 \sum_{w \in \Gamma(u) \cap \Gamma(v)} \frac{1}{\log |\Gamma(w)|}
167 where $\Gamma(u)$ denotes the set of neighbors of $u$.
168 This index leads to zero-division for nodes only connected via self-loops.
169 It is intended to be used when no self-loops are present.
171 Parameters
172 ----------
173 G : graph
174 NetworkX undirected graph.
176 ebunch : iterable of node pairs, optional (default = None)
177 Adamic-Adar index will be computed for each pair of nodes given
178 in the iterable. The pairs must be given as 2-tuples (u, v)
179 where u and v are nodes in the graph. If ebunch is None then all
180 nonexistent edges in the graph will be used.
181 Default value: None.
183 Returns
184 -------
185 piter : iterator
186 An iterator of 3-tuples in the form (u, v, p) where (u, v) is a
187 pair of nodes and p is their Adamic-Adar index.
189 Examples
190 --------
191 >>> G = nx.complete_graph(5)
192 >>> preds = nx.adamic_adar_index(G, [(0, 1), (2, 3)])
193 >>> for u, v, p in preds:
194 ... print(f"({u}, {v}) -> {p:.8f}")
195 (0, 1) -> 2.16404256
196 (2, 3) -> 2.16404256
198 References
199 ----------
200 .. [1] D. Liben-Nowell, J. Kleinberg.
201 The Link Prediction Problem for Social Networks (2004).
202 http://www.cs.cornell.edu/home/kleinber/link-pred.pdf
203 """
205 def predict(u, v):
206 return sum(1 / log(G.degree(w)) for w in nx.common_neighbors(G, u, v))
208 return _apply_prediction(G, predict, ebunch)
211@not_implemented_for("directed")
212@not_implemented_for("multigraph")
213@nx._dispatch
214def common_neighbor_centrality(G, ebunch=None, alpha=0.8):
215 r"""Return the CCPA score for each pair of nodes.
217 Compute the Common Neighbor and Centrality based Parameterized Algorithm(CCPA)
218 score of all node pairs in ebunch.
220 CCPA score of `u` and `v` is defined as
222 .. math::
224 \alpha \cdot (|\Gamma (u){\cap }^{}\Gamma (v)|)+(1-\alpha )\cdot \frac{N}{{d}_{uv}}
226 where $\Gamma(u)$ denotes the set of neighbors of $u$, $\Gamma(v)$ denotes the
227 set of neighbors of $v$, $\alpha$ is parameter varies between [0,1], $N$ denotes
228 total number of nodes in the Graph and ${d}_{uv}$ denotes shortest distance
229 between $u$ and $v$.
231 This algorithm is based on two vital properties of nodes, namely the number
232 of common neighbors and their centrality. Common neighbor refers to the common
233 nodes between two nodes. Centrality refers to the prestige that a node enjoys
234 in a network.
236 .. seealso::
238 :func:`common_neighbors`
240 Parameters
241 ----------
242 G : graph
243 NetworkX undirected graph.
245 ebunch : iterable of node pairs, optional (default = None)
246 Preferential attachment score will be computed for each pair of
247 nodes given in the iterable. The pairs must be given as
248 2-tuples (u, v) where u and v are nodes in the graph. If ebunch
249 is None then all nonexistent edges in the graph will be used.
250 Default value: None.
252 alpha : Parameter defined for participation of Common Neighbor
253 and Centrality Algorithm share. Values for alpha should
254 normally be between 0 and 1. Default value set to 0.8
255 because author found better performance at 0.8 for all the
256 dataset.
257 Default value: 0.8
260 Returns
261 -------
262 piter : iterator
263 An iterator of 3-tuples in the form (u, v, p) where (u, v) is a
264 pair of nodes and p is their Common Neighbor and Centrality based
265 Parameterized Algorithm(CCPA) score.
267 Examples
268 --------
269 >>> G = nx.complete_graph(5)
270 >>> preds = nx.common_neighbor_centrality(G, [(0, 1), (2, 3)])
271 >>> for u, v, p in preds:
272 ... print(f"({u}, {v}) -> {p}")
273 (0, 1) -> 3.4000000000000004
274 (2, 3) -> 3.4000000000000004
276 References
277 ----------
278 .. [1] Ahmad, I., Akhtar, M.U., Noor, S. et al.
279 Missing Link Prediction using Common Neighbor and Centrality based Parameterized Algorithm.
280 Sci Rep 10, 364 (2020).
281 https://doi.org/10.1038/s41598-019-57304-y
282 """
284 # When alpha == 1, the CCPA score simplifies to the number of common neighbors.
285 if alpha == 1:
287 def predict(u, v):
288 if u == v:
289 raise nx.NetworkXAlgorithmError("Self links are not supported")
291 return sum(1 for _ in nx.common_neighbors(G, u, v))
293 else:
294 spl = dict(nx.shortest_path_length(G))
295 inf = float("inf")
297 def predict(u, v):
298 if u == v:
299 raise nx.NetworkXAlgorithmError("Self links are not supported")
300 path_len = spl[u].get(v, inf)
302 return alpha * sum(1 for _ in nx.common_neighbors(G, u, v)) + (
303 1 - alpha
304 ) * (G.number_of_nodes() / path_len)
306 return _apply_prediction(G, predict, ebunch)
309@not_implemented_for("directed")
310@not_implemented_for("multigraph")
311@nx._dispatch
312def preferential_attachment(G, ebunch=None):
313 r"""Compute the preferential attachment score of all node pairs in ebunch.
315 Preferential attachment score of `u` and `v` is defined as
317 .. math::
319 |\Gamma(u)| |\Gamma(v)|
321 where $\Gamma(u)$ denotes the set of neighbors of $u$.
323 Parameters
324 ----------
325 G : graph
326 NetworkX undirected graph.
328 ebunch : iterable of node pairs, optional (default = None)
329 Preferential attachment score will be computed for each pair of
330 nodes given in the iterable. The pairs must be given as
331 2-tuples (u, v) where u and v are nodes in the graph. If ebunch
332 is None then all nonexistent edges in the graph will be used.
333 Default value: None.
335 Returns
336 -------
337 piter : iterator
338 An iterator of 3-tuples in the form (u, v, p) where (u, v) is a
339 pair of nodes and p is their preferential attachment score.
341 Examples
342 --------
343 >>> G = nx.complete_graph(5)
344 >>> preds = nx.preferential_attachment(G, [(0, 1), (2, 3)])
345 >>> for u, v, p in preds:
346 ... print(f"({u}, {v}) -> {p}")
347 (0, 1) -> 16
348 (2, 3) -> 16
350 References
351 ----------
352 .. [1] D. Liben-Nowell, J. Kleinberg.
353 The Link Prediction Problem for Social Networks (2004).
354 http://www.cs.cornell.edu/home/kleinber/link-pred.pdf
355 """
357 def predict(u, v):
358 return G.degree(u) * G.degree(v)
360 return _apply_prediction(G, predict, ebunch)
363@not_implemented_for("directed")
364@not_implemented_for("multigraph")
365@nx._dispatch(node_attrs="community")
366def cn_soundarajan_hopcroft(G, ebunch=None, community="community"):
367 r"""Count the number of common neighbors of all node pairs in ebunch
368 using community information.
370 For two nodes $u$ and $v$, this function computes the number of
371 common neighbors and bonus one for each common neighbor belonging to
372 the same community as $u$ and $v$. Mathematically,
374 .. math::
376 |\Gamma(u) \cap \Gamma(v)| + \sum_{w \in \Gamma(u) \cap \Gamma(v)} f(w)
378 where $f(w)$ equals 1 if $w$ belongs to the same community as $u$
379 and $v$ or 0 otherwise and $\Gamma(u)$ denotes the set of
380 neighbors of $u$.
382 Parameters
383 ----------
384 G : graph
385 A NetworkX undirected graph.
387 ebunch : iterable of node pairs, optional (default = None)
388 The score will be computed for each pair of nodes given in the
389 iterable. The pairs must be given as 2-tuples (u, v) where u
390 and v are nodes in the graph. If ebunch is None then all
391 nonexistent edges in the graph will be used.
392 Default value: None.
394 community : string, optional (default = 'community')
395 Nodes attribute name containing the community information.
396 G[u][community] identifies which community u belongs to. Each
397 node belongs to at most one community. Default value: 'community'.
399 Returns
400 -------
401 piter : iterator
402 An iterator of 3-tuples in the form (u, v, p) where (u, v) is a
403 pair of nodes and p is their score.
405 Examples
406 --------
407 >>> G = nx.path_graph(3)
408 >>> G.nodes[0]["community"] = 0
409 >>> G.nodes[1]["community"] = 0
410 >>> G.nodes[2]["community"] = 0
411 >>> preds = nx.cn_soundarajan_hopcroft(G, [(0, 2)])
412 >>> for u, v, p in preds:
413 ... print(f"({u}, {v}) -> {p}")
414 (0, 2) -> 2
416 References
417 ----------
418 .. [1] Sucheta Soundarajan and John Hopcroft.
419 Using community information to improve the precision of link
420 prediction methods.
421 In Proceedings of the 21st international conference companion on
422 World Wide Web (WWW '12 Companion). ACM, New York, NY, USA, 607-608.
423 http://doi.acm.org/10.1145/2187980.2188150
424 """
426 def predict(u, v):
427 Cu = _community(G, u, community)
428 Cv = _community(G, v, community)
429 cnbors = list(nx.common_neighbors(G, u, v))
430 neighbors = (
431 sum(_community(G, w, community) == Cu for w in cnbors) if Cu == Cv else 0
432 )
433 return len(cnbors) + neighbors
435 return _apply_prediction(G, predict, ebunch)
438@not_implemented_for("directed")
439@not_implemented_for("multigraph")
440@nx._dispatch(node_attrs="community")
441def ra_index_soundarajan_hopcroft(G, ebunch=None, community="community"):
442 r"""Compute the resource allocation index of all node pairs in
443 ebunch using community information.
445 For two nodes $u$ and $v$, this function computes the resource
446 allocation index considering only common neighbors belonging to the
447 same community as $u$ and $v$. Mathematically,
449 .. math::
451 \sum_{w \in \Gamma(u) \cap \Gamma(v)} \frac{f(w)}{|\Gamma(w)|}
453 where $f(w)$ equals 1 if $w$ belongs to the same community as $u$
454 and $v$ or 0 otherwise and $\Gamma(u)$ denotes the set of
455 neighbors of $u$.
457 Parameters
458 ----------
459 G : graph
460 A NetworkX undirected graph.
462 ebunch : iterable of node pairs, optional (default = None)
463 The score will be computed for each pair of nodes given in the
464 iterable. The pairs must be given as 2-tuples (u, v) where u
465 and v are nodes in the graph. If ebunch is None then all
466 nonexistent edges in the graph will be used.
467 Default value: None.
469 community : string, optional (default = 'community')
470 Nodes attribute name containing the community information.
471 G[u][community] identifies which community u belongs to. Each
472 node belongs to at most one community. Default value: 'community'.
474 Returns
475 -------
476 piter : iterator
477 An iterator of 3-tuples in the form (u, v, p) where (u, v) is a
478 pair of nodes and p is their score.
480 Examples
481 --------
482 >>> G = nx.Graph()
483 >>> G.add_edges_from([(0, 1), (0, 2), (1, 3), (2, 3)])
484 >>> G.nodes[0]["community"] = 0
485 >>> G.nodes[1]["community"] = 0
486 >>> G.nodes[2]["community"] = 1
487 >>> G.nodes[3]["community"] = 0
488 >>> preds = nx.ra_index_soundarajan_hopcroft(G, [(0, 3)])
489 >>> for u, v, p in preds:
490 ... print(f"({u}, {v}) -> {p:.8f}")
491 (0, 3) -> 0.50000000
493 References
494 ----------
495 .. [1] Sucheta Soundarajan and John Hopcroft.
496 Using community information to improve the precision of link
497 prediction methods.
498 In Proceedings of the 21st international conference companion on
499 World Wide Web (WWW '12 Companion). ACM, New York, NY, USA, 607-608.
500 http://doi.acm.org/10.1145/2187980.2188150
501 """
503 def predict(u, v):
504 Cu = _community(G, u, community)
505 Cv = _community(G, v, community)
506 if Cu != Cv:
507 return 0
508 cnbors = nx.common_neighbors(G, u, v)
509 return sum(1 / G.degree(w) for w in cnbors if _community(G, w, community) == Cu)
511 return _apply_prediction(G, predict, ebunch)
514@not_implemented_for("directed")
515@not_implemented_for("multigraph")
516@nx._dispatch(node_attrs="community")
517def within_inter_cluster(G, ebunch=None, delta=0.001, community="community"):
518 """Compute the ratio of within- and inter-cluster common neighbors
519 of all node pairs in ebunch.
521 For two nodes `u` and `v`, if a common neighbor `w` belongs to the
522 same community as them, `w` is considered as within-cluster common
523 neighbor of `u` and `v`. Otherwise, it is considered as
524 inter-cluster common neighbor of `u` and `v`. The ratio between the
525 size of the set of within- and inter-cluster common neighbors is
526 defined as the WIC measure. [1]_
528 Parameters
529 ----------
530 G : graph
531 A NetworkX undirected graph.
533 ebunch : iterable of node pairs, optional (default = None)
534 The WIC measure will be computed for each pair of nodes given in
535 the iterable. The pairs must be given as 2-tuples (u, v) where
536 u and v are nodes in the graph. If ebunch is None then all
537 nonexistent edges in the graph will be used.
538 Default value: None.
540 delta : float, optional (default = 0.001)
541 Value to prevent division by zero in case there is no
542 inter-cluster common neighbor between two nodes. See [1]_ for
543 details. Default value: 0.001.
545 community : string, optional (default = 'community')
546 Nodes attribute name containing the community information.
547 G[u][community] identifies which community u belongs to. Each
548 node belongs to at most one community. Default value: 'community'.
550 Returns
551 -------
552 piter : iterator
553 An iterator of 3-tuples in the form (u, v, p) where (u, v) is a
554 pair of nodes and p is their WIC measure.
556 Examples
557 --------
558 >>> G = nx.Graph()
559 >>> G.add_edges_from([(0, 1), (0, 2), (0, 3), (1, 4), (2, 4), (3, 4)])
560 >>> G.nodes[0]["community"] = 0
561 >>> G.nodes[1]["community"] = 1
562 >>> G.nodes[2]["community"] = 0
563 >>> G.nodes[3]["community"] = 0
564 >>> G.nodes[4]["community"] = 0
565 >>> preds = nx.within_inter_cluster(G, [(0, 4)])
566 >>> for u, v, p in preds:
567 ... print(f"({u}, {v}) -> {p:.8f}")
568 (0, 4) -> 1.99800200
569 >>> preds = nx.within_inter_cluster(G, [(0, 4)], delta=0.5)
570 >>> for u, v, p in preds:
571 ... print(f"({u}, {v}) -> {p:.8f}")
572 (0, 4) -> 1.33333333
574 References
575 ----------
576 .. [1] Jorge Carlos Valverde-Rebaza and Alneu de Andrade Lopes.
577 Link prediction in complex networks based on cluster information.
578 In Proceedings of the 21st Brazilian conference on Advances in
579 Artificial Intelligence (SBIA'12)
580 https://doi.org/10.1007/978-3-642-34459-6_10
581 """
582 if delta <= 0:
583 raise nx.NetworkXAlgorithmError("Delta must be greater than zero")
585 def predict(u, v):
586 Cu = _community(G, u, community)
587 Cv = _community(G, v, community)
588 if Cu != Cv:
589 return 0
590 cnbors = set(nx.common_neighbors(G, u, v))
591 within = {w for w in cnbors if _community(G, w, community) == Cu}
592 inter = cnbors - within
593 return len(within) / (len(inter) + delta)
595 return _apply_prediction(G, predict, ebunch)
598def _community(G, u, community):
599 """Get the community of the given node."""
600 node_u = G.nodes[u]
601 try:
602 return node_u[community]
603 except KeyError as err:
604 raise nx.NetworkXAlgorithmError("No community information") from err