1"""Betweenness centrality measures."""
2
3import math
4from collections import deque
5from heapq import heappop, heappush
6from itertools import count
7
8import networkx as nx
9from networkx.algorithms.shortest_paths.weighted import _weight_function
10from networkx.utils import py_random_state
11from networkx.utils.decorators import not_implemented_for
12
13__all__ = ["betweenness_centrality", "edge_betweenness_centrality"]
14
15
@py_random_state("seed")
@nx._dispatchable(edge_attrs="weight")
def betweenness_centrality(
    G, k=None, normalized=True, weight=None, endpoints=False, seed=None
):
    r"""Compute the shortest-path betweenness centrality for nodes.

    Betweenness centrality of a node $v$ is the sum of the
    fraction of all-pairs shortest paths that pass through $v$.

    .. math::

       c_B(v) = \frac{1}{P} \sum_{s, t \in V} \frac{\sigma(s, t | v)}{\sigma(s, t)}

    where $V$ is the set of nodes, $\sigma(s, t)$ is the number of
    shortest $(s, t)$-paths, and $\sigma(s, t | v)$ is the number of
    those paths passing through some node $v$ other than $s$ and $t$.
    If $s = t$, $\sigma(s, t) = 1$, and if $v \in \{s, t\}$,
    $\sigma(s, t | v) = 0$ (when `endpoints` takes its default value of
    `False`) [2]_.
    $P$ is a normalization factor representing the number of pairs of nodes
    that have counted shortest paths. Its value depends on the values of `normalized`
    and `endpoints`, and on whether the graph is directed (see Notes). It can
    be set to ``1`` with ``normalized=False``.

    Parameters
    ----------
    G : graph
        A NetworkX graph.

    k : int, optional (default=None)
        If `k` is not `None`, use `k` sampled nodes as sources for the considered paths.
        The resulting sampled counts are then inflated to approximate betweenness.
        Higher values of `k` give better approximation. Must have ``k <= len(G)``.

    normalized : bool, optional (default=True)
        If `True`, the betweenness values are rescaled by dividing by the number of
        possible $(s, t)$-pairs in the graph.

    weight : None or string, optional (default=None)
        If `None`, all edge weights are 1.
        Otherwise holds the name of the edge attribute used as weight.
        Weights are used to calculate weighted shortest paths, so they are
        interpreted as distances.

    endpoints : bool, optional (default=False)
        If `True`, include the endpoints $s$ and $t$ in the shortest path counts.
        This is taken into account when rescaling the values.

    seed : integer, random_state, or None (default)
        Indicator of random number generation state.
        See :ref:`Randomness<randomness>`.
        Note that this is only used if ``k is not None``.

    Returns
    -------
    nodes : dict
        Dictionary of nodes with betweenness centrality as the value.

    See Also
    --------
    betweenness_centrality_subset
    edge_betweenness_centrality
    load_centrality

    Notes
    -----
    The algorithm is from Ulrik Brandes [1]_.
    See [4]_ for the original first published version and [2]_ for details on
    algorithms for variations and related metrics.

    For approximate betweenness calculations, set `k` to the number of sampled
    nodes ("pivots") used as sources to estimate the betweenness values.
    The formula then sums over $s$ in these pivots, instead of over all nodes.
    The resulting sum is then inflated to approximate the full sum.
    For a discussion of how to choose `k` for efficiency, see [3]_.

    For weighted graphs the edge weights must be greater than zero.
    Zero edge weights can produce an infinite number of equal length
    paths between pairs of nodes.

    Directed graphs and undirected graphs count paths differently.
    In directed graphs, each pair of source-target nodes is considered separately
    in each direction, as the shortest paths can differ by direction.
    However, in undirected graphs, each pair of nodes is considered only once,
    as the shortest paths are symmetric.
    This means the normalization factor to divide by is $N(N-1)$ for directed graphs
    and $N(N-1)/2$ for undirected graphs, where $N = n$ (the number of nodes)
    if endpoints are included and $N = n-1$ otherwise.

    This algorithm is not guaranteed to be correct if edge weights
    are floating point numbers. As a workaround you can use integer
    numbers by multiplying the relevant edge attributes by a convenient
    constant factor (e.g. 100) and converting to integers.

    References
    ----------
    .. [1] Ulrik Brandes:
       A Faster Algorithm for Betweenness Centrality.
       Journal of Mathematical Sociology 25(2):163--177, 2001.
       https://doi.org/10.1080/0022250X.2001.9990249
    .. [2] Ulrik Brandes:
       On Variants of Shortest-Path Betweenness
       Centrality and their Generic Computation.
       Social Networks 30(2):136--145, 2008.
       https://doi.org/10.1016/j.socnet.2007.11.001
    .. [3] Ulrik Brandes and Christian Pich:
       Centrality Estimation in Large Networks.
       International Journal of Bifurcation and Chaos 17(7):2303--2318, 2007.
       https://dx.doi.org/10.1142/S0218127407018403
    .. [4] Linton C. Freeman:
       A set of measures of centrality based on betweenness.
       Sociometry 40: 35--41, 1977
       https://doi.org/10.2307/3033543

    Examples
    --------
    Consider an undirected 3-path. Each pair of nodes has exactly one shortest
    path between them. Since the graph is undirected, only unordered pairs are counted.
    Of these (and when `endpoints` is `False`), none of the shortest paths pass
    through 0 and 2, and only the shortest path between 0 and 2 passes through 1.
    As such, the counts should be ``{0: 0, 1: 1, 2: 0}``.

    >>> G = nx.path_graph(3)
    >>> nx.betweenness_centrality(G, normalized=False, endpoints=False)
    {0: 0.0, 1: 1.0, 2: 0.0}

    If `endpoints` is `True`, we also need to count endpoints as being on the path:
    $\sigma(s, t | s) = \sigma(s, t | t) = \sigma(s, t)$.
    In our example, 0 is then part of two shortest paths (0 to 1 and 0 to 2);
    similarly, 2 is part of two shortest paths (0 to 2 and 1 to 2).
    1 is part of all three shortest paths. This makes the new raw
    counts ``{0: 2, 1: 3, 2: 2}``.

    >>> nx.betweenness_centrality(G, normalized=False, endpoints=True)
    {0: 2.0, 1: 3.0, 2: 2.0}

    With normalization, the values are divided by the number of unordered $(s, t)$-pairs.
    If we are not counting endpoints, there are $n - 1$ possible choices for $s$
    (all except the node we are computing betweenness centrality for), which in turn
    leaves $n - 2$ possible choices for $t$ as $s \ne t$.
    The total number of unordered pairs when `endpoints` is `False` is $(n - 1)(n - 2)/2 = 1$.
    If `endpoints` is `True`, there are $n(n - 1)/2 = 3$ unordered $(s, t)$-pairs to divide by.

    >>> nx.betweenness_centrality(G, normalized=True, endpoints=False)
    {0: 0.0, 1: 1.0, 2: 0.0}
    >>> nx.betweenness_centrality(G, normalized=True, endpoints=True)
    {0: 0.6666666666666666, 1: 1.0, 2: 0.6666666666666666}

    If the graph is directed instead, we now need to consider $(s, t)$-pairs
    in both directions. Our example becomes a directed 3-path.
    Without counting endpoints, we only have one path through 1 (0 to 2).
    This means the raw counts are ``{0: 0, 1: 1, 2: 0}``.

    >>> DG = nx.path_graph(3, create_using=nx.DiGraph)
    >>> nx.betweenness_centrality(DG, normalized=False, endpoints=False)
    {0: 0.0, 1: 1.0, 2: 0.0}

    If we do include endpoints, the raw counts are ``{0: 2, 1: 3, 2: 2}``.

    >>> nx.betweenness_centrality(DG, normalized=False, endpoints=True)
    {0: 2.0, 1: 3.0, 2: 2.0}

    If we want to normalize directed betweenness centrality, the raw counts
    are normalized by the number of $(s, t)$-pairs. There are $n(n - 1)$
    possible paths with endpoints and $(n - 1)(n - 2)$ without endpoints.
    In our example, that's 6 with endpoints and 2 without endpoints.

    >>> nx.betweenness_centrality(DG, normalized=True, endpoints=True)
    {0: 0.3333333333333333, 1: 0.5, 2: 0.3333333333333333}
    >>> nx.betweenness_centrality(DG, normalized=True, endpoints=False)
    {0: 0.0, 1: 0.5, 2: 0.0}

    Computing the full betweenness centrality can be costly.
    This function can also be used to compute approximate betweenness centrality
    by setting `k`. This only determines the number of source nodes to sample;
    all nodes are targets.

    For simplicity, we only consider the case where endpoints are included in the counts.
    Since the partial sums only include `k` terms, instead of ``n``,
    we multiply them by ``n / k``, to approximate the full sum.
    As the sets of sources and targets are not the same anymore,
    paths have to be counted in a directed way. We thus count each as half a path.
    This ensures that the results approximate the standard betweenness for ``k == n``.

    For instance, in the undirected 3-path graph case, setting ``k = 2`` (with ``seed=42``)
    selects nodes 0 and 2 as sources.
    This means only shortest paths starting at these nodes are considered.
    The raw counts with endpoints are ``{0: 3, 1: 4, 2: 3}``. Accounting for the partial sum
    and applying the undirectedness half-path correction, we get

    >>> nx.betweenness_centrality(G, k=2, normalized=False, endpoints=True, seed=42)
    {0: 2.25, 1: 3.0, 2: 2.25}

    When normalizing, we instead want to divide by the total number of $(s, t)$-pairs.
    This is $k(n - 1)$ with endpoints.

    >>> nx.betweenness_centrality(G, k=2, normalized=True, endpoints=True, seed=42)
    {0: 0.75, 1: 1.0, 2: 0.75}
    """
    betweenness = dict.fromkeys(G, 0.0)  # b[v]=0 for v in G
    if k == len(G):
        # This is done for performance; the result is the same regardless.
        k = None
    if k is None:
        nodes = G
    else:
        # Sample k source nodes ("pivots"); all nodes remain targets.
        nodes = seed.sample(list(G.nodes()), k)
    for s in nodes:
        # single source shortest paths
        if weight is None:  # use BFS
            S, P, sigma, _ = _single_source_shortest_path_basic(G, s)
        else:  # use Dijkstra's algorithm
            S, P, sigma, _ = _single_source_dijkstra_path_basic(G, s, weight)
        # accumulation
        if endpoints:
            betweenness, _ = _accumulate_endpoints(betweenness, S, P, sigma, s)
        else:
            betweenness, _ = _accumulate_basic(betweenness, S, P, sigma, s)
    # rescaling
    betweenness = _rescale(
        betweenness,
        len(G),
        normalized=normalized,
        directed=G.is_directed(),
        endpoints=endpoints,
        sampled_nodes=None if k is None else nodes,
    )
    return betweenness
245
246
@py_random_state("seed")
@nx._dispatchable(edge_attrs="weight")
def edge_betweenness_centrality(G, k=None, normalized=True, weight=None, seed=None):
    r"""Compute betweenness centrality for edges.

    Betweenness centrality of an edge $e$ is the sum of the
    fraction of all-pairs shortest paths that pass through $e$.

    .. math::

       c_B(e) = \frac{1}{P} \sum_{s, t \in V} \frac{\sigma(s, t | e)}{\sigma(s, t)}

    where $V$ is the set of nodes, $\sigma(s, t)$ is the number of
    shortest $(s, t)$-paths, and $\sigma(s, t | e)$ is the number of
    those paths passing through edge $e$ [1]_.
    $P$ is a normalization factor representing the number of pairs of nodes
    that have counted shortest paths. Its value depends on the value of
    `normalized` and on whether the graph is directed. It can
    be set to ``1`` with ``normalized=False``.

    Parameters
    ----------
    G : graph
        A NetworkX graph.

    k : int, optional (default=None)
        If `k` is not `None`, use `k` sampled nodes as sources for the considered paths.
        The resulting sampled counts are then inflated to approximate betweenness.
        Higher values of `k` give better approximation. Must have ``k <= len(G)``.

    normalized : bool, optional (default=True)
        If `True`, the betweenness values are rescaled by dividing by the number of
        possible $(s, t)$-pairs in the graph.

    weight : None or string, optional (default=None)
        If `None`, all edge weights are 1.
        Otherwise holds the name of the edge attribute used as weight.
        Weights are used to calculate weighted shortest paths, so they are
        interpreted as distances.

    seed : integer, random_state, or None (default)
        Indicator of random number generation state.
        See :ref:`Randomness<randomness>`.
        Note that this is only used if ``k is not None``.

    Returns
    -------
    edges : dict
        Dictionary of edges with betweenness centrality as the value.

    See Also
    --------
    betweenness_centrality
    edge_betweenness_centrality_subset
    edge_load

    Notes
    -----
    The algorithm is from Ulrik Brandes [1]_.

    For weighted graphs the edge weights must be greater than zero.
    Zero edge weights can produce an infinite number of equal length
    paths between pairs of nodes.

    References
    ----------
    .. [1] Ulrik Brandes: On Variants of Shortest-Path Betweenness
       Centrality and their Generic Computation.
       Social Networks 30(2):136--145, 2008.
       https://doi.org/10.1016/j.socnet.2007.11.001

    Examples
    --------
    Consider an undirected 3-path. Each pair of nodes has exactly one shortest
    path between them. Since the graph is undirected, only unordered pairs are counted.
    Each edge has two shortest paths passing through it.
    As such, the raw counts should be ``{(0, 1): 2, (1, 2): 2}``.

    >>> G = nx.path_graph(3)
    >>> nx.edge_betweenness_centrality(G, normalized=False)
    {(0, 1): 2.0, (1, 2): 2.0}

    With normalization, the values are divided by the number of unordered $(s, t)$-pairs,
    which is $n(n-1)/2$. For the 3-path, this is $3(3-1)/2 = 3$.

    >>> nx.edge_betweenness_centrality(G, normalized=True)
    {(0, 1): 0.6666666666666666, (1, 2): 0.6666666666666666}

    For a directed graph, all $(s, t)$-pairs are considered. The normalization factor
    is $n(n-1)$ to reflect this.

    >>> DG = nx.path_graph(3, create_using=nx.DiGraph)
    >>> nx.edge_betweenness_centrality(DG, normalized=False)
    {(0, 1): 2.0, (1, 2): 2.0}
    >>> nx.edge_betweenness_centrality(DG, normalized=True)
    {(0, 1): 0.3333333333333333, (1, 2): 0.3333333333333333}

    Computing the full edge betweenness centrality can be costly.
    This function can also be used to compute approximate edge betweenness centrality
    by setting `k`. This determines the number of source nodes to sample.

    Since the partial sums only include `k` terms, instead of ``n``,
    we multiply them by ``n / k``, to approximate the full sum.
    As the sets of sources and targets are not the same anymore,
    paths have to be counted in a directed way. We thus count each as half a path.
    This ensures that the results approximate the standard betweenness for ``k == n``.

    For instance, in the undirected 3-path graph case, setting ``k = 2`` (with ``seed=42``)
    selects nodes 0 and 2 as sources.
    This means only shortest paths starting at these nodes are considered.
    The raw counts are ``{(0, 1): 3, (1, 2): 3}``. Accounting for the partial sum
    and applying the undirectedness half-path correction, we get

    >>> nx.edge_betweenness_centrality(G, k=2, normalized=False, seed=42)
    {(0, 1): 2.25, (1, 2): 2.25}

    When normalizing, we instead want to divide by the total number of $(s, t)$-pairs.
    This is $k(n-1)$, which is $4$ in our case.

    >>> nx.edge_betweenness_centrality(G, k=2, normalized=True, seed=42)
    {(0, 1): 0.75, (1, 2): 0.75}
    """
    betweenness = dict.fromkeys(G, 0.0)  # b[v]=0 for v in G
    # b[e]=0 for e in G.edges()
    betweenness.update(dict.fromkeys(G.edges(), 0.0))
    if k == len(G):
        # Sampling every node is equivalent to the exact computation, so skip
        # the sampling machinery (consistent with `betweenness_centrality`).
        # This is done for performance; the result is the same regardless.
        k = None
    if k is None:
        nodes = G
    else:
        # Sample k source nodes ("pivots"); all nodes remain targets.
        nodes = seed.sample(list(G.nodes()), k)
    for s in nodes:
        # single source shortest paths
        if weight is None:  # use BFS
            S, P, sigma, _ = _single_source_shortest_path_basic(G, s)
        else:  # use Dijkstra's algorithm
            S, P, sigma, _ = _single_source_dijkstra_path_basic(G, s, weight)
        # accumulation
        betweenness = _accumulate_edges(betweenness, S, P, sigma, s)
    # rescaling
    for n in G:  # remove nodes to only return edges
        del betweenness[n]
    betweenness = _rescale(
        betweenness,
        len(G),
        normalized=normalized,
        directed=G.is_directed(),
        sampled_nodes=None if k is None else nodes,
    )
    if G.is_multigraph():
        betweenness = _add_edge_keys(G, betweenness, weight=weight)
    return betweenness
397
398
399# helpers for betweenness centrality
400
401
402def _single_source_shortest_path_basic(G, s):
403 S = []
404 P = {}
405 for v in G:
406 P[v] = []
407 sigma = dict.fromkeys(G, 0.0) # sigma[v]=0 for v in G
408 D = {}
409 sigma[s] = 1.0
410 D[s] = 0
411 Q = deque([s])
412 while Q: # use BFS to find shortest paths
413 v = Q.popleft()
414 S.append(v)
415 Dv = D[v]
416 sigmav = sigma[v]
417 for w in G[v]:
418 if w not in D:
419 Q.append(w)
420 D[w] = Dv + 1
421 if D[w] == Dv + 1: # this is a shortest path, count paths
422 sigma[w] += sigmav
423 P[w].append(v) # predecessors
424 return S, P, sigma, D
425
426
def _single_source_dijkstra_path_basic(G, s, weight):
    """Dijkstra search from `s`, recording Brandes shortest-path data.

    Returns ``(S, P, sigma, D)`` with the same meaning as in the BFS
    variant, but with distances taken from the `weight` edge attribute.
    Modified from Eppstein's Dijkstra implementation.
    """
    weight = _weight_function(G, weight)
    order = []  # nodes in non-decreasing distance from s
    pred = {node: [] for node in G}  # shortest-path predecessors
    sigma = dict.fromkeys(G, 0.0)  # number of shortest paths from s
    final = {}  # finalized distances
    seen = {s: 0}  # best tentative distances
    sigma[s] = 1.0
    seq = count()  # tie-breaker so the heap never compares node objects
    heap = [(0, next(seq), s, s)]  # entries: (distance, seq, predecessor, node)
    while heap:
        d, _, u, v = heappop(heap)
        if v in final:
            continue  # already searched this node.
        sigma[v] += sigma[u]  # count paths arriving via u
        order.append(v)
        final[v] = d
        for nbr, attrs in G[v].items():
            cand = d + weight(v, nbr, attrs)
            if nbr not in final and (nbr not in seen or cand < seen[nbr]):
                # Strictly shorter route found: restart counts/predecessors.
                seen[nbr] = cand
                heappush(heap, (cand, next(seq), v, nbr))
                sigma[nbr] = 0.0
                pred[nbr] = [v]
            elif cand == seen[nbr]:  # another shortest path of equal length
                sigma[nbr] += sigma[v]
                pred[nbr].append(v)
    return order, pred, sigma, final
459
460
461def _accumulate_basic(betweenness, S, P, sigma, s):
462 delta = dict.fromkeys(S, 0)
463 while S:
464 w = S.pop()
465 coeff = (1 + delta[w]) / sigma[w]
466 for v in P[w]:
467 delta[v] += sigma[v] * coeff
468 if w != s:
469 betweenness[w] += delta[w]
470 return betweenness, delta
471
472
473def _accumulate_endpoints(betweenness, S, P, sigma, s):
474 betweenness[s] += len(S) - 1
475 delta = dict.fromkeys(S, 0)
476 while S:
477 w = S.pop()
478 coeff = (1 + delta[w]) / sigma[w]
479 for v in P[w]:
480 delta[v] += sigma[v] * coeff
481 if w != s:
482 betweenness[w] += delta[w] + 1
483 return betweenness, delta
484
485
486def _accumulate_edges(betweenness, S, P, sigma, s):
487 delta = dict.fromkeys(S, 0)
488 while S:
489 w = S.pop()
490 coeff = (1 + delta[w]) / sigma[w]
491 for v in P[w]:
492 c = sigma[v] * coeff
493 if (v, w) not in betweenness:
494 betweenness[(w, v)] += c
495 else:
496 betweenness[(v, w)] += c
497 delta[v] += c
498 if w != s:
499 betweenness[w] += delta[w]
500 return betweenness
501
502
503def _rescale(
504 betweenness, n, *, normalized, directed, endpoints=True, sampled_nodes=None
505):
506 # For edge betweenness, `endpoints` is always `True`.
507
508 k = None if sampled_nodes is None else len(sampled_nodes)
509 # N is used to count the number of valid (s, t) pairs where s != t that
510 # could have a path pass through v. If endpoints is False, then v must
511 # not be the target t, hence why we subtract by 1.
512 N = n if endpoints else n - 1
513 if N < 2:
514 # No rescaling necessary: b=0 for all nodes
515 return betweenness
516
517 K_source = N if k is None else k
518
519 if k is None or endpoints:
520 # No sampling adjustment needed
521 if normalized:
522 # Divide by the number of valid (s, t) node pairs that could have
523 # a path through v where s != t.
524 scale = 1 / (K_source * (N - 1))
525 else:
526 # Scale to the full BC
527 if not directed:
528 # The non-normalized BC values are computed the same way for
529 # directed and undirected graphs: shortest paths are computed and
530 # counted for each *ordered* (s, t) pair. Undirected graphs should
531 # only count valid *unordered* node pairs {s, t}; that is, (s, t)
532 # and (t, s) should be counted only once. We correct for this here.
533 correction = 2
534 else:
535 correction = 1
536 scale = N / (K_source * correction)
537
538 if scale != 1:
539 for v in betweenness:
540 betweenness[v] *= scale
541 return betweenness
542
543 # Sampling adjustment needed when excluding endpoints when using k. In this
544 # case, we need to handle source nodes differently from non-source nodes,
545 # because source nodes can't include themselves since endpoints are excluded.
546 # Without this, k == n would be a special case that would violate the
547 # assumption that node `v` is not one of the (s, t) node pairs.
548 if normalized:
549 # NaN for undefined 0/0; there is no data for source node when k=1
550 scale_source = 1 / ((K_source - 1) * (N - 1)) if K_source > 1 else math.nan
551 scale_nonsource = 1 / (K_source * (N - 1))
552 else:
553 correction = 1 if directed else 2
554 scale_source = N / ((K_source - 1) * correction) if K_source > 1 else math.nan
555 scale_nonsource = N / (K_source * correction)
556
557 sampled_nodes = set(sampled_nodes)
558 for v in betweenness:
559 betweenness[v] *= scale_source if v in sampled_nodes else scale_nonsource
560 return betweenness
561
562
@not_implemented_for("graph")
def _add_edge_keys(G, betweenness, weight=None):
    r"""Expand multigraph edge betweenness values to keyed edges.

    Parameters
    ----------
    G : NetworkX graph.

    betweenness : dictionary
        Dictionary mapping adjacent node tuples to betweenness centrality values.

    weight : string or function
        See `_weight_function` for details. Defaults to `None`.

    Returns
    -------
    edges : dictionary
        The parameter `betweenness` including edges with keys and their
        betweenness centrality values.

    The BC value is divided among edges of equal weight.
    """
    get_weight = _weight_function(G, weight)

    keyed_bc = dict.fromkeys(G.edges, 0.0)
    for u, v in betweenness:
        parallel = G[u][v]
        min_wt = get_weight(u, v, parallel)
        # All parallel (u, v) edges that attain the minimum weight lie on the
        # shortest paths and share the betweenness value equally.
        min_keys = [
            key for key in parallel if get_weight(u, v, {key: parallel[key]}) == min_wt
        ]
        per_key = betweenness[(u, v)] / len(min_keys)
        for key in min_keys:
            keyed_bc[(u, v, key)] = per_key

    return keyed_bc