1"""Betweenness centrality measures."""
2
3import math
4from collections import deque
5from heapq import heappop, heappush
6from itertools import count
7
8import networkx as nx
9from networkx.algorithms.shortest_paths.weighted import _weight_function
10from networkx.utils import py_random_state
11from networkx.utils.decorators import not_implemented_for
12
13__all__ = ["betweenness_centrality", "edge_betweenness_centrality"]
14
15
@py_random_state("seed")
@nx._dispatchable(edge_attrs="weight")
def betweenness_centrality(
    G, k=None, normalized=True, weight=None, endpoints=False, seed=None
):
    r"""Compute the shortest-path betweenness centrality for nodes.

    Betweenness centrality of a node $v$ is the sum of the
    fraction of all-pairs shortest paths that pass through $v$.

    .. math::

       c_B(v) = \sum_{s, t \in V} \frac{\sigma(s, t | v)}{\sigma(s, t)}

    where $V$ is the set of nodes, $\sigma(s, t)$ is the number of
    shortest $(s, t)$-paths, and $\sigma(s, t | v)$ is the number of
    those paths passing through some node $v$ other than $s$ and $t$.
    If $s = t$, $\sigma(s, t) = 1$, and if $v \in \{s, t\}$,
    $\sigma(s, t | v) = 0$ [2]_.
    The denominator $\sigma(s, t)$ is a normalization factor that can be
    turned off to get the raw path counts.

    Parameters
    ----------
    G : graph
        A NetworkX graph.

    k : int, optional (default=None)
        If `k` is not `None`, use `k` sampled nodes as sources for the considered paths.
        The resulting sampled counts are then inflated to approximate betweenness.
        Higher values of `k` give better approximation. Must have ``k <= len(G)``.

    normalized : bool, optional (default=True)
        If `True`, the betweenness values are rescaled by dividing by the number of
        possible $(s, t)$-pairs in the graph.

    weight : None or string, optional (default=None)
        If `None`, all edge weights are 1.
        Otherwise holds the name of the edge attribute used as weight.
        Weights are used to calculate weighted shortest paths, so they are
        interpreted as distances.

    endpoints : bool, optional (default=False)
        If `True`, include the endpoints $s$ and $t$ in the shortest path counts.
        This is taken into account when rescaling the values.

    seed : integer, random_state, or None (default)
        Indicator of random number generation state.
        See :ref:`Randomness<randomness>`.
        Note that this is only used if ``k is not None``.

    Returns
    -------
    nodes : dict
        Dictionary of nodes with betweenness centrality as the value.

    See Also
    --------
    betweenness_centrality_subset
    edge_betweenness_centrality
    load_centrality

    Notes
    -----
    The algorithm is from Ulrik Brandes [1]_.
    See [4]_ for the original first published version and [2]_ for details on
    algorithms for variations and related metrics.

    For approximate betweenness calculations, set `k` to the number of sampled
    nodes ("pivots") used as sources to estimate the betweenness values.
    The formula then sums over $s$ in these pivots, instead of over all nodes.
    The resulting sum is then inflated to approximate the full sum.
    For a discussion of how to choose `k` for efficiency, see [3]_.

    For weighted graphs the edge weights must be greater than zero.
    Zero edge weights can produce an infinite number of equal length
    paths between pairs of nodes.

    Directed graphs and undirected graphs count paths differently.
    In directed graphs, each pair of source-target nodes is considered separately
    in each direction, as the shortest paths can differ by direction.
    However, in undirected graphs, each pair of nodes is considered only once,
    as the shortest paths are symmetric.
    This means the normalization factor to divide by is $N(N-1)$ for directed graphs
    and $N(N-1)/2$ for undirected graphs, where $N = n$ (the number of nodes)
    if endpoints are included and $N = n-1$ otherwise.

    This algorithm is not guaranteed to be correct if edge weights
    are floating point numbers. As a workaround you can use integer
    numbers by multiplying the relevant edge attributes by a convenient
    constant factor (e.g. 100) and converting to integers.

    References
    ----------
    .. [1] Ulrik Brandes:
       A Faster Algorithm for Betweenness Centrality.
       Journal of Mathematical Sociology 25(2):163--177, 2001.
       https://doi.org/10.1080/0022250X.2001.9990249
    .. [2] Ulrik Brandes:
       On Variants of Shortest-Path Betweenness
       Centrality and their Generic Computation.
       Social Networks 30(2):136--145, 2008.
       https://doi.org/10.1016/j.socnet.2007.11.001
    .. [3] Ulrik Brandes and Christian Pich:
       Centrality Estimation in Large Networks.
       International Journal of Bifurcation and Chaos 17(7):2303--2318, 2007.
       https://dx.doi.org/10.1142/S0218127407018403
    .. [4] Linton C. Freeman:
       A set of measures of centrality based on betweenness.
       Sociometry 40: 35--41, 1977
       https://doi.org/10.2307/3033543

    Examples
    --------
    Consider an undirected 3-path. Each pair of nodes has exactly one shortest
    path between them. Since the graph is undirected, only unordered pairs are counted.
    Of these (and when `endpoints` is `False`), none of the shortest paths pass
    through 0 and 2, and only the shortest path between 0 and 2 passes through 1.
    As such, the counts should be ``{0: 0, 1: 1, 2: 0}``.

    >>> G = nx.path_graph(3)
    >>> nx.betweenness_centrality(G, normalized=False, endpoints=False)
    {0: 0.0, 1: 1.0, 2: 0.0}

    If `endpoints` is `True`, we also need to count endpoints as being on the path:
    $\sigma(s, t | s) = \sigma(s, t | t) = \sigma(s, t)$.
    In our example, 0 is then part of two shortest paths (0 to 1 and 0 to 2);
    similarly, 2 is part of two shortest paths (0 to 2 and 1 to 2).
    1 is part of all three shortest paths. This makes the new raw
    counts ``{0: 2, 1: 3, 2: 2}``.

    >>> nx.betweenness_centrality(G, normalized=False, endpoints=True)
    {0: 2.0, 1: 3.0, 2: 2.0}

    With normalization, the values are divided by the number of unordered $(s, t)$-pairs.
    If we are not counting endpoints, there are $n - 1$ possible choices for $s$
    (all except the node we are computing betweenness centrality for), which in turn
    leaves $n - 2$ possible choices for $t$ as $s \ne t$.
    The total number of unordered pairs when `endpoints` is `False` is $(n - 1)(n - 2)/2 = 1$.
    If `endpoints` is `True`, there are $n(n - 1)/2 = 3$ unordered $(s, t)$-pairs to divide by.

    >>> nx.betweenness_centrality(G, normalized=True, endpoints=False)
    {0: 0.0, 1: 1.0, 2: 0.0}
    >>> nx.betweenness_centrality(G, normalized=True, endpoints=True)
    {0: 0.6666666666666666, 1: 1.0, 2: 0.6666666666666666}

    If the graph is directed instead, we now need to consider $(s, t)$-pairs
    in both directions. Our example becomes a directed 3-path.
    Without counting endpoints, we only have one path through 1 (0 to 2).
    This means the raw counts are ``{0: 0, 1: 1, 2: 0}``.

    >>> DG = nx.path_graph(3, create_using=nx.DiGraph)
    >>> nx.betweenness_centrality(DG, normalized=False, endpoints=False)
    {0: 0.0, 1: 1.0, 2: 0.0}

    If we do include endpoints, the raw counts are ``{0: 2, 1: 3, 2: 2}``.

    >>> nx.betweenness_centrality(DG, normalized=False, endpoints=True)
    {0: 2.0, 1: 3.0, 2: 2.0}

    If we want to normalize directed betweenness centrality, the raw counts
    are normalized by the number of $(s, t)$-pairs. There are $n(n - 1)$
    possible paths with endpoints and $(n - 1)(n - 2)$ without endpoints.
    In our example, that's 6 with endpoints and 2 without endpoints.

    >>> nx.betweenness_centrality(DG, normalized=True, endpoints=True)
    {0: 0.3333333333333333, 1: 0.5, 2: 0.3333333333333333}
    >>> nx.betweenness_centrality(DG, normalized=True, endpoints=False)
    {0: 0.0, 1: 0.5, 2: 0.0}

    Computing the full betweenness centrality can be costly.
    This function can also be used to compute approximate betweenness centrality
    by setting `k`. This only determines the number of source nodes to sample;
    all nodes are targets.

    For simplicity, we only consider the case where endpoints are included in the counts.
    Since the partial sums only include `k` terms, instead of ``n``,
    we multiply them by ``n / k``, to approximate the full sum.
    As the sets of sources and targets are not the same anymore,
    paths have to be counted in a directed way. We thus count each as half a path.
    This ensures that the results approximate the standard betweenness for ``k == n``.

    For instance, in the undirected 3-path graph case, setting ``k = 2`` (with ``seed=42``)
    selects nodes 0 and 2 as sources.
    This means only shortest paths starting at these nodes are considered.
    The raw counts with endpoints are ``{0: 3, 1: 4, 2: 3}``. Accounting for the partial sum
    and applying the undirectedness half-path correction, we get

    >>> nx.betweenness_centrality(G, k=2, normalized=False, endpoints=True, seed=42)
    {0: 2.25, 1: 3.0, 2: 2.25}

    When normalizing, we instead want to divide by the total number of $(s, t)$-pairs.
    This is $k(n - 1)$ with endpoints.

    >>> nx.betweenness_centrality(G, k=2, normalized=True, endpoints=True, seed=42)
    {0: 0.75, 1: 1.0, 2: 0.75}
    """
    betweenness = dict.fromkeys(G, 0.0)  # b[v]=0 for v in G
    if k == len(G):
        # This is done for performance; the result is the same regardless.
        k = None
    if k is None:
        nodes = G
    else:
        # Sample k pivot nodes (without replacement) to use as sources.
        nodes = seed.sample(list(G.nodes()), k)
    for s in nodes:
        # single source shortest paths
        if weight is None:  # use BFS
            S, P, sigma, _ = _single_source_shortest_path_basic(G, s)
        else:  # use Dijkstra's algorithm
            S, P, sigma, _ = _single_source_dijkstra_path_basic(G, s, weight)
        # accumulation (dependency back-propagation per Brandes)
        if endpoints:
            betweenness, _ = _accumulate_endpoints(betweenness, S, P, sigma, s)
        else:
            betweenness, _ = _accumulate_basic(betweenness, S, P, sigma, s)
    # rescaling
    betweenness = _rescale(
        betweenness,
        len(G),
        normalized=normalized,
        directed=G.is_directed(),
        endpoints=endpoints,
        sampled_nodes=None if k is None else nodes,
    )
    return betweenness
242
243
@py_random_state("seed")
@nx._dispatchable(edge_attrs="weight")
def edge_betweenness_centrality(G, k=None, normalized=True, weight=None, seed=None):
    r"""Compute betweenness centrality for edges.

    Betweenness centrality of an edge $e$ is the sum of the
    fraction of all-pairs shortest paths that pass through $e$.

    .. math::

       c_B(e) = \sum_{s, t \in V} \frac{\sigma(s, t | e)}{\sigma(s, t)}

    where $V$ is the set of nodes, $\sigma(s, t)$ is the number of
    shortest $(s, t)$-paths, and $\sigma(s, t | e)$ is the number of
    those paths passing through edge $e$ [1]_.
    The denominator $\sigma(s, t)$ is a normalization factor that can be
    turned off to get the raw path counts.

    Parameters
    ----------
    G : graph
        A NetworkX graph.

    k : int, optional (default=None)
        If `k` is not `None`, use `k` sampled nodes as sources for the considered paths.
        The resulting sampled counts are then inflated to approximate betweenness.
        Higher values of `k` give better approximation. Must have ``k <= len(G)``.

    normalized : bool, optional (default=True)
        If `True`, the betweenness values are rescaled by dividing by the number of
        possible $(s, t)$-pairs in the graph.

    weight : None or string, optional (default=None)
        If `None`, all edge weights are 1.
        Otherwise holds the name of the edge attribute used as weight.
        Weights are used to calculate weighted shortest paths, so they are
        interpreted as distances.

    seed : integer, random_state, or None (default)
        Indicator of random number generation state.
        See :ref:`Randomness<randomness>`.
        Note that this is only used if ``k is not None``.

    Returns
    -------
    edges : dict
        Dictionary of edges with betweenness centrality as the value.

    See Also
    --------
    betweenness_centrality
    edge_betweenness_centrality_subset
    edge_load

    Notes
    -----
    The algorithm is from Ulrik Brandes [1]_.

    For weighted graphs the edge weights must be greater than zero.
    Zero edge weights can produce an infinite number of equal length
    paths between pairs of nodes.

    References
    ----------
    .. [1] Ulrik Brandes: On Variants of Shortest-Path Betweenness
       Centrality and their Generic Computation.
       Social Networks 30(2):136--145, 2008.
       https://doi.org/10.1016/j.socnet.2007.11.001

    Examples
    --------
    Consider an undirected 3-path. Each pair of nodes has exactly one shortest
    path between them. Since the graph is undirected, only unordered pairs are counted.
    Each edge has two shortest paths passing through it.
    As such, the raw counts should be ``{(0, 1): 2, (1, 2): 2}``.

    >>> G = nx.path_graph(3)
    >>> nx.edge_betweenness_centrality(G, normalized=False)
    {(0, 1): 2.0, (1, 2): 2.0}

    With normalization, the values are divided by the number of unordered $(s, t)$-pairs,
    which is $n(n-1)/2$. For the 3-path, this is $3(3-1)/2 = 3$.

    >>> nx.edge_betweenness_centrality(G, normalized=True)
    {(0, 1): 0.6666666666666666, (1, 2): 0.6666666666666666}

    For a directed graph, all $(s, t)$-pairs are considered. The normalization factor
    is $n(n-1)$ to reflect this.

    >>> DG = nx.path_graph(3, create_using=nx.DiGraph)
    >>> nx.edge_betweenness_centrality(DG, normalized=False)
    {(0, 1): 2.0, (1, 2): 2.0}
    >>> nx.edge_betweenness_centrality(DG, normalized=True)
    {(0, 1): 0.3333333333333333, (1, 2): 0.3333333333333333}

    Computing the full edge betweenness centrality can be costly.
    This function can also be used to compute approximate edge betweenness centrality
    by setting `k`. This determines the number of source nodes to sample.

    Since the partial sums only include `k` terms, instead of ``n``,
    we multiply them by ``n / k``, to approximate the full sum.
    As the sets of sources and targets are not the same anymore,
    paths have to be counted in a directed way. We thus count each as half a path.
    This ensures that the results approximate the standard betweenness for ``k == n``.

    For instance, in the undirected 3-path graph case, setting ``k = 2`` (with ``seed=42``)
    selects nodes 0 and 2 as sources.
    This means only shortest paths starting at these nodes are considered.
    The raw counts are ``{(0, 1): 3, (1, 2): 3}``. Accounting for the partial sum
    and applying the undirectedness half-path correction, we get

    >>> nx.edge_betweenness_centrality(G, k=2, normalized=False, seed=42)
    {(0, 1): 2.25, (1, 2): 2.25}

    When normalizing, we instead want to divide by the total number of $(s, t)$-pairs.
    This is $k(n-1)$, which is $4$ in our case.

    >>> nx.edge_betweenness_centrality(G, k=2, normalized=True, seed=42)
    {(0, 1): 0.75, (1, 2): 0.75}
    """
    betweenness = dict.fromkeys(G, 0.0)  # b[v]=0 for v in G
    # b[e]=0 for e in G.edges()
    betweenness.update(dict.fromkeys(G.edges(), 0.0))
    if k == len(G):
        # This is done for performance; the result is the same regardless.
        # (Mirrors the identical shortcut in `betweenness_centrality`.)
        k = None
    if k is None:
        nodes = G
    else:
        # Sample k pivot nodes (without replacement) to use as sources.
        nodes = seed.sample(list(G.nodes()), k)
    for s in nodes:
        # single source shortest paths
        if weight is None:  # use BFS
            S, P, sigma, _ = _single_source_shortest_path_basic(G, s)
        else:  # use Dijkstra's algorithm
            S, P, sigma, _ = _single_source_dijkstra_path_basic(G, s, weight)
        # accumulation (dependency back-propagation per Brandes)
        betweenness = _accumulate_edges(betweenness, S, P, sigma, s)
    # rescaling
    for n in G:  # remove nodes to only return edges
        del betweenness[n]
    betweenness = _rescale(
        betweenness,
        len(G),
        normalized=normalized,
        directed=G.is_directed(),
        sampled_nodes=None if k is None else nodes,
    )
    if G.is_multigraph():
        # Split each (u, v) value among the keyed parallel edges.
        betweenness = _add_edge_keys(G, betweenness, weight=weight)
    return betweenness
392
393
394# helpers for betweenness centrality
395
396
397def _single_source_shortest_path_basic(G, s):
398 S = []
399 P = {}
400 for v in G:
401 P[v] = []
402 sigma = dict.fromkeys(G, 0.0) # sigma[v]=0 for v in G
403 D = {}
404 sigma[s] = 1.0
405 D[s] = 0
406 Q = deque([s])
407 while Q: # use BFS to find shortest paths
408 v = Q.popleft()
409 S.append(v)
410 Dv = D[v]
411 sigmav = sigma[v]
412 for w in G[v]:
413 if w not in D:
414 Q.append(w)
415 D[w] = Dv + 1
416 if D[w] == Dv + 1: # this is a shortest path, count paths
417 sigma[w] += sigmav
418 P[w].append(v) # predecessors
419 return S, P, sigma, D
420
421
def _single_source_dijkstra_path_basic(G, s, weight):
    """Dijkstra shortest-path search and path counting from source ``s``.

    First phase of Brandes' algorithm for weighted graphs.  Returns
    ``(S, P, sigma, D)`` where ``S`` lists reached nodes in nondecreasing
    distance from ``s``, ``P`` maps each node to its shortest-path
    predecessors, ``sigma`` maps each node to its shortest-path count, and
    ``D`` maps reached nodes to their final distance.
    """
    weight = _weight_function(G, weight)
    # modified from Eppstein
    S = []
    P = {}
    for v in G:
        P[v] = []
    sigma = dict.fromkeys(G, 0.0)  # sigma[v]=0 for v in G
    D = {}
    sigma[s] = 1.0
    seen = {s: 0}  # best tentative distance found so far per node
    c = count()  # tie-breaker so the heap never compares node objects
    Q = []  # use Q as heap with (distance,node id) tuples
    heappush(Q, (0, next(c), s, s))
    while Q:
        (dist, _, pred, v) = heappop(Q)
        if v in D:
            continue  # already searched this node.
        sigma[v] += sigma[pred]  # count paths
        S.append(v)
        D[v] = dist  # dist is final once v is popped from the heap
        for w, edgedata in G[v].items():
            vw_dist = dist + weight(v, w, edgedata)
            if w not in D and (w not in seen or vw_dist < seen[w]):
                # Strictly shorter route found: discard w's old path data.
                seen[w] = vw_dist
                heappush(Q, (vw_dist, next(c), v, w))
                sigma[w] = 0.0
                P[w] = [v]
            elif vw_dist == seen[w]:  # handle equal paths
                sigma[w] += sigma[v]
                P[w].append(v)
    return S, P, sigma, D
454
455
456def _accumulate_basic(betweenness, S, P, sigma, s):
457 delta = dict.fromkeys(S, 0)
458 while S:
459 w = S.pop()
460 coeff = (1 + delta[w]) / sigma[w]
461 for v in P[w]:
462 delta[v] += sigma[v] * coeff
463 if w != s:
464 betweenness[w] += delta[w]
465 return betweenness, delta
466
467
468def _accumulate_endpoints(betweenness, S, P, sigma, s):
469 betweenness[s] += len(S) - 1
470 delta = dict.fromkeys(S, 0)
471 while S:
472 w = S.pop()
473 coeff = (1 + delta[w]) / sigma[w]
474 for v in P[w]:
475 delta[v] += sigma[v] * coeff
476 if w != s:
477 betweenness[w] += delta[w] + 1
478 return betweenness, delta
479
480
481def _accumulate_edges(betweenness, S, P, sigma, s):
482 delta = dict.fromkeys(S, 0)
483 while S:
484 w = S.pop()
485 coeff = (1 + delta[w]) / sigma[w]
486 for v in P[w]:
487 c = sigma[v] * coeff
488 if (v, w) not in betweenness:
489 betweenness[(w, v)] += c
490 else:
491 betweenness[(v, w)] += c
492 delta[v] += c
493 if w != s:
494 betweenness[w] += delta[w]
495 return betweenness
496
497
498def _rescale(
499 betweenness, n, *, normalized, directed, endpoints=True, sampled_nodes=None
500):
501 # For edge betweenness, `endpoints` is always `True`.
502
503 k = None if sampled_nodes is None else len(sampled_nodes)
504 # N is used to count the number of valid (s, t) pairs where s != t that
505 # could have a path pass through v. If endpoints is False, then v must
506 # not be the target t, hence why we subtract by 1.
507 N = n if endpoints else n - 1
508 if N < 2:
509 # No rescaling necessary: b=0 for all nodes
510 return betweenness
511
512 K_source = N if k is None else k
513
514 if k is None or endpoints:
515 # No sampling adjustment needed
516 if normalized:
517 # Divide by the number of valid (s, t) node pairs that could have
518 # a path through v where s != t.
519 scale = 1 / (K_source * (N - 1))
520 else:
521 # Scale to the full BC
522 if not directed:
523 # The non-normalized BC values are computed the same way for
524 # directed and undirected graphs: shortest paths are computed and
525 # counted for each *ordered* (s, t) pair. Undirected graphs should
526 # only count valid *unordered* node pairs {s, t}; that is, (s, t)
527 # and (t, s) should be counted only once. We correct for this here.
528 correction = 2
529 else:
530 correction = 1
531 scale = N / (K_source * correction)
532
533 if scale != 1:
534 for v in betweenness:
535 betweenness[v] *= scale
536 return betweenness
537
538 # Sampling adjustment needed when excluding endpoints when using k. In this
539 # case, we need to handle source nodes differently from non-source nodes,
540 # because source nodes can't include themselves since endpoints are excluded.
541 # Without this, k == n would be a special case that would violate the
542 # assumption that node `v` is not one of the (s, t) node pairs.
543 if normalized:
544 # NaN for undefined 0/0; there is no data for source node when k=1
545 scale_source = 1 / ((K_source - 1) * (N - 1)) if K_source > 1 else math.nan
546 scale_nonsource = 1 / (K_source * (N - 1))
547 else:
548 correction = 1 if directed else 2
549 scale_source = N / ((K_source - 1) * correction) if K_source > 1 else math.nan
550 scale_nonsource = N / (K_source * correction)
551
552 sampled_nodes = set(sampled_nodes)
553 for v in betweenness:
554 betweenness[v] *= scale_source if v in sampled_nodes else scale_nonsource
555 return betweenness
556
557
558@not_implemented_for("graph")
559def _add_edge_keys(G, betweenness, weight=None):
560 r"""Adds the corrected betweenness centrality (BC) values for multigraphs.
561
562 Parameters
563 ----------
564 G : NetworkX graph.
565
566 betweenness : dictionary
567 Dictionary mapping adjacent node tuples to betweenness centrality values.
568
569 weight : string or function
570 See `_weight_function` for details. Defaults to `None`.
571
572 Returns
573 -------
574 edges : dictionary
575 The parameter `betweenness` including edges with keys and their
576 betweenness centrality values.
577
578 The BC value is divided among edges of equal weight.
579 """
580 _weight = _weight_function(G, weight)
581
582 edge_bc = dict.fromkeys(G.edges, 0.0)
583 for u, v in betweenness:
584 d = G[u][v]
585 wt = _weight(u, v, d)
586 keys = [k for k in d if _weight(u, v, {k: d[k]}) == wt]
587 bc = betweenness[(u, v)] / len(keys)
588 for k in keys:
589 edge_bc[(u, v, k)] = bc
590
591 return edge_bc