1"""Betweenness centrality measures."""
2
3import math
4from collections import deque
5from heapq import heappop, heappush
6from itertools import count
7
8import networkx as nx
9from networkx.algorithms.shortest_paths.weighted import _weight_function
10from networkx.utils import py_random_state
11from networkx.utils.decorators import not_implemented_for
12
13__all__ = ["betweenness_centrality", "edge_betweenness_centrality"]
14
15
16@py_random_state(5)
17@nx._dispatchable(edge_attrs="weight")
18def betweenness_centrality(
19 G, k=None, normalized=True, weight=None, endpoints=False, seed=None
20):
21 r"""Compute the shortest-path betweenness centrality for nodes.
22
23 Betweenness centrality of a node $v$ is the sum of the
24 fraction of all-pairs shortest paths that pass through $v$
25
26 .. math::
27
28 c_B(v) =\sum_{s,t \in V} \frac{\sigma(s, t|v)}{\sigma(s, t)}
29
30 where $V$ is the set of nodes, $\sigma(s, t)$ is the number of
31 shortest $(s, t)$-paths, and $\sigma(s, t|v)$ is the number of
32 those paths passing through some node $v$ other than $s, t$.
33 If $s = t$, $\sigma(s, t) = 1$, and if $v \in {s, t}$,
34 $\sigma(s, t|v) = 0$ [2]_.
35
36 Parameters
37 ----------
38 G : graph
39 A NetworkX graph.
40
41 k : int, optional (default=None)
42 If k is not None use k node samples to estimate betweenness.
43 The value of k <= n where n is the number of nodes in the graph.
44 Higher values give better approximation.
45
46 normalized : bool, optional
47 If True the betweenness values are normalized by `2/((n-1)(n-2))`
48 for graphs, and `1/((n-1)(n-2))` for directed graphs where `n`
49 is the number of nodes in G.
50
51 weight : None or string, optional (default=None)
52 If None, all edge weights are considered equal.
53 Otherwise holds the name of the edge attribute used as weight.
54 Weights are used to calculate weighted shortest paths, so they are
55 interpreted as distances.
56
57 endpoints : bool, optional
58 If True include the endpoints in the shortest path counts.
59
60 seed : integer, random_state, or None (default)
61 Indicator of random number generation state.
62 See :ref:`Randomness<randomness>`.
63 Note that this is only used if k is not None.
64
65 Returns
66 -------
67 nodes : dictionary
68 Dictionary of nodes with betweenness centrality as the value.
69
70 See Also
71 --------
72 edge_betweenness_centrality
73 load_centrality
74
75 Notes
76 -----
77 The algorithm is from Ulrik Brandes [1]_.
78 See [4]_ for the original first published version and [2]_ for details on
79 algorithms for variations and related metrics.
80
81 For approximate betweenness calculations set k=#samples to use
82 k nodes ("pivots") to estimate the betweenness values. For an estimate
83 of the number of pivots needed see [3]_.
84
85 For weighted graphs the edge weights must be greater than zero.
86 Zero edge weights can produce an infinite number of equal length
87 paths between pairs of nodes.
88
89 The total number of paths between source and target is counted
90 differently for directed and undirected graphs. Directed paths
91 are easy to count. Undirected paths are tricky: should a path
92 from "u" to "v" count as 1 undirected path or as 2 directed paths?
93
94 For betweenness_centrality we report the number of undirected
95 paths when G is undirected.
96
97 For betweenness_centrality_subset the reporting is different.
98 If the source and target subsets are the same, then we want
99 to count undirected paths. But if the source and target subsets
100 differ -- for example, if sources is {0} and targets is {1},
101 then we are only counting the paths in one direction. They are
102 undirected paths but we are counting them in a directed way.
103 To count them as undirected paths, each should count as half a path.
104
105 This algorithm is not guaranteed to be correct if edge weights
106 are floating point numbers. As a workaround you can use integer
107 numbers by multiplying the relevant edge attributes by a convenient
108 constant factor (eg 100) and converting to integers.
109
110 References
111 ----------
112 .. [1] Ulrik Brandes:
113 A Faster Algorithm for Betweenness Centrality.
114 Journal of Mathematical Sociology 25(2):163-177, 2001.
115 https://doi.org/10.1080/0022250X.2001.9990249
116 .. [2] Ulrik Brandes:
117 On Variants of Shortest-Path Betweenness
118 Centrality and their Generic Computation.
119 Social Networks 30(2):136-145, 2008.
120 https://doi.org/10.1016/j.socnet.2007.11.001
121 .. [3] Ulrik Brandes and Christian Pich:
122 Centrality Estimation in Large Networks.
123 International Journal of Bifurcation and Chaos 17(7):2303-2318, 2007.
124 https://dx.doi.org/10.1142/S0218127407018403
125 .. [4] Linton C. Freeman:
126 A set of measures of centrality based on betweenness.
127 Sociometry 40: 35–41, 1977
128 https://doi.org/10.2307/3033543
129 """
130 betweenness = dict.fromkeys(G, 0.0) # b[v]=0 for v in G
131 if k == len(G):
132 # This is done for performance; the result is the same regardless.
133 k = None
134 if k is None:
135 nodes = G
136 else:
137 nodes = seed.sample(list(G.nodes()), k)
138 for s in nodes:
139 # single source shortest paths
140 if weight is None: # use BFS
141 S, P, sigma, _ = _single_source_shortest_path_basic(G, s)
142 else: # use Dijkstra's algorithm
143 S, P, sigma, _ = _single_source_dijkstra_path_basic(G, s, weight)
144 # accumulation
145 if endpoints:
146 betweenness, _ = _accumulate_endpoints(betweenness, S, P, sigma, s)
147 else:
148 betweenness, _ = _accumulate_basic(betweenness, S, P, sigma, s)
149 # rescaling
150 betweenness = _rescale(
151 betweenness,
152 len(G),
153 normalized=normalized,
154 directed=G.is_directed(),
155 endpoints=endpoints,
156 sampled_nodes=None if k is None else nodes,
157 )
158 return betweenness
159
160
161@py_random_state(4)
162@nx._dispatchable(edge_attrs="weight")
163def edge_betweenness_centrality(G, k=None, normalized=True, weight=None, seed=None):
164 r"""Compute betweenness centrality for edges.
165
166 Betweenness centrality of an edge $e$ is the sum of the
167 fraction of all-pairs shortest paths that pass through $e$
168
169 .. math::
170
171 c_B(e) =\sum_{s,t \in V} \frac{\sigma(s, t|e)}{\sigma(s, t)}
172
173 where $V$ is the set of nodes, $\sigma(s, t)$ is the number of
174 shortest $(s, t)$-paths, and $\sigma(s, t|e)$ is the number of
175 those paths passing through edge $e$ [2]_.
176
177 Parameters
178 ----------
179 G : graph
180 A NetworkX graph.
181
182 k : int, optional (default=None)
183 If k is not None use k node samples to estimate betweenness.
184 The value of k <= n where n is the number of nodes in the graph.
185 Higher values give better approximation.
186
187 normalized : bool, optional
188 If True the betweenness values are normalized by $2/(n(n-1))$
189 for graphs, and $1/(n(n-1))$ for directed graphs where $n$
190 is the number of nodes in G.
191
192 weight : None or string, optional (default=None)
193 If None, all edge weights are considered equal.
194 Otherwise holds the name of the edge attribute used as weight.
195 Weights are used to calculate weighted shortest paths, so they are
196 interpreted as distances.
197
198 seed : integer, random_state, or None (default)
199 Indicator of random number generation state.
200 See :ref:`Randomness<randomness>`.
201 Note that this is only used if k is not None.
202
203 Returns
204 -------
205 edges : dictionary
206 Dictionary of edges with betweenness centrality as the value.
207
208 See Also
209 --------
210 betweenness_centrality
211 edge_load
212
213 Notes
214 -----
215 The algorithm is from Ulrik Brandes [1]_.
216
217 For weighted graphs the edge weights must be greater than zero.
218 Zero edge weights can produce an infinite number of equal length
219 paths between pairs of nodes.
220
221 References
222 ----------
223 .. [1] Ulrik Brandes: On Variants of Shortest-Path Betweenness
224 Centrality and their Generic Computation.
225 Social Networks 30(2):136-145, 2008.
226 https://doi.org/10.1016/j.socnet.2007.11.001
227 """
228 betweenness = dict.fromkeys(G, 0.0) # b[v]=0 for v in G
229 # b[e]=0 for e in G.edges()
230 betweenness.update(dict.fromkeys(G.edges(), 0.0))
231 if k is None:
232 nodes = G
233 else:
234 nodes = seed.sample(list(G.nodes()), k)
235 for s in nodes:
236 # single source shortest paths
237 if weight is None: # use BFS
238 S, P, sigma, _ = _single_source_shortest_path_basic(G, s)
239 else: # use Dijkstra's algorithm
240 S, P, sigma, _ = _single_source_dijkstra_path_basic(G, s, weight)
241 # accumulation
242 betweenness = _accumulate_edges(betweenness, S, P, sigma, s)
243 # rescaling
244 for n in G: # remove nodes to only return edges
245 del betweenness[n]
246 betweenness = _rescale(
247 betweenness,
248 len(G),
249 normalized=normalized,
250 directed=G.is_directed(),
251 sampled_nodes=None if k is None else nodes,
252 )
253 if G.is_multigraph():
254 betweenness = _add_edge_keys(G, betweenness, weight=weight)
255 return betweenness
256
257
258# helpers for betweenness centrality
259
260
261def _single_source_shortest_path_basic(G, s):
262 S = []
263 P = {}
264 for v in G:
265 P[v] = []
266 sigma = dict.fromkeys(G, 0.0) # sigma[v]=0 for v in G
267 D = {}
268 sigma[s] = 1.0
269 D[s] = 0
270 Q = deque([s])
271 while Q: # use BFS to find shortest paths
272 v = Q.popleft()
273 S.append(v)
274 Dv = D[v]
275 sigmav = sigma[v]
276 for w in G[v]:
277 if w not in D:
278 Q.append(w)
279 D[w] = Dv + 1
280 if D[w] == Dv + 1: # this is a shortest path, count paths
281 sigma[w] += sigmav
282 P[w].append(v) # predecessors
283 return S, P, sigma, D
284
285
286def _single_source_dijkstra_path_basic(G, s, weight):
287 weight = _weight_function(G, weight)
288 # modified from Eppstein
289 S = []
290 P = {}
291 for v in G:
292 P[v] = []
293 sigma = dict.fromkeys(G, 0.0) # sigma[v]=0 for v in G
294 D = {}
295 sigma[s] = 1.0
296 seen = {s: 0}
297 c = count()
298 Q = [] # use Q as heap with (distance,node id) tuples
299 heappush(Q, (0, next(c), s, s))
300 while Q:
301 (dist, _, pred, v) = heappop(Q)
302 if v in D:
303 continue # already searched this node.
304 sigma[v] += sigma[pred] # count paths
305 S.append(v)
306 D[v] = dist
307 for w, edgedata in G[v].items():
308 vw_dist = dist + weight(v, w, edgedata)
309 if w not in D and (w not in seen or vw_dist < seen[w]):
310 seen[w] = vw_dist
311 heappush(Q, (vw_dist, next(c), v, w))
312 sigma[w] = 0.0
313 P[w] = [v]
314 elif vw_dist == seen[w]: # handle equal paths
315 sigma[w] += sigma[v]
316 P[w].append(v)
317 return S, P, sigma, D
318
319
320def _accumulate_basic(betweenness, S, P, sigma, s):
321 delta = dict.fromkeys(S, 0)
322 while S:
323 w = S.pop()
324 coeff = (1 + delta[w]) / sigma[w]
325 for v in P[w]:
326 delta[v] += sigma[v] * coeff
327 if w != s:
328 betweenness[w] += delta[w]
329 return betweenness, delta
330
331
332def _accumulate_endpoints(betweenness, S, P, sigma, s):
333 betweenness[s] += len(S) - 1
334 delta = dict.fromkeys(S, 0)
335 while S:
336 w = S.pop()
337 coeff = (1 + delta[w]) / sigma[w]
338 for v in P[w]:
339 delta[v] += sigma[v] * coeff
340 if w != s:
341 betweenness[w] += delta[w] + 1
342 return betweenness, delta
343
344
345def _accumulate_edges(betweenness, S, P, sigma, s):
346 delta = dict.fromkeys(S, 0)
347 while S:
348 w = S.pop()
349 coeff = (1 + delta[w]) / sigma[w]
350 for v in P[w]:
351 c = sigma[v] * coeff
352 if (v, w) not in betweenness:
353 betweenness[(w, v)] += c
354 else:
355 betweenness[(v, w)] += c
356 delta[v] += c
357 if w != s:
358 betweenness[w] += delta[w]
359 return betweenness
360
361
362def _rescale(
363 betweenness, n, *, normalized, directed, endpoints=True, sampled_nodes=None
364):
365 # For edge betweenness, `endpoints` is always `True`.
366
367 k = None if sampled_nodes is None else len(sampled_nodes)
368 # N is used to count the number of valid (s, t) pairs where s != t that
369 # could have a path pass through v. If endpoints is False, then v must
370 # not be the target t, hence why we subtract by 1.
371 N = n if endpoints else n - 1
372 if N < 2:
373 # No rescaling necessary: b=0 for all nodes
374 return betweenness
375
376 K_source = N if k is None else k
377
378 if k is None or endpoints:
379 # No sampling adjustment needed
380 if normalized:
381 # Divide by the number of valid (s, t) node pairs that could have
382 # a path through v where s != t.
383 scale = 1 / (K_source * (N - 1))
384 else:
385 # Scale to the full BC
386 if not directed:
387 # The non-normalized BC values are computed the same way for
388 # directed and undirected graphs: shortest paths are computed and
389 # counted for each *ordered* (s, t) pair. Undirected graphs should
390 # only count valid *unordered* node pairs {s, t}; that is, (s, t)
391 # and (t, s) should be counted only once. We correct for this here.
392 correction = 2
393 else:
394 correction = 1
395 scale = N / (K_source * correction)
396
397 if scale != 1:
398 for v in betweenness:
399 betweenness[v] *= scale
400 return betweenness
401
402 # Sampling adjustment needed when excluding endpoints when using k. In this
403 # case, we need to handle source nodes differently from non-source nodes,
404 # because source nodes can't include themselves since endpoints are excluded.
405 # Without this, k == n would be a special case that would violate the
406 # assumption that node `v` is not one of the (s, t) node pairs.
407 if normalized:
408 # NaN for undefined 0/0; there is no data for source node when k=1
409 scale_source = 1 / ((K_source - 1) * (N - 1)) if K_source > 1 else math.nan
410 scale_nonsource = 1 / (K_source * (N - 1))
411 else:
412 correction = 1 if directed else 2
413 scale_source = N / ((K_source - 1) * correction) if K_source > 1 else math.nan
414 scale_nonsource = N / (K_source * correction)
415
416 sampled_nodes = set(sampled_nodes)
417 for v in betweenness:
418 betweenness[v] *= scale_source if v in sampled_nodes else scale_nonsource
419 return betweenness
420
421
422@not_implemented_for("graph")
423def _add_edge_keys(G, betweenness, weight=None):
424 r"""Adds the corrected betweenness centrality (BC) values for multigraphs.
425
426 Parameters
427 ----------
428 G : NetworkX graph.
429
430 betweenness : dictionary
431 Dictionary mapping adjacent node tuples to betweenness centrality values.
432
433 weight : string or function
434 See `_weight_function` for details. Defaults to `None`.
435
436 Returns
437 -------
438 edges : dictionary
439 The parameter `betweenness` including edges with keys and their
440 betweenness centrality values.
441
442 The BC value is divided among edges of equal weight.
443 """
444 _weight = _weight_function(G, weight)
445
446 edge_bc = dict.fromkeys(G.edges, 0.0)
447 for u, v in betweenness:
448 d = G[u][v]
449 wt = _weight(u, v, d)
450 keys = [k for k in d if _weight(u, v, {k: d[k]}) == wt]
451 bc = betweenness[(u, v)] / len(keys)
452 for k in keys:
453 edge_bc[(u, v, k)] = bc
454
455 return edge_bc