Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/networkx/algorithms/centrality/betweenness.py: 13%
178 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-10-20 07:00 +0000
« prev ^ index » next coverage.py v7.3.2, created at 2023-10-20 07:00 +0000
1"""Betweenness centrality measures."""
2from collections import deque
3from heapq import heappop, heappush
4from itertools import count
6import networkx as nx
7from networkx.algorithms.shortest_paths.weighted import _weight_function
8from networkx.utils import py_random_state
9from networkx.utils.decorators import not_implemented_for
11__all__ = ["betweenness_centrality", "edge_betweenness_centrality"]
14@py_random_state(5)
15@nx._dispatch(edge_attrs="weight")
16def betweenness_centrality(
17 G, k=None, normalized=True, weight=None, endpoints=False, seed=None
18):
19 r"""Compute the shortest-path betweenness centrality for nodes.
21 Betweenness centrality of a node $v$ is the sum of the
22 fraction of all-pairs shortest paths that pass through $v$
24 .. math::
26 c_B(v) =\sum_{s,t \in V} \frac{\sigma(s, t|v)}{\sigma(s, t)}
28 where $V$ is the set of nodes, $\sigma(s, t)$ is the number of
29 shortest $(s, t)$-paths, and $\sigma(s, t|v)$ is the number of
30 those paths passing through some node $v$ other than $s, t$.
31 If $s = t$, $\sigma(s, t) = 1$, and if $v \in {s, t}$,
32 $\sigma(s, t|v) = 0$ [2]_.
34 Parameters
35 ----------
36 G : graph
37 A NetworkX graph.
39 k : int, optional (default=None)
40 If k is not None use k node samples to estimate betweenness.
41 The value of k <= n where n is the number of nodes in the graph.
42 Higher values give better approximation.
44 normalized : bool, optional
45 If True the betweenness values are normalized by `2/((n-1)(n-2))`
46 for graphs, and `1/((n-1)(n-2))` for directed graphs where `n`
47 is the number of nodes in G.
49 weight : None or string, optional (default=None)
50 If None, all edge weights are considered equal.
51 Otherwise holds the name of the edge attribute used as weight.
52 Weights are used to calculate weighted shortest paths, so they are
53 interpreted as distances.
55 endpoints : bool, optional
56 If True include the endpoints in the shortest path counts.
58 seed : integer, random_state, or None (default)
59 Indicator of random number generation state.
60 See :ref:`Randomness<randomness>`.
61 Note that this is only used if k is not None.
63 Returns
64 -------
65 nodes : dictionary
66 Dictionary of nodes with betweenness centrality as the value.
68 See Also
69 --------
70 edge_betweenness_centrality
71 load_centrality
73 Notes
74 -----
75 The algorithm is from Ulrik Brandes [1]_.
76 See [4]_ for the original first published version and [2]_ for details on
77 algorithms for variations and related metrics.
79 For approximate betweenness calculations set k=#samples to use
80 k nodes ("pivots") to estimate the betweenness values. For an estimate
81 of the number of pivots needed see [3]_.
83 For weighted graphs the edge weights must be greater than zero.
84 Zero edge weights can produce an infinite number of equal length
85 paths between pairs of nodes.
87 The total number of paths between source and target is counted
88 differently for directed and undirected graphs. Directed paths
89 are easy to count. Undirected paths are tricky: should a path
90 from "u" to "v" count as 1 undirected path or as 2 directed paths?
92 For betweenness_centrality we report the number of undirected
93 paths when G is undirected.
95 For betweenness_centrality_subset the reporting is different.
96 If the source and target subsets are the same, then we want
97 to count undirected paths. But if the source and target subsets
98 differ -- for example, if sources is {0} and targets is {1},
99 then we are only counting the paths in one direction. They are
100 undirected paths but we are counting them in a directed way.
101 To count them as undirected paths, each should count as half a path.
103 This algorithm is not guaranteed to be correct if edge weights
104 are floating point numbers. As a workaround you can use integer
105 numbers by multiplying the relevant edge attributes by a convenient
106 constant factor (eg 100) and converting to integers.
108 References
109 ----------
110 .. [1] Ulrik Brandes:
111 A Faster Algorithm for Betweenness Centrality.
112 Journal of Mathematical Sociology 25(2):163-177, 2001.
113 https://doi.org/10.1080/0022250X.2001.9990249
114 .. [2] Ulrik Brandes:
115 On Variants of Shortest-Path Betweenness
116 Centrality and their Generic Computation.
117 Social Networks 30(2):136-145, 2008.
118 https://doi.org/10.1016/j.socnet.2007.11.001
119 .. [3] Ulrik Brandes and Christian Pich:
120 Centrality Estimation in Large Networks.
121 International Journal of Bifurcation and Chaos 17(7):2303-2318, 2007.
122 https://dx.doi.org/10.1142/S0218127407018403
123 .. [4] Linton C. Freeman:
124 A set of measures of centrality based on betweenness.
125 Sociometry 40: 35–41, 1977
126 https://doi.org/10.2307/3033543
127 """
128 betweenness = dict.fromkeys(G, 0.0) # b[v]=0 for v in G
129 if k is None:
130 nodes = G
131 else:
132 nodes = seed.sample(list(G.nodes()), k)
133 for s in nodes:
134 # single source shortest paths
135 if weight is None: # use BFS
136 S, P, sigma, _ = _single_source_shortest_path_basic(G, s)
137 else: # use Dijkstra's algorithm
138 S, P, sigma, _ = _single_source_dijkstra_path_basic(G, s, weight)
139 # accumulation
140 if endpoints:
141 betweenness, _ = _accumulate_endpoints(betweenness, S, P, sigma, s)
142 else:
143 betweenness, _ = _accumulate_basic(betweenness, S, P, sigma, s)
144 # rescaling
145 betweenness = _rescale(
146 betweenness,
147 len(G),
148 normalized=normalized,
149 directed=G.is_directed(),
150 k=k,
151 endpoints=endpoints,
152 )
153 return betweenness
156@py_random_state(4)
157@nx._dispatch(edge_attrs="weight")
158def edge_betweenness_centrality(G, k=None, normalized=True, weight=None, seed=None):
159 r"""Compute betweenness centrality for edges.
161 Betweenness centrality of an edge $e$ is the sum of the
162 fraction of all-pairs shortest paths that pass through $e$
164 .. math::
166 c_B(e) =\sum_{s,t \in V} \frac{\sigma(s, t|e)}{\sigma(s, t)}
168 where $V$ is the set of nodes, $\sigma(s, t)$ is the number of
169 shortest $(s, t)$-paths, and $\sigma(s, t|e)$ is the number of
170 those paths passing through edge $e$ [2]_.
172 Parameters
173 ----------
174 G : graph
175 A NetworkX graph.
177 k : int, optional (default=None)
178 If k is not None use k node samples to estimate betweenness.
179 The value of k <= n where n is the number of nodes in the graph.
180 Higher values give better approximation.
182 normalized : bool, optional
183 If True the betweenness values are normalized by $2/(n(n-1))$
184 for graphs, and $1/(n(n-1))$ for directed graphs where $n$
185 is the number of nodes in G.
187 weight : None or string, optional (default=None)
188 If None, all edge weights are considered equal.
189 Otherwise holds the name of the edge attribute used as weight.
190 Weights are used to calculate weighted shortest paths, so they are
191 interpreted as distances.
193 seed : integer, random_state, or None (default)
194 Indicator of random number generation state.
195 See :ref:`Randomness<randomness>`.
196 Note that this is only used if k is not None.
198 Returns
199 -------
200 edges : dictionary
201 Dictionary of edges with betweenness centrality as the value.
203 See Also
204 --------
205 betweenness_centrality
206 edge_load
208 Notes
209 -----
210 The algorithm is from Ulrik Brandes [1]_.
212 For weighted graphs the edge weights must be greater than zero.
213 Zero edge weights can produce an infinite number of equal length
214 paths between pairs of nodes.
216 References
217 ----------
218 .. [1] A Faster Algorithm for Betweenness Centrality. Ulrik Brandes,
219 Journal of Mathematical Sociology 25(2):163-177, 2001.
220 https://doi.org/10.1080/0022250X.2001.9990249
221 .. [2] Ulrik Brandes: On Variants of Shortest-Path Betweenness
222 Centrality and their Generic Computation.
223 Social Networks 30(2):136-145, 2008.
224 https://doi.org/10.1016/j.socnet.2007.11.001
225 """
226 betweenness = dict.fromkeys(G, 0.0) # b[v]=0 for v in G
227 # b[e]=0 for e in G.edges()
228 betweenness.update(dict.fromkeys(G.edges(), 0.0))
229 if k is None:
230 nodes = G
231 else:
232 nodes = seed.sample(list(G.nodes()), k)
233 for s in nodes:
234 # single source shortest paths
235 if weight is None: # use BFS
236 S, P, sigma, _ = _single_source_shortest_path_basic(G, s)
237 else: # use Dijkstra's algorithm
238 S, P, sigma, _ = _single_source_dijkstra_path_basic(G, s, weight)
239 # accumulation
240 betweenness = _accumulate_edges(betweenness, S, P, sigma, s)
241 # rescaling
242 for n in G: # remove nodes to only return edges
243 del betweenness[n]
244 betweenness = _rescale_e(
245 betweenness, len(G), normalized=normalized, directed=G.is_directed()
246 )
247 if G.is_multigraph():
248 betweenness = _add_edge_keys(G, betweenness, weight=weight)
249 return betweenness
252# helpers for betweenness centrality
255def _single_source_shortest_path_basic(G, s):
256 S = []
257 P = {}
258 for v in G:
259 P[v] = []
260 sigma = dict.fromkeys(G, 0.0) # sigma[v]=0 for v in G
261 D = {}
262 sigma[s] = 1.0
263 D[s] = 0
264 Q = deque([s])
265 while Q: # use BFS to find shortest paths
266 v = Q.popleft()
267 S.append(v)
268 Dv = D[v]
269 sigmav = sigma[v]
270 for w in G[v]:
271 if w not in D:
272 Q.append(w)
273 D[w] = Dv + 1
274 if D[w] == Dv + 1: # this is a shortest path, count paths
275 sigma[w] += sigmav
276 P[w].append(v) # predecessors
277 return S, P, sigma, D
280def _single_source_dijkstra_path_basic(G, s, weight):
281 weight = _weight_function(G, weight)
282 # modified from Eppstein
283 S = []
284 P = {}
285 for v in G:
286 P[v] = []
287 sigma = dict.fromkeys(G, 0.0) # sigma[v]=0 for v in G
288 D = {}
289 sigma[s] = 1.0
290 push = heappush
291 pop = heappop
292 seen = {s: 0}
293 c = count()
294 Q = [] # use Q as heap with (distance,node id) tuples
295 push(Q, (0, next(c), s, s))
296 while Q:
297 (dist, _, pred, v) = pop(Q)
298 if v in D:
299 continue # already searched this node.
300 sigma[v] += sigma[pred] # count paths
301 S.append(v)
302 D[v] = dist
303 for w, edgedata in G[v].items():
304 vw_dist = dist + weight(v, w, edgedata)
305 if w not in D and (w not in seen or vw_dist < seen[w]):
306 seen[w] = vw_dist
307 push(Q, (vw_dist, next(c), v, w))
308 sigma[w] = 0.0
309 P[w] = [v]
310 elif vw_dist == seen[w]: # handle equal paths
311 sigma[w] += sigma[v]
312 P[w].append(v)
313 return S, P, sigma, D
316def _accumulate_basic(betweenness, S, P, sigma, s):
317 delta = dict.fromkeys(S, 0)
318 while S:
319 w = S.pop()
320 coeff = (1 + delta[w]) / sigma[w]
321 for v in P[w]:
322 delta[v] += sigma[v] * coeff
323 if w != s:
324 betweenness[w] += delta[w]
325 return betweenness, delta
328def _accumulate_endpoints(betweenness, S, P, sigma, s):
329 betweenness[s] += len(S) - 1
330 delta = dict.fromkeys(S, 0)
331 while S:
332 w = S.pop()
333 coeff = (1 + delta[w]) / sigma[w]
334 for v in P[w]:
335 delta[v] += sigma[v] * coeff
336 if w != s:
337 betweenness[w] += delta[w] + 1
338 return betweenness, delta
341def _accumulate_edges(betweenness, S, P, sigma, s):
342 delta = dict.fromkeys(S, 0)
343 while S:
344 w = S.pop()
345 coeff = (1 + delta[w]) / sigma[w]
346 for v in P[w]:
347 c = sigma[v] * coeff
348 if (v, w) not in betweenness:
349 betweenness[(w, v)] += c
350 else:
351 betweenness[(v, w)] += c
352 delta[v] += c
353 if w != s:
354 betweenness[w] += delta[w]
355 return betweenness
358def _rescale(betweenness, n, normalized, directed=False, k=None, endpoints=False):
359 if normalized:
360 if endpoints:
361 if n < 2:
362 scale = None # no normalization
363 else:
364 # Scale factor should include endpoint nodes
365 scale = 1 / (n * (n - 1))
366 elif n <= 2:
367 scale = None # no normalization b=0 for all nodes
368 else:
369 scale = 1 / ((n - 1) * (n - 2))
370 else: # rescale by 2 for undirected graphs
371 if not directed:
372 scale = 0.5
373 else:
374 scale = None
375 if scale is not None:
376 if k is not None:
377 scale = scale * n / k
378 for v in betweenness:
379 betweenness[v] *= scale
380 return betweenness
383def _rescale_e(betweenness, n, normalized, directed=False, k=None):
384 if normalized:
385 if n <= 1:
386 scale = None # no normalization b=0 for all nodes
387 else:
388 scale = 1 / (n * (n - 1))
389 else: # rescale by 2 for undirected graphs
390 if not directed:
391 scale = 0.5
392 else:
393 scale = None
394 if scale is not None:
395 if k is not None:
396 scale = scale * n / k
397 for v in betweenness:
398 betweenness[v] *= scale
399 return betweenness
402@not_implemented_for("graph")
403def _add_edge_keys(G, betweenness, weight=None):
404 r"""Adds the corrected betweenness centrality (BC) values for multigraphs.
406 Parameters
407 ----------
408 G : NetworkX graph.
410 betweenness : dictionary
411 Dictionary mapping adjacent node tuples to betweenness centrality values.
413 weight : string or function
414 See `_weight_function` for details. Defaults to `None`.
416 Returns
417 -------
418 edges : dictionary
419 The parameter `betweenness` including edges with keys and their
420 betweenness centrality values.
422 The BC value is divided among edges of equal weight.
423 """
424 _weight = _weight_function(G, weight)
426 edge_bc = dict.fromkeys(G.edges, 0.0)
427 for u, v in betweenness:
428 d = G[u][v]
429 wt = _weight(u, v, d)
430 keys = [k for k in d if _weight(u, v, {k: d[k]}) == wt]
431 bc = betweenness[(u, v)] / len(keys)
432 for k in keys:
433 edge_bc[(u, v, k)] = bc
435 return edge_bc