1"""Betweenness centrality measures."""
2
3import math
4from collections import deque
5from heapq import heappop, heappush
6from itertools import count
7
8import networkx as nx
9from networkx.algorithms.shortest_paths.weighted import _weight_function
10from networkx.utils import py_random_state
11from networkx.utils.decorators import not_implemented_for
12
13__all__ = ["betweenness_centrality", "edge_betweenness_centrality"]
14
15
@py_random_state("seed")
@nx._dispatchable(edge_attrs="weight")
def betweenness_centrality(
    G, k=None, normalized=True, weight=None, endpoints=False, seed=None
):
    r"""Compute the shortest-path betweenness centrality for nodes.

    Betweenness centrality of a node $v$ is the sum of the
    fraction of all-pairs shortest paths that pass through $v$.

    .. math::

       c_B(v) = \frac{1}{P} \sum_{s, t \in V} \frac{\sigma(s, t | v)}{\sigma(s, t)}

    where $V$ is the set of nodes, $\sigma(s, t)$ is the number of
    shortest $(s, t)$-paths, and $\sigma(s, t | v)$ is the number of
    those paths passing through some node $v$ other than $s$ and $t$.
    If $s = t$, $\sigma(s, t) = 1$, and if $v \in \{s, t\}$,
    $\sigma(s, t | v) = 0$ (when `endpoints` takes its default value of
    `False`) [2]_.
    $P$ is a normalization factor representing the number of pairs of nodes
    that have counted shortest paths. Its value depends on the values of `normalized`
    and `endpoints`, and on whether the graph is directed (see Notes). It can
    be set to ``1`` with ``normalized=False``.

    Parameters
    ----------
    G : graph
        A NetworkX graph.

    k : int, optional (default=None)
        If `k` is not `None`, use `k` sampled nodes as sources for the considered paths.
        The resulting sampled counts are then inflated to approximate betweenness.
        Higher values of `k` give better approximation. Must have ``k <= len(G)``.

    normalized : bool, optional (default=True)
        If `True`, the betweenness values are rescaled by dividing by the number of
        possible $(s, t)$-pairs in the graph.

    weight : None or string, optional (default=None)
        If `None`, all edge weights are 1.
        Otherwise holds the name of the edge attribute used as weight.
        Weights are used to calculate weighted shortest paths, so they are
        interpreted as distances.

    endpoints : bool, optional (default=False)
        If `True`, include the endpoints $s$ and $t$ in the shortest path counts.
        This is taken into account when rescaling the values.

    seed : integer, random_state, or None (default)
        Indicator of random number generation state.
        See :ref:`Randomness<randomness>`.
        Note that this is only used if ``k is not None``.

    Returns
    -------
    nodes : dict
        Dictionary of nodes with betweenness centrality as the value.

    See Also
    --------
    betweenness_centrality_subset
    edge_betweenness_centrality
    load_centrality

    Notes
    -----
    The algorithm is from Ulrik Brandes [1]_.
    See [4]_ for the original first published version and [2]_ for details on
    algorithms for variations and related metrics.

    For approximate betweenness calculations, set `k` to the number of sampled
    nodes ("pivots") used as sources to estimate the betweenness values.
    The formula then sums over $s$ in these pivots, instead of over all nodes.
    The resulting sum is then inflated to approximate the full sum.
    For a discussion of how to choose `k` for efficiency, see [3]_.

    For weighted graphs the edge weights must be greater than zero.
    Zero edge weights can produce an infinite number of equal length
    paths between pairs of nodes.

    Directed graphs and undirected graphs count paths differently.
    In directed graphs, each pair of source-target nodes is considered separately
    in each direction, as the shortest paths can differ by direction.
    However, in undirected graphs, each pair of nodes is considered only once,
    as the shortest paths are symmetric.
    This means the normalization factor to divide by is $N(N-1)$ for directed graphs
    and $N(N-1)/2$ for undirected graphs, where $N = n$ (the number of nodes)
    if endpoints are included and $N = n-1$ otherwise.

    This algorithm is not guaranteed to be correct if edge weights
    are floating point numbers. As a workaround you can use integer
    numbers by multiplying the relevant edge attributes by a convenient
    constant factor (e.g. 100) and converting to integers.

    References
    ----------
    .. [1] Ulrik Brandes:
       A Faster Algorithm for Betweenness Centrality.
       Journal of Mathematical Sociology 25(2):163--177, 2001.
       https://doi.org/10.1080/0022250X.2001.9990249
    .. [2] Ulrik Brandes:
       On Variants of Shortest-Path Betweenness
       Centrality and their Generic Computation.
       Social Networks 30(2):136--145, 2008.
       https://doi.org/10.1016/j.socnet.2007.11.001
    .. [3] Ulrik Brandes and Christian Pich:
       Centrality Estimation in Large Networks.
       International Journal of Bifurcation and Chaos 17(7):2303--2318, 2007.
       https://dx.doi.org/10.1142/S0218127407018403
    .. [4] Linton C. Freeman:
       A set of measures of centrality based on betweenness.
       Sociometry 40: 35--41, 1977
       https://doi.org/10.2307/3033543

    Examples
    --------
    Consider an undirected 3-path. Each pair of nodes has exactly one shortest
    path between them. Since the graph is undirected, only unordered pairs are counted.
    Of these (and when `endpoints` is `False`), none of the shortest paths pass
    through 0 and 2, and only the shortest path between 0 and 2 passes through 1.
    As such, the counts should be ``{0: 0, 1: 1, 2: 0}``.

    >>> G = nx.path_graph(3)
    >>> nx.betweenness_centrality(G, normalized=False, endpoints=False)
    {0: 0.0, 1: 1.0, 2: 0.0}

    If `endpoints` is `True`, we also need to count endpoints as being on the path:
    $\sigma(s, t | s) = \sigma(s, t | t) = \sigma(s, t)$.
    In our example, 0 is then part of two shortest paths (0 to 1 and 0 to 2);
    similarly, 2 is part of two shortest paths (0 to 2 and 1 to 2).
    1 is part of all three shortest paths. This makes the new raw
    counts ``{0: 2, 1: 3, 2: 2}``.

    >>> nx.betweenness_centrality(G, normalized=False, endpoints=True)
    {0: 2.0, 1: 3.0, 2: 2.0}

    With normalization, the values are divided by the number of unordered $(s, t)$-pairs.
    If we are not counting endpoints, there are $n - 1$ possible choices for $s$
    (all except the node we are computing betweenness centrality for), which in turn
    leaves $n - 2$ possible choices for $t$ as $s \ne t$.
    The total number of unordered pairs when `endpoints` is `False` is $(n - 1)(n - 2)/2 = 1$.
    If `endpoints` is `True`, there are $n(n - 1)/2 = 3$ unordered $(s, t)$-pairs to divide by.

    >>> nx.betweenness_centrality(G, normalized=True, endpoints=False)
    {0: 0.0, 1: 1.0, 2: 0.0}
    >>> nx.betweenness_centrality(G, normalized=True, endpoints=True)
    {0: 0.6666666666666666, 1: 1.0, 2: 0.6666666666666666}

    If the graph is directed instead, we now need to consider $(s, t)$-pairs
    in both directions. Our example becomes a directed 3-path.
    Without counting endpoints, we only have one path through 1 (0 to 2).
    This means the raw counts are ``{0: 0, 1: 1, 2: 0}``.

    >>> DG = nx.path_graph(3, create_using=nx.DiGraph)
    >>> nx.betweenness_centrality(DG, normalized=False, endpoints=False)
    {0: 0.0, 1: 1.0, 2: 0.0}

    If we do include endpoints, the raw counts are ``{0: 2, 1: 3, 2: 2}``.

    >>> nx.betweenness_centrality(DG, normalized=False, endpoints=True)
    {0: 2.0, 1: 3.0, 2: 2.0}

    If we want to normalize directed betweenness centrality, the raw counts
    are normalized by the number of $(s, t)$-pairs. There are $n(n - 1)$
    possible paths with endpoints and $(n - 1)(n - 2)$ without endpoints.
    In our example, that's 6 with endpoints and 2 without endpoints.

    >>> nx.betweenness_centrality(DG, normalized=True, endpoints=True)
    {0: 0.3333333333333333, 1: 0.5, 2: 0.3333333333333333}
    >>> nx.betweenness_centrality(DG, normalized=True, endpoints=False)
    {0: 0.0, 1: 0.5, 2: 0.0}

    Computing the full betweenness centrality can be costly.
    This function can also be used to compute approximate betweenness centrality
    by setting `k`. This only determines the number of source nodes to sample;
    all nodes are targets.

    For simplicity, we only consider the case where endpoints are included in the counts.
    Since the partial sums only include `k` terms, instead of ``n``,
    we multiply them by ``n / k``, to approximate the full sum.
    As the sets of sources and targets are not the same anymore,
    paths have to be counted in a directed way. We thus count each as half a path.
    This ensures that the results approximate the standard betweenness for ``k == n``.

    For instance, in the undirected 3-path graph case, setting ``k = 2`` (with ``seed=42``)
    selects nodes 0 and 2 as sources.
    This means only shortest paths starting at these nodes are considered.
    The raw counts with endpoints are ``{0: 3, 1: 4, 2: 3}``. Accounting for the partial sum
    and applying the undirectedness half-path correction, we get

    >>> nx.betweenness_centrality(G, k=2, normalized=False, endpoints=True, seed=42)
    {0: 2.25, 1: 3.0, 2: 2.25}

    When normalizing, we instead want to divide by the total number of $(s, t)$-pairs.
    This is $k(n - 1)$ with endpoints.

    >>> nx.betweenness_centrality(G, k=2, normalized=True, endpoints=True, seed=42)
    {0: 0.75, 1: 1.0, 2: 0.75}
    """
    betweenness = dict.fromkeys(G, 0.0)  # b[v]=0 for v in G
    if k == len(G):
        # This is done for performance; the result is the same regardless.
        k = None
    if k is None:
        nodes = G
    else:
        # Sample k source nodes ("pivots"); all nodes remain targets.
        nodes = seed.sample(list(G.nodes()), k)
    for s in nodes:
        # single source shortest paths
        if weight is None:  # use BFS
            S, P, sigma, _ = _single_source_shortest_path_basic(G, s)
        else:  # use Dijkstra's algorithm
            S, P, sigma, _ = _single_source_dijkstra_path_basic(G, s, weight)
        # accumulation
        if endpoints:
            betweenness, _ = _accumulate_endpoints(betweenness, S, P, sigma, s)
        else:
            betweenness, _ = _accumulate_basic(betweenness, S, P, sigma, s)
    # rescaling
    betweenness = _rescale(
        betweenness,
        len(G),
        normalized=normalized,
        directed=G.is_directed(),
        endpoints=endpoints,
        sampled_nodes=None if k is None else nodes,
    )
    return betweenness
245
246
@py_random_state("seed")
@nx._dispatchable(edge_attrs="weight")
def edge_betweenness_centrality(G, k=None, normalized=True, weight=None, seed=None):
    r"""Compute betweenness centrality for edges.

    Betweenness centrality of an edge $e$ is the sum of the
    fraction of all-pairs shortest paths that pass through $e$.

    .. math::

       c_B(e) = \frac{1}{P} \sum_{s, t \in V} \frac{\sigma(s, t | e)}{\sigma(s, t)}

    where $V$ is the set of nodes, $\sigma(s, t)$ is the number of
    shortest $(s, t)$-paths, and $\sigma(s, t | e)$ is the number of
    those paths passing through edge $e$ [1]_.
    $P$ is a normalization factor representing the number of pairs of nodes
    that have counted shortest paths. Its value depends on the value of
    `normalized` and on whether the graph is directed. It can
    be set to ``1`` with ``normalized=False``.

    Parameters
    ----------
    G : graph
        A NetworkX graph.

    k : int, optional (default=None)
        If `k` is not `None`, use `k` sampled nodes as sources for the considered paths.
        The resulting sampled counts are then inflated to approximate betweenness.
        Higher values of `k` give better approximation. Must have ``k <= len(G)``.

    normalized : bool, optional (default=True)
        If `True`, the betweenness values are rescaled by dividing by the number of
        possible $(s, t)$-pairs in the graph.

    weight : None or string, optional (default=None)
        If `None`, all edge weights are 1.
        Otherwise holds the name of the edge attribute used as weight.
        Weights are used to calculate weighted shortest paths, so they are
        interpreted as distances.

    seed : integer, random_state, or None (default)
        Indicator of random number generation state.
        See :ref:`Randomness<randomness>`.
        Note that this is only used if ``k is not None``.

    Returns
    -------
    edges : dict
        Dictionary of edges with betweenness centrality as the value.

    See Also
    --------
    betweenness_centrality
    edge_betweenness_centrality_subset
    edge_load

    Notes
    -----
    The algorithm is from Ulrik Brandes [1]_.

    For weighted graphs the edge weights must be greater than zero.
    Zero edge weights can produce an infinite number of equal length
    paths between pairs of nodes.

    References
    ----------
    .. [1] Ulrik Brandes: On Variants of Shortest-Path Betweenness
       Centrality and their Generic Computation.
       Social Networks 30(2):136--145, 2008.
       https://doi.org/10.1016/j.socnet.2007.11.001

    Examples
    --------
    Consider an undirected 3-path. Each pair of nodes has exactly one shortest
    path between them. Since the graph is undirected, only unordered pairs are counted.
    Each edge has two shortest paths passing through it.
    As such, the raw counts should be ``{(0, 1): 2, (1, 2): 2}``.

    >>> G = nx.path_graph(3)
    >>> nx.edge_betweenness_centrality(G, normalized=False)
    {(0, 1): 2.0, (1, 2): 2.0}

    With normalization, the values are divided by the number of unordered $(s, t)$-pairs,
    which is $n(n-1)/2$. For the 3-path, this is $3(3-1)/2 = 3$.

    >>> nx.edge_betweenness_centrality(G, normalized=True)
    {(0, 1): 0.6666666666666666, (1, 2): 0.6666666666666666}

    For a directed graph, all $(s, t)$-pairs are considered. The normalization factor
    is $n(n-1)$ to reflect this.

    >>> DG = nx.path_graph(3, create_using=nx.DiGraph)
    >>> nx.edge_betweenness_centrality(DG, normalized=False)
    {(0, 1): 2.0, (1, 2): 2.0}
    >>> nx.edge_betweenness_centrality(DG, normalized=True)
    {(0, 1): 0.3333333333333333, (1, 2): 0.3333333333333333}

    Computing the full edge betweenness centrality can be costly.
    This function can also be used to compute approximate edge betweenness centrality
    by setting `k`. This determines the number of source nodes to sample.

    Since the partial sums only include `k` terms, instead of ``n``,
    we multiply them by ``n / k``, to approximate the full sum.
    As the sets of sources and targets are not the same anymore,
    paths have to be counted in a directed way. We thus count each as half a path.
    This ensures that the results approximate the standard betweenness for ``k == n``.

    For instance, in the undirected 3-path graph case, setting ``k = 2`` (with ``seed=42``)
    selects nodes 0 and 2 as sources.
    This means only shortest paths starting at these nodes are considered.
    The raw counts are ``{(0, 1): 3, (1, 2): 3}``. Accounting for the partial sum
    and applying the undirectedness half-path correction, we get

    >>> nx.edge_betweenness_centrality(G, k=2, normalized=False, seed=42)
    {(0, 1): 2.25, (1, 2): 2.25}

    When normalizing, we instead want to divide by the total number of $(s, t)$-pairs.
    This is $k(n-1)$, which is $4$ in our case.

    >>> nx.edge_betweenness_centrality(G, k=2, normalized=True, seed=42)
    {(0, 1): 0.75, (1, 2): 0.75}
    """
    betweenness = dict.fromkeys(G, 0.0)  # b[v]=0 for v in G
    # b[e]=0 for e in G.edges()
    betweenness.update(dict.fromkeys(G.edges(), 0.0))
    if k == len(G):
        # Sampling every node is equivalent to the exact computation, so skip
        # the sampling machinery (consistent with `betweenness_centrality`).
        # This is done for performance; the result is the same regardless.
        k = None
    if k is None:
        nodes = G
    else:
        # Sample k source nodes ("pivots"); all nodes remain targets.
        nodes = seed.sample(list(G.nodes()), k)
    for s in nodes:
        # single source shortest paths
        if weight is None:  # use BFS
            S, P, sigma, _ = _single_source_shortest_path_basic(G, s)
        else:  # use Dijkstra's algorithm
            S, P, sigma, _ = _single_source_dijkstra_path_basic(G, s, weight)
        # accumulation
        betweenness = _accumulate_edges(betweenness, S, P, sigma, s)
    # rescaling
    for n in G:  # remove nodes to only return edges
        del betweenness[n]
    betweenness = _rescale(
        betweenness,
        len(G),
        normalized=normalized,
        directed=G.is_directed(),
        sampled_nodes=None if k is None else nodes,
    )
    if G.is_multigraph():
        betweenness = _add_edge_keys(G, betweenness, weight=weight)
    return betweenness
397
398
399# helpers for betweenness centrality
400
401
402def _single_source_shortest_path_basic(G, s):
403 S = []
404 P = {}
405 for v in G:
406 P[v] = []
407 sigma = dict.fromkeys(G, 0.0) # sigma[v]=0 for v in G
408 D = {}
409 sigma[s] = 1.0
410 D[s] = 0
411 Q = deque([s])
412 while Q: # use BFS to find shortest paths
413 v = Q.popleft()
414 S.append(v)
415 Dv = D[v]
416 sigmav = sigma[v]
417 for w in G[v]:
418 if w not in D:
419 Q.append(w)
420 D[w] = Dv + 1
421 if D[w] == Dv + 1: # this is a shortest path, count paths
422 sigma[w] += sigmav
423 P[w].append(v) # predecessors
424 return S, P, sigma, D
425
426
def _single_source_dijkstra_path_basic(G, s, weight):
    """Dijkstra search from `s`, recording Brandes shortest-path data.

    Returns ``(S, P, sigma, D)`` with the same meaning as in the BFS
    variant, but with distances taken from the `weight` edge attribute.
    Modified from Eppstein's Dijkstra implementation.
    """
    weight = _weight_function(G, weight)
    order = []  # nodes in non-decreasing distance from s
    pred = {node: [] for node in G}  # shortest-path predecessors
    sigma = dict.fromkeys(G, 0.0)  # number of shortest paths from s
    final = {}  # finalized distances
    seen = {s: 0}  # best tentative distances
    sigma[s] = 1.0
    seq = count()  # tie-breaker so the heap never compares node objects
    heap = [(0, next(seq), s, s)]  # entries: (distance, seq, predecessor, node)
    while heap:
        d, _, u, v = heappop(heap)
        if v in final:
            continue  # already searched this node.
        sigma[v] += sigma[u]  # count paths arriving via u
        order.append(v)
        final[v] = d
        for nbr, attrs in G[v].items():
            cand = d + weight(v, nbr, attrs)
            if nbr not in final and (nbr not in seen or cand < seen[nbr]):
                # Strictly shorter route found: restart counts/predecessors.
                seen[nbr] = cand
                heappush(heap, (cand, next(seq), v, nbr))
                sigma[nbr] = 0.0
                pred[nbr] = [v]
            elif cand == seen[nbr]:  # another shortest path of equal length
                sigma[nbr] += sigma[v]
                pred[nbr].append(v)
    return order, pred, sigma, final
459
460
461def _accumulate_basic(betweenness, S, P, sigma, s):
462 delta = dict.fromkeys(S, 0)
463 while S:
464 w = S.pop()
465 coeff = (1 + delta[w]) / sigma[w]
466 for v in P[w]:
467 delta[v] += sigma[v] * coeff
468 if w != s:
469 betweenness[w] += delta[w]
470 return betweenness, delta
471
472
473def _accumulate_endpoints(betweenness, S, P, sigma, s):
474 betweenness[s] += len(S) - 1
475 delta = dict.fromkeys(S, 0)
476 while S:
477 w = S.pop()
478 coeff = (1 + delta[w]) / sigma[w]
479 for v in P[w]:
480 delta[v] += sigma[v] * coeff
481 if w != s:
482 betweenness[w] += delta[w] + 1
483 return betweenness, delta
484
485
486def _accumulate_edges(betweenness, S, P, sigma, s):
487 delta = dict.fromkeys(S, 0)
488 while S:
489 w = S.pop()
490 coeff = (1 + delta[w]) / sigma[w]
491 for v in P[w]:
492 c = sigma[v] * coeff
493 if (v, w) not in betweenness:
494 betweenness[(w, v)] += c
495 else:
496 betweenness[(v, w)] += c
497 delta[v] += c
498 if w != s:
499 betweenness[w] += delta[w]
500 return betweenness
501
502
503def _rescale(
504 betweenness, n, *, normalized, directed, endpoints=True, sampled_nodes=None
505):
506 # For edge betweenness, `endpoints` is always `True`.
507
508 k = None if sampled_nodes is None else len(sampled_nodes)
509 # N is used to count the number of valid (s, t) pairs where s != t that
510 # could have a path pass through v. If endpoints is False, then v must
511 # not be the target t, hence why we subtract by 1.
512 N = n if endpoints else n - 1
513 if N < 2:
514 # No rescaling necessary: b=0 for all nodes
515 return betweenness
516
517 K_source = N if k is None else k
518
519 if k is None or endpoints:
520 # No sampling adjustment needed
521 if normalized:
522 # Divide by the number of valid (s, t) node pairs that could have
523 # a path through v where s != t.
524 scale = 1 / (K_source * (N - 1))
525 else:
526 # Scale to the full BC
527 if not directed:
528 # The non-normalized BC values are computed the same way for
529 # directed and undirected graphs: shortest paths are computed and
530 # counted for each *ordered* (s, t) pair. Undirected graphs should
531 # only count valid *unordered* node pairs {s, t}; that is, (s, t)
532 # and (t, s) should be counted only once. We correct for this here.
533 correction = 2
534 else:
535 correction = 1
536 scale = N / (K_source * correction)
537
538 if scale != 1:
539 for v in betweenness:
540 betweenness[v] *= scale
541 return betweenness
542
543 # Sampling adjustment needed when excluding endpoints when using k. In this
544 # case, we need to handle source nodes differently from non-source nodes,
545 # because source nodes can't include themselves since endpoints are excluded.
546 # Without this, k == n would be a special case that would violate the
547 # assumption that node `v` is not one of the (s, t) node pairs.
548 if normalized:
549 # NaN for undefined 0/0; there is no data for source node when k=1
550 scale_source = 1 / ((K_source - 1) * (N - 1)) if K_source > 1 else math.nan
551 scale_nonsource = 1 / (K_source * (N - 1))
552 else:
553 correction = 1 if directed else 2
554 scale_source = N / ((K_source - 1) * correction) if K_source > 1 else math.nan
555 scale_nonsource = N / (K_source * correction)
556
557 sampled_nodes = set(sampled_nodes)
558 for v in betweenness:
559 betweenness[v] *= scale_source if v in sampled_nodes else scale_nonsource
560 return betweenness
561
562
@not_implemented_for("graph")
def _add_edge_keys(G, betweenness, weight=None):
    r"""Expand multigraph edge betweenness values to keyed edges.

    Parameters
    ----------
    G : NetworkX graph.

    betweenness : dictionary
        Dictionary mapping adjacent node tuples to betweenness centrality values.

    weight : string or function
        See `_weight_function` for details. Defaults to `None`.

    Returns
    -------
    edges : dictionary
        The parameter `betweenness` including edges with keys and their
        betweenness centrality values.

    The BC value is divided among edges of equal weight.
    """
    get_weight = _weight_function(G, weight)

    keyed_bc = dict.fromkeys(G.edges, 0.0)
    for u, v in betweenness:
        parallel = G[u][v]
        min_wt = get_weight(u, v, parallel)
        # All parallel (u, v) edges that attain the minimum weight lie on the
        # shortest paths and share the betweenness value equally.
        min_keys = [
            key for key in parallel if get_weight(u, v, {key: parallel[key]}) == min_wt
        ]
        per_key = betweenness[(u, v)] / len(min_keys)
        for key in min_keys:
            keyed_bc[(u, v, key)] = per_key

    return keyed_bc