1"""Betweenness centrality measures."""
2
3import math
4from collections import deque
5from heapq import heappop, heappush
6from itertools import count
7
8import networkx as nx
9from networkx.algorithms.shortest_paths.weighted import _weight_function
10from networkx.utils import py_random_state
11from networkx.utils.decorators import not_implemented_for
12
13__all__ = ["betweenness_centrality", "edge_betweenness_centrality"]
14
15
@py_random_state("seed")
@nx._dispatchable(edge_attrs="weight")
def betweenness_centrality(
    G, k=None, normalized=True, weight=None, endpoints=False, seed=None
):
    r"""Compute the shortest-path betweenness centrality for nodes.

    Betweenness centrality of a node $v$ is the sum of the
    fraction of all-pairs shortest paths that pass through $v$.

    .. math::

       c_B(v) = \sum_{s, t \in V} \frac{\sigma(s, t | v)}{\sigma(s, t)}

    where $V$ is the set of nodes, $\sigma(s, t)$ is the number of
    shortest $(s, t)$-paths, and $\sigma(s, t | v)$ is the number of
    those paths passing through some node $v$ other than $s$ and $t$.
    If $s = t$, $\sigma(s, t) = 1$, and if $v \in \{s, t\}$,
    $\sigma(s, t | v) = 0$ [2]_.
    The denominator $\sigma(s, t)$ is a normalization factor that can be
    turned off to get the raw path counts.

    Parameters
    ----------
    G : graph
        A NetworkX graph.

    k : int, optional (default=None)
        If `k` is not `None`, use `k` sampled nodes as sources for the considered paths.
        The resulting sampled counts are then inflated to approximate betweenness.
        Higher values of `k` give better approximation. Must have ``k <= len(G)``.

    normalized : bool, optional (default=True)
        If `True`, the betweenness values are rescaled by dividing by the number of
        possible $(s, t)$-pairs in the graph.

    weight : None or string, optional (default=None)
        If `None`, all edge weights are 1.
        Otherwise holds the name of the edge attribute used as weight.
        Weights are used to calculate weighted shortest paths, so they are
        interpreted as distances.

    endpoints : bool, optional (default=False)
        If `True`, include the endpoints $s$ and $t$ in the shortest path counts.
        This is taken into account when rescaling the values.

    seed : integer, random_state, or None (default)
        Indicator of random number generation state.
        See :ref:`Randomness<randomness>`.
        Note that this is only used if ``k is not None``.

    Returns
    -------
    nodes : dict
        Dictionary of nodes with betweenness centrality as the value.

    See Also
    --------
    betweenness_centrality_subset
    edge_betweenness_centrality
    load_centrality

    Notes
    -----
    The algorithm is from Ulrik Brandes [1]_.
    See [4]_ for the original first published version and [2]_ for details on
    algorithms for variations and related metrics.

    For approximate betweenness calculations, set `k` to the number of sampled
    nodes ("pivots") used as sources to estimate the betweenness values.
    The formula then sums over $s$ in these pivots, instead of over all nodes.
    The resulting sum is then inflated to approximate the full sum.
    For a discussion of how to choose `k` for efficiency, see [3]_.

    For weighted graphs the edge weights must be greater than zero.
    Zero edge weights can produce an infinite number of equal length
    paths between pairs of nodes.

    Directed graphs and undirected graphs count paths differently.
    In directed graphs, each pair of source-target nodes is considered separately
    in each direction, as the shortest paths can differ by direction.
    However, in undirected graphs, each pair of nodes is considered only once,
    as the shortest paths are symmetric.
    This means the normalization factor to divide by is $N(N-1)$ for directed graphs
    and $N(N-1)/2$ for undirected graphs, where $N = n$ (the number of nodes)
    if endpoints are included and $N = n-1$ otherwise.

    This algorithm is not guaranteed to be correct if edge weights
    are floating point numbers. As a workaround you can use integer
    numbers by multiplying the relevant edge attributes by a convenient
    constant factor (e.g. 100) and converting to integers.

    References
    ----------
    .. [1] Ulrik Brandes:
       A Faster Algorithm for Betweenness Centrality.
       Journal of Mathematical Sociology 25(2):163--177, 2001.
       https://doi.org/10.1080/0022250X.2001.9990249
    .. [2] Ulrik Brandes:
       On Variants of Shortest-Path Betweenness
       Centrality and their Generic Computation.
       Social Networks 30(2):136--145, 2008.
       https://doi.org/10.1016/j.socnet.2007.11.001
    .. [3] Ulrik Brandes and Christian Pich:
       Centrality Estimation in Large Networks.
       International Journal of Bifurcation and Chaos 17(7):2303--2318, 2007.
       https://dx.doi.org/10.1142/S0218127407018403
    .. [4] Linton C. Freeman:
       A set of measures of centrality based on betweenness.
       Sociometry 40: 35--41, 1977
       https://doi.org/10.2307/3033543

    Examples
    --------
    Consider an undirected 3-path. Each pair of nodes has exactly one shortest
    path between them. Since the graph is undirected, only unordered pairs are counted.
    Of these (and when `endpoints` is `False`), none of the shortest paths pass
    through 0 and 2, and only the shortest path between 0 and 2 passes through 1.
    As such, the counts should be ``{0: 0, 1: 1, 2: 0}``.

    >>> G = nx.path_graph(3)
    >>> nx.betweenness_centrality(G, normalized=False, endpoints=False)
    {0: 0.0, 1: 1.0, 2: 0.0}

    If `endpoints` is `True`, we also need to count endpoints as being on the path:
    $\sigma(s, t | s) = \sigma(s, t | t) = \sigma(s, t)$.
    In our example, 0 is then part of two shortest paths (0 to 1 and 0 to 2);
    similarly, 2 is part of two shortest paths (0 to 2 and 1 to 2).
    1 is part of all three shortest paths. This makes the new raw
    counts ``{0: 2, 1: 3, 2: 2}``.

    >>> nx.betweenness_centrality(G, normalized=False, endpoints=True)
    {0: 2.0, 1: 3.0, 2: 2.0}

    With normalization, the values are divided by the number of unordered $(s, t)$-pairs.
    If we are not counting endpoints, there are $n - 1$ possible choices for $s$
    (all except the node we are computing betweenness centrality for), which in turn
    leaves $n - 2$ possible choices for $t$ as $s \ne t$.
    The total number of unordered pairs when `endpoints` is `False` is $(n - 1)(n - 2)/2 = 1$.
    If `endpoints` is `True`, there are $n(n - 1)/2 = 3$ unordered $(s, t)$-pairs to divide by.

    >>> nx.betweenness_centrality(G, normalized=True, endpoints=False)
    {0: 0.0, 1: 1.0, 2: 0.0}
    >>> nx.betweenness_centrality(G, normalized=True, endpoints=True)
    {0: 0.6666666666666666, 1: 1.0, 2: 0.6666666666666666}

    If the graph is directed instead, we now need to consider $(s, t)$-pairs
    in both directions. Our example becomes a directed 3-path.
    Without counting endpoints, we only have one path through 1 (0 to 2).
    This means the raw counts are ``{0: 0, 1: 1, 2: 0}``.

    >>> DG = nx.path_graph(3, create_using=nx.DiGraph)
    >>> nx.betweenness_centrality(DG, normalized=False, endpoints=False)
    {0: 0.0, 1: 1.0, 2: 0.0}

    If we do include endpoints, the raw counts are ``{0: 2, 1: 3, 2: 2}``.

    >>> nx.betweenness_centrality(DG, normalized=False, endpoints=True)
    {0: 2.0, 1: 3.0, 2: 2.0}

    If we want to normalize directed betweenness centrality, the raw counts
    are normalized by the number of $(s, t)$-pairs. There are $n(n - 1)$
    possible paths with endpoints and $(n - 1)(n - 2)$ without endpoints.
    In our example, that's 6 with endpoints and 2 without endpoints.

    >>> nx.betweenness_centrality(DG, normalized=True, endpoints=True)
    {0: 0.3333333333333333, 1: 0.5, 2: 0.3333333333333333}
    >>> nx.betweenness_centrality(DG, normalized=True, endpoints=False)
    {0: 0.0, 1: 0.5, 2: 0.0}

    Computing the full betweenness centrality can be costly.
    This function can also be used to compute approximate betweenness centrality
    by setting `k`. This only determines the number of source nodes to sample;
    all nodes are targets.

    For simplicity, we only consider the case where endpoints are included in the counts.
    Since the partial sums only include `k` terms, instead of ``n``,
    we multiply them by ``n / k``, to approximate the full sum.
    As the sets of sources and targets are not the same anymore,
    paths have to be counted in a directed way. We thus count each as half a path.
    This ensures that the results approximate the standard betweenness for ``k == n``.

    For instance, in the undirected 3-path graph case, setting ``k = 2`` (with ``seed=42``)
    selects nodes 0 and 2 as sources.
    This means only shortest paths starting at these nodes are considered.
    The raw counts with endpoints are ``{0: 3, 1: 4, 2: 3}``. Accounting for the partial sum
    and applying the undirectedness half-path correction, we get

    >>> nx.betweenness_centrality(G, k=2, normalized=False, endpoints=True, seed=42)
    {0: 2.25, 1: 3.0, 2: 2.25}

    When normalizing, we instead want to divide by the total number of $(s, t)$-pairs.
    This is $k(n - 1)$ with endpoints.

    >>> nx.betweenness_centrality(G, k=2, normalized=True, endpoints=True, seed=42)
    {0: 0.75, 1: 1.0, 2: 0.75}
    """
    betweenness = dict.fromkeys(G, 0.0)  # b[v]=0 for v in G
    if k == len(G):
        # This is done for performance; the result is the same regardless.
        k = None
    if k is None:
        nodes = G
    else:
        # Sample k pivot nodes (without replacement) to use as sources.
        nodes = seed.sample(list(G.nodes()), k)
    for s in nodes:
        # single source shortest paths
        if weight is None:  # use BFS
            S, P, sigma, _ = _single_source_shortest_path_basic(G, s)
        else:  # use Dijkstra's algorithm
            S, P, sigma, _ = _single_source_dijkstra_path_basic(G, s, weight)
        # accumulation (dependency back-propagation per Brandes)
        if endpoints:
            betweenness, _ = _accumulate_endpoints(betweenness, S, P, sigma, s)
        else:
            betweenness, _ = _accumulate_basic(betweenness, S, P, sigma, s)
    # rescaling
    betweenness = _rescale(
        betweenness,
        len(G),
        normalized=normalized,
        directed=G.is_directed(),
        endpoints=endpoints,
        sampled_nodes=None if k is None else nodes,
    )
    return betweenness
242
243
@py_random_state("seed")
@nx._dispatchable(edge_attrs="weight")
def edge_betweenness_centrality(G, k=None, normalized=True, weight=None, seed=None):
    r"""Compute betweenness centrality for edges.

    Betweenness centrality of an edge $e$ is the sum of the
    fraction of all-pairs shortest paths that pass through $e$.

    .. math::

       c_B(e) = \sum_{s, t \in V} \frac{\sigma(s, t | e)}{\sigma(s, t)}

    where $V$ is the set of nodes, $\sigma(s, t)$ is the number of
    shortest $(s, t)$-paths, and $\sigma(s, t | e)$ is the number of
    those paths passing through edge $e$ [1]_.
    The denominator $\sigma(s, t)$ is a normalization factor that can be
    turned off to get the raw path counts.

    Parameters
    ----------
    G : graph
        A NetworkX graph.

    k : int, optional (default=None)
        If `k` is not `None`, use `k` sampled nodes as sources for the considered paths.
        The resulting sampled counts are then inflated to approximate betweenness.
        Higher values of `k` give better approximation. Must have ``k <= len(G)``.

    normalized : bool, optional (default=True)
        If `True`, the betweenness values are rescaled by dividing by the number of
        possible $(s, t)$-pairs in the graph.

    weight : None or string, optional (default=None)
        If `None`, all edge weights are 1.
        Otherwise holds the name of the edge attribute used as weight.
        Weights are used to calculate weighted shortest paths, so they are
        interpreted as distances.

    seed : integer, random_state, or None (default)
        Indicator of random number generation state.
        See :ref:`Randomness<randomness>`.
        Note that this is only used if ``k is not None``.

    Returns
    -------
    edges : dict
        Dictionary of edges with betweenness centrality as the value.

    See Also
    --------
    betweenness_centrality
    edge_betweenness_centrality_subset
    edge_load

    Notes
    -----
    The algorithm is from Ulrik Brandes [1]_.

    For weighted graphs the edge weights must be greater than zero.
    Zero edge weights can produce an infinite number of equal length
    paths between pairs of nodes.

    References
    ----------
    .. [1] Ulrik Brandes: On Variants of Shortest-Path Betweenness
       Centrality and their Generic Computation.
       Social Networks 30(2):136--145, 2008.
       https://doi.org/10.1016/j.socnet.2007.11.001

    Examples
    --------
    Consider an undirected 3-path. Each pair of nodes has exactly one shortest
    path between them. Since the graph is undirected, only unordered pairs are counted.
    Each edge has two shortest paths passing through it.
    As such, the raw counts should be ``{(0, 1): 2, (1, 2): 2}``.

    >>> G = nx.path_graph(3)
    >>> nx.edge_betweenness_centrality(G, normalized=False)
    {(0, 1): 2.0, (1, 2): 2.0}

    With normalization, the values are divided by the number of unordered $(s, t)$-pairs,
    which is $n(n-1)/2$. For the 3-path, this is $3(3-1)/2 = 3$.

    >>> nx.edge_betweenness_centrality(G, normalized=True)
    {(0, 1): 0.6666666666666666, (1, 2): 0.6666666666666666}

    For a directed graph, all $(s, t)$-pairs are considered. The normalization factor
    is $n(n-1)$ to reflect this.

    >>> DG = nx.path_graph(3, create_using=nx.DiGraph)
    >>> nx.edge_betweenness_centrality(DG, normalized=False)
    {(0, 1): 2.0, (1, 2): 2.0}
    >>> nx.edge_betweenness_centrality(DG, normalized=True)
    {(0, 1): 0.3333333333333333, (1, 2): 0.3333333333333333}

    Computing the full edge betweenness centrality can be costly.
    This function can also be used to compute approximate edge betweenness centrality
    by setting `k`. This determines the number of source nodes to sample.

    Since the partial sums only include `k` terms, instead of ``n``,
    we multiply them by ``n / k``, to approximate the full sum.
    As the sets of sources and targets are not the same anymore,
    paths have to be counted in a directed way. We thus count each as half a path.
    This ensures that the results approximate the standard betweenness for ``k == n``.

    For instance, in the undirected 3-path graph case, setting ``k = 2`` (with ``seed=42``)
    selects nodes 0 and 2 as sources.
    This means only shortest paths starting at these nodes are considered.
    The raw counts are ``{(0, 1): 3, (1, 2): 3}``. Accounting for the partial sum
    and applying the undirectedness half-path correction, we get

    >>> nx.edge_betweenness_centrality(G, k=2, normalized=False, seed=42)
    {(0, 1): 2.25, (1, 2): 2.25}

    When normalizing, we instead want to divide by the total number of $(s, t)$-pairs.
    This is $k(n-1)$, which is $4$ in our case.

    >>> nx.edge_betweenness_centrality(G, k=2, normalized=True, seed=42)
    {(0, 1): 0.75, (1, 2): 0.75}
    """
    betweenness = dict.fromkeys(G, 0.0)  # b[v]=0 for v in G
    # b[e]=0 for e in G.edges()
    betweenness.update(dict.fromkeys(G.edges(), 0.0))
    if k == len(G):
        # This is done for performance; the result is the same regardless.
        # (Mirrors the identical shortcut in `betweenness_centrality`.)
        k = None
    if k is None:
        nodes = G
    else:
        # Sample k pivot nodes (without replacement) to use as sources.
        nodes = seed.sample(list(G.nodes()), k)
    for s in nodes:
        # single source shortest paths
        if weight is None:  # use BFS
            S, P, sigma, _ = _single_source_shortest_path_basic(G, s)
        else:  # use Dijkstra's algorithm
            S, P, sigma, _ = _single_source_dijkstra_path_basic(G, s, weight)
        # accumulation (dependency back-propagation per Brandes)
        betweenness = _accumulate_edges(betweenness, S, P, sigma, s)
    # rescaling
    for n in G:  # remove nodes to only return edges
        del betweenness[n]
    betweenness = _rescale(
        betweenness,
        len(G),
        normalized=normalized,
        directed=G.is_directed(),
        sampled_nodes=None if k is None else nodes,
    )
    if G.is_multigraph():
        # Split each (u, v) value among the keyed parallel edges.
        betweenness = _add_edge_keys(G, betweenness, weight=weight)
    return betweenness
392
393
394# helpers for betweenness centrality
395
396
397def _single_source_shortest_path_basic(G, s):
398 S = []
399 P = {}
400 for v in G:
401 P[v] = []
402 sigma = dict.fromkeys(G, 0.0) # sigma[v]=0 for v in G
403 D = {}
404 sigma[s] = 1.0
405 D[s] = 0
406 Q = deque([s])
407 while Q: # use BFS to find shortest paths
408 v = Q.popleft()
409 S.append(v)
410 Dv = D[v]
411 sigmav = sigma[v]
412 for w in G[v]:
413 if w not in D:
414 Q.append(w)
415 D[w] = Dv + 1
416 if D[w] == Dv + 1: # this is a shortest path, count paths
417 sigma[w] += sigmav
418 P[w].append(v) # predecessors
419 return S, P, sigma, D
420
421
def _single_source_dijkstra_path_basic(G, s, weight):
    """Dijkstra shortest-path search and path counting from source ``s``.

    First phase of Brandes' algorithm for weighted graphs.  Returns
    ``(S, P, sigma, D)`` where ``S`` lists reached nodes in nondecreasing
    distance from ``s``, ``P`` maps each node to its shortest-path
    predecessors, ``sigma`` maps each node to its shortest-path count, and
    ``D`` maps reached nodes to their final distance.
    """
    weight = _weight_function(G, weight)
    # modified from Eppstein
    S = []
    P = {}
    for v in G:
        P[v] = []
    sigma = dict.fromkeys(G, 0.0)  # sigma[v]=0 for v in G
    D = {}
    sigma[s] = 1.0
    seen = {s: 0}  # best tentative distance found so far per node
    c = count()  # tie-breaker so the heap never compares node objects
    Q = []  # use Q as heap with (distance,node id) tuples
    heappush(Q, (0, next(c), s, s))
    while Q:
        (dist, _, pred, v) = heappop(Q)
        if v in D:
            continue  # already searched this node.
        sigma[v] += sigma[pred]  # count paths
        S.append(v)
        D[v] = dist  # dist is final once v is popped from the heap
        for w, edgedata in G[v].items():
            vw_dist = dist + weight(v, w, edgedata)
            if w not in D and (w not in seen or vw_dist < seen[w]):
                # Strictly shorter route found: discard w's old path data.
                seen[w] = vw_dist
                heappush(Q, (vw_dist, next(c), v, w))
                sigma[w] = 0.0
                P[w] = [v]
            elif vw_dist == seen[w]:  # handle equal paths
                sigma[w] += sigma[v]
                P[w].append(v)
    return S, P, sigma, D
454
455
456def _accumulate_basic(betweenness, S, P, sigma, s):
457 delta = dict.fromkeys(S, 0)
458 while S:
459 w = S.pop()
460 coeff = (1 + delta[w]) / sigma[w]
461 for v in P[w]:
462 delta[v] += sigma[v] * coeff
463 if w != s:
464 betweenness[w] += delta[w]
465 return betweenness, delta
466
467
468def _accumulate_endpoints(betweenness, S, P, sigma, s):
469 betweenness[s] += len(S) - 1
470 delta = dict.fromkeys(S, 0)
471 while S:
472 w = S.pop()
473 coeff = (1 + delta[w]) / sigma[w]
474 for v in P[w]:
475 delta[v] += sigma[v] * coeff
476 if w != s:
477 betweenness[w] += delta[w] + 1
478 return betweenness, delta
479
480
481def _accumulate_edges(betweenness, S, P, sigma, s):
482 delta = dict.fromkeys(S, 0)
483 while S:
484 w = S.pop()
485 coeff = (1 + delta[w]) / sigma[w]
486 for v in P[w]:
487 c = sigma[v] * coeff
488 if (v, w) not in betweenness:
489 betweenness[(w, v)] += c
490 else:
491 betweenness[(v, w)] += c
492 delta[v] += c
493 if w != s:
494 betweenness[w] += delta[w]
495 return betweenness
496
497
498def _rescale(
499 betweenness, n, *, normalized, directed, endpoints=True, sampled_nodes=None
500):
501 # For edge betweenness, `endpoints` is always `True`.
502
503 k = None if sampled_nodes is None else len(sampled_nodes)
504 # N is used to count the number of valid (s, t) pairs where s != t that
505 # could have a path pass through v. If endpoints is False, then v must
506 # not be the target t, hence why we subtract by 1.
507 N = n if endpoints else n - 1
508 if N < 2:
509 # No rescaling necessary: b=0 for all nodes
510 return betweenness
511
512 K_source = N if k is None else k
513
514 if k is None or endpoints:
515 # No sampling adjustment needed
516 if normalized:
517 # Divide by the number of valid (s, t) node pairs that could have
518 # a path through v where s != t.
519 scale = 1 / (K_source * (N - 1))
520 else:
521 # Scale to the full BC
522 if not directed:
523 # The non-normalized BC values are computed the same way for
524 # directed and undirected graphs: shortest paths are computed and
525 # counted for each *ordered* (s, t) pair. Undirected graphs should
526 # only count valid *unordered* node pairs {s, t}; that is, (s, t)
527 # and (t, s) should be counted only once. We correct for this here.
528 correction = 2
529 else:
530 correction = 1
531 scale = N / (K_source * correction)
532
533 if scale != 1:
534 for v in betweenness:
535 betweenness[v] *= scale
536 return betweenness
537
538 # Sampling adjustment needed when excluding endpoints when using k. In this
539 # case, we need to handle source nodes differently from non-source nodes,
540 # because source nodes can't include themselves since endpoints are excluded.
541 # Without this, k == n would be a special case that would violate the
542 # assumption that node `v` is not one of the (s, t) node pairs.
543 if normalized:
544 # NaN for undefined 0/0; there is no data for source node when k=1
545 scale_source = 1 / ((K_source - 1) * (N - 1)) if K_source > 1 else math.nan
546 scale_nonsource = 1 / (K_source * (N - 1))
547 else:
548 correction = 1 if directed else 2
549 scale_source = N / ((K_source - 1) * correction) if K_source > 1 else math.nan
550 scale_nonsource = N / (K_source * correction)
551
552 sampled_nodes = set(sampled_nodes)
553 for v in betweenness:
554 betweenness[v] *= scale_source if v in sampled_nodes else scale_nonsource
555 return betweenness
556
557
558@not_implemented_for("graph")
559def _add_edge_keys(G, betweenness, weight=None):
560 r"""Adds the corrected betweenness centrality (BC) values for multigraphs.
561
562 Parameters
563 ----------
564 G : NetworkX graph.
565
566 betweenness : dictionary
567 Dictionary mapping adjacent node tuples to betweenness centrality values.
568
569 weight : string or function
570 See `_weight_function` for details. Defaults to `None`.
571
572 Returns
573 -------
574 edges : dictionary
575 The parameter `betweenness` including edges with keys and their
576 betweenness centrality values.
577
578 The BC value is divided among edges of equal weight.
579 """
580 _weight = _weight_function(G, weight)
581
582 edge_bc = dict.fromkeys(G.edges, 0.0)
583 for u, v in betweenness:
584 d = G[u][v]
585 wt = _weight(u, v, d)
586 keys = [k for k in d if _weight(u, v, {k: d[k]}) == wt]
587 bc = betweenness[(u, v)] / len(keys)
588 for k in keys:
589 edge_bc[(u, v, k)] = bc
590
591 return edge_bc