Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/networkx/algorithms/centrality/betweenness.py: 14%

1"""Betweenness centrality measures."""

3import math

4from collections import deque

5from heapq import heappop, heappush

6from itertools import count

8import networkx as nx

9from networkx.algorithms.shortest_paths.weighted import _weight_function

10from networkx.utils import py_random_state

11from networkx.utils.decorators import not_implemented_for

13__all__ = ["betweenness_centrality", "edge_betweenness_centrality"]

16@py_random_state(5)

17@nx._dispatchable(edge_attrs="weight")

18def betweenness_centrality(

19 G, k=None, normalized=True, weight=None, endpoints=False, seed=None

20):

21 r"""Compute the shortest-path betweenness centrality for nodes.

23 Betweenness centrality of a node $v$ is the sum of the

24 fraction of all-pairs shortest paths that pass through $v$

26 .. math::

28 c_B(v) =\sum_{s,t \in V} \frac{\sigma(s, t|v)}{\sigma(s, t)}

30 where $V$ is the set of nodes, $\sigma(s, t)$ is the number of

31 shortest $(s, t)$-paths, and $\sigma(s, t|v)$ is the number of

32 those paths passing through some node $v$ other than $s, t$.

33 If $s = t$, $\sigma(s, t) = 1$, and if $v \in {s, t}$,

34 $\sigma(s, t|v) = 0$ [2]_.

36 Parameters

37 ----------

38 G : graph

39 A NetworkX graph.

41 k : int, optional (default=None)

42 If k is not None use k node samples to estimate betweenness.

43 The value of k <= n where n is the number of nodes in the graph.

44 Higher values give better approximation.

46 normalized : bool, optional

47 If True the betweenness values are normalized by `2/((n-1)(n-2))`

48 for graphs, and `1/((n-1)(n-2))` for directed graphs where `n`

49 is the number of nodes in G.

51 weight : None or string, optional (default=None)

52 If None, all edge weights are considered equal.

53 Otherwise holds the name of the edge attribute used as weight.

54 Weights are used to calculate weighted shortest paths, so they are

55 interpreted as distances.

57 endpoints : bool, optional

58 If True include the endpoints in the shortest path counts.

60 seed : integer, random_state, or None (default)

61 Indicator of random number generation state.

62 See :ref:`Randomness<randomness>`.

63 Note that this is only used if k is not None.

65 Returns

66 -------

67 nodes : dictionary

68 Dictionary of nodes with betweenness centrality as the value.

70 See Also

71 --------

72 edge_betweenness_centrality

73 load_centrality

75 Notes

76 -----

77 The algorithm is from Ulrik Brandes [1]_.

78 See [4]_ for the original first published version and [2]_ for details on

79 algorithms for variations and related metrics.

81 For approximate betweenness calculations set k=#samples to use

82 k nodes ("pivots") to estimate the betweenness values. For an estimate

83 of the number of pivots needed see [3]_.

85 For weighted graphs the edge weights must be greater than zero.

86 Zero edge weights can produce an infinite number of equal length

87 paths between pairs of nodes.

89 The total number of paths between source and target is counted

90 differently for directed and undirected graphs. Directed paths

91 are easy to count. Undirected paths are tricky: should a path

92 from "u" to "v" count as 1 undirected path or as 2 directed paths?

94 For betweenness_centrality we report the number of undirected

95 paths when G is undirected.

97 For betweenness_centrality_subset the reporting is different.

98 If the source and target subsets are the same, then we want

99 to count undirected paths. But if the source and target subsets

100 differ -- for example, if sources is {0} and targets is {1},

101 then we are only counting the paths in one direction. They are

102 undirected paths but we are counting them in a directed way.

103 To count them as undirected paths, each should count as half a path.

104

105 This algorithm is not guaranteed to be correct if edge weights

106 are floating point numbers. As a workaround you can use integer

107 numbers by multiplying the relevant edge attributes by a convenient

108 constant factor (eg 100) and converting to integers.

109

110 References

111 ----------

112 .. [1] Ulrik Brandes:

113 A Faster Algorithm for Betweenness Centrality.

114 Journal of Mathematical Sociology 25(2):163-177, 2001.

115 https://doi.org/10.1080/0022250X.2001.9990249

116 .. [2] Ulrik Brandes:

117 On Variants of Shortest-Path Betweenness

118 Centrality and their Generic Computation.

119 Social Networks 30(2):136-145, 2008.

120 https://doi.org/10.1016/j.socnet.2007.11.001

121 .. [3] Ulrik Brandes and Christian Pich:

122 Centrality Estimation in Large Networks.

123 International Journal of Bifurcation and Chaos 17(7):2303-2318, 2007.

124 https://dx.doi.org/10.1142/S0218127407018403

125 .. [4] Linton C. Freeman:

126 A set of measures of centrality based on betweenness.

127 Sociometry 40: 35–41, 1977

128 https://doi.org/10.2307/3033543

129 """

130 betweenness = dict.fromkeys(G, 0.0) # b[v]=0 for v in G

131 if k == len(G):

132 # This is done for performance; the result is the same regardless.

133 k = None

134 if k is None:

135 nodes = G

136 else:

137 nodes = seed.sample(list(G.nodes()), k)

138 for s in nodes:

139 # single source shortest paths

140 if weight is None: # use BFS

141 S, P, sigma, _ = _single_source_shortest_path_basic(G, s)

142 else: # use Dijkstra's algorithm

143 S, P, sigma, _ = _single_source_dijkstra_path_basic(G, s, weight)

144 # accumulation

145 if endpoints:

146 betweenness, _ = _accumulate_endpoints(betweenness, S, P, sigma, s)

147 else:

148 betweenness, _ = _accumulate_basic(betweenness, S, P, sigma, s)

149 # rescaling

150 betweenness = _rescale(

151 betweenness,

152 len(G),

153 normalized=normalized,

154 directed=G.is_directed(),

155 endpoints=endpoints,

156 sampled_nodes=None if k is None else nodes,

157 )

158 return betweenness

159

160

161@py_random_state(4)

162@nx._dispatchable(edge_attrs="weight")

163def edge_betweenness_centrality(G, k=None, normalized=True, weight=None, seed=None):

164 r"""Compute betweenness centrality for edges.

165

166 Betweenness centrality of an edge $e$ is the sum of the

167 fraction of all-pairs shortest paths that pass through $e$

168

169 .. math::

170

171 c_B(e) =\sum_{s,t \in V} \frac{\sigma(s, t|e)}{\sigma(s, t)}

172

173 where $V$ is the set of nodes, $\sigma(s, t)$ is the number of

174 shortest $(s, t)$-paths, and $\sigma(s, t|e)$ is the number of

175 those paths passing through edge $e$ [2]_.

176

177 Parameters

178 ----------

179 G : graph

180 A NetworkX graph.

181

182 k : int, optional (default=None)

183 If k is not None use k node samples to estimate betweenness.

184 The value of k <= n where n is the number of nodes in the graph.

185 Higher values give better approximation.

186

187 normalized : bool, optional

188 If True the betweenness values are normalized by $2/(n(n-1))$

189 for graphs, and $1/(n(n-1))$ for directed graphs where $n$

190 is the number of nodes in G.

191

192 weight : None or string, optional (default=None)

193 If None, all edge weights are considered equal.

194 Otherwise holds the name of the edge attribute used as weight.

195 Weights are used to calculate weighted shortest paths, so they are

196 interpreted as distances.

197

198 seed : integer, random_state, or None (default)

199 Indicator of random number generation state.

200 See :ref:`Randomness<randomness>`.

201 Note that this is only used if k is not None.

202

203 Returns

204 -------

205 edges : dictionary

206 Dictionary of edges with betweenness centrality as the value.

207

208 See Also

209 --------

210 betweenness_centrality

211 edge_load

212

213 Notes

214 -----

215 The algorithm is from Ulrik Brandes [1]_.

216

217 For weighted graphs the edge weights must be greater than zero.

218 Zero edge weights can produce an infinite number of equal length

219 paths between pairs of nodes.

220

221 References

222 ----------

223 .. [1] Ulrik Brandes: On Variants of Shortest-Path Betweenness

224 Centrality and their Generic Computation.

225 Social Networks 30(2):136-145, 2008.

226 https://doi.org/10.1016/j.socnet.2007.11.001

227 """

228 betweenness = dict.fromkeys(G, 0.0) # b[v]=0 for v in G

229 # b[e]=0 for e in G.edges()

230 betweenness.update(dict.fromkeys(G.edges(), 0.0))

231 if k is None:

232 nodes = G

233 else:

234 nodes = seed.sample(list(G.nodes()), k)

235 for s in nodes:

236 # single source shortest paths

237 if weight is None: # use BFS

238 S, P, sigma, _ = _single_source_shortest_path_basic(G, s)

239 else: # use Dijkstra's algorithm

240 S, P, sigma, _ = _single_source_dijkstra_path_basic(G, s, weight)

241 # accumulation

242 betweenness = _accumulate_edges(betweenness, S, P, sigma, s)

243 # rescaling

244 for n in G: # remove nodes to only return edges

245 del betweenness[n]

246 betweenness = _rescale(

247 betweenness,

248 len(G),

249 normalized=normalized,

250 directed=G.is_directed(),

251 sampled_nodes=None if k is None else nodes,

252 )

253 if G.is_multigraph():

254 betweenness = _add_edge_keys(G, betweenness, weight=weight)

255 return betweenness

256

257

258# helpers for betweenness centrality

259

260

261def _single_source_shortest_path_basic(G, s):

262 S = []

263 P = {}

264 for v in G:

265 P[v] = []

266 sigma = dict.fromkeys(G, 0.0) # sigma[v]=0 for v in G

267 D = {}

268 sigma[s] = 1.0

269 D[s] = 0

270 Q = deque([s])

271 while Q: # use BFS to find shortest paths

272 v = Q.popleft()

273 S.append(v)

274 Dv = D[v]

275 sigmav = sigma[v]

276 for w in G[v]:

277 if w not in D:

278 Q.append(w)

279 D[w] = Dv + 1

280 if D[w] == Dv + 1: # this is a shortest path, count paths

281 sigma[w] += sigmav

282 P[w].append(v) # predecessors

283 return S, P, sigma, D

284

285

286def _single_source_dijkstra_path_basic(G, s, weight):

287 weight = _weight_function(G, weight)

288 # modified from Eppstein

289 S = []

290 P = {}

291 for v in G:

292 P[v] = []

293 sigma = dict.fromkeys(G, 0.0) # sigma[v]=0 for v in G

294 D = {}

295 sigma[s] = 1.0

296 seen = {s: 0}

297 c = count()

298 Q = [] # use Q as heap with (distance,node id) tuples

299 heappush(Q, (0, next(c), s, s))

300 while Q:

301 (dist, _, pred, v) = heappop(Q)

302 if v in D:

303 continue # already searched this node.

304 sigma[v] += sigma[pred] # count paths

305 S.append(v)

306 D[v] = dist

307 for w, edgedata in G[v].items():

308 vw_dist = dist + weight(v, w, edgedata)

309 if w not in D and (w not in seen or vw_dist < seen[w]):

310 seen[w] = vw_dist

311 heappush(Q, (vw_dist, next(c), v, w))

312 sigma[w] = 0.0

313 P[w] = [v]

314 elif vw_dist == seen[w]: # handle equal paths

315 sigma[w] += sigma[v]

316 P[w].append(v)

317 return S, P, sigma, D

318

319

320def _accumulate_basic(betweenness, S, P, sigma, s):

321 delta = dict.fromkeys(S, 0)

322 while S:

323 w = S.pop()

324 coeff = (1 + delta[w]) / sigma[w]

325 for v in P[w]:

326 delta[v] += sigma[v] * coeff

327 if w != s:

328 betweenness[w] += delta[w]

329 return betweenness, delta

330

331

332def _accumulate_endpoints(betweenness, S, P, sigma, s):

333 betweenness[s] += len(S) - 1

334 delta = dict.fromkeys(S, 0)

335 while S:

336 w = S.pop()

337 coeff = (1 + delta[w]) / sigma[w]

338 for v in P[w]:

339 delta[v] += sigma[v] * coeff

340 if w != s:

341 betweenness[w] += delta[w] + 1

342 return betweenness, delta

343

344

345def _accumulate_edges(betweenness, S, P, sigma, s):

346 delta = dict.fromkeys(S, 0)

347 while S:

348 w = S.pop()

349 coeff = (1 + delta[w]) / sigma[w]

350 for v in P[w]:

351 c = sigma[v] * coeff

352 if (v, w) not in betweenness:

353 betweenness[(w, v)] += c

354 else:

355 betweenness[(v, w)] += c

356 delta[v] += c

357 if w != s:

358 betweenness[w] += delta[w]

359 return betweenness

360

361

362def _rescale(

363 betweenness, n, *, normalized, directed, endpoints=True, sampled_nodes=None

364):

365 # For edge betweenness, `endpoints` is always `True`.

366

367 k = None if sampled_nodes is None else len(sampled_nodes)

368 # N is used to count the number of valid (s, t) pairs where s != t that

369 # could have a path pass through v. If endpoints is False, then v must

370 # not be the target t, hence why we subtract by 1.

371 N = n if endpoints else n - 1

372 if N < 2:

373 # No rescaling necessary: b=0 for all nodes

374 return betweenness

375

376 K_source = N if k is None else k

377

378 if k is None or endpoints:

379 # No sampling adjustment needed

380 if normalized:

381 # Divide by the number of valid (s, t) node pairs that could have

382 # a path through v where s != t.

383 scale = 1 / (K_source * (N - 1))

384 else:

385 # Scale to the full BC

386 if not directed:

387 # The non-normalized BC values are computed the same way for

388 # directed and undirected graphs: shortest paths are computed and

389 # counted for each *ordered* (s, t) pair. Undirected graphs should

390 # only count valid *unordered* node pairs {s, t}; that is, (s, t)

391 # and (t, s) should be counted only once. We correct for this here.

392 correction = 2

393 else:

394 correction = 1

395 scale = N / (K_source * correction)

396

397 if scale != 1:

398 for v in betweenness:

399 betweenness[v] *= scale

400 return betweenness

401

402 # Sampling adjustment needed when excluding endpoints when using k. In this

403 # case, we need to handle source nodes differently from non-source nodes,

404 # because source nodes can't include themselves since endpoints are excluded.

405 # Without this, k == n would be a special case that would violate the

406 # assumption that node `v` is not one of the (s, t) node pairs.

407 if normalized:

408 # NaN for undefined 0/0; there is no data for source node when k=1

409 scale_source = 1 / ((K_source - 1) * (N - 1)) if K_source > 1 else math.nan

410 scale_nonsource = 1 / (K_source * (N - 1))

411 else:

412 correction = 1 if directed else 2

413 scale_source = N / ((K_source - 1) * correction) if K_source > 1 else math.nan

414 scale_nonsource = N / (K_source * correction)

415

416 sampled_nodes = set(sampled_nodes)

417 for v in betweenness:

418 betweenness[v] *= scale_source if v in sampled_nodes else scale_nonsource

419 return betweenness

420

421

422@not_implemented_for("graph")

423def _add_edge_keys(G, betweenness, weight=None):

424 r"""Adds the corrected betweenness centrality (BC) values for multigraphs.

425

426 Parameters

427 ----------

428 G : NetworkX graph.

429

430 betweenness : dictionary

431 Dictionary mapping adjacent node tuples to betweenness centrality values.

432

433 weight : string or function

434 See `_weight_function` for details. Defaults to `None`.

435

436 Returns

437 -------

438 edges : dictionary

439 The parameter `betweenness` including edges with keys and their

440 betweenness centrality values.

441

442 The BC value is divided among edges of equal weight.

443 """

444 _weight = _weight_function(G, weight)

445

446 edge_bc = dict.fromkeys(G.edges, 0.0)

447 for u, v in betweenness:

448 d = G[u][v]

449 wt = _weight(u, v, d)

450 keys = [k for k in d if _weight(u, v, {k: d[k]}) == wt]

451 bc = betweenness[(u, v)] / len(keys)

452 for k in keys:

453 edge_bc[(u, v, k)] = bc

454

455 return edge_bc