Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/networkx/algorithms/centrality/betweenness.py: 13%

1"""Betweenness centrality measures."""

2from collections import deque

3from heapq import heappop, heappush

4from itertools import count

6import networkx as nx

7from networkx.algorithms.shortest_paths.weighted import _weight_function

8from networkx.utils import py_random_state

9from networkx.utils.decorators import not_implemented_for

11__all__ = ["betweenness_centrality", "edge_betweenness_centrality"]

14@py_random_state(5)

15@nx._dispatch(edge_attrs="weight")

16def betweenness_centrality(

17 G, k=None, normalized=True, weight=None, endpoints=False, seed=None

18):

19 r"""Compute the shortest-path betweenness centrality for nodes.

21 Betweenness centrality of a node $v$ is the sum of the

22 fraction of all-pairs shortest paths that pass through $v$

24 .. math::

26 c_B(v) =\sum_{s,t \in V} \frac{\sigma(s, t|v)}{\sigma(s, t)}

28 where $V$ is the set of nodes, $\sigma(s, t)$ is the number of

29 shortest $(s, t)$-paths, and $\sigma(s, t|v)$ is the number of

30 those paths passing through some node $v$ other than $s, t$.

31 If $s = t$, $\sigma(s, t) = 1$, and if $v \in {s, t}$,

32 $\sigma(s, t|v) = 0$ [2]_.

34 Parameters

35 ----------

36 G : graph

37 A NetworkX graph.

39 k : int, optional (default=None)

40 If k is not None use k node samples to estimate betweenness.

41 The value of k <= n where n is the number of nodes in the graph.

42 Higher values give better approximation.

44 normalized : bool, optional

45 If True the betweenness values are normalized by `2/((n-1)(n-2))`

46 for graphs, and `1/((n-1)(n-2))` for directed graphs where `n`

47 is the number of nodes in G.

49 weight : None or string, optional (default=None)

50 If None, all edge weights are considered equal.

51 Otherwise holds the name of the edge attribute used as weight.

52 Weights are used to calculate weighted shortest paths, so they are

53 interpreted as distances.

55 endpoints : bool, optional

56 If True include the endpoints in the shortest path counts.

58 seed : integer, random_state, or None (default)

59 Indicator of random number generation state.

60 See :ref:`Randomness<randomness>`.

61 Note that this is only used if k is not None.

63 Returns

64 -------

65 nodes : dictionary

66 Dictionary of nodes with betweenness centrality as the value.

68 See Also

69 --------

70 edge_betweenness_centrality

71 load_centrality

73 Notes

74 -----

75 The algorithm is from Ulrik Brandes [1]_.

76 See [4]_ for the original first published version and [2]_ for details on

77 algorithms for variations and related metrics.

79 For approximate betweenness calculations set k=#samples to use

80 k nodes ("pivots") to estimate the betweenness values. For an estimate

81 of the number of pivots needed see [3]_.

83 For weighted graphs the edge weights must be greater than zero.

84 Zero edge weights can produce an infinite number of equal length

85 paths between pairs of nodes.

87 The total number of paths between source and target is counted

88 differently for directed and undirected graphs. Directed paths

89 are easy to count. Undirected paths are tricky: should a path

90 from "u" to "v" count as 1 undirected path or as 2 directed paths?

92 For betweenness_centrality we report the number of undirected

93 paths when G is undirected.

95 For betweenness_centrality_subset the reporting is different.

96 If the source and target subsets are the same, then we want

97 to count undirected paths. But if the source and target subsets

98 differ -- for example, if sources is {0} and targets is {1},

99 then we are only counting the paths in one direction. They are

100 undirected paths but we are counting them in a directed way.

101 To count them as undirected paths, each should count as half a path.

102

103 This algorithm is not guaranteed to be correct if edge weights

104 are floating point numbers. As a workaround you can use integer

105 numbers by multiplying the relevant edge attributes by a convenient

106 constant factor (eg 100) and converting to integers.

107

108 References

109 ----------

110 .. [1] Ulrik Brandes:

111 A Faster Algorithm for Betweenness Centrality.

112 Journal of Mathematical Sociology 25(2):163-177, 2001.

113 https://doi.org/10.1080/0022250X.2001.9990249

114 .. [2] Ulrik Brandes:

115 On Variants of Shortest-Path Betweenness

116 Centrality and their Generic Computation.

117 Social Networks 30(2):136-145, 2008.

118 https://doi.org/10.1016/j.socnet.2007.11.001

119 .. [3] Ulrik Brandes and Christian Pich:

120 Centrality Estimation in Large Networks.

121 International Journal of Bifurcation and Chaos 17(7):2303-2318, 2007.

122 https://dx.doi.org/10.1142/S0218127407018403

123 .. [4] Linton C. Freeman:

124 A set of measures of centrality based on betweenness.

125 Sociometry 40: 35–41, 1977

126 https://doi.org/10.2307/3033543

127 """

128 betweenness = dict.fromkeys(G, 0.0) # b[v]=0 for v in G

129 if k is None:

130 nodes = G

131 else:

132 nodes = seed.sample(list(G.nodes()), k)

133 for s in nodes:

134 # single source shortest paths

135 if weight is None: # use BFS

136 S, P, sigma, _ = _single_source_shortest_path_basic(G, s)

137 else: # use Dijkstra's algorithm

138 S, P, sigma, _ = _single_source_dijkstra_path_basic(G, s, weight)

139 # accumulation

140 if endpoints:

141 betweenness, _ = _accumulate_endpoints(betweenness, S, P, sigma, s)

142 else:

143 betweenness, _ = _accumulate_basic(betweenness, S, P, sigma, s)

144 # rescaling

145 betweenness = _rescale(

146 betweenness,

147 len(G),

148 normalized=normalized,

149 directed=G.is_directed(),

150 k=k,

151 endpoints=endpoints,

152 )

153 return betweenness

154

155

156@py_random_state(4)

157@nx._dispatch(edge_attrs="weight")

158def edge_betweenness_centrality(G, k=None, normalized=True, weight=None, seed=None):

159 r"""Compute betweenness centrality for edges.

160

161 Betweenness centrality of an edge $e$ is the sum of the

162 fraction of all-pairs shortest paths that pass through $e$

163

164 .. math::

165

166 c_B(e) =\sum_{s,t \in V} \frac{\sigma(s, t|e)}{\sigma(s, t)}

167

168 where $V$ is the set of nodes, $\sigma(s, t)$ is the number of

169 shortest $(s, t)$-paths, and $\sigma(s, t|e)$ is the number of

170 those paths passing through edge $e$ [2]_.

171

172 Parameters

173 ----------

174 G : graph

175 A NetworkX graph.

176

177 k : int, optional (default=None)

178 If k is not None use k node samples to estimate betweenness.

179 The value of k <= n where n is the number of nodes in the graph.

180 Higher values give better approximation.

181

182 normalized : bool, optional

183 If True the betweenness values are normalized by $2/(n(n-1))$

184 for graphs, and $1/(n(n-1))$ for directed graphs where $n$

185 is the number of nodes in G.

186

187 weight : None or string, optional (default=None)

188 If None, all edge weights are considered equal.

189 Otherwise holds the name of the edge attribute used as weight.

190 Weights are used to calculate weighted shortest paths, so they are

191 interpreted as distances.

192

193 seed : integer, random_state, or None (default)

194 Indicator of random number generation state.

195 See :ref:`Randomness<randomness>`.

196 Note that this is only used if k is not None.

197

198 Returns

199 -------

200 edges : dictionary

201 Dictionary of edges with betweenness centrality as the value.

202

203 See Also

204 --------

205 betweenness_centrality

206 edge_load

207

208 Notes

209 -----

210 The algorithm is from Ulrik Brandes [1]_.

211

212 For weighted graphs the edge weights must be greater than zero.

213 Zero edge weights can produce an infinite number of equal length

214 paths between pairs of nodes.

215

216 References

217 ----------

218 .. [1] A Faster Algorithm for Betweenness Centrality. Ulrik Brandes,

219 Journal of Mathematical Sociology 25(2):163-177, 2001.

220 https://doi.org/10.1080/0022250X.2001.9990249

221 .. [2] Ulrik Brandes: On Variants of Shortest-Path Betweenness

222 Centrality and their Generic Computation.

223 Social Networks 30(2):136-145, 2008.

224 https://doi.org/10.1016/j.socnet.2007.11.001

225 """

226 betweenness = dict.fromkeys(G, 0.0) # b[v]=0 for v in G

227 # b[e]=0 for e in G.edges()

228 betweenness.update(dict.fromkeys(G.edges(), 0.0))

229 if k is None:

230 nodes = G

231 else:

232 nodes = seed.sample(list(G.nodes()), k)

233 for s in nodes:

234 # single source shortest paths

235 if weight is None: # use BFS

236 S, P, sigma, _ = _single_source_shortest_path_basic(G, s)

237 else: # use Dijkstra's algorithm

238 S, P, sigma, _ = _single_source_dijkstra_path_basic(G, s, weight)

239 # accumulation

240 betweenness = _accumulate_edges(betweenness, S, P, sigma, s)

241 # rescaling

242 for n in G: # remove nodes to only return edges

243 del betweenness[n]

244 betweenness = _rescale_e(

245 betweenness, len(G), normalized=normalized, directed=G.is_directed()

246 )

247 if G.is_multigraph():

248 betweenness = _add_edge_keys(G, betweenness, weight=weight)

249 return betweenness

250

251

252# helpers for betweenness centrality

253

254

255def _single_source_shortest_path_basic(G, s):

256 S = []

257 P = {}

258 for v in G:

259 P[v] = []

260 sigma = dict.fromkeys(G, 0.0) # sigma[v]=0 for v in G

261 D = {}

262 sigma[s] = 1.0

263 D[s] = 0

264 Q = deque([s])

265 while Q: # use BFS to find shortest paths

266 v = Q.popleft()

267 S.append(v)

268 Dv = D[v]

269 sigmav = sigma[v]

270 for w in G[v]:

271 if w not in D:

272 Q.append(w)

273 D[w] = Dv + 1

274 if D[w] == Dv + 1: # this is a shortest path, count paths

275 sigma[w] += sigmav

276 P[w].append(v) # predecessors

277 return S, P, sigma, D

278

279

280def _single_source_dijkstra_path_basic(G, s, weight):

281 weight = _weight_function(G, weight)

282 # modified from Eppstein

283 S = []

284 P = {}

285 for v in G:

286 P[v] = []

287 sigma = dict.fromkeys(G, 0.0) # sigma[v]=0 for v in G

288 D = {}

289 sigma[s] = 1.0

290 push = heappush

291 pop = heappop

292 seen = {s: 0}

293 c = count()

294 Q = [] # use Q as heap with (distance,node id) tuples

295 push(Q, (0, next(c), s, s))

296 while Q:

297 (dist, _, pred, v) = pop(Q)

298 if v in D:

299 continue # already searched this node.

300 sigma[v] += sigma[pred] # count paths

301 S.append(v)

302 D[v] = dist

303 for w, edgedata in G[v].items():

304 vw_dist = dist + weight(v, w, edgedata)

305 if w not in D and (w not in seen or vw_dist < seen[w]):

306 seen[w] = vw_dist

307 push(Q, (vw_dist, next(c), v, w))

308 sigma[w] = 0.0

309 P[w] = [v]

310 elif vw_dist == seen[w]: # handle equal paths

311 sigma[w] += sigma[v]

312 P[w].append(v)

313 return S, P, sigma, D

314

315

316def _accumulate_basic(betweenness, S, P, sigma, s):

317 delta = dict.fromkeys(S, 0)

318 while S:

319 w = S.pop()

320 coeff = (1 + delta[w]) / sigma[w]

321 for v in P[w]:

322 delta[v] += sigma[v] * coeff

323 if w != s:

324 betweenness[w] += delta[w]

325 return betweenness, delta

326

327

328def _accumulate_endpoints(betweenness, S, P, sigma, s):

329 betweenness[s] += len(S) - 1

330 delta = dict.fromkeys(S, 0)

331 while S:

332 w = S.pop()

333 coeff = (1 + delta[w]) / sigma[w]

334 for v in P[w]:

335 delta[v] += sigma[v] * coeff

336 if w != s:

337 betweenness[w] += delta[w] + 1

338 return betweenness, delta

339

340

341def _accumulate_edges(betweenness, S, P, sigma, s):

342 delta = dict.fromkeys(S, 0)

343 while S:

344 w = S.pop()

345 coeff = (1 + delta[w]) / sigma[w]

346 for v in P[w]:

347 c = sigma[v] * coeff

348 if (v, w) not in betweenness:

349 betweenness[(w, v)] += c

350 else:

351 betweenness[(v, w)] += c

352 delta[v] += c

353 if w != s:

354 betweenness[w] += delta[w]

355 return betweenness

356

357

358def _rescale(betweenness, n, normalized, directed=False, k=None, endpoints=False):

359 if normalized:

360 if endpoints:

361 if n < 2:

362 scale = None # no normalization

363 else:

364 # Scale factor should include endpoint nodes

365 scale = 1 / (n * (n - 1))

366 elif n <= 2:

367 scale = None # no normalization b=0 for all nodes

368 else:

369 scale = 1 / ((n - 1) * (n - 2))

370 else: # rescale by 2 for undirected graphs

371 if not directed:

372 scale = 0.5

373 else:

374 scale = None

375 if scale is not None:

376 if k is not None:

377 scale = scale * n / k

378 for v in betweenness:

379 betweenness[v] *= scale

380 return betweenness

381

382

383def _rescale_e(betweenness, n, normalized, directed=False, k=None):

384 if normalized:

385 if n <= 1:

386 scale = None # no normalization b=0 for all nodes

387 else:

388 scale = 1 / (n * (n - 1))

389 else: # rescale by 2 for undirected graphs

390 if not directed:

391 scale = 0.5

392 else:

393 scale = None

394 if scale is not None:

395 if k is not None:

396 scale = scale * n / k

397 for v in betweenness:

398 betweenness[v] *= scale

399 return betweenness

400

401

402@not_implemented_for("graph")

403def _add_edge_keys(G, betweenness, weight=None):

404 r"""Adds the corrected betweenness centrality (BC) values for multigraphs.

405

406 Parameters

407 ----------

408 G : NetworkX graph.

409

410 betweenness : dictionary

411 Dictionary mapping adjacent node tuples to betweenness centrality values.

412

413 weight : string or function

414 See `_weight_function` for details. Defaults to `None`.

415

416 Returns

417 -------

418 edges : dictionary

419 The parameter `betweenness` including edges with keys and their

420 betweenness centrality values.

421

422 The BC value is divided among edges of equal weight.

423 """

424 _weight = _weight_function(G, weight)

425

426 edge_bc = dict.fromkeys(G.edges, 0.0)

427 for u, v in betweenness:

428 d = G[u][v]

429 wt = _weight(u, v, d)

430 keys = [k for k in d if _weight(u, v, {k: d[k]}) == wt]

431 bc = betweenness[(u, v)] / len(keys)

432 for k in keys:

433 edge_bc[(u, v, k)] = bc

434

435 return edge_bc