Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/networkx/algorithms/centrality/betweenness.py: 13%

178 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-10-20 07:00 +0000

1"""Betweenness centrality measures.""" 

2from collections import deque 

3from heapq import heappop, heappush 

4from itertools import count 

5 

6import networkx as nx 

7from networkx.algorithms.shortest_paths.weighted import _weight_function 

8from networkx.utils import py_random_state 

9from networkx.utils.decorators import not_implemented_for 

10 

11__all__ = ["betweenness_centrality", "edge_betweenness_centrality"] 

12 

13 

14@py_random_state(5) 

15@nx._dispatch(edge_attrs="weight") 

16def betweenness_centrality( 

17 G, k=None, normalized=True, weight=None, endpoints=False, seed=None 

18): 

19 r"""Compute the shortest-path betweenness centrality for nodes. 

20 

21 Betweenness centrality of a node $v$ is the sum of the 

22 fraction of all-pairs shortest paths that pass through $v$ 

23 

24 .. math:: 

25 

26 c_B(v) =\sum_{s,t \in V} \frac{\sigma(s, t|v)}{\sigma(s, t)} 

27 

28 where $V$ is the set of nodes, $\sigma(s, t)$ is the number of 

29 shortest $(s, t)$-paths, and $\sigma(s, t|v)$ is the number of 

30 those paths passing through some node $v$ other than $s, t$. 

31 If $s = t$, $\sigma(s, t) = 1$, and if $v \in {s, t}$, 

32 $\sigma(s, t|v) = 0$ [2]_. 

33 

34 Parameters 

35 ---------- 

36 G : graph 

37 A NetworkX graph. 

38 

39 k : int, optional (default=None) 

40 If k is not None use k node samples to estimate betweenness. 

41 The value of k <= n where n is the number of nodes in the graph. 

42 Higher values give better approximation. 

43 

44 normalized : bool, optional 

45 If True the betweenness values are normalized by `2/((n-1)(n-2))` 

46 for graphs, and `1/((n-1)(n-2))` for directed graphs where `n` 

47 is the number of nodes in G. 

48 

49 weight : None or string, optional (default=None) 

50 If None, all edge weights are considered equal. 

51 Otherwise holds the name of the edge attribute used as weight. 

52 Weights are used to calculate weighted shortest paths, so they are 

53 interpreted as distances. 

54 

55 endpoints : bool, optional 

56 If True include the endpoints in the shortest path counts. 

57 

58 seed : integer, random_state, or None (default) 

59 Indicator of random number generation state. 

60 See :ref:`Randomness<randomness>`. 

61 Note that this is only used if k is not None. 

62 

63 Returns 

64 ------- 

65 nodes : dictionary 

66 Dictionary of nodes with betweenness centrality as the value. 

67 

68 See Also 

69 -------- 

70 edge_betweenness_centrality 

71 load_centrality 

72 

73 Notes 

74 ----- 

75 The algorithm is from Ulrik Brandes [1]_. 

76 See [4]_ for the original first published version and [2]_ for details on 

77 algorithms for variations and related metrics. 

78 

79 For approximate betweenness calculations set k=#samples to use 

80 k nodes ("pivots") to estimate the betweenness values. For an estimate 

81 of the number of pivots needed see [3]_. 

82 

83 For weighted graphs the edge weights must be greater than zero. 

84 Zero edge weights can produce an infinite number of equal length 

85 paths between pairs of nodes. 

86 

87 The total number of paths between source and target is counted 

88 differently for directed and undirected graphs. Directed paths 

89 are easy to count. Undirected paths are tricky: should a path 

90 from "u" to "v" count as 1 undirected path or as 2 directed paths? 

91 

92 For betweenness_centrality we report the number of undirected 

93 paths when G is undirected. 

94 

95 For betweenness_centrality_subset the reporting is different. 

96 If the source and target subsets are the same, then we want 

97 to count undirected paths. But if the source and target subsets 

98 differ -- for example, if sources is {0} and targets is {1}, 

99 then we are only counting the paths in one direction. They are 

100 undirected paths but we are counting them in a directed way. 

101 To count them as undirected paths, each should count as half a path. 

102 

103 This algorithm is not guaranteed to be correct if edge weights 

104 are floating point numbers. As a workaround you can use integer 

105 numbers by multiplying the relevant edge attributes by a convenient 

106 constant factor (eg 100) and converting to integers. 

107 

108 References 

109 ---------- 

110 .. [1] Ulrik Brandes: 

111 A Faster Algorithm for Betweenness Centrality. 

112 Journal of Mathematical Sociology 25(2):163-177, 2001. 

113 https://doi.org/10.1080/0022250X.2001.9990249 

114 .. [2] Ulrik Brandes: 

115 On Variants of Shortest-Path Betweenness 

116 Centrality and their Generic Computation. 

117 Social Networks 30(2):136-145, 2008. 

118 https://doi.org/10.1016/j.socnet.2007.11.001 

119 .. [3] Ulrik Brandes and Christian Pich: 

120 Centrality Estimation in Large Networks. 

121 International Journal of Bifurcation and Chaos 17(7):2303-2318, 2007. 

122 https://dx.doi.org/10.1142/S0218127407018403 

123 .. [4] Linton C. Freeman: 

124 A set of measures of centrality based on betweenness. 

125 Sociometry 40: 35–41, 1977 

126 https://doi.org/10.2307/3033543 

127 """ 

128 betweenness = dict.fromkeys(G, 0.0) # b[v]=0 for v in G 

129 if k is None: 

130 nodes = G 

131 else: 

132 nodes = seed.sample(list(G.nodes()), k) 

133 for s in nodes: 

134 # single source shortest paths 

135 if weight is None: # use BFS 

136 S, P, sigma, _ = _single_source_shortest_path_basic(G, s) 

137 else: # use Dijkstra's algorithm 

138 S, P, sigma, _ = _single_source_dijkstra_path_basic(G, s, weight) 

139 # accumulation 

140 if endpoints: 

141 betweenness, _ = _accumulate_endpoints(betweenness, S, P, sigma, s) 

142 else: 

143 betweenness, _ = _accumulate_basic(betweenness, S, P, sigma, s) 

144 # rescaling 

145 betweenness = _rescale( 

146 betweenness, 

147 len(G), 

148 normalized=normalized, 

149 directed=G.is_directed(), 

150 k=k, 

151 endpoints=endpoints, 

152 ) 

153 return betweenness 

154 

155 

156@py_random_state(4) 

157@nx._dispatch(edge_attrs="weight") 

158def edge_betweenness_centrality(G, k=None, normalized=True, weight=None, seed=None): 

159 r"""Compute betweenness centrality for edges. 

160 

161 Betweenness centrality of an edge $e$ is the sum of the 

162 fraction of all-pairs shortest paths that pass through $e$ 

163 

164 .. math:: 

165 

166 c_B(e) =\sum_{s,t \in V} \frac{\sigma(s, t|e)}{\sigma(s, t)} 

167 

168 where $V$ is the set of nodes, $\sigma(s, t)$ is the number of 

169 shortest $(s, t)$-paths, and $\sigma(s, t|e)$ is the number of 

170 those paths passing through edge $e$ [2]_. 

171 

172 Parameters 

173 ---------- 

174 G : graph 

175 A NetworkX graph. 

176 

177 k : int, optional (default=None) 

178 If k is not None use k node samples to estimate betweenness. 

179 The value of k <= n where n is the number of nodes in the graph. 

180 Higher values give better approximation. 

181 

182 normalized : bool, optional 

183 If True the betweenness values are normalized by $2/(n(n-1))$ 

184 for graphs, and $1/(n(n-1))$ for directed graphs where $n$ 

185 is the number of nodes in G. 

186 

187 weight : None or string, optional (default=None) 

188 If None, all edge weights are considered equal. 

189 Otherwise holds the name of the edge attribute used as weight. 

190 Weights are used to calculate weighted shortest paths, so they are 

191 interpreted as distances. 

192 

193 seed : integer, random_state, or None (default) 

194 Indicator of random number generation state. 

195 See :ref:`Randomness<randomness>`. 

196 Note that this is only used if k is not None. 

197 

198 Returns 

199 ------- 

200 edges : dictionary 

201 Dictionary of edges with betweenness centrality as the value. 

202 

203 See Also 

204 -------- 

205 betweenness_centrality 

206 edge_load 

207 

208 Notes 

209 ----- 

210 The algorithm is from Ulrik Brandes [1]_. 

211 

212 For weighted graphs the edge weights must be greater than zero. 

213 Zero edge weights can produce an infinite number of equal length 

214 paths between pairs of nodes. 

215 

216 References 

217 ---------- 

218 .. [1] A Faster Algorithm for Betweenness Centrality. Ulrik Brandes, 

219 Journal of Mathematical Sociology 25(2):163-177, 2001. 

220 https://doi.org/10.1080/0022250X.2001.9990249 

221 .. [2] Ulrik Brandes: On Variants of Shortest-Path Betweenness 

222 Centrality and their Generic Computation. 

223 Social Networks 30(2):136-145, 2008. 

224 https://doi.org/10.1016/j.socnet.2007.11.001 

225 """ 

226 betweenness = dict.fromkeys(G, 0.0) # b[v]=0 for v in G 

227 # b[e]=0 for e in G.edges() 

228 betweenness.update(dict.fromkeys(G.edges(), 0.0)) 

229 if k is None: 

230 nodes = G 

231 else: 

232 nodes = seed.sample(list(G.nodes()), k) 

233 for s in nodes: 

234 # single source shortest paths 

235 if weight is None: # use BFS 

236 S, P, sigma, _ = _single_source_shortest_path_basic(G, s) 

237 else: # use Dijkstra's algorithm 

238 S, P, sigma, _ = _single_source_dijkstra_path_basic(G, s, weight) 

239 # accumulation 

240 betweenness = _accumulate_edges(betweenness, S, P, sigma, s) 

241 # rescaling 

242 for n in G: # remove nodes to only return edges 

243 del betweenness[n] 

244 betweenness = _rescale_e( 

245 betweenness, len(G), normalized=normalized, directed=G.is_directed() 

246 ) 

247 if G.is_multigraph(): 

248 betweenness = _add_edge_keys(G, betweenness, weight=weight) 

249 return betweenness 

250 

251 

252# helpers for betweenness centrality 

253 

254 

255def _single_source_shortest_path_basic(G, s): 

256 S = [] 

257 P = {} 

258 for v in G: 

259 P[v] = [] 

260 sigma = dict.fromkeys(G, 0.0) # sigma[v]=0 for v in G 

261 D = {} 

262 sigma[s] = 1.0 

263 D[s] = 0 

264 Q = deque([s]) 

265 while Q: # use BFS to find shortest paths 

266 v = Q.popleft() 

267 S.append(v) 

268 Dv = D[v] 

269 sigmav = sigma[v] 

270 for w in G[v]: 

271 if w not in D: 

272 Q.append(w) 

273 D[w] = Dv + 1 

274 if D[w] == Dv + 1: # this is a shortest path, count paths 

275 sigma[w] += sigmav 

276 P[w].append(v) # predecessors 

277 return S, P, sigma, D 

278 

279 

280def _single_source_dijkstra_path_basic(G, s, weight): 

281 weight = _weight_function(G, weight) 

282 # modified from Eppstein 

283 S = [] 

284 P = {} 

285 for v in G: 

286 P[v] = [] 

287 sigma = dict.fromkeys(G, 0.0) # sigma[v]=0 for v in G 

288 D = {} 

289 sigma[s] = 1.0 

290 push = heappush 

291 pop = heappop 

292 seen = {s: 0} 

293 c = count() 

294 Q = [] # use Q as heap with (distance,node id) tuples 

295 push(Q, (0, next(c), s, s)) 

296 while Q: 

297 (dist, _, pred, v) = pop(Q) 

298 if v in D: 

299 continue # already searched this node. 

300 sigma[v] += sigma[pred] # count paths 

301 S.append(v) 

302 D[v] = dist 

303 for w, edgedata in G[v].items(): 

304 vw_dist = dist + weight(v, w, edgedata) 

305 if w not in D and (w not in seen or vw_dist < seen[w]): 

306 seen[w] = vw_dist 

307 push(Q, (vw_dist, next(c), v, w)) 

308 sigma[w] = 0.0 

309 P[w] = [v] 

310 elif vw_dist == seen[w]: # handle equal paths 

311 sigma[w] += sigma[v] 

312 P[w].append(v) 

313 return S, P, sigma, D 

314 

315 

316def _accumulate_basic(betweenness, S, P, sigma, s): 

317 delta = dict.fromkeys(S, 0) 

318 while S: 

319 w = S.pop() 

320 coeff = (1 + delta[w]) / sigma[w] 

321 for v in P[w]: 

322 delta[v] += sigma[v] * coeff 

323 if w != s: 

324 betweenness[w] += delta[w] 

325 return betweenness, delta 

326 

327 

328def _accumulate_endpoints(betweenness, S, P, sigma, s): 

329 betweenness[s] += len(S) - 1 

330 delta = dict.fromkeys(S, 0) 

331 while S: 

332 w = S.pop() 

333 coeff = (1 + delta[w]) / sigma[w] 

334 for v in P[w]: 

335 delta[v] += sigma[v] * coeff 

336 if w != s: 

337 betweenness[w] += delta[w] + 1 

338 return betweenness, delta 

339 

340 

341def _accumulate_edges(betweenness, S, P, sigma, s): 

342 delta = dict.fromkeys(S, 0) 

343 while S: 

344 w = S.pop() 

345 coeff = (1 + delta[w]) / sigma[w] 

346 for v in P[w]: 

347 c = sigma[v] * coeff 

348 if (v, w) not in betweenness: 

349 betweenness[(w, v)] += c 

350 else: 

351 betweenness[(v, w)] += c 

352 delta[v] += c 

353 if w != s: 

354 betweenness[w] += delta[w] 

355 return betweenness 

356 

357 

358def _rescale(betweenness, n, normalized, directed=False, k=None, endpoints=False): 

359 if normalized: 

360 if endpoints: 

361 if n < 2: 

362 scale = None # no normalization 

363 else: 

364 # Scale factor should include endpoint nodes 

365 scale = 1 / (n * (n - 1)) 

366 elif n <= 2: 

367 scale = None # no normalization b=0 for all nodes 

368 else: 

369 scale = 1 / ((n - 1) * (n - 2)) 

370 else: # rescale by 2 for undirected graphs 

371 if not directed: 

372 scale = 0.5 

373 else: 

374 scale = None 

375 if scale is not None: 

376 if k is not None: 

377 scale = scale * n / k 

378 for v in betweenness: 

379 betweenness[v] *= scale 

380 return betweenness 

381 

382 

383def _rescale_e(betweenness, n, normalized, directed=False, k=None): 

384 if normalized: 

385 if n <= 1: 

386 scale = None # no normalization b=0 for all nodes 

387 else: 

388 scale = 1 / (n * (n - 1)) 

389 else: # rescale by 2 for undirected graphs 

390 if not directed: 

391 scale = 0.5 

392 else: 

393 scale = None 

394 if scale is not None: 

395 if k is not None: 

396 scale = scale * n / k 

397 for v in betweenness: 

398 betweenness[v] *= scale 

399 return betweenness 

400 

401 

402@not_implemented_for("graph") 

403def _add_edge_keys(G, betweenness, weight=None): 

404 r"""Adds the corrected betweenness centrality (BC) values for multigraphs. 

405 

406 Parameters 

407 ---------- 

408 G : NetworkX graph. 

409 

410 betweenness : dictionary 

411 Dictionary mapping adjacent node tuples to betweenness centrality values. 

412 

413 weight : string or function 

414 See `_weight_function` for details. Defaults to `None`. 

415 

416 Returns 

417 ------- 

418 edges : dictionary 

419 The parameter `betweenness` including edges with keys and their 

420 betweenness centrality values. 

421 

422 The BC value is divided among edges of equal weight. 

423 """ 

424 _weight = _weight_function(G, weight) 

425 

426 edge_bc = dict.fromkeys(G.edges, 0.0) 

427 for u, v in betweenness: 

428 d = G[u][v] 

429 wt = _weight(u, v, d) 

430 keys = [k for k in d if _weight(u, v, {k: d[k]}) == wt] 

431 bc = betweenness[(u, v)] / len(keys) 

432 for k in keys: 

433 edge_bc[(u, v, k)] = bc 

434 

435 return edge_bc