Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/networkx/algorithms/community/modularity_max.py: 9%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

160 statements  

1"""Functions for detecting communities based on modularity.""" 

2 

3import random 

4from collections import defaultdict 

5from copy import deepcopy 

6 

7import networkx as nx 

8from networkx.algorithms.community.quality import modularity 

9from networkx.utils.mapped_queue import MappedQueue 

10 

# Public API of this module: the CNM greedy maximizer and the naive
# O(n^4) reference implementation.
__all__ = [
    "greedy_modularity_communities",
    "naive_greedy_modularity_communities",
]

15 

16 

def _greedy_modularity_communities_generator(G, weight=None, resolution=1):
    r"""Yield community partitions of G and the modularity change at each step.

    This function performs Clauset-Newman-Moore greedy modularity maximization [2]_
    At each step of the process it yields the change in modularity that will occur in
    the next step followed by yielding the new community partition after that step.

    Greedy modularity maximization begins with each node in its own community
    and repeatedly joins the pair of communities that lead to the largest
    modularity until one community contains all nodes (the partition has one set).

    This function maximizes the generalized modularity, where `resolution`
    is the resolution parameter, often expressed as $\gamma$.
    See :func:`~networkx.algorithms.community.quality.modularity`.

    Parameters
    ----------
    G : NetworkX graph

    weight : string or None, optional (default=None)
        The name of an edge attribute that holds the numerical value used
        as a weight.  If None, then each edge has weight 1.
        The degree is the sum of the edge weights adjacent to the node.

    resolution : float (default=1)
        If resolution is less than 1, modularity favors larger communities.
        Greater than 1 favors smaller communities.

    Yields
    ------
    Alternating yield statements produce the following two objects:

    communities: dict_values
        A dict_values of frozensets of nodes, one for each community.
        This represents a partition of the nodes of the graph into communities.
        The first yield is the partition with each node in its own community.

    dq: float
        The change in modularity when merging the next two communities
        that leads to the largest modularity.

    See Also
    --------
    modularity

    References
    ----------
    .. [1] Newman, M. E. J. "Networks: An Introduction", page 224
       Oxford University Press 2011.
    .. [2] Clauset, A., Newman, M. E., & Moore, C.
       "Finding community structure in very large networks."
       Physical Review E 70(6), 2004.
    .. [3] Reichardt and Bornholdt "Statistical Mechanics of Community
       Detection" Phys. Rev. E74, 2006.
    .. [4] Newman, M. E. J."Analysis of weighted networks"
       Physical Review E 70(5 Pt 2):056131, 2004.
    """
    directed = G.is_directed()
    # NOTE(review): N is assigned but not referenced below in this function.
    N = G.number_of_nodes()

    # Count edges (or the sum of edge-weights for weighted graphs).
    # NOTE: raises ZeroDivisionError on an edgeless graph; the public caller
    # guards that case before invoking this generator.
    m = G.size(weight)
    q0 = 1 / m

    # Calculate degrees (notation from the papers)
    # a : the fraction of (weighted) out-degree for each node
    # b : the fraction of (weighted) in-degree for each node
    # For undirected graphs a and b are the same dict (half the degree
    # fraction each), so the directed formulas below reduce correctly.
    if directed:
        a = {node: deg_out * q0 for node, deg_out in G.out_degree(weight=weight)}
        b = {node: deg_in * q0 for node, deg_in in G.in_degree(weight=weight)}
    else:
        a = b = {node: deg * q0 * 0.5 for node, deg in G.degree(weight=weight)}

    # this preliminary step collects the edge weights for each node pair
    # It handles multigraph and digraph and works fine for graph.
    # Self-loops are skipped: they never affect a merge's dq.
    dq_dict = defaultdict(lambda: defaultdict(float))
    for u, v, wt in G.edges(data=weight, default=1):
        if u == v:
            continue
        dq_dict[u][v] += wt
        dq_dict[v][u] += wt

    # now scale and subtract the expected edge-weights term
    for u, nbrdict in dq_dict.items():
        for v, wt in nbrdict.items():
            dq_dict[u][v] = q0 * wt - resolution * (a[u] * b[v] + b[u] * a[v])

    # Use -dq to get a max_heap instead of a min_heap
    # dq_heap holds a heap for each node's neighbors
    dq_heap = {u: MappedQueue({(u, v): -dq for v, dq in dq_dict[u].items()}) for u in G}
    # H -> all_dq_heap holds a heap with the best items for each node
    H = MappedQueue([dq_heap[n].heap[0] for n in G if len(dq_heap[n]) > 0])

    # Initialize single-node communities
    communities = {n: frozenset([n]) for n in G}
    yield communities.values()

    # Merge the two communities that lead to the largest modularity
    while len(H) > 1:
        # Find best merge
        # Remove from heap of row maxes
        # Ties will be broken by choosing the pair with lowest min community id
        try:
            negdq, u, v = H.pop()
        except IndexError:
            break
        dq = -negdq
        yield dq
        # Remove best merge from row u heap
        dq_heap[u].pop()
        # Push new row max onto H
        if len(dq_heap[u]) > 0:
            H.push(dq_heap[u].heap[0])
        # If this element was also at the root of row v, we need to remove the
        # duplicate entry from H
        if dq_heap[v].heap[0] == (v, u):
            H.remove((v, u))
            # Remove best merge from row v heap
            dq_heap[v].remove((v, u))
            # Push new row max onto H
            if len(dq_heap[v]) > 0:
                H.push(dq_heap[v].heap[0])
        else:
            # Duplicate wasn't in H, just remove from row v heap
            dq_heap[v].remove((v, u))

        # Perform merge: community u is absorbed into community v.
        communities[v] = frozenset(communities[u] | communities[v])
        del communities[u]

        # Get neighbor communities connected to the merged communities
        u_nbrs = set(dq_dict[u])
        v_nbrs = set(dq_dict[v])
        all_nbrs = (u_nbrs | v_nbrs) - {u, v}
        both_nbrs = u_nbrs & v_nbrs
        # Update dq for merge of u into v
        for w in all_nbrs:
            # Calculate new dq value (CNM update rules, one case per
            # adjacency pattern of w relative to u and v)
            if w in both_nbrs:
                dq_vw = dq_dict[v][w] + dq_dict[u][w]
            elif w in v_nbrs:
                dq_vw = dq_dict[v][w] - resolution * (a[u] * b[w] + a[w] * b[u])
            else:  # w in u_nbrs
                dq_vw = dq_dict[u][w] - resolution * (a[v] * b[w] + a[w] * b[v])
            # Update rows v and w
            for row, col in [(v, w), (w, v)]:
                dq_heap_row = dq_heap[row]
                # Update dict for v,w only (u is removed below)
                dq_dict[row][col] = dq_vw
                # Save old max of per-row heap
                if len(dq_heap_row) > 0:
                    d_oldmax = dq_heap_row.heap[0]
                else:
                    d_oldmax = None
                # Add/update heaps
                d = (row, col)
                d_negdq = -dq_vw
                # Save old value for finding heap index
                if w in v_nbrs:
                    # Update existing element in per-row heap
                    dq_heap_row.update(d, d, priority=d_negdq)
                else:
                    # We're creating a new nonzero element, add to heap
                    dq_heap_row.push(d, priority=d_negdq)
                # Update heap of row maxes if necessary
                if d_oldmax is None:
                    # No entries previously in this row, push new max
                    H.push(d, priority=d_negdq)
                else:
                    # We've updated an entry in this row, has the max changed?
                    row_max = dq_heap_row.heap[0]
                    if d_oldmax != row_max or d_oldmax.priority != row_max.priority:
                        H.update(d_oldmax, row_max)

        # Remove row/col u from dq_dict matrix
        for w in dq_dict[u]:
            # Remove from dict
            # NOTE(review): dq_old is assigned but never used afterwards.
            dq_old = dq_dict[w][u]
            del dq_dict[w][u]
            # Remove from heaps if we haven't already
            if w != v:
                # Remove both row and column
                for row, col in [(w, u), (u, w)]:
                    dq_heap_row = dq_heap[row]
                    # Check if replaced dq is row max
                    d_old = (row, col)
                    if dq_heap_row.heap[0] == d_old:
                        # Update per-row heap and heap of row maxes
                        dq_heap_row.remove(d_old)
                        H.remove(d_old)
                        # Update row max
                        if len(dq_heap_row) > 0:
                            H.push(dq_heap_row.heap[0])
                    else:
                        # Only update per-row heap
                        dq_heap_row.remove(d_old)

        del dq_dict[u]
        # Mark row u as deleted, but keep placeholder
        dq_heap[u] = MappedQueue()
        # Merge u into v and update a
        a[v] += a[u]
        a[u] = 0
        if directed:
            b[v] += b[u]
            b[u] = 0

        yield communities.values()

225 

226 

@nx._dispatchable(edge_attrs="weight")
def greedy_modularity_communities(
    G,
    weight=None,
    resolution=1,
    cutoff=1,
    best_n=None,
):
    r"""Find communities in G using greedy modularity maximization.

    Uses the Clauset-Newman-Moore greedy modularity maximization algorithm
    [2]_ to find the community partition with the largest modularity.

    Starting from singleton communities, the pair of communities whose merge
    most increases modularity is joined repeatedly.  The stopping condition
    is adjustable: `cutoff` is a lower bound on the number of communities
    (stop early to save time), while `best_n` is an upper bound (keep
    merging past the modularity maximum until at most `best_n` communities
    remain).  Setting both to n yields exactly n communities.

    The generalized modularity with resolution parameter $\gamma$ is
    maximized; see :func:`~networkx.algorithms.community.quality.modularity`.

    Parameters
    ----------
    G : NetworkX graph

    weight : string or None, optional (default=None)
        The name of an edge attribute that holds the numerical value used
        as a weight.  If None, then each edge has weight 1.
        The degree is the sum of the edge weights adjacent to the node.

    resolution : float, optional (default=1)
        If resolution is less than 1, modularity favors larger communities.
        Greater than 1 favors smaller communities.

    cutoff : int, optional (default=1)
        Minimum number of communities; merging stops at this count even if
        modularity has not yet peaked.

    best_n : int or None, optional (default=None)
        Maximum number of communities; merging continues past the modularity
        peak until at most `best_n` communities remain.  ``None`` means stop
        at the first modularity maximum.

    Raises
    ------
    ValueError : If `cutoff` or `best_n` is outside ``[1, G.number_of_nodes()]``,
        or if `best_n` < `cutoff`.

    Returns
    -------
    communities: list
        A list of frozensets of nodes, one for each community,
        sorted by length with largest communities first.

    Examples
    --------
    >>> G = nx.karate_club_graph()
    >>> c = nx.community.greedy_modularity_communities(G)
    >>> sorted(c[0])
    [8, 14, 15, 18, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33]

    See Also
    --------
    modularity

    References
    ----------
    .. [1] Newman, M. E. J. "Networks: An Introduction", page 224
       Oxford University Press 2011.
    .. [2] Clauset, A., Newman, M. E., & Moore, C.
       "Finding community structure in very large networks."
       Physical Review E 70(6), 2004.
    .. [3] Reichardt and Bornholdt "Statistical Mechanics of Community
       Detection" Phys. Rev. E74, 2006.
    .. [4] Newman, M. E. J."Analysis of weighted networks"
       Physical Review E 70(5 Pt 2):056131, 2004.
    """
    # Edgeless graph: modularity is undefined (division by zero in the
    # generator), so simply return the singleton partition.
    if not G.size():
        return [{n} for n in G]

    n_nodes = G.number_of_nodes()
    if not 1 <= cutoff <= n_nodes:
        raise ValueError(f"cutoff must be between 1 and {len(G)}. Got {cutoff}.")
    if best_n is None:
        # No upper bound requested: never force merges past the maximum.
        best_n = n_nodes
    else:
        if not 1 <= best_n <= n_nodes:
            raise ValueError(f"best_n must be between 1 and {len(G)}. Got {best_n}.")
        if best_n < cutoff:
            raise ValueError(f"Must have best_n >= cutoff. Got {best_n} < {cutoff}")
        if best_n == 1:
            # Everything ends up in one community; no work needed.
            return [set(G)]

    # The generator alternates between yielding a partition and yielding
    # the modularity change (dq) of the next merge.
    community_gen = _greedy_modularity_communities_generator(
        G, weight=weight, resolution=resolution
    )

    # First partition: every node in its own community.
    communities = next(community_gen)

    # Merge until a stopping criterion fires.
    while len(communities) > cutoff:
        try:
            dq = next(community_gen)
        except StopIteration:
            # Exhausted: communities are the connected components.  If
            # best_n demands fewer sets, keep merging the two largest
            # (disjoint) sets, which maximizes modularity among the options.
            merged = sorted(communities, key=len, reverse=True)
            while len(merged) > best_n:
                first, second, *others = merged
                merged = [first | second, *others]
            return merged

        # Stop at the modularity maximum unless best_n forces more merges.
        if dq < 0 and len(communities) <= best_n:
            break
        communities = next(community_gen)

    return sorted(communities, key=len, reverse=True)

356 

357 

@nx.utils.not_implemented_for("directed")
@nx.utils.not_implemented_for("multigraph")
@nx._dispatchable(edge_attrs="weight")
def naive_greedy_modularity_communities(G, resolution=1, weight=None):
    r"""Find communities in G using greedy modularity maximization.

    An O(n^4) reference implementation: much slower than the alternatives,
    but easy to read and verify.

    Starting from singleton communities, every pair of non-empty communities
    is trial-merged on each round and the merge giving the largest modularity
    gain is applied, until no merge increases modularity.

    The generalized modularity with resolution parameter $\gamma$ is
    maximized; see :func:`~networkx.algorithms.community.quality.modularity`.

    Parameters
    ----------
    G : NetworkX graph
        Graph must be simple and undirected.

    resolution : float (default=1)
        If resolution is less than 1, modularity favors larger communities.
        Greater than 1 favors smaller communities.

    weight : string or None, optional (default=None)
        The name of an edge attribute that holds the numerical value used
        as a weight.  If None, then each edge has weight 1.
        The degree is the sum of the edge weights adjacent to the node.

    Returns
    -------
    list
        A list of sets of nodes, one for each community,
        sorted by length with largest communities first.

    Examples
    --------
    >>> G = nx.karate_club_graph()
    >>> c = nx.community.naive_greedy_modularity_communities(G)
    >>> sorted(c[0])
    [8, 14, 15, 18, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33]

    See Also
    --------
    greedy_modularity_communities
    modularity
    """
    # Singleton partition: one frozenset per node.  Merged-away communities
    # are replaced by empty frozensets rather than removed, so indices
    # stay stable across rounds.
    communities = [frozenset([u]) for u in G.nodes()]
    merges = []  # history of applied merges as (i, j, dq) triples

    old_modularity = None
    new_modularity = modularity(G, communities, resolution=resolution, weight=weight)
    # Keep merging while the last applied merge improved modularity.
    while old_modularity is None or new_modularity > old_modularity:
        old_modularity = new_modularity
        trial = list(communities)
        to_merge = None
        for i, u in enumerate(communities):
            for j, v in enumerate(communities):
                # Consider each unordered pair once; skip emptied slots.
                if j <= i or len(u) == 0 or len(v) == 0:
                    continue
                # Trial-merge u and v in place.
                trial[j] = u | v
                trial[i] = frozenset([])
                q = modularity(G, trial, resolution=resolution, weight=weight)
                if q >= new_modularity:
                    strictly_better = q > new_modularity
                    # On an exact tie, prefer the pair with the lowest
                    # minimum index.
                    wins_tie = (
                        not strictly_better
                        and to_merge
                        and min(i, j) < min(to_merge[0], to_merge[1])
                    )
                    if strictly_better or wins_tie:
                        new_modularity = q
                        to_merge = (i, j, new_modularity - old_modularity)
                # Undo the trial merge.
                trial[i] = u
                trial[j] = v
        if to_merge is not None:
            # Apply the best merge found this round.
            merges.append(to_merge)
            i, j, dq = to_merge
            u, v = communities[i], communities[j]
            communities[j] = u | v
            communities[i] = frozenset([])
    # Drop the emptied slots and sort largest-first.
    return sorted((c for c in communities if len(c) > 0), key=len, reverse=True)