Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/networkx/algorithms/community/modularity_max.py: 9%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

159 statements  

1"""Functions for detecting communities based on modularity.""" 

2 

3from collections import defaultdict 

4 

5import networkx as nx 

6from networkx.algorithms.community.quality import modularity 

7from networkx.utils import not_implemented_for 

8from networkx.utils.mapped_queue import MappedQueue 

9 

# Public API of this module: the fast CNM heap-based implementation and the
# O(n^4) easy-to-read reference implementation.
__all__ = [
    "greedy_modularity_communities",
    "naive_greedy_modularity_communities",
]

14 

15 

def _greedy_modularity_communities_generator(G, weight=None, resolution=1):
    r"""Yield community partitions of G and the modularity change at each step.

    This function performs Clauset-Newman-Moore greedy modularity maximization [2]_
    At each step of the process it yields the change in modularity that will occur in
    the next step followed by yielding the new community partition after that step.

    Greedy modularity maximization begins with each node in its own community
    and repeatedly joins the pair of communities that lead to the largest
    modularity until one community contains all nodes (the partition has one set).

    This function maximizes the generalized modularity, where `resolution`
    is the resolution parameter, often expressed as $\gamma$.
    See :func:`~networkx.algorithms.community.quality.modularity`.

    Parameters
    ----------
    G : NetworkX graph

    weight : string or None, optional (default=None)
        The name of an edge attribute that holds the numerical value used
        as a weight.  If None, then each edge has weight 1.
        The degree is the sum of the edge weights adjacent to the node.

    resolution : float (default=1)
        If resolution is less than 1, modularity favors larger communities.
        Greater than 1 favors smaller communities.

    Yields
    ------
    Alternating yield statements produce the following two objects:

    communities: dict_values
        A dict_values of frozensets of nodes, one for each community.
        This represents a partition of the nodes of the graph into communities.
        The first yield is the partition with each node in its own community.

    dq: float
        The change in modularity when merging the next two communities
        that leads to the largest modularity.

    See Also
    --------
    modularity

    References
    ----------
    .. [1] Newman, M. E. J. "Networks: An Introduction", page 224
       Oxford University Press 2011.
    .. [2] Clauset, A., Newman, M. E., & Moore, C.
       "Finding community structure in very large networks."
       Physical Review E 70(6), 2004.
    .. [3] Reichardt and Bornholdt "Statistical Mechanics of Community
       Detection" Phys. Rev. E74, 2006.
    .. [4] Newman, M. E. J."Analysis of weighted networks"
       Physical Review E 70(5 Pt 2):056131, 2004.
    """
    directed = G.is_directed()
    # NOTE(review): N is assigned but never read in this function.
    N = G.number_of_nodes()

    # Count edges (or the sum of edge-weights for weighted graphs)
    m = G.size(weight)
    # NOTE(review): raises ZeroDivisionError when G has no edges (m == 0);
    # greedy_modularity_communities guards this with `if not G.size()`.
    q0 = 1 / m

    # Calculate degrees (notation from the papers)
    # a : the fraction of (weighted) out-degree for each node
    # b : the fraction of (weighted) in-degree for each node
    if directed:
        a = {node: deg_out * q0 for node, deg_out in G.out_degree(weight=weight)}
        b = {node: deg_in * q0 for node, deg_in in G.in_degree(weight=weight)}
    else:
        # Undirected: in- and out-fractions coincide (a and b alias one dict),
        # and each endpoint gets half of the edge weight (hence * 0.5).
        a = b = {node: deg * q0 * 0.5 for node, deg in G.degree(weight=weight)}

    # this preliminary step collects the edge weights for each node pair
    # It handles multigraph and digraph and works fine for graph.
    dq_dict = defaultdict(lambda: defaultdict(float))
    for u, v, wt in G.edges(data=weight, default=1):
        # Self-loops never change modularity when merging two distinct
        # communities, so they are skipped here.
        if u == v:
            continue
        dq_dict[u][v] += wt
        dq_dict[v][u] += wt

    # now scale and subtract the expected edge-weights term
    for u, nbrdict in dq_dict.items():
        for v, wt in nbrdict.items():
            dq_dict[u][v] = q0 * wt - resolution * (a[u] * b[v] + b[u] * a[v])

    # Use -dq to get a max_heap instead of a min_heap
    # dq_heap holds a heap for each node's neighbors
    dq_heap = {u: MappedQueue({(u, v): -dq for v, dq in dq_dict[u].items()}) for u in G}
    # H -> all_dq_heap holds a heap with the best items for each node
    H = MappedQueue([dq_heap[n].heap[0] for n in G if len(dq_heap[n]) > 0])

    # Initialize single-node communities
    communities = {n: frozenset([n]) for n in G}
    yield communities.values()

    # Merge the two communities that lead to the largest modularity
    while len(H) > 1:
        # Find best merge
        # Remove from heap of row maxes
        # Ties will be broken by choosing the pair with lowest min community id
        try:
            negdq, u, v = H.pop()
        except IndexError:
            break
        dq = -negdq
        yield dq
        # Remove best merge from row u heap
        dq_heap[u].pop()
        # Push new row max onto H
        if len(dq_heap[u]) > 0:
            H.push(dq_heap[u].heap[0])
        # If this element was also at the root of row v, we need to remove the
        # duplicate entry from H
        if dq_heap[v].heap[0] == (v, u):
            H.remove((v, u))
            # Remove best merge from row v heap
            dq_heap[v].remove((v, u))
            # Push new row max onto H
            if len(dq_heap[v]) > 0:
                H.push(dq_heap[v].heap[0])
        else:
            # Duplicate wasn't in H, just remove from row v heap
            dq_heap[v].remove((v, u))

        # Perform merge (u is absorbed into v; v keys the merged community)
        communities[v] = frozenset(communities[u] | communities[v])
        del communities[u]

        # Get neighbor communities connected to the merged communities
        u_nbrs = set(dq_dict[u])
        v_nbrs = set(dq_dict[v])
        all_nbrs = (u_nbrs | v_nbrs) - {u, v}
        both_nbrs = u_nbrs & v_nbrs
        # Update dq for merge of u into v
        for w in all_nbrs:
            # Calculate new dq value (CNM update rules depending on whether w
            # was adjacent to both merged communities or only one of them)
            if w in both_nbrs:
                dq_vw = dq_dict[v][w] + dq_dict[u][w]
            elif w in v_nbrs:
                dq_vw = dq_dict[v][w] - resolution * (a[u] * b[w] + a[w] * b[u])
            else:  # w in u_nbrs
                dq_vw = dq_dict[u][w] - resolution * (a[v] * b[w] + a[w] * b[v])
            # Update rows v and w
            for row, col in [(v, w), (w, v)]:
                dq_heap_row = dq_heap[row]
                # Update dict for v,w only (u is removed below)
                dq_dict[row][col] = dq_vw
                # Save old max of per-row heap
                if len(dq_heap_row) > 0:
                    d_oldmax = dq_heap_row.heap[0]
                else:
                    d_oldmax = None
                # Add/update heaps
                d = (row, col)
                d_negdq = -dq_vw
                # Save old value for finding heap index
                if w in v_nbrs:
                    # Update existing element in per-row heap
                    dq_heap_row.update(d, d, priority=d_negdq)
                else:
                    # We're creating a new nonzero element, add to heap
                    dq_heap_row.push(d, priority=d_negdq)
                # Update heap of row maxes if necessary
                if d_oldmax is None:
                    # No entries previously in this row, push new max
                    H.push(d, priority=d_negdq)
                else:
                    # We've updated an entry in this row, has the max changed?
                    row_max = dq_heap_row.heap[0]
                    if d_oldmax != row_max or d_oldmax.priority != row_max.priority:
                        H.update(d_oldmax, row_max)

        # Remove row/col u from dq_dict matrix
        for w in dq_dict[u]:
            # Remove from dict
            dq_old = dq_dict[w][u]
            del dq_dict[w][u]
            # Remove from heaps if we haven't already
            if w != v:
                # Remove both row and column
                for row, col in [(w, u), (u, w)]:
                    dq_heap_row = dq_heap[row]
                    # Check if replaced dq is row max
                    d_old = (row, col)
                    if dq_heap_row.heap[0] == d_old:
                        # Update per-row heap and heap of row maxes
                        dq_heap_row.remove(d_old)
                        H.remove(d_old)
                        # Update row max
                        if len(dq_heap_row) > 0:
                            H.push(dq_heap_row.heap[0])
                    else:
                        # Only update per-row heap
                        dq_heap_row.remove(d_old)

        del dq_dict[u]
        # Mark row u as deleted, but keep placeholder
        dq_heap[u] = MappedQueue()
        # Merge u into v and update a
        a[v] += a[u]
        a[u] = 0
        if directed:
            b[v] += b[u]
            b[u] = 0

        yield communities.values()

224 

225 

@nx._dispatchable(edge_attrs="weight")
def greedy_modularity_communities(
    G,
    weight=None,
    resolution=1,
    cutoff=1,
    best_n=None,
):
    r"""Find communities in G using greedy modularity maximization.

    This function uses Clauset-Newman-Moore greedy modularity maximization [2]_
    to find the community partition with the largest modularity.

    Starting from singleton communities, the pair of communities whose merge
    most increases modularity is joined repeatedly. Two keyword arguments
    control when the merging stops: `cutoff` is a lower bound on the number of
    communities (stop early to save time), and `best_n` is an upper bound
    (keep merging past the modularity maximum until at most `best_n`
    communities remain). Setting both `cutoff` and `best_n` to the same value
    n yields exactly n communities.

    The generalized modularity with resolution parameter $\gamma$
    (`resolution`) is maximized.
    See :func:`~networkx.algorithms.community.quality.modularity`.

    Parameters
    ----------
    G : NetworkX graph

    weight : string or None, optional (default=None)
        Name of the edge attribute holding the numerical weight of each edge.
        If None, every edge counts as weight 1. A node's degree is the sum of
        the weights of its incident edges.

    resolution : float, optional (default=1)
        Values below 1 favor larger communities; values above 1 favor
        smaller ones.

    cutoff : int, optional (default=1)
        Minimum number of communities; merging stops at this count even if
        modularity has not peaked yet.

    best_n : int or None, optional (default=None)
        Maximum number of communities; merging continues past the modularity
        peak until at most `best_n` communities remain. ``None`` means stop
        at the peak.

    Raises
    ------
    ValueError : If `cutoff` or `best_n` lies outside
        ``[1, G.number_of_nodes()]``, or if `best_n` < `cutoff`.

    Returns
    -------
    communities: list
        Frozensets of nodes, one per community, largest first.

    Examples
    --------
    >>> G = nx.karate_club_graph()
    >>> c = nx.community.greedy_modularity_communities(G)
    >>> sorted(c[0])
    [8, 14, 15, 18, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33]

    See Also
    --------
    modularity

    References
    ----------
    .. [1] Newman, M. E. J. "Networks: An Introduction", page 224
       Oxford University Press 2011.
    .. [2] Clauset, A., Newman, M. E., & Moore, C.
       "Finding community structure in very large networks."
       Physical Review E 70(6), 2004.
    .. [3] Reichardt and Bornholdt "Statistical Mechanics of Community
       Detection" Phys. Rev. E74, 2006.
    .. [4] Newman, M. E. J."Analysis of weighted networks"
       Physical Review E 70(5 Pt 2):056131, 2004.
    """
    # An edgeless graph: every node forms its own community.
    if not G.size():
        return [{n} for n in G]

    # Validate the stopping-condition parameters.
    if not 1 <= cutoff <= G.number_of_nodes():
        raise ValueError(f"cutoff must be between 1 and {len(G)}. Got {cutoff}.")
    if best_n is None:
        best_n = G.number_of_nodes()
    else:
        if not 1 <= best_n <= G.number_of_nodes():
            raise ValueError(f"best_n must be between 1 and {len(G)}. Got {best_n}.")
        if best_n < cutoff:
            raise ValueError(f"Must have best_n >= cutoff. Got {best_n} < {cutoff}")
        if best_n == 1:
            return [set(G)]

    # The generator alternately yields partitions and modularity changes.
    steps = _greedy_modularity_communities_generator(
        G, weight=weight, resolution=resolution
    )

    # First yield: the singleton partition.
    communities = next(steps)

    # Keep merging until a stopping criterion fires.
    while len(communities) > cutoff:
        try:
            dq = next(steps)
        except StopIteration:
            # Exhausted: communities are the connected components. If best_n
            # requires fewer sets, keep merging the two largest (symmetric
            # difference equals union here since the sets are disjoint).
            communities = sorted(communities, key=len, reverse=True)
            while len(communities) > best_n:
                first, second, *remaining = communities
                communities = [first ^ second, *remaining]
            return communities

        # Stop at the modularity maximum unless best_n demands more merging.
        if dq < 0 and len(communities) <= best_n:
            break
        communities = next(steps)

    return sorted(communities, key=len, reverse=True)

355 

356 

@not_implemented_for("directed")
@not_implemented_for("multigraph")
@nx._dispatchable(edge_attrs="weight")
def naive_greedy_modularity_communities(G, resolution=1, weight=None):
    r"""Find communities in G using greedy modularity maximization.

    This implementation is O(n^4), much slower than alternatives, but it is
    provided as an easy-to-understand reference implementation.

    Greedy modularity maximization begins with each node in its own community
    and joins the pair of communities that most increases modularity until no
    such pair exists.

    This function maximizes the generalized modularity, where `resolution`
    is the resolution parameter, often expressed as $\gamma$.
    See :func:`~networkx.algorithms.community.quality.modularity`.

    Parameters
    ----------
    G : NetworkX graph
        Graph must be simple and undirected.

    resolution : float (default=1)
        If resolution is less than 1, modularity favors larger communities.
        Greater than 1 favors smaller communities.

    weight : string or None, optional (default=None)
        The name of an edge attribute that holds the numerical value used
        as a weight.  If None, then each edge has weight 1.
        The degree is the sum of the edge weights adjacent to the node.

    Returns
    -------
    list
        A list of sets of nodes, one for each community.
        Sorted by length with largest communities first.

    Examples
    --------
    >>> G = nx.karate_club_graph()
    >>> c = nx.community.naive_greedy_modularity_communities(G)
    >>> sorted(c[0])
    [8, 14, 15, 18, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33]

    See Also
    --------
    greedy_modularity_communities
    modularity
    """
    # First create one community for each node.
    communities = [frozenset([u]) for u in G.nodes()]
    # Greedily merge communities until no improvement is possible.
    old_modularity = None
    new_modularity = modularity(G, communities, resolution=resolution, weight=weight)
    while old_modularity is None or new_modularity > old_modularity:
        # Save modularity for comparison.
        old_modularity = new_modularity
        # Find the best pair to merge by trying every pair. Merged-away
        # communities are left in place as empty frozensets so that indices
        # stay stable across iterations.
        trial_communities = list(communities)
        to_merge = None
        for i, u in enumerate(communities):
            for j, v in enumerate(communities):
                # Skip i==j, already-tried orderings, and empty communities.
                if j <= i or not u or not v:
                    continue
                # Tentatively merge communities u and v in the trial list.
                trial_communities[j] = u | v
                trial_communities[i] = frozenset()
                trial_modularity = modularity(
                    G, trial_communities, resolution=resolution, weight=weight
                )
                if trial_modularity >= new_modularity:
                    # Check if strictly better or a tie.
                    if trial_modularity > new_modularity:
                        # Found a new best; save modularity and pair indexes.
                        new_modularity = trial_modularity
                        to_merge = (i, j, new_modularity - old_modularity)
                    elif to_merge and min(i, j) < min(to_merge[0], to_merge[1]):
                        # Break ties by choosing the pair with lowest min id.
                        new_modularity = trial_modularity
                        to_merge = (i, j, new_modularity - old_modularity)
                # Un-merge so the trial list mirrors communities again.
                trial_communities[i] = u
                trial_communities[j] = v
        if to_merge is not None:
            # The best merge improves modularity: apply it for real.
            i, j, dq = to_merge
            u, v = communities[i], communities[j]
            communities[j] = u | v
            communities[i] = frozenset()
    # Remove empty placeholder communities and sort largest-first.
    return sorted((c for c in communities if c), key=len, reverse=True)