1"""Functions for detecting communities based on modularity."""
2
3import random
4from collections import defaultdict
5from copy import deepcopy
6
7import networkx as nx
8from networkx.algorithms.community.quality import modularity
9from networkx.utils.mapped_queue import MappedQueue
10
11__all__ = [
12 "greedy_modularity_communities",
13 "naive_greedy_modularity_communities",
14]
15
16
def _greedy_modularity_communities_generator(G, weight=None, resolution=1):
    r"""Yield community partitions of G and the modularity change at each step.

    This function performs Clauset-Newman-Moore greedy modularity maximization [2]_
    At each step of the process it yields the change in modularity that will occur in
    the next step followed by yielding the new community partition after that step.

    Greedy modularity maximization begins with each node in its own community
    and repeatedly joins the pair of communities that lead to the largest
    modularity until one community contains all nodes (the partition has one set).

    This function maximizes the generalized modularity, where `resolution`
    is the resolution parameter, often expressed as $\gamma$.
    See :func:`~networkx.algorithms.community.quality.modularity`.

    Parameters
    ----------
    G : NetworkX graph

    weight : string or None, optional (default=None)
        The name of an edge attribute that holds the numerical value used
        as a weight.  If None, then each edge has weight 1.
        The degree is the sum of the edge weights adjacent to the node.

    resolution : float (default=1)
        If resolution is less than 1, modularity favors larger communities.
        Greater than 1 favors smaller communities.

    Yields
    ------
    Alternating yield statements produce the following two objects:

    communities: dict_values
        A dict_values of frozensets of nodes, one for each community.
        This represents a partition of the nodes of the graph into communities.
        The first yield is the partition with each node in its own community.

    dq: float
        The change in modularity when merging the next two communities
        that leads to the largest modularity.

    See Also
    --------
    modularity

    References
    ----------
    .. [1] Newman, M. E. J. "Networks: An Introduction", page 224
       Oxford University Press 2011.
    .. [2] Clauset, A., Newman, M. E., & Moore, C.
       "Finding community structure in very large networks."
       Physical Review E 70(6), 2004.
    .. [3] Reichardt and Bornholdt "Statistical Mechanics of Community
       Detection" Phys. Rev. E74, 2006.
    .. [4] Newman, M. E. J."Analysis of weighted networks"
       Physical Review E 70(5 Pt 2):056131, 2004.
    """
    directed = G.is_directed()
    # NOTE(review): N is assigned but never read below; kept as-is.
    N = G.number_of_nodes()

    # Count edges (or the sum of edge-weights for weighted graphs)
    m = G.size(weight)
    # Normalization constant 1/m. Raises ZeroDivisionError when G has no
    # edges; callers are expected to guard against edgeless graphs.
    q0 = 1 / m

    # Calculate degrees (notation from the papers)
    # a : the fraction of (weighted) out-degree for each node
    # b : the fraction of (weighted) in-degree for each node
    if directed:
        a = {node: deg_out * q0 for node, deg_out in G.out_degree(weight=weight)}
        b = {node: deg_in * q0 for node, deg_in in G.in_degree(weight=weight)}
    else:
        # Undirected: each edge contributes to two node degrees, hence the 0.5
        # so that a (== b) sums to 1 over all nodes.
        a = b = {node: deg * q0 * 0.5 for node, deg in G.degree(weight=weight)}

    # this preliminary step collects the edge weights for each node pair
    # It handles multigraph and digraph and works fine for graph.
    # Self-loops are skipped: merging a community with itself is meaningless.
    dq_dict = defaultdict(lambda: defaultdict(float))
    for u, v, wt in G.edges(data=weight, default=1):
        if u == v:
            continue
        dq_dict[u][v] += wt
        dq_dict[v][u] += wt

    # now scale and subtract the expected edge-weights term
    # dq_dict[u][v] is the modularity gain of merging communities u and v.
    for u, nbrdict in dq_dict.items():
        for v, wt in nbrdict.items():
            dq_dict[u][v] = q0 * wt - resolution * (a[u] * b[v] + b[u] * a[v])

    # Use -dq to get a max_heap instead of a min_heap
    # dq_heap holds a heap for each node's neighbors
    dq_heap = {u: MappedQueue({(u, v): -dq for v, dq in dq_dict[u].items()}) for u in G}
    # H -> all_dq_heap holds a heap with the best items for each node
    # Invariant: H contains exactly each non-empty row's current best entry.
    H = MappedQueue([dq_heap[n].heap[0] for n in G if len(dq_heap[n]) > 0])

    # Initialize single-node communities
    communities = {n: frozenset([n]) for n in G}
    yield communities.values()

    # Merge the two communities that lead to the largest modularity
    # Each iteration merges community u into community v, where (u, v) gives
    # the globally best (largest) dq, then repairs both heap structures.
    while len(H) > 1:
        # Find best merge
        # Remove from heap of row maxes
        # Ties will be broken by choosing the pair with lowest min community id
        try:
            negdq, u, v = H.pop()
        except IndexError:
            break
        dq = -negdq
        yield dq
        # Remove best merge from row u heap
        dq_heap[u].pop()
        # Push new row max onto H
        if len(dq_heap[u]) > 0:
            H.push(dq_heap[u].heap[0])
        # If this element was also at the root of row v, we need to remove the
        # duplicate entry from H
        if dq_heap[v].heap[0] == (v, u):
            H.remove((v, u))
            # Remove best merge from row v heap
            dq_heap[v].remove((v, u))
            # Push new row max onto H
            if len(dq_heap[v]) > 0:
                H.push(dq_heap[v].heap[0])
        else:
            # Duplicate wasn't in H, just remove from row v heap
            dq_heap[v].remove((v, u))

        # Perform merge: u's members join v; row/column u are purged below.
        communities[v] = frozenset(communities[u] | communities[v])
        del communities[u]

        # Get neighbor communities connected to the merged communities
        u_nbrs = set(dq_dict[u])
        v_nbrs = set(dq_dict[v])
        all_nbrs = (u_nbrs | v_nbrs) - {u, v}
        both_nbrs = u_nbrs & v_nbrs
        # Update dq for merge of u into v
        for w in all_nbrs:
            # Calculate new dq value
            if w in both_nbrs:
                dq_vw = dq_dict[v][w] + dq_dict[u][w]
            elif w in v_nbrs:
                dq_vw = dq_dict[v][w] - resolution * (a[u] * b[w] + a[w] * b[u])
            else:  # w in u_nbrs
                dq_vw = dq_dict[u][w] - resolution * (a[v] * b[w] + a[w] * b[v])
            # Update rows v and w
            for row, col in [(v, w), (w, v)]:
                dq_heap_row = dq_heap[row]
                # Update dict for v,w only (u is removed below)
                dq_dict[row][col] = dq_vw
                # Save old max of per-row heap
                if len(dq_heap_row) > 0:
                    d_oldmax = dq_heap_row.heap[0]
                else:
                    d_oldmax = None
                # Add/update heaps
                d = (row, col)
                d_negdq = -dq_vw
                # Save old value for finding heap index
                # (v, w)/(w, v) already exist in the heaps iff w was a
                # neighbor of v before the merge.
                if w in v_nbrs:
                    # Update existing element in per-row heap
                    dq_heap_row.update(d, d, priority=d_negdq)
                else:
                    # We're creating a new nonzero element, add to heap
                    dq_heap_row.push(d, priority=d_negdq)
                # Update heap of row maxes if necessary
                if d_oldmax is None:
                    # No entries previously in this row, push new max
                    H.push(d, priority=d_negdq)
                else:
                    # We've updated an entry in this row, has the max changed?
                    row_max = dq_heap_row.heap[0]
                    if d_oldmax != row_max or d_oldmax.priority != row_max.priority:
                        H.update(d_oldmax, row_max)

        # Remove row/col u from dq_dict matrix
        for w in dq_dict[u]:
            # Remove from dict
            dq_old = dq_dict[w][u]
            del dq_dict[w][u]
            # Remove from heaps if we haven't already
            if w != v:
                # Remove both row and column
                for row, col in [(w, u), (u, w)]:
                    dq_heap_row = dq_heap[row]
                    # Check if replaced dq is row max
                    d_old = (row, col)
                    if dq_heap_row.heap[0] == d_old:
                        # Update per-row heap and heap of row maxes
                        dq_heap_row.remove(d_old)
                        H.remove(d_old)
                        # Update row max
                        if len(dq_heap_row) > 0:
                            H.push(dq_heap_row.heap[0])
                    else:
                        # Only update per-row heap
                        dq_heap_row.remove(d_old)

        del dq_dict[u]
        # Mark row u as deleted, but keep placeholder
        dq_heap[u] = MappedQueue()
        # Merge u into v and update a
        a[v] += a[u]
        a[u] = 0
        if directed:
            b[v] += b[u]
            b[u] = 0

        yield communities.values()
225
226
@nx._dispatchable(edge_attrs="weight")
def greedy_modularity_communities(
    G,
    weight=None,
    resolution=1,
    cutoff=1,
    best_n=None,
):
    r"""Find communities in G using greedy modularity maximization.

    This function uses Clauset-Newman-Moore greedy modularity maximization [2]_
    to find the community partition with the largest modularity.

    Greedy modularity maximization begins with each node in its own community
    and repeatedly joins the pair of communities that lead to the largest
    modularity until no further increase in modularity is possible (a maximum).
    Two keyword arguments adjust the stopping condition. `cutoff` is a lower
    limit on the number of communities so you can stop the process before
    reaching a maximum (used to save computation time). `best_n` is an upper
    limit on the number of communities so you can make the process continue
    until at most n communities remain even if the maximum modularity occurs
    for more. To obtain exactly n communities, set both `cutoff` and `best_n` to n.

    This function maximizes the generalized modularity, where `resolution`
    is the resolution parameter, often expressed as $\gamma$.
    See :func:`~networkx.algorithms.community.quality.modularity`.

    Parameters
    ----------
    G : NetworkX graph

    weight : string or None, optional (default=None)
        The name of an edge attribute that holds the numerical value used
        as a weight.  If None, then each edge has weight 1.
        The degree is the sum of the edge weights adjacent to the node.

    resolution : float, optional (default=1)
        If resolution is less than 1, modularity favors larger communities.
        Greater than 1 favors smaller communities.

    cutoff : int, optional (default=1)
        A minimum number of communities below which the merging process stops.
        The process stops at this number of communities even if modularity
        is not maximized. The goal is to let the user stop the process early.
        The process stops before the cutoff if it finds a maximum of modularity.

    best_n : int or None, optional (default=None)
        A maximum number of communities above which the merging process will
        not stop. This forces community merging to continue after modularity
        starts to decrease until `best_n` communities remain.
        If ``None``, don't force it to continue beyond a maximum.

    Raises
    ------
    ValueError : If the `cutoff` or `best_n` value is not in the range
        ``[1, G.number_of_nodes()]``, or if `best_n` < `cutoff`.

    Returns
    -------
    communities: list
        A list of frozensets of nodes, one for each community.
        Sorted by length with largest communities first.

    Examples
    --------
    >>> G = nx.karate_club_graph()
    >>> c = nx.community.greedy_modularity_communities(G)
    >>> sorted(c[0])
    [8, 14, 15, 18, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33]

    See Also
    --------
    modularity

    References
    ----------
    .. [1] Newman, M. E. J. "Networks: An Introduction", page 224
       Oxford University Press 2011.
    .. [2] Clauset, A., Newman, M. E., & Moore, C.
       "Finding community structure in very large networks."
       Physical Review E 70(6), 2004.
    .. [3] Reichardt and Bornholdt "Statistical Mechanics of Community
       Detection" Phys. Rev. E74, 2006.
    .. [4] Newman, M. E. J."Analysis of weighted networks"
       Physical Review E 70(5 Pt 2):056131, 2004.
    """
    if G.number_of_nodes() == 0:
        # Null graph: the only partition is the empty one.
        return []

    # Validate arguments BEFORE the no-edge early return below, so that
    # out-of-range cutoff/best_n raise ValueError as documented even when
    # the graph has no edges.
    if (cutoff < 1) or (cutoff > G.number_of_nodes()):
        raise ValueError(f"cutoff must be between 1 and {len(G)}. Got {cutoff}.")
    if best_n is not None:
        if (best_n < 1) or (best_n > G.number_of_nodes()):
            raise ValueError(f"best_n must be between 1 and {len(G)}. Got {best_n}.")
        if best_n < cutoff:
            raise ValueError(f"Must have best_n >= cutoff. Got {best_n} < {cutoff}")
        if best_n == 1:
            return [set(G)]
    else:
        best_n = G.number_of_nodes()

    if not G.size():
        # No edges: the generator below would divide by zero (q0 = 1/m),
        # so return the singleton partition directly.
        # NOTE(review): best_n > 1 is not enforced here (matches upstream).
        return [{n} for n in G]

    # retrieve generator object to construct output
    community_gen = _greedy_modularity_communities_generator(
        G, weight=weight, resolution=resolution
    )

    # construct the first best community
    communities = next(community_gen)

    # continue merging communities until one of the breaking criteria is satisfied
    while len(communities) > cutoff:
        try:
            dq = next(community_gen)
        # StopIteration occurs when communities are the connected components
        except StopIteration:
            communities = sorted(communities, key=len, reverse=True)
            # if best_n requires more merging, merge big sets for highest modularity
            while len(communities) > best_n:
                comm1, comm2, *rest = communities
                # Communities are disjoint, so union merges the two largest.
                communities = [comm1 | comm2]
                communities.extend(rest)
            return communities

        # keep going unless max_mod is reached or best_n says to merge more
        if dq < 0 and len(communities) <= best_n:
            break
        communities = next(community_gen)

    return sorted(communities, key=len, reverse=True)
356
357
@nx.utils.not_implemented_for("directed")
@nx.utils.not_implemented_for("multigraph")
@nx._dispatchable(edge_attrs="weight")
def naive_greedy_modularity_communities(G, resolution=1, weight=None):
    r"""Find communities in G using greedy modularity maximization.

    This is an O(n^4) reference implementation, far slower than
    :func:`greedy_modularity_communities` but easy to follow.

    Starting from the singleton partition (every node alone), it repeatedly
    merges the pair of communities whose union gives the largest modularity,
    stopping as soon as no merge increases modularity.

    The generalized modularity with resolution parameter $\gamma$ is
    maximized; see
    :func:`~networkx.algorithms.community.quality.modularity`.

    Parameters
    ----------
    G : NetworkX graph
        Graph must be simple and undirected.

    resolution : float (default=1)
        If resolution is less than 1, modularity favors larger communities.
        Greater than 1 favors smaller communities.

    weight : string or None, optional (default=None)
        The name of an edge attribute that holds the numerical value used
        as a weight.  If None, then each edge has weight 1.
        The degree is the sum of the edge weights adjacent to the node.

    Returns
    -------
    list
        A list of sets of nodes, one for each community.
        Sorted by length with largest communities first.

    Examples
    --------
    >>> G = nx.karate_club_graph()
    >>> c = nx.community.naive_greedy_modularity_communities(G)
    >>> sorted(c[0])
    [8, 14, 15, 18, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33]

    See Also
    --------
    greedy_modularity_communities
    modularity
    """
    # Singleton partition: one frozenset per node. Merged-away entries are
    # replaced by empty frozensets so indices stay stable.
    communities = [frozenset([node]) for node in G.nodes()]
    # History of accepted merges as (i, j, dq) tuples.
    merges = []

    prev_q = None
    best_q = modularity(G, communities, resolution=resolution, weight=weight)
    # Keep merging while the last accepted merge improved modularity.
    while prev_q is None or best_q > prev_q:
        prev_q = best_q
        best_pair = None
        n_comms = len(communities)
        for i in range(n_comms):
            first = communities[i]
            if not first:
                # Already merged away; nothing to pair with.
                continue
            for j in range(i + 1, n_comms):
                second = communities[j]
                if not second:
                    continue
                # Score the partition with communities i and j merged.
                trial = list(communities)
                trial[j] = first | second
                trial[i] = frozenset()
                trial_q = modularity(G, trial, resolution=resolution, weight=weight)
                if trial_q > best_q:
                    # Strictly better: new incumbent.
                    best_q = trial_q
                    best_pair = (i, j, best_q - prev_q)
                elif (
                    trial_q == best_q
                    and best_pair
                    and min(i, j) < min(best_pair[0], best_pair[1])
                ):
                    # Tie with the incumbent: prefer the lowest min index.
                    best_q = trial_q
                    best_pair = (i, j, best_q - prev_q)
        if best_pair is not None:
            # Commit the best merge found this round.
            merges.append(best_pair)
            i, j, _dq = best_pair
            communities[j] = communities[i] | communities[j]
            communities[i] = frozenset()
    # Drop placeholder empties; largest communities first.
    return sorted((c for c in communities if c), key=len, reverse=True)