Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/networkx/algorithms/community/modularity_max.py: 8%

156 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-10-20 07:00 +0000

1"""Functions for detecting communities based on modularity.""" 

2 

3from collections import defaultdict 

4 

5import networkx as nx 

6from networkx.algorithms.community.quality import modularity 

7from networkx.utils import not_implemented_for 

8from networkx.utils.mapped_queue import MappedQueue 

9 

10__all__ = [ 

11 "greedy_modularity_communities", 

12 "naive_greedy_modularity_communities", 

13] 

14 

15 

def _greedy_modularity_communities_generator(G, weight=None, resolution=1):
    r"""Yield community partitions of G and the modularity change at each step.

    This function performs Clauset-Newman-Moore greedy modularity maximization [2]_
    At each step of the process it yields the change in modularity that will occur in
    the next step followed by yielding the new community partition after that step.

    Greedy modularity maximization begins with each node in its own community
    and repeatedly joins the pair of communities that lead to the largest
    modularity until one community contains all nodes (the partition has one set).

    This function maximizes the generalized modularity, where `resolution`
    is the resolution parameter, often expressed as $\gamma$.
    See :func:`~networkx.algorithms.community.quality.modularity`.

    Parameters
    ----------
    G : NetworkX graph

    weight : string or None, optional (default=None)
        The name of an edge attribute that holds the numerical value used
        as a weight.  If None, then each edge has weight 1.
        The degree is the sum of the edge weights adjacent to the node.

    resolution : float (default=1)
        If resolution is less than 1, modularity favors larger communities.
        Greater than 1 favors smaller communities.

    Yields
    ------
    Alternating yield statements produce the following two objects:

    communities: dict_values
        A dict_values of frozensets of nodes, one for each community.
        This represents a partition of the nodes of the graph into communities.
        The first yield is the partition with each node in its own community.

    dq: float
        The change in modularity when merging the next two communities
        that leads to the largest modularity.

    See Also
    --------
    modularity

    References
    ----------
    .. [1] Newman, M. E. J. "Networks: An Introduction", page 224
       Oxford University Press 2011.
    .. [2] Clauset, A., Newman, M. E., & Moore, C.
       "Finding community structure in very large networks."
       Physical Review E 70(6), 2004.
    .. [3] Reichardt and Bornholdt "Statistical Mechanics of Community
       Detection" Phys. Rev. E74, 2006.
    .. [4] Newman, M. E. J."Analysis of weighted networks"
       Physical Review E 70(5 Pt 2):056131, 2004.
    """
    directed = G.is_directed()
    # NOTE(review): N is assigned but never read in this function.
    N = G.number_of_nodes()

    # Count edges (or the sum of edge-weights for weighted graphs)
    m = G.size(weight)
    # q0 = 1/m normalizes edge weights to fractions of total weight.
    # NOTE(review): raises ZeroDivisionError when the graph has no edges
    # (m == 0); callers are expected to guard against that.
    q0 = 1 / m

    # Calculate degrees (notation from the papers)
    # a : the fraction of (weighted) out-degree for each node
    # b : the fraction of (weighted) in-degree for each node
    if directed:
        a = {node: deg_out * q0 for node, deg_out in G.out_degree(weight=weight)}
        b = {node: deg_in * q0 for node, deg_in in G.in_degree(weight=weight)}
    else:
        # Undirected: a and b are the SAME dict object, so updates to a[v]
        # below are visible through b as well.  The 0.5 factor splits each
        # edge's weight between its two endpoint degrees (2m in denominator).
        a = b = {node: deg * q0 * 0.5 for node, deg in G.degree(weight=weight)}

    # this preliminary step collects the edge weights for each node pair
    # It handles multigraph and digraph and works fine for graph.
    # Self-loops are skipped: they never affect which communities to merge.
    dq_dict = defaultdict(lambda: defaultdict(float))
    for u, v, wt in G.edges(data=weight, default=1):
        if u == v:
            continue
        dq_dict[u][v] += wt
        dq_dict[v][u] += wt

    # now scale and subtract the expected edge-weights term
    # dq_dict[u][v] becomes the modularity change of merging communities u, v.
    for u, nbrdict in dq_dict.items():
        for v, wt in nbrdict.items():
            dq_dict[u][v] = q0 * wt - resolution * (a[u] * b[v] + b[u] * a[v])

    # Use -dq to get a max_heap instead of a min_heap
    # dq_heap holds a heap for each node's neighbors
    dq_heap = {u: MappedQueue({(u, v): -dq for v, dq in dq_dict[u].items()}) for u in G}
    # H -> all_dq_heap holds a heap with the best items for each node
    # (one entry per non-isolated node: that node's current best merge).
    H = MappedQueue([dq_heap[n].heap[0] for n in G if len(dq_heap[n]) > 0])

    # Initialize single-node communities
    communities = {n: frozenset([n]) for n in G}
    yield communities.values()

    # Merge the two communities that lead to the largest modularity
    while len(H) > 1:
        # Find best merge
        # Remove from heap of row maxes
        # Ties will be broken by choosing the pair with lowest min community id
        try:
            # The popped heap element unpacks as (priority, u, v) — presumably
            # MappedQueue's element iterates that way; TODO confirm against
            # networkx.utils.mapped_queue.
            negdq, u, v = H.pop()
        except IndexError:
            break
        dq = -negdq
        # First of the alternating yields: the modularity change of the merge
        # that is about to be performed.
        yield dq
        # Remove best merge from row u heap
        dq_heap[u].pop()
        # Push new row max onto H
        if len(dq_heap[u]) > 0:
            H.push(dq_heap[u].heap[0])
        # If this element was also at the root of row v, we need to remove the
        # duplicate entry from H
        if dq_heap[v].heap[0] == (v, u):
            H.remove((v, u))
            # Remove best merge from row v heap
            dq_heap[v].remove((v, u))
            # Push new row max onto H
            if len(dq_heap[v]) > 0:
                H.push(dq_heap[v].heap[0])
        else:
            # Duplicate wasn't in H, just remove from row v heap
            dq_heap[v].remove((v, u))

        # Perform merge: community u is absorbed into community v.
        communities[v] = frozenset(communities[u] | communities[v])
        del communities[u]

        # Get neighbor communities connected to the merged communities
        u_nbrs = set(dq_dict[u])
        v_nbrs = set(dq_dict[v])
        all_nbrs = (u_nbrs | v_nbrs) - {u, v}
        both_nbrs = u_nbrs & v_nbrs
        # Update dq for merge of u into v
        for w in all_nbrs:
            # Calculate new dq value
            if w in both_nbrs:
                # w touches both u and v: the two dq terms simply add.
                dq_vw = dq_dict[v][w] + dq_dict[u][w]
            elif w in v_nbrs:
                # w only touches v: correct for the expected-edges term of u.
                dq_vw = dq_dict[v][w] - resolution * (a[u] * b[w] + a[w] * b[u])
            else:  # w in u_nbrs
                # w only touches u: correct for the expected-edges term of v.
                dq_vw = dq_dict[u][w] - resolution * (a[v] * b[w] + a[w] * b[v])
            # Update rows v and w
            for row, col in [(v, w), (w, v)]:
                dq_heap_row = dq_heap[row]
                # Update dict for v,w only (u is removed below)
                dq_dict[row][col] = dq_vw
                # Save old max of per-row heap
                if len(dq_heap_row) > 0:
                    d_oldmax = dq_heap_row.heap[0]
                else:
                    d_oldmax = None
                # Add/update heaps
                d = (row, col)
                d_negdq = -dq_vw
                # Save old value for finding heap index
                if w in v_nbrs:
                    # Update existing element in per-row heap
                    # (dq_dict is kept symmetric, so w in v_nbrs implies the
                    # (w, v) entry also already exists in row w's heap).
                    dq_heap_row.update(d, d, priority=d_negdq)
                else:
                    # We're creating a new nonzero element, add to heap
                    dq_heap_row.push(d, priority=d_negdq)
                # Update heap of row maxes if necessary
                if d_oldmax is None:
                    # No entries previously in this row, push new max
                    H.push(d, priority=d_negdq)
                else:
                    # We've updated an entry in this row, has the max changed?
                    row_max = dq_heap_row.heap[0]
                    if d_oldmax != row_max or d_oldmax.priority != row_max.priority:
                        H.update(d_oldmax, row_max)

        # Remove row/col u from dq_dict matrix
        for w in dq_dict[u]:
            # Remove from dict
            dq_old = dq_dict[w][u]
            del dq_dict[w][u]
            # Remove from heaps if we haven't already
            if w != v:
                # Remove both row and column
                for row, col in [(w, u), (u, w)]:
                    dq_heap_row = dq_heap[row]
                    # Check if replaced dq is row max
                    d_old = (row, col)
                    if dq_heap_row.heap[0] == d_old:
                        # Update per-row heap and heap of row maxes
                        dq_heap_row.remove(d_old)
                        H.remove(d_old)
                        # Update row max
                        if len(dq_heap_row) > 0:
                            H.push(dq_heap_row.heap[0])
                    else:
                        # Only update per-row heap
                        dq_heap_row.remove(d_old)

        del dq_dict[u]
        # Mark row u as deleted, but keep placeholder
        dq_heap[u] = MappedQueue()
        # Merge u into v and update a
        # (in the undirected case a is b, so this also updates b).
        a[v] += a[u]
        a[u] = 0
        if directed:
            b[v] += b[u]
            b[u] = 0

        # Second of the alternating yields: the partition after the merge.
        yield communities.values()

224 

225 

@nx._dispatch(edge_attrs="weight")
def greedy_modularity_communities(
    G,
    weight=None,
    resolution=1,
    cutoff=1,
    best_n=None,
):
    r"""Find communities in G using greedy modularity maximization.

    This function uses Clauset-Newman-Moore greedy modularity maximization [2]_
    to find the community partition with the largest modularity.

    Greedy modularity maximization begins with each node in its own community
    and repeatedly joins the pair of communities that lead to the largest
    modularity until no further increase in modularity is possible (a maximum).
    Two keyword arguments adjust the stopping condition. `cutoff` is a lower
    limit on the number of communities so you can stop the process before
    reaching a maximum (used to save computation time). `best_n` is an upper
    limit on the number of communities so you can make the process continue
    until at most n communities remain even if the maximum modularity occurs
    for more. To obtain exactly n communities, set both `cutoff` and `best_n` to n.

    This function maximizes the generalized modularity, where `resolution`
    is the resolution parameter, often expressed as $\gamma$.
    See :func:`~networkx.algorithms.community.quality.modularity`.

    Parameters
    ----------
    G : NetworkX graph

    weight : string or None, optional (default=None)
        The name of an edge attribute that holds the numerical value used
        as a weight.  If None, then each edge has weight 1.
        The degree is the sum of the edge weights adjacent to the node.

    resolution : float, optional (default=1)
        If resolution is less than 1, modularity favors larger communities.
        Greater than 1 favors smaller communities.

    cutoff : int, optional (default=1)
        A minimum number of communities below which the merging process stops.
        The process stops at this number of communities even if modularity
        is not maximized. The goal is to let the user stop the process early.
        The process stops before the cutoff if it finds a maximum of modularity.

    best_n : int or None, optional (default=None)
        A maximum number of communities above which the merging process will
        not stop. This forces community merging to continue after modularity
        starts to decrease until `best_n` communities remain.
        If ``None``, don't force it to continue beyond a maximum.

    Raises
    ------
    ValueError : If the `cutoff` or `best_n` value is not in the range
        ``[1, G.number_of_nodes()]``, or if `best_n` < `cutoff`.

    Returns
    -------
    communities: list
        A list of frozensets of nodes, one for each community.
        Sorted by length with largest communities first.

    Examples
    --------
    >>> G = nx.karate_club_graph()
    >>> c = nx.community.greedy_modularity_communities(G)
    >>> sorted(c[0])
    [8, 14, 15, 18, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33]

    See Also
    --------
    modularity

    References
    ----------
    .. [1] Newman, M. E. J. "Networks: An Introduction", page 224
       Oxford University Press 2011.
    .. [2] Clauset, A., Newman, M. E., & Moore, C.
       "Finding community structure in very large networks."
       Physical Review E 70(6), 2004.
    .. [3] Reichardt and Bornholdt "Statistical Mechanics of Community
       Detection" Phys. Rev. E74, 2006.
    .. [4] Newman, M. E. J."Analysis of weighted networks"
       Physical Review E 70(5 Pt 2):056131, 2004.
    """
    # Validate the stopping-condition parameters before any real work.
    if (cutoff < 1) or (cutoff > G.number_of_nodes()):
        raise ValueError(f"cutoff must be between 1 and {len(G)}. Got {cutoff}.")
    if best_n is not None:
        if (best_n < 1) or (best_n > G.number_of_nodes()):
            raise ValueError(f"best_n must be between 1 and {len(G)}. Got {best_n}.")
        if best_n < cutoff:
            raise ValueError(f"Must have best_n >= cutoff. Got {best_n} < {cutoff}")
        if best_n == 1:
            # Only one community allowed: no need to run the algorithm at all.
            return [set(G)]
    else:
        best_n = G.number_of_nodes()

    # BUGFIX: an edgeless graph makes the generator divide by zero
    # (q0 = 1 / m with m == 0).  With no edges there is nothing to merge;
    # the partition is simply one singleton community per node.
    if G.number_of_edges() == 0:
        return [{n} for n in G]

    # retrieve generator object to construct output
    community_gen = _greedy_modularity_communities_generator(
        G, weight=weight, resolution=resolution
    )

    # construct the first best community (every node in its own community)
    communities = next(community_gen)

    # continue merging communities until one of the breaking criteria is satisfied
    while len(communities) > cutoff:
        try:
            # dq is the modularity change of the next (best) merge
            dq = next(community_gen)
        # StopIteration occurs when communities are the connected components
        except StopIteration:
            communities = sorted(communities, key=len, reverse=True)
            # if best_n requires more merging, merge big sets for highest modularity
            while len(communities) > best_n:
                comm1, comm2, *rest = communities
                # Communities are disjoint, so symmetric difference == union.
                communities = [comm1 ^ comm2]
                communities.extend(rest)
            return communities

        # keep going unless max_mod is reached or best_n says to merge more
        if dq < 0 and len(communities) <= best_n:
            break
        communities = next(community_gen)

    return sorted(communities, key=len, reverse=True)

352 

353 

@not_implemented_for("directed")
@not_implemented_for("multigraph")
@nx._dispatch(edge_attrs="weight")
def naive_greedy_modularity_communities(G, resolution=1, weight=None):
    r"""Find communities in G using greedy modularity maximization.

    This is a deliberately simple O(n^4) reference implementation: at every
    round it tries merging every pair of current communities, keeps the merge
    that increases modularity the most, and stops when no merge improves
    modularity.  Prefer :func:`greedy_modularity_communities` for real use.

    This function maximizes the generalized modularity, where `resolution`
    is the resolution parameter, often expressed as $\gamma$.
    See :func:`~networkx.algorithms.community.quality.modularity`.

    Parameters
    ----------
    G : NetworkX graph
        Graph must be simple and undirected.

    resolution : float (default=1)
        If resolution is less than 1, modularity favors larger communities.
        Greater than 1 favors smaller communities.

    weight : string or None, optional (default=None)
        The name of an edge attribute that holds the numerical value used
        as a weight.  If None, then each edge has weight 1.
        The degree is the sum of the edge weights adjacent to the node.

    Returns
    -------
    list
        A list of sets of nodes, one for each community.
        Sorted by length with largest communities first.

    Examples
    --------
    >>> G = nx.karate_club_graph()
    >>> c = nx.community.naive_greedy_modularity_communities(G)
    >>> sorted(c[0])
    [8, 14, 15, 18, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33]

    See Also
    --------
    greedy_modularity_communities
    modularity
    """
    # Start with every node in its own singleton community.
    communities = [frozenset([node]) for node in G.nodes()]
    # History of accepted merges as (i, j, modularity gain) triples.
    merges = []
    prev_mod = None
    curr_mod = modularity(G, communities, resolution=resolution, weight=weight)
    # Keep merging while the last round improved modularity.
    while prev_mod is None or curr_mod > prev_mod:
        prev_mod = curr_mod
        # Scratch copy that gets one trial merge applied and then undone.
        trial = list(communities)
        best_merge = None
        size = len(communities)
        for i in range(size):
            left = communities[i]
            if not left:
                # Already emptied by an earlier merge; nothing to try.
                continue
            for j in range(i + 1, size):
                right = communities[j]
                if not right:
                    continue
                # Tentatively merge community i into community j.
                trial[j] = left | right
                trial[i] = frozenset([])
                trial_mod = modularity(
                    G, trial, resolution=resolution, weight=weight
                )
                if trial_mod >= curr_mod:
                    if trial_mod > curr_mod:
                        # Strictly better: record this pair as the new best.
                        curr_mod = trial_mod
                        best_merge = (i, j, curr_mod - prev_mod)
                    elif best_merge and min(i, j) < min(best_merge[0], best_merge[1]):
                        # Exact tie: prefer the pair with the lowest min index.
                        curr_mod = trial_mod
                        best_merge = (i, j, curr_mod - prev_mod)
                # Undo the tentative merge before trying the next pair.
                trial[i] = left
                trial[j] = right
        if best_merge is not None:
            # Apply the winning merge for this round.
            merges.append(best_merge)
            i, j, dq = best_merge
            communities[j] = communities[i] | communities[j]
            communities[i] = frozenset([])
    # Drop the emptied slots and return largest communities first.
    return sorted((c for c in communities if len(c) > 0), key=len, reverse=True)