Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/networkx/algorithms/community/louvain.py: 12%

129 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-10-20 07:00 +0000

1"""Function for detecting communities based on Louvain Community Detection 

2Algorithm""" 

3 

4from collections import defaultdict, deque 

5 

6import networkx as nx 

7from networkx.algorithms.community import modularity 

8from networkx.utils import py_random_state 

9 

10__all__ = ["louvain_communities", "louvain_partitions"] 

11 

12 

13@py_random_state("seed") 

14@nx._dispatch(edge_attrs="weight") 

15def louvain_communities( 

16 G, weight="weight", resolution=1, threshold=0.0000001, seed=None 

17): 

18 r"""Find the best partition of a graph using the Louvain Community Detection 

19 Algorithm. 

20 

21 Louvain Community Detection Algorithm is a simple method to extract the community 

22 structure of a network. This is a heuristic method based on modularity optimization. [1]_ 

23 

24 The algorithm works in 2 steps. On the first step it assigns every node to be 

25 in its own community and then for each node it tries to find the maximum positive 

26 modularity gain by moving each node to all of its neighbor communities. If no positive 

27 gain is achieved the node remains in its original community. 

28 

29 The modularity gain obtained by moving an isolated node $i$ into a community $C$ can 

30 easily be calculated by the following formula (combining [1]_ [2]_ and some algebra): 

31 

32 .. math:: 

33 \Delta Q = \frac{k_{i,in}}{2m} - \gamma\frac{ \Sigma_{tot} \cdot k_i}{2m^2} 

34 

35 where $m$ is the size of the graph, $k_{i,in}$ is the sum of the weights of the links 

36 from $i$ to nodes in $C$, $k_i$ is the sum of the weights of the links incident to node $i$, 

37 $\Sigma_{tot}$ is the sum of the weights of the links incident to nodes in $C$ and $\gamma$ 

38 is the resolution parameter. 

39 

40 For the directed case the modularity gain can be computed using this formula according to [3]_ 

41 

42 .. math:: 

43 \Delta Q = \frac{k_{i,in}}{m} 

44 - \gamma\frac{k_i^{out} \cdot\Sigma_{tot}^{in} + k_i^{in} \cdot \Sigma_{tot}^{out}}{m^2} 

45 

46 where $k_i^{out}$, $k_i^{in}$ are the outer and inner weighted degrees of node $i$ and 

47 $\Sigma_{tot}^{in}$, $\Sigma_{tot}^{out}$ are the sum of in-going and out-going links incident 

48 to nodes in $C$. 

49 

50 The first phase continues until no individual move can improve the modularity. 

51 

52 The second phase consists in building a new network whose nodes are now the communities 

53 found in the first phase. To do so, the weights of the links between the new nodes are given by 

54 the sum of the weight of the links between nodes in the corresponding two communities. Once this 

55 phase is complete it is possible to reapply the first phase creating bigger communities with 

56 increased modularity. 

57 

58 The above two phases are executed until no modularity gain is achieved (or is less than 

59 the `threshold`). 

60 

61 Be careful with self-loops in the input graph. These are treated as 

62 previously reduced communities -- as if the process had been started 

63 in the middle of the algorithm. Large self-loop edge weights thus 

64 represent strong communities and in practice may be hard to add 

65 other nodes to. If your input graph edge weights for self-loops 

66 do not represent already reduced communities you may want to remove 

67 the self-loops before inputting that graph. 

68 

69 Parameters 

70 ---------- 

71 G : NetworkX graph 

72 weight : string or None, optional (default="weight") 

73 The name of an edge attribute that holds the numerical value 

74 used as a weight. If None then each edge has weight 1. 

75 resolution : float, optional (default=1) 

76 If resolution is less than 1, the algorithm favors larger communities. 

77 Greater than 1 favors smaller communities 

78 threshold : float, optional (default=0.0000001) 

79 Modularity gain threshold for each level. If the gain of modularity 

80 between 2 levels of the algorithm is less than the given threshold 

81 then the algorithm stops and returns the resulting communities. 

82 seed : integer, random_state, or None (default) 

83 Indicator of random number generation state. 

84 See :ref:`Randomness<randomness>`. 

85 

86 Returns 

87 ------- 

88 list 

89 A list of sets (partition of `G`). Each set represents one community and contains 

90 all the nodes that constitute it. 

91 

92 Examples 

93 -------- 

94 >>> import networkx as nx 

95 >>> G = nx.petersen_graph() 

96 >>> nx.community.louvain_communities(G, seed=123) 

97 [{0, 4, 5, 7, 9}, {1, 2, 3, 6, 8}] 

98 

99 Notes 

100 ----- 

101 The order in which the nodes are considered can affect the final output. In the algorithm 

102 the ordering happens using a random shuffle. 

103 

104 References 

105 ---------- 

106 .. [1] Blondel, V.D. et al. Fast unfolding of communities in 

107 large networks. J. Stat. Mech 10008, 1-12(2008). https://doi.org/10.1088/1742-5468/2008/10/P10008 

108 .. [2] Traag, V.A., Waltman, L. & van Eck, N.J. From Louvain to Leiden: guaranteeing 

109 well-connected communities. Sci Rep 9, 5233 (2019). https://doi.org/10.1038/s41598-019-41695-z 

110 .. [3] Nicolas Dugué, Anthony Perez. Directed Louvain : maximizing modularity in directed networks. 

111 [Research Report] Université d’Orléans. 2015. hal-01231784. https://hal.archives-ouvertes.fr/hal-01231784 

112 

113 See Also 

114 -------- 

115 louvain_partitions 

116 """ 

117 

118 d = louvain_partitions(G, weight, resolution, threshold, seed) 

119 q = deque(d, maxlen=1) 

120 return q.pop() 

121 

122 

123@py_random_state("seed") 

124@nx._dispatch(edge_attrs="weight") 

125def louvain_partitions( 

126 G, weight="weight", resolution=1, threshold=0.0000001, seed=None 

127): 

128 """Yields partitions for each level of the Louvain Community Detection Algorithm 

129 

130 Louvain Community Detection Algorithm is a simple method to extract the community 

131 structure of a network. This is a heuristic method based on modularity optimization. [1]_ 

132 

133 The partitions at each level (step of the algorithm) form a dendrogram of communities. 

134 A dendrogram is a diagram representing a tree and each level represents 

135 a partition of the G graph. The top level contains the smallest communities 

136 and as you traverse to the bottom of the tree the communities get bigger 

137 and the overall modularity increases making the partition better. 

138 

139 Each level is generated by executing the two phases of the Louvain Community 

140 Detection Algorithm. 

141 

142 Be careful with self-loops in the input graph. These are treated as 

143 previously reduced communities -- as if the process had been started 

144 in the middle of the algorithm. Large self-loop edge weights thus 

145 represent strong communities and in practice may be hard to add 

146 other nodes to. If your input graph edge weights for self-loops 

147 do not represent already reduced communities you may want to remove 

148 the self-loops before inputting that graph. 

149 

150 Parameters 

151 ---------- 

152 G : NetworkX graph 

153 weight : string or None, optional (default="weight") 

154 The name of an edge attribute that holds the numerical value 

155 used as a weight. If None then each edge has weight 1. 

156 resolution : float, optional (default=1) 

157 If resolution is less than 1, the algorithm favors larger communities. 

158 Greater than 1 favors smaller communities 

159 threshold : float, optional (default=0.0000001) 

160 Modularity gain threshold for each level. If the gain of modularity 

161 between 2 levels of the algorithm is less than the given threshold 

162 then the algorithm stops and returns the resulting communities. 

163 seed : integer, random_state, or None (default) 

164 Indicator of random number generation state. 

165 See :ref:`Randomness<randomness>`. 

166 

167 Yields 

168 ------ 

169 list 

170 A list of sets (partition of `G`). Each set represents one community and contains 

171 all the nodes that constitute it. 

172 

173 References 

174 ---------- 

175 .. [1] Blondel, V.D. et al. Fast unfolding of communities in 

176 large networks. J. Stat. Mech 10008, 1-12(2008) 

177 

178 See Also 

179 -------- 

180 louvain_communities 

181 """ 

182 

183 partition = [{u} for u in G.nodes()] 

184 if nx.is_empty(G): 

185 yield partition 

186 return 

187 mod = modularity(G, partition, resolution=resolution, weight=weight) 

188 is_directed = G.is_directed() 

189 if G.is_multigraph(): 

190 graph = _convert_multigraph(G, weight, is_directed) 

191 else: 

192 graph = G.__class__() 

193 graph.add_nodes_from(G) 

194 graph.add_weighted_edges_from(G.edges(data=weight, default=1)) 

195 

196 m = graph.size(weight="weight") 

197 partition, inner_partition, improvement = _one_level( 

198 graph, m, partition, resolution, is_directed, seed 

199 ) 

200 improvement = True 

201 while improvement: 

202 # gh-5901 protect the sets in the yielded list from further manipulation here 

203 yield [s.copy() for s in partition] 

204 new_mod = modularity( 

205 graph, inner_partition, resolution=resolution, weight="weight" 

206 ) 

207 if new_mod - mod <= threshold: 

208 return 

209 mod = new_mod 

210 graph = _gen_graph(graph, inner_partition) 

211 partition, inner_partition, improvement = _one_level( 

212 graph, m, partition, resolution, is_directed, seed 

213 ) 

214 

215 

216def _one_level(G, m, partition, resolution=1, is_directed=False, seed=None): 

217 """Calculate one level of the Louvain partitions tree 

218 

219 Parameters 

220 ---------- 

221 G : NetworkX Graph/DiGraph 

222 The graph from which to detect communities 

223 m : number 

224 The size of the graph `G`. 

225 partition : list of sets of nodes 

226 A valid partition of the graph `G` 

227 resolution : positive number 

228 The resolution parameter for computing the modularity of a partition 

229 is_directed : bool 

230 True if `G` is a directed graph. 

231 seed : integer, random_state, or None (default) 

232 Indicator of random number generation state. 

233 See :ref:`Randomness<randomness>`. 

234 

235 """ 

236 node2com = {u: i for i, u in enumerate(G.nodes())} 

237 inner_partition = [{u} for u in G.nodes()] 

238 if is_directed: 

239 in_degrees = dict(G.in_degree(weight="weight")) 

240 out_degrees = dict(G.out_degree(weight="weight")) 

241 Stot_in = list(in_degrees.values()) 

242 Stot_out = list(out_degrees.values()) 

243 # Calculate weights for both in and out neighbours without considering self-loops 

244 nbrs = {} 

245 for u in G: 

246 nbrs[u] = defaultdict(float) 

247 for _, n, wt in G.out_edges(u, data="weight"): 

248 if u != n: 

249 nbrs[u][n] += wt 

250 for n, _, wt in G.in_edges(u, data="weight"): 

251 if u != n: 

252 nbrs[u][n] += wt 

253 else: 

254 degrees = dict(G.degree(weight="weight")) 

255 Stot = list(degrees.values()) 

256 nbrs = {u: {v: data["weight"] for v, data in G[u].items() if v != u} for u in G} 

257 rand_nodes = list(G.nodes) 

258 seed.shuffle(rand_nodes) 

259 nb_moves = 1 

260 improvement = False 

261 while nb_moves > 0: 

262 nb_moves = 0 

263 for u in rand_nodes: 

264 best_mod = 0 

265 best_com = node2com[u] 

266 weights2com = _neighbor_weights(nbrs[u], node2com) 

267 if is_directed: 

268 in_degree = in_degrees[u] 

269 out_degree = out_degrees[u] 

270 Stot_in[best_com] -= in_degree 

271 Stot_out[best_com] -= out_degree 

272 remove_cost = ( 

273 -weights2com[best_com] / m 

274 + resolution 

275 * (out_degree * Stot_in[best_com] + in_degree * Stot_out[best_com]) 

276 / m**2 

277 ) 

278 else: 

279 degree = degrees[u] 

280 Stot[best_com] -= degree 

281 remove_cost = -weights2com[best_com] / m + resolution * ( 

282 Stot[best_com] * degree 

283 ) / (2 * m**2) 

284 for nbr_com, wt in weights2com.items(): 

285 if is_directed: 

286 gain = ( 

287 remove_cost 

288 + wt / m 

289 - resolution 

290 * ( 

291 out_degree * Stot_in[nbr_com] 

292 + in_degree * Stot_out[nbr_com] 

293 ) 

294 / m**2 

295 ) 

296 else: 

297 gain = ( 

298 remove_cost 

299 + wt / m 

300 - resolution * (Stot[nbr_com] * degree) / (2 * m**2) 

301 ) 

302 if gain > best_mod: 

303 best_mod = gain 

304 best_com = nbr_com 

305 if is_directed: 

306 Stot_in[best_com] += in_degree 

307 Stot_out[best_com] += out_degree 

308 else: 

309 Stot[best_com] += degree 

310 if best_com != node2com[u]: 

311 com = G.nodes[u].get("nodes", {u}) 

312 partition[node2com[u]].difference_update(com) 

313 inner_partition[node2com[u]].remove(u) 

314 partition[best_com].update(com) 

315 inner_partition[best_com].add(u) 

316 improvement = True 

317 nb_moves += 1 

318 node2com[u] = best_com 

319 partition = list(filter(len, partition)) 

320 inner_partition = list(filter(len, inner_partition)) 

321 return partition, inner_partition, improvement 

322 

323 

324def _neighbor_weights(nbrs, node2com): 

325 """Calculate weights between node and its neighbor communities. 

326 

327 Parameters 

328 ---------- 

329 nbrs : dictionary 

330 Dictionary with nodes' neighbours as keys and their edge weight as value. 

331 node2com : dictionary 

332 Dictionary with all graph's nodes as keys and their community index as value. 

333 

334 """ 

335 weights = defaultdict(float) 

336 for nbr, wt in nbrs.items(): 

337 weights[node2com[nbr]] += wt 

338 return weights 

339 

340 

341def _gen_graph(G, partition): 

342 """Generate a new graph based on the partitions of a given graph""" 

343 H = G.__class__() 

344 node2com = {} 

345 for i, part in enumerate(partition): 

346 nodes = set() 

347 for node in part: 

348 node2com[node] = i 

349 nodes.update(G.nodes[node].get("nodes", {node})) 

350 H.add_node(i, nodes=nodes) 

351 

352 for node1, node2, wt in G.edges(data=True): 

353 wt = wt["weight"] 

354 com1 = node2com[node1] 

355 com2 = node2com[node2] 

356 temp = H.get_edge_data(com1, com2, {"weight": 0})["weight"] 

357 H.add_edge(com1, com2, weight=wt + temp) 

358 return H 

359 

360 

361def _convert_multigraph(G, weight, is_directed): 

362 """Convert a Multigraph to normal Graph""" 

363 if is_directed: 

364 H = nx.DiGraph() 

365 else: 

366 H = nx.Graph() 

367 H.add_nodes_from(G) 

368 for u, v, wt in G.edges(data=weight, default=1): 

369 if H.has_edge(u, v): 

370 H[u][v]["weight"] += wt 

371 else: 

372 H.add_edge(u, v, weight=wt) 

373 return H