Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/networkx/algorithms/community/bipartitions.py: 12%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

135 statements  

1"""Functions for splitting a network into two communities (finding a bipartition).""" 

2 

3import random 

4from copy import deepcopy 

5from itertools import count 

6 

7import networkx as nx 

8 

9__all__ = [ 

10 "kernighan_lin_bisection", 

11 "spectral_modularity_bipartition", 

12 "greedy_node_swap_bipartition", 

13] 

14 

15 

16def _kernighan_lin_sweep(edge_info, side): 

17 """ 

18 This is a modified form of Kernighan-Lin, which moves single nodes at a 

19 time, alternating between sides to keep the bisection balanced. We keep 

20 two min-heaps of swap costs to make optimal-next-move selection fast. 

21 """ 

22 heap0, heap1 = cost_heaps = nx.utils.BinaryHeap(), nx.utils.BinaryHeap() 

23 # we use heap methods insert, pop, and get 

24 for u, nbrs in edge_info.items(): 

25 cost_u = sum(wt if side[v] else -wt for v, wt in nbrs.items()) 

26 if side[u]: 

27 heap1.insert(u, cost_u) 

28 else: 

29 heap0.insert(u, -cost_u) 

30 

31 def _update_heap_values(node): 

32 side_node = side[node] 

33 for nbr, wt in edge_info[node].items(): 

34 side_nbr = side[nbr] 

35 if side_nbr == side_node: 

36 wt = -wt 

37 heap_nbr = cost_heaps[side_nbr] 

38 if nbr in heap_nbr: 

39 cost_nbr = heap_nbr.get(nbr) + 2 * wt 

40 # allow_increase lets us update a value already on the heap 

41 heap_nbr.insert(nbr, cost_nbr, allow_increase=True) 

42 

43 i = 0 

44 totcost = 0 

45 while heap0 and heap1: 

46 u, cost_u = heap0.pop() 

47 _update_heap_values(u) 

48 v, cost_v = heap1.pop() 

49 _update_heap_values(v) 

50 totcost += cost_u + cost_v 

51 i += 1 

52 yield totcost, i, (u, v) 

53 

54 

55@nx.utils.not_implemented_for("directed") 

56@nx.utils.py_random_state(4) 

57@nx._dispatchable(edge_attrs="weight") 

58def kernighan_lin_bisection(G, partition=None, max_iter=10, weight="weight", seed=None): 

59 """Partition a graph into two blocks using the Kernighan–Lin algorithm. 

60 

61 This algorithm partitions a network into two sets by iteratively 

62 swapping pairs of nodes to reduce the edge cut between the two sets. The 

63 pairs are chosen according to a modified form of Kernighan-Lin [1]_, which 

64 moves nodes individually, alternating between sides to keep the bisection 

65 balanced. 

66 

67 Kernighan-Lin is an approximate algorithm for maximal modularity bisection. 

68 In [2]_ they suggest that fine-tuned improvements can be made using 

69 greedy node swapping, (see `greedy_node_swap_bipartition`). 

70 The improvements are typically only a few percent of the modularity value. 

71 But they claim that can make a difference between a good and excellent method. 

72 This function does not perform any improvements. But you can do that yourself. 

73 

74 Parameters 

75 ---------- 

76 G : NetworkX graph 

77 Graph must be undirected. 

78 

79 partition : tuple 

80 Pair of iterables containing an initial partition. If not 

81 specified, a random balanced partition is used. 

82 

83 max_iter : int 

84 Maximum number of times to attempt swaps to find an 

85 improvement before giving up. 

86 

87 weight : string or function (default: "weight") 

88 If this is a string, then edge weights will be accessed via the 

89 edge attribute with this key (that is, the weight of the edge 

90 joining `u` to `v` will be ``G.edges[u, v][weight]``). If no 

91 such edge attribute exists, the weight of the edge is assumed to 

92 be one. 

93 

94 If this is a function, the weight of an edge is the value 

95 returned by the function. The function must accept exactly three 

96 positional arguments: the two endpoints of an edge and the 

97 dictionary of edge attributes for that edge. The function must 

98 return a number or None to indicate a hidden edge. 

99 

100 seed : integer, random_state, or None (default) 

101 Indicator of random number generation state. 

102 See :ref:`Randomness<randomness>`. 

103 Only used if partition is None 

104 

105 Returns 

106 ------- 

107 partition : tuple 

108 A pair of sets of nodes representing the bipartition. 

109 

110 Raises 

111 ------ 

112 NetworkXError 

113 If `partition` is not a valid partition of the nodes of the graph. 

114 

115 References 

116 ---------- 

117 .. [1] Kernighan, B. W.; Lin, Shen (1970). 

118 "An efficient heuristic procedure for partitioning graphs." 

119 *Bell Systems Technical Journal* 49: 291--307. 

120 Oxford University Press 2011. 

121 .. [2] M. E. J. Newman, 

122 "Modularity and community structure in networks", 

123 PNAS, 103 (23), p. 8577-8582, 

124 https://doi.org/10.1073/pnas.0601602103 

125 

126 """ 

127 nodes = list(G) 

128 

129 if partition is None: 

130 seed.shuffle(nodes) 

131 mid = len(nodes) // 2 

132 A, B = nodes[:mid], nodes[mid:] 

133 else: 

134 try: 

135 A, B = partition 

136 except (TypeError, ValueError) as err: 

137 raise nx.NetworkXError("partition must be two sets") from err 

138 if not nx.community.is_partition(G, [A, B]): 

139 raise nx.NetworkXError("partition invalid") 

140 

141 side = {node: (node in A) for node in nodes} 

142 

143 # ruff: noqa: E731 skips check for no lambda functions 

144 # Using shortest_paths _weight_function with sum instead of min on multiedges 

145 if callable(weight): 

146 sum_weight = weight 

147 elif G.is_multigraph(): 

148 sum_weight = lambda u, v, d: sum(dd.get(weight, 1) for dd in d.values()) 

149 else: 

150 sum_weight = lambda u, v, d: d.get(weight, 1) 

151 

152 edge_info = { 

153 u: {v: wt for v, d in nbrs.items() if (wt := sum_weight(u, v, d)) is not None} 

154 for u, nbrs in G._adj.items() 

155 } 

156 

157 for i in range(max_iter): 

158 costs = list(_kernighan_lin_sweep(edge_info, side)) 

159 # find out how many edges to update: min_i 

160 min_cost, min_i, _ = min(costs) 

161 if min_cost >= 0: 

162 break 

163 

164 for _, _, (u, v) in costs[:min_i]: 

165 side[u] = 1 

166 side[v] = 0 

167 

168 part1 = {u for u, s in side.items() if s == 0} 

169 part2 = {u for u, s in side.items() if s == 1} 

170 return part1, part2 

171 

172 

173@nx.utils.not_implemented_for("directed") 

174@nx.utils.not_implemented_for("multigraph") 

175def spectral_modularity_bipartition(G): 

176 r"""Return a bipartition of the nodes based on the spectrum of the 

177 modularity matrix of the graph. 

178 

179 This method calculates the eigenvector associated with the second 

180 largest eigenvalue of the modularity matrix, where the modularity 

181 matrix *B* is defined by 

182 

183 ..math:: 

184 

185 B_{i j} = A_{i j} - \frac{k_i k_j}{2 m}, 

186 

187 where *A* is the adjacency matrix, `k_i` is the degree of node *i*, 

188 and *m* is the number of edges in the graph. Nodes whose 

189 corresponding values in the eigenvector are negative are placed in 

190 one block, nodes whose values are nonnegative are placed in another 

191 block. 

192 

193 Parameters 

194 ---------- 

195 G : NetworkX graph 

196 

197 Returns 

198 ------- 

199 C : tuple 

200 Pair of communities as two sets of nodes of ``G``, partitioned 

201 according to second largest eigenvalue of the modularity matrix. 

202 

203 Examples 

204 -------- 

205 >>> G = nx.karate_club_graph() 

206 >>> MrHi, Officer = nx.community.spectral_modularity_bipartition(G) 

207 >>> MrHi, Officer = sorted([sorted(MrHi), sorted(Officer)]) 

208 >>> MrHi 

209 [0, 1, 2, 3, 4, 5, 6, 7, 10, 11, 12, 13, 16, 17, 19, 21] 

210 >>> Officer 

211 [8, 9, 14, 15, 18, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33] 

212 

213 References 

214 ---------- 

215 .. [1] M. E. J. Newman *Networks: An Introduction*, pages 373--378 

216 Oxford University Press 2011. 

217 .. [2] M. E. J. Newman, 

218 "Modularity and community structure in networks", 

219 PNAS, 103 (23), p. 8577-8582, 

220 https://doi.org/10.1073/pnas.0601602103 

221 

222 """ 

223 import numpy as np 

224 

225 B = nx.linalg.modularity_matrix(G) 

226 eigenvalues, eigenvectors = np.linalg.eig(B) 

227 index = np.argsort(eigenvalues)[-1] 

228 v2 = zip(np.real(eigenvectors[:, index]), G) 

229 left, right = set(), set() 

230 for u, n in v2: 

231 if u < 0: 

232 left.add(n) 

233 else: 

234 right.add(n) 

235 return left, right 

236 

237 

238@nx.utils.not_implemented_for("multigraph") 

239def greedy_node_swap_bipartition(G, *, init_split=None, max_iter=10): 

240 """Split the nodes into two communities based on greedy 

241 modularity maximization. 

242 

243 The algorithm works by selecting a node to change communities which 

244 will maximize the modularity. The swap is made and the community 

245 structure with the highest modularity is kept. 

246 

247 Parameters 

248 ---------- 

249 G : NetworkX graph 

250 

251 init_split : 2-tuple of sets of nodes 

252 Pair of sets of nodes in ``G`` providing an initial bipartition 

253 for the algorithm. If not specified, a random balanced partition 

254 is used. If this pair of sets is not a partition of the nodes of `G`, 

255 :exc:`NetworkXException` is raised. 

256 

257 max_iter : int 

258 Maximum number of iterations of attempting swaps to find an improvement. 

259 

260 Returns 

261 ------- 

262 max_split : 2-tuple of sets of nodes 

263 Pair of sets of nodes of ``G``, partitioned according to a 

264 node swap greedy modularity maximization algorithm. 

265 

266 Raises 

267 ------ 

268 NetworkXError 

269 if init_split is not a valid partition of the 

270 graph into two communities or if G is a MultiGraph 

271 

272 Examples 

273 -------- 

274 >>> G = nx.barbell_graph(3, 0) 

275 >>> left, right = nx.community.greedy_node_swap_bipartition(G) 

276 >>> # Sort the communities so the nodes appear in increasing order. 

277 >>> left, right = sorted([sorted(left), sorted(right)]) 

278 >>> sorted(left) 

279 [0, 1, 2] 

280 >>> sorted(right) 

281 [3, 4, 5] 

282 

283 Notes 

284 ----- 

285 This function is not implemented for multigraphs. 

286 

287 References 

288 ---------- 

289 .. [1] M. E. J. Newman "Networks: An Introduction", pages 373--375. 

290 Oxford University Press 2011. 

291 

292 """ 

293 if init_split is None: 

294 m1 = len(G) // 2 

295 m2 = len(G) - m1 

296 some_nodes = set(random.sample(list(G), m1)) 

297 other_nodes = {n for n in G if n not in some_nodes} 

298 best_split_so_far = (some_nodes, other_nodes) 

299 else: 

300 if not nx.community.is_partition(G, init_split): 

301 raise nx.NetworkXError("init_split is not a partition of G") 

302 if not len(init_split) == 2: 

303 raise nx.NetworkXError("init_split must be a bipartition") 

304 best_split_so_far = deepcopy(init_split) 

305 

306 best_mod = nx.community.modularity(G, best_split_so_far) 

307 

308 max_split, max_mod = best_split_so_far, best_mod 

309 its = 0 

310 m = G.number_of_edges() 

311 G_degree = dict(G.degree) 

312 

313 while max_mod >= best_mod and its < max_iter: 

314 best_split_so_far = max_split 

315 best_mod = max_mod 

316 next_split = deepcopy(max_split) 

317 next_mod = max_mod 

318 nodes = set(G) 

319 while nodes: 

320 max_swap = -1 

321 max_node = None 

322 max_node_comm = None 

323 left, right = next_split 

324 leftd = sum(G_degree[n] for n in left) 

325 rightd = sum(G_degree[n] for n in right) 

326 for n in nodes: 

327 if n in left: 

328 in_comm, out_comm = left, right 

329 in_deg, out_deg = leftd, rightd 

330 else: 

331 in_comm, out_comm = right, left 

332 in_deg, out_deg = rightd, leftd 

333 

334 d_eii = -len(G[n].keys() & in_comm) / m 

335 d_ejj = len(G[n].keys() & out_comm) / m 

336 deg = G_degree[n] 

337 d_sum_ai = (deg / (2 * m**2)) * (in_deg - out_deg - deg) 

338 swap_change = d_eii + d_ejj + d_sum_ai 

339 

340 if swap_change > max_swap: 

341 max_swap = swap_change 

342 max_node = n 

343 max_node_comm = in_comm 

344 non_max_node_comm = out_comm 

345 # swap the node from one comm to the other 

346 max_node_comm.remove(max_node) 

347 non_max_node_comm.add(max_node) 

348 next_mod += max_swap 

349 # deepcopy next_split each time it reaches a high (might go lower later) 

350 if next_mod > max_mod: 

351 max_split, max_mod = deepcopy(next_split), next_mod 

352 nodes.remove(max_node) 

353 its += 1 

354 return best_split_so_far