1"""Functions for splitting a network into two communities (finding a bipartition)."""
2
3import random
4from copy import deepcopy
5from itertools import count
6
7import networkx as nx
8
9__all__ = [
10 "kernighan_lin_bisection",
11 "spectral_modularity_bipartition",
12 "greedy_node_swap_bipartition",
13]
14
15
16def _kernighan_lin_sweep(edge_info, side):
17 """
18 This is a modified form of Kernighan-Lin, which moves single nodes at a
19 time, alternating between sides to keep the bisection balanced. We keep
20 two min-heaps of swap costs to make optimal-next-move selection fast.
21 """
22 heap0, heap1 = cost_heaps = nx.utils.BinaryHeap(), nx.utils.BinaryHeap()
23 # we use heap methods insert, pop, and get
24 for u, nbrs in edge_info.items():
25 cost_u = sum(wt if side[v] else -wt for v, wt in nbrs.items())
26 if side[u]:
27 heap1.insert(u, cost_u)
28 else:
29 heap0.insert(u, -cost_u)
30
31 def _update_heap_values(node):
32 side_node = side[node]
33 for nbr, wt in edge_info[node].items():
34 side_nbr = side[nbr]
35 if side_nbr == side_node:
36 wt = -wt
37 heap_nbr = cost_heaps[side_nbr]
38 if nbr in heap_nbr:
39 cost_nbr = heap_nbr.get(nbr) + 2 * wt
40 # allow_increase lets us update a value already on the heap
41 heap_nbr.insert(nbr, cost_nbr, allow_increase=True)
42
43 i = 0
44 totcost = 0
45 while heap0 and heap1:
46 u, cost_u = heap0.pop()
47 _update_heap_values(u)
48 v, cost_v = heap1.pop()
49 _update_heap_values(v)
50 totcost += cost_u + cost_v
51 i += 1
52 yield totcost, i, (u, v)
53
54
55@nx.utils.not_implemented_for("directed")
56@nx.utils.py_random_state(4)
57@nx._dispatchable(edge_attrs="weight")
58def kernighan_lin_bisection(G, partition=None, max_iter=10, weight="weight", seed=None):
59 """Partition a graph into two blocks using the Kernighan–Lin algorithm.
60
61 This algorithm partitions a network into two sets by iteratively
62 swapping pairs of nodes to reduce the edge cut between the two sets. The
63 pairs are chosen according to a modified form of Kernighan-Lin [1]_, which
64 moves nodes individually, alternating between sides to keep the bisection
65 balanced.
66
67 Kernighan-Lin is an approximate algorithm for maximal modularity bisection.
68 In [2]_ they suggest that fine-tuned improvements can be made using
69 greedy node swapping, (see `greedy_node_swap_bipartition`).
70 The improvements are typically only a few percent of the modularity value.
71 But they claim that can make a difference between a good and excellent method.
72 This function does not perform any improvements. But you can do that yourself.
73
74 Parameters
75 ----------
76 G : NetworkX graph
77 Graph must be undirected.
78
79 partition : tuple
80 Pair of iterables containing an initial partition. If not
81 specified, a random balanced partition is used.
82
83 max_iter : int
84 Maximum number of times to attempt swaps to find an
85 improvement before giving up.
86
87 weight : string or function (default: "weight")
88 If this is a string, then edge weights will be accessed via the
89 edge attribute with this key (that is, the weight of the edge
90 joining `u` to `v` will be ``G.edges[u, v][weight]``). If no
91 such edge attribute exists, the weight of the edge is assumed to
92 be one.
93
94 If this is a function, the weight of an edge is the value
95 returned by the function. The function must accept exactly three
96 positional arguments: the two endpoints of an edge and the
97 dictionary of edge attributes for that edge. The function must
98 return a number or None to indicate a hidden edge.
99
100 seed : integer, random_state, or None (default)
101 Indicator of random number generation state.
102 See :ref:`Randomness<randomness>`.
103 Only used if partition is None
104
105 Returns
106 -------
107 partition : tuple
108 A pair of sets of nodes representing the bipartition.
109
110 Raises
111 ------
112 NetworkXError
113 If `partition` is not a valid partition of the nodes of the graph.
114
115 References
116 ----------
117 .. [1] Kernighan, B. W.; Lin, Shen (1970).
118 "An efficient heuristic procedure for partitioning graphs."
119 *Bell Systems Technical Journal* 49: 291--307.
120 Oxford University Press 2011.
121 .. [2] M. E. J. Newman,
122 "Modularity and community structure in networks",
123 PNAS, 103 (23), p. 8577-8582,
124 https://doi.org/10.1073/pnas.0601602103
125
126 """
127 nodes = list(G)
128
129 if partition is None:
130 seed.shuffle(nodes)
131 mid = len(nodes) // 2
132 A, B = nodes[:mid], nodes[mid:]
133 else:
134 try:
135 A, B = partition
136 except (TypeError, ValueError) as err:
137 raise nx.NetworkXError("partition must be two sets") from err
138 if not nx.community.is_partition(G, [A, B]):
139 raise nx.NetworkXError("partition invalid")
140
141 side = {node: (node in A) for node in nodes}
142
143 # ruff: noqa: E731 skips check for no lambda functions
144 # Using shortest_paths _weight_function with sum instead of min on multiedges
145 if callable(weight):
146 sum_weight = weight
147 elif G.is_multigraph():
148 sum_weight = lambda u, v, d: sum(dd.get(weight, 1) for dd in d.values())
149 else:
150 sum_weight = lambda u, v, d: d.get(weight, 1)
151
152 edge_info = {
153 u: {v: wt for v, d in nbrs.items() if (wt := sum_weight(u, v, d)) is not None}
154 for u, nbrs in G._adj.items()
155 }
156
157 for i in range(max_iter):
158 costs = list(_kernighan_lin_sweep(edge_info, side))
159 # find out how many edges to update: min_i
160 min_cost, min_i, _ = min(costs)
161 if min_cost >= 0:
162 break
163
164 for _, _, (u, v) in costs[:min_i]:
165 side[u] = 1
166 side[v] = 0
167
168 part1 = {u for u, s in side.items() if s == 0}
169 part2 = {u for u, s in side.items() if s == 1}
170 return part1, part2
171
172
173@nx.utils.not_implemented_for("directed")
174@nx.utils.not_implemented_for("multigraph")
175def spectral_modularity_bipartition(G):
176 r"""Return a bipartition of the nodes based on the spectrum of the
177 modularity matrix of the graph.
178
179 This method calculates the eigenvector associated with the second
180 largest eigenvalue of the modularity matrix, where the modularity
181 matrix *B* is defined by
182
183 ..math::
184
185 B_{i j} = A_{i j} - \frac{k_i k_j}{2 m},
186
187 where *A* is the adjacency matrix, `k_i` is the degree of node *i*,
188 and *m* is the number of edges in the graph. Nodes whose
189 corresponding values in the eigenvector are negative are placed in
190 one block, nodes whose values are nonnegative are placed in another
191 block.
192
193 Parameters
194 ----------
195 G : NetworkX graph
196
197 Returns
198 -------
199 C : tuple
200 Pair of communities as two sets of nodes of ``G``, partitioned
201 according to second largest eigenvalue of the modularity matrix.
202
203 Examples
204 --------
205 >>> G = nx.karate_club_graph()
206 >>> MrHi, Officer = nx.community.spectral_modularity_bipartition(G)
207 >>> MrHi, Officer = sorted([sorted(MrHi), sorted(Officer)])
208 >>> MrHi
209 [0, 1, 2, 3, 4, 5, 6, 7, 10, 11, 12, 13, 16, 17, 19, 21]
210 >>> Officer
211 [8, 9, 14, 15, 18, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33]
212
213 References
214 ----------
215 .. [1] M. E. J. Newman *Networks: An Introduction*, pages 373--378
216 Oxford University Press 2011.
217 .. [2] M. E. J. Newman,
218 "Modularity and community structure in networks",
219 PNAS, 103 (23), p. 8577-8582,
220 https://doi.org/10.1073/pnas.0601602103
221
222 """
223 import numpy as np
224
225 B = nx.linalg.modularity_matrix(G)
226 eigenvalues, eigenvectors = np.linalg.eig(B)
227 index = np.argsort(eigenvalues)[-1]
228 v2 = zip(np.real(eigenvectors[:, index]), G)
229 left, right = set(), set()
230 for u, n in v2:
231 if u < 0:
232 left.add(n)
233 else:
234 right.add(n)
235 return left, right
236
237
238@nx.utils.not_implemented_for("multigraph")
239def greedy_node_swap_bipartition(G, *, init_split=None, max_iter=10):
240 """Split the nodes into two communities based on greedy
241 modularity maximization.
242
243 The algorithm works by selecting a node to change communities which
244 will maximize the modularity. The swap is made and the community
245 structure with the highest modularity is kept.
246
247 Parameters
248 ----------
249 G : NetworkX graph
250
251 init_split : 2-tuple of sets of nodes
252 Pair of sets of nodes in ``G`` providing an initial bipartition
253 for the algorithm. If not specified, a random balanced partition
254 is used. If this pair of sets is not a partition of the nodes of `G`,
255 :exc:`NetworkXException` is raised.
256
257 max_iter : int
258 Maximum number of iterations of attempting swaps to find an improvement.
259
260 Returns
261 -------
262 max_split : 2-tuple of sets of nodes
263 Pair of sets of nodes of ``G``, partitioned according to a
264 node swap greedy modularity maximization algorithm.
265
266 Raises
267 ------
268 NetworkXError
269 if init_split is not a valid partition of the
270 graph into two communities or if G is a MultiGraph
271
272 Examples
273 --------
274 >>> G = nx.barbell_graph(3, 0)
275 >>> left, right = nx.community.greedy_node_swap_bipartition(G)
276 >>> # Sort the communities so the nodes appear in increasing order.
277 >>> left, right = sorted([sorted(left), sorted(right)])
278 >>> sorted(left)
279 [0, 1, 2]
280 >>> sorted(right)
281 [3, 4, 5]
282
283 Notes
284 -----
285 This function is not implemented for multigraphs.
286
287 References
288 ----------
289 .. [1] M. E. J. Newman "Networks: An Introduction", pages 373--375.
290 Oxford University Press 2011.
291
292 """
293 if init_split is None:
294 m1 = len(G) // 2
295 m2 = len(G) - m1
296 some_nodes = set(random.sample(list(G), m1))
297 other_nodes = {n for n in G if n not in some_nodes}
298 best_split_so_far = (some_nodes, other_nodes)
299 else:
300 if not nx.community.is_partition(G, init_split):
301 raise nx.NetworkXError("init_split is not a partition of G")
302 if not len(init_split) == 2:
303 raise nx.NetworkXError("init_split must be a bipartition")
304 best_split_so_far = deepcopy(init_split)
305
306 best_mod = nx.community.modularity(G, best_split_so_far)
307
308 max_split, max_mod = best_split_so_far, best_mod
309 its = 0
310 m = G.number_of_edges()
311 G_degree = dict(G.degree)
312
313 while max_mod >= best_mod and its < max_iter:
314 best_split_so_far = max_split
315 best_mod = max_mod
316 next_split = deepcopy(max_split)
317 next_mod = max_mod
318 nodes = set(G)
319 while nodes:
320 max_swap = -1
321 max_node = None
322 max_node_comm = None
323 left, right = next_split
324 leftd = sum(G_degree[n] for n in left)
325 rightd = sum(G_degree[n] for n in right)
326 for n in nodes:
327 if n in left:
328 in_comm, out_comm = left, right
329 in_deg, out_deg = leftd, rightd
330 else:
331 in_comm, out_comm = right, left
332 in_deg, out_deg = rightd, leftd
333
334 d_eii = -len(G[n].keys() & in_comm) / m
335 d_ejj = len(G[n].keys() & out_comm) / m
336 deg = G_degree[n]
337 d_sum_ai = (deg / (2 * m**2)) * (in_deg - out_deg - deg)
338 swap_change = d_eii + d_ejj + d_sum_ai
339
340 if swap_change > max_swap:
341 max_swap = swap_change
342 max_node = n
343 max_node_comm = in_comm
344 non_max_node_comm = out_comm
345 # swap the node from one comm to the other
346 max_node_comm.remove(max_node)
347 non_max_node_comm.add(max_node)
348 next_mod += max_swap
349 # deepcopy next_split each time it reaches a high (might go lower later)
350 if next_mod > max_mod:
351 max_split, max_mod = deepcopy(next_split), next_mod
352 nodes.remove(max_node)
353 its += 1
354 return best_split_so_far