1"""Fast algorithms for the densest subgraph problem"""
2
3import math
4
5import networkx as nx
6
7__all__ = ["densest_subgraph"]
8
9
10def _greedy_plus_plus(G, iterations):
11 if G.number_of_edges() == 0:
12 return 0.0, set()
13 if iterations < 1:
14 raise ValueError(
15 f"The number of iterations must be an integer >= 1. Provided: {iterations}"
16 )
17
18 loads = {node: 0 for node in G.nodes} # Load vector for Greedy++.
19 best_density = 0.0 # Highest density encountered.
20 best_subgraph = set() # Nodes of the best subgraph found.
21
22 for _ in range(iterations):
23 # Initialize heap for fast access to minimum weighted degree.
24 heap = nx.utils.BinaryHeap()
25
26 # Compute initial weighted degrees and add nodes to the heap.
27 for node, degree in G.degree:
28 heap.insert(node, loads[node] + degree)
29 # Set up tracking for current graph state.
30 remaining_nodes = set(G.nodes)
31 num_edges = G.number_of_edges()
32 current_degrees = dict(G.degree)
33
34 while remaining_nodes:
35 num_nodes = len(remaining_nodes)
36
37 # Current density of the (implicit) graph
38 current_density = num_edges / num_nodes
39
40 # Update the best density.
41 if current_density > best_density:
42 best_density = current_density
43 best_subgraph = set(remaining_nodes)
44
45 # Pop the node with the smallest weighted degree.
46 node, _ = heap.pop()
47 if node not in remaining_nodes:
48 continue # Skip nodes already removed.
49
50 # Update the load of the popped node.
51 loads[node] += current_degrees[node]
52
53 # Update neighbors' degrees and the heap.
54 for neighbor in G.neighbors(node):
55 if neighbor in remaining_nodes:
56 current_degrees[neighbor] -= 1
57 num_edges -= 1
58 heap.insert(neighbor, loads[neighbor] + current_degrees[neighbor])
59
60 # Remove the node from the remaining nodes.
61 remaining_nodes.remove(node)
62
63 return best_density, best_subgraph
64
65
66def _fractional_peeling(G, b, x, node_to_idx, edge_to_idx):
67 """
68 Optimized fractional peeling using NumPy arrays.
69
70 Parameters
71 ----------
72 G : networkx.Graph
73 The input graph.
74 b : numpy.ndarray
75 Induced load vector.
76 x : numpy.ndarray
77 Fractional edge values.
78 node_to_idx : dict
79 Mapping from node to index.
80 edge_to_idx : dict
81 Mapping from edge to index.
82
83 Returns
84 -------
85 best_density : float
86 The best density found.
87 best_subgraph : set
88 The subset of nodes defining the densest subgraph.
89 """
90 heap = nx.utils.BinaryHeap()
91
92 remaining_nodes = set(G.nodes)
93
94 # Initialize heap with b values
95 for idx in remaining_nodes:
96 heap.insert(idx, b[idx])
97
98 num_edges = G.number_of_edges()
99
100 best_density = 0.0
101 best_subgraph = set()
102
103 while remaining_nodes:
104 num_nodes = len(remaining_nodes)
105 current_density = num_edges / num_nodes
106
107 if current_density > best_density:
108 best_density = current_density
109 best_subgraph = set(remaining_nodes)
110
111 # Pop the node with the smallest b
112 node, _ = heap.pop()
113 while node not in remaining_nodes:
114 node, _ = heap.pop() # Clean the heap from stale values
115
116 # Update neighbors b values by subtracting fractional x value
117 for neighbor in G.neighbors(node):
118 if neighbor in remaining_nodes:
119 neighbor_idx = node_to_idx[neighbor]
120 # Take off fractional value
121 b[neighbor_idx] -= x[edge_to_idx[(neighbor, node)]]
122 num_edges -= 1
123 heap.insert(neighbor, b[neighbor_idx])
124
125 remaining_nodes.remove(node) # peel off node
126
127 return best_density, best_subgraph
128
129
130def _fista(G, iterations):
131 if G.number_of_edges() == 0:
132 return 0.0, set()
133 if iterations < 1:
134 raise ValueError(
135 f"The number of iterations must be an integer >= 1. Provided: {iterations}"
136 )
137 import numpy as np
138
139 # 1. Node Mapping: Assign a unique index to each node and edge
140 node_to_idx = {node: idx for idx, node in enumerate(G)}
141 num_nodes = G.number_of_nodes()
142 num_undirected_edges = G.number_of_edges()
143
144 # 2. Edge Mapping: Assign a unique index to each bidirectional edge
145 bidirectional_edges = [(u, v) for u, v in G.edges] + [(v, u) for u, v in G.edges]
146 edge_to_idx = {edge: idx for idx, edge in enumerate(bidirectional_edges)}
147
148 num_edges = len(bidirectional_edges)
149
150 # 3. Reverse Edge Mapping: Map each (bidirectional) edge to its reverse edge index
151 reverse_edge_idx = np.empty(num_edges, dtype=np.int32)
152 for idx in range(num_undirected_edges):
153 reverse_edge_idx[idx] = num_undirected_edges + idx
154 for idx in range(num_undirected_edges, 2 * num_undirected_edges):
155 reverse_edge_idx[idx] = idx - num_undirected_edges
156
157 # 4. Initialize Variables as NumPy Arrays
158 x = np.full(num_edges, 0.5, dtype=np.float32)
159 y = x.copy()
160 z = np.zeros(num_edges, dtype=np.float32)
161 b = np.zeros(num_nodes, dtype=np.float32) # Induced load vector
162 tk = 1.0 # Momentum term
163
164 # 5. Precompute Edge Source Indices
165 edge_src_indices = np.array(
166 [node_to_idx[u] for u, _ in bidirectional_edges], dtype=np.int32
167 )
168
169 # 6. Compute Learning Rate
170 max_degree = max(deg for _, deg in G.degree)
171 # 0.9 for floating point errs when max_degree is very large
172 learning_rate = 0.9 / max_degree
173
174 # 7. Iterative Updates
175 for _ in range(iterations):
176 # 7a. Update b: sum y over outgoing edges for each node
177 b[:] = 0.0 # Reset b to zero
178 np.add.at(b, edge_src_indices, y) # b_u = \sum_{v : (u,v) \in E(G)} y_{uv}
179
180 # 7b. Compute z, z_{uv} = y_{uv} - 2 * learning_rate * b_u
181 z = y - 2.0 * learning_rate * b[edge_src_indices]
182
183 # 7c. Update Momentum Term
184 tknew = (1.0 + math.sqrt(1 + 4.0 * tk**2)) / 2.0
185
186 # 7d. Update x in a vectorized manner, x_{uv} = (z_{uv} - z_{vu} + 1.0) / 2.0
187 new_xuv = (z - z[reverse_edge_idx] + 1.0) / 2.0
188 clamped_x = np.clip(new_xuv, 0.0, 1.0) # Clamp x_{uv} between 0 and 1
189
190 # Update y using the FISTA update formula (similar to gradient descent)
191 y = (
192 clamped_x
193 + ((tk - 1.0) / tknew) * (clamped_x - x)
194 + (tk / tknew) * (clamped_x - y)
195 )
196
197 # Update x
198 x = clamped_x
199
200 # Update tk, the momemntum term
201 tk = tknew
202
203 # Rebalance the b values! Otherwise performance is a bit suboptimal.
204 b[:] = 0.0
205 np.add.at(b, edge_src_indices, x) # b_u = \sum_{v : (u,v) \in E(G)} x_{uv}
206
207 # Extract the actual (approximate) dense subgraph.
208 return _fractional_peeling(G, b, x, node_to_idx, edge_to_idx)
209
210
211ALGORITHMS = {"greedy++": _greedy_plus_plus, "fista": _fista}
212
213
214@nx.utils.not_implemented_for("directed")
215@nx.utils.not_implemented_for("multigraph")
216@nx._dispatchable
217def densest_subgraph(G, iterations=1, *, method="fista"):
218 r"""Returns an approximate densest subgraph for a graph `G`.
219
220 This function runs an iterative algorithm to find the densest subgraph,
221 and returns both the density and the subgraph. For a discussion on the
222 notion of density used and the different algorithms available on
223 networkx, please see the Notes section below.
224
225 Parameters
226 ----------
227 G : NetworkX graph
228 Undirected graph.
229
230 iterations : int, optional (default=1)
231 Number of iterations to use for the iterative algorithm. Can be
232 specified positionally or as a keyword argument.
233
234 method : string, optional (default='fista')
235 The algorithm to use to approximate the densest subgraph. Supported
236 options: 'greedy++' by Boob et al. [2]_ and 'fista' by Harb et al. [3]_.
237 Must be specified as a keyword argument. Other inputs produce a
238 ValueError.
239
240 Returns
241 -------
242 d : float
243 The density of the approximate subgraph found.
244
245 S : set
246 The subset of nodes defining the approximate densest subgraph.
247
248 Examples
249 --------
250 >>> G = nx.star_graph(4)
251 >>> nx.approximation.densest_subgraph(G, iterations=1)
252 (0.8, {0, 1, 2, 3, 4})
253
254 Notes
255 -----
256 **Problem Definition:**
257 The densest subgraph problem (DSG) asks to find the subgraph
258 $S \subseteq V(G)$ with maximum density. For a subset of the nodes of
259 $G$, $S \subseteq V(G)$, define $E(S) = \{ (u,v) : (u,v)\in E(G),
260 u\in S, v\in S \}$ as the set of edges with both endpoints in $S$.
261 The density of $S$ is defined as $|E(S)|/|S|$, the ratio between the
262 edges in the subgraph $G[S]$ and the number of nodes in that subgraph.
263 Note that this is different from the standard graph theoretic definition
264 of density, defined as $\frac{2|E(S)|}{|S|(|S|-1)}$, for historical
265 reasons.
266
267 **Exact Algorithms:**
268 The densest subgraph problem is polynomial time solvable using maximum
269 flow, commonly referred to as Goldberg's algorithm. However, the
270 algorithm is quite involved. It first binary searches on the optimal
271 density, $d^\ast$. For a guess of the density $d$, it sets up a flow
272 network $G'$ with size $O(m)$. The maximum flow solution either
273 informs the algorithm that no subgraph with density $d$ exists, or it
274 provides a subgraph with density at least $d$. However, this is
275 inherently bottlenecked by the maximum flow algorithm. For example, [2]_
276 notes that Goldberg’s algorithm was not feasible on many large graphs
277 even though they used a highly optimized maximum flow library.
278
279 **Charikar's Greedy Peeling:**
280 While exact solution algorithms are quite involved, there are several
281 known approximation algorithms for the densest subgraph problem.
282
283 Charikar [1]_ described a very simple 1/2-approximation algorithm for DSG
284 known as the greedy "peeling" algorithm. The algorithm creates an
285 ordering of the nodes as follows. The first node $v_1$ is the one with
286 the smallest degree in $G$ (ties broken arbitrarily). It selects
287 $v_2$ to be the smallest degree node in $G \setminus v_1$. Letting
288 $G_i$ be the graph after removing $v_1, ..., v_i$ (with $G_0=G$),
289 the algorithm returns the graph among $G_0, ..., G_n$ with the highest
290 density.
291
292 **Greedy++:**
293 Boob et al. [2]_ generalized this algorithm into Greedy++, an iterative
294 algorithm that runs several rounds of "peeling". In fact, Greedy++ with 1
295 iteration is precisely Charikar's algorithm. The algorithm converges to a
296 $(1-\epsilon)$ approximate densest subgraph in $O(\Delta(G)\log
297 n/\epsilon^2)$ iterations, where $\Delta(G)$ is the maximum degree,
298 and $n$ is the number of nodes in $G$. The algorithm also has other
299 desirable properties as shown by [4]_ and [5]_.
300
301 **FISTA Algorithm:**
302 Harb et al. [3]_ gave a faster and more scalable algorithm using ideas
303 from quadratic programming for the densest subgraph, which is based on a
304 fast iterative shrinkage-thresholding algorithm (FISTA) algorithm. It is
305 known that computing the densest subgraph can be formulated as the
306 following convex optimization problem:
307
308 Minimize $\sum_{u \in V(G)} b_u^2$
309
310 Subject to:
311
312 $b_u = \sum_{v: \{u,v\} \in E(G)} x_{uv}$ for all $u \in V(G)$
313
314 $x_{uv} + x_{vu} = 1.0$ for all $\{u,v\} \in E(G)$
315
316 $x_{uv} \geq 0, x_{vu} \geq 0$ for all $\{u,v\} \in E(G)$
317
318 Here, $x_{uv}$ represents the fraction of edge $\{u,v\}$ assigned to
319 $u$, and $x_{vu}$ to $v$.
320
321 The FISTA algorithm efficiently solves this convex program using gradient
322 descent with projections. For a learning rate $\alpha$, the algorithm
323 does:
324
325 1. **Initialization**: Set $x^{(0)}_{uv} = x^{(0)}_{vu} = 0.5$ for all
326 edges as a feasible solution.
327
328 2. **Gradient Update**: For iteration $k\geq 1$, set
329 $x^{(k+1)}_{uv} = x^{(k)}_{uv} - 2 \alpha \sum_{v: \{u,v\} \in E(G)}
330 x^{(k)}_{uv}$. However, now $x^{(k+1)}_{uv}$ might be infeasible!
331 To ensure feasibility, we project $x^{(k+1)}_{uv}$.
332
333 3. **Projection to the Feasible Set**: Compute
334 $b^{(k+1)}_u = \sum_{v: \{u,v\} \in E(G)} x^{(k)}_{uv}$ for all
335 nodes $u$. Define $z^{(k+1)}_{uv} = x^{(k+1)}_{uv} - 2 \alpha
336 b^{(k+1)}_u$. Update $x^{(k+1)}_{uv} =
337 CLAMP((z^{(k+1)}_{uv} - z^{(k+1)}_{vu} + 1.0) / 2.0)$, where
338 $CLAMP(x) = \max(0, \min(1, x))$.
339
340 With a learning rate of $\alpha=1/\Delta(G)$, where $\Delta(G)$ is
341 the maximum degree, the algorithm converges to the optimum solution of
342 the convex program.
343
344 **Fractional Peeling:**
345 To obtain a **discrete** subgraph, we use fractional peeling, an
346 adaptation of the standard peeling algorithm which peels the minimum
347 degree vertex in each iteration, and returns the densest subgraph found
348 along the way. Here, we instead peel the vertex with the smallest
349 induced load $b_u$:
350
351 1. Compute $b_u$ and $x_{uv}$.
352
353 2. Iteratively remove the vertex with the smallest $b_u$, updating its
354 neighbors' load by $x_{vu}$.
355
356 Fractional peeling transforms the approximately optimal fractional
357 values $b_u, x_{uv}$ into a discrete subgraph. Unlike traditional
358 peeling, which removes the lowest-degree node, this method accounts for
359 fractional edge contributions from the convex program.
360
361 This approach is both scalable and theoretically sound, ensuring a quick
362 approximation of the densest subgraph while leveraging fractional load
363 balancing.
364
365 References
366 ----------
367 .. [1] Charikar, Moses. "Greedy approximation algorithms for finding dense
368 components in a graph." In International workshop on approximation
369 algorithms for combinatorial optimization, pp. 84-95. Berlin, Heidelberg:
370 Springer Berlin Heidelberg, 2000.
371
372 .. [2] Boob, Digvijay, Yu Gao, Richard Peng, Saurabh Sawlani, Charalampos
373 Tsourakakis, Di Wang, and Junxing Wang. "Flowless: Extracting densest
374 subgraphs without flow computations." In Proceedings of The Web Conference
375 2020, pp. 573-583. 2020.
376
377 .. [3] Harb, Elfarouk, Kent Quanrud, and Chandra Chekuri. "Faster and scalable
378 algorithms for densest subgraph and decomposition." Advances in Neural
379 Information Processing Systems 35 (2022): 26966-26979.
380
381 .. [4] Harb, Elfarouk, Kent Quanrud, and Chandra Chekuri. "Convergence to
382 lexicographically optimal base in a (contra) polymatroid and applications
383 to densest subgraph and tree packing." arXiv preprint arXiv:2305.02987
384 (2023).
385
386 .. [5] Chekuri, Chandra, Kent Quanrud, and Manuel R. Torres. "Densest
387 subgraph: Supermodularity, iterative peeling, and flow." In Proceedings of
388 the 2022 Annual ACM-SIAM Symposium on Discrete Algorithms (SODA), pp.
389 1531-1555. Society for Industrial and Applied Mathematics, 2022.
390 """
391 try:
392 algo = ALGORITHMS[method]
393 except KeyError as e:
394 raise ValueError(f"{method} is not a valid choice for an algorithm.") from e
395
396 return algo(G, iterations)