Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/networkx/algorithms/approximation/density.py: 11%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

109 statements  

1"""Fast algorithms for the densest subgraph problem""" 

2 

3import math 

4 

5import networkx as nx 

6 

7__all__ = ["densest_subgraph"] 

8 

9 

10def _greedy_plus_plus(G, iterations): 

11 if G.number_of_edges() == 0: 

12 return 0.0, set() 

13 if iterations < 1: 

14 raise ValueError( 

15 f"The number of iterations must be an integer >= 1. Provided: {iterations}" 

16 ) 

17 

18 loads = {node: 0 for node in G.nodes} # Load vector for Greedy++. 

19 best_density = 0.0 # Highest density encountered. 

20 best_subgraph = set() # Nodes of the best subgraph found. 

21 

22 for _ in range(iterations): 

23 # Initialize heap for fast access to minimum weighted degree. 

24 heap = nx.utils.BinaryHeap() 

25 

26 # Compute initial weighted degrees and add nodes to the heap. 

27 for node, degree in G.degree: 

28 heap.insert(node, loads[node] + degree) 

29 # Set up tracking for current graph state. 

30 remaining_nodes = set(G.nodes) 

31 num_edges = G.number_of_edges() 

32 current_degrees = dict(G.degree) 

33 

34 while remaining_nodes: 

35 num_nodes = len(remaining_nodes) 

36 

37 # Current density of the (implicit) graph 

38 current_density = num_edges / num_nodes 

39 

40 # Update the best density. 

41 if current_density > best_density: 

42 best_density = current_density 

43 best_subgraph = set(remaining_nodes) 

44 

45 # Pop the node with the smallest weighted degree. 

46 node, _ = heap.pop() 

47 if node not in remaining_nodes: 

48 continue # Skip nodes already removed. 

49 

50 # Update the load of the popped node. 

51 loads[node] += current_degrees[node] 

52 

53 # Update neighbors' degrees and the heap. 

54 for neighbor in G.neighbors(node): 

55 if neighbor in remaining_nodes: 

56 current_degrees[neighbor] -= 1 

57 num_edges -= 1 

58 heap.insert(neighbor, loads[neighbor] + current_degrees[neighbor]) 

59 

60 # Remove the node from the remaining nodes. 

61 remaining_nodes.remove(node) 

62 

63 return best_density, best_subgraph 

64 

65 

66def _fractional_peeling(G, b, x, node_to_idx, edge_to_idx): 

67 """ 

68 Optimized fractional peeling using NumPy arrays. 

69 

70 Parameters 

71 ---------- 

72 G : networkx.Graph 

73 The input graph. 

74 b : numpy.ndarray 

75 Induced load vector. 

76 x : numpy.ndarray 

77 Fractional edge values. 

78 node_to_idx : dict 

79 Mapping from node to index. 

80 edge_to_idx : dict 

81 Mapping from edge to index. 

82 

83 Returns 

84 ------- 

85 best_density : float 

86 The best density found. 

87 best_subgraph : set 

88 The subset of nodes defining the densest subgraph. 

89 """ 

90 heap = nx.utils.BinaryHeap() 

91 

92 remaining_nodes = set(G.nodes) 

93 

94 # Initialize heap with b values 

95 for idx in remaining_nodes: 

96 heap.insert(idx, b[idx]) 

97 

98 num_edges = G.number_of_edges() 

99 

100 best_density = 0.0 

101 best_subgraph = set() 

102 

103 while remaining_nodes: 

104 num_nodes = len(remaining_nodes) 

105 current_density = num_edges / num_nodes 

106 

107 if current_density > best_density: 

108 best_density = current_density 

109 best_subgraph = set(remaining_nodes) 

110 

111 # Pop the node with the smallest b 

112 node, _ = heap.pop() 

113 while node not in remaining_nodes: 

114 node, _ = heap.pop() # Clean the heap from stale values 

115 

116 # Update neighbors b values by subtracting fractional x value 

117 for neighbor in G.neighbors(node): 

118 if neighbor in remaining_nodes: 

119 neighbor_idx = node_to_idx[neighbor] 

120 # Take off fractional value 

121 b[neighbor_idx] -= x[edge_to_idx[(neighbor, node)]] 

122 num_edges -= 1 

123 heap.insert(neighbor, b[neighbor_idx]) 

124 

125 remaining_nodes.remove(node) # peel off node 

126 

127 return best_density, best_subgraph 

128 

129 

130def _fista(G, iterations): 

131 if G.number_of_edges() == 0: 

132 return 0.0, set() 

133 if iterations < 1: 

134 raise ValueError( 

135 f"The number of iterations must be an integer >= 1. Provided: {iterations}" 

136 ) 

137 import numpy as np 

138 

139 # 1. Node Mapping: Assign a unique index to each node and edge 

140 node_to_idx = {node: idx for idx, node in enumerate(G)} 

141 num_nodes = G.number_of_nodes() 

142 num_undirected_edges = G.number_of_edges() 

143 

144 # 2. Edge Mapping: Assign a unique index to each bidirectional edge 

145 bidirectional_edges = [(u, v) for u, v in G.edges] + [(v, u) for u, v in G.edges] 

146 edge_to_idx = {edge: idx for idx, edge in enumerate(bidirectional_edges)} 

147 

148 num_edges = len(bidirectional_edges) 

149 

150 # 3. Reverse Edge Mapping: Map each (bidirectional) edge to its reverse edge index 

151 reverse_edge_idx = np.empty(num_edges, dtype=np.int32) 

152 for idx in range(num_undirected_edges): 

153 reverse_edge_idx[idx] = num_undirected_edges + idx 

154 for idx in range(num_undirected_edges, 2 * num_undirected_edges): 

155 reverse_edge_idx[idx] = idx - num_undirected_edges 

156 

157 # 4. Initialize Variables as NumPy Arrays 

158 x = np.full(num_edges, 0.5, dtype=np.float32) 

159 y = x.copy() 

160 z = np.zeros(num_edges, dtype=np.float32) 

161 b = np.zeros(num_nodes, dtype=np.float32) # Induced load vector 

162 tk = 1.0 # Momentum term 

163 

164 # 5. Precompute Edge Source Indices 

165 edge_src_indices = np.array( 

166 [node_to_idx[u] for u, _ in bidirectional_edges], dtype=np.int32 

167 ) 

168 

169 # 6. Compute Learning Rate 

170 max_degree = max(deg for _, deg in G.degree) 

171 # 0.9 for floating point errs when max_degree is very large 

172 learning_rate = 0.9 / max_degree 

173 

174 # 7. Iterative Updates 

175 for _ in range(iterations): 

176 # 7a. Update b: sum y over outgoing edges for each node 

177 b[:] = 0.0 # Reset b to zero 

178 np.add.at(b, edge_src_indices, y) # b_u = \sum_{v : (u,v) \in E(G)} y_{uv} 

179 

180 # 7b. Compute z, z_{uv} = y_{uv} - 2 * learning_rate * b_u 

181 z = y - 2.0 * learning_rate * b[edge_src_indices] 

182 

183 # 7c. Update Momentum Term 

184 tknew = (1.0 + math.sqrt(1 + 4.0 * tk**2)) / 2.0 

185 

186 # 7d. Update x in a vectorized manner, x_{uv} = (z_{uv} - z_{vu} + 1.0) / 2.0 

187 new_xuv = (z - z[reverse_edge_idx] + 1.0) / 2.0 

188 clamped_x = np.clip(new_xuv, 0.0, 1.0) # Clamp x_{uv} between 0 and 1 

189 

190 # Update y using the FISTA update formula (similar to gradient descent) 

191 y = ( 

192 clamped_x 

193 + ((tk - 1.0) / tknew) * (clamped_x - x) 

194 + (tk / tknew) * (clamped_x - y) 

195 ) 

196 

197 # Update x 

198 x = clamped_x 

199 

200 # Update tk, the momemntum term 

201 tk = tknew 

202 

203 # Rebalance the b values! Otherwise performance is a bit suboptimal. 

204 b[:] = 0.0 

205 np.add.at(b, edge_src_indices, x) # b_u = \sum_{v : (u,v) \in E(G)} x_{uv} 

206 

207 # Extract the actual (approximate) dense subgraph. 

208 return _fractional_peeling(G, b, x, node_to_idx, edge_to_idx) 

209 

210 

211ALGORITHMS = {"greedy++": _greedy_plus_plus, "fista": _fista} 

212 

213 

214@nx.utils.not_implemented_for("directed") 

215@nx.utils.not_implemented_for("multigraph") 

216@nx._dispatchable 

217def densest_subgraph(G, iterations=1, *, method="fista"): 

218 r"""Returns an approximate densest subgraph for a graph `G`. 

219 

220 This function runs an iterative algorithm to find the densest subgraph, 

221 and returns both the density and the subgraph. For a discussion on the 

222 notion of density used and the different algorithms available on 

223 networkx, please see the Notes section below. 

224 

225 Parameters 

226 ---------- 

227 G : NetworkX graph 

228 Undirected graph. 

229 

230 iterations : int, optional (default=1) 

231 Number of iterations to use for the iterative algorithm. Can be 

232 specified positionally or as a keyword argument. 

233 

234 method : string, optional (default='fista') 

235 The algorithm to use to approximate the densest subgraph. Supported 

236 options: 'greedy++' by Boob et al. [2]_ and 'fista' by Harb et al. [3]_. 

237 Must be specified as a keyword argument. Other inputs produce a 

238 ValueError. 

239 

240 Returns 

241 ------- 

242 d : float 

243 The density of the approximate subgraph found. 

244 

245 S : set 

246 The subset of nodes defining the approximate densest subgraph. 

247 

248 Examples 

249 -------- 

250 >>> G = nx.star_graph(4) 

251 >>> nx.approximation.densest_subgraph(G, iterations=1) 

252 (0.8, {0, 1, 2, 3, 4}) 

253 

254 Notes 

255 ----- 

256 **Problem Definition:** 

257 The densest subgraph problem (DSG) asks to find the subgraph 

258 $S \subseteq V(G)$ with maximum density. For a subset of the nodes of 

259 $G$, $S \subseteq V(G)$, define $E(S) = \{ (u,v) : (u,v)\in E(G), 

260 u\in S, v\in S \}$ as the set of edges with both endpoints in $S$. 

261 The density of $S$ is defined as $|E(S)|/|S|$, the ratio between the 

262 edges in the subgraph $G[S]$ and the number of nodes in that subgraph. 

263 Note that this is different from the standard graph theoretic definition 

264 of density, defined as $\frac{2|E(S)|}{|S|(|S|-1)}$, for historical 

265 reasons. 

266 

267 **Exact Algorithms:** 

268 The densest subgraph problem is polynomial time solvable using maximum 

269 flow, commonly referred to as Goldberg's algorithm. However, the 

270 algorithm is quite involved. It first binary searches on the optimal 

271 density, $d^\ast$. For a guess of the density $d$, it sets up a flow 

272 network $G'$ with size $O(m)$. The maximum flow solution either 

273 informs the algorithm that no subgraph with density $d$ exists, or it 

274 provides a subgraph with density at least $d$. However, this is 

275 inherently bottlenecked by the maximum flow algorithm. For example, [2]_ 

276 notes that Goldberg’s algorithm was not feasible on many large graphs 

277 even though they used a highly optimized maximum flow library. 

278 

279 **Charikar's Greedy Peeling:** 

280 While exact solution algorithms are quite involved, there are several 

281 known approximation algorithms for the densest subgraph problem. 

282 

283 Charikar [1]_ described a very simple 1/2-approximation algorithm for DSG 

284 known as the greedy "peeling" algorithm. The algorithm creates an 

285 ordering of the nodes as follows. The first node $v_1$ is the one with 

286 the smallest degree in $G$ (ties broken arbitrarily). It selects 

287 $v_2$ to be the smallest degree node in $G \setminus v_1$. Letting 

288 $G_i$ be the graph after removing $v_1, ..., v_i$ (with $G_0=G$), 

289 the algorithm returns the graph among $G_0, ..., G_n$ with the highest 

290 density. 

291 

292 **Greedy++:** 

293 Boob et al. [2]_ generalized this algorithm into Greedy++, an iterative 

294 algorithm that runs several rounds of "peeling". In fact, Greedy++ with 1 

295 iteration is precisely Charikar's algorithm. The algorithm converges to a 

296 $(1-\epsilon)$ approximate densest subgraph in $O(\Delta(G)\log 

297 n/\epsilon^2)$ iterations, where $\Delta(G)$ is the maximum degree, 

298 and $n$ is the number of nodes in $G$. The algorithm also has other 

299 desirable properties as shown by [4]_ and [5]_. 

300 

301 **FISTA Algorithm:** 

302 Harb et al. [3]_ gave a faster and more scalable algorithm using ideas 

303 from quadratic programming for the densest subgraph, which is based on a 

304 fast iterative shrinkage-thresholding algorithm (FISTA) algorithm. It is 

305 known that computing the densest subgraph can be formulated as the 

306 following convex optimization problem: 

307 

308 Minimize $\sum_{u \in V(G)} b_u^2$ 

309 

310 Subject to: 

311 

312 $b_u = \sum_{v: \{u,v\} \in E(G)} x_{uv}$ for all $u \in V(G)$ 

313 

314 $x_{uv} + x_{vu} = 1.0$ for all $\{u,v\} \in E(G)$ 

315 

316 $x_{uv} \geq 0, x_{vu} \geq 0$ for all $\{u,v\} \in E(G)$ 

317 

318 Here, $x_{uv}$ represents the fraction of edge $\{u,v\}$ assigned to 

319 $u$, and $x_{vu}$ to $v$. 

320 

321 The FISTA algorithm efficiently solves this convex program using gradient 

322 descent with projections. For a learning rate $\alpha$, the algorithm 

323 does: 

324 

325 1. **Initialization**: Set $x^{(0)}_{uv} = x^{(0)}_{vu} = 0.5$ for all 

326 edges as a feasible solution. 

327 

328 2. **Gradient Update**: For iteration $k\geq 1$, set 

329 $x^{(k+1)}_{uv} = x^{(k)}_{uv} - 2 \alpha \sum_{v: \{u,v\} \in E(G)} 

330 x^{(k)}_{uv}$. However, now $x^{(k+1)}_{uv}$ might be infeasible! 

331 To ensure feasibility, we project $x^{(k+1)}_{uv}$. 

332 

333 3. **Projection to the Feasible Set**: Compute 

334 $b^{(k+1)}_u = \sum_{v: \{u,v\} \in E(G)} x^{(k)}_{uv}$ for all 

335 nodes $u$. Define $z^{(k+1)}_{uv} = x^{(k+1)}_{uv} - 2 \alpha 

336 b^{(k+1)}_u$. Update $x^{(k+1)}_{uv} = 

337 CLAMP((z^{(k+1)}_{uv} - z^{(k+1)}_{vu} + 1.0) / 2.0)$, where 

338 $CLAMP(x) = \max(0, \min(1, x))$. 

339 

340 With a learning rate of $\alpha=1/\Delta(G)$, where $\Delta(G)$ is 

341 the maximum degree, the algorithm converges to the optimum solution of 

342 the convex program. 

343 

344 **Fractional Peeling:** 

345 To obtain a **discrete** subgraph, we use fractional peeling, an 

346 adaptation of the standard peeling algorithm which peels the minimum 

347 degree vertex in each iteration, and returns the densest subgraph found 

348 along the way. Here, we instead peel the vertex with the smallest 

349 induced load $b_u$: 

350 

351 1. Compute $b_u$ and $x_{uv}$. 

352 

353 2. Iteratively remove the vertex with the smallest $b_u$, updating its 

354 neighbors' load by $x_{vu}$. 

355 

356 Fractional peeling transforms the approximately optimal fractional 

357 values $b_u, x_{uv}$ into a discrete subgraph. Unlike traditional 

358 peeling, which removes the lowest-degree node, this method accounts for 

359 fractional edge contributions from the convex program. 

360 

361 This approach is both scalable and theoretically sound, ensuring a quick 

362 approximation of the densest subgraph while leveraging fractional load 

363 balancing. 

364 

365 References 

366 ---------- 

367 .. [1] Charikar, Moses. "Greedy approximation algorithms for finding dense 

368 components in a graph." In International workshop on approximation 

369 algorithms for combinatorial optimization, pp. 84-95. Berlin, Heidelberg: 

370 Springer Berlin Heidelberg, 2000. 

371 

372 .. [2] Boob, Digvijay, Yu Gao, Richard Peng, Saurabh Sawlani, Charalampos 

373 Tsourakakis, Di Wang, and Junxing Wang. "Flowless: Extracting densest 

374 subgraphs without flow computations." In Proceedings of The Web Conference 

375 2020, pp. 573-583. 2020. 

376 

377 .. [3] Harb, Elfarouk, Kent Quanrud, and Chandra Chekuri. "Faster and scalable 

378 algorithms for densest subgraph and decomposition." Advances in Neural 

379 Information Processing Systems 35 (2022): 26966-26979. 

380 

381 .. [4] Harb, Elfarouk, Kent Quanrud, and Chandra Chekuri. "Convergence to 

382 lexicographically optimal base in a (contra) polymatroid and applications 

383 to densest subgraph and tree packing." arXiv preprint arXiv:2305.02987 

384 (2023). 

385 

386 .. [5] Chekuri, Chandra, Kent Quanrud, and Manuel R. Torres. "Densest 

387 subgraph: Supermodularity, iterative peeling, and flow." In Proceedings of 

388 the 2022 Annual ACM-SIAM Symposium on Discrete Algorithms (SODA), pp. 

389 1531-1555. Society for Industrial and Applied Mathematics, 2022. 

390 """ 

391 try: 

392 algo = ALGORITHMS[method] 

393 except KeyError as e: 

394 raise ValueError(f"{method} is not a valid choice for an algorithm.") from e 

395 

396 return algo(G, iterations)