Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/networkx/algorithms/approximation/density.py: 11%

1"""Fast algorithms for the densest subgraph problem"""

3import math

5import networkx as nx

7__all__ = ["densest_subgraph"]

10def _greedy_plus_plus(G, iterations):

11 if G.number_of_edges() == 0:

12 return 0.0, set()

13 if iterations < 1:

14 raise ValueError(

15 f"The number of iterations must be an integer >= 1. Provided: {iterations}"

16 )

18 loads = {node: 0 for node in G.nodes} # Load vector for Greedy++.

19 best_density = 0.0 # Highest density encountered.

20 best_subgraph = set() # Nodes of the best subgraph found.

22 for _ in range(iterations):

23 # Initialize heap for fast access to minimum weighted degree.

24 heap = nx.utils.BinaryHeap()

26 # Compute initial weighted degrees and add nodes to the heap.

27 for node, degree in G.degree:

28 heap.insert(node, loads[node] + degree)

29 # Set up tracking for current graph state.

30 remaining_nodes = set(G.nodes)

31 num_edges = G.number_of_edges()

32 current_degrees = dict(G.degree)

34 while remaining_nodes:

35 num_nodes = len(remaining_nodes)

37 # Current density of the (implicit) graph

38 current_density = num_edges / num_nodes

40 # Update the best density.

41 if current_density > best_density:

42 best_density = current_density

43 best_subgraph = set(remaining_nodes)

45 # Pop the node with the smallest weighted degree.

46 node, _ = heap.pop()

47 if node not in remaining_nodes:

48 continue # Skip nodes already removed.

50 # Update the load of the popped node.

51 loads[node] += current_degrees[node]

53 # Update neighbors' degrees and the heap.

54 for neighbor in G.neighbors(node):

55 if neighbor in remaining_nodes:

56 current_degrees[neighbor] -= 1

57 num_edges -= 1

58 heap.insert(neighbor, loads[neighbor] + current_degrees[neighbor])

60 # Remove the node from the remaining nodes.

61 remaining_nodes.remove(node)

63 return best_density, best_subgraph

66def _fractional_peeling(G, b, x, node_to_idx, edge_to_idx):

67 """

68 Optimized fractional peeling using NumPy arrays.

70 Parameters

71 ----------

72 G : networkx.Graph

73 The input graph.

74 b : numpy.ndarray

75 Induced load vector.

76 x : numpy.ndarray

77 Fractional edge values.

78 node_to_idx : dict

79 Mapping from node to index.

80 edge_to_idx : dict

81 Mapping from edge to index.

83 Returns

84 -------

85 best_density : float

86 The best density found.

87 best_subgraph : set

88 The subset of nodes defining the densest subgraph.

89 """

90 heap = nx.utils.BinaryHeap()

92 remaining_nodes = set(G.nodes)

94 # Initialize heap with b values

95 for idx in remaining_nodes:

96 heap.insert(idx, b[idx])

98 num_edges = G.number_of_edges()

100 best_density = 0.0

101 best_subgraph = set()

102

103 while remaining_nodes:

104 num_nodes = len(remaining_nodes)

105 current_density = num_edges / num_nodes

106

107 if current_density > best_density:

108 best_density = current_density

109 best_subgraph = set(remaining_nodes)

110

111 # Pop the node with the smallest b

112 node, _ = heap.pop()

113 while node not in remaining_nodes:

114 node, _ = heap.pop() # Clean the heap from stale values

115

116 # Update neighbors b values by subtracting fractional x value

117 for neighbor in G.neighbors(node):

118 if neighbor in remaining_nodes:

119 neighbor_idx = node_to_idx[neighbor]

120 # Take off fractional value

121 b[neighbor_idx] -= x[edge_to_idx[(neighbor, node)]]

122 num_edges -= 1

123 heap.insert(neighbor, b[neighbor_idx])

124

125 remaining_nodes.remove(node) # peel off node

126

127 return best_density, best_subgraph

128

129

130def _fista(G, iterations):

131 if G.number_of_edges() == 0:

132 return 0.0, set()

133 if iterations < 1:

134 raise ValueError(

135 f"The number of iterations must be an integer >= 1. Provided: {iterations}"

136 )

137 import numpy as np

138

139 # 1. Node Mapping: Assign a unique index to each node and edge

140 node_to_idx = {node: idx for idx, node in enumerate(G)}

141 num_nodes = G.number_of_nodes()

142 num_undirected_edges = G.number_of_edges()

143

144 # 2. Edge Mapping: Assign a unique index to each bidirectional edge

145 bidirectional_edges = [(u, v) for u, v in G.edges] + [(v, u) for u, v in G.edges]

146 edge_to_idx = {edge: idx for idx, edge in enumerate(bidirectional_edges)}

147

148 num_edges = len(bidirectional_edges)

149

150 # 3. Reverse Edge Mapping: Map each (bidirectional) edge to its reverse edge index

151 reverse_edge_idx = np.empty(num_edges, dtype=np.int32)

152 for idx in range(num_undirected_edges):

153 reverse_edge_idx[idx] = num_undirected_edges + idx

154 for idx in range(num_undirected_edges, 2 * num_undirected_edges):

155 reverse_edge_idx[idx] = idx - num_undirected_edges

156

157 # 4. Initialize Variables as NumPy Arrays

158 x = np.full(num_edges, 0.5, dtype=np.float32)

159 y = x.copy()

160 z = np.zeros(num_edges, dtype=np.float32)

161 b = np.zeros(num_nodes, dtype=np.float32) # Induced load vector

162 tk = 1.0 # Momentum term

163

164 # 5. Precompute Edge Source Indices

165 edge_src_indices = np.array(

166 [node_to_idx[u] for u, _ in bidirectional_edges], dtype=np.int32

167 )

168

169 # 6. Compute Learning Rate

170 max_degree = max(deg for _, deg in G.degree)

171 # 0.9 for floating point errs when max_degree is very large

172 learning_rate = 0.9 / max_degree

173

174 # 7. Iterative Updates

175 for _ in range(iterations):

176 # 7a. Update b: sum y over outgoing edges for each node

177 b[:] = 0.0 # Reset b to zero

178 np.add.at(b, edge_src_indices, y) # b_u = \sum_{v : (u,v) \in E(G)} y_{uv}

179

180 # 7b. Compute z, z_{uv} = y_{uv} - 2 * learning_rate * b_u

181 z = y - 2.0 * learning_rate * b[edge_src_indices]

182

183 # 7c. Update Momentum Term

184 tknew = (1.0 + math.sqrt(1 + 4.0 * tk**2)) / 2.0

185

186 # 7d. Update x in a vectorized manner, x_{uv} = (z_{uv} - z_{vu} + 1.0) / 2.0

187 new_xuv = (z - z[reverse_edge_idx] + 1.0) / 2.0

188 clamped_x = np.clip(new_xuv, 0.0, 1.0) # Clamp x_{uv} between 0 and 1

189

190 # Update y using the FISTA update formula (similar to gradient descent)

191 y = (

192 clamped_x

193 + ((tk - 1.0) / tknew) * (clamped_x - x)

194 + (tk / tknew) * (clamped_x - y)

195 )

196

197 # Update x

198 x = clamped_x

199

200 # Update tk, the momemntum term

201 tk = tknew

202

203 # Rebalance the b values! Otherwise performance is a bit suboptimal.

204 b[:] = 0.0

205 np.add.at(b, edge_src_indices, x) # b_u = \sum_{v : (u,v) \in E(G)} x_{uv}

206

207 # Extract the actual (approximate) dense subgraph.

208 return _fractional_peeling(G, b, x, node_to_idx, edge_to_idx)

209

210

211ALGORITHMS = {"greedy++": _greedy_plus_plus, "fista": _fista}

212

213

214@nx.utils.not_implemented_for("directed")

215@nx.utils.not_implemented_for("multigraph")

216@nx._dispatchable

217def densest_subgraph(G, iterations=1, *, method="fista"):

218 r"""Returns an approximate densest subgraph for a graph `G`.

219

220 This function runs an iterative algorithm to find the densest subgraph,

221 and returns both the density and the subgraph. For a discussion on the

222 notion of density used and the different algorithms available on

223 networkx, please see the Notes section below.

224

225 Parameters

226 ----------

227 G : NetworkX graph

228 Undirected graph.

229

230 iterations : int, optional (default=1)

231 Number of iterations to use for the iterative algorithm. Can be

232 specified positionally or as a keyword argument.

233

234 method : string, optional (default='fista')

235 The algorithm to use to approximate the densest subgraph. Supported

236 options: 'greedy++' by Boob et al. [2]_ and 'fista' by Harb et al. [3]_.

237 Must be specified as a keyword argument. Other inputs produce a

238 ValueError.

239

240 Returns

241 -------

242 d : float

243 The density of the approximate subgraph found.

244

245 S : set

246 The subset of nodes defining the approximate densest subgraph.

247

248 Examples

249 --------

250 >>> G = nx.star_graph(4)

251 >>> nx.approximation.densest_subgraph(G, iterations=1)

252 (0.8, {0, 1, 2, 3, 4})

253

254 Notes

255 -----

256 **Problem Definition:**

257 The densest subgraph problem (DSG) asks to find the subgraph

258 $S \subseteq V(G)$ with maximum density. For a subset of the nodes of

259 $G$, $S \subseteq V(G)$, define $E(S) = \{ (u,v) : (u,v)\in E(G),

260 u\in S, v\in S \}$ as the set of edges with both endpoints in $S$.

261 The density of $S$ is defined as $|E(S)|/|S|$, the ratio between the

262 edges in the subgraph $G[S]$ and the number of nodes in that subgraph.

263 Note that this is different from the standard graph theoretic definition

264 of density, defined as $\frac{2|E(S)|}{|S|(|S|-1)}$, for historical

265 reasons.

266

267 **Exact Algorithms:**

268 The densest subgraph problem is polynomial time solvable using maximum

269 flow, commonly referred to as Goldberg's algorithm. However, the

270 algorithm is quite involved. It first binary searches on the optimal

271 density, $d^\ast$. For a guess of the density $d$, it sets up a flow

272 network $G'$ with size $O(m)$. The maximum flow solution either

273 informs the algorithm that no subgraph with density $d$ exists, or it

274 provides a subgraph with density at least $d$. However, this is

275 inherently bottlenecked by the maximum flow algorithm. For example, [2]_

276 notes that Goldberg’s algorithm was not feasible on many large graphs

277 even though they used a highly optimized maximum flow library.

278

279 **Charikar's Greedy Peeling:**

280 While exact solution algorithms are quite involved, there are several

281 known approximation algorithms for the densest subgraph problem.

282

283 Charikar [1]_ described a very simple 1/2-approximation algorithm for DSG

284 known as the greedy "peeling" algorithm. The algorithm creates an

285 ordering of the nodes as follows. The first node $v_1$ is the one with

286 the smallest degree in $G$ (ties broken arbitrarily). It selects

287 $v_2$ to be the smallest degree node in $G \setminus v_1$. Letting

288 $G_i$ be the graph after removing $v_1, ..., v_i$ (with $G_0=G$),

289 the algorithm returns the graph among $G_0, ..., G_n$ with the highest

290 density.

291

292 **Greedy++:**

293 Boob et al. [2]_ generalized this algorithm into Greedy++, an iterative

294 algorithm that runs several rounds of "peeling". In fact, Greedy++ with 1

295 iteration is precisely Charikar's algorithm. The algorithm converges to a

296 $(1-\epsilon)$ approximate densest subgraph in $O(\Delta(G)\log

297 n/\epsilon^2)$ iterations, where $\Delta(G)$ is the maximum degree,

298 and $n$ is the number of nodes in $G$. The algorithm also has other

299 desirable properties as shown by [4]_ and [5]_.

300

301 **FISTA Algorithm:**

302 Harb et al. [3]_ gave a faster and more scalable algorithm using ideas

303 from quadratic programming for the densest subgraph, which is based on a

304 fast iterative shrinkage-thresholding algorithm (FISTA) algorithm. It is

305 known that computing the densest subgraph can be formulated as the

306 following convex optimization problem:

307

308 Minimize $\sum_{u \in V(G)} b_u^2$

309

310 Subject to:

311

312 $b_u = \sum_{v: \{u,v\} \in E(G)} x_{uv}$ for all $u \in V(G)$

313

314 $x_{uv} + x_{vu} = 1.0$ for all $\{u,v\} \in E(G)$

315

316 $x_{uv} \geq 0, x_{vu} \geq 0$ for all $\{u,v\} \in E(G)$

317

318 Here, $x_{uv}$ represents the fraction of edge $\{u,v\}$ assigned to

319 $u$, and $x_{vu}$ to $v$.

320

321 The FISTA algorithm efficiently solves this convex program using gradient

322 descent with projections. For a learning rate $\alpha$, the algorithm

323 does:

324

325 1. **Initialization**: Set $x^{(0)}_{uv} = x^{(0)}_{vu} = 0.5$ for all

326 edges as a feasible solution.

327

328 2. **Gradient Update**: For iteration $k\geq 1$, set

329 $x^{(k+1)}_{uv} = x^{(k)}_{uv} - 2 \alpha \sum_{v: \{u,v\} \in E(G)}

330 x^{(k)}_{uv}$. However, now $x^{(k+1)}_{uv}$ might be infeasible!

331 To ensure feasibility, we project $x^{(k+1)}_{uv}$.

332

333 3. **Projection to the Feasible Set**: Compute

334 $b^{(k+1)}_u = \sum_{v: \{u,v\} \in E(G)} x^{(k)}_{uv}$ for all

335 nodes $u$. Define $z^{(k+1)}_{uv} = x^{(k+1)}_{uv} - 2 \alpha

336 b^{(k+1)}_u$. Update $x^{(k+1)}_{uv} =

337 CLAMP((z^{(k+1)}_{uv} - z^{(k+1)}_{vu} + 1.0) / 2.0)$, where

338 $CLAMP(x) = \max(0, \min(1, x))$.

339

340 With a learning rate of $\alpha=1/\Delta(G)$, where $\Delta(G)$ is

341 the maximum degree, the algorithm converges to the optimum solution of

342 the convex program.

343

344 **Fractional Peeling:**

345 To obtain a **discrete** subgraph, we use fractional peeling, an

346 adaptation of the standard peeling algorithm which peels the minimum

347 degree vertex in each iteration, and returns the densest subgraph found

348 along the way. Here, we instead peel the vertex with the smallest

349 induced load $b_u$:

350

351 1. Compute $b_u$ and $x_{uv}$.

352

353 2. Iteratively remove the vertex with the smallest $b_u$, updating its

354 neighbors' load by $x_{vu}$.

355

356 Fractional peeling transforms the approximately optimal fractional

357 values $b_u, x_{uv}$ into a discrete subgraph. Unlike traditional

358 peeling, which removes the lowest-degree node, this method accounts for

359 fractional edge contributions from the convex program.

360

361 This approach is both scalable and theoretically sound, ensuring a quick

362 approximation of the densest subgraph while leveraging fractional load

363 balancing.

364

365 References

366 ----------

367 .. [1] Charikar, Moses. "Greedy approximation algorithms for finding dense

368 components in a graph." In International workshop on approximation

369 algorithms for combinatorial optimization, pp. 84-95. Berlin, Heidelberg:

370 Springer Berlin Heidelberg, 2000.

371

372 .. [2] Boob, Digvijay, Yu Gao, Richard Peng, Saurabh Sawlani, Charalampos

373 Tsourakakis, Di Wang, and Junxing Wang. "Flowless: Extracting densest

374 subgraphs without flow computations." In Proceedings of The Web Conference

375 2020, pp. 573-583. 2020.

376

377 .. [3] Harb, Elfarouk, Kent Quanrud, and Chandra Chekuri. "Faster and scalable

378 algorithms for densest subgraph and decomposition." Advances in Neural

379 Information Processing Systems 35 (2022): 26966-26979.

380

381 .. [4] Harb, Elfarouk, Kent Quanrud, and Chandra Chekuri. "Convergence to

382 lexicographically optimal base in a (contra) polymatroid and applications

383 to densest subgraph and tree packing." arXiv preprint arXiv:2305.02987

384 (2023).

385

386 .. [5] Chekuri, Chandra, Kent Quanrud, and Manuel R. Torres. "Densest

387 subgraph: Supermodularity, iterative peeling, and flow." In Proceedings of

388 the 2022 Annual ACM-SIAM Symposium on Discrete Algorithms (SODA), pp.

389 1531-1555. Society for Industrial and Applied Mathematics, 2022.

390 """

391 try:

392 algo = ALGORITHMS[method]

393 except KeyError as e:

394 raise ValueError(f"{method} is not a valid choice for an algorithm.") from e

395

396 return algo(G, iterations)