Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/scikit_learn-1.4.dev0-py3.8-linux-x86_64.egg/sklearn/externals/_scipy/sparse/csgraph/_laplacian.py: 14%

146 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-12-12 06:31 +0000

1""" 

2This file is a copy of the scipy.sparse.csgraph._laplacian module from SciPy 1.12 

3 

4scipy.sparse.csgraph.laplacian supports sparse arrays only starting from Scipy 1.12, 

5see https://github.com/scipy/scipy/pull/19156. This vendored file can be removed as 

6soon as Scipy 1.12 becomes the minimum supported version. 

7 

8Laplacian of a compressed-sparse graph 

9""" 

10 

11# License: BSD 3 clause 

12 

13import numpy as np 

14from scipy.sparse import issparse 

15from scipy.sparse.linalg import LinearOperator 

16 

17 

18############################################################################### 

19# Graph laplacian 

20def laplacian( 

21 csgraph, 

22 normed=False, 

23 return_diag=False, 

24 use_out_degree=False, 

25 *, 

26 copy=True, 

27 form="array", 

28 dtype=None, 

29 symmetrized=False, 

30): 

31 """ 

32 Return the Laplacian of a directed graph. 

33 

34 Parameters 

35 ---------- 

36 csgraph : array_like or sparse matrix, 2 dimensions 

37 Compressed-sparse graph, with shape (N, N). 

38 normed : bool, optional 

39 If True, then compute symmetrically normalized Laplacian. 

40 Default: False. 

41 return_diag : bool, optional 

42 If True, then also return an array related to vertex degrees. 

43 Default: False. 

44 use_out_degree : bool, optional 

45 If True, then use out-degree instead of in-degree. 

46 This distinction matters only if the graph is asymmetric. 

47 Default: False. 

48 copy : bool, optional 

49 If False, then change `csgraph` in place if possible, 

50 avoiding doubling the memory use. 

51 Default: True, for backward compatibility. 

52 form : 'array', or 'function', or 'lo' 

53 Determines the format of the output Laplacian: 

54 

55 * 'array' is a numpy array; 

56 * 'function' is a pointer to evaluating the Laplacian-vector 

57 or Laplacian-matrix product; 

58 * 'lo' results in the format of the `LinearOperator`. 

59 

60 Choosing 'function' or 'lo' always avoids doubling 

61 the memory use, ignoring `copy` value. 

62 Default: 'array', for backward compatibility. 

63 dtype : None or one of numeric numpy dtypes, optional 

64 The dtype of the output. If ``dtype=None``, the dtype of the 

65 output matches the dtype of the input csgraph, except for 

66 the case ``normed=True`` and integer-like csgraph, where 

67 the output dtype is 'float' allowing accurate normalization, 

68 but dramatically increasing the memory use. 

69 Default: None, for backward compatibility. 

70 symmetrized : bool, optional 

71 If True, then the output Laplacian is symmetric/Hermitian. 

72 The symmetrization is done by ``csgraph + csgraph.T.conj`` 

73 without dividing by 2 to preserve integer dtypes if possible 

74 prior to the construction of the Laplacian. 

75 The symmetrization will increase the memory footprint of 

76 sparse matrices unless the sparsity pattern is symmetric or 

77 `form` is 'function' or 'lo'. 

78 Default: False, for backward compatibility. 

79 

80 Returns 

81 ------- 

82 lap : ndarray, or sparse matrix, or `LinearOperator` 

83 The N x N Laplacian of csgraph. It will be a NumPy array (dense) 

84 if the input was dense, or a sparse matrix otherwise, or 

85 the format of a function or `LinearOperator` if 

86 `form` equals 'function' or 'lo', respectively. 

87 diag : ndarray, optional 

88 The length-N main diagonal of the Laplacian matrix. 

89 For the normalized Laplacian, this is the array of square roots 

90 of vertex degrees or 1 if the degree is zero. 

91 

92 Notes 

93 ----- 

94 The Laplacian matrix of a graph is sometimes referred to as the 

95 "Kirchhoff matrix" or just the "Laplacian", and is useful in many 

96 parts of spectral graph theory. 

97 In particular, the eigen-decomposition of the Laplacian can give 

98 insight into many properties of the graph, e.g., 

99 is commonly used for spectral data embedding and clustering. 

100 

101 The constructed Laplacian doubles the memory use if ``copy=True`` and 

102 ``form="array"`` which is the default. 

103 Choosing ``copy=False`` has no effect unless ``form="array"`` 

104 or the matrix is sparse in the ``coo`` format, or dense array, except 

105 for the integer input with ``normed=True`` that forces the float output. 

106 

107 Sparse input is reformatted into ``coo`` if ``form="array"``, 

108 which is the default. 

109 

110 If the input adjacency matrix is not symmetric, the Laplacian is 

111 also non-symmetric unless ``symmetrized=True`` is used. 

112 

113 Diagonal entries of the input adjacency matrix are ignored and 

114 replaced with zeros for the purpose of normalization where ``normed=True``. 

115 The normalization uses the inverse square roots of row-sums of the input 

116 adjacency matrix, and thus may fail if the row-sums contain 

117 negative or complex with a non-zero imaginary part values. 

118 

119 The normalization is symmetric, making the normalized Laplacian also 

120 symmetric if the input csgraph was symmetric. 

121 

122 References 

123 ---------- 

124 .. [1] Laplacian matrix. https://en.wikipedia.org/wiki/Laplacian_matrix 

125 

126 Examples 

127 -------- 

128 >>> import numpy as np 

129 >>> from scipy.sparse import csgraph 

130 

131 Our first illustration is the symmetric graph 

132 

133 >>> G = np.arange(4) * np.arange(4)[:, np.newaxis] 

134 >>> G 

135 array([[0, 0, 0, 0], 

136 [0, 1, 2, 3], 

137 [0, 2, 4, 6], 

138 [0, 3, 6, 9]]) 

139 

140 and its symmetric Laplacian matrix 

141 

142 >>> csgraph.laplacian(G) 

143 array([[ 0, 0, 0, 0], 

144 [ 0, 5, -2, -3], 

145 [ 0, -2, 8, -6], 

146 [ 0, -3, -6, 9]]) 

147 

148 The non-symmetric graph 

149 

150 >>> G = np.arange(9).reshape(3, 3) 

151 >>> G 

152 array([[0, 1, 2], 

153 [3, 4, 5], 

154 [6, 7, 8]]) 

155 

156 has different row- and column sums, resulting in two varieties 

157 of the Laplacian matrix, using an in-degree, which is the default 

158 

159 >>> L_in_degree = csgraph.laplacian(G) 

160 >>> L_in_degree 

161 array([[ 9, -1, -2], 

162 [-3, 8, -5], 

163 [-6, -7, 7]]) 

164 

165 or alternatively an out-degree 

166 

167 >>> L_out_degree = csgraph.laplacian(G, use_out_degree=True) 

168 >>> L_out_degree 

169 array([[ 3, -1, -2], 

170 [-3, 8, -5], 

171 [-6, -7, 13]]) 

172 

173 Constructing a symmetric Laplacian matrix, one can add the two as 

174 

175 >>> L_in_degree + L_out_degree.T 

176 array([[ 12, -4, -8], 

177 [ -4, 16, -12], 

178 [ -8, -12, 20]]) 

179 

180 or use the ``symmetrized=True`` option 

181 

182 >>> csgraph.laplacian(G, symmetrized=True) 

183 array([[ 12, -4, -8], 

184 [ -4, 16, -12], 

185 [ -8, -12, 20]]) 

186 

187 that is equivalent to symmetrizing the original graph 

188 

189 >>> csgraph.laplacian(G + G.T) 

190 array([[ 12, -4, -8], 

191 [ -4, 16, -12], 

192 [ -8, -12, 20]]) 

193 

194 The goal of normalization is to make the non-zero diagonal entries 

195 of the Laplacian matrix to be all unit, also scaling off-diagonal 

196 entries correspondingly. The normalization can be done manually, e.g., 

197 

198 >>> G = np.array([[0, 1, 1], [1, 0, 1], [1, 1, 0]]) 

199 >>> L, d = csgraph.laplacian(G, return_diag=True) 

200 >>> L 

201 array([[ 2, -1, -1], 

202 [-1, 2, -1], 

203 [-1, -1, 2]]) 

204 >>> d 

205 array([2, 2, 2]) 

206 >>> scaling = np.sqrt(d) 

207 >>> scaling 

208 array([1.41421356, 1.41421356, 1.41421356]) 

209 >>> (1/scaling)*L*(1/scaling) 

210 array([[ 1. , -0.5, -0.5], 

211 [-0.5, 1. , -0.5], 

212 [-0.5, -0.5, 1. ]]) 

213 

214 Or using ``normed=True`` option 

215 

216 >>> L, d = csgraph.laplacian(G, return_diag=True, normed=True) 

217 >>> L 

218 array([[ 1. , -0.5, -0.5], 

219 [-0.5, 1. , -0.5], 

220 [-0.5, -0.5, 1. ]]) 

221 

222 which now instead of the diagonal returns the scaling coefficients 

223 

224 >>> d 

225 array([1.41421356, 1.41421356, 1.41421356]) 

226 

227 Zero scaling coefficients are substituted with 1s, where scaling 

228 has thus no effect, e.g., 

229 

230 >>> G = np.array([[0, 0, 0], [0, 0, 1], [0, 1, 0]]) 

231 >>> G 

232 array([[0, 0, 0], 

233 [0, 0, 1], 

234 [0, 1, 0]]) 

235 >>> L, d = csgraph.laplacian(G, return_diag=True, normed=True) 

236 >>> L 

237 array([[ 0., -0., -0.], 

238 [-0., 1., -1.], 

239 [-0., -1., 1.]]) 

240 >>> d 

241 array([1., 1., 1.]) 

242 

243 Only the symmetric normalization is implemented, resulting 

244 in a symmetric Laplacian matrix if and only if its graph is symmetric 

245 and has all non-negative degrees, like in the examples above. 

246 

247 The output Laplacian matrix is by default a dense array or a sparse matrix 

248 inferring its shape, format, and dtype from the input graph matrix: 

249 

250 >>> G = np.array([[0, 1, 1], [1, 0, 1], [1, 1, 0]]).astype(np.float32) 

251 >>> G 

252 array([[0., 1., 1.], 

253 [1., 0., 1.], 

254 [1., 1., 0.]], dtype=float32) 

255 >>> csgraph.laplacian(G) 

256 array([[ 2., -1., -1.], 

257 [-1., 2., -1.], 

258 [-1., -1., 2.]], dtype=float32) 

259 

260 but can alternatively be generated matrix-free as a LinearOperator: 

261 

262 >>> L = csgraph.laplacian(G, form="lo") 

263 >>> L 

264 <3x3 _CustomLinearOperator with dtype=float32> 

265 >>> L(np.eye(3)) 

266 array([[ 2., -1., -1.], 

267 [-1., 2., -1.], 

268 [-1., -1., 2.]]) 

269 

270 or as a lambda-function: 

271 

272 >>> L = csgraph.laplacian(G, form="function") 

273 >>> L 

274 <function _laplace.<locals>.<lambda> at 0x0000012AE6F5A598> 

275 >>> L(np.eye(3)) 

276 array([[ 2., -1., -1.], 

277 [-1., 2., -1.], 

278 [-1., -1., 2.]]) 

279 

280 The Laplacian matrix is used for 

281 spectral data clustering and embedding 

282 as well as for spectral graph partitioning. 

283 Our final example illustrates the latter 

284 for a noisy directed linear graph. 

285 

286 >>> from scipy.sparse import diags, random 

287 >>> from scipy.sparse.linalg import lobpcg 

288 

289 Create a directed linear graph with ``N=35`` vertices 

290 using a sparse adjacency matrix ``G``: 

291 

292 >>> N = 35 

293 >>> G = diags(np.ones(N-1), 1, format="csr") 

294 

295 Fix a random seed ``rng`` and add a random sparse noise to the graph ``G``: 

296 

297 >>> rng = np.random.default_rng() 

298 >>> G += 1e-2 * random(N, N, density=0.1, random_state=rng) 

299 

300 Set initial approximations for eigenvectors: 

301 

302 >>> X = rng.random((N, 2)) 

303 

304 The constant vector of ones is always a trivial eigenvector 

305 of the non-normalized Laplacian to be filtered out: 

306 

307 >>> Y = np.ones((N, 1)) 

308 

309 Alternating (1) the sign of the graph weights allows determining 

310 labels for spectral max- and min- cuts in a single loop. 

311 Since the graph is undirected, the option ``symmetrized=True`` 

312 must be used in the construction of the Laplacian. 

313 The option ``normed=True`` cannot be used in (2) for the negative weights 

314 here as the symmetric normalization evaluates square roots. 

315 The option ``form="lo"`` in (2) is matrix-free, i.e., guarantees 

316 a fixed memory footprint and read-only access to the graph. 

317 Calling the eigenvalue solver ``lobpcg`` (3) computes the Fiedler vector 

318 that determines the labels as the signs of its components in (5). 

319 Since the sign in an eigenvector is not deterministic and can flip, 

320 we fix the sign of the first component to be always +1 in (4). 

321 

322 >>> for cut in ["max", "min"]: 

323 ... G = -G # 1. 

324 ... L = csgraph.laplacian(G, symmetrized=True, form="lo") # 2. 

325 ... _, eves = lobpcg(L, X, Y=Y, largest=False, tol=1e-3) # 3. 

326 ... eves *= np.sign(eves[0, 0]) # 4. 

327 ... print(cut + "-cut labels:\\n", 1 * (eves[:, 0]>0)) # 5. 

328 max-cut labels: 

329 [1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1] 

330 min-cut labels: 

331 [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] 

332 

333 As anticipated for a (slightly noisy) linear graph, 

334 the max-cut strips all the edges of the graph coloring all 

335 odd vertices into one color and all even vertices into another one, 

336 while the balanced min-cut partitions the graph 

337 in the middle by deleting a single edge. 

338 Both determined partitions are optimal. 

339 """ 

340 if csgraph.ndim != 2 or csgraph.shape[0] != csgraph.shape[1]: 

341 raise ValueError("csgraph must be a square matrix or array") 

342 

343 if normed and ( 

344 np.issubdtype(csgraph.dtype, np.signedinteger) 

345 or np.issubdtype(csgraph.dtype, np.uint) 

346 ): 

347 csgraph = csgraph.astype(np.float64) 

348 

349 if form == "array": 

350 create_lap = _laplacian_sparse if issparse(csgraph) else _laplacian_dense 

351 else: 

352 create_lap = ( 

353 _laplacian_sparse_flo if issparse(csgraph) else _laplacian_dense_flo 

354 ) 

355 

356 degree_axis = 1 if use_out_degree else 0 

357 

358 lap, d = create_lap( 

359 csgraph, 

360 normed=normed, 

361 axis=degree_axis, 

362 copy=copy, 

363 form=form, 

364 dtype=dtype, 

365 symmetrized=symmetrized, 

366 ) 

367 if return_diag: 

368 return lap, d 

369 return lap 

370 

371 

372def _setdiag_dense(m, d): 

373 step = len(d) + 1 

374 m.flat[::step] = d 

375 

376 

377def _laplace(m, d): 

378 return lambda v: v * d[:, np.newaxis] - m @ v 

379 

380 

381def _laplace_normed(m, d, nd): 

382 laplace = _laplace(m, d) 

383 return lambda v: nd[:, np.newaxis] * laplace(v * nd[:, np.newaxis]) 

384 

385 

386def _laplace_sym(m, d): 

387 return ( 

388 lambda v: v * d[:, np.newaxis] 

389 - m @ v 

390 - np.transpose(np.conjugate(np.transpose(np.conjugate(v)) @ m)) 

391 ) 

392 

393 

394def _laplace_normed_sym(m, d, nd): 

395 laplace_sym = _laplace_sym(m, d) 

396 return lambda v: nd[:, np.newaxis] * laplace_sym(v * nd[:, np.newaxis]) 

397 

398 

399def _linearoperator(mv, shape, dtype): 

400 return LinearOperator(matvec=mv, matmat=mv, shape=shape, dtype=dtype) 

401 

402 

403def _laplacian_sparse_flo(graph, normed, axis, copy, form, dtype, symmetrized): 

404 # The keyword argument `copy` is unused and has no effect here. 

405 del copy 

406 

407 if dtype is None: 

408 dtype = graph.dtype 

409 

410 graph_sum = np.asarray(graph.sum(axis=axis)).ravel() 

411 graph_diagonal = graph.diagonal() 

412 diag = graph_sum - graph_diagonal 

413 if symmetrized: 

414 graph_sum += np.asarray(graph.sum(axis=1 - axis)).ravel() 

415 diag = graph_sum - graph_diagonal - graph_diagonal 

416 

417 if normed: 

418 isolated_node_mask = diag == 0 

419 w = np.where(isolated_node_mask, 1, np.sqrt(diag)) 

420 if symmetrized: 

421 md = _laplace_normed_sym(graph, graph_sum, 1.0 / w) 

422 else: 

423 md = _laplace_normed(graph, graph_sum, 1.0 / w) 

424 if form == "function": 

425 return md, w.astype(dtype, copy=False) 

426 elif form == "lo": 

427 m = _linearoperator(md, shape=graph.shape, dtype=dtype) 

428 return m, w.astype(dtype, copy=False) 

429 else: 

430 raise ValueError(f"Invalid form: {form!r}") 

431 else: 

432 if symmetrized: 

433 md = _laplace_sym(graph, graph_sum) 

434 else: 

435 md = _laplace(graph, graph_sum) 

436 if form == "function": 

437 return md, diag.astype(dtype, copy=False) 

438 elif form == "lo": 

439 m = _linearoperator(md, shape=graph.shape, dtype=dtype) 

440 return m, diag.astype(dtype, copy=False) 

441 else: 

442 raise ValueError(f"Invalid form: {form!r}") 

443 

444 

445def _laplacian_sparse(graph, normed, axis, copy, form, dtype, symmetrized): 

446 # The keyword argument `form` is unused and has no effect here. 

447 del form 

448 

449 if dtype is None: 

450 dtype = graph.dtype 

451 

452 needs_copy = False 

453 if graph.format in ("lil", "dok"): 

454 m = graph.tocoo() 

455 else: 

456 m = graph 

457 if copy: 

458 needs_copy = True 

459 

460 if symmetrized: 

461 m += m.T.conj() 

462 

463 w = np.asarray(m.sum(axis=axis)).ravel() - m.diagonal() 

464 if normed: 

465 m = m.tocoo(copy=needs_copy) 

466 isolated_node_mask = w == 0 

467 w = np.where(isolated_node_mask, 1, np.sqrt(w)) 

468 m.data /= w[m.row] 

469 m.data /= w[m.col] 

470 m.data *= -1 

471 m.setdiag(1 - isolated_node_mask) 

472 else: 

473 if m.format == "dia": 

474 m = m.copy() 

475 else: 

476 m = m.tocoo(copy=needs_copy) 

477 m.data *= -1 

478 m.setdiag(w) 

479 

480 return m.astype(dtype, copy=False), w.astype(dtype) 

481 

482 

483def _laplacian_dense_flo(graph, normed, axis, copy, form, dtype, symmetrized): 

484 if copy: 

485 m = np.array(graph) 

486 else: 

487 m = np.asarray(graph) 

488 

489 if dtype is None: 

490 dtype = m.dtype 

491 

492 graph_sum = m.sum(axis=axis) 

493 graph_diagonal = m.diagonal() 

494 diag = graph_sum - graph_diagonal 

495 if symmetrized: 

496 graph_sum += m.sum(axis=1 - axis) 

497 diag = graph_sum - graph_diagonal - graph_diagonal 

498 

499 if normed: 

500 isolated_node_mask = diag == 0 

501 w = np.where(isolated_node_mask, 1, np.sqrt(diag)) 

502 if symmetrized: 

503 md = _laplace_normed_sym(m, graph_sum, 1.0 / w) 

504 else: 

505 md = _laplace_normed(m, graph_sum, 1.0 / w) 

506 if form == "function": 

507 return md, w.astype(dtype, copy=False) 

508 elif form == "lo": 

509 m = _linearoperator(md, shape=graph.shape, dtype=dtype) 

510 return m, w.astype(dtype, copy=False) 

511 else: 

512 raise ValueError(f"Invalid form: {form!r}") 

513 else: 

514 if symmetrized: 

515 md = _laplace_sym(m, graph_sum) 

516 else: 

517 md = _laplace(m, graph_sum) 

518 if form == "function": 

519 return md, diag.astype(dtype, copy=False) 

520 elif form == "lo": 

521 m = _linearoperator(md, shape=graph.shape, dtype=dtype) 

522 return m, diag.astype(dtype, copy=False) 

523 else: 

524 raise ValueError(f"Invalid form: {form!r}") 

525 

526 

527def _laplacian_dense(graph, normed, axis, copy, form, dtype, symmetrized): 

528 if form != "array": 

529 raise ValueError(f'{form!r} must be "array"') 

530 

531 if dtype is None: 

532 dtype = graph.dtype 

533 

534 if copy: 

535 m = np.array(graph) 

536 else: 

537 m = np.asarray(graph) 

538 

539 if dtype is None: 

540 dtype = m.dtype 

541 

542 if symmetrized: 

543 m += m.T.conj() 

544 np.fill_diagonal(m, 0) 

545 w = m.sum(axis=axis) 

546 if normed: 

547 isolated_node_mask = w == 0 

548 w = np.where(isolated_node_mask, 1, np.sqrt(w)) 

549 m /= w 

550 m /= w[:, np.newaxis] 

551 m *= -1 

552 _setdiag_dense(m, 1 - isolated_node_mask) 

553 else: 

554 m *= -1 

555 _setdiag_dense(m, w) 

556 

557 return m.astype(dtype, copy=False), w.astype(dtype, copy=False)