Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/scipy/sparse/linalg/_eigen/_svds.py: 10%

126 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-03-22 06:44 +0000

1import numpy as np 

2 

3from .arpack import _arpack # type: ignore[attr-defined] 

4from . import eigsh 

5 

6from scipy._lib._util import check_random_state 

7from scipy.sparse.linalg._interface import LinearOperator, aslinearoperator 

8from scipy.sparse.linalg._eigen.lobpcg import lobpcg # type: ignore[no-redef] 

9from scipy.sparse.linalg._svdp import _svdp 

10from scipy.linalg import svd 

11 

12arpack_int = _arpack.timing.nbx.dtype 

13__all__ = ['svds'] 

14 

15 

16def _herm(x): 

17 return x.T.conj() 

18 

19 

20def _iv(A, k, ncv, tol, which, v0, maxiter, 

21 return_singular, solver, random_state): 

22 

23 # input validation/standardization for `solver` 

24 # out of order because it's needed for other parameters 

25 solver = str(solver).lower() 

26 solvers = {"arpack", "lobpcg", "propack"} 

27 if solver not in solvers: 

28 raise ValueError(f"solver must be one of {solvers}.") 

29 

30 # input validation/standardization for `A` 

31 A = aslinearoperator(A) # this takes care of some input validation 

32 if not (np.issubdtype(A.dtype, np.complexfloating) 

33 or np.issubdtype(A.dtype, np.floating)): 

34 message = "`A` must be of floating or complex floating data type." 

35 raise ValueError(message) 

36 if np.prod(A.shape) == 0: 

37 message = "`A` must not be empty." 

38 raise ValueError(message) 

39 

40 # input validation/standardization for `k` 

41 kmax = min(A.shape) if solver == 'propack' else min(A.shape) - 1 

42 if int(k) != k or not (0 < k <= kmax): 

43 message = "`k` must be an integer satisfying `0 < k < min(A.shape)`." 

44 raise ValueError(message) 

45 k = int(k) 

46 

47 # input validation/standardization for `ncv` 

48 if solver == "arpack" and ncv is not None: 

49 if int(ncv) != ncv or not (k < ncv < min(A.shape)): 

50 message = ("`ncv` must be an integer satisfying " 

51 "`k < ncv < min(A.shape)`.") 

52 raise ValueError(message) 

53 ncv = int(ncv) 

54 

55 # input validation/standardization for `tol` 

56 if tol < 0 or not np.isfinite(tol): 

57 message = "`tol` must be a non-negative floating point value." 

58 raise ValueError(message) 

59 tol = float(tol) 

60 

61 # input validation/standardization for `which` 

62 which = str(which).upper() 

63 whichs = {'LM', 'SM'} 

64 if which not in whichs: 

65 raise ValueError(f"`which` must be in {whichs}.") 

66 

67 # input validation/standardization for `v0` 

68 if v0 is not None: 

69 v0 = np.atleast_1d(v0) 

70 if not (np.issubdtype(v0.dtype, np.complexfloating) 

71 or np.issubdtype(v0.dtype, np.floating)): 

72 message = ("`v0` must be of floating or complex floating " 

73 "data type.") 

74 raise ValueError(message) 

75 

76 shape = (A.shape[0],) if solver == 'propack' else (min(A.shape),) 

77 if v0.shape != shape: 

78 message = f"`v0` must have shape {shape}." 

79 raise ValueError(message) 

80 

81 # input validation/standardization for `maxiter` 

82 if maxiter is not None and (int(maxiter) != maxiter or maxiter <= 0): 

83 message = "`maxiter` must be a positive integer." 

84 raise ValueError(message) 

85 maxiter = int(maxiter) if maxiter is not None else maxiter 

86 

87 # input validation/standardization for `return_singular_vectors` 

88 # not going to be flexible with this; too complicated for little gain 

89 rs_options = {True, False, "vh", "u"} 

90 if return_singular not in rs_options: 

91 raise ValueError(f"`return_singular_vectors` must be in {rs_options}.") 

92 

93 random_state = check_random_state(random_state) 

94 

95 return (A, k, ncv, tol, which, v0, maxiter, 

96 return_singular, solver, random_state) 

97 

98 

99def svds(A, k=6, ncv=None, tol=0, which='LM', v0=None, 

100 maxiter=None, return_singular_vectors=True, 

101 solver='arpack', random_state=None, options=None): 

102 """ 

103 Partial singular value decomposition of a sparse matrix. 

104 

105 Compute the largest or smallest `k` singular values and corresponding 

106 singular vectors of a sparse matrix `A`. The order in which the singular 

107 values are returned is not guaranteed. 

108 

109 In the descriptions below, let ``M, N = A.shape``. 

110 

111 Parameters 

112 ---------- 

113 A : ndarray, sparse matrix, or LinearOperator 

114 Matrix to decompose of a floating point numeric dtype. 

115 k : int, default: 6 

116 Number of singular values and singular vectors to compute. 

117 Must satisfy ``1 <= k <= kmax``, where ``kmax=min(M, N)`` for 

118 ``solver='propack'`` and ``kmax=min(M, N) - 1`` otherwise. 

119 ncv : int, optional 

120 When ``solver='arpack'``, this is the number of Lanczos vectors 

121 generated. See :ref:`'arpack' <sparse.linalg.svds-arpack>` for details. 

122 When ``solver='lobpcg'`` or ``solver='propack'``, this parameter is 

123 ignored. 

124 tol : float, optional 

125 Tolerance for singular values. Zero (default) means machine precision. 

126 which : {'LM', 'SM'} 

127 Which `k` singular values to find: either the largest magnitude ('LM') 

128 or smallest magnitude ('SM') singular values. 

129 v0 : ndarray, optional 

130 The starting vector for iteration; see method-specific 

131 documentation (:ref:`'arpack' <sparse.linalg.svds-arpack>`, 

132 :ref:`'lobpcg' <sparse.linalg.svds-lobpcg>`), or 

133 :ref:`'propack' <sparse.linalg.svds-propack>` for details. 

134 maxiter : int, optional 

135 Maximum number of iterations; see method-specific 

136 documentation (:ref:`'arpack' <sparse.linalg.svds-arpack>`, 

137 :ref:`'lobpcg' <sparse.linalg.svds-lobpcg>`), or 

138 :ref:`'propack' <sparse.linalg.svds-propack>` for details. 

139 return_singular_vectors : {True, False, "u", "vh"} 

140 Singular values are always computed and returned; this parameter 

141 controls the computation and return of singular vectors. 

142 

143 - ``True``: return singular vectors. 

144 - ``False``: do not return singular vectors. 

145 - ``"u"``: if ``M <= N``, compute only the left singular vectors and 

146 return ``None`` for the right singular vectors. Otherwise, compute 

147 all singular vectors. 

148 - ``"vh"``: if ``M > N``, compute only the right singular vectors and 

149 return ``None`` for the left singular vectors. Otherwise, compute 

150 all singular vectors. 

151 

152 If ``solver='propack'``, the option is respected regardless of the 

153 matrix shape. 

154 

155 solver : {'arpack', 'propack', 'lobpcg'}, optional 

156 The solver used. 

157 :ref:`'arpack' <sparse.linalg.svds-arpack>`, 

158 :ref:`'lobpcg' <sparse.linalg.svds-lobpcg>`, and 

159 :ref:`'propack' <sparse.linalg.svds-propack>` are supported. 

160 Default: `'arpack'`. 

161 random_state : {None, int, `numpy.random.Generator`, 

162 `numpy.random.RandomState`}, optional 

163 

164 Pseudorandom number generator state used to generate resamples. 

165 

166 If `random_state` is ``None`` (or `np.random`), the 

167 `numpy.random.RandomState` singleton is used. 

168 If `random_state` is an int, a new ``RandomState`` instance is used, 

169 seeded with `random_state`. 

170 If `random_state` is already a ``Generator`` or ``RandomState`` 

171 instance then that instance is used. 

172 options : dict, optional 

173 A dictionary of solver-specific options. No solver-specific options 

174 are currently supported; this parameter is reserved for future use. 

175 

176 Returns 

177 ------- 

178 u : ndarray, shape=(M, k) 

179 Unitary matrix having left singular vectors as columns. 

180 s : ndarray, shape=(k,) 

181 The singular values. 

182 vh : ndarray, shape=(k, N) 

183 Unitary matrix having right singular vectors as rows. 

184 

185 Notes 

186 ----- 

187 This is a naive implementation using ARPACK or LOBPCG as an eigensolver 

188 on the matrix ``A.conj().T @ A`` or ``A @ A.conj().T``, depending on 

189 which one is smaller size, followed by the Rayleigh-Ritz method 

190 as postprocessing; see 

191 Using the normal matrix, in Rayleigh-Ritz method, (2022, Nov. 19), 

192 Wikipedia, https://w.wiki/4zms. 

193 

194 Alternatively, the PROPACK solver can be called. 

195 

196 Choices of the input matrix `A` numeric dtype may be limited. 

197 Only ``solver="lobpcg"`` supports all floating point dtypes 

198 real: 'np.float32', 'np.float64', 'np.longdouble' and 

199 complex: 'np.complex64', 'np.complex128', 'np.clongdouble'. 

200 The ``solver="arpack"`` supports only 

201 'np.float32', 'np.float64', and 'np.complex128'. 

202 

203 Examples 

204 -------- 

205 Construct a matrix `A` from singular values and vectors. 

206 

207 >>> import numpy as np 

208 >>> from scipy import sparse, linalg, stats 

209 >>> from scipy.sparse.linalg import svds, aslinearoperator, LinearOperator 

210 

211 Construct a dense matrix `A` from singular values and vectors. 

212 

213 >>> rng = np.random.default_rng(258265244568965474821194062361901728911) 

214 >>> orthogonal = stats.ortho_group.rvs(10, random_state=rng) 

215 >>> s = [1e-3, 1, 2, 3, 4] # non-zero singular values 

216 >>> u = orthogonal[:, :5] # left singular vectors 

217 >>> vT = orthogonal[:, 5:].T # right singular vectors 

218 >>> A = u @ np.diag(s) @ vT 

219 

220 With only four singular values/vectors, the SVD approximates the original 

221 matrix. 

222 

223 >>> u4, s4, vT4 = svds(A, k=4) 

224 >>> A4 = u4 @ np.diag(s4) @ vT4 

225 >>> np.allclose(A4, A, atol=1e-3) 

226 True 

227 

228 With all five non-zero singular values/vectors, we can reproduce 

229 the original matrix more accurately. 

230 

231 >>> u5, s5, vT5 = svds(A, k=5) 

232 >>> A5 = u5 @ np.diag(s5) @ vT5 

233 >>> np.allclose(A5, A) 

234 True 

235 

236 The singular values match the expected singular values. 

237 

238 >>> np.allclose(s5, s) 

239 True 

240 

241 Since the singular values are not close to each other in this example, 

242 every singular vector matches as expected up to a difference in sign. 

243 

244 >>> (np.allclose(np.abs(u5), np.abs(u)) and 

245 ... np.allclose(np.abs(vT5), np.abs(vT))) 

246 True 

247 

248 The singular vectors are also orthogonal. 

249 

250 >>> (np.allclose(u5.T @ u5, np.eye(5)) and 

251 ... np.allclose(vT5 @ vT5.T, np.eye(5))) 

252 True 

253 

254 If there are (nearly) multiple singular values, the corresponding 

255 individual singular vectors may be unstable, but the whole invariant 

256 subspace containing all such singular vectors is computed accurately 

257 as can be measured by angles between subspaces via 'subspace_angles'. 

258 

259 >>> rng = np.random.default_rng(178686584221410808734965903901790843963) 

260 >>> s = [1, 1 + 1e-6] # non-zero singular values 

261 >>> u, _ = np.linalg.qr(rng.standard_normal((99, 2))) 

262 >>> v, _ = np.linalg.qr(rng.standard_normal((99, 2))) 

263 >>> vT = v.T 

264 >>> A = u @ np.diag(s) @ vT 

265 >>> A = A.astype(np.float32) 

266 >>> u2, s2, vT2 = svds(A, k=2, random_state=rng) 

267 >>> np.allclose(s2, s) 

268 True 

269 

270 The angles between the individual exact and computed singular vectors 

271 may not be so small. To check use: 

272 

273 >>> (linalg.subspace_angles(u2[:, :1], u[:, :1]) + 

274 ... linalg.subspace_angles(u2[:, 1:], u[:, 1:])) 

275 array([0.06562513]) # may vary 

276 >>> (linalg.subspace_angles(vT2[:1, :].T, vT[:1, :].T) + 

277 ... linalg.subspace_angles(vT2[1:, :].T, vT[1:, :].T)) 

278 array([0.06562507]) # may vary 

279 

280 As opposed to the angles between the 2-dimensional invariant subspaces 

281 that these vectors span, which are small for rights singular vectors 

282 

283 >>> linalg.subspace_angles(u2, u).sum() < 1e-6 

284 True 

285 

286 as well as for left singular vectors. 

287 

288 >>> linalg.subspace_angles(vT2.T, vT.T).sum() < 1e-6 

289 True 

290 

291 The next example follows that of 'sklearn.decomposition.TruncatedSVD'. 

292 

293 >>> rng = np.random.RandomState(0) 

294 >>> X_dense = rng.random(size=(100, 100)) 

295 >>> X_dense[:, 2 * np.arange(50)] = 0 

296 >>> X = sparse.csr_matrix(X_dense) 

297 >>> _, singular_values, _ = svds(X, k=5, random_state=rng) 

298 >>> print(singular_values) 

299 [ 4.3293... 4.4491... 4.5420... 4.5987... 35.2410...] 

300 

301 The function can be called without the transpose of the input matrix 

302 ever explicitly constructed. 

303 

304 >>> rng = np.random.default_rng(102524723947864966825913730119128190974) 

305 >>> G = sparse.rand(8, 9, density=0.5, random_state=rng) 

306 >>> Glo = aslinearoperator(G) 

307 >>> _, singular_values_svds, _ = svds(Glo, k=5, random_state=rng) 

308 >>> _, singular_values_svd, _ = linalg.svd(G.toarray()) 

309 >>> np.allclose(singular_values_svds, singular_values_svd[-4::-1]) 

310 True 

311 

312 The most memory efficient scenario is where neither 

313 the original matrix, nor its transpose, is explicitly constructed. 

314 Our example computes the smallest singular values and vectors 

315 of 'LinearOperator' constructed from the numpy function 'np.diff' used 

316 column-wise to be consistent with 'LinearOperator' operating on columns. 

317 

318 >>> diff0 = lambda a: np.diff(a, axis=0) 

319 

320 Let us create the matrix from 'diff0' to be used for validation only. 

321 

322 >>> n = 5 # The dimension of the space. 

323 >>> M_from_diff0 = diff0(np.eye(n)) 

324 >>> print(M_from_diff0.astype(int)) 

325 [[-1 1 0 0 0] 

326 [ 0 -1 1 0 0] 

327 [ 0 0 -1 1 0] 

328 [ 0 0 0 -1 1]] 

329 

330 The matrix 'M_from_diff0' is bi-diagonal and could be alternatively 

331 created directly by 

332 

333 >>> M = - np.eye(n - 1, n, dtype=int) 

334 >>> np.fill_diagonal(M[:,1:], 1) 

335 >>> np.allclose(M, M_from_diff0) 

336 True 

337 

338 Its transpose 

339 

340 >>> print(M.T) 

341 [[-1 0 0 0] 

342 [ 1 -1 0 0] 

343 [ 0 1 -1 0] 

344 [ 0 0 1 -1] 

345 [ 0 0 0 1]] 

346 

347 can be viewed as the incidence matrix; see 

348 Incidence matrix, (2022, Nov. 19), Wikipedia, https://w.wiki/5YXU, 

349 of a linear graph with 5 vertices and 4 edges. The 5x5 normal matrix 

350 ``M.T @ M`` thus is 

351 

352 >>> print(M.T @ M) 

353 [[ 1 -1 0 0 0] 

354 [-1 2 -1 0 0] 

355 [ 0 -1 2 -1 0] 

356 [ 0 0 -1 2 -1] 

357 [ 0 0 0 -1 1]] 

358 

359 the graph Laplacian, while the actually used in 'svds' smaller size 

360 4x4 normal matrix ``M @ M.T`` 

361 

362 >>> print(M @ M.T) 

363 [[ 2 -1 0 0] 

364 [-1 2 -1 0] 

365 [ 0 -1 2 -1] 

366 [ 0 0 -1 2]] 

367 

368 is the so-called edge-based Laplacian; see 

369 Symmetric Laplacian via the incidence matrix, in Laplacian matrix, 

370 (2022, Nov. 19), Wikipedia, https://w.wiki/5YXW. 

371 

372 The 'LinearOperator' setup needs the options 'rmatvec' and 'rmatmat' 

373 of multiplication by the matrix transpose ``M.T``, but we want to be 

374 matrix-free to save memory, so knowing how ``M.T`` looks like, we 

375 manually construct the following function to be 

376 used in ``rmatmat=diff0t``. 

377 

378 >>> def diff0t(a): 

379 ... if a.ndim == 1: 

380 ... a = a[:,np.newaxis] # Turn 1D into 2D array 

381 ... d = np.zeros((a.shape[0] + 1, a.shape[1]), dtype=a.dtype) 

382 ... d[0, :] = - a[0, :] 

383 ... d[1:-1, :] = a[0:-1, :] - a[1:, :] 

384 ... d[-1, :] = a[-1, :] 

385 ... return d 

386 

387 We check that our function 'diff0t' for the matrix transpose is valid. 

388 

389 >>> np.allclose(M.T, diff0t(np.eye(n-1))) 

390 True 

391 

392 Now we setup our matrix-free 'LinearOperator' called 'diff0_func_aslo' 

393 and for validation the matrix-based 'diff0_matrix_aslo'. 

394 

395 >>> def diff0_func_aslo_def(n): 

396 ... return LinearOperator(matvec=diff0, 

397 ... matmat=diff0, 

398 ... rmatvec=diff0t, 

399 ... rmatmat=diff0t, 

400 ... shape=(n - 1, n)) 

401 >>> diff0_func_aslo = diff0_func_aslo_def(n) 

402 >>> diff0_matrix_aslo = aslinearoperator(M_from_diff0) 

403 

404 And validate both the matrix and its transpose in 'LinearOperator'. 

405 

406 >>> np.allclose(diff0_func_aslo(np.eye(n)), 

407 ... diff0_matrix_aslo(np.eye(n))) 

408 True 

409 >>> np.allclose(diff0_func_aslo.T(np.eye(n-1)), 

410 ... diff0_matrix_aslo.T(np.eye(n-1))) 

411 True 

412 

413 Having the 'LinearOperator' setup validated, we run the solver. 

414 

415 >>> n = 100 

416 >>> diff0_func_aslo = diff0_func_aslo_def(n) 

417 >>> u, s, vT = svds(diff0_func_aslo, k=3, which='SM') 

418 

419 The singular values squared and the singular vectors are known 

420 explicitly; see 

421 Pure Dirichlet boundary conditions, in 

422 Eigenvalues and eigenvectors of the second derivative, 

423 (2022, Nov. 19), Wikipedia, https://w.wiki/5YX6, 

424 since 'diff' corresponds to first 

425 derivative, and its smaller size n-1 x n-1 normal matrix 

426 ``M @ M.T`` represent the discrete second derivative with the Dirichlet 

427 boundary conditions. We use these analytic expressions for validation. 

428 

429 >>> se = 2. * np.sin(np.pi * np.arange(1, 4) / (2. * n)) 

430 >>> ue = np.sqrt(2 / n) * np.sin(np.pi * np.outer(np.arange(1, n), 

431 ... np.arange(1, 4)) / n) 

432 >>> np.allclose(s, se, atol=1e-3) 

433 True 

434 >>> print(np.allclose(np.abs(u), np.abs(ue), atol=1e-6)) 

435 True 

436 

437 """ 

438 args = _iv(A, k, ncv, tol, which, v0, maxiter, return_singular_vectors, 

439 solver, random_state) 

440 (A, k, ncv, tol, which, v0, maxiter, 

441 return_singular_vectors, solver, random_state) = args 

442 

443 largest = (which == 'LM') 

444 n, m = A.shape 

445 

446 if n >= m: 

447 X_dot = A.matvec 

448 X_matmat = A.matmat 

449 XH_dot = A.rmatvec 

450 XH_mat = A.rmatmat 

451 transpose = False 

452 else: 

453 X_dot = A.rmatvec 

454 X_matmat = A.rmatmat 

455 XH_dot = A.matvec 

456 XH_mat = A.matmat 

457 transpose = True 

458 

459 dtype = getattr(A, 'dtype', None) 

460 if dtype is None: 

461 dtype = A.dot(np.zeros([m, 1])).dtype 

462 

463 def matvec_XH_X(x): 

464 return XH_dot(X_dot(x)) 

465 

466 def matmat_XH_X(x): 

467 return XH_mat(X_matmat(x)) 

468 

469 XH_X = LinearOperator(matvec=matvec_XH_X, dtype=A.dtype, 

470 matmat=matmat_XH_X, 

471 shape=(min(A.shape), min(A.shape))) 

472 

473 # Get a low rank approximation of the implicitly defined gramian matrix. 

474 # This is not a stable way to approach the problem. 

475 if solver == 'lobpcg': 

476 

477 if k == 1 and v0 is not None: 

478 X = np.reshape(v0, (-1, 1)) 

479 else: 

480 X = random_state.standard_normal(size=(min(A.shape), k)) 

481 

482 _, eigvec = lobpcg(XH_X, X, tol=tol ** 2, maxiter=maxiter, 

483 largest=largest) 

484 

485 elif solver == 'propack': 

486 jobu = return_singular_vectors in {True, 'u'} 

487 jobv = return_singular_vectors in {True, 'vh'} 

488 irl_mode = (which == 'SM') 

489 res = _svdp(A, k=k, tol=tol**2, which=which, maxiter=None, 

490 compute_u=jobu, compute_v=jobv, irl_mode=irl_mode, 

491 kmax=maxiter, v0=v0, random_state=random_state) 

492 

493 u, s, vh, _ = res # but we'll ignore bnd, the last output 

494 

495 # PROPACK order appears to be largest first. `svds` output order is not 

496 # guaranteed, according to documentation, but for ARPACK and LOBPCG 

497 # they actually are ordered smallest to largest, so reverse for 

498 # consistency. 

499 s = s[::-1] 

500 u = u[:, ::-1] 

501 vh = vh[::-1] 

502 

503 u = u if jobu else None 

504 vh = vh if jobv else None 

505 

506 if return_singular_vectors: 

507 return u, s, vh 

508 else: 

509 return s 

510 

511 elif solver == 'arpack' or solver is None: 

512 if v0 is None: 

513 v0 = random_state.standard_normal(size=(min(A.shape),)) 

514 _, eigvec = eigsh(XH_X, k=k, tol=tol ** 2, maxiter=maxiter, 

515 ncv=ncv, which=which, v0=v0) 

516 # arpack do not guarantee exactly orthonormal eigenvectors 

517 # for clustered eigenvalues, especially in complex arithmetic 

518 eigvec, _ = np.linalg.qr(eigvec) 

519 

520 # the eigenvectors eigvec must be orthonomal here; see gh-16712 

521 Av = X_matmat(eigvec) 

522 if not return_singular_vectors: 

523 s = svd(Av, compute_uv=False, overwrite_a=True) 

524 return s[::-1] 

525 

526 # compute the left singular vectors of X and update the right ones 

527 # accordingly 

528 u, s, vh = svd(Av, full_matrices=False, overwrite_a=True) 

529 u = u[:, ::-1] 

530 s = s[::-1] 

531 vh = vh[::-1] 

532 

533 jobu = return_singular_vectors in {True, 'u'} 

534 jobv = return_singular_vectors in {True, 'vh'} 

535 

536 if transpose: 

537 u_tmp = eigvec @ _herm(vh) if jobu else None 

538 vh = _herm(u) if jobv else None 

539 u = u_tmp 

540 else: 

541 if not jobu: 

542 u = None 

543 vh = vh @ _herm(eigvec) if jobv else None 

544 

545 return u, s, vh