Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/scikit_learn-1.4.dev0-py3.8-linux-x86_64.egg/sklearn/utils/extmath.py: 12%

267 statements  

coverage.py v7.3.2, created at 2023-12-12 06:31 +0000

1""" 

2The :mod:`sklearn.utils.extmath` module includes utilities to perform 

3optimal mathematical operations in scikit-learn that are not available in SciPy. 

4""" 

5# Authors: Gael Varoquaux 

6# Alexandre Gramfort 

7# Alexandre T. Passos 

8# Olivier Grisel 

9# Lars Buitinck 

10# Stefan van der Walt 

11# Kyle Kastner 

12# Giorgio Patrini 

13# License: BSD 3 clause 

14 

15import warnings 

16from functools import partial 

17from numbers import Integral 

18 

19import numpy as np 

20from scipy import linalg, sparse 

21 

22from ..utils import deprecated 

23from ..utils._param_validation import Interval, StrOptions, validate_params 

24from . import check_random_state 

25from ._array_api import _is_numpy_namespace, device, get_namespace 

26from .sparsefuncs_fast import csr_row_norms 

27from .validation import check_array 

28 

29 

30def squared_norm(x): 

31 """Squared Euclidean or Frobenius norm of x. 

32 

33 Faster than norm(x) ** 2. 

34 

35 Parameters 

36 ---------- 

37 x : array-like 

38 The input array, which can be either a vector or a 2-dimensional array. 

39 

40 Returns 

41 ------- 

42 float 

43 The squared Euclidean norm when x is a vector, the squared Frobenius 

44 norm when x is a matrix (2-d array). 

45 """ 

46 x = np.ravel(x, order="K") 

47 if np.issubdtype(x.dtype, np.integer): 

48 warnings.warn( 

49 ( 

50 "Array type is integer, np.dot may overflow. " 

51 "Data should be float type to avoid this issue" 

52 ), 

53 UserWarning, 

54 ) 

55 return np.dot(x, x) 
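
Illustrative usage sketch (small example values, assuming NumPy and scikit-learn are available):

    import numpy as np
    from sklearn.utils.extmath import squared_norm

    x = np.array([1.0, 2.0, 2.0])
    print(squared_norm(x))  # 9.0, i.e. 1 + 4 + 4, the squared Euclidean norm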

56 

57 

58def row_norms(X, squared=False): 

59 """Row-wise (squared) Euclidean norm of X. 

60 

61 Equivalent to np.sqrt((X * X).sum(axis=1)), but also supports sparse 

62 matrices and does not create an X.shape-sized temporary. 

63 

64 Performs no input validation. 

65 

66 Parameters 

67 ---------- 

68 X : array-like 

69 The input array. 

70 squared : bool, default=False 

71 If True, return squared norms. 

72 

73 Returns 

74 ------- 

75 array-like 

76 The row-wise (squared) Euclidean norm of X. 

77 """ 

78 if sparse.issparse(X): 

79 X = X.tocsr() 

80 norms = csr_row_norms(X) 

81 if not squared: 

82 norms = np.sqrt(norms) 

83 else: 

84 xp, _ = get_namespace(X) 

85 if _is_numpy_namespace(xp): 

86 X = np.asarray(X) 

87 norms = np.einsum("ij,ij->i", X, X) 

88 norms = xp.asarray(norms) 

89 else: 

90 norms = xp.sum(xp.multiply(X, X), axis=1) 

91 if not squared: 

92 norms = xp.sqrt(norms) 

93 return norms 
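
Illustrative sketch covering both the dense and the sparse code paths (small example values):

    import numpy as np
    from scipy import sparse
    from sklearn.utils.extmath import row_norms

    X = np.array([[3.0, 4.0], [0.0, 1.0]])
    print(row_norms(X))                                   # [5. 1.]
    print(row_norms(sparse.csr_matrix(X), squared=True))  # [25. 1.]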

94 

95 

96def fast_logdet(A): 

97 """Compute logarithm of determinant of a square matrix. 

98 

99 The (natural) logarithm of the determinant of a square matrix 

100 is returned if det(A) is strictly positive and well defined. 

101 If the determinant is zero or negative returns -Inf. 

102 

103 Equivalent to: np.log(np.linalg.det(A)) but more robust. 

104 

105 Parameters 

106 ---------- 

107 A : array_like of shape (n, n) 

108 The square matrix. 

109 

110 Returns 

111 ------- 

112 logdet : float 

113 When det(A) is strictly positive, log(det(A)) is returned. 

114 When det(A) is non-positive or not defined, then -inf is returned. 

115 

116 See Also 

117 -------- 

118 numpy.linalg.slogdet : Compute the sign and (natural) logarithm of the determinant 

119 of an array. 

120 

121 Examples 

122 -------- 

123 >>> import numpy as np 

124 >>> from sklearn.utils.extmath import fast_logdet 

125 >>> a = np.array([[5, 1], [2, 8]]) 

126 >>> fast_logdet(a) 

127 3.6375861597263857 

128 """ 

129 xp, _ = get_namespace(A) 

130 sign, ld = xp.linalg.slogdet(A) 

131 if not sign > 0: 

132 return -xp.inf 

133 return ld 

134 

135 

136def density(w): 

137 """Compute density of a sparse vector. 

138 

139 Parameters 

140 ---------- 

141 w : array-like 

142 The sparse vector. 

143 

144 Returns 

145 ------- 

146 float 

147 The density of w, between 0 and 1. 

148 """ 

149 if hasattr(w, "toarray"): 

150 d = float(w.nnz) / (w.shape[0] * w.shape[1]) 

151 else: 

152 d = 0 if w is None else float((w != 0).sum()) / w.size 

153 return d 
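
Illustrative sketch of the dense branch, which counts nonzero entries (example values):

    import numpy as np
    from sklearn.utils.extmath import density

    w = np.array([[0.0, 1.0, 0.0], [2.0, 0.0, 0.0]])
    print(density(w))  # 2 nonzeros out of 6 entries, ~0.333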

154 

155 

156def safe_sparse_dot(a, b, *, dense_output=False): 

157 """Dot product that handles the sparse matrix case correctly. 

158 

159 Parameters 

160 ---------- 

161 a : {ndarray, sparse matrix} 

162 b : {ndarray, sparse matrix} 

163 dense_output : bool, default=False 

164 When False, ``a`` and ``b`` both being sparse will yield sparse output. 

165 When True, output will always be a dense array. 

166 

167 Returns 

168 ------- 

169 dot_product : {ndarray, sparse matrix} 

170 Sparse if ``a`` and ``b`` are sparse and ``dense_output=False``. 

171 """ 

172 if a.ndim > 2 or b.ndim > 2: 

173 if sparse.issparse(a): 

174 # sparse is always 2D. Implies b is 3D+ 

175 # [i, j] @ [k, ..., l, m, n] -> [i, k, ..., l, n] 

176 b_ = np.rollaxis(b, -2) 

177 b_2d = b_.reshape((b.shape[-2], -1)) 

178 ret = a @ b_2d 

179 ret = ret.reshape(a.shape[0], *b_.shape[1:]) 

180 elif sparse.issparse(b): 

181 # sparse is always 2D. Implies a is 3D+ 

182 # [k, ..., l, m] @ [i, j] -> [k, ..., l, j] 

183 a_2d = a.reshape(-1, a.shape[-1]) 

184 ret = a_2d @ b 

185 ret = ret.reshape(*a.shape[:-1], b.shape[1]) 

186 else: 

187 ret = np.dot(a, b) 

188 else: 

189 ret = a @ b 

190 

191 if ( 

192 sparse.issparse(a) 

193 and sparse.issparse(b) 

194 and dense_output 

195 and hasattr(ret, "toarray") 

196 ): 

197 return ret.toarray() 

198 return ret 
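
Illustrative sketch of the sparse/sparse case and the effect of `dense_output` (example values):

    import numpy as np
    from scipy import sparse
    from sklearn.utils.extmath import safe_sparse_dot

    a = sparse.csr_matrix(np.eye(3))
    b = sparse.csr_matrix(np.arange(9.0).reshape(3, 3))
    print(sparse.issparse(safe_sparse_dot(a, b)))          # True: sparse @ sparse stays sparse
    print(type(safe_sparse_dot(a, b, dense_output=True)))  # <class 'numpy.ndarray'>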

199 

200 

201def randomized_range_finder( 

202 A, *, size, n_iter, power_iteration_normalizer="auto", random_state=None 

203): 

204 """Compute an orthonormal matrix whose range approximates the range of A. 

205 

206 Parameters 

207 ---------- 

208 A : 2D array 

209 The input data matrix. 

210 

211 size : int 

212 Number of columns of the returned orthonormal matrix Q. 

213 

214 n_iter : int 

215 Number of power iterations used to stabilize the result. 

216 

217 power_iteration_normalizer : {'auto', 'QR', 'LU', 'none'}, default='auto' 

218 Whether the power iterations are normalized with step-by-step 

219 QR factorization (the slowest but most accurate), 'none' 

220 (the fastest but numerically unstable when `n_iter` is large, e.g. 

221 typically 5 or larger), or 'LU' factorization (numerically stable 

222 but can lose slightly in accuracy). The 'auto' mode applies no 

223 normalization if `n_iter` <= 2 and switches to LU otherwise. 

224 

225 .. versionadded:: 0.18 

226 

227 random_state : int, RandomState instance or None, default=None 

228 The seed of the pseudo random number generator to use when shuffling 

229 the data, i.e. getting the random vectors to initialize the algorithm. 

230 Pass an int for reproducible results across multiple function calls. 

231 See :term:`Glossary <random_state>`. 

232 

233 Returns 

234 ------- 

235 Q : ndarray 

236 A (A.shape[0] x size) matrix with orthonormal columns, the range of 

237 which approximates well the range of the input matrix A. 

238 

239 Notes 

240 ----- 

241 

242 Follows Algorithm 4.3 of 

243 :arxiv:`"Finding structure with randomness: 

244 Stochastic algorithms for constructing approximate matrix decompositions" 

245 <0909.4061>` 

246 Halko, et al. (2009) 

247 

248 An implementation of a randomized algorithm for principal component 

249 analysis 

250 A. Szlam et al. 2014 

251 """ 

252 xp, is_array_api_compliant = get_namespace(A) 

253 random_state = check_random_state(random_state) 

254 

255 # Generating normal random vectors with shape: (A.shape[1], size) 

256 # XXX: generate random number directly from xp if it's possible 

257 # one day. 

258 Q = xp.asarray(random_state.normal(size=(A.shape[1], size))) 

259 if hasattr(A, "dtype") and xp.isdtype(A.dtype, kind="real floating"): 

260 # Use float32 computation and components if A has a float32 dtype. 

261 Q = xp.astype(Q, A.dtype, copy=False) 

262 

263 # Move Q to device if needed only after converting to float32 if needed to 

264 # avoid allocating unnecessary memory on the device. 

265 

266 # Note: we cannot combine the astype and to_device operations in one go 

267 # using xp.asarray(..., dtype=dtype, device=device) because downcasting 

268 # from float64 to float32 in asarray might not always be accepted as only 

269 # casts following type promotion rules are guaranteed to work. 

270 # https://github.com/data-apis/array-api/issues/647 

271 if is_array_api_compliant: 

272 Q = xp.asarray(Q, device=device(A)) 

273 

274 # Deal with "auto" mode 

275 if power_iteration_normalizer == "auto": 

276 if n_iter <= 2: 

277 power_iteration_normalizer = "none" 

278 elif is_array_api_compliant: 

279 # XXX: https://github.com/data-apis/array-api/issues/627 

280 warnings.warn( 

281 "Array API does not support LU factorization, falling back to QR" 

282 " instead. Set `power_iteration_normalizer='QR'` explicitly to silence" 

283 " this warning." 

284 ) 

285 power_iteration_normalizer = "QR" 

286 else: 

287 power_iteration_normalizer = "LU" 

288 elif power_iteration_normalizer == "LU" and is_array_api_compliant: 

289 raise ValueError( 

290 "Array API does not support LU factorization. Set " 

291 "`power_iteration_normalizer='QR'` instead." 

292 ) 

293 

294 if is_array_api_compliant: 

295 qr_normalizer = partial(xp.linalg.qr, mode="reduced") 

296 else: 

297 # Use scipy.linalg instead of numpy.linalg when not explicitly 

298 # using the Array API. 

299 qr_normalizer = partial(linalg.qr, mode="economic") 

300 

301 if power_iteration_normalizer == "QR": 

302 normalizer = qr_normalizer 

303 elif power_iteration_normalizer == "LU": 

304 normalizer = partial(linalg.lu, permute_l=True) 

305 else: 

306 normalizer = lambda x: (x, None) 

307 

308 # Perform power iterations with Q to further 'imprint' the top 

309 # singular vectors of A in Q 

310 for _ in range(n_iter): 

311 Q, _ = normalizer(A @ Q) 

312 Q, _ = normalizer(A.T @ Q) 

313 

314 # Sample the range of A by linear projection of Q 

315 # Extract an orthonormal basis 

316 Q, _ = qr_normalizer(A @ Q) 

317 

318 return Q 
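
Illustrative sketch on random data, checking the shape and orthonormality of Q (example values):

    import numpy as np
    from sklearn.utils.extmath import randomized_range_finder

    A = np.random.RandomState(0).randn(50, 20)
    Q = randomized_range_finder(A, size=5, n_iter=3, random_state=0)
    print(Q.shape)                           # (50, 5)
    print(np.allclose(Q.T @ Q, np.eye(5)))   # True: columns are orthonormal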

319 

320 

321@validate_params( 

322 { 

323 "M": [np.ndarray, "sparse matrix"], 

324 "n_components": [Interval(Integral, 1, None, closed="left")], 

325 "n_oversamples": [Interval(Integral, 0, None, closed="left")], 

326 "n_iter": [Interval(Integral, 0, None, closed="left"), StrOptions({"auto"})], 

327 "power_iteration_normalizer": [StrOptions({"auto", "QR", "LU", "none"})], 

328 "transpose": ["boolean", StrOptions({"auto"})], 

329 "flip_sign": ["boolean"], 

330 "random_state": ["random_state"], 

331 "svd_lapack_driver": [StrOptions({"gesdd", "gesvd"})], 

332 }, 

333 prefer_skip_nested_validation=True, 

334) 

335def randomized_svd( 

336 M, 

337 n_components, 

338 *, 

339 n_oversamples=10, 

340 n_iter="auto", 

341 power_iteration_normalizer="auto", 

342 transpose="auto", 

343 flip_sign=True, 

344 random_state=None, 

345 svd_lapack_driver="gesdd", 

346): 

347 """Compute a truncated randomized SVD. 

348 

349 This method solves the fixed-rank approximation problem described in [1]_ 

350 (problem (1.5), p5). 

351 

352 Parameters 

353 ---------- 

354 M : {ndarray, sparse matrix} 

355 Matrix to decompose. 

356 

357 n_components : int 

358 Number of singular values and vectors to extract. 

359 

360 n_oversamples : int, default=10 

361 Additional number of random vectors to sample the range of `M` so as 

362 to ensure proper conditioning. The total number of random vectors 

363 used to find the range of `M` is `n_components + n_oversamples`. Smaller 

364 number can improve speed but can negatively impact the quality of 

365 approximation of singular vectors and singular values. Users might wish 

366 to increase this parameter up to `2*k - n_components` where k is the 

367 effective rank, for large matrices, noisy problems, matrices with 

368 slowly decaying spectra, or to increase precision. See [1]_ 

369 (pages 5, 23 and 26). 

370 

371 n_iter : int or 'auto', default='auto' 

372 Number of power iterations. It can be used to deal with very noisy 

373 problems. When 'auto', it is set to 4, unless `n_components` is small 

374 (< .1 * min(M.shape)) in which case `n_iter` is set to 7. 

375 This improves precision with few components. Note that in general 

376 users should rather increase `n_oversamples` before increasing `n_iter` 

377 as the principle of the randomized method is to avoid usage of these 

378 more costly power iteration steps. When `n_components` is equal 

379 to or greater than the effective matrix rank and the spectrum does not 

380 present a slow decay, `n_iter=0` or `1` should even work fine in theory 

381 (see [1]_ page 9). 

382 

383 .. versionchanged:: 0.18 

384 

385 power_iteration_normalizer : {'auto', 'QR', 'LU', 'none'}, default='auto' 

386 Whether the power iterations are normalized with step-by-step 

387 QR factorization (the slowest but most accurate), 'none' 

388 (the fastest but numerically unstable when `n_iter` is large, e.g. 

389 typically 5 or larger), or 'LU' factorization (numerically stable 

390 but can lose slightly in accuracy). The 'auto' mode applies no 

391 normalization if `n_iter` <= 2 and switches to LU otherwise. 

392 

393 .. versionadded:: 0.18 

394 

395 transpose : bool or 'auto', default='auto' 

396 Whether the algorithm should be applied to M.T instead of M. The 

397 result should approximately be the same. The 'auto' mode will 

398 trigger the transposition if M.shape[1] > M.shape[0] since this 

399 implementation of randomized SVD tends to be a little faster in that 

400 case. 

401 

402 .. versionchanged:: 0.18 

403 

404 flip_sign : bool, default=True 

405 The output of a singular value decomposition is only unique up to a 

406 permutation of the signs of the singular vectors. If `flip_sign` is 

407 set to `True`, the sign ambiguity is resolved by making the largest 

408 loadings for each component in the left singular vectors positive. 

409 

410 random_state : int, RandomState instance or None, default=None 

411 The seed of the pseudo random number generator to use when 

412 shuffling the data, i.e. getting the random vectors to initialize 

413 the algorithm. Pass an int for reproducible results across multiple 

414 function calls. See :term:`Glossary <random_state>`. 

415 

416 .. versionchanged:: 1.2 

417 The default value changed from 0 to None. 

418 

419 svd_lapack_driver : {"gesdd", "gesvd"}, default="gesdd" 

420 Whether to use the more efficient divide-and-conquer approach 

421 (`"gesdd"`) or more general rectangular approach (`"gesvd"`) to compute 

422 the SVD of the matrix B, which is the projection of M into a low 

423 dimensional subspace, as described in [1]_. 

424 

425 .. versionadded:: 1.2 

426 

427 Returns 

428 ------- 

429 u : ndarray of shape (n_samples, n_components) 

430 Unitary matrix having left singular vectors with signs flipped as columns. 

431 s : ndarray of shape (n_components,) 

432 The singular values, sorted in non-increasing order. 

433 vh : ndarray of shape (n_components, n_features) 

434 Unitary matrix having right singular vectors with signs flipped as rows. 

435 

436 Notes 

437 ----- 

438 This algorithm finds a (usually very good) approximate truncated 

439 singular value decomposition using randomization to speed up the 

440 computations. It is particularly fast on large matrices on which 

441 you wish to extract only a small number of components. In order to 

442 obtain further speed up, `n_iter` can be set <=2 (at the cost of 

443 loss of precision). To increase the precision it is recommended to 

444 increase `n_oversamples`, up to `2*k-n_components` where k is the 

445 effective rank. Usually, `n_components` is chosen to be greater than k 

446 so increasing `n_oversamples` up to `n_components` should be enough. 

447 

448 References 

449 ---------- 

450 .. [1] :arxiv:`"Finding structure with randomness: 

451 Stochastic algorithms for constructing approximate matrix decompositions" 

452 <0909.4061>` 

453 Halko, et al. (2009) 

454 

455 .. [2] A randomized algorithm for the decomposition of matrices 

456 Per-Gunnar Martinsson, Vladimir Rokhlin and Mark Tygert 

457 

458 .. [3] An implementation of a randomized algorithm for principal component 

459 analysis A. Szlam et al. 2014 

460 

461 Examples 

462 -------- 

463 >>> import numpy as np 

464 >>> from sklearn.utils.extmath import randomized_svd 

465 >>> a = np.array([[1, 2, 3, 5], 

466 ... [3, 4, 5, 6], 

467 ... [7, 8, 9, 10]]) 

468 >>> U, s, Vh = randomized_svd(a, n_components=2, random_state=0) 

469 >>> U.shape, s.shape, Vh.shape 

470 ((3, 2), (2,), (2, 4)) 

471 """ 

472 if sparse.issparse(M) and M.format in ("lil", "dok"): 

473 warnings.warn( 

474 "Calculating SVD of a {} is expensive. " 

475 "csr_matrix is more efficient.".format(type(M).__name__), 

476 sparse.SparseEfficiencyWarning, 

477 ) 

478 

479 random_state = check_random_state(random_state) 

480 n_random = n_components + n_oversamples 

481 n_samples, n_features = M.shape 

482 

483 if n_iter == "auto": 

484 # Checks if the number of iterations is explicitly specified 

485 # Adjust n_iter. 7 was found a good compromise for PCA. See #5299 

486 n_iter = 7 if n_components < 0.1 * min(M.shape) else 4 

487 

488 if transpose == "auto": 

489 transpose = n_samples < n_features 

490 if transpose: 

491 # this implementation is a bit faster with smaller shape[1] 

492 M = M.T 

493 

494 Q = randomized_range_finder( 

495 M, 

496 size=n_random, 

497 n_iter=n_iter, 

498 power_iteration_normalizer=power_iteration_normalizer, 

499 random_state=random_state, 

500 ) 

501 

502 # project M to the (k + p) dimensional space using the basis vectors 

503 B = Q.T @ M 

504 

505 # compute the SVD on the thin matrix: (k + p) wide 

506 xp, is_array_api_compliant = get_namespace(B) 

507 if is_array_api_compliant: 

508 Uhat, s, Vt = xp.linalg.svd(B, full_matrices=False) 

509 else: 

510 # When array_api_dispatch is disabled, rely on scipy.linalg 

511 # instead of numpy.linalg to avoid introducing a behavior change w.r.t. 

512 # previous versions of scikit-learn. 

513 Uhat, s, Vt = linalg.svd( 

514 B, full_matrices=False, lapack_driver=svd_lapack_driver 

515 ) 

516 del B 

517 U = Q @ Uhat 

518 

519 if flip_sign: 

520 if not transpose: 

521 U, Vt = svd_flip(U, Vt) 

522 else: 

523 # In case of transpose, u_based_decision=False is used 

524 # to actually flip based on u and not v. 

525 U, Vt = svd_flip(U, Vt, u_based_decision=False) 

526 

527 if transpose: 

528 # transpose back the results according to the input convention 

529 return Vt[:n_components, :].T, s[:n_components], U[:, :n_components].T 

530 else: 

531 return U[:, :n_components], s[:n_components], Vt[:n_components, :] 

532 

533 

534def _randomized_eigsh( 

535 M, 

536 n_components, 

537 *, 

538 n_oversamples=10, 

539 n_iter="auto", 

540 power_iteration_normalizer="auto", 

541 selection="module", 

542 random_state=None, 

543): 

544 """Compute a truncated eigendecomposition using randomized methods. 

545 

546 This method solves the fixed-rank approximation problem described in the 

547 Halko et al paper. 

548 

549 The choice of which components to select can be tuned with the `selection` 

550 parameter. 

551 

552 .. versionadded:: 0.24 

553 

554 Parameters 

555 ---------- 

556 M : ndarray or sparse matrix 

557 Matrix to decompose; it should be real symmetric square or complex 

558 Hermitian. 

559 

560 n_components : int 

561 Number of eigenvalues and vectors to extract. 

562 

563 n_oversamples : int, default=10 

564 Additional number of random vectors to sample the range of M so as 

565 to ensure proper conditioning. The total number of random vectors 

566 used to find the range of M is n_components + n_oversamples. Smaller 

567 number can improve speed but can negatively impact the quality of 

568 approximation of eigenvectors and eigenvalues. Users might wish 

569 to increase this parameter up to `2*k - n_components` where k is the 

570 effective rank, for large matrices, noisy problems, matrices with 

571 slowly decaying spectra, or to increase precision. See Halko 

572 et al (pages 5, 23 and 26). 

573 

574 n_iter : int or 'auto', default='auto' 

575 Number of power iterations. It can be used to deal with very noisy 

576 problems. When 'auto', it is set to 4, unless `n_components` is small 

577 (< .1 * min(M.shape)) in which case `n_iter` is set to 7. 

578 This improves precision with few components. Note that in general 

579 users should rather increase `n_oversamples` before increasing `n_iter` 

580 as the principle of the randomized method is to avoid usage of these 

581 more costly power iteration steps. When `n_components` is equal 

582 to or greater than the effective matrix rank and the spectrum does not 

583 present a slow decay, `n_iter=0` or `1` should even work fine in theory 

584 (see Halko et al paper, page 9). 

585 

586 power_iteration_normalizer : {'auto', 'QR', 'LU', 'none'}, default='auto' 

587 Whether the power iterations are normalized with step-by-step 

588 QR factorization (the slowest but most accurate), 'none' 

589 (the fastest but numerically unstable when `n_iter` is large, e.g. 

590 typically 5 or larger), or 'LU' factorization (numerically stable 

591 but can lose slightly in accuracy). The 'auto' mode applies no 

592 normalization if `n_iter` <= 2 and switches to LU otherwise. 

593 

594 selection : {'value', 'module'}, default='module' 

595 Strategy used to select the n components. When `selection` is `'value'` 

596 (not yet implemented, will become the default when implemented), the 

597 components corresponding to the n largest eigenvalues are returned. 

598 When `selection` is `'module'`, the components corresponding to the n 

599 eigenvalues with the largest absolute values (moduli) are returned. 

600 

601 random_state : int, RandomState instance, default=None 

602 The seed of the pseudo random number generator to use when shuffling 

603 the data, i.e. getting the random vectors to initialize the algorithm. 

604 Pass an int for reproducible results across multiple function calls. 

605 See :term:`Glossary <random_state>`. 

606 

607 Notes 

608 ----- 

609 This algorithm finds a (usually very good) approximate truncated 

610 eigendecomposition using randomized methods to speed up the computations. 

611 

612 This method is particularly fast on large matrices on which 

613 you wish to extract only a small number of components. In order to 

614 obtain further speed up, `n_iter` can be set <=2 (at the cost of 

615 loss of precision). To increase the precision it is recommended to 

616 increase `n_oversamples`, up to `2*k-n_components` where k is the 

617 effective rank. Usually, `n_components` is chosen to be greater than k 

618 so increasing `n_oversamples` up to `n_components` should be enough. 

619 

620 Strategy 'value': not implemented yet. 

621 Algorithms 5.3, 5.4 and 5.5 in the Halko et al paper should provide good 

622 candidates for a future implementation. 

623 

624 Strategy 'module': 

625 The principle is that for normal (e.g. real symmetric) matrices, the singular values and 

626 eigenvalues are related: if t is an eigenvalue of A, then :math:`|t|` is a 

627 singular value of A. This method relies on a randomized SVD to find the n 

628 singular components corresponding to the n singular values with the largest 

629 absolute values, and then uses the signs of the singular vectors to find the true 

630 sign of t: if the signs of the left and right singular vectors differ, 

631 then the corresponding eigenvalue is negative. 

632 

633 Returns 

634 ------- 

635 eigvals : 1D array of shape (n_components,) containing the `n_components` 

636 eigenvalues selected (see ``selection`` parameter). 

637 eigvecs : 2D array of shape (M.shape[0], n_components) containing the 

638 `n_components` eigenvectors corresponding to the `eigvals`, in the 

639 corresponding order. Note that this follows the `scipy.linalg.eigh` 

640 convention. 

641 

642 See Also 

643 -------- 

644 :func:`randomized_svd` 

645 

646 References 

647 ---------- 

648 * :arxiv:`"Finding structure with randomness: 

649 Stochastic algorithms for constructing approximate matrix decompositions" 

650 (Algorithm 4.3 for strategy 'module') <0909.4061>` 

651 Halko, et al. (2009) 

652 """ 

653 if selection == "value": # pragma: no cover 

654 # to do : an algorithm can be found in the Halko et al reference 

655 raise NotImplementedError() 

656 

657 elif selection == "module": 

658 # Note: no need for deterministic U and Vt (flip_sign=True), 

659 # as we only use the dot product UVt afterwards 

660 U, S, Vt = randomized_svd( 

661 M, 

662 n_components=n_components, 

663 n_oversamples=n_oversamples, 

664 n_iter=n_iter, 

665 power_iteration_normalizer=power_iteration_normalizer, 

666 flip_sign=False, 

667 random_state=random_state, 

668 ) 

669 

670 eigvecs = U[:, :n_components] 

671 eigvals = S[:n_components] 

672 

673 # Conversion of Singular values into Eigenvalues: 

674 # For any eigenvalue t, the corresponding singular value is |t|. 

675 # So if there is a negative eigenvalue t, the corresponding singular 

676 # value will be -t, and the left (U) and right (V) singular vectors 

677 # will have opposite signs. 

678 # Fastest way: see <https://stackoverflow.com/a/61974002/7262247> 

679 diag_VtU = np.einsum("ji,ij->j", Vt[:n_components, :], U[:, :n_components]) 

680 signs = np.sign(diag_VtU) 

681 eigvals = eigvals * signs 

682 

683 else: # pragma: no cover 

684 raise ValueError("Invalid `selection`: %r" % selection) 

685 

686 return eigvals, eigvecs 
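
Illustrative sketch for this private helper on a small symmetric positive semi-definite matrix (example values):

    import numpy as np
    from sklearn.utils.extmath import _randomized_eigsh

    rng = np.random.RandomState(0)
    X = rng.randn(30, 30)
    M = X @ X.T  # real symmetric positive semi-definite
    eigvals, eigvecs = _randomized_eigsh(M, n_components=3, random_state=0)
    print(eigvals.shape, eigvecs.shape)  # (3,) (30, 3)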

687 

688 

689def weighted_mode(a, w, *, axis=0): 

690 """Return an array of the weighted modal (most common) value in the passed array. 

691 

692 If there is more than one such value, only the first is returned. 

693 The bin-count for the modal bins is also returned. 

694 

695 This is an extension of the algorithm in scipy.stats.mode. 

696 

697 Parameters 

698 ---------- 

699 a : array-like of shape (n_samples,) 

700 Array of values in which to find the mode(s). 

701 w : array-like of shape (n_samples,) 

702 Array of weights for each value. 

703 axis : int, default=0 

704 Axis along which to operate. Default is 0, i.e. the first axis. 

705 

706 Returns 

707 ------- 

708 vals : ndarray 

709 Array of modal values. 

710 score : ndarray 

711 Array of weighted counts for each mode. 

712 

713 See Also 

714 -------- 

715 scipy.stats.mode: Calculates the Modal (most common) value of array elements 

716 along specified axis. 

717 

718 Examples 

719 -------- 

720 >>> from sklearn.utils.extmath import weighted_mode 

721 >>> x = [4, 1, 4, 2, 4, 2] 

722 >>> weights = [1, 1, 1, 1, 1, 1] 

723 >>> weighted_mode(x, weights) 

724 (array([4.]), array([3.])) 

725 

726 The value 4 appears three times: with uniform weights, the result is 

727 simply the mode of the distribution. 

728 

729 >>> weights = [1, 3, 0.5, 1.5, 1, 2] # deweight the 4's 

730 >>> weighted_mode(x, weights) 

731 (array([2.]), array([3.5])) 

732 

733 The value 2 has the highest score: it appears twice with weights of 

734 1.5 and 2: the sum of these is 3.5. 

735 """ 

736 if axis is None: 

737 a = np.ravel(a) 

738 w = np.ravel(w) 

739 axis = 0 

740 else: 

741 a = np.asarray(a) 

742 w = np.asarray(w) 

743 

744 if a.shape != w.shape: 

745 w = np.full(a.shape, w, dtype=w.dtype) 

746 

747 scores = np.unique(np.ravel(a)) # get ALL unique values 

748 testshape = list(a.shape) 

749 testshape[axis] = 1 

750 oldmostfreq = np.zeros(testshape) 

751 oldcounts = np.zeros(testshape) 

752 for score in scores: 

753 template = np.zeros(a.shape) 

754 ind = a == score 

755 template[ind] = w[ind] 

756 counts = np.expand_dims(np.sum(template, axis), axis) 

757 mostfrequent = np.where(counts > oldcounts, score, oldmostfreq) 

758 oldcounts = np.maximum(counts, oldcounts) 

759 oldmostfreq = mostfrequent 

760 return mostfrequent, oldcounts 

761 

762 

763def cartesian(arrays, out=None): 

764 """Generate a cartesian product of input arrays. 

765 

766 Parameters 

767 ---------- 

768 arrays : list of array-like 

769 1-D arrays to form the cartesian product of. 

770 out : ndarray of shape (M, len(arrays)), default=None 

771 Array to place the cartesian product in. 

772 

773 Returns 

774 ------- 

775 out : ndarray of shape (M, len(arrays)) 

776 Array containing the cartesian products formed of input arrays. 

777 If not provided, the `dtype` of the output array is set to the most 

778 permissive `dtype` of the input arrays, according to NumPy type 

779 promotion. 

780 

781 .. versionadded:: 1.2 

782 Add support for arrays of different types. 

783 

784 Notes 

785 ----- 

786 This function may not be used on more than 32 arrays 

787 because the underlying numpy functions do not support it. 

788 

789 Examples 

790 -------- 

791 >>> from sklearn.utils.extmath import cartesian 

792 >>> cartesian(([1, 2, 3], [4, 5], [6, 7])) 

793 array([[1, 4, 6], 

794 [1, 4, 7], 

795 [1, 5, 6], 

796 [1, 5, 7], 

797 [2, 4, 6], 

798 [2, 4, 7], 

799 [2, 5, 6], 

800 [2, 5, 7], 

801 [3, 4, 6], 

802 [3, 4, 7], 

803 [3, 5, 6], 

804 [3, 5, 7]]) 

805 """ 

806 arrays = [np.asarray(x) for x in arrays] 

807 shape = (len(x) for x in arrays) 

808 

809 ix = np.indices(shape) 

810 ix = ix.reshape(len(arrays), -1).T 

811 

812 if out is None: 

813 dtype = np.result_type(*arrays) # find the most permissive dtype 

814 out = np.empty_like(ix, dtype=dtype) 

815 

816 for n, arr in enumerate(arrays): 

817 out[:, n] = arr[ix[:, n]] 

818 

819 return out 

820 

821 

822def svd_flip(u, v, u_based_decision=True): 

823 """Sign correction to ensure deterministic output from SVD. 

824 

825 Adjusts the columns of u and the rows of v such that the loadings in the 

826 columns in u that are largest in absolute value are always positive. 

827 

828 If u_based_decision is False, then the same sign correction is applied 

829 so that the rows in v that are largest in absolute value are always 

830 positive. 

831 

832 Parameters 

833 ---------- 

834 u : ndarray 

835 Parameters u and v are the output of `linalg.svd` or 

836 :func:`~sklearn.utils.extmath.randomized_svd`, with matching inner 

837 dimensions so one can compute `np.dot(u * s, v)`. 

838 

839 v : ndarray 

840 Parameters u and v are the output of `linalg.svd` or 

841 :func:`~sklearn.utils.extmath.randomized_svd`, with matching inner 

842 dimensions so one can compute `np.dot(u * s, v)`. The input v should 

843 really be called vt to be consistent with scipy's output. 

844 

845 u_based_decision : bool, default=True 

846 If True, use the columns of u as the basis for sign flipping. 

847 Otherwise, use the rows of v. The choice of which variable to base the 

848 decision on is generally algorithm dependent. 

849 

850 Returns 

851 ------- 

852 u_adjusted : ndarray 

853 Array u with adjusted columns and the same dimensions as u. 

854 

855 v_adjusted : ndarray 

856 Array v with adjusted rows and the same dimensions as v. 

857 """ 

858 xp, _ = get_namespace(u, v) 

859 device = getattr(u, "device", None) 

860 

861 if u_based_decision: 

862 # columns of u, rows of v, or equivalently rows of u.T and v 

863 max_abs_u_cols = xp.argmax(xp.abs(u.T), axis=1) 

864 shift = xp.arange(u.T.shape[0], device=device) 

865 indices = max_abs_u_cols + shift * u.T.shape[1] 

866 signs = xp.sign(xp.take(xp.reshape(u.T, (-1,)), indices, axis=0)) 

867 u *= signs[np.newaxis, :] 

868 v *= signs[:, np.newaxis] 

869 else: 

870 # rows of v, columns of u 

871 max_abs_v_rows = xp.argmax(xp.abs(v), axis=1) 

872 shift = xp.arange(v.shape[0], device=device) 

873 indices = max_abs_v_rows + shift * v.shape[1] 

874 signs = xp.sign(xp.take(xp.reshape(v, (-1,)), indices)) 

875 u *= signs[np.newaxis, :] 

876 v *= signs[:, np.newaxis] 

877 return u, v 
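
Illustrative sketch: the sign flips leave the reconstruction u @ diag(s) @ v unchanged (example values):

    import numpy as np
    from scipy import linalg
    from sklearn.utils.extmath import svd_flip

    A = np.random.RandomState(0).randn(4, 3)
    U, s, Vt = linalg.svd(A, full_matrices=False)
    U, Vt = svd_flip(U, Vt)
    print(np.allclose(U @ np.diag(s) @ Vt, A))  # True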

878 

879 

880# TODO(1.6): remove 

881@deprecated( # type: ignore 

882 "The function `log_logistic` is deprecated and will be removed in 1.6. " 

883 "Use `-np.logaddexp(0, -x)` instead." 

884) 

885def log_logistic(X, out=None): 

886 """Compute the log of the logistic function, ``log(1 / (1 + e ** -x))``. 

887 

888 This implementation is numerically stable and uses `-np.logaddexp(0, -x)`. 

889 

890 For the ordinary logistic function, use ``scipy.special.expit``. 

891 

892 Parameters 

893 ---------- 

894 X : array-like of shape (M, N) or (M,) 

895 Argument to the logistic function. 

896 

897 out : array-like of shape (M, N) or (M,), default=None 

898 Preallocated output array. 

899 

900 Returns 

901 ------- 

902 out : ndarray of shape (M, N) or (M,) 

903 Log of the logistic function evaluated at every point in x. 

904 

905 Notes 

906 ----- 

907 See the blog post describing this implementation: 

908 http://fa.bianp.net/blog/2013/numerical-optimizers-for-logistic-regression/ 

909 """ 

910 X = check_array(X, dtype=np.float64, ensure_2d=False) 

911 

912 if out is None: 

913 out = np.empty_like(X) 

914 

915 np.logaddexp(0, -X, out=out) 

916 out *= -1 

917 return out 

918 

919 

920def softmax(X, copy=True): 

921 """ 

922 Calculate the softmax function. 

923 

924 The softmax function is calculated by 

925 np.exp(X) / np.sum(np.exp(X), axis=1) 

926 

927 This will cause overflow when large values are exponentiated. 

928 Hence the largest value in each row is subtracted from each data 

929 point to prevent this. 

930 

931 Parameters 

932 ---------- 

933 X : array-like of float of shape (M, N) 

934 Argument to the softmax function. 

935 

936 copy : bool, default=True 

937 Copy X or not. 

938 

939 Returns 

940 ------- 

941 out : ndarray of shape (M, N) 

942 Softmax function evaluated at every point in x. 

943 """ 

944 xp, is_array_api_compliant = get_namespace(X) 

945 if copy: 

946 X = xp.asarray(X, copy=True) 

947 max_prob = xp.reshape(xp.max(X, axis=1), (-1, 1)) 

948 X -= max_prob 

949 

950 if _is_numpy_namespace(xp): 

951 # optimization for NumPy arrays 

952 np.exp(X, out=np.asarray(X)) 

953 else: 

954 # array_api does not have `out=` 

955 X = xp.exp(X) 

956 

957 sum_prob = xp.reshape(xp.sum(X, axis=1), (-1, 1)) 

958 X /= sum_prob 

959 return X 
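
Illustrative sketch showing that each output row is a probability distribution (example values):

    import numpy as np
    from sklearn.utils.extmath import softmax

    X = np.array([[1.0, 2.0, 3.0], [1.0, 1.0, 1.0]])
    print(softmax(X).sum(axis=1))  # [1. 1.]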

960 

961 

962def make_nonnegative(X, min_value=0): 

963 """Ensure `X.min()` >= `min_value`. 

964 

965 Parameters 

966 ---------- 

967 X : array-like 

968 The matrix to make non-negative. 

969 min_value : float, default=0 

970 The threshold value. 

971 

972 Returns 

973 ------- 

974 array-like 

975 The thresholded array. 

976 

977 Raises 

978 ------ 

979 ValueError 

980 When X is sparse. 

981 """ 

982 min_ = X.min() 

983 if min_ < min_value: 

984 if sparse.issparse(X): 

985 raise ValueError( 

986 "Cannot make the data matrix" 

987 " nonnegative because it is sparse." 

988 " Adding a value to every entry would" 

989 " make it no longer sparse." 

990 ) 

991 X = X + (min_value - min_) 

992 return X 

993 

994 

995# Use at least float64 for the accumulating functions to avoid precision issues; 

996 # see https://github.com/numpy/numpy/issues/9393. The float64 result is kept 

997 # rather than cast back down, in case the smaller float dtype would overflow. 

998def _safe_accumulator_op(op, x, *args, **kwargs): 

999 """ 

1000 This function provides numpy accumulator functions with a float64 dtype 

1001 when used on a floating point input. This prevents accumulator overflow on 

1002 smaller floating point dtypes. 

1003 

1004 Parameters 

1005 ---------- 

1006 op : function 

1007 A numpy accumulator function such as np.mean or np.sum. 

1008 x : ndarray 

1009 A numpy array on which to apply the accumulator function. 

1010 *args : positional arguments 

1011 Positional arguments passed to the accumulator function after the 

1012 input x. 

1013 **kwargs : keyword arguments 

1014 Keyword arguments passed to the accumulator function. 

1015 

1016 Returns 

1017 ------- 

1018 result 

1019 The output of the accumulator function passed to this function. 

1020 """ 

1021 if np.issubdtype(x.dtype, np.floating) and x.dtype.itemsize < 8: 

1022 result = op(x, *args, **kwargs, dtype=np.float64) 

1023 else: 

1024 result = op(x, *args, **kwargs) 

1025 return result 

1026 

1027 

1028def _incremental_mean_and_var( 

1029 X, last_mean, last_variance, last_sample_count, sample_weight=None 

1030): 

1031 """Calculate mean update and a Youngs and Cramer variance update. 

1032 

1033 If sample_weight is given, the weighted mean and variance is computed. 

1034 

1035 Update a given mean and (possibly) variance according to new data given 

1036 in X. last_mean is always required to compute the new mean. 

1037 If last_variance is None, no variance is computed and None is returned for 

1038 updated_variance. 

1039 

1040 From the paper "Algorithms for computing the sample variance: analysis and 

1041 recommendations", by Chan, Golub, and LeVeque. 

1042 

1043 Parameters 

1044 ---------- 

1045 X : array-like of shape (n_samples, n_features) 

1046 Data to use for variance update. 

1047 

1048 last_mean : array-like of shape (n_features,) 

1049 

1050 last_variance : array-like of shape (n_features,) 

1051 

1052 last_sample_count : array-like of shape (n_features,) 

1053 The number of samples encountered until now if sample_weight is None. 

1054 If sample_weight is not None, this is the sum of sample_weight 

1055 encountered. 

1056 

1057 sample_weight : array-like of shape (n_samples,) or None 

1058 Sample weights. If None, compute the unweighted mean/variance. 

1059 

1060 Returns 

1061 ------- 

1062 updated_mean : ndarray of shape (n_features,) 

1063 

1064 updated_variance : ndarray of shape (n_features,) 

1065 None if last_variance was None. 

1066 

1067 updated_sample_count : ndarray of shape (n_features,) 

1068 

1069 Notes 

1070 ----- 

1071 NaNs are ignored during the algorithm. 

1072 

1073 References 

1074 ---------- 

1075 T. Chan, G. Golub, R. LeVeque. Algorithms for computing the sample 

1076 variance: recommendations, The American Statistician, Vol. 37, No. 3, 

1077 pp. 242-247 

1078 

1079 Also, see the sparse implementation of this in 

1080 `utils.sparsefuncs.incr_mean_variance_axis` and 

1081 `utils.sparsefuncs_fast.incr_mean_variance_axis0` 

1082 """ 

1083 # old = stats until now 

1084 # new = the current increment 

1085 # updated = the aggregated stats 

1086 last_sum = last_mean * last_sample_count 

1087 X_nan_mask = np.isnan(X) 

1088 if np.any(X_nan_mask): 

1089 sum_op = np.nansum 

1090 else: 

1091 sum_op = np.sum 

1092 if sample_weight is not None: 

1093 # equivalent to np.nansum(X * sample_weight, axis=0) 

1094 # safer because np.float64(X*W) != np.float64(X)*np.float64(W) 

1095 new_sum = _safe_accumulator_op( 

1096 np.matmul, sample_weight, np.where(X_nan_mask, 0, X) 

1097 ) 

1098 new_sample_count = _safe_accumulator_op( 

1099 np.sum, sample_weight[:, None] * (~X_nan_mask), axis=0 

1100 ) 

1101 else: 

1102 new_sum = _safe_accumulator_op(sum_op, X, axis=0) 

1103 n_samples = X.shape[0] 

1104 new_sample_count = n_samples - np.sum(X_nan_mask, axis=0) 

1105 

1106 updated_sample_count = last_sample_count + new_sample_count 

1107 

1108 updated_mean = (last_sum + new_sum) / updated_sample_count 

1109 

1110 if last_variance is None: 

1111 updated_variance = None 

1112 else: 

1113 T = new_sum / new_sample_count 

1114 temp = X - T 

1115 if sample_weight is not None: 

1116 # equivalent to np.nansum((X-T)**2 * sample_weight, axis=0) 

1117 # safer because np.float64(X*W) != np.float64(X)*np.float64(W) 

1118 correction = _safe_accumulator_op( 

1119 np.matmul, sample_weight, np.where(X_nan_mask, 0, temp) 

1120 ) 

1121 temp **= 2 

1122 new_unnormalized_variance = _safe_accumulator_op( 

1123 np.matmul, sample_weight, np.where(X_nan_mask, 0, temp) 

1124 ) 

1125 else: 

1126 correction = _safe_accumulator_op(sum_op, temp, axis=0) 

1127 temp **= 2 

1128 new_unnormalized_variance = _safe_accumulator_op(sum_op, temp, axis=0) 

1129 

1130 # correction term of the corrected 2 pass algorithm. 

1131 # See "Algorithms for computing the sample variance: analysis 

1132 # and recommendations", by Chan, Golub, and LeVeque. 

1133 new_unnormalized_variance -= correction**2 / new_sample_count 

1134 

1135 last_unnormalized_variance = last_variance * last_sample_count 

1136 

1137 with np.errstate(divide="ignore", invalid="ignore"): 

1138 last_over_new_count = last_sample_count / new_sample_count 

1139 updated_unnormalized_variance = ( 

1140 last_unnormalized_variance 

1141 + new_unnormalized_variance 

1142 + last_over_new_count 

1143 / updated_sample_count 

1144 * (last_sum / last_over_new_count - new_sum) ** 2 

1145 ) 

1146 

1147 zeros = last_sample_count == 0 

1148 updated_unnormalized_variance[zeros] = new_unnormalized_variance[zeros] 

1149 updated_variance = updated_unnormalized_variance / updated_sample_count 

1150 

1151 return updated_mean, updated_variance, updated_sample_count 
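
Illustrative sketch for this private helper: accumulating over batches, starting from zero counts, matches the full-data statistics (example values):

    import numpy as np
    from sklearn.utils.extmath import _incremental_mean_and_var

    X = np.random.RandomState(0).randn(100, 3)
    mean, var, count = np.zeros(3), np.zeros(3), np.zeros(3)
    for batch in np.array_split(X, 4):
        mean, var, count = _incremental_mean_and_var(batch, mean, var, count)
    print(np.allclose(mean, X.mean(axis=0)), np.allclose(var, X.var(axis=0)))  # True True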

1152 

1153 

1154def _deterministic_vector_sign_flip(u): 

1155 """Modify the sign of vectors for reproducibility. 

1156 

1157 Flips the sign of elements of all the vectors (rows of u) such that 

1158 the absolute maximum element of each vector is positive. 

1159 

1160 Parameters 

1161 ---------- 

1162 u : ndarray 

1163 Array with vectors as its rows. 

1164 

1165 Returns 

1166 ------- 

1167 u_flipped : ndarray with same shape as u 

1168 Array with the sign flipped vectors as its rows. 

1169 """ 

1170 max_abs_rows = np.argmax(np.abs(u), axis=1) 

1171 signs = np.sign(u[range(u.shape[0]), max_abs_rows]) 

1172 u *= signs[:, np.newaxis] 

1173 return u 

1174 

1175 

1176def stable_cumsum(arr, axis=None, rtol=1e-05, atol=1e-08): 

1177 """Use high precision for cumsum and check that final value matches sum. 

1178 

1179 Warns if the final cumulative sum does not match the sum (up to the chosen 

1180 tolerance). 

1181 

1182 Parameters 

1183 ---------- 

1184 arr : array-like 

1185 Array to be cumulatively summed (flattened if `axis` is None). 

1186 axis : int, default=None 

1187 Axis along which the cumulative sum is computed. 

1188 The default (None) is to compute the cumsum over the flattened array. 

1189 rtol : float, default=1e-05 

1190 Relative tolerance, see ``np.allclose``. 

1191 atol : float, default=1e-08 

1192 Absolute tolerance, see ``np.allclose``. 

1193 

1194 Returns 

1195 ------- 

1196 out : ndarray 

1197 Array with the cumulative sums along the chosen axis. 

1198 """ 

1199 out = np.cumsum(arr, axis=axis, dtype=np.float64) 

1200 expected = np.sum(arr, axis=axis, dtype=np.float64) 

1201 if not np.allclose( 

1202 out.take(-1, axis=axis), expected, rtol=rtol, atol=atol, equal_nan=True 

1203 ): 

1204 warnings.warn( 

1205 ( 

1206 "cumsum was found to be unstable: " 

1207 "its last element does not correspond to sum" 

1208 ), 

1209 RuntimeWarning, 

1210 ) 

1211 return out 
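
Illustrative sketch: the cumulative sum is accumulated in float64 even for float32 input (example values):

    import numpy as np
    from sklearn.utils.extmath import stable_cumsum

    a = np.ones(5, dtype=np.float32)
    print(stable_cumsum(a))  # [1. 2. 3. 4. 5.], dtype float64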

1212 

1213 

1214def _nanaverage(a, weights=None): 

1215 """Compute the weighted average, ignoring NaNs. 

1216 

1217 Parameters 

1218 ---------- 

1219 a : ndarray 

1220 Array containing data to be averaged. 

1221 weights : array-like, default=None 

1222 An array of weights associated with the values in a. Each value in a 

1223 contributes to the average according to its associated weight. The 

1224 weights array must be 1-D and of the same shape as a. If `weights=None`, 

1225 then all data in a are assumed to have a weight equal to one. 

1226 

1227 Returns 

1228 ------- 

1229 weighted_average : float 

1230 The weighted average. 

1231 

1232 Notes 

1233 ----- 

1234 This wrapper combines :func:`numpy.average` and :func:`numpy.nanmean`, so 

1235 that NaN values are ignored in the average and weights can 

1236 be passed. Note that when possible, we delegate to the underlying NumPy functions. 

1237 """ 

1238 

1239 if len(a) == 0: 

1240 return np.nan 

1241 

1242 mask = np.isnan(a) 

1243 if mask.all(): 

1244 return np.nan 

1245 

1246 if weights is None: 

1247 return np.nanmean(a) 

1248 

1249 weights = np.array(weights, copy=False) 

1250 a, weights = a[~mask], weights[~mask] 

1251 try: 

1252 return np.average(a, weights=weights) 

1253 except ZeroDivisionError: 

1254 # this is when all weights are zero, then ignore them 

1255 return np.average(a)
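
Illustrative sketch for this private helper: the NaN entry and its weight are dropped (example values):

    import numpy as np
    from sklearn.utils.extmath import _nanaverage

    a = np.array([1.0, np.nan, 3.0])
    w = np.array([1.0, 1.0, 3.0])
    print(_nanaverage(a, weights=w))  # (1*1 + 3*3) / (1 + 3) = 2.5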