1"""
2The :mod:`sklearn.utils.extmath` module includes utilities to perform
3optimal mathematical operations in scikit-learn that are not available in SciPy.
4"""
5# Authors: Gael Varoquaux
6# Alexandre Gramfort
7# Alexandre T. Passos
8# Olivier Grisel
9# Lars Buitinck
10# Stefan van der Walt
11# Kyle Kastner
12# Giorgio Patrini
13# License: BSD 3 clause
14
15import warnings
16from functools import partial
17from numbers import Integral
18
19import numpy as np
20from scipy import linalg, sparse
21
22from ..utils import deprecated
23from ..utils._param_validation import Interval, StrOptions, validate_params
24from . import check_random_state
25from ._array_api import _is_numpy_namespace, device, get_namespace
26from .sparsefuncs_fast import csr_row_norms
27from .validation import check_array
28
29
def squared_norm(x):
    """Squared Euclidean or Frobenius norm of x.

    Faster than norm(x) ** 2.

    Parameters
    ----------
    x : array-like
        The input array, which can be either a vector or a 2-dimensional array.

    Returns
    -------
    float
        The Euclidean norm when x is a vector, the Frobenius norm when x
        is a matrix (2-d array).
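
    Examples
    --------
    A minimal illustrative call (``float`` keeps the doctest output
    independent of the NumPy scalar repr):

    >>> import numpy as np
    >>> from sklearn.utils.extmath import squared_norm
    >>> float(squared_norm(np.array([3.0, 4.0])))
    25.0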
45 """
46 x = np.ravel(x, order="K")
47 if np.issubdtype(x.dtype, np.integer):
48 warnings.warn(
49 (
50 "Array type is integer, np.dot may overflow. "
51 "Data should be float type to avoid this issue"
52 ),
53 UserWarning,
54 )
55 return np.dot(x, x)
56
57
def row_norms(X, squared=False):
    """Row-wise (squared) Euclidean norm of X.

    Equivalent to np.sqrt((X * X).sum(axis=1)), but also supports sparse
    matrices and does not create an X.shape-sized temporary.

    Performs no input validation.

    Parameters
    ----------
    X : array-like
        The input array.
    squared : bool, default=False
        If True, return squared norms.

    Returns
    -------
    array-like
        The row-wise (squared) Euclidean norm of X.
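
    Examples
    --------
    A small illustrative call on a dense array:

    >>> import numpy as np
    >>> from sklearn.utils.extmath import row_norms
    >>> X = np.array([[3.0, 4.0], [1.0, 0.0]])
    >>> row_norms(X)
    array([5., 1.])
    >>> row_norms(X, squared=True)
    array([25.,  1.])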
77 """
78 if sparse.issparse(X):
79 X = X.tocsr()
80 norms = csr_row_norms(X)
81 if not squared:
82 norms = np.sqrt(norms)
83 else:
84 xp, _ = get_namespace(X)
85 if _is_numpy_namespace(xp):
86 X = np.asarray(X)
87 norms = np.einsum("ij,ij->i", X, X)
88 norms = xp.asarray(norms)
89 else:
90 norms = xp.sum(xp.multiply(X, X), axis=1)
91 if not squared:
92 norms = xp.sqrt(norms)
93 return norms
94
95
def fast_logdet(A):
    """Compute the logarithm of the determinant of a square matrix.

    The (natural) logarithm of the determinant is returned when det(A) is
    strictly positive. If the determinant is zero or negative, -inf is
    returned.

    Equivalent to np.log(np.linalg.det(A)), but more robust.

    Parameters
    ----------
    A : array_like of shape (n, n)
        The square matrix.

    Returns
    -------
    logdet : float
        When det(A) is strictly positive, log(det(A)) is returned.
        When det(A) is non-positive or not defined, -inf is returned.

    See Also
    --------
    numpy.linalg.slogdet : Compute the sign and (natural) logarithm of the determinant
        of an array.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.utils.extmath import fast_logdet
    >>> a = np.array([[5, 1], [2, 8]])
    >>> fast_logdet(a)
    3.6375861597263857
    """
    xp, _ = get_namespace(A)
    sign, ld = xp.linalg.slogdet(A)
    if not sign > 0:
        return -xp.inf
    return ld


def density(w):
    """Compute density of a sparse vector.

    Parameters
    ----------
    w : array-like
        The sparse vector.

    Returns
    -------
    float
        The density of w, between 0 and 1.
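
    Examples
    --------
    An illustrative call on a dense array where two of four entries are
    nonzero:

    >>> import numpy as np
    >>> from sklearn.utils.extmath import density
    >>> density(np.array([0.0, 1.0, 2.0, 0.0]))
    0.5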
148 """
149 if hasattr(w, "toarray"):
150 d = float(w.nnz) / (w.shape[0] * w.shape[1])
151 else:
152 d = 0 if w is None else float((w != 0).sum()) / w.size
153 return d
154
155
def safe_sparse_dot(a, b, *, dense_output=False):
    """Dot product that handles the sparse matrix case correctly.

    Parameters
    ----------
    a : {ndarray, sparse matrix}
    b : {ndarray, sparse matrix}
    dense_output : bool, default=False
        When False, ``a`` and ``b`` both being sparse will yield sparse output.
        When True, output will always be a dense array.

    Returns
    -------
    dot_product : {ndarray, sparse matrix}
        Sparse if ``a`` and ``b`` are sparse and ``dense_output=False``.
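
    Examples
    --------
    A small illustrative product of two sparse matrices, densified on output:

    >>> import numpy as np
    >>> from scipy import sparse
    >>> from sklearn.utils.extmath import safe_sparse_dot
    >>> A = sparse.csr_matrix([[1, 2], [3, 4]])
    >>> B = sparse.csr_matrix([[5], [6]])
    >>> safe_sparse_dot(A, B, dense_output=True)
    array([[17],
           [39]])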
171 """
172 if a.ndim > 2 or b.ndim > 2:
173 if sparse.issparse(a):
174 # sparse is always 2D. Implies b is 3D+
175 # [i, j] @ [k, ..., l, m, n] -> [i, k, ..., l, n]
176 b_ = np.rollaxis(b, -2)
177 b_2d = b_.reshape((b.shape[-2], -1))
178 ret = a @ b_2d
179 ret = ret.reshape(a.shape[0], *b_.shape[1:])
180 elif sparse.issparse(b):
181 # sparse is always 2D. Implies a is 3D+
182 # [k, ..., l, m] @ [i, j] -> [k, ..., l, j]
183 a_2d = a.reshape(-1, a.shape[-1])
184 ret = a_2d @ b
185 ret = ret.reshape(*a.shape[:-1], b.shape[1])
186 else:
187 ret = np.dot(a, b)
188 else:
189 ret = a @ b
190
191 if (
192 sparse.issparse(a)
193 and sparse.issparse(b)
194 and dense_output
195 and hasattr(ret, "toarray")
196 ):
197 return ret.toarray()
198 return ret
199
200
def randomized_range_finder(
    A, *, size, n_iter, power_iteration_normalizer="auto", random_state=None
):
    """Compute an orthonormal matrix whose range approximates the range of A.

    Parameters
    ----------
    A : 2D array
        The input data matrix.

    size : int
        Number of columns of the return array.

    n_iter : int
        Number of power iterations used to stabilize the result.

    power_iteration_normalizer : {'auto', 'QR', 'LU', 'none'}, default='auto'
        Whether the power iterations are normalized with step-by-step
        QR factorization (the slowest but most accurate), 'none'
        (the fastest but numerically unstable when `n_iter` is large, e.g.
        typically 5 or larger), or 'LU' factorization (numerically stable
        but can lose slightly in accuracy). The 'auto' mode applies no
        normalization if `n_iter` <= 2 and switches to LU otherwise.

        .. versionadded:: 0.18

    random_state : int, RandomState instance or None, default=None
        The seed of the pseudo random number generator to use when shuffling
        the data, i.e. getting the random vectors to initialize the algorithm.
        Pass an int for reproducible results across multiple function calls.
        See :term:`Glossary <random_state>`.

    Returns
    -------
    Q : ndarray of shape (A.shape[0], size)
        A projection matrix, the range of which approximates well the range
        of the input matrix A.

    Notes
    -----
    Follows Algorithm 4.3 of
    :arxiv:`"Finding structure with randomness:
    Stochastic algorithms for constructing approximate matrix decompositions"
    <0909.4061>`
    Halko, et al. (2009)

    An implementation of a randomized algorithm for principal component
    analysis
    A. Szlam et al. 2014
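
    Examples
    --------
    A minimal, shape-oriented sketch (the entries of `Q` depend on the random
    draw):

    >>> import numpy as np
    >>> from sklearn.utils.extmath import randomized_range_finder
    >>> A = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]])
    >>> Q = randomized_range_finder(A, size=2, n_iter=2, random_state=0)
    >>> Q.shape
    (3, 2)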
251 """
252 xp, is_array_api_compliant = get_namespace(A)
253 random_state = check_random_state(random_state)
254
255 # Generating normal random vectors with shape: (A.shape[1], size)
256 # XXX: generate random number directly from xp if it's possible
257 # one day.
258 Q = xp.asarray(random_state.normal(size=(A.shape[1], size)))
259 if hasattr(A, "dtype") and xp.isdtype(A.dtype, kind="real floating"):
260 # Use float32 computation and components if A has a float32 dtype.
261 Q = xp.astype(Q, A.dtype, copy=False)
262
263 # Move Q to device if needed only after converting to float32 if needed to
264 # avoid allocating unnecessary memory on the device.
265
266 # Note: we cannot combine the astype and to_device operations in one go
267 # using xp.asarray(..., dtype=dtype, device=device) because downcasting
268 # from float64 to float32 in asarray might not always be accepted as only
269 # casts following type promotion rules are guarateed to work.
270 # https://github.com/data-apis/array-api/issues/647
271 if is_array_api_compliant:
272 Q = xp.asarray(Q, device=device(A))
273
274 # Deal with "auto" mode
275 if power_iteration_normalizer == "auto":
276 if n_iter <= 2:
277 power_iteration_normalizer = "none"
278 elif is_array_api_compliant:
279 # XXX: https://github.com/data-apis/array-api/issues/627
280 warnings.warn(
281 "Array API does not support LU factorization, falling back to QR"
282 " instead. Set `power_iteration_normalizer='QR'` explicitly to silence"
283 " this warning."
284 )
285 power_iteration_normalizer = "QR"
286 else:
287 power_iteration_normalizer = "LU"
288 elif power_iteration_normalizer == "LU" and is_array_api_compliant:
289 raise ValueError(
290 "Array API does not support LU factorization. Set "
291 "`power_iteration_normalizer='QR'` instead."
292 )
293
294 if is_array_api_compliant:
295 qr_normalizer = partial(xp.linalg.qr, mode="reduced")
296 else:
297 # Use scipy.linalg instead of numpy.linalg when not explicitly
298 # using the Array API.
299 qr_normalizer = partial(linalg.qr, mode="economic")
300
301 if power_iteration_normalizer == "QR":
302 normalizer = qr_normalizer
303 elif power_iteration_normalizer == "LU":
304 normalizer = partial(linalg.lu, permute_l=True)
305 else:
306 normalizer = lambda x: (x, None)
307
308 # Perform power iterations with Q to further 'imprint' the top
309 # singular vectors of A in Q
310 for _ in range(n_iter):
311 Q, _ = normalizer(A @ Q)
312 Q, _ = normalizer(A.T @ Q)
313
314 # Sample the range of A using by linear projection of Q
315 # Extract an orthonormal basis
316 Q, _ = qr_normalizer(A @ Q)
317
318 return Q
319
320
@validate_params(
    {
        "M": [np.ndarray, "sparse matrix"],
        "n_components": [Interval(Integral, 1, None, closed="left")],
        "n_oversamples": [Interval(Integral, 0, None, closed="left")],
        "n_iter": [Interval(Integral, 0, None, closed="left"), StrOptions({"auto"})],
        "power_iteration_normalizer": [StrOptions({"auto", "QR", "LU", "none"})],
        "transpose": ["boolean", StrOptions({"auto"})],
        "flip_sign": ["boolean"],
        "random_state": ["random_state"],
        "svd_lapack_driver": [StrOptions({"gesdd", "gesvd"})],
    },
    prefer_skip_nested_validation=True,
)
def randomized_svd(
    M,
    n_components,
    *,
    n_oversamples=10,
    n_iter="auto",
    power_iteration_normalizer="auto",
    transpose="auto",
    flip_sign=True,
    random_state=None,
    svd_lapack_driver="gesdd",
):
    """Compute a truncated randomized SVD.

    This method solves the fixed-rank approximation problem described in [1]_
    (problem (1.5), p5).

    Parameters
    ----------
    M : {ndarray, sparse matrix}
        Matrix to decompose.

    n_components : int
        Number of singular values and vectors to extract.

    n_oversamples : int, default=10
        Additional number of random vectors to sample the range of `M` so as
        to ensure proper conditioning. The total number of random vectors
        used to find the range of `M` is `n_components + n_oversamples`. A
        smaller number can improve speed but can negatively impact the quality
        of the approximation of singular vectors and singular values. Users
        might wish to increase this parameter up to `2*k - n_components` where
        k is the effective rank, for large matrices, noisy problems, matrices
        with slowly decaying spectrums, or to increase precision accuracy. See
        [1]_ (pages 5, 23 and 26).

    n_iter : int or 'auto', default='auto'
        Number of power iterations. It can be used to deal with very noisy
        problems. When 'auto', it is set to 4, unless `n_components` is small
        (< .1 * min(M.shape)) in which case `n_iter` is set to 7.
        This improves precision with few components. Note that in general
        users should rather increase `n_oversamples` before increasing `n_iter`
        as the principle of the randomized method is to avoid the usage of
        these more costly power iteration steps. When `n_components` is equal
        to or greater than the effective matrix rank and the spectrum does not
        present a slow decay, `n_iter=0` or `1` should even work fine in theory
        (see [1]_ page 9).

        .. versionchanged:: 0.18

    power_iteration_normalizer : {'auto', 'QR', 'LU', 'none'}, default='auto'
        Whether the power iterations are normalized with step-by-step
        QR factorization (the slowest but most accurate), 'none'
        (the fastest but numerically unstable when `n_iter` is large, e.g.
        typically 5 or larger), or 'LU' factorization (numerically stable
        but can lose slightly in accuracy). The 'auto' mode applies no
        normalization if `n_iter` <= 2 and switches to LU otherwise.

        .. versionadded:: 0.18

    transpose : bool or 'auto', default='auto'
        Whether the algorithm should be applied to M.T instead of M. The
        result should approximately be the same. The 'auto' mode will
        trigger the transposition if M.shape[1] > M.shape[0] since this
        implementation of randomized SVD tends to be a little faster in that
        case.

        .. versionchanged:: 0.18

    flip_sign : bool, default=True
        The output of a singular value decomposition is only unique up to the
        signs of the singular vectors. If `flip_sign` is set to `True`, the
        sign ambiguity is resolved by making the largest loadings for each
        component in the left singular vectors positive.

    random_state : int, RandomState instance or None, default=None
        The seed of the pseudo random number generator to use when
        shuffling the data, i.e. getting the random vectors to initialize
        the algorithm. Pass an int for reproducible results across multiple
        function calls. See :term:`Glossary <random_state>`.

        .. versionchanged:: 1.2
            The default value changed from 0 to None.

    svd_lapack_driver : {"gesdd", "gesvd"}, default="gesdd"
        Whether to use the more efficient divide-and-conquer approach
        (`"gesdd"`) or the more general rectangular approach (`"gesvd"`) to
        compute the SVD of the matrix B, which is the projection of M into a
        low dimensional subspace, as described in [1]_.

        .. versionadded:: 1.2

    Returns
    -------
    u : ndarray of shape (n_samples, n_components)
        Unitary matrix having left singular vectors with signs flipped as columns.
    s : ndarray of shape (n_components,)
        The singular values, sorted in non-increasing order.
    vh : ndarray of shape (n_components, n_features)
        Unitary matrix having right singular vectors with signs flipped as rows.

    Notes
    -----
    This algorithm finds a (usually very good) approximate truncated
    singular value decomposition using randomization to speed up the
    computations. It is particularly fast on large matrices on which
    you wish to extract only a small number of components. In order to
    obtain further speed up, `n_iter` can be set <=2 (at the cost of
    loss of precision). To increase the precision it is recommended to
    increase `n_oversamples`, up to `2*k-n_components` where k is the
    effective rank. Usually, `n_components` is chosen to be greater than k
    so increasing `n_oversamples` up to `n_components` should be enough.

    References
    ----------
    .. [1] :arxiv:`"Finding structure with randomness:
        Stochastic algorithms for constructing approximate matrix decompositions"
        <0909.4061>`
        Halko, et al. (2009)

    .. [2] A randomized algorithm for the decomposition of matrices
        Per-Gunnar Martinsson, Vladimir Rokhlin and Mark Tygert

    .. [3] An implementation of a randomized algorithm for principal component
        analysis A. Szlam et al. 2014

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.utils.extmath import randomized_svd
    >>> a = np.array([[1, 2, 3, 5],
    ...               [3, 4, 5, 6],
    ...               [7, 8, 9, 10]])
    >>> U, s, Vh = randomized_svd(a, n_components=2, random_state=0)
    >>> U.shape, s.shape, Vh.shape
    ((3, 2), (2,), (2, 4))
471 """
472 if sparse.issparse(M) and M.format in ("lil", "dok"):
473 warnings.warn(
474 "Calculating SVD of a {} is expensive. "
475 "csr_matrix is more efficient.".format(type(M).__name__),
476 sparse.SparseEfficiencyWarning,
477 )
478
479 random_state = check_random_state(random_state)
480 n_random = n_components + n_oversamples
481 n_samples, n_features = M.shape
482
483 if n_iter == "auto":
484 # Checks if the number of iterations is explicitly specified
485 # Adjust n_iter. 7 was found a good compromise for PCA. See #5299
486 n_iter = 7 if n_components < 0.1 * min(M.shape) else 4
487
488 if transpose == "auto":
489 transpose = n_samples < n_features
490 if transpose:
491 # this implementation is a bit faster with smaller shape[1]
492 M = M.T
493
494 Q = randomized_range_finder(
495 M,
496 size=n_random,
497 n_iter=n_iter,
498 power_iteration_normalizer=power_iteration_normalizer,
499 random_state=random_state,
500 )
501
502 # project M to the (k + p) dimensional space using the basis vectors
503 B = Q.T @ M
504
505 # compute the SVD on the thin matrix: (k + p) wide
506 xp, is_array_api_compliant = get_namespace(B)
507 if is_array_api_compliant:
508 Uhat, s, Vt = xp.linalg.svd(B, full_matrices=False)
509 else:
510 # When when array_api_dispatch is disabled, rely on scipy.linalg
511 # instead of numpy.linalg to avoid introducing a behavior change w.r.t.
512 # previous versions of scikit-learn.
513 Uhat, s, Vt = linalg.svd(
514 B, full_matrices=False, lapack_driver=svd_lapack_driver
515 )
516 del B
517 U = Q @ Uhat
518
519 if flip_sign:
520 if not transpose:
521 U, Vt = svd_flip(U, Vt)
522 else:
523 # In case of transpose u_based_decision=false
524 # to actually flip based on u and not v.
525 U, Vt = svd_flip(U, Vt, u_based_decision=False)
526
527 if transpose:
528 # transpose back the results according to the input convention
529 return Vt[:n_components, :].T, s[:n_components], U[:, :n_components].T
530 else:
531 return U[:, :n_components], s[:n_components], Vt[:n_components, :]
532
533
def _randomized_eigsh(
    M,
    n_components,
    *,
    n_oversamples=10,
    n_iter="auto",
    power_iteration_normalizer="auto",
    selection="module",
    random_state=None,
):
    """Compute a truncated eigendecomposition using randomized methods.

    This method solves the fixed-rank approximation problem described in the
    Halko et al paper.

    The choice of which components to select can be tuned with the `selection`
    parameter.

    .. versionadded:: 0.24

    Parameters
    ----------
    M : ndarray or sparse matrix
        Matrix to decompose. It should be real symmetric square or complex
        Hermitian.

    n_components : int
        Number of eigenvalues and vectors to extract.

    n_oversamples : int, default=10
        Additional number of random vectors to sample the range of M so as
        to ensure proper conditioning. The total number of random vectors
        used to find the range of M is n_components + n_oversamples. A smaller
        number can improve speed but can negatively impact the quality of the
        approximation of eigenvectors and eigenvalues. Users might wish
        to increase this parameter up to `2*k - n_components` where k is the
        effective rank, for large matrices, noisy problems, matrices with
        slowly decaying spectrums, or to increase precision accuracy. See Halko
        et al (pages 5, 23 and 26).

    n_iter : int or 'auto', default='auto'
        Number of power iterations. It can be used to deal with very noisy
        problems. When 'auto', it is set to 4, unless `n_components` is small
        (< .1 * min(M.shape)) in which case `n_iter` is set to 7.
        This improves precision with few components. Note that in general
        users should rather increase `n_oversamples` before increasing `n_iter`
        as the principle of the randomized method is to avoid the usage of
        these more costly power iteration steps. When `n_components` is equal
        to or greater than the effective matrix rank and the spectrum does not
        present a slow decay, `n_iter=0` or `1` should even work fine in theory
        (see Halko et al paper, page 9).

    power_iteration_normalizer : {'auto', 'QR', 'LU', 'none'}, default='auto'
        Whether the power iterations are normalized with step-by-step
        QR factorization (the slowest but most accurate), 'none'
        (the fastest but numerically unstable when `n_iter` is large, e.g.
        typically 5 or larger), or 'LU' factorization (numerically stable
        but can lose slightly in accuracy). The 'auto' mode applies no
        normalization if `n_iter` <= 2 and switches to LU otherwise.

    selection : {'value', 'module'}, default='module'
        Strategy used to select the n components. When `selection` is `'value'`
        (not yet implemented, will become the default when implemented), the
        components corresponding to the n largest eigenvalues are returned.
        When `selection` is `'module'`, the components corresponding to the n
        eigenvalues with largest moduli are returned.

    random_state : int, RandomState instance or None, default=None
        The seed of the pseudo random number generator to use when shuffling
        the data, i.e. getting the random vectors to initialize the algorithm.
        Pass an int for reproducible results across multiple function calls.
        See :term:`Glossary <random_state>`.

    Notes
    -----
    This algorithm finds a (usually very good) approximate truncated
    eigendecomposition using randomized methods to speed up the computations.

    This method is particularly fast on large matrices on which
    you wish to extract only a small number of components. In order to
    obtain further speed up, `n_iter` can be set <=2 (at the cost of
    loss of precision). To increase the precision it is recommended to
    increase `n_oversamples`, up to `2*k-n_components` where k is the
    effective rank. Usually, `n_components` is chosen to be greater than k
    so increasing `n_oversamples` up to `n_components` should be enough.

    Strategy 'value': not implemented yet.
    Algorithms 5.3, 5.4 and 5.5 in the Halko et al paper should provide good
    candidates for a future implementation.

    Strategy 'module':
    The principle is that for normal matrices (in particular real symmetric
    and complex Hermitian ones), the singular values and eigenvalues are
    related: if t is an eigenvalue of A, then :math:`|t|` is a singular value
    of A. This method relies on a randomized SVD to find the n singular
    components corresponding to the n singular values with largest moduli, and
    then uses the signs of the singular vectors to find the true sign of t:
    if the signs of the left and right singular vectors are different, then
    the corresponding eigenvalue is negative.

    Returns
    -------
    eigvals : 1D array of shape (n_components,) containing the `n_components`
        eigenvalues selected (see ``selection`` parameter).
    eigvecs : 2D array of shape (M.shape[0], n_components) containing the
        `n_components` eigenvectors corresponding to the `eigvals`, in the
        corresponding order. Note that this follows the `scipy.linalg.eigh`
        convention.

    See Also
    --------
    :func:`randomized_svd`

    References
    ----------
    * :arxiv:`"Finding structure with randomness:
      Stochastic algorithms for constructing approximate matrix decompositions"
      (Algorithm 4.3 for strategy 'module') <0909.4061>`
      Halko, et al. (2009)
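
    Examples
    --------
    A shape-oriented sketch on a small symmetric matrix (eigenvector signs
    depend on the random draw):

    >>> import numpy as np
    >>> from sklearn.utils.extmath import _randomized_eigsh
    >>> M = np.diag(np.array([5.0, -4.0, 3.0, 0.0]))
    >>> eigvals, eigvecs = _randomized_eigsh(M, n_components=2, random_state=0)
    >>> eigvals.shape, eigvecs.shape
    ((2,), (4, 2))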
652 """
653 if selection == "value": # pragma: no cover
654 # to do : an algorithm can be found in the Halko et al reference
655 raise NotImplementedError()
656
657 elif selection == "module":
658 # Note: no need for deterministic U and Vt (flip_sign=True),
659 # as we only use the dot product UVt afterwards
660 U, S, Vt = randomized_svd(
661 M,
662 n_components=n_components,
663 n_oversamples=n_oversamples,
664 n_iter=n_iter,
665 power_iteration_normalizer=power_iteration_normalizer,
666 flip_sign=False,
667 random_state=random_state,
668 )
669
670 eigvecs = U[:, :n_components]
671 eigvals = S[:n_components]
672
673 # Conversion of Singular values into Eigenvalues:
674 # For any eigenvalue t, the corresponding singular value is |t|.
675 # So if there is a negative eigenvalue t, the corresponding singular
676 # value will be -t, and the left (U) and right (V) singular vectors
677 # will have opposite signs.
678 # Fastest way: see <https://stackoverflow.com/a/61974002/7262247>
679 diag_VtU = np.einsum("ji,ij->j", Vt[:n_components, :], U[:, :n_components])
680 signs = np.sign(diag_VtU)
681 eigvals = eigvals * signs
682
683 else: # pragma: no cover
684 raise ValueError("Invalid `selection`: %r" % selection)
685
686 return eigvals, eigvecs
687
688
def weighted_mode(a, w, *, axis=0):
    """Return an array of the weighted modal (most common) value in the passed array.

    If there is more than one such value, only the first is returned.
    The bin-count for the modal bins is also returned.

    This is an extension of the algorithm in scipy.stats.mode.

    Parameters
    ----------
    a : array-like of shape (n_samples,)
        Array in which to find the modal values.
    w : array-like of shape (n_samples,)
        Array of weights for each value.
    axis : int, default=0
        Axis along which to operate. Default is 0, i.e. the first axis.

    Returns
    -------
    vals : ndarray
        Array of modal values.
    score : ndarray
        Array of weighted counts for each mode.

    See Also
    --------
    scipy.stats.mode : Calculates the modal (most common) value of array elements
        along the specified axis.

    Examples
    --------
    >>> from sklearn.utils.extmath import weighted_mode
    >>> x = [4, 1, 4, 2, 4, 2]
    >>> weights = [1, 1, 1, 1, 1, 1]
    >>> weighted_mode(x, weights)
    (array([4.]), array([3.]))

    The value 4 appears three times: with uniform weights, the result is
    simply the mode of the distribution.

    >>> weights = [1, 3, 0.5, 1.5, 1, 2]  # deweight the 4's
    >>> weighted_mode(x, weights)
    (array([2.]), array([3.5]))

    The value 2 has the highest score: it appears twice with weights of
    1.5 and 2: the sum of these is 3.5.
735 """
736 if axis is None:
737 a = np.ravel(a)
738 w = np.ravel(w)
739 axis = 0
740 else:
741 a = np.asarray(a)
742 w = np.asarray(w)
743
744 if a.shape != w.shape:
745 w = np.full(a.shape, w, dtype=w.dtype)
746
747 scores = np.unique(np.ravel(a)) # get ALL unique values
748 testshape = list(a.shape)
749 testshape[axis] = 1
750 oldmostfreq = np.zeros(testshape)
751 oldcounts = np.zeros(testshape)
752 for score in scores:
753 template = np.zeros(a.shape)
754 ind = a == score
755 template[ind] = w[ind]
756 counts = np.expand_dims(np.sum(template, axis), axis)
757 mostfrequent = np.where(counts > oldcounts, score, oldmostfreq)
758 oldcounts = np.maximum(counts, oldcounts)
759 oldmostfreq = mostfrequent
760 return mostfrequent, oldcounts
761
762
def cartesian(arrays, out=None):
    """Generate a cartesian product of input arrays.

    Parameters
    ----------
    arrays : list of array-like
        1-D arrays to form the cartesian product of.
    out : ndarray of shape (M, len(arrays)), default=None
        Array to place the cartesian product in.

    Returns
    -------
    out : ndarray of shape (M, len(arrays))
        Array containing the cartesian products formed of input arrays.
        If not provided, the `dtype` of the output array is set to the most
        permissive `dtype` of the input arrays, according to NumPy type
        promotion.

        .. versionadded:: 1.2
            Add support for arrays of different types.

    Notes
    -----
    This function may not be used on more than 32 arrays
    because the underlying numpy functions do not support it.

    Examples
    --------
    >>> from sklearn.utils.extmath import cartesian
    >>> cartesian(([1, 2, 3], [4, 5], [6, 7]))
    array([[1, 4, 6],
           [1, 4, 7],
           [1, 5, 6],
           [1, 5, 7],
           [2, 4, 6],
           [2, 4, 7],
           [2, 5, 6],
           [2, 5, 7],
           [3, 4, 6],
           [3, 4, 7],
           [3, 5, 6],
           [3, 5, 7]])
805 """
806 arrays = [np.asarray(x) for x in arrays]
807 shape = (len(x) for x in arrays)
808
809 ix = np.indices(shape)
810 ix = ix.reshape(len(arrays), -1).T
811
812 if out is None:
813 dtype = np.result_type(*arrays) # find the most permissive dtype
814 out = np.empty_like(ix, dtype=dtype)
815
816 for n, arr in enumerate(arrays):
817 out[:, n] = arrays[n][ix[:, n]]
818
819 return out
820
821
def svd_flip(u, v, u_based_decision=True):
    """Sign correction to ensure deterministic output from SVD.

    Adjusts the columns of u and the rows of v such that the loadings in the
    columns in u that are largest in absolute value are always positive.

    If u_based_decision is False, then the same sign correction is applied so
    that the rows in v that are largest in absolute value are always positive.

    Parameters
    ----------
    u : ndarray
        Parameters u and v are the output of `linalg.svd` or
        :func:`~sklearn.utils.extmath.randomized_svd`, with matching inner
        dimensions so one can compute `np.dot(u * s, v)`.

    v : ndarray
        Parameters u and v are the output of `linalg.svd` or
        :func:`~sklearn.utils.extmath.randomized_svd`, with matching inner
        dimensions so one can compute `np.dot(u * s, v)`. The input v should
        really be called vt to be consistent with scipy's output.

    u_based_decision : bool, default=True
        If True, use the columns of u as the basis for sign flipping.
        Otherwise, use the rows of v. The choice of which variable to base the
        decision on is generally algorithm dependent.

    Returns
    -------
    u_adjusted : ndarray
        Array u with adjusted columns and the same dimensions as u.

    v_adjusted : ndarray
        Array v with adjusted rows and the same dimensions as v.
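
    Examples
    --------
    A small illustrative call (the largest-magnitude entry of each column of
    `u` becomes positive, and `v` is flipped accordingly):

    >>> import numpy as np
    >>> from sklearn.utils.extmath import svd_flip
    >>> u = np.array([[-1.0, 0.0], [0.0, 1.0]])
    >>> v = np.array([[1.0, 0.0], [0.0, -1.0]])
    >>> u_adjusted, v_adjusted = svd_flip(u, v)
    >>> u_adjusted
    array([[1., 0.],
           [0., 1.]])
    >>> v_adjusted
    array([[-1.,  0.],
           [ 0., -1.]])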
857 """
858 xp, _ = get_namespace(u, v)
859 device = getattr(u, "device", None)
860
861 if u_based_decision:
862 # columns of u, rows of v, or equivalently rows of u.T and v
863 max_abs_u_cols = xp.argmax(xp.abs(u.T), axis=1)
864 shift = xp.arange(u.T.shape[0], device=device)
865 indices = max_abs_u_cols + shift * u.T.shape[1]
866 signs = xp.sign(xp.take(xp.reshape(u.T, (-1,)), indices, axis=0))
867 u *= signs[np.newaxis, :]
868 v *= signs[:, np.newaxis]
869 else:
870 # rows of v, columns of u
871 max_abs_v_rows = xp.argmax(xp.abs(v), axis=1)
872 shift = xp.arange(v.shape[0], device=device)
873 indices = max_abs_v_rows + shift * v.shape[1]
874 signs = xp.sign(xp.take(xp.reshape(v, (-1,)), indices))
875 u *= signs[np.newaxis, :]
876 v *= signs[:, np.newaxis]
877 return u, v
878
879
# TODO(1.6): remove
@deprecated(  # type: ignore
    "The function `log_logistic` is deprecated and will be removed in 1.6. "
    "Use `-np.logaddexp(0, -x)` instead."
)
def log_logistic(X, out=None):
    """Compute the log of the logistic function, ``log(1 / (1 + e ** -x))``.

    This implementation is numerically stable and uses `-np.logaddexp(0, -x)`.

    For the ordinary logistic function, use ``scipy.special.expit``.

    Parameters
    ----------
    X : array-like of shape (M, N) or (M,)
        Argument to the logistic function.

    out : array-like of shape (M, N) or (M,), default=None
        Preallocated output array.

    Returns
    -------
    out : ndarray of shape (M, N) or (M,)
        Log of the logistic function evaluated at every point in X.

    Notes
    -----
    See the blog post describing this implementation:
    http://fa.bianp.net/blog/2013/numerical-optimizers-for-logistic-regression/
    """
    X = check_array(X, dtype=np.float64, ensure_2d=False)

    if out is None:
        out = np.empty_like(X)

    np.logaddexp(0, -X, out=out)
    out *= -1
    return out


def softmax(X, copy=True):
    """
    Calculate the softmax function.

    The softmax function is calculated by
    np.exp(X) / np.sum(np.exp(X), axis=1)

    This will cause overflow when large values are exponentiated.
    Hence the largest value in each row is subtracted from each data
    point to prevent this.

    Parameters
    ----------
    X : array-like of float of shape (M, N)
        Argument to the softmax function.

    copy : bool, default=True
        Copy X or not.

    Returns
    -------
    out : ndarray of shape (M, N)
        Softmax function evaluated at every point in X.
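
    Examples
    --------
    A minimal illustrative call (each row sums to one; the values here are
    exact because the inputs within each row are equal):

    >>> import numpy as np
    >>> from sklearn.utils.extmath import softmax
    >>> softmax(np.array([[0.0, 0.0], [1.0, 1.0]]))
    array([[0.5, 0.5],
           [0.5, 0.5]])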
943 """
944 xp, is_array_api_compliant = get_namespace(X)
945 if copy:
946 X = xp.asarray(X, copy=True)
947 max_prob = xp.reshape(xp.max(X, axis=1), (-1, 1))
948 X -= max_prob
949
950 if _is_numpy_namespace(xp):
951 # optimization for NumPy arrays
952 np.exp(X, out=np.asarray(X))
953 else:
954 # array_api does not have `out=`
955 X = xp.exp(X)
956
957 sum_prob = xp.reshape(xp.sum(X, axis=1), (-1, 1))
958 X /= sum_prob
959 return X
960
961
def make_nonnegative(X, min_value=0):
    """Ensure `X.min()` >= `min_value`.

    Parameters
    ----------
    X : array-like
        The matrix to make non-negative.
    min_value : float, default=0
        The threshold value.

    Returns
    -------
    array-like
        The thresholded array.

    Raises
    ------
    ValueError
        When X is sparse.
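
    Examples
    --------
    An illustrative shift of a dense array whose minimum is below the
    threshold:

    >>> import numpy as np
    >>> from sklearn.utils.extmath import make_nonnegative
    >>> make_nonnegative(np.array([[-2.0, 1.0]]))
    array([[0., 3.]])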
981 """
982 min_ = X.min()
983 if min_ < min_value:
984 if sparse.issparse(X):
985 raise ValueError(
986 "Cannot make the data matrix"
987 " nonnegative because it is sparse."
988 " Adding a value to every entry would"
989 " make it no longer sparse."
990 )
991 X = X + (min_value - min_)
992 return X
993
994
# Use at least float64 for the accumulating functions to avoid precision issues;
# see https://github.com/numpy/numpy/issues/9393. The float64 result is also
# retained as is, in case a narrower float would overflow.
def _safe_accumulator_op(op, x, *args, **kwargs):
    """
    This function provides numpy accumulator functions with a float64 dtype
    when used on a floating point input. This prevents accumulator overflow on
    smaller floating point dtypes.

    Parameters
    ----------
    op : function
        A numpy accumulator function such as np.mean or np.sum.
    x : ndarray
        A numpy array on which to apply the accumulator function.
    *args : positional arguments
        Positional arguments passed to the accumulator function after the
        input x.
    **kwargs : keyword arguments
        Keyword arguments passed to the accumulator function.

    Returns
    -------
    result
        The output of the accumulator function passed to this function.
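
    Examples
    --------
    A hedged sketch of the dtype upcast on a float32 input:

    >>> import numpy as np
    >>> from sklearn.utils.extmath import _safe_accumulator_op
    >>> x = np.full(10, 0.1, dtype=np.float32)
    >>> _safe_accumulator_op(np.sum, x).dtype
    dtype('float64')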
1020 """
1021 if np.issubdtype(x.dtype, np.floating) and x.dtype.itemsize < 8:
1022 result = op(x, *args, **kwargs, dtype=np.float64)
1023 else:
1024 result = op(x, *args, **kwargs)
1025 return result
1026
1027
def _incremental_mean_and_var(
    X, last_mean, last_variance, last_sample_count, sample_weight=None
):
    """Calculate mean update and a Youngs and Cramer variance update.

    If sample_weight is given, the weighted mean and variance is computed.

    Update a given mean and (possibly) variance according to new data given
    in X. last_mean is always required to compute the new mean.
    If last_variance is None, no variance is computed and None is returned
    for updated_variance.

    From the paper "Algorithms for computing the sample variance: analysis and
    recommendations", by Chan, Golub, and LeVeque.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Data to use for variance update.

    last_mean : array-like of shape (n_features,)

    last_variance : array-like of shape (n_features,)

    last_sample_count : array-like of shape (n_features,)
        The number of samples encountered until now if sample_weight is None.
        If sample_weight is not None, this is the sum of sample_weight
        encountered.

    sample_weight : array-like of shape (n_samples,) or None
        Sample weights. If None, compute the unweighted mean/variance.

    Returns
    -------
    updated_mean : ndarray of shape (n_features,)

    updated_variance : ndarray of shape (n_features,)
        None if last_variance was None.

    updated_sample_count : ndarray of shape (n_features,)

    Notes
    -----
    NaNs are ignored during the algorithm.

    References
    ----------
    T. Chan, G. Golub, R. LeVeque. Algorithms for computing the sample
    variance: analysis and recommendations, The American Statistician, Vol.
    37, No. 3, pp. 242-247

    Also, see the sparse implementation of this in
    `utils.sparsefuncs.incr_mean_variance_axis` and
    `utils.sparsefuncs_fast.incr_mean_variance_axis0`
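
    Examples
    --------
    A hedged two-batch sketch: feeding the data in two increments reproduces
    the global mean:

    >>> import numpy as np
    >>> from sklearn.utils.extmath import _incremental_mean_and_var
    >>> X1 = np.array([[1.0], [3.0]])
    >>> X2 = np.array([[5.0], [7.0]])
    >>> mean, _, count = _incremental_mean_and_var(
    ...     X1, last_mean=0.0, last_variance=None, last_sample_count=np.array([0.0])
    ... )
    >>> mean, count
    (array([2.]), array([2.]))
    >>> mean, _, count = _incremental_mean_and_var(X2, mean, None, count)
    >>> mean
    array([4.])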
1082 """
1083 # old = stats until now
1084 # new = the current increment
1085 # updated = the aggregated stats
1086 last_sum = last_mean * last_sample_count
1087 X_nan_mask = np.isnan(X)
1088 if np.any(X_nan_mask):
1089 sum_op = np.nansum
1090 else:
1091 sum_op = np.sum
1092 if sample_weight is not None:
1093 # equivalent to np.nansum(X * sample_weight, axis=0)
1094 # safer because np.float64(X*W) != np.float64(X)*np.float64(W)
1095 new_sum = _safe_accumulator_op(
1096 np.matmul, sample_weight, np.where(X_nan_mask, 0, X)
1097 )
1098 new_sample_count = _safe_accumulator_op(
1099 np.sum, sample_weight[:, None] * (~X_nan_mask), axis=0
1100 )
1101 else:
1102 new_sum = _safe_accumulator_op(sum_op, X, axis=0)
1103 n_samples = X.shape[0]
1104 new_sample_count = n_samples - np.sum(X_nan_mask, axis=0)
1105
1106 updated_sample_count = last_sample_count + new_sample_count
1107
1108 updated_mean = (last_sum + new_sum) / updated_sample_count
1109
1110 if last_variance is None:
1111 updated_variance = None
1112 else:
1113 T = new_sum / new_sample_count
1114 temp = X - T
1115 if sample_weight is not None:
1116 # equivalent to np.nansum((X-T)**2 * sample_weight, axis=0)
1117 # safer because np.float64(X*W) != np.float64(X)*np.float64(W)
1118 correction = _safe_accumulator_op(
1119 np.matmul, sample_weight, np.where(X_nan_mask, 0, temp)
1120 )
1121 temp **= 2
1122 new_unnormalized_variance = _safe_accumulator_op(
1123 np.matmul, sample_weight, np.where(X_nan_mask, 0, temp)
1124 )
1125 else:
1126 correction = _safe_accumulator_op(sum_op, temp, axis=0)
1127 temp **= 2
1128 new_unnormalized_variance = _safe_accumulator_op(sum_op, temp, axis=0)
1129
1130 # correction term of the corrected 2 pass algorithm.
1131 # See "Algorithms for computing the sample variance: analysis
1132 # and recommendations", by Chan, Golub, and LeVeque.
1133 new_unnormalized_variance -= correction**2 / new_sample_count
1134
1135 last_unnormalized_variance = last_variance * last_sample_count
1136
1137 with np.errstate(divide="ignore", invalid="ignore"):
1138 last_over_new_count = last_sample_count / new_sample_count
1139 updated_unnormalized_variance = (
1140 last_unnormalized_variance
1141 + new_unnormalized_variance
1142 + last_over_new_count
1143 / updated_sample_count
1144 * (last_sum / last_over_new_count - new_sum) ** 2
1145 )
1146
1147 zeros = last_sample_count == 0
1148 updated_unnormalized_variance[zeros] = new_unnormalized_variance[zeros]
1149 updated_variance = updated_unnormalized_variance / updated_sample_count
1150
1151 return updated_mean, updated_variance, updated_sample_count
1152
1153
def _deterministic_vector_sign_flip(u):
    """Modify the sign of vectors for reproducibility.

    Flips the sign of elements of all the vectors (rows of u) such that
    the absolute maximum element of each vector is positive.

    Parameters
    ----------
    u : ndarray
        Array with vectors as its rows.

    Returns
    -------
    u_flipped : ndarray with same shape as u
        Array with the sign flipped vectors as its rows.
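
    Examples
    --------
    An illustrative flip of a single row whose largest-magnitude entry is
    negative:

    >>> import numpy as np
    >>> from sklearn.utils.extmath import _deterministic_vector_sign_flip
    >>> _deterministic_vector_sign_flip(np.array([[1.0, -3.0]]))
    array([[-1.,  3.]])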
1169 """
1170 max_abs_rows = np.argmax(np.abs(u), axis=1)
1171 signs = np.sign(u[range(u.shape[0]), max_abs_rows])
1172 u *= signs[:, np.newaxis]
1173 return u
1174
1175
def stable_cumsum(arr, axis=None, rtol=1e-05, atol=1e-08):
    """Use high precision for cumsum and check that final value matches sum.

    Warns if the final cumulative sum does not match the sum (up to the chosen
    tolerance).

    Parameters
    ----------
    arr : array-like
        The array to be cumulatively summed (flattened when `axis` is None).
    axis : int, default=None
        Axis along which the cumulative sum is computed.
        The default (None) is to compute the cumsum over the flattened array.
    rtol : float, default=1e-05
        Relative tolerance, see ``np.allclose``.
    atol : float, default=1e-08
        Absolute tolerance, see ``np.allclose``.

    Returns
    -------
    out : ndarray
        Array with the cumulative sums along the chosen axis.
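
    Examples
    --------
    A minimal illustrative call (the accumulation is done in float64):

    >>> import numpy as np
    >>> from sklearn.utils.extmath import stable_cumsum
    >>> stable_cumsum(np.array([1, 2, 3]))
    array([1., 3., 6.])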
1198 """
1199 out = np.cumsum(arr, axis=axis, dtype=np.float64)
1200 expected = np.sum(arr, axis=axis, dtype=np.float64)
1201 if not np.allclose(
1202 out.take(-1, axis=axis), expected, rtol=rtol, atol=atol, equal_nan=True
1203 ):
1204 warnings.warn(
1205 (
1206 "cumsum was found to be unstable: "
1207 "its last element does not correspond to sum"
1208 ),
1209 RuntimeWarning,
1210 )
1211 return out
1212
1213
def _nanaverage(a, weights=None):
    """Compute the weighted average, ignoring NaNs.

    Parameters
    ----------
    a : ndarray
        Array containing data to be averaged.
    weights : array-like, default=None
        An array of weights associated with the values in a. Each value in a
        contributes to the average according to its associated weight. The
        weights array must be 1-D with the same shape as a. If `weights=None`,
        then all data in a are assumed to have a weight equal to one.

    Returns
    -------
    weighted_average : float
        The weighted average.

    Notes
    -----
    This wrapper combines :func:`numpy.average` and :func:`numpy.nanmean`, so
    that ``np.nan`` values are ignored from the average and weights can
    be passed. Note that when possible, we delegate to those methods directly.
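
    Examples
    --------
    A hedged illustrative call that ignores the NaN entry (``float`` keeps the
    doctest output independent of the NumPy scalar repr):

    >>> import numpy as np
    >>> from sklearn.utils.extmath import _nanaverage
    >>> a = np.array([1.0, np.nan, 3.0])
    >>> float(_nanaverage(a, weights=np.array([1.0, 1.0, 3.0])))
    2.5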
1237 """
1238
1239 if len(a) == 0:
1240 return np.nan
1241
1242 mask = np.isnan(a)
1243 if mask.all():
1244 return np.nan
1245
1246 if weights is None:
1247 return np.nanmean(a)
1248
1249 weights = np.array(weights, copy=False)
1250 a, weights = a[~mask], weights[~mask]
1251 try:
1252 return np.average(a, weights=weights)
1253 except ZeroDivisionError:
1254 # this is when all weights are zero, then ignore them
1255 return np.average(a)