1"""
2The :mod:`sklearn.utils.extmath` module includes utilities to perform
3optimal mathematical operations in scikit-learn that are not available in SciPy.
4"""
5# Authors: Gael Varoquaux
6# Alexandre Gramfort
7# Alexandre T. Passos
8# Olivier Grisel
9# Lars Buitinck
10# Stefan van der Walt
11# Kyle Kastner
12# Giorgio Patrini
13# License: BSD 3 clause
14
15import warnings
16from functools import partial
17from numbers import Integral
18
19import numpy as np
20from scipy import linalg, sparse
21
22from ..utils import deprecated
23from ..utils._param_validation import Interval, StrOptions, validate_params
24from . import check_random_state
25from ._array_api import _is_numpy_namespace, device, get_namespace
26from .sparsefuncs_fast import csr_row_norms
27from .validation import check_array
28
29
def squared_norm(x):
    """Squared Euclidean or Frobenius norm of x.

    Faster than norm(x) ** 2.

    Parameters
    ----------
    x : array-like
        The input array, which can be either a vector or a 2-dimensional array.

    Returns
    -------
    float
        The Euclidean norm when x is a vector, the Frobenius norm when x
        is a matrix (2-d array).
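
    Examples
    --------
    A minimal illustrative call (``float`` keeps the doctest output
    independent of the NumPy scalar repr):

    >>> import numpy as np
    >>> from sklearn.utils.extmath import squared_norm
    >>> float(squared_norm(np.array([3.0, 4.0])))
    25.0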
45 """
46 x = np.ravel(x, order="K")
47 if np.issubdtype(x.dtype, np.integer):
48 warnings.warn(
49 (
50 "Array type is integer, np.dot may overflow. "
51 "Data should be float type to avoid this issue"
52 ),
53 UserWarning,
54 )
55 return np.dot(x, x)
56
57
def row_norms(X, squared=False):
    """Row-wise (squared) Euclidean norm of X.

    Equivalent to np.sqrt((X * X).sum(axis=1)), but also supports sparse
    matrices and does not create an X.shape-sized temporary.

    Performs no input validation.

    Parameters
    ----------
    X : array-like
        The input array.
    squared : bool, default=False
        If True, return squared norms.

    Returns
    -------
    array-like
        The row-wise (squared) Euclidean norm of X.
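
    Examples
    --------
    A small illustrative call on a dense array:

    >>> import numpy as np
    >>> from sklearn.utils.extmath import row_norms
    >>> X = np.array([[3.0, 4.0], [1.0, 0.0]])
    >>> row_norms(X)
    array([5., 1.])
    >>> row_norms(X, squared=True)
    array([25.,  1.])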
77 """
78 if sparse.issparse(X):
79 X = X.tocsr()
80 norms = csr_row_norms(X)
81 if not squared:
82 norms = np.sqrt(norms)
83 else:
84 xp, _ = get_namespace(X)
85 if _is_numpy_namespace(xp):
86 X = np.asarray(X)
87 norms = np.einsum("ij,ij->i", X, X)
88 norms = xp.asarray(norms)
89 else:
90 norms = xp.sum(xp.multiply(X, X), axis=1)
91 if not squared:
92 norms = xp.sqrt(norms)
93 return norms
94
95
def fast_logdet(A):
    """Compute the logarithm of the determinant of a square matrix.

    The (natural) logarithm of the determinant is returned when det(A) is
    strictly positive. If the determinant is zero or negative, -inf is
    returned.

    Equivalent to np.log(np.linalg.det(A)), but more robust.

    Parameters
    ----------
    A : array_like of shape (n, n)
        The square matrix.

    Returns
    -------
    logdet : float
        When det(A) is strictly positive, log(det(A)) is returned.
        When det(A) is non-positive or not defined, -inf is returned.

    See Also
    --------
    numpy.linalg.slogdet : Compute the sign and (natural) logarithm of the determinant
        of an array.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.utils.extmath import fast_logdet
    >>> a = np.array([[5, 1], [2, 8]])
    >>> fast_logdet(a)
    3.6375861597263857
    """
    xp, _ = get_namespace(A)
    sign, ld = xp.linalg.slogdet(A)
    if not sign > 0:
        return -xp.inf
    return ld


def density(w):
    """Compute density of a sparse vector.

    Parameters
    ----------
    w : array-like
        The sparse vector.

    Returns
    -------
    float
        The density of w, between 0 and 1.
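
    Examples
    --------
    An illustrative call on a dense array where two of four entries are
    nonzero:

    >>> import numpy as np
    >>> from sklearn.utils.extmath import density
    >>> density(np.array([0.0, 1.0, 2.0, 0.0]))
    0.5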
148 """
149 if hasattr(w, "toarray"):
150 d = float(w.nnz) / (w.shape[0] * w.shape[1])
151 else:
152 d = 0 if w is None else float((w != 0).sum()) / w.size
153 return d
154
155
def safe_sparse_dot(a, b, *, dense_output=False):
    """Dot product that handles the sparse matrix case correctly.

    Parameters
    ----------
    a : {ndarray, sparse matrix}
    b : {ndarray, sparse matrix}
    dense_output : bool, default=False
        When False, ``a`` and ``b`` both being sparse will yield sparse output.
        When True, output will always be a dense array.

    Returns
    -------
    dot_product : {ndarray, sparse matrix}
        Sparse if ``a`` and ``b`` are sparse and ``dense_output=False``.
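
    Examples
    --------
    A small illustrative product of two sparse matrices, densified on output:

    >>> import numpy as np
    >>> from scipy import sparse
    >>> from sklearn.utils.extmath import safe_sparse_dot
    >>> A = sparse.csr_matrix([[1, 2], [3, 4]])
    >>> B = sparse.csr_matrix([[5], [6]])
    >>> safe_sparse_dot(A, B, dense_output=True)
    array([[17],
           [39]])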
171 """
172 if a.ndim > 2 or b.ndim > 2:
173 if sparse.issparse(a):
174 # sparse is always 2D. Implies b is 3D+
175 # [i, j] @ [k, ..., l, m, n] -> [i, k, ..., l, n]
176 b_ = np.rollaxis(b, -2)
177 b_2d = b_.reshape((b.shape[-2], -1))
178 ret = a @ b_2d
179 ret = ret.reshape(a.shape[0], *b_.shape[1:])
180 elif sparse.issparse(b):
181 # sparse is always 2D. Implies a is 3D+
182 # [k, ..., l, m] @ [i, j] -> [k, ..., l, j]
183 a_2d = a.reshape(-1, a.shape[-1])
184 ret = a_2d @ b
185 ret = ret.reshape(*a.shape[:-1], b.shape[1])
186 else:
187 ret = np.dot(a, b)
188 else:
189 ret = a @ b
190
191 if (
192 sparse.issparse(a)
193 and sparse.issparse(b)
194 and dense_output
195 and hasattr(ret, "toarray")
196 ):
197 return ret.toarray()
198 return ret
199
200
def randomized_range_finder(
    A, *, size, n_iter, power_iteration_normalizer="auto", random_state=None
):
    """Compute an orthonormal matrix whose range approximates the range of A.

    Parameters
    ----------
    A : 2D array
        The input data matrix.

    size : int
        Number of columns of the return array.

    n_iter : int
        Number of power iterations used to stabilize the result.

    power_iteration_normalizer : {'auto', 'QR', 'LU', 'none'}, default='auto'
        Whether the power iterations are normalized with step-by-step
        QR factorization (the slowest but most accurate), 'none'
        (the fastest but numerically unstable when `n_iter` is large, e.g.
        typically 5 or larger), or 'LU' factorization (numerically stable
        but can lose slightly in accuracy). The 'auto' mode applies no
        normalization if `n_iter` <= 2 and switches to LU otherwise.

        .. versionadded:: 0.18

    random_state : int, RandomState instance or None, default=None
        The seed of the pseudo random number generator to use when shuffling
        the data, i.e. getting the random vectors to initialize the algorithm.
        Pass an int for reproducible results across multiple function calls.
        See :term:`Glossary <random_state>`.

    Returns
    -------
    Q : ndarray of shape (A.shape[0], size)
        A projection matrix, the range of which approximates well the range
        of the input matrix A.

    Notes
    -----
    Follows Algorithm 4.3 of
    :arxiv:`"Finding structure with randomness:
    Stochastic algorithms for constructing approximate matrix decompositions"
    <0909.4061>`
    Halko, et al. (2009)

    An implementation of a randomized algorithm for principal component
    analysis
    A. Szlam et al. 2014
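
    Examples
    --------
    A minimal, shape-oriented sketch (the entries of `Q` depend on the random
    draw):

    >>> import numpy as np
    >>> from sklearn.utils.extmath import randomized_range_finder
    >>> A = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]])
    >>> Q = randomized_range_finder(A, size=2, n_iter=2, random_state=0)
    >>> Q.shape
    (3, 2)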
251 """
252 xp, is_array_api_compliant = get_namespace(A)
253 random_state = check_random_state(random_state)
254
255 # Generating normal random vectors with shape: (A.shape[1], size)
256 # XXX: generate random number directly from xp if it's possible
257 # one day.
258 Q = xp.asarray(random_state.normal(size=(A.shape[1], size)))
259 if hasattr(A, "dtype") and xp.isdtype(A.dtype, kind="real floating"):
260 # Use float32 computation and components if A has a float32 dtype.
261 Q = xp.astype(Q, A.dtype, copy=False)
262
263 # Move Q to device if needed only after converting to float32 if needed to
264 # avoid allocating unnecessary memory on the device.
265
266 # Note: we cannot combine the astype and to_device operations in one go
267 # using xp.asarray(..., dtype=dtype, device=device) because downcasting
268 # from float64 to float32 in asarray might not always be accepted as only
269 # casts following type promotion rules are guarateed to work.
270 # https://github.com/data-apis/array-api/issues/647
271 if is_array_api_compliant:
272 Q = xp.asarray(Q, device=device(A))
273
274 # Deal with "auto" mode
275 if power_iteration_normalizer == "auto":
276 if n_iter <= 2:
277 power_iteration_normalizer = "none"
278 elif is_array_api_compliant:
279 # XXX: https://github.com/data-apis/array-api/issues/627
280 warnings.warn(
281 "Array API does not support LU factorization, falling back to QR"
282 " instead. Set `power_iteration_normalizer='QR'` explicitly to silence"
283 " this warning."
284 )
285 power_iteration_normalizer = "QR"
286 else:
287 power_iteration_normalizer = "LU"
288 elif power_iteration_normalizer == "LU" and is_array_api_compliant:
289 raise ValueError(
290 "Array API does not support LU factorization. Set "
291 "`power_iteration_normalizer='QR'` instead."
292 )
293
294 if is_array_api_compliant:
295 qr_normalizer = partial(xp.linalg.qr, mode="reduced")
296 else:
297 # Use scipy.linalg instead of numpy.linalg when not explicitly
298 # using the Array API.
299 qr_normalizer = partial(linalg.qr, mode="economic")
300
301 if power_iteration_normalizer == "QR":
302 normalizer = qr_normalizer
303 elif power_iteration_normalizer == "LU":
304 normalizer = partial(linalg.lu, permute_l=True)
305 else:
306 normalizer = lambda x: (x, None)
307
308 # Perform power iterations with Q to further 'imprint' the top
309 # singular vectors of A in Q
310 for _ in range(n_iter):
311 Q, _ = normalizer(A @ Q)
312 Q, _ = normalizer(A.T @ Q)
313
314 # Sample the range of A using by linear projection of Q
315 # Extract an orthonormal basis
316 Q, _ = qr_normalizer(A @ Q)
317
318 return Q
319
320
@validate_params(
    {
        "M": [np.ndarray, "sparse matrix"],
        "n_components": [Interval(Integral, 1, None, closed="left")],
        "n_oversamples": [Interval(Integral, 0, None, closed="left")],
        "n_iter": [Interval(Integral, 0, None, closed="left"), StrOptions({"auto"})],
        "power_iteration_normalizer": [StrOptions({"auto", "QR", "LU", "none"})],
        "transpose": ["boolean", StrOptions({"auto"})],
        "flip_sign": ["boolean"],
        "random_state": ["random_state"],
        "svd_lapack_driver": [StrOptions({"gesdd", "gesvd"})],
    },
    prefer_skip_nested_validation=True,
)
def randomized_svd(
    M,
    n_components,
    *,
    n_oversamples=10,
    n_iter="auto",
    power_iteration_normalizer="auto",
    transpose="auto",
    flip_sign=True,
    random_state=None,
    svd_lapack_driver="gesdd",
):
    """Compute a truncated randomized SVD.

    This method solves the fixed-rank approximation problem described in [1]_
    (problem (1.5), p5).

    Parameters
    ----------
    M : {ndarray, sparse matrix}
        Matrix to decompose.

    n_components : int
        Number of singular values and vectors to extract.

    n_oversamples : int, default=10
        Additional number of random vectors to sample the range of `M` so as
        to ensure proper conditioning. The total number of random vectors
        used to find the range of `M` is `n_components + n_oversamples`. A
        smaller number can improve speed but can negatively impact the quality
        of the approximation of singular vectors and singular values. Users
        might wish to increase this parameter up to `2*k - n_components` where
        k is the effective rank, for large matrices, noisy problems, matrices
        with slowly decaying spectrums, or to increase precision accuracy. See
        [1]_ (pages 5, 23 and 26).

    n_iter : int or 'auto', default='auto'
        Number of power iterations. It can be used to deal with very noisy
        problems. When 'auto', it is set to 4, unless `n_components` is small
        (< .1 * min(M.shape)) in which case `n_iter` is set to 7.
        This improves precision with few components. Note that in general
        users should rather increase `n_oversamples` before increasing `n_iter`
        as the principle of the randomized method is to avoid the usage of
        these more costly power iteration steps. When `n_components` is equal
        to or greater than the effective matrix rank and the spectrum does not
        present a slow decay, `n_iter=0` or `1` should even work fine in theory
        (see [1]_ page 9).

        .. versionchanged:: 0.18

    power_iteration_normalizer : {'auto', 'QR', 'LU', 'none'}, default='auto'
        Whether the power iterations are normalized with step-by-step
        QR factorization (the slowest but most accurate), 'none'
        (the fastest but numerically unstable when `n_iter` is large, e.g.
        typically 5 or larger), or 'LU' factorization (numerically stable
        but can lose slightly in accuracy). The 'auto' mode applies no
        normalization if `n_iter` <= 2 and switches to LU otherwise.

        .. versionadded:: 0.18

    transpose : bool or 'auto', default='auto'
        Whether the algorithm should be applied to M.T instead of M. The
        result should approximately be the same. The 'auto' mode will
        trigger the transposition if M.shape[1] > M.shape[0] since this
        implementation of randomized SVD tends to be a little faster in that
        case.

        .. versionchanged:: 0.18

    flip_sign : bool, default=True
        The output of a singular value decomposition is only unique up to the
        signs of the singular vectors. If `flip_sign` is set to `True`, the
        sign ambiguity is resolved by making the largest loadings for each
        component in the left singular vectors positive.

    random_state : int, RandomState instance or None, default=None
        The seed of the pseudo random number generator to use when
        shuffling the data, i.e. getting the random vectors to initialize
        the algorithm. Pass an int for reproducible results across multiple
        function calls. See :term:`Glossary <random_state>`.

        .. versionchanged:: 1.2
            The default value changed from 0 to None.

    svd_lapack_driver : {"gesdd", "gesvd"}, default="gesdd"
        Whether to use the more efficient divide-and-conquer approach
        (`"gesdd"`) or the more general rectangular approach (`"gesvd"`) to
        compute the SVD of the matrix B, which is the projection of M into a
        low dimensional subspace, as described in [1]_.

        .. versionadded:: 1.2

    Returns
    -------
    u : ndarray of shape (n_samples, n_components)
        Unitary matrix having left singular vectors with signs flipped as columns.
    s : ndarray of shape (n_components,)
        The singular values, sorted in non-increasing order.
    vh : ndarray of shape (n_components, n_features)
        Unitary matrix having right singular vectors with signs flipped as rows.

    Notes
    -----
    This algorithm finds a (usually very good) approximate truncated
    singular value decomposition using randomization to speed up the
    computations. It is particularly fast on large matrices on which
    you wish to extract only a small number of components. In order to
    obtain further speed up, `n_iter` can be set <=2 (at the cost of
    loss of precision). To increase the precision it is recommended to
    increase `n_oversamples`, up to `2*k-n_components` where k is the
    effective rank. Usually, `n_components` is chosen to be greater than k
    so increasing `n_oversamples` up to `n_components` should be enough.

    References
    ----------
    .. [1] :arxiv:`"Finding structure with randomness:
        Stochastic algorithms for constructing approximate matrix decompositions"
        <0909.4061>`
        Halko, et al. (2009)

    .. [2] A randomized algorithm for the decomposition of matrices
        Per-Gunnar Martinsson, Vladimir Rokhlin and Mark Tygert

    .. [3] An implementation of a randomized algorithm for principal component
        analysis A. Szlam et al. 2014

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.utils.extmath import randomized_svd
    >>> a = np.array([[1, 2, 3, 5],
    ...               [3, 4, 5, 6],
    ...               [7, 8, 9, 10]])
    >>> U, s, Vh = randomized_svd(a, n_components=2, random_state=0)
    >>> U.shape, s.shape, Vh.shape
    ((3, 2), (2,), (2, 4))
471 """
472 if sparse.issparse(M) and M.format in ("lil", "dok"):
473 warnings.warn(
474 "Calculating SVD of a {} is expensive. "
475 "csr_matrix is more efficient.".format(type(M).__name__),
476 sparse.SparseEfficiencyWarning,
477 )
478
479 random_state = check_random_state(random_state)
480 n_random = n_components + n_oversamples
481 n_samples, n_features = M.shape
482
483 if n_iter == "auto":
484 # Checks if the number of iterations is explicitly specified
485 # Adjust n_iter. 7 was found a good compromise for PCA. See #5299
486 n_iter = 7 if n_components < 0.1 * min(M.shape) else 4
487
488 if transpose == "auto":
489 transpose = n_samples < n_features
490 if transpose:
491 # this implementation is a bit faster with smaller shape[1]
492 M = M.T
493
494 Q = randomized_range_finder(
495 M,
496 size=n_random,
497 n_iter=n_iter,
498 power_iteration_normalizer=power_iteration_normalizer,
499 random_state=random_state,
500 )
501
502 # project M to the (k + p) dimensional space using the basis vectors
503 B = Q.T @ M
504
505 # compute the SVD on the thin matrix: (k + p) wide
506 xp, is_array_api_compliant = get_namespace(B)
507 if is_array_api_compliant:
508 Uhat, s, Vt = xp.linalg.svd(B, full_matrices=False)
509 else:
510 # When when array_api_dispatch is disabled, rely on scipy.linalg
511 # instead of numpy.linalg to avoid introducing a behavior change w.r.t.
512 # previous versions of scikit-learn.
513 Uhat, s, Vt = linalg.svd(
514 B, full_matrices=False, lapack_driver=svd_lapack_driver
515 )
516 del B
517 U = Q @ Uhat
518
519 if flip_sign:
520 if not transpose:
521 U, Vt = svd_flip(U, Vt)
522 else:
523 # In case of transpose u_based_decision=false
524 # to actually flip based on u and not v.
525 U, Vt = svd_flip(U, Vt, u_based_decision=False)
526
527 if transpose:
528 # transpose back the results according to the input convention
529 return Vt[:n_components, :].T, s[:n_components], U[:, :n_components].T
530 else:
531 return U[:, :n_components], s[:n_components], Vt[:n_components, :]
532
533
def _randomized_eigsh(
    M,
    n_components,
    *,
    n_oversamples=10,
    n_iter="auto",
    power_iteration_normalizer="auto",
    selection="module",
    random_state=None,
):
    """Compute a truncated eigendecomposition using randomized methods.

    This method solves the fixed-rank approximation problem described in the
    Halko et al paper.

    The choice of which components to select can be tuned with the `selection`
    parameter.

    .. versionadded:: 0.24

    Parameters
    ----------
    M : ndarray or sparse matrix
        Matrix to decompose. It should be real symmetric square or complex
        Hermitian.

    n_components : int
        Number of eigenvalues and vectors to extract.

    n_oversamples : int, default=10
        Additional number of random vectors to sample the range of M so as
        to ensure proper conditioning. The total number of random vectors
        used to find the range of M is n_components + n_oversamples. A smaller
        number can improve speed but can negatively impact the quality of the
        approximation of eigenvectors and eigenvalues. Users might wish
        to increase this parameter up to `2*k - n_components` where k is the
        effective rank, for large matrices, noisy problems, matrices with
        slowly decaying spectrums, or to increase precision accuracy. See Halko
        et al (pages 5, 23 and 26).

    n_iter : int or 'auto', default='auto'
        Number of power iterations. It can be used to deal with very noisy
        problems. When 'auto', it is set to 4, unless `n_components` is small
        (< .1 * min(M.shape)) in which case `n_iter` is set to 7.
        This improves precision with few components. Note that in general
        users should rather increase `n_oversamples` before increasing `n_iter`
        as the principle of the randomized method is to avoid the usage of
        these more costly power iteration steps. When `n_components` is equal
        to or greater than the effective matrix rank and the spectrum does not
        present a slow decay, `n_iter=0` or `1` should even work fine in theory
        (see Halko et al paper, page 9).

    power_iteration_normalizer : {'auto', 'QR', 'LU', 'none'}, default='auto'
        Whether the power iterations are normalized with step-by-step
        QR factorization (the slowest but most accurate), 'none'
        (the fastest but numerically unstable when `n_iter` is large, e.g.
        typically 5 or larger), or 'LU' factorization (numerically stable
        but can lose slightly in accuracy). The 'auto' mode applies no
        normalization if `n_iter` <= 2 and switches to LU otherwise.

    selection : {'value', 'module'}, default='module'
        Strategy used to select the n components. When `selection` is `'value'`
        (not yet implemented, will become the default when implemented), the
        components corresponding to the n largest eigenvalues are returned.
        When `selection` is `'module'`, the components corresponding to the n
        eigenvalues with largest moduli are returned.

    random_state : int, RandomState instance or None, default=None
        The seed of the pseudo random number generator to use when shuffling
        the data, i.e. getting the random vectors to initialize the algorithm.
        Pass an int for reproducible results across multiple function calls.
        See :term:`Glossary <random_state>`.

    Notes
    -----
    This algorithm finds a (usually very good) approximate truncated
    eigendecomposition using randomized methods to speed up the computations.

    This method is particularly fast on large matrices on which
    you wish to extract only a small number of components. In order to
    obtain further speed up, `n_iter` can be set <=2 (at the cost of
    loss of precision). To increase the precision it is recommended to
    increase `n_oversamples`, up to `2*k-n_components` where k is the
    effective rank. Usually, `n_components` is chosen to be greater than k
    so increasing `n_oversamples` up to `n_components` should be enough.

    Strategy 'value': not implemented yet.
    Algorithms 5.3, 5.4 and 5.5 in the Halko et al paper should provide good
    candidates for a future implementation.

    Strategy 'module':
    The principle is that for normal matrices (in particular real symmetric
    and complex Hermitian ones), the singular values and eigenvalues are
    related: if t is an eigenvalue of A, then :math:`|t|` is a singular value
    of A. This method relies on a randomized SVD to find the n singular
    components corresponding to the n singular values with largest moduli, and
    then uses the signs of the singular vectors to find the true sign of t:
    if the signs of the left and right singular vectors are different, then
    the corresponding eigenvalue is negative.

    Returns
    -------
    eigvals : 1D array of shape (n_components,) containing the `n_components`
        eigenvalues selected (see ``selection`` parameter).
    eigvecs : 2D array of shape (M.shape[0], n_components) containing the
        `n_components` eigenvectors corresponding to the `eigvals`, in the
        corresponding order. Note that this follows the `scipy.linalg.eigh`
        convention.

    See Also
    --------
    :func:`randomized_svd`

    References
    ----------
    * :arxiv:`"Finding structure with randomness:
      Stochastic algorithms for constructing approximate matrix decompositions"
      (Algorithm 4.3 for strategy 'module') <0909.4061>`
      Halko, et al. (2009)
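
    Examples
    --------
    A shape-oriented sketch on a small symmetric matrix (eigenvector signs
    depend on the random draw):

    >>> import numpy as np
    >>> from sklearn.utils.extmath import _randomized_eigsh
    >>> M = np.diag(np.array([5.0, -4.0, 3.0, 0.0]))
    >>> eigvals, eigvecs = _randomized_eigsh(M, n_components=2, random_state=0)
    >>> eigvals.shape, eigvecs.shape
    ((2,), (4, 2))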
652 """
653 if selection == "value": # pragma: no cover
654 # to do : an algorithm can be found in the Halko et al reference
655 raise NotImplementedError()
656
657 elif selection == "module":
658 # Note: no need for deterministic U and Vt (flip_sign=True),
659 # as we only use the dot product UVt afterwards
660 U, S, Vt = randomized_svd(
661 M,
662 n_components=n_components,
663 n_oversamples=n_oversamples,
664 n_iter=n_iter,
665 power_iteration_normalizer=power_iteration_normalizer,
666 flip_sign=False,
667 random_state=random_state,
668 )
669
670 eigvecs = U[:, :n_components]
671 eigvals = S[:n_components]
672
673 # Conversion of Singular values into Eigenvalues:
674 # For any eigenvalue t, the corresponding singular value is |t|.
675 # So if there is a negative eigenvalue t, the corresponding singular
676 # value will be -t, and the left (U) and right (V) singular vectors
677 # will have opposite signs.
678 # Fastest way: see <https://stackoverflow.com/a/61974002/7262247>
679 diag_VtU = np.einsum("ji,ij->j", Vt[:n_components, :], U[:, :n_components])
680 signs = np.sign(diag_VtU)
681 eigvals = eigvals * signs
682
683 else: # pragma: no cover
684 raise ValueError("Invalid `selection`: %r" % selection)
685
686 return eigvals, eigvecs
687
688
def weighted_mode(a, w, *, axis=0):
    """Return an array of the weighted modal (most common) value in the passed array.

    If there is more than one such value, only the first is returned.
    The bin-count for the modal bins is also returned.

    This is an extension of the algorithm in scipy.stats.mode.

    Parameters
    ----------
    a : array-like of shape (n_samples,)
        Array in which to find the modal values.
    w : array-like of shape (n_samples,)
        Array of weights for each value.
    axis : int, default=0
        Axis along which to operate. Default is 0, i.e. the first axis.

    Returns
    -------
    vals : ndarray
        Array of modal values.
    score : ndarray
        Array of weighted counts for each mode.

    See Also
    --------
    scipy.stats.mode : Calculates the modal (most common) value of array elements
        along the specified axis.

    Examples
    --------
    >>> from sklearn.utils.extmath import weighted_mode
    >>> x = [4, 1, 4, 2, 4, 2]
    >>> weights = [1, 1, 1, 1, 1, 1]
    >>> weighted_mode(x, weights)
    (array([4.]), array([3.]))

    The value 4 appears three times: with uniform weights, the result is
    simply the mode of the distribution.

    >>> weights = [1, 3, 0.5, 1.5, 1, 2]  # deweight the 4's
    >>> weighted_mode(x, weights)
    (array([2.]), array([3.5]))

    The value 2 has the highest score: it appears twice with weights of
    1.5 and 2: the sum of these is 3.5.
735 """
736 if axis is None:
737 a = np.ravel(a)
738 w = np.ravel(w)
739 axis = 0
740 else:
741 a = np.asarray(a)
742 w = np.asarray(w)
743
744 if a.shape != w.shape:
745 w = np.full(a.shape, w, dtype=w.dtype)
746
747 scores = np.unique(np.ravel(a)) # get ALL unique values
748 testshape = list(a.shape)
749 testshape[axis] = 1
750 oldmostfreq = np.zeros(testshape)
751 oldcounts = np.zeros(testshape)
752 for score in scores:
753 template = np.zeros(a.shape)
754 ind = a == score
755 template[ind] = w[ind]
756 counts = np.expand_dims(np.sum(template, axis), axis)
757 mostfrequent = np.where(counts > oldcounts, score, oldmostfreq)
758 oldcounts = np.maximum(counts, oldcounts)
759 oldmostfreq = mostfrequent
760 return mostfrequent, oldcounts
761
762
def cartesian(arrays, out=None):
    """Generate a cartesian product of input arrays.

    Parameters
    ----------
    arrays : list of array-like
        1-D arrays to form the cartesian product of.
    out : ndarray of shape (M, len(arrays)), default=None
        Array to place the cartesian product in.

    Returns
    -------
    out : ndarray of shape (M, len(arrays))
        Array containing the cartesian products formed of input arrays.
        If not provided, the `dtype` of the output array is set to the most
        permissive `dtype` of the input arrays, according to NumPy type
        promotion.

        .. versionadded:: 1.2
            Add support for arrays of different types.

    Notes
    -----
    This function may not be used on more than 32 arrays
    because the underlying numpy functions do not support it.

    Examples
    --------
    >>> from sklearn.utils.extmath import cartesian
    >>> cartesian(([1, 2, 3], [4, 5], [6, 7]))
    array([[1, 4, 6],
           [1, 4, 7],
           [1, 5, 6],
           [1, 5, 7],
           [2, 4, 6],
           [2, 4, 7],
           [2, 5, 6],
           [2, 5, 7],
           [3, 4, 6],
           [3, 4, 7],
           [3, 5, 6],
           [3, 5, 7]])
805 """
806 arrays = [np.asarray(x) for x in arrays]
807 shape = (len(x) for x in arrays)
808
809 ix = np.indices(shape)
810 ix = ix.reshape(len(arrays), -1).T
811
812 if out is None:
813 dtype = np.result_type(*arrays) # find the most permissive dtype
814 out = np.empty_like(ix, dtype=dtype)
815
816 for n, arr in enumerate(arrays):
817 out[:, n] = arrays[n][ix[:, n]]
818
819 return out
820
821
def svd_flip(u, v, u_based_decision=True):
    """Sign correction to ensure deterministic output from SVD.

    Adjusts the columns of u and the rows of v such that the loadings in the
    columns in u that are largest in absolute value are always positive.

    If u_based_decision is False, then the same sign correction is applied so
    that the rows in v that are largest in absolute value are always positive.

    Parameters
    ----------
    u : ndarray
        Parameters u and v are the output of `linalg.svd` or
        :func:`~sklearn.utils.extmath.randomized_svd`, with matching inner
        dimensions so one can compute `np.dot(u * s, v)`.

    v : ndarray
        Parameters u and v are the output of `linalg.svd` or
        :func:`~sklearn.utils.extmath.randomized_svd`, with matching inner
        dimensions so one can compute `np.dot(u * s, v)`. The input v should
        really be called vt to be consistent with scipy's output.

    u_based_decision : bool, default=True
        If True, use the columns of u as the basis for sign flipping.
        Otherwise, use the rows of v. The choice of which variable to base the
        decision on is generally algorithm dependent.

    Returns
    -------
    u_adjusted : ndarray
        Array u with adjusted columns and the same dimensions as u.

    v_adjusted : ndarray
        Array v with adjusted rows and the same dimensions as v.
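
    Examples
    --------
    A small illustrative call (the largest-magnitude entry of each column of
    `u` becomes positive, and `v` is flipped accordingly):

    >>> import numpy as np
    >>> from sklearn.utils.extmath import svd_flip
    >>> u = np.array([[-1.0, 0.0], [0.0, 1.0]])
    >>> v = np.array([[1.0, 0.0], [0.0, -1.0]])
    >>> u_adjusted, v_adjusted = svd_flip(u, v)
    >>> u_adjusted
    array([[1., 0.],
           [0., 1.]])
    >>> v_adjusted
    array([[-1.,  0.],
           [ 0., -1.]])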
857 """
858 xp, _ = get_namespace(u, v)
859 device = getattr(u, "device", None)
860
861 if u_based_decision:
862 # columns of u, rows of v, or equivalently rows of u.T and v
863 max_abs_u_cols = xp.argmax(xp.abs(u.T), axis=1)
864 shift = xp.arange(u.T.shape[0], device=device)
865 indices = max_abs_u_cols + shift * u.T.shape[1]
866 signs = xp.sign(xp.take(xp.reshape(u.T, (-1,)), indices, axis=0))
867 u *= signs[np.newaxis, :]
868 v *= signs[:, np.newaxis]
869 else:
870 # rows of v, columns of u
871 max_abs_v_rows = xp.argmax(xp.abs(v), axis=1)
872 shift = xp.arange(v.shape[0], device=device)
873 indices = max_abs_v_rows + shift * v.shape[1]
874 signs = xp.sign(xp.take(xp.reshape(v, (-1,)), indices))
875 u *= signs[np.newaxis, :]
876 v *= signs[:, np.newaxis]
877 return u, v
878
879
# TODO(1.6): remove
@deprecated(  # type: ignore
    "The function `log_logistic` is deprecated and will be removed in 1.6. "
    "Use `-np.logaddexp(0, -x)` instead."
)
def log_logistic(X, out=None):
    """Compute the log of the logistic function, ``log(1 / (1 + e ** -x))``.

    This implementation is numerically stable and uses `-np.logaddexp(0, -x)`.

    For the ordinary logistic function, use ``scipy.special.expit``.

    Parameters
    ----------
    X : array-like of shape (M, N) or (M,)
        Argument to the logistic function.

    out : array-like of shape (M, N) or (M,), default=None
        Preallocated output array.

    Returns
    -------
    out : ndarray of shape (M, N) or (M,)
        Log of the logistic function evaluated at every point in X.

    Notes
    -----
    See the blog post describing this implementation:
    http://fa.bianp.net/blog/2013/numerical-optimizers-for-logistic-regression/
    """
    X = check_array(X, dtype=np.float64, ensure_2d=False)

    if out is None:
        out = np.empty_like(X)

    np.logaddexp(0, -X, out=out)
    out *= -1
    return out


def softmax(X, copy=True):
    """
    Calculate the softmax function.

    The softmax function is calculated by
    np.exp(X) / np.sum(np.exp(X), axis=1)

    This will cause overflow when large values are exponentiated.
    Hence the largest value in each row is subtracted from each data
    point to prevent this.

    Parameters
    ----------
    X : array-like of float of shape (M, N)
        Argument to the softmax function.

    copy : bool, default=True
        Copy X or not.

    Returns
    -------
    out : ndarray of shape (M, N)
        Softmax function evaluated at every point in X.
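
    Examples
    --------
    A minimal illustrative call (each row sums to one; the values here are
    exact because the inputs within each row are equal):

    >>> import numpy as np
    >>> from sklearn.utils.extmath import softmax
    >>> softmax(np.array([[0.0, 0.0], [1.0, 1.0]]))
    array([[0.5, 0.5],
           [0.5, 0.5]])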
943 """
944 xp, is_array_api_compliant = get_namespace(X)
945 if copy:
946 X = xp.asarray(X, copy=True)
947 max_prob = xp.reshape(xp.max(X, axis=1), (-1, 1))
948 X -= max_prob
949
950 if _is_numpy_namespace(xp):
951 # optimization for NumPy arrays
952 np.exp(X, out=np.asarray(X))
953 else:
954 # array_api does not have `out=`
955 X = xp.exp(X)
956
957 sum_prob = xp.reshape(xp.sum(X, axis=1), (-1, 1))
958 X /= sum_prob
959 return X
960
961
def make_nonnegative(X, min_value=0):
    """Ensure `X.min()` >= `min_value`.

    Parameters
    ----------
    X : array-like
        The matrix to make non-negative.
    min_value : float, default=0
        The threshold value.

    Returns
    -------
    array-like
        The thresholded array.

    Raises
    ------
    ValueError
        When X is sparse.
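
    Examples
    --------
    An illustrative shift of a dense array whose minimum is below the
    threshold:

    >>> import numpy as np
    >>> from sklearn.utils.extmath import make_nonnegative
    >>> make_nonnegative(np.array([[-2.0, 1.0]]))
    array([[0., 3.]])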
981 """
982 min_ = X.min()
983 if min_ < min_value:
984 if sparse.issparse(X):
985 raise ValueError(
986 "Cannot make the data matrix"
987 " nonnegative because it is sparse."
988 " Adding a value to every entry would"
989 " make it no longer sparse."
990 )
991 X = X + (min_value - min_)
992 return X
993
994
# Use at least float64 for the accumulating functions to avoid precision issues;
# see https://github.com/numpy/numpy/issues/9393. The float64 result is also
# retained as is, in case a narrower float would overflow.
def _safe_accumulator_op(op, x, *args, **kwargs):
    """
    This function provides numpy accumulator functions with a float64 dtype
    when used on a floating point input. This prevents accumulator overflow on
    smaller floating point dtypes.

    Parameters
    ----------
    op : function
        A numpy accumulator function such as np.mean or np.sum.
    x : ndarray
        A numpy array on which to apply the accumulator function.
    *args : positional arguments
        Positional arguments passed to the accumulator function after the
        input x.
    **kwargs : keyword arguments
        Keyword arguments passed to the accumulator function.

    Returns
    -------
    result
        The output of the accumulator function passed to this function.
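
    Examples
    --------
    A hedged sketch of the dtype upcast on a float32 input:

    >>> import numpy as np
    >>> from sklearn.utils.extmath import _safe_accumulator_op
    >>> x = np.full(10, 0.1, dtype=np.float32)
    >>> _safe_accumulator_op(np.sum, x).dtype
    dtype('float64')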
1020 """
1021 if np.issubdtype(x.dtype, np.floating) and x.dtype.itemsize < 8:
1022 result = op(x, *args, **kwargs, dtype=np.float64)
1023 else:
1024 result = op(x, *args, **kwargs)
1025 return result
1026
1027
def _incremental_mean_and_var(
    X, last_mean, last_variance, last_sample_count, sample_weight=None
):
    """Calculate mean update and a Youngs and Cramer variance update.

    If sample_weight is given, the weighted mean and variance is computed.

    Update a given mean and (possibly) variance according to new data given
    in X. last_mean is always required to compute the new mean.
    If last_variance is None, no variance is computed and None is returned
    for updated_variance.

    From the paper "Algorithms for computing the sample variance: analysis and
    recommendations", by Chan, Golub, and LeVeque.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Data to use for variance update.

    last_mean : array-like of shape (n_features,)

    last_variance : array-like of shape (n_features,)

    last_sample_count : array-like of shape (n_features,)
        The number of samples encountered until now if sample_weight is None.
        If sample_weight is not None, this is the sum of sample_weight
        encountered.

    sample_weight : array-like of shape (n_samples,) or None
        Sample weights. If None, compute the unweighted mean/variance.

    Returns
    -------
    updated_mean : ndarray of shape (n_features,)

    updated_variance : ndarray of shape (n_features,)
        None if last_variance was None.

    updated_sample_count : ndarray of shape (n_features,)

    Notes
    -----
    NaNs are ignored during the algorithm.

    References
    ----------
    T. Chan, G. Golub, R. LeVeque. Algorithms for computing the sample
    variance: analysis and recommendations, The American Statistician, Vol.
    37, No. 3, pp. 242-247

    Also, see the sparse implementation of this in
    `utils.sparsefuncs.incr_mean_variance_axis` and
    `utils.sparsefuncs_fast.incr_mean_variance_axis0`
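
    Examples
    --------
    A hedged two-batch sketch: feeding the data in two increments reproduces
    the global mean:

    >>> import numpy as np
    >>> from sklearn.utils.extmath import _incremental_mean_and_var
    >>> X1 = np.array([[1.0], [3.0]])
    >>> X2 = np.array([[5.0], [7.0]])
    >>> mean, _, count = _incremental_mean_and_var(
    ...     X1, last_mean=0.0, last_variance=None, last_sample_count=np.array([0.0])
    ... )
    >>> mean, count
    (array([2.]), array([2.]))
    >>> mean, _, count = _incremental_mean_and_var(X2, mean, None, count)
    >>> mean
    array([4.])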
1082 """
1083 # old = stats until now
1084 # new = the current increment
1085 # updated = the aggregated stats
1086 last_sum = last_mean * last_sample_count
1087 X_nan_mask = np.isnan(X)
1088 if np.any(X_nan_mask):
1089 sum_op = np.nansum
1090 else:
1091 sum_op = np.sum
1092 if sample_weight is not None:
1093 # equivalent to np.nansum(X * sample_weight, axis=0)
1094 # safer because np.float64(X*W) != np.float64(X)*np.float64(W)
1095 new_sum = _safe_accumulator_op(
1096 np.matmul, sample_weight, np.where(X_nan_mask, 0, X)
1097 )
1098 new_sample_count = _safe_accumulator_op(
1099 np.sum, sample_weight[:, None] * (~X_nan_mask), axis=0
1100 )
1101 else:
1102 new_sum = _safe_accumulator_op(sum_op, X, axis=0)
1103 n_samples = X.shape[0]
1104 new_sample_count = n_samples - np.sum(X_nan_mask, axis=0)
1105
1106 updated_sample_count = last_sample_count + new_sample_count
1107
1108 updated_mean = (last_sum + new_sum) / updated_sample_count
1109
1110 if last_variance is None:
1111 updated_variance = None
1112 else:
1113 T = new_sum / new_sample_count
1114 temp = X - T
1115 if sample_weight is not None:
1116 # equivalent to np.nansum((X-T)**2 * sample_weight, axis=0)
1117 # safer because np.float64(X*W) != np.float64(X)*np.float64(W)
1118 correction = _safe_accumulator_op(
1119 np.matmul, sample_weight, np.where(X_nan_mask, 0, temp)
1120 )
1121 temp **= 2
1122 new_unnormalized_variance = _safe_accumulator_op(
1123 np.matmul, sample_weight, np.where(X_nan_mask, 0, temp)
1124 )
1125 else:
1126 correction = _safe_accumulator_op(sum_op, temp, axis=0)
1127 temp **= 2
1128 new_unnormalized_variance = _safe_accumulator_op(sum_op, temp, axis=0)
1129
1130 # correction term of the corrected 2 pass algorithm.
1131 # See "Algorithms for computing the sample variance: analysis
1132 # and recommendations", by Chan, Golub, and LeVeque.
1133 new_unnormalized_variance -= correction**2 / new_sample_count
1134
1135 last_unnormalized_variance = last_variance * last_sample_count
1136
1137 with np.errstate(divide="ignore", invalid="ignore"):
1138 last_over_new_count = last_sample_count / new_sample_count
1139 updated_unnormalized_variance = (
1140 last_unnormalized_variance
1141 + new_unnormalized_variance
1142 + last_over_new_count
1143 / updated_sample_count
1144 * (last_sum / last_over_new_count - new_sum) ** 2
1145 )
1146
1147 zeros = last_sample_count == 0
1148 updated_unnormalized_variance[zeros] = new_unnormalized_variance[zeros]
1149 updated_variance = updated_unnormalized_variance / updated_sample_count
1150
1151 return updated_mean, updated_variance, updated_sample_count
1152
1153
def _deterministic_vector_sign_flip(u):
    """Modify the sign of vectors for reproducibility.

    Flips the sign of elements of all the vectors (rows of u) such that
    the absolute maximum element of each vector is positive.

    Parameters
    ----------
    u : ndarray
        Array with vectors as its rows.

    Returns
    -------
    u_flipped : ndarray with same shape as u
        Array with the sign flipped vectors as its rows.
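
    Examples
    --------
    An illustrative flip of a single row whose largest-magnitude entry is
    negative:

    >>> import numpy as np
    >>> from sklearn.utils.extmath import _deterministic_vector_sign_flip
    >>> _deterministic_vector_sign_flip(np.array([[1.0, -3.0]]))
    array([[-1.,  3.]])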
1169 """
1170 max_abs_rows = np.argmax(np.abs(u), axis=1)
1171 signs = np.sign(u[range(u.shape[0]), max_abs_rows])
1172 u *= signs[:, np.newaxis]
1173 return u
1174
1175
def stable_cumsum(arr, axis=None, rtol=1e-05, atol=1e-08):
    """Use high precision for cumsum and check that final value matches sum.

    Warns if the final cumulative sum does not match the sum (up to the chosen
    tolerance).

    Parameters
    ----------
    arr : array-like
        The array to be cumulatively summed (flattened when `axis` is None).
    axis : int, default=None
        Axis along which the cumulative sum is computed.
        The default (None) is to compute the cumsum over the flattened array.
    rtol : float, default=1e-05
        Relative tolerance, see ``np.allclose``.
    atol : float, default=1e-08
        Absolute tolerance, see ``np.allclose``.

    Returns
    -------
    out : ndarray
        Array with the cumulative sums along the chosen axis.
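
    Examples
    --------
    A minimal illustrative call (the accumulation is done in float64):

    >>> import numpy as np
    >>> from sklearn.utils.extmath import stable_cumsum
    >>> stable_cumsum(np.array([1, 2, 3]))
    array([1., 3., 6.])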
1198 """
1199 out = np.cumsum(arr, axis=axis, dtype=np.float64)
1200 expected = np.sum(arr, axis=axis, dtype=np.float64)
1201 if not np.allclose(
1202 out.take(-1, axis=axis), expected, rtol=rtol, atol=atol, equal_nan=True
1203 ):
1204 warnings.warn(
1205 (
1206 "cumsum was found to be unstable: "
1207 "its last element does not correspond to sum"
1208 ),
1209 RuntimeWarning,
1210 )
1211 return out
1212
1213
def _nanaverage(a, weights=None):
    """Compute the weighted average, ignoring NaNs.

    Parameters
    ----------
    a : ndarray
        Array containing data to be averaged.
    weights : array-like, default=None
        An array of weights associated with the values in a. Each value in a
        contributes to the average according to its associated weight. The
        weights array must be 1-D with the same shape as a. If `weights=None`,
        then all data in a are assumed to have a weight equal to one.

    Returns
    -------
    weighted_average : float
        The weighted average.

    Notes
    -----
    This wrapper combines :func:`numpy.average` and :func:`numpy.nanmean`, so
    that ``np.nan`` values are ignored from the average and weights can
    be passed. Note that when possible, we delegate to those methods directly.
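
    Examples
    --------
    A hedged illustrative call that ignores the NaN entry (``float`` keeps the
    doctest output independent of the NumPy scalar repr):

    >>> import numpy as np
    >>> from sklearn.utils.extmath import _nanaverage
    >>> a = np.array([1.0, np.nan, 3.0])
    >>> float(_nanaverage(a, weights=np.array([1.0, 1.0, 3.0])))
    2.5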
1237 """
1238
1239 if len(a) == 0:
1240 return np.nan
1241
1242 mask = np.isnan(a)
1243 if mask.all():
1244 return np.nan
1245
1246 if weights is None:
1247 return np.nanmean(a)
1248
1249 weights = np.array(weights, copy=False)
1250 a, weights = a[~mask], weights[~mask]
1251 try:
1252 return np.average(a, weights=weights)
1253 except ZeroDivisionError:
1254 # this is when all weights are zero, then ignore them
1255 return np.average(a)