Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/scipy/stats/

1from functools import cached_property

3import numpy as np

4from scipy import linalg

5from scipy.stats import _multivariate

8__all__ = ["Covariance"]

11class Covariance:

12 """

13 Representation of a covariance matrix

15 Calculations involving covariance matrices (e.g. data whitening,

16 multivariate normal function evaluation) are often performed more

17 efficiently using a decomposition of the covariance matrix instead of the

18 covariance metrix itself. This class allows the user to construct an

19 object representing a covariance matrix using any of several

20 decompositions and perform calculations using a common interface.

22 .. note::

24 The `Covariance` class cannot be instantiated directly. Instead, use

25 one of the factory methods (e.g. `Covariance.from_diagonal`).

27 Examples

28 --------

29 The `Covariance` class is is used by calling one of its

30 factory methods to create a `Covariance` object, then pass that

31 representation of the `Covariance` matrix as a shape parameter of a

32 multivariate distribution.

34 For instance, the multivariate normal distribution can accept an array

35 representing a covariance matrix:

37 >>> from scipy import stats

38 >>> import numpy as np

39 >>> d = [1, 2, 3]

40 >>> A = np.diag(d) # a diagonal covariance matrix

41 >>> x = [4, -2, 5] # a point of interest

42 >>> dist = stats.multivariate_normal(mean=[0, 0, 0], cov=A)

43 >>> dist.pdf(x)

44 4.9595685102808205e-08

46 but the calculations are performed in a very generic way that does not

47 take advantage of any special properties of the covariance matrix. Because

48 our covariance matrix is diagonal, we can use ``Covariance.from_diagonal``

49 to create an object representing the covariance matrix, and

50 `multivariate_normal` can use this to compute the probability density

51 function more efficiently.

53 >>> cov = stats.Covariance.from_diagonal(d)

54 >>> dist = stats.multivariate_normal(mean=[0, 0, 0], cov=cov)

55 >>> dist.pdf(x)

56 4.9595685102808205e-08

58 """

59 def __init__(self):

60 message = ("The `Covariance` class cannot be instantiated directly. "

61 "Please use one of the factory methods "

62 "(e.g. `Covariance.from_diagonal`).")

63 raise NotImplementedError(message)

65 @staticmethod

66 def from_diagonal(diagonal):

67 r"""

68 Return a representation of a covariance matrix from its diagonal.

70 Parameters

71 ----------

72 diagonal : array_like

73 The diagonal elements of a diagonal matrix.

75 Notes

76 -----

77 Let the diagonal elements of a diagonal covariance matrix :math:`D` be

78 stored in the vector :math:`d`.

80 When all elements of :math:`d` are strictly positive, whitening of a

81 data point :math:`x` is performed by computing

82 :math:`x \cdot d^{-1/2}`, where the inverse square root can be taken

83 element-wise.

84 :math:`\log\det{D}` is calculated as :math:`-2 \sum(\log{d})`,

85 where the :math:`\log` operation is performed element-wise.

87 This `Covariance` class supports singular covariance matrices. When

88 computing ``_log_pdet``, non-positive elements of :math:`d` are

89 ignored. Whitening is not well defined when the point to be whitened

90 does not lie in the span of the columns of the covariance matrix. The

91 convention taken here is to treat the inverse square root of

92 non-positive elements of :math:`d` as zeros.

94 Examples

95 --------

96 Prepare a symmetric positive definite covariance matrix ``A`` and a

97 data point ``x``.

99 >>> import numpy as np

100 >>> from scipy import stats

101 >>> rng = np.random.default_rng()

102 >>> n = 5

103 >>> A = np.diag(rng.random(n))

104 >>> x = rng.random(size=n)

105

106 Extract the diagonal from ``A`` and create the `Covariance` object.

107

108 >>> d = np.diag(A)

109 >>> cov = stats.Covariance.from_diagonal(d)

110

111 Compare the functionality of the `Covariance` object against a

112 reference implementations.

113

114 >>> res = cov.whiten(x)

115 >>> ref = np.diag(d**-0.5) @ x

116 >>> np.allclose(res, ref)

117 True

118 >>> res = cov.log_pdet

119 >>> ref = np.linalg.slogdet(A)[-1]

120 >>> np.allclose(res, ref)

121 True

122

123 """

124 return CovViaDiagonal(diagonal)

125

126 @staticmethod

127 def from_precision(precision, covariance=None):

128 r"""

129 Return a representation of a covariance from its precision matrix.

130

131 Parameters

132 ----------

133 precision : array_like

134 The precision matrix; that is, the inverse of a square, symmetric,

135 positive definite covariance matrix.

136 covariance : array_like, optional

137 The square, symmetric, positive definite covariance matrix. If not

138 provided, this may need to be calculated (e.g. to evaluate the

139 cumulative distribution function of

140 `scipy.stats.multivariate_normal`) by inverting `precision`.

141

142 Notes

143 -----

144 Let the covariance matrix be :math:`A`, its precision matrix be

145 :math:`P = A^{-1}`, and :math:`L` be the lower Cholesky factor such

146 that :math:`L L^T = P`.

147 Whitening of a data point :math:`x` is performed by computing

148 :math:`x^T L`. :math:`\log\det{A}` is calculated as

149 :math:`-2tr(\log{L})`, where the :math:`\log` operation is performed

150 element-wise.

151

152 This `Covariance` class does not support singular covariance matrices

153 because the precision matrix does not exist for a singular covariance

154 matrix.

155

156 Examples

157 --------

158 Prepare a symmetric positive definite precision matrix ``P`` and a

159 data point ``x``. (If the precision matrix is not already available,

160 consider the other factory methods of the `Covariance` class.)

161

162 >>> import numpy as np

163 >>> from scipy import stats

164 >>> rng = np.random.default_rng()

165 >>> n = 5

166 >>> P = rng.random(size=(n, n))

167 >>> P = P @ P.T # a precision matrix must be positive definite

168 >>> x = rng.random(size=n)

169

170 Create the `Covariance` object.

171

172 >>> cov = stats.Covariance.from_precision(P)

173

174 Compare the functionality of the `Covariance` object against

175 reference implementations.

176

177 >>> res = cov.whiten(x)

178 >>> ref = x @ np.linalg.cholesky(P)

179 >>> np.allclose(res, ref)

180 True

181 >>> res = cov.log_pdet

182 >>> ref = -np.linalg.slogdet(P)[-1]

183 >>> np.allclose(res, ref)

184 True

185

186 """

187 return CovViaPrecision(precision, covariance)

188

189 @staticmethod

190 def from_cholesky(cholesky):

191 r"""

192 Representation of a covariance provided via the (lower) Cholesky factor

193

194 Parameters

195 ----------

196 cholesky : array_like

197 The lower triangular Cholesky factor of the covariance matrix.

198

199 Notes

200 -----

201 Let the covariance matrix be :math:`A`and :math:`L` be the lower

202 Cholesky factor such that :math:`L L^T = A`.

203 Whitening of a data point :math:`x` is performed by computing

204 :math:`L^{-1} x`. :math:`\log\det{A}` is calculated as

205 :math:`2tr(\log{L})`, where the :math:`\log` operation is performed

206 element-wise.

207

208 This `Covariance` class does not support singular covariance matrices

209 because the Cholesky decomposition does not exist for a singular

210 covariance matrix.

211

212 Examples

213 --------

214 Prepare a symmetric positive definite covariance matrix ``A`` and a

215 data point ``x``.

216

217 >>> import numpy as np

218 >>> from scipy import stats

219 >>> rng = np.random.default_rng()

220 >>> n = 5

221 >>> A = rng.random(size=(n, n))

222 >>> A = A @ A.T # make the covariance symmetric positive definite

223 >>> x = rng.random(size=n)

224

225 Perform the Cholesky decomposition of ``A`` and create the

226 `Covariance` object.

227

228 >>> L = np.linalg.cholesky(A)

229 >>> cov = stats.Covariance.from_cholesky(L)

230

231 Compare the functionality of the `Covariance` object against

232 reference implementation.

233

234 >>> from scipy.linalg import solve_triangular

235 >>> res = cov.whiten(x)

236 >>> ref = solve_triangular(L, x, lower=True)

237 >>> np.allclose(res, ref)

238 True

239 >>> res = cov.log_pdet

240 >>> ref = np.linalg.slogdet(A)[-1]

241 >>> np.allclose(res, ref)

242 True

243

244 """

245 return CovViaCholesky(cholesky)

246

247 @staticmethod

248 def from_eigendecomposition(eigendecomposition):

249 r"""

250 Representation of a covariance provided via eigendecomposition

251

252 Parameters

253 ----------

254 eigendecomposition : sequence

255 A sequence (nominally a tuple) containing the eigenvalue and

256 eigenvector arrays as computed by `scipy.linalg.eigh` or

257 `numpy.linalg.eigh`.

258

259 Notes

260 -----

261 Let the covariance matrix be :math:`A`, let :math:`V` be matrix of

262 eigenvectors, and let :math:`W` be the diagonal matrix of eigenvalues

263 such that `V W V^T = A`.

264

265 When all of the eigenvalues are strictly positive, whitening of a

266 data point :math:`x` is performed by computing

267 :math:`x^T (V W^{-1/2})`, where the inverse square root can be taken

268 element-wise.

269 :math:`\log\det{A}` is calculated as :math:`tr(\log{W})`,

270 where the :math:`\log` operation is performed element-wise.

271

272 This `Covariance` class supports singular covariance matrices. When

273 computing ``_log_pdet``, non-positive eigenvalues are ignored.

274 Whitening is not well defined when the point to be whitened

275 does not lie in the span of the columns of the covariance matrix. The

276 convention taken here is to treat the inverse square root of

277 non-positive eigenvalues as zeros.

278

279 Examples

280 --------

281 Prepare a symmetric positive definite covariance matrix ``A`` and a

282 data point ``x``.

283

284 >>> import numpy as np

285 >>> from scipy import stats

286 >>> rng = np.random.default_rng()

287 >>> n = 5

288 >>> A = rng.random(size=(n, n))

289 >>> A = A @ A.T # make the covariance symmetric positive definite

290 >>> x = rng.random(size=n)

291

292 Perform the eigendecomposition of ``A`` and create the `Covariance`

293 object.

294

295 >>> w, v = np.linalg.eigh(A)

296 >>> cov = stats.Covariance.from_eigendecomposition((w, v))

297

298 Compare the functionality of the `Covariance` object against

299 reference implementations.

300

301 >>> res = cov.whiten(x)

302 >>> ref = x @ (v @ np.diag(w**-0.5))

303 >>> np.allclose(res, ref)

304 True

305 >>> res = cov.log_pdet

306 >>> ref = np.linalg.slogdet(A)[-1]

307 >>> np.allclose(res, ref)

308 True

309

310 """

311 return CovViaEigendecomposition(eigendecomposition)

312

313 def whiten(self, x):

314 """

315 Perform a whitening transformation on data.

316

317 "Whitening" ("white" as in "white noise", in which each frequency has

318 equal magnitude) transforms a set of random variables into a new set of

319 random variables with unit-diagonal covariance. When a whitening

320 transform is applied to a sample of points distributed according to

321 a multivariate normal distribution with zero mean, the covariance of

322 the transformed sample is approximately the identity matrix.

323

324 Parameters

325 ----------

326 x : array_like

327 An array of points. The last dimension must correspond with the

328 dimensionality of the space, i.e., the number of columns in the

329 covariance matrix.

330

331 Returns

332 -------

333 x_ : array_like

334 The transformed array of points.

335

336 References

337 ----------

338 .. [1] "Whitening Transformation". Wikipedia.

339 https://en.wikipedia.org/wiki/Whitening_transformation

340 .. [2] Novak, Lukas, and Miroslav Vorechovsky. "Generalization of

341 coloring linear transformation". Transactions of VSB 18.2

342 (2018): 31-35. :doi:`10.31490/tces-2018-0013`

343

344 Examples

345 --------

346 >>> import numpy as np

347 >>> from scipy import stats

348 >>> rng = np.random.default_rng()

349 >>> n = 3

350 >>> A = rng.random(size=(n, n))

351 >>> cov_array = A @ A.T # make matrix symmetric positive definite

352 >>> precision = np.linalg.inv(cov_array)

353 >>> cov_object = stats.Covariance.from_precision(precision)

354 >>> x = rng.multivariate_normal(np.zeros(n), cov_array, size=(10000))

355 >>> x_ = cov_object.whiten(x)

356 >>> np.cov(x_, rowvar=False) # near-identity covariance

357 array([[0.97862122, 0.00893147, 0.02430451],

358 [0.00893147, 0.96719062, 0.02201312],

359 [0.02430451, 0.02201312, 0.99206881]])

360

361 """

362 return self._whiten(np.asarray(x))

363

364 def colorize(self, x):

365 """

366 Perform a colorizing transformation on data.

367

368 "Colorizing" ("color" as in "colored noise", in which different

369 frequencies may have different magnitudes) transforms a set of

370 uncorrelated random variables into a new set of random variables with

371 the desired covariance. When a coloring transform is applied to a

372 sample of points distributed according to a multivariate normal

373 distribution with identity covariance and zero mean, the covariance of

374 the transformed sample is approximately the covariance matrix used

375 in the coloring transform.

376

377 Parameters

378 ----------

379 x : array_like

380 An array of points. The last dimension must correspond with the

381 dimensionality of the space, i.e., the number of columns in the

382 covariance matrix.

383

384 Returns

385 -------

386 x_ : array_like

387 The transformed array of points.

388

389 References

390 ----------

391 .. [1] "Whitening Transformation". Wikipedia.

392 https://en.wikipedia.org/wiki/Whitening_transformation

393 .. [2] Novak, Lukas, and Miroslav Vorechovsky. "Generalization of

394 coloring linear transformation". Transactions of VSB 18.2

395 (2018): 31-35. :doi:`10.31490/tces-2018-0013`

396

397 Examples

398 --------

399 >>> import numpy as np

400 >>> from scipy import stats

401 >>> rng = np.random.default_rng(1638083107694713882823079058616272161)

402 >>> n = 3

403 >>> A = rng.random(size=(n, n))

404 >>> cov_array = A @ A.T # make matrix symmetric positive definite

405 >>> cholesky = np.linalg.cholesky(cov_array)

406 >>> cov_object = stats.Covariance.from_cholesky(cholesky)

407 >>> x = rng.multivariate_normal(np.zeros(n), np.eye(n), size=(10000))

408 >>> x_ = cov_object.colorize(x)

409 >>> cov_data = np.cov(x_, rowvar=False)

410 >>> np.allclose(cov_data, cov_array, rtol=3e-2)

411 True

412 """

413 return self._colorize(np.asarray(x))

414

415 @property

416 def log_pdet(self):

417 """

418 Log of the pseudo-determinant of the covariance matrix

419 """

420 return np.array(self._log_pdet, dtype=float)[()]

421

422 @property

423 def rank(self):

424 """

425 Rank of the covariance matrix

426 """

427 return np.array(self._rank, dtype=int)[()]

428

429 @property

430 def covariance(self):

431 """

432 Explicit representation of the covariance matrix

433 """

434 return self._covariance

435

436 @property

437 def shape(self):

438 """

439 Shape of the covariance array

440 """

441 return self._shape

442

443 def _validate_matrix(self, A, name):

444 A = np.atleast_2d(A)

445 m, n = A.shape[-2:]

446 if m != n or A.ndim != 2 or not (np.issubdtype(A.dtype, np.integer) or

447 np.issubdtype(A.dtype, np.floating)):

448 message = (f"The input `{name}` must be a square, "

449 "two-dimensional array of real numbers.")

450 raise ValueError(message)

451 return A

452

453 def _validate_vector(self, A, name):

454 A = np.atleast_1d(A)

455 if A.ndim != 1 or not (np.issubdtype(A.dtype, np.integer) or

456 np.issubdtype(A.dtype, np.floating)):

457 message = (f"The input `{name}` must be a one-dimensional array "

458 "of real numbers.")

459 raise ValueError(message)

460 return A

461

462

463class CovViaPrecision(Covariance):

464

465 def __init__(self, precision, covariance=None):

466 precision = self._validate_matrix(precision, 'precision')

467 if covariance is not None:

468 covariance = self._validate_matrix(covariance, 'covariance')

469 message = "`precision.shape` must equal `covariance.shape`."

470 if precision.shape != covariance.shape:

471 raise ValueError(message)

472

473 self._chol_P = np.linalg.cholesky(precision)

474 self._log_pdet = -2*np.log(np.diag(self._chol_P)).sum(axis=-1)

475 self._rank = precision.shape[-1] # must be full rank if invertible

476 self._precision = precision

477 self._cov_matrix = covariance

478 self._shape = precision.shape

479 self._allow_singular = False

480

481 def _whiten(self, x):

482 return x @ self._chol_P

483

484 @cached_property

485 def _covariance(self):

486 n = self._shape[-1]

487 return (linalg.cho_solve((self._chol_P, True), np.eye(n))

488 if self._cov_matrix is None else self._cov_matrix)

489

490 def _colorize(self, x):

491 return linalg.solve_triangular(self._chol_P.T, x.T, lower=False).T

492

493

494def _dot_diag(x, d):

495 # If d were a full diagonal matrix, x @ d would always do what we want.

496 # Special treatment is needed for n-dimensional `d` in which each row

497 # includes only the diagonal elements of a covariance matrix.

498 return x * d if x.ndim < 2 else x * np.expand_dims(d, -2)

499

500

501class CovViaDiagonal(Covariance):

502

503 def __init__(self, diagonal):

504 diagonal = self._validate_vector(diagonal, 'diagonal')

505

506 i_zero = diagonal <= 0

507 positive_diagonal = np.array(diagonal, dtype=np.float64)

508

509 positive_diagonal[i_zero] = 1 # ones don't affect determinant

510 self._log_pdet = np.sum(np.log(positive_diagonal), axis=-1)

511

512 psuedo_reciprocals = 1 / np.sqrt(positive_diagonal)

513 psuedo_reciprocals[i_zero] = 0

514

515 self._sqrt_diagonal = np.sqrt(diagonal)

516 self._LP = psuedo_reciprocals

517 self._rank = positive_diagonal.shape[-1] - i_zero.sum(axis=-1)

518 self._covariance = np.apply_along_axis(np.diag, -1, diagonal)

519 self._i_zero = i_zero

520 self._shape = self._covariance.shape

521 self._allow_singular = True

522

523 def _whiten(self, x):

524 return _dot_diag(x, self._LP)

525

526 def _colorize(self, x):

527 return _dot_diag(x, self._sqrt_diagonal)

528

529 def _support_mask(self, x):

530 """

531 Check whether x lies in the support of the distribution.

532 """

533 return ~np.any(_dot_diag(x, self._i_zero), axis=-1)

534

535

536class CovViaCholesky(Covariance):

537

538 def __init__(self, cholesky):

539 L = self._validate_matrix(cholesky, 'cholesky')

540

541 self._factor = L

542 self._log_pdet = 2*np.log(np.diag(self._factor)).sum(axis=-1)

543 self._rank = L.shape[-1] # must be full rank for cholesky

544 self._covariance = L @ L.T

545 self._shape = L.shape

546 self._allow_singular = False

547

548 def _whiten(self, x):

549 res = linalg.solve_triangular(self._factor, x.T, lower=True).T

550 return res

551

552 def _colorize(self, x):

553 return x @ self._factor.T

554

555

556class CovViaEigendecomposition(Covariance):

557

558 def __init__(self, eigendecomposition):

559 eigenvalues, eigenvectors = eigendecomposition

560 eigenvalues = self._validate_vector(eigenvalues, 'eigenvalues')

561 eigenvectors = self._validate_matrix(eigenvectors, 'eigenvectors')

562 message = ("The shapes of `eigenvalues` and `eigenvectors` "

563 "must be compatible.")

564 try:

565 eigenvalues = np.expand_dims(eigenvalues, -2)

566 eigenvectors, eigenvalues = np.broadcast_arrays(eigenvectors,

567 eigenvalues)

568 eigenvalues = eigenvalues[..., 0, :]

569 except ValueError:

570 raise ValueError(message)

571

572 i_zero = eigenvalues <= 0

573 positive_eigenvalues = np.array(eigenvalues, dtype=np.float64)

574

575 positive_eigenvalues[i_zero] = 1 # ones don't affect determinant

576 self._log_pdet = np.sum(np.log(positive_eigenvalues), axis=-1)

577

578 psuedo_reciprocals = 1 / np.sqrt(positive_eigenvalues)

579 psuedo_reciprocals[i_zero] = 0

580

581 self._LP = eigenvectors * psuedo_reciprocals

582 self._LA = eigenvectors * np.sqrt(positive_eigenvalues)

583 self._rank = positive_eigenvalues.shape[-1] - i_zero.sum(axis=-1)

584 self._w = eigenvalues

585 self._v = eigenvectors

586 self._shape = eigenvectors.shape

587 self._null_basis = eigenvectors * i_zero

588 # This is only used for `_support_mask`, not to decide whether

589 # the covariance is singular or not.

590 self._eps = _multivariate._eigvalsh_to_eps(eigenvalues) * 10**3

591 self._allow_singular = True

592

593 def _whiten(self, x):

594 return x @ self._LP

595

596 def _colorize(self, x):

597 return x @ self._LA.T

598

599 @cached_property

600 def _covariance(self):

601 return (self._v * self._w) @ self._v.T

602

603 def _support_mask(self, x):

604 """

605 Check whether x lies in the support of the distribution.

606 """

607 residual = np.linalg.norm(x @ self._null_basis, axis=-1)

608 in_support = residual < self._eps

609 return in_support

610

611class CovViaPSD(Covariance):

612 """

613 Representation of a covariance provided via an instance of _PSD

614 """

615

616 def __init__(self, psd):

617 self._LP = psd.U

618 self._log_pdet = psd.log_pdet

619 self._rank = psd.rank

620 self._covariance = psd._M

621 self._shape = psd._M.shape

622 self._psd = psd

623 self._allow_singular = False # by default

624

625 def _whiten(self, x):

626 return x @ self._LP

627

628 def _support_mask(self, x):

629 return self._psd._support_mask(x)

Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/scipy/stats/_covariance.py: 33%

162 statements