Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/scipy/stats/

2# Author: Joris Vankerschaver 2013

4import math

5import numpy as np

6from numpy import asarray_chkfinite, asarray

7from numpy.lib import NumpyVersion

8import scipy.linalg

9from scipy._lib import doccer

10from scipy.special import gammaln, psi, multigammaln, xlogy, entr, betaln

11from scipy._lib._util import check_random_state

12from scipy.linalg.blas import drot

13from scipy.linalg._misc import LinAlgError

14from scipy.linalg.lapack import get_lapack_funcs

16from ._discrete_distns import binom

17from . import _mvn, _covariance, _rcont

19__all__ = ['multivariate_normal',

20 'matrix_normal',

21 'dirichlet',

22 'wishart',

23 'invwishart',

24 'multinomial',

25 'special_ortho_group',

26 'ortho_group',

27 'random_correlation',

28 'unitary_group',

29 'multivariate_t',

30 'multivariate_hypergeom',

31 'random_table',

32 'uniform_direction']

34_LOG_2PI = np.log(2 * np.pi)

35_LOG_2 = np.log(2)

36_LOG_PI = np.log(np.pi)

39_doc_random_state = """\

40seed : {None, int, np.random.RandomState, np.random.Generator}, optional

41 Used for drawing random variates.

42 If `seed` is `None`, the `~np.random.RandomState` singleton is used.

43 If `seed` is an int, a new ``RandomState`` instance is used, seeded

44 with seed.

45 If `seed` is already a ``RandomState`` or ``Generator`` instance,

46 then that object is used.

47 Default is `None`.

48"""

51def _squeeze_output(out):

52 """

53 Remove single-dimensional entries from array and convert to scalar,

54 if necessary.

55 """

56 out = out.squeeze()

57 if out.ndim == 0:

58 out = out[()]

59 return out

62def _eigvalsh_to_eps(spectrum, cond=None, rcond=None):

63 """Determine which eigenvalues are "small" given the spectrum.

65 This is for compatibility across various linear algebra functions

66 that should agree about whether or not a Hermitian matrix is numerically

67 singular and what is its numerical matrix rank.

68 This is designed to be compatible with scipy.linalg.pinvh.

70 Parameters

71 ----------

72 spectrum : 1d ndarray

73 Array of eigenvalues of a Hermitian matrix.

74 cond, rcond : float, optional

75 Cutoff for small eigenvalues.

76 Singular values smaller than rcond * largest_eigenvalue are

77 considered zero.

78 If None or -1, suitable machine precision is used.

80 Returns

81 -------

82 eps : float

83 Magnitude cutoff for numerical negligibility.

85 """

86 if rcond is not None:

87 cond = rcond

88 if cond in [None, -1]:

89 t = spectrum.dtype.char.lower()

90 factor = {'f': 1E3, 'd': 1E6}

91 cond = factor[t] * np.finfo(t).eps

92 eps = cond * np.max(abs(spectrum))

93 return eps

96def _pinv_1d(v, eps=1e-5):

97 """A helper function for computing the pseudoinverse.

99 Parameters

100 ----------

101 v : iterable of numbers

102 This may be thought of as a vector of eigenvalues or singular values.

103 eps : float

104 Values with magnitude no greater than eps are considered negligible.

105

106 Returns

107 -------

108 v_pinv : 1d float ndarray

109 A vector of pseudo-inverted numbers.

110

111 """

112 return np.array([0 if abs(x) <= eps else 1/x for x in v], dtype=float)

113

114

115class _PSD:

116 """

117 Compute coordinated functions of a symmetric positive semidefinite matrix.

118

119 This class addresses two issues. Firstly it allows the pseudoinverse,

120 the logarithm of the pseudo-determinant, and the rank of the matrix

121 to be computed using one call to eigh instead of three.

122 Secondly it allows these functions to be computed in a way

123 that gives mutually compatible results.

124 All of the functions are computed with a common understanding as to

125 which of the eigenvalues are to be considered negligibly small.

126 The functions are designed to coordinate with scipy.linalg.pinvh()

127 but not necessarily with np.linalg.det() or with np.linalg.matrix_rank().

128

129 Parameters

130 ----------

131 M : array_like

132 Symmetric positive semidefinite matrix (2-D).

133 cond, rcond : float, optional

134 Cutoff for small eigenvalues.

135 Singular values smaller than rcond * largest_eigenvalue are

136 considered zero.

137 If None or -1, suitable machine precision is used.

138 lower : bool, optional

139 Whether the pertinent array data is taken from the lower

140 or upper triangle of M. (Default: lower)

141 check_finite : bool, optional

142 Whether to check that the input matrices contain only finite

143 numbers. Disabling may give a performance gain, but may result

144 in problems (crashes, non-termination) if the inputs do contain

145 infinities or NaNs.

146 allow_singular : bool, optional

147 Whether to allow a singular matrix. (Default: True)

148

149 Notes

150 -----

151 The arguments are similar to those of scipy.linalg.pinvh().

152

153 """

154

155 def __init__(self, M, cond=None, rcond=None, lower=True,

156 check_finite=True, allow_singular=True):

157 self._M = np.asarray(M)

158

159 # Compute the symmetric eigendecomposition.

160 # Note that eigh takes care of array conversion, chkfinite,

161 # and assertion that the matrix is square.

162 s, u = scipy.linalg.eigh(M, lower=lower, check_finite=check_finite)

163

164 eps = _eigvalsh_to_eps(s, cond, rcond)

165 if np.min(s) < -eps:

166 msg = "The input matrix must be symmetric positive semidefinite."

167 raise ValueError(msg)

168 d = s[s > eps]

169 if len(d) < len(s) and not allow_singular:

170 msg = ("When `allow_singular is False`, the input matrix must be "

171 "symmetric positive definite.")

172 raise np.linalg.LinAlgError(msg)

173 s_pinv = _pinv_1d(s, eps)

174 U = np.multiply(u, np.sqrt(s_pinv))

175

176 # Save the eigenvector basis, and tolerance for testing support

177 self.eps = 1e3*eps

178 self.V = u[:, s <= eps]

179

180 # Initialize the eagerly precomputed attributes.

181 self.rank = len(d)

182 self.U = U

183 self.log_pdet = np.sum(np.log(d))

184

185 # Initialize attributes to be lazily computed.

186 self._pinv = None

187

188 def _support_mask(self, x):

189 """

190 Check whether x lies in the support of the distribution.

191 """

192 residual = np.linalg.norm(x @ self.V, axis=-1)

193 in_support = residual < self.eps

194 return in_support

195

196 @property

197 def pinv(self):

198 if self._pinv is None:

199 self._pinv = np.dot(self.U, self.U.T)

200 return self._pinv

201

202

203class multi_rv_generic:

204 """

205 Class which encapsulates common functionality between all multivariate

206 distributions.

207 """

208 def __init__(self, seed=None):

209 super().__init__()

210 self._random_state = check_random_state(seed)

211

212 @property

213 def random_state(self):

214 """ Get or set the Generator object for generating random variates.

215

216 If `seed` is None (or `np.random`), the `numpy.random.RandomState`

217 singleton is used.

218 If `seed` is an int, a new ``RandomState`` instance is used,

219 seeded with `seed`.

220 If `seed` is already a ``Generator`` or ``RandomState`` instance then

221 that instance is used.

222

223 """

224 return self._random_state

225

226 @random_state.setter

227 def random_state(self, seed):

228 self._random_state = check_random_state(seed)

229

230 def _get_random_state(self, random_state):

231 if random_state is not None:

232 return check_random_state(random_state)

233 else:

234 return self._random_state

235

236

237class multi_rv_frozen:

238 """

239 Class which encapsulates common functionality between all frozen

240 multivariate distributions.

241 """

242 @property

243 def random_state(self):

244 return self._dist._random_state

245

246 @random_state.setter

247 def random_state(self, seed):

248 self._dist._random_state = check_random_state(seed)

249

250

251_mvn_doc_default_callparams = """\

252mean : array_like, default: ``[0]``

253 Mean of the distribution.

254cov : array_like or `Covariance`, default: ``[1]``

255 Symmetric positive (semi)definite covariance matrix of the distribution.

256allow_singular : bool, default: ``False``

257 Whether to allow a singular covariance matrix. This is ignored if `cov` is

258 a `Covariance` object.

259"""

260

261_mvn_doc_callparams_note = """\

262Setting the parameter `mean` to `None` is equivalent to having `mean`

263be the zero-vector. The parameter `cov` can be a scalar, in which case

264the covariance matrix is the identity times that value, a vector of

265diagonal entries for the covariance matrix, a two-dimensional array_like,

266or a `Covariance` object.

267"""

268

269_mvn_doc_frozen_callparams = ""

270

271_mvn_doc_frozen_callparams_note = """\

272See class definition for a detailed description of parameters."""

273

274mvn_docdict_params = {

275 '_mvn_doc_default_callparams': _mvn_doc_default_callparams,

276 '_mvn_doc_callparams_note': _mvn_doc_callparams_note,

277 '_doc_random_state': _doc_random_state

278}

279

280mvn_docdict_noparams = {

281 '_mvn_doc_default_callparams': _mvn_doc_frozen_callparams,

282 '_mvn_doc_callparams_note': _mvn_doc_frozen_callparams_note,

283 '_doc_random_state': _doc_random_state

284}

285

286

287class multivariate_normal_gen(multi_rv_generic):

288 r"""A multivariate normal random variable.

289

290 The `mean` keyword specifies the mean. The `cov` keyword specifies the

291 covariance matrix.

292

293 Methods

294 -------

295 pdf(x, mean=None, cov=1, allow_singular=False)

296 Probability density function.

297 logpdf(x, mean=None, cov=1, allow_singular=False)

298 Log of the probability density function.

299 cdf(x, mean=None, cov=1, allow_singular=False, maxpts=1000000*dim, abseps=1e-5, releps=1e-5, lower_limit=None) # noqa

300 Cumulative distribution function.

301 logcdf(x, mean=None, cov=1, allow_singular=False, maxpts=1000000*dim, abseps=1e-5, releps=1e-5)

302 Log of the cumulative distribution function.

303 rvs(mean=None, cov=1, size=1, random_state=None)

304 Draw random samples from a multivariate normal distribution.

305 entropy()

306 Compute the differential entropy of the multivariate normal.

307

308 Parameters

309 ----------

310 %(_mvn_doc_default_callparams)s

311 %(_doc_random_state)s

312

313 Notes

314 -----

315 %(_mvn_doc_callparams_note)s

316

317 The covariance matrix `cov` may be an instance of a subclass of

318 `Covariance`, e.g. `scipy.stats.CovViaPrecision`. If so, `allow_singular`

319 is ignored.

320

321 Otherwise, `cov` must be a symmetric positive semidefinite

322 matrix when `allow_singular` is True; it must be (strictly) positive

323 definite when `allow_singular` is False.

324 Symmetry is not checked; only the lower triangular portion is used.

325 The determinant and inverse of `cov` are computed

326 as the pseudo-determinant and pseudo-inverse, respectively, so

327 that `cov` does not need to have full rank.

328

329 The probability density function for `multivariate_normal` is

330

331 .. math::

332

333 f(x) = \frac{1}{\sqrt{(2 \pi)^k \det \Sigma}}

334 \exp\left( -\frac{1}{2} (x - \mu)^T \Sigma^{-1} (x - \mu) \right),

335

336 where :math:`\mu` is the mean, :math:`\Sigma` the covariance matrix,

337 :math:`k` the rank of :math:`\Sigma`. In case of singular :math:`\Sigma`,

338 SciPy extends this definition according to [1]_.

339

340 .. versionadded:: 0.14.0

341

342 References

343 ----------

344 .. [1] Multivariate Normal Distribution - Degenerate Case, Wikipedia,

345 https://en.wikipedia.org/wiki/Multivariate_normal_distribution#Degenerate_case

346

347 Examples

348 --------

349 >>> import numpy as np

350 >>> import matplotlib.pyplot as plt

351 >>> from scipy.stats import multivariate_normal

352

353 >>> x = np.linspace(0, 5, 10, endpoint=False)

354 >>> y = multivariate_normal.pdf(x, mean=2.5, cov=0.5); y

355 array([ 0.00108914, 0.01033349, 0.05946514, 0.20755375, 0.43939129,

356 0.56418958, 0.43939129, 0.20755375, 0.05946514, 0.01033349])

357 >>> fig1 = plt.figure()

358 >>> ax = fig1.add_subplot(111)

359 >>> ax.plot(x, y)

360 >>> plt.show()

361

362 Alternatively, the object may be called (as a function) to fix the mean

363 and covariance parameters, returning a "frozen" multivariate normal

364 random variable:

365

366 >>> rv = multivariate_normal(mean=None, cov=1, allow_singular=False)

367 >>> # Frozen object with the same methods but holding the given

368 >>> # mean and covariance fixed.

369

370 The input quantiles can be any shape of array, as long as the last

371 axis labels the components. This allows us for instance to

372 display the frozen pdf for a non-isotropic random variable in 2D as

373 follows:

374

375 >>> x, y = np.mgrid[-1:1:.01, -1:1:.01]

376 >>> pos = np.dstack((x, y))

377 >>> rv = multivariate_normal([0.5, -0.2], [[2.0, 0.3], [0.3, 0.5]])

378 >>> fig2 = plt.figure()

379 >>> ax2 = fig2.add_subplot(111)

380 >>> ax2.contourf(x, y, rv.pdf(pos))

381

382 """

383

384 def __init__(self, seed=None):

385 super().__init__(seed)

386 self.__doc__ = doccer.docformat(self.__doc__, mvn_docdict_params)

387

388 def __call__(self, mean=None, cov=1, allow_singular=False, seed=None):

389 """Create a frozen multivariate normal distribution.

390

391 See `multivariate_normal_frozen` for more information.

392 """

393 return multivariate_normal_frozen(mean, cov,

394 allow_singular=allow_singular,

395 seed=seed)

396

397 def _process_parameters(self, mean, cov, allow_singular=True):

398 """

399 Infer dimensionality from mean or covariance matrix, ensure that

400 mean and covariance are full vector resp. matrix.

401 """

402 if isinstance(cov, _covariance.Covariance):

403 return self._process_parameters_Covariance(mean, cov)

404 else:

405 # Before `Covariance` classes were introduced,

406 # `multivariate_normal` accepted plain arrays as `cov` and used the

407 # following input validation. To avoid disturbing the behavior of

408 # `multivariate_normal` when plain arrays are used, we use the

409 # original input validation here.

410 dim, mean, cov = self._process_parameters_psd(None, mean, cov)

411 # After input validation, some methods then processed the arrays

412 # with a `_PSD` object and used that to perform computation.

413 # To avoid branching statements in each method depending on whether

414 # `cov` is an array or `Covariance` object, we always process the

415 # array with `_PSD`, and then use wrapper that satisfies the

416 # `Covariance` interface, `CovViaPSD`.

417 psd = _PSD(cov, allow_singular=allow_singular)

418 cov_object = _covariance.CovViaPSD(psd)

419 return dim, mean, cov_object

420

421 def _process_parameters_Covariance(self, mean, cov):

422 dim = cov.shape[-1]

423 mean = np.array([0.]) if mean is None else mean

424 message = (f"`cov` represents a covariance matrix in {dim} dimensions,"

425 f"and so `mean` must be broadcastable to shape {(dim,)}")

426 try:

427 mean = np.broadcast_to(mean, dim)

428 except ValueError as e:

429 raise ValueError(message) from e

430 return dim, mean, cov

431

432 def _process_parameters_psd(self, dim, mean, cov):

433 # Try to infer dimensionality

434 if dim is None:

435 if mean is None:

436 if cov is None:

437 dim = 1

438 else:

439 cov = np.asarray(cov, dtype=float)

440 if cov.ndim < 2:

441 dim = 1

442 else:

443 dim = cov.shape[0]

444 else:

445 mean = np.asarray(mean, dtype=float)

446 dim = mean.size

447 else:

448 if not np.isscalar(dim):

449 raise ValueError("Dimension of random variable must be "

450 "a scalar.")

451

452 # Check input sizes and return full arrays for mean and cov if

453 # necessary

454 if mean is None:

455 mean = np.zeros(dim)

456 mean = np.asarray(mean, dtype=float)

457

458 if cov is None:

459 cov = 1.0

460 cov = np.asarray(cov, dtype=float)

461

462 if dim == 1:

463 mean = mean.reshape(1)

464 cov = cov.reshape(1, 1)

465

466 if mean.ndim != 1 or mean.shape[0] != dim:

467 raise ValueError("Array 'mean' must be a vector of length %d." %

468 dim)

469 if cov.ndim == 0:

470 cov = cov * np.eye(dim)

471 elif cov.ndim == 1:

472 cov = np.diag(cov)

473 elif cov.ndim == 2 and cov.shape != (dim, dim):

474 rows, cols = cov.shape

475 if rows != cols:

476 msg = ("Array 'cov' must be square if it is two dimensional,"

477 " but cov.shape = %s." % str(cov.shape))

478 else:

479 msg = ("Dimension mismatch: array 'cov' is of shape %s,"

480 " but 'mean' is a vector of length %d.")

481 msg = msg % (str(cov.shape), len(mean))

482 raise ValueError(msg)

483 elif cov.ndim > 2:

484 raise ValueError("Array 'cov' must be at most two-dimensional,"

485 " but cov.ndim = %d" % cov.ndim)

486

487 return dim, mean, cov

488

489 def _process_quantiles(self, x, dim):

490 """

491 Adjust quantiles array so that last axis labels the components of

492 each data point.

493 """

494 x = np.asarray(x, dtype=float)

495

496 if x.ndim == 0:

497 x = x[np.newaxis]

498 elif x.ndim == 1:

499 if dim == 1:

500 x = x[:, np.newaxis]

501 else:

502 x = x[np.newaxis, :]

503

504 return x

505

506 def _logpdf(self, x, mean, cov_object):

507 """Log of the multivariate normal probability density function.

508

509 Parameters

510 ----------

511 x : ndarray

512 Points at which to evaluate the log of the probability

513 density function

514 mean : ndarray

515 Mean of the distribution

516 cov_object : Covariance

517 An object representing the Covariance matrix

518

519 Notes

520 -----

521 As this function does no argument checking, it should not be

522 called directly; use 'logpdf' instead.

523

524 """

525 log_det_cov, rank = cov_object.log_pdet, cov_object.rank

526 dev = x - mean

527 if dev.ndim > 1:

528 log_det_cov = log_det_cov[..., np.newaxis]

529 rank = rank[..., np.newaxis]

530 maha = np.sum(np.square(cov_object.whiten(dev)), axis=-1)

531 return -0.5 * (rank * _LOG_2PI + log_det_cov + maha)

532

533 def logpdf(self, x, mean=None, cov=1, allow_singular=False):

534 """Log of the multivariate normal probability density function.

535

536 Parameters

537 ----------

538 x : array_like

539 Quantiles, with the last axis of `x` denoting the components.

540 %(_mvn_doc_default_callparams)s

541

542 Returns

543 -------

544 pdf : ndarray or scalar

545 Log of the probability density function evaluated at `x`

546

547 Notes

548 -----

549 %(_mvn_doc_callparams_note)s

550

551 """

552 params = self._process_parameters(mean, cov, allow_singular)

553 dim, mean, cov_object = params

554 x = self._process_quantiles(x, dim)

555 out = self._logpdf(x, mean, cov_object)

556 if np.any(cov_object.rank < dim):

557 out_of_bounds = ~cov_object._support_mask(x-mean)

558 out[out_of_bounds] = -np.inf

559 return _squeeze_output(out)

560

561 def pdf(self, x, mean=None, cov=1, allow_singular=False):

562 """Multivariate normal probability density function.

563

564 Parameters

565 ----------

566 x : array_like

567 Quantiles, with the last axis of `x` denoting the components.

568 %(_mvn_doc_default_callparams)s

569

570 Returns

571 -------

572 pdf : ndarray or scalar

573 Probability density function evaluated at `x`

574

575 Notes

576 -----

577 %(_mvn_doc_callparams_note)s

578

579 """

580 params = self._process_parameters(mean, cov, allow_singular)

581 dim, mean, cov_object = params

582 x = self._process_quantiles(x, dim)

583 out = np.exp(self._logpdf(x, mean, cov_object))

584 if np.any((cov_object.rank < dim)):

585 out_of_bounds = ~cov_object._support_mask(x-mean)

586 out[out_of_bounds] = 0.0

587 return _squeeze_output(out)

588

589 def _cdf(self, x, mean, cov, maxpts, abseps, releps, lower_limit):

590 """Multivariate normal cumulative distribution function.

591

592 Parameters

593 ----------

594 x : ndarray

595 Points at which to evaluate the cumulative distribution function.

596 mean : ndarray

597 Mean of the distribution

598 cov : array_like

599 Covariance matrix of the distribution

600 maxpts : integer

601 The maximum number of points to use for integration

602 abseps : float

603 Absolute error tolerance

604 releps : float

605 Relative error tolerance

606 lower_limit : array_like, optional

607 Lower limit of integration of the cumulative distribution function.

608 Default is negative infinity. Must be broadcastable with `x`.

609

610 Notes

611 -----

612 As this function does no argument checking, it should not be

613 called directly; use 'cdf' instead.

614

615

616 .. versionadded:: 1.0.0

617

618 """

619 lower = (np.full(mean.shape, -np.inf)

620 if lower_limit is None else lower_limit)

621 # In 2d, _mvn.mvnun accepts input in which `lower` bound elements

622 # are greater than `x`. Not so in other dimensions. Fix this by

623 # ensuring that lower bounds are indeed lower when passed, then

624 # set signs of resulting CDF manually.

625 b, a = np.broadcast_arrays(x, lower)

626 i_swap = b < a

627 signs = (-1)**(i_swap.sum(axis=-1)) # odd # of swaps -> negative

628 a, b = a.copy(), b.copy()

629 a[i_swap], b[i_swap] = b[i_swap], a[i_swap]

630 n = x.shape[-1]

631 limits = np.concatenate((a, b), axis=-1)

632

633 # mvnun expects 1-d arguments, so process points sequentially

634 def func1d(limits):

635 return _mvn.mvnun(limits[:n], limits[n:], mean, cov,

636 maxpts, abseps, releps)[0]

637

638 out = np.apply_along_axis(func1d, -1, limits) * signs

639 return _squeeze_output(out)

640

641 def logcdf(self, x, mean=None, cov=1, allow_singular=False, maxpts=None,

642 abseps=1e-5, releps=1e-5, *, lower_limit=None):

643 """Log of the multivariate normal cumulative distribution function.

644

645 Parameters

646 ----------

647 x : array_like

648 Quantiles, with the last axis of `x` denoting the components.

649 %(_mvn_doc_default_callparams)s

650 maxpts : integer, optional

651 The maximum number of points to use for integration

652 (default `1000000*dim`)

653 abseps : float, optional

654 Absolute error tolerance (default 1e-5)

655 releps : float, optional

656 Relative error tolerance (default 1e-5)

657 lower_limit : array_like, optional

658 Lower limit of integration of the cumulative distribution function.

659 Default is negative infinity. Must be broadcastable with `x`.

660

661 Returns

662 -------

663 cdf : ndarray or scalar

664 Log of the cumulative distribution function evaluated at `x`

665

666 Notes

667 -----

668 %(_mvn_doc_callparams_note)s

669

670 .. versionadded:: 1.0.0

671

672 """

673 params = self._process_parameters(mean, cov, allow_singular)

674 dim, mean, cov_object = params

675 cov = cov_object.covariance

676 x = self._process_quantiles(x, dim)

677 if not maxpts:

678 maxpts = 1000000 * dim

679 cdf = self._cdf(x, mean, cov, maxpts, abseps, releps, lower_limit)

680 # the log of a negative real is complex, and cdf can be negative

681 # if lower limit is greater than upper limit

682 cdf = cdf + 0j if np.any(cdf < 0) else cdf

683 out = np.log(cdf)

684 return out

685

686 def cdf(self, x, mean=None, cov=1, allow_singular=False, maxpts=None,

687 abseps=1e-5, releps=1e-5, *, lower_limit=None):

688 """Multivariate normal cumulative distribution function.

689

690 Parameters

691 ----------

692 x : array_like

693 Quantiles, with the last axis of `x` denoting the components.

694 %(_mvn_doc_default_callparams)s

695 maxpts : integer, optional

696 The maximum number of points to use for integration

697 (default `1000000*dim`)

698 abseps : float, optional

699 Absolute error tolerance (default 1e-5)

700 releps : float, optional

701 Relative error tolerance (default 1e-5)

702 lower_limit : array_like, optional

703 Lower limit of integration of the cumulative distribution function.

704 Default is negative infinity. Must be broadcastable with `x`.

705

706 Returns

707 -------

708 cdf : ndarray or scalar

709 Cumulative distribution function evaluated at `x`

710

711 Notes

712 -----

713 %(_mvn_doc_callparams_note)s

714

715 .. versionadded:: 1.0.0

716

717 """

718 params = self._process_parameters(mean, cov, allow_singular)

719 dim, mean, cov_object = params

720 cov = cov_object.covariance

721 x = self._process_quantiles(x, dim)

722 if not maxpts:

723 maxpts = 1000000 * dim

724 out = self._cdf(x, mean, cov, maxpts, abseps, releps, lower_limit)

725 return out

726

727 def rvs(self, mean=None, cov=1, size=1, random_state=None):

728 """Draw random samples from a multivariate normal distribution.

729

730 Parameters

731 ----------

732 %(_mvn_doc_default_callparams)s

733 size : integer, optional

734 Number of samples to draw (default 1).

735 %(_doc_random_state)s

736

737 Returns

738 -------

739 rvs : ndarray or scalar

740 Random variates of size (`size`, `N`), where `N` is the

741 dimension of the random variable.

742

743 Notes

744 -----

745 %(_mvn_doc_callparams_note)s

746

747 """

748 dim, mean, cov_object = self._process_parameters(mean, cov)

749 random_state = self._get_random_state(random_state)

750

751 if isinstance(cov_object, _covariance.CovViaPSD):

752 cov = cov_object.covariance

753 out = random_state.multivariate_normal(mean, cov, size)

754 out = _squeeze_output(out)

755 else:

756 size = size or tuple()

757 if not np.iterable(size):

758 size = (size,)

759 shape = tuple(size) + (cov_object.shape[-1],)

760 x = random_state.normal(size=shape)

761 out = mean + cov_object.colorize(x)

762 return out

763

764 def entropy(self, mean=None, cov=1):

765 """Compute the differential entropy of the multivariate normal.

766

767 Parameters

768 ----------

769 %(_mvn_doc_default_callparams)s

770

771 Returns

772 -------

773 h : scalar

774 Entropy of the multivariate normal distribution

775

776 Notes

777 -----

778 %(_mvn_doc_callparams_note)s

779

780 """

781 dim, mean, cov_object = self._process_parameters(mean, cov)

782 return 0.5 * (cov_object.rank * (_LOG_2PI + 1) + cov_object.log_pdet)

783

784

785multivariate_normal = multivariate_normal_gen()

786

787

788class multivariate_normal_frozen(multi_rv_frozen):

789 def __init__(self, mean=None, cov=1, allow_singular=False, seed=None,

790 maxpts=None, abseps=1e-5, releps=1e-5):

791 """Create a frozen multivariate normal distribution.

792

793 Parameters

794 ----------

795 mean : array_like, default: ``[0]``

796 Mean of the distribution.

797 cov : array_like, default: ``[1]``

798 Symmetric positive (semi)definite covariance matrix of the

799 distribution.

800 allow_singular : bool, default: ``False``

801 Whether to allow a singular covariance matrix.

802 seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional

803 If `seed` is None (or `np.random`), the `numpy.random.RandomState`

804 singleton is used.

805 If `seed` is an int, a new ``RandomState`` instance is used,

806 seeded with `seed`.

807 If `seed` is already a ``Generator`` or ``RandomState`` instance

808 then that instance is used.

809 maxpts : integer, optional

810 The maximum number of points to use for integration of the

811 cumulative distribution function (default `1000000*dim`)

812 abseps : float, optional

813 Absolute error tolerance for the cumulative distribution function

814 (default 1e-5)

815 releps : float, optional

816 Relative error tolerance for the cumulative distribution function

817 (default 1e-5)

818

819 Examples

820 --------

821 When called with the default parameters, this will create a 1D random

822 variable with mean 0 and covariance 1:

823

824 >>> from scipy.stats import multivariate_normal

825 >>> r = multivariate_normal()

826 >>> r.mean

827 array([ 0.])

828 >>> r.cov

829 array([[1.]])

830

831 """

832 self._dist = multivariate_normal_gen(seed)

833 self.dim, self.mean, self.cov_object = (

834 self._dist._process_parameters(mean, cov, allow_singular))

835 self.allow_singular = allow_singular or self.cov_object._allow_singular

836 if not maxpts:

837 maxpts = 1000000 * self.dim

838 self.maxpts = maxpts

839 self.abseps = abseps

840 self.releps = releps

841

842 @property

843 def cov(self):

844 return self.cov_object.covariance

845

846 def logpdf(self, x):

847 x = self._dist._process_quantiles(x, self.dim)

848 out = self._dist._logpdf(x, self.mean, self.cov_object)

849 if np.any(self.cov_object.rank < self.dim):

850 out_of_bounds = ~self.cov_object._support_mask(x-self.mean)

851 out[out_of_bounds] = -np.inf

852 return _squeeze_output(out)

853

854 def pdf(self, x):

855 return np.exp(self.logpdf(x))

856

857 def logcdf(self, x, *, lower_limit=None):

858 cdf = self.cdf(x, lower_limit=lower_limit)

859 # the log of a negative real is complex, and cdf can be negative

860 # if lower limit is greater than upper limit

861 cdf = cdf + 0j if np.any(cdf < 0) else cdf

862 out = np.log(cdf)

863 return out

864

865 def cdf(self, x, *, lower_limit=None):

866 x = self._dist._process_quantiles(x, self.dim)

867 out = self._dist._cdf(x, self.mean, self.cov_object.covariance,

868 self.maxpts, self.abseps, self.releps,

869 lower_limit)

870 return _squeeze_output(out)

871

872 def rvs(self, size=1, random_state=None):

873 return self._dist.rvs(self.mean, self.cov_object, size, random_state)

874

875 def entropy(self):

876 """Computes the differential entropy of the multivariate normal.

877

878 Returns

879 -------

880 h : scalar

881 Entropy of the multivariate normal distribution

882

883 """

884 log_pdet = self.cov_object.log_pdet

885 rank = self.cov_object.rank

886 return 0.5 * (rank * (_LOG_2PI + 1) + log_pdet)

887

888

889# Set frozen generator docstrings from corresponding docstrings in

890# multivariate_normal_gen and fill in default strings in class docstrings

891for name in ['logpdf', 'pdf', 'logcdf', 'cdf', 'rvs']:

892 method = multivariate_normal_gen.__dict__[name]

893 method_frozen = multivariate_normal_frozen.__dict__[name]

894 method_frozen.__doc__ = doccer.docformat(method.__doc__,

895 mvn_docdict_noparams)

896 method.__doc__ = doccer.docformat(method.__doc__, mvn_docdict_params)

897

898_matnorm_doc_default_callparams = """\

899mean : array_like, optional

900 Mean of the distribution (default: `None`)

901rowcov : array_like, optional

902 Among-row covariance matrix of the distribution (default: `1`)

903colcov : array_like, optional

904 Among-column covariance matrix of the distribution (default: `1`)

905"""

906

907_matnorm_doc_callparams_note = """\

908If `mean` is set to `None` then a matrix of zeros is used for the mean.

909The dimensions of this matrix are inferred from the shape of `rowcov` and

910`colcov`, if these are provided, or set to `1` if ambiguous.

911

912`rowcov` and `colcov` can be two-dimensional array_likes specifying the

913covariance matrices directly. Alternatively, a one-dimensional array will

914be be interpreted as the entries of a diagonal matrix, and a scalar or

915zero-dimensional array will be interpreted as this value times the

916identity matrix.

917"""

918

919_matnorm_doc_frozen_callparams = ""

920

921_matnorm_doc_frozen_callparams_note = """\

922See class definition for a detailed description of parameters."""

923

924matnorm_docdict_params = {

925 '_matnorm_doc_default_callparams': _matnorm_doc_default_callparams,

926 '_matnorm_doc_callparams_note': _matnorm_doc_callparams_note,

927 '_doc_random_state': _doc_random_state

928}

929

930matnorm_docdict_noparams = {

931 '_matnorm_doc_default_callparams': _matnorm_doc_frozen_callparams,

932 '_matnorm_doc_callparams_note': _matnorm_doc_frozen_callparams_note,

933 '_doc_random_state': _doc_random_state

934}

935

936

937class matrix_normal_gen(multi_rv_generic):

938 r"""A matrix normal random variable.

939

940 The `mean` keyword specifies the mean. The `rowcov` keyword specifies the

941 among-row covariance matrix. The 'colcov' keyword specifies the

942 among-column covariance matrix.

943

944 Methods

945 -------

946 pdf(X, mean=None, rowcov=1, colcov=1)

947 Probability density function.

948 logpdf(X, mean=None, rowcov=1, colcov=1)

949 Log of the probability density function.

950 rvs(mean=None, rowcov=1, colcov=1, size=1, random_state=None)

951 Draw random samples.

952

953 Parameters

954 ----------

955 %(_matnorm_doc_default_callparams)s

956 %(_doc_random_state)s

957

958 Notes

959 -----

960 %(_matnorm_doc_callparams_note)s

961

962 The covariance matrices specified by `rowcov` and `colcov` must be

963 (symmetric) positive definite. If the samples in `X` are

964 :math:`m \times n`, then `rowcov` must be :math:`m \times m` and

965 `colcov` must be :math:`n \times n`. `mean` must be the same shape as `X`.

966

967 The probability density function for `matrix_normal` is

968

969 .. math::

970

971 f(X) = (2 \pi)^{-\frac{mn}{2}}|U|^{-\frac{n}{2}} |V|^{-\frac{m}{2}}

972 \exp\left( -\frac{1}{2} \mathrm{Tr}\left[ U^{-1} (X-M) V^{-1}

973 (X-M)^T \right] \right),

974

975 where :math:`M` is the mean, :math:`U` the among-row covariance matrix,

976 :math:`V` the among-column covariance matrix.

977

978 The `allow_singular` behaviour of the `multivariate_normal`

979 distribution is not currently supported. Covariance matrices must be

980 full rank.

981

982 The `matrix_normal` distribution is closely related to the

983 `multivariate_normal` distribution. Specifically, :math:`\mathrm{Vec}(X)`

984 (the vector formed by concatenating the columns of :math:`X`) has a

985 multivariate normal distribution with mean :math:`\mathrm{Vec}(M)`

986 and covariance :math:`V \otimes U` (where :math:`\otimes` is the Kronecker

987 product). Sampling and pdf evaluation are

988 :math:`\mathcal{O}(m^3 + n^3 + m^2 n + m n^2)` for the matrix normal, but

989 :math:`\mathcal{O}(m^3 n^3)` for the equivalent multivariate normal,

990 making this equivalent form algorithmically inefficient.

991

992 .. versionadded:: 0.17.0

993

994 Examples

995 --------

996

997 >>> import numpy as np

998 >>> from scipy.stats import matrix_normal

999

1000 >>> M = np.arange(6).reshape(3,2); M

1001 array([[0, 1],

1002 [2, 3],

1003 [4, 5]])

1004 >>> U = np.diag([1,2,3]); U

1005 array([[1, 0, 0],

1006 [0, 2, 0],

1007 [0, 0, 3]])

1008 >>> V = 0.3*np.identity(2); V

1009 array([[ 0.3, 0. ],

1010 [ 0. , 0.3]])

1011 >>> X = M + 0.1; X

1012 array([[ 0.1, 1.1],

1013 [ 2.1, 3.1],

1014 [ 4.1, 5.1]])

1015 >>> matrix_normal.pdf(X, mean=M, rowcov=U, colcov=V)

1016 0.023410202050005054

1017

1018 >>> # Equivalent multivariate normal

1019 >>> from scipy.stats import multivariate_normal

1020 >>> vectorised_X = X.T.flatten()

1021 >>> equiv_mean = M.T.flatten()

1022 >>> equiv_cov = np.kron(V,U)

1023 >>> multivariate_normal.pdf(vectorised_X, mean=equiv_mean, cov=equiv_cov)

1024 0.023410202050005054

1025

1026 Alternatively, the object may be called (as a function) to fix the mean

1027 and covariance parameters, returning a "frozen" matrix normal

1028 random variable:

1029

1030 >>> rv = matrix_normal(mean=None, rowcov=1, colcov=1)

1031 >>> # Frozen object with the same methods but holding the given

1032 >>> # mean and covariance fixed.

1033

1034 """

1035

1036 def __init__(self, seed=None):

1037 super().__init__(seed)

1038 self.__doc__ = doccer.docformat(self.__doc__, matnorm_docdict_params)

1039

1040 def __call__(self, mean=None, rowcov=1, colcov=1, seed=None):

1041 """Create a frozen matrix normal distribution.

1042

1043 See `matrix_normal_frozen` for more information.

1044

1045 """

1046 return matrix_normal_frozen(mean, rowcov, colcov, seed=seed)

1047

1048 def _process_parameters(self, mean, rowcov, colcov):

1049 """

1050 Infer dimensionality from mean or covariance matrices. Handle

1051 defaults. Ensure compatible dimensions.

1052 """

1053

1054 # Process mean

1055 if mean is not None:

1056 mean = np.asarray(mean, dtype=float)

1057 meanshape = mean.shape

1058 if len(meanshape) != 2:

1059 raise ValueError("Array `mean` must be two dimensional.")

1060 if np.any(meanshape == 0):

1061 raise ValueError("Array `mean` has invalid shape.")

1062

1063 # Process among-row covariance

1064 rowcov = np.asarray(rowcov, dtype=float)

1065 if rowcov.ndim == 0:

1066 if mean is not None:

1067 rowcov = rowcov * np.identity(meanshape[0])

1068 else:

1069 rowcov = rowcov * np.identity(1)

1070 elif rowcov.ndim == 1:

1071 rowcov = np.diag(rowcov)

1072 rowshape = rowcov.shape

1073 if len(rowshape) != 2:

1074 raise ValueError("`rowcov` must be a scalar or a 2D array.")

1075 if rowshape[0] != rowshape[1]:

1076 raise ValueError("Array `rowcov` must be square.")

1077 if rowshape[0] == 0:

1078 raise ValueError("Array `rowcov` has invalid shape.")

1079 numrows = rowshape[0]

1080

1081 # Process among-column covariance

1082 colcov = np.asarray(colcov, dtype=float)

1083 if colcov.ndim == 0:

1084 if mean is not None:

1085 colcov = colcov * np.identity(meanshape[1])

1086 else:

1087 colcov = colcov * np.identity(1)

1088 elif colcov.ndim == 1:

1089 colcov = np.diag(colcov)

1090 colshape = colcov.shape

1091 if len(colshape) != 2:

1092 raise ValueError("`colcov` must be a scalar or a 2D array.")

1093 if colshape[0] != colshape[1]:

1094 raise ValueError("Array `colcov` must be square.")

1095 if colshape[0] == 0:

1096 raise ValueError("Array `colcov` has invalid shape.")

1097 numcols = colshape[0]

1098

1099 # Ensure mean and covariances compatible

1100 if mean is not None:

1101 if meanshape[0] != numrows:

1102 raise ValueError("Arrays `mean` and `rowcov` must have the "

1103 "same number of rows.")

1104 if meanshape[1] != numcols:

1105 raise ValueError("Arrays `mean` and `colcov` must have the "

1106 "same number of columns.")

1107 else:

1108 mean = np.zeros((numrows, numcols))

1109

1110 dims = (numrows, numcols)

1111

1112 return dims, mean, rowcov, colcov

1113

1114 def _process_quantiles(self, X, dims):

1115 """

1116 Adjust quantiles array so that last two axes labels the components of

1117 each data point.

1118 """

1119 X = np.asarray(X, dtype=float)

1120 if X.ndim == 2:

1121 X = X[np.newaxis, :]

1122 if X.shape[-2:] != dims:

1123 raise ValueError("The shape of array `X` is not compatible "

1124 "with the distribution parameters.")

1125 return X

1126

1127 def _logpdf(self, dims, X, mean, row_prec_rt, log_det_rowcov,

1128 col_prec_rt, log_det_colcov):

1129 """Log of the matrix normal probability density function.

1130

1131 Parameters

1132 ----------

1133 dims : tuple

1134 Dimensions of the matrix variates

1135 X : ndarray

1136 Points at which to evaluate the log of the probability

1137 density function

1138 mean : ndarray

1139 Mean of the distribution

1140 row_prec_rt : ndarray

1141 A decomposition such that np.dot(row_prec_rt, row_prec_rt.T)

1142 is the inverse of the among-row covariance matrix

1143 log_det_rowcov : float

1144 Logarithm of the determinant of the among-row covariance matrix

1145 col_prec_rt : ndarray

1146 A decomposition such that np.dot(col_prec_rt, col_prec_rt.T)

1147 is the inverse of the among-column covariance matrix

1148 log_det_colcov : float

1149 Logarithm of the determinant of the among-column covariance matrix

1150

1151 Notes

1152 -----

1153 As this function does no argument checking, it should not be

1154 called directly; use 'logpdf' instead.

1155

1156 """

1157 numrows, numcols = dims

1158 roll_dev = np.moveaxis(X-mean, -1, 0)

1159 scale_dev = np.tensordot(col_prec_rt.T,

1160 np.dot(roll_dev, row_prec_rt), 1)

1161 maha = np.sum(np.sum(np.square(scale_dev), axis=-1), axis=0)

1162 return -0.5 * (numrows*numcols*_LOG_2PI + numcols*log_det_rowcov

1163 + numrows*log_det_colcov + maha)

1164

1165 def logpdf(self, X, mean=None, rowcov=1, colcov=1):

1166 """Log of the matrix normal probability density function.

1167

1168 Parameters

1169 ----------

1170 X : array_like

1171 Quantiles, with the last two axes of `X` denoting the components.

1172 %(_matnorm_doc_default_callparams)s

1173

1174 Returns

1175 -------

1176 logpdf : ndarray

1177 Log of the probability density function evaluated at `X`

1178

1179 Notes

1180 -----

1181 %(_matnorm_doc_callparams_note)s

1182

1183 """

1184 dims, mean, rowcov, colcov = self._process_parameters(mean, rowcov,

1185 colcov)

1186 X = self._process_quantiles(X, dims)

1187 rowpsd = _PSD(rowcov, allow_singular=False)

1188 colpsd = _PSD(colcov, allow_singular=False)

1189 out = self._logpdf(dims, X, mean, rowpsd.U, rowpsd.log_pdet, colpsd.U,

1190 colpsd.log_pdet)

1191 return _squeeze_output(out)

1192

1193 def pdf(self, X, mean=None, rowcov=1, colcov=1):

1194 """Matrix normal probability density function.

1195

1196 Parameters

1197 ----------

1198 X : array_like

1199 Quantiles, with the last two axes of `X` denoting the components.

1200 %(_matnorm_doc_default_callparams)s

1201

1202 Returns

1203 -------

1204 pdf : ndarray

1205 Probability density function evaluated at `X`

1206

1207 Notes

1208 -----

1209 %(_matnorm_doc_callparams_note)s

1210

1211 """

1212 return np.exp(self.logpdf(X, mean, rowcov, colcov))

1213

1214 def rvs(self, mean=None, rowcov=1, colcov=1, size=1, random_state=None):

1215 """Draw random samples from a matrix normal distribution.

1216

1217 Parameters

1218 ----------

1219 %(_matnorm_doc_default_callparams)s

1220 size : integer, optional

1221 Number of samples to draw (default 1).

1222 %(_doc_random_state)s

1223

1224 Returns

1225 -------

1226 rvs : ndarray or scalar

1227 Random variates of size (`size`, `dims`), where `dims` is the

1228 dimension of the random matrices.

1229

1230 Notes

1231 -----

1232 %(_matnorm_doc_callparams_note)s

1233

1234 """

1235 size = int(size)

1236 dims, mean, rowcov, colcov = self._process_parameters(mean, rowcov,

1237 colcov)

1238 rowchol = scipy.linalg.cholesky(rowcov, lower=True)

1239 colchol = scipy.linalg.cholesky(colcov, lower=True)

1240 random_state = self._get_random_state(random_state)

1241 # We aren't generating standard normal variates with size=(size,

1242 # dims[0], dims[1]) directly to ensure random variates remain backwards

1243 # compatible. See https://github.com/scipy/scipy/pull/12312 for more

1244 # details.

1245 std_norm = random_state.standard_normal(

1246 size=(dims[1], size, dims[0])

1247 ).transpose(1, 2, 0)

1248 out = mean + np.einsum('jp,ipq,kq->ijk',

1249 rowchol, std_norm, colchol,

1250 optimize=True)

1251 if size == 1:

1252 out = out.reshape(mean.shape)

1253 return out

1254

1255

1256matrix_normal = matrix_normal_gen()

1257

1258

1259class matrix_normal_frozen(multi_rv_frozen):

1260 """

1261 Create a frozen matrix normal distribution.

1262

1263 Parameters

1264 ----------

1265 %(_matnorm_doc_default_callparams)s

1266 seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional

1267 If `seed` is `None` the `~np.random.RandomState` singleton is used.

1268 If `seed` is an int, a new ``RandomState`` instance is used, seeded

1269 with seed.

1270 If `seed` is already a ``RandomState`` or ``Generator`` instance,

1271 then that object is used.

1272 Default is `None`.

1273

1274 Examples

1275 --------

1276 >>> import numpy as np

1277 >>> from scipy.stats import matrix_normal

1278

1279 >>> distn = matrix_normal(mean=np.zeros((3,3)))

1280 >>> X = distn.rvs(); X

1281 array([[-0.02976962, 0.93339138, -0.09663178],

1282 [ 0.67405524, 0.28250467, -0.93308929],

1283 [-0.31144782, 0.74535536, 1.30412916]])

1284 >>> distn.pdf(X)

1285 2.5160642368346784e-05

1286 >>> distn.logpdf(X)

1287 -10.590229595124615

1288 """

1289

1290 def __init__(self, mean=None, rowcov=1, colcov=1, seed=None):

1291 self._dist = matrix_normal_gen(seed)

1292 self.dims, self.mean, self.rowcov, self.colcov = \

1293 self._dist._process_parameters(mean, rowcov, colcov)

1294 self.rowpsd = _PSD(self.rowcov, allow_singular=False)

1295 self.colpsd = _PSD(self.colcov, allow_singular=False)

1296

1297 def logpdf(self, X):

1298 X = self._dist._process_quantiles(X, self.dims)

1299 out = self._dist._logpdf(self.dims, X, self.mean, self.rowpsd.U,

1300 self.rowpsd.log_pdet, self.colpsd.U,

1301 self.colpsd.log_pdet)

1302 return _squeeze_output(out)

1303

1304 def pdf(self, X):

1305 return np.exp(self.logpdf(X))

1306

1307 def rvs(self, size=1, random_state=None):

1308 return self._dist.rvs(self.mean, self.rowcov, self.colcov, size,

1309 random_state)

1310

1311

1312# Set frozen generator docstrings from corresponding docstrings in

1313# matrix_normal_gen and fill in default strings in class docstrings

1314for name in ['logpdf', 'pdf', 'rvs']:

1315 method = matrix_normal_gen.__dict__[name]

1316 method_frozen = matrix_normal_frozen.__dict__[name]

1317 method_frozen.__doc__ = doccer.docformat(method.__doc__,

1318 matnorm_docdict_noparams)

1319 method.__doc__ = doccer.docformat(method.__doc__, matnorm_docdict_params)

1320

1321_dirichlet_doc_default_callparams = """\

1322alpha : array_like

1323 The concentration parameters. The number of entries determines the

1324 dimensionality of the distribution.

1325"""

1326_dirichlet_doc_frozen_callparams = ""

1327

1328_dirichlet_doc_frozen_callparams_note = """\

1329See class definition for a detailed description of parameters."""

1330

1331dirichlet_docdict_params = {

1332 '_dirichlet_doc_default_callparams': _dirichlet_doc_default_callparams,

1333 '_doc_random_state': _doc_random_state

1334}

1335

1336dirichlet_docdict_noparams = {

1337 '_dirichlet_doc_default_callparams': _dirichlet_doc_frozen_callparams,

1338 '_doc_random_state': _doc_random_state

1339}

1340

1341

1342def _dirichlet_check_parameters(alpha):

1343 alpha = np.asarray(alpha)

1344 if np.min(alpha) <= 0:

1345 raise ValueError("All parameters must be greater than 0")

1346 elif alpha.ndim != 1:

1347 raise ValueError("Parameter vector 'a' must be one dimensional, "

1348 "but a.shape = %s." % (alpha.shape, ))

1349 return alpha

1350

1351

1352def _dirichlet_check_input(alpha, x):

1353 x = np.asarray(x)

1354

1355 if x.shape[0] + 1 != alpha.shape[0] and x.shape[0] != alpha.shape[0]:

1356 raise ValueError("Vector 'x' must have either the same number "

1357 "of entries as, or one entry fewer than, "

1358 "parameter vector 'a', but alpha.shape = %s "

1359 "and x.shape = %s." % (alpha.shape, x.shape))

1360

1361 if x.shape[0] != alpha.shape[0]:

1362 xk = np.array([1 - np.sum(x, 0)])

1363 if xk.ndim == 1:

1364 x = np.append(x, xk)

1365 elif xk.ndim == 2:

1366 x = np.vstack((x, xk))

1367 else:

1368 raise ValueError("The input must be one dimensional or a two "

1369 "dimensional matrix containing the entries.")

1370

1371 if np.min(x) < 0:

1372 raise ValueError("Each entry in 'x' must be greater than or equal "

1373 "to zero.")

1374

1375 if np.max(x) > 1:

1376 raise ValueError("Each entry in 'x' must be smaller or equal one.")

1377

1378 # Check x_i > 0 or alpha_i > 1

1379 xeq0 = (x == 0)

1380 alphalt1 = (alpha < 1)

1381 if x.shape != alpha.shape:

1382 alphalt1 = np.repeat(alphalt1, x.shape[-1], axis=-1).reshape(x.shape)

1383 chk = np.logical_and(xeq0, alphalt1)

1384

1385 if np.sum(chk):

1386 raise ValueError("Each entry in 'x' must be greater than zero if its "

1387 "alpha is less than one.")

1388

1389 if (np.abs(np.sum(x, 0) - 1.0) > 10e-10).any():

1390 raise ValueError("The input vector 'x' must lie within the normal "

1391 "simplex. but np.sum(x, 0) = %s." % np.sum(x, 0))

1392

1393 return x

1394

1395

1396def _lnB(alpha):

1397 r"""Internal helper function to compute the log of the useful quotient.

1398

1399 .. math::

1400

1401 B(\alpha) = \frac{\prod_{i=1}{K}\Gamma(\alpha_i)}

1402 {\Gamma\left(\sum_{i=1}^{K} \alpha_i \right)}

1403

1404 Parameters

1405 ----------

1406 %(_dirichlet_doc_default_callparams)s

1407

1408 Returns

1409 -------

1410 B : scalar

1411 Helper quotient, internal use only

1412

1413 """

1414 return np.sum(gammaln(alpha)) - gammaln(np.sum(alpha))

1415

1416

1417class dirichlet_gen(multi_rv_generic):

1418 r"""A Dirichlet random variable.

1419

1420 The ``alpha`` keyword specifies the concentration parameters of the

1421 distribution.

1422

1423 .. versionadded:: 0.15.0

1424

1425 Methods

1426 -------

1427 pdf(x, alpha)

1428 Probability density function.

1429 logpdf(x, alpha)

1430 Log of the probability density function.

1431 rvs(alpha, size=1, random_state=None)

1432 Draw random samples from a Dirichlet distribution.

1433 mean(alpha)

1434 The mean of the Dirichlet distribution

1435 var(alpha)

1436 The variance of the Dirichlet distribution

1437 entropy(alpha)

1438 Compute the differential entropy of the Dirichlet distribution.

1439

1440 Parameters

1441 ----------

1442 %(_dirichlet_doc_default_callparams)s

1443 %(_doc_random_state)s

1444

1445 Notes

1446 -----

1447 Each :math:`\alpha` entry must be positive. The distribution has only

1448 support on the simplex defined by

1449

1450 .. math::

1451 \sum_{i=1}^{K} x_i = 1

1452

1453 where :math:`0 < x_i < 1`.

1454

1455 If the quantiles don't lie within the simplex, a ValueError is raised.

1456

1457 The probability density function for `dirichlet` is

1458

1459 .. math::

1460

1461 f(x) = \frac{1}{\mathrm{B}(\boldsymbol\alpha)} \prod_{i=1}^K x_i^{\alpha_i - 1}

1462

1463 where

1464

1465 .. math::

1466

1467 \mathrm{B}(\boldsymbol\alpha) = \frac{\prod_{i=1}^K \Gamma(\alpha_i)}

1468 {\Gamma\bigl(\sum_{i=1}^K \alpha_i\bigr)}

1469

1470 and :math:`\boldsymbol\alpha=(\alpha_1,\ldots,\alpha_K)`, the

1471 concentration parameters and :math:`K` is the dimension of the space

1472 where :math:`x` takes values.

1473

1474 Note that the `dirichlet` interface is somewhat inconsistent.

1475 The array returned by the rvs function is transposed

1476 with respect to the format expected by the pdf and logpdf.

1477

1478 Examples

1479 --------

1480 >>> import numpy as np

1481 >>> from scipy.stats import dirichlet

1482

1483 Generate a dirichlet random variable

1484

1485 >>> quantiles = np.array([0.2, 0.2, 0.6]) # specify quantiles

1486 >>> alpha = np.array([0.4, 5, 15]) # specify concentration parameters

1487 >>> dirichlet.pdf(quantiles, alpha)

1488 0.2843831684937255

1489

1490 The same PDF but following a log scale

1491

1492 >>> dirichlet.logpdf(quantiles, alpha)

1493 -1.2574327653159187

1494

1495 Once we specify the dirichlet distribution

1496 we can then calculate quantities of interest

1497

1498 >>> dirichlet.mean(alpha) # get the mean of the distribution

1499 array([0.01960784, 0.24509804, 0.73529412])

1500 >>> dirichlet.var(alpha) # get variance

1501 array([0.00089829, 0.00864603, 0.00909517])

1502 >>> dirichlet.entropy(alpha) # calculate the differential entropy

1503 -4.3280162474082715

1504

1505 We can also return random samples from the distribution

1506

1507 >>> dirichlet.rvs(alpha, size=1, random_state=1)

1508 array([[0.00766178, 0.24670518, 0.74563305]])

1509 >>> dirichlet.rvs(alpha, size=2, random_state=2)

1510 array([[0.01639427, 0.1292273 , 0.85437844],

1511 [0.00156917, 0.19033695, 0.80809388]])

1512

1513 Alternatively, the object may be called (as a function) to fix

1514 concentration parameters, returning a "frozen" Dirichlet

1515 random variable:

1516

1517 >>> rv = dirichlet(alpha)

1518 >>> # Frozen object with the same methods but holding the given

1519 >>> # concentration parameters fixed.

1520

1521 """

1522

1523 def __init__(self, seed=None):

1524 super().__init__(seed)

1525 self.__doc__ = doccer.docformat(self.__doc__, dirichlet_docdict_params)

1526

1527 def __call__(self, alpha, seed=None):

1528 return dirichlet_frozen(alpha, seed=seed)

1529

1530 def _logpdf(self, x, alpha):

1531 """Log of the Dirichlet probability density function.

1532

1533 Parameters

1534 ----------

1535 x : ndarray

1536 Points at which to evaluate the log of the probability

1537 density function

1538 %(_dirichlet_doc_default_callparams)s

1539

1540 Notes

1541 -----

1542 As this function does no argument checking, it should not be

1543 called directly; use 'logpdf' instead.

1544

1545 """

1546 lnB = _lnB(alpha)

1547 return - lnB + np.sum((xlogy(alpha - 1, x.T)).T, 0)

1548

1549 def logpdf(self, x, alpha):

1550 """Log of the Dirichlet probability density function.

1551

1552 Parameters

1553 ----------

1554 x : array_like

1555 Quantiles, with the last axis of `x` denoting the components.

1556 %(_dirichlet_doc_default_callparams)s

1557

1558 Returns

1559 -------

1560 pdf : ndarray or scalar

1561 Log of the probability density function evaluated at `x`.

1562

1563 """

1564 alpha = _dirichlet_check_parameters(alpha)

1565 x = _dirichlet_check_input(alpha, x)

1566

1567 out = self._logpdf(x, alpha)

1568 return _squeeze_output(out)

1569

1570 def pdf(self, x, alpha):

1571 """The Dirichlet probability density function.

1572

1573 Parameters

1574 ----------

1575 x : array_like

1576 Quantiles, with the last axis of `x` denoting the components.

1577 %(_dirichlet_doc_default_callparams)s

1578

1579 Returns

1580 -------

1581 pdf : ndarray or scalar

1582 The probability density function evaluated at `x`.

1583

1584 """

1585 alpha = _dirichlet_check_parameters(alpha)

1586 x = _dirichlet_check_input(alpha, x)

1587

1588 out = np.exp(self._logpdf(x, alpha))

1589 return _squeeze_output(out)

1590

1591 def mean(self, alpha):

1592 """Mean of the Dirichlet distribution.

1593

1594 Parameters

1595 ----------

1596 %(_dirichlet_doc_default_callparams)s

1597

1598 Returns

1599 -------

1600 mu : ndarray or scalar

1601 Mean of the Dirichlet distribution.

1602

1603 """

1604 alpha = _dirichlet_check_parameters(alpha)

1605

1606 out = alpha / (np.sum(alpha))

1607 return _squeeze_output(out)

1608

1609 def var(self, alpha):

1610 """Variance of the Dirichlet distribution.

1611

1612 Parameters

1613 ----------

1614 %(_dirichlet_doc_default_callparams)s

1615

1616 Returns

1617 -------

1618 v : ndarray or scalar

1619 Variance of the Dirichlet distribution.

1620

1621 """

1622

1623 alpha = _dirichlet_check_parameters(alpha)

1624

1625 alpha0 = np.sum(alpha)

1626 out = (alpha * (alpha0 - alpha)) / ((alpha0 * alpha0) * (alpha0 + 1))

1627 return _squeeze_output(out)

1628

1629 def entropy(self, alpha):

1630 """

1631 Differential entropy of the Dirichlet distribution.

1632

1633 Parameters

1634 ----------

1635 %(_dirichlet_doc_default_callparams)s

1636

1637 Returns

1638 -------

1639 h : scalar

1640 Entropy of the Dirichlet distribution

1641

1642 """

1643

1644 alpha = _dirichlet_check_parameters(alpha)

1645

1646 alpha0 = np.sum(alpha)

1647 lnB = _lnB(alpha)

1648 K = alpha.shape[0]

1649

1650 out = lnB + (alpha0 - K) * scipy.special.psi(alpha0) - np.sum(

1651 (alpha - 1) * scipy.special.psi(alpha))

1652 return _squeeze_output(out)

1653

1654 def rvs(self, alpha, size=1, random_state=None):

1655 """

1656 Draw random samples from a Dirichlet distribution.

1657

1658 Parameters

1659 ----------

1660 %(_dirichlet_doc_default_callparams)s

1661 size : int, optional

1662 Number of samples to draw (default 1).

1663 %(_doc_random_state)s

1664

1665 Returns

1666 -------

1667 rvs : ndarray or scalar

1668 Random variates of size (`size`, `N`), where `N` is the

1669 dimension of the random variable.

1670

1671 """

1672 alpha = _dirichlet_check_parameters(alpha)

1673 random_state = self._get_random_state(random_state)

1674 return random_state.dirichlet(alpha, size=size)

1675

1676

1677dirichlet = dirichlet_gen()

1678

1679

1680class dirichlet_frozen(multi_rv_frozen):

1681 def __init__(self, alpha, seed=None):

1682 self.alpha = _dirichlet_check_parameters(alpha)

1683 self._dist = dirichlet_gen(seed)

1684

1685 def logpdf(self, x):

1686 return self._dist.logpdf(x, self.alpha)

1687

1688 def pdf(self, x):

1689 return self._dist.pdf(x, self.alpha)

1690

1691 def mean(self):

1692 return self._dist.mean(self.alpha)

1693

1694 def var(self):

1695 return self._dist.var(self.alpha)

1696

1697 def entropy(self):

1698 return self._dist.entropy(self.alpha)

1699

1700 def rvs(self, size=1, random_state=None):

1701 return self._dist.rvs(self.alpha, size, random_state)

1702

1703

1704# Set frozen generator docstrings from corresponding docstrings in

1705# multivariate_normal_gen and fill in default strings in class docstrings

1706for name in ['logpdf', 'pdf', 'rvs', 'mean', 'var', 'entropy']:

1707 method = dirichlet_gen.__dict__[name]

1708 method_frozen = dirichlet_frozen.__dict__[name]

1709 method_frozen.__doc__ = doccer.docformat(

1710 method.__doc__, dirichlet_docdict_noparams)

1711 method.__doc__ = doccer.docformat(method.__doc__, dirichlet_docdict_params)

1712

1713

1714_wishart_doc_default_callparams = """\

1715df : int

1716 Degrees of freedom, must be greater than or equal to dimension of the

1717 scale matrix

1718scale : array_like

1719 Symmetric positive definite scale matrix of the distribution

1720"""

1721

1722_wishart_doc_callparams_note = ""

1723

1724_wishart_doc_frozen_callparams = ""

1725

1726_wishart_doc_frozen_callparams_note = """\

1727See class definition for a detailed description of parameters."""

1728

1729wishart_docdict_params = {

1730 '_doc_default_callparams': _wishart_doc_default_callparams,

1731 '_doc_callparams_note': _wishart_doc_callparams_note,

1732 '_doc_random_state': _doc_random_state

1733}

1734

1735wishart_docdict_noparams = {

1736 '_doc_default_callparams': _wishart_doc_frozen_callparams,

1737 '_doc_callparams_note': _wishart_doc_frozen_callparams_note,

1738 '_doc_random_state': _doc_random_state

1739}

1740

1741

1742class wishart_gen(multi_rv_generic):

1743 r"""A Wishart random variable.

1744

1745 The `df` keyword specifies the degrees of freedom. The `scale` keyword

1746 specifies the scale matrix, which must be symmetric and positive definite.

1747 In this context, the scale matrix is often interpreted in terms of a

1748 multivariate normal precision matrix (the inverse of the covariance

1749 matrix). These arguments must satisfy the relationship

1750 ``df > scale.ndim - 1``, but see notes on using the `rvs` method with

1751 ``df < scale.ndim``.

1752

1753 Methods

1754 -------

1755 pdf(x, df, scale)

1756 Probability density function.

1757 logpdf(x, df, scale)

1758 Log of the probability density function.

1759 rvs(df, scale, size=1, random_state=None)

1760 Draw random samples from a Wishart distribution.

1761 entropy()

1762 Compute the differential entropy of the Wishart distribution.

1763

1764 Parameters

1765 ----------

1766 %(_doc_default_callparams)s

1767 %(_doc_random_state)s

1768

1769 Raises

1770 ------

1771 scipy.linalg.LinAlgError

1772 If the scale matrix `scale` is not positive definite.

1773

1774 See Also

1775 --------

1776 invwishart, chi2

1777

1778 Notes

1779 -----

1780 %(_doc_callparams_note)s

1781

1782 The scale matrix `scale` must be a symmetric positive definite

1783 matrix. Singular matrices, including the symmetric positive semi-definite

1784 case, are not supported. Symmetry is not checked; only the lower triangular

1785 portion is used.

1786

1787 The Wishart distribution is often denoted

1788

1789 .. math::

1790

1791 W_p(\nu, \Sigma)

1792

1793 where :math:`\nu` is the degrees of freedom and :math:`\Sigma` is the

1794 :math:`p \times p` scale matrix.

1795

1796 The probability density function for `wishart` has support over positive

1797 definite matrices :math:`S`; if :math:`S \sim W_p(\nu, \Sigma)`, then

1798 its PDF is given by:

1799

1800 .. math::

1801

1802 f(S) = \frac{|S|^{\frac{\nu - p - 1}{2}}}{2^{ \frac{\nu p}{2} }

1803 |\Sigma|^\frac{\nu}{2} \Gamma_p \left ( \frac{\nu}{2} \right )}

1804 \exp\left( -tr(\Sigma^{-1} S) / 2 \right)

1805

1806 If :math:`S \sim W_p(\nu, \Sigma)` (Wishart) then

1807 :math:`S^{-1} \sim W_p^{-1}(\nu, \Sigma^{-1})` (inverse Wishart).

1808

1809 If the scale matrix is 1-dimensional and equal to one, then the Wishart

1810 distribution :math:`W_1(\nu, 1)` collapses to the :math:`\chi^2(\nu)`

1811 distribution.

1812

1813 The algorithm [2]_ implemented by the `rvs` method may

1814 produce numerically singular matrices with :math:`p - 1 < \nu < p`; the

1815 user may wish to check for this condition and generate replacement samples

1816 as necessary.

1817

1818

1819 .. versionadded:: 0.16.0

1820

1821 References

1822 ----------

1823 .. [1] M.L. Eaton, "Multivariate Statistics: A Vector Space Approach",

1824 Wiley, 1983.

1825 .. [2] W.B. Smith and R.R. Hocking, "Algorithm AS 53: Wishart Variate

1826 Generator", Applied Statistics, vol. 21, pp. 341-345, 1972.

1827

1828 Examples

1829 --------

1830 >>> import numpy as np

1831 >>> import matplotlib.pyplot as plt

1832 >>> from scipy.stats import wishart, chi2

1833 >>> x = np.linspace(1e-5, 8, 100)

1834 >>> w = wishart.pdf(x, df=3, scale=1); w[:5]

1835 array([ 0.00126156, 0.10892176, 0.14793434, 0.17400548, 0.1929669 ])

1836 >>> c = chi2.pdf(x, 3); c[:5]

1837 array([ 0.00126156, 0.10892176, 0.14793434, 0.17400548, 0.1929669 ])

1838 >>> plt.plot(x, w)

1839 >>> plt.show()

1840

1841 The input quantiles can be any shape of array, as long as the last

1842 axis labels the components.

1843

1844 Alternatively, the object may be called (as a function) to fix the degrees

1845 of freedom and scale parameters, returning a "frozen" Wishart random

1846 variable:

1847

1848 >>> rv = wishart(df=1, scale=1)

1849 >>> # Frozen object with the same methods but holding the given

1850 >>> # degrees of freedom and scale fixed.

1851

1852 """

1853

1854 def __init__(self, seed=None):

1855 super().__init__(seed)

1856 self.__doc__ = doccer.docformat(self.__doc__, wishart_docdict_params)

1857

1858 def __call__(self, df=None, scale=None, seed=None):

1859 """Create a frozen Wishart distribution.

1860

1861 See `wishart_frozen` for more information.

1862 """

1863 return wishart_frozen(df, scale, seed)

1864

1865 def _process_parameters(self, df, scale):

1866 if scale is None:

1867 scale = 1.0

1868 scale = np.asarray(scale, dtype=float)

1869

1870 if scale.ndim == 0:

1871 scale = scale[np.newaxis, np.newaxis]

1872 elif scale.ndim == 1:

1873 scale = np.diag(scale)

1874 elif scale.ndim == 2 and not scale.shape[0] == scale.shape[1]:

1875 raise ValueError("Array 'scale' must be square if it is two"

1876 " dimensional, but scale.scale = %s."

1877 % str(scale.shape))

1878 elif scale.ndim > 2:

1879 raise ValueError("Array 'scale' must be at most two-dimensional,"

1880 " but scale.ndim = %d" % scale.ndim)

1881

1882 dim = scale.shape[0]

1883

1884 if df is None:

1885 df = dim

1886 elif not np.isscalar(df):

1887 raise ValueError("Degrees of freedom must be a scalar.")

1888 elif df <= dim - 1:

1889 raise ValueError("Degrees of freedom must be greater than the "

1890 "dimension of scale matrix minus 1.")

1891

1892 return dim, df, scale

1893

1894 def _process_quantiles(self, x, dim):

1895 """

1896 Adjust quantiles array so that last axis labels the components of

1897 each data point.

1898 """

1899 x = np.asarray(x, dtype=float)

1900

1901 if x.ndim == 0:

1902 x = x * np.eye(dim)[:, :, np.newaxis]

1903 if x.ndim == 1:

1904 if dim == 1:

1905 x = x[np.newaxis, np.newaxis, :]

1906 else:

1907 x = np.diag(x)[:, :, np.newaxis]

1908 elif x.ndim == 2:

1909 if not x.shape[0] == x.shape[1]:

1910 raise ValueError("Quantiles must be square if they are two"

1911 " dimensional, but x.shape = %s."

1912 % str(x.shape))

1913 x = x[:, :, np.newaxis]

1914 elif x.ndim == 3:

1915 if not x.shape[0] == x.shape[1]:

1916 raise ValueError("Quantiles must be square in the first two"

1917 " dimensions if they are three dimensional"

1918 ", but x.shape = %s." % str(x.shape))

1919 elif x.ndim > 3:

1920 raise ValueError("Quantiles must be at most two-dimensional with"

1921 " an additional dimension for multiple"

1922 "components, but x.ndim = %d" % x.ndim)

1923

1924 # Now we have 3-dim array; should have shape [dim, dim, *]

1925 if not x.shape[0:2] == (dim, dim):

1926 raise ValueError('Quantiles have incompatible dimensions: should'

1927 ' be %s, got %s.' % ((dim, dim), x.shape[0:2]))

1928

1929 return x

1930

1931 def _process_size(self, size):

1932 size = np.asarray(size)

1933

1934 if size.ndim == 0:

1935 size = size[np.newaxis]

1936 elif size.ndim > 1:

1937 raise ValueError('Size must be an integer or tuple of integers;'

1938 ' thus must have dimension <= 1.'

1939 ' Got size.ndim = %s' % str(tuple(size)))

1940 n = size.prod()

1941 shape = tuple(size)

1942

1943 return n, shape

1944

1945 def _logpdf(self, x, dim, df, scale, log_det_scale, C):

1946 """Log of the Wishart probability density function.

1947

1948 Parameters

1949 ----------

1950 x : ndarray

1951 Points at which to evaluate the log of the probability

1952 density function

1953 dim : int

1954 Dimension of the scale matrix

1955 df : int

1956 Degrees of freedom

1957 scale : ndarray

1958 Scale matrix

1959 log_det_scale : float

1960 Logarithm of the determinant of the scale matrix

1961 C : ndarray

1962 Cholesky factorization of the scale matrix, lower triagular.

1963

1964 Notes

1965 -----

1966 As this function does no argument checking, it should not be

1967 called directly; use 'logpdf' instead.

1968

1969 """

1970 # log determinant of x

1971 # Note: x has components along the last axis, so that x.T has

1972 # components alone the 0-th axis. Then since det(A) = det(A'), this

1973 # gives us a 1-dim vector of determinants

1974

1975 # Retrieve tr(scale^{-1} x)

1976 log_det_x = np.empty(x.shape[-1])

1977 scale_inv_x = np.empty(x.shape)

1978 tr_scale_inv_x = np.empty(x.shape[-1])

1979 for i in range(x.shape[-1]):

1980 _, log_det_x[i] = self._cholesky_logdet(x[:, :, i])

1981 scale_inv_x[:, :, i] = scipy.linalg.cho_solve((C, True), x[:, :, i])

1982 tr_scale_inv_x[i] = scale_inv_x[:, :, i].trace()

1983

1984 # Log PDF

1985 out = ((0.5 * (df - dim - 1) * log_det_x - 0.5 * tr_scale_inv_x) -

1986 (0.5 * df * dim * _LOG_2 + 0.5 * df * log_det_scale +

1987 multigammaln(0.5*df, dim)))

1988

1989 return out

1990

1991 def logpdf(self, x, df, scale):

1992 """Log of the Wishart probability density function.

1993

1994 Parameters

1995 ----------

1996 x : array_like

1997 Quantiles, with the last axis of `x` denoting the components.

1998 Each quantile must be a symmetric positive definite matrix.

1999 %(_doc_default_callparams)s

2000

2001 Returns

2002 -------

2003 pdf : ndarray

2004 Log of the probability density function evaluated at `x`

2005

2006 Notes

2007 -----

2008 %(_doc_callparams_note)s

2009

2010 """

2011 dim, df, scale = self._process_parameters(df, scale)

2012 x = self._process_quantiles(x, dim)

2013

2014 # Cholesky decomposition of scale, get log(det(scale))

2015 C, log_det_scale = self._cholesky_logdet(scale)

2016

2017 out = self._logpdf(x, dim, df, scale, log_det_scale, C)

2018 return _squeeze_output(out)

2019

2020 def pdf(self, x, df, scale):

2021 """Wishart probability density function.

2022

2023 Parameters

2024 ----------

2025 x : array_like

2026 Quantiles, with the last axis of `x` denoting the components.

2027 Each quantile must be a symmetric positive definite matrix.

2028 %(_doc_default_callparams)s

2029

2030 Returns

2031 -------

2032 pdf : ndarray

2033 Probability density function evaluated at `x`

2034

2035 Notes

2036 -----

2037 %(_doc_callparams_note)s

2038

2039 """

2040 return np.exp(self.logpdf(x, df, scale))

2041

2042 def _mean(self, dim, df, scale):

2043 """Mean of the Wishart distribution.

2044

2045 Parameters

2046 ----------

2047 dim : int

2048 Dimension of the scale matrix

2049 %(_doc_default_callparams)s

2050

2051 Notes

2052 -----

2053 As this function does no argument checking, it should not be

2054 called directly; use 'mean' instead.

2055

2056 """

2057 return df * scale

2058

2059 def mean(self, df, scale):

2060 """Mean of the Wishart distribution.

2061

2062 Parameters

2063 ----------

2064 %(_doc_default_callparams)s

2065

2066 Returns

2067 -------

2068 mean : float

2069 The mean of the distribution

2070 """

2071 dim, df, scale = self._process_parameters(df, scale)

2072 out = self._mean(dim, df, scale)

2073 return _squeeze_output(out)

2074

2075 def _mode(self, dim, df, scale):

2076 """Mode of the Wishart distribution.

2077

2078 Parameters

2079 ----------

2080 dim : int

2081 Dimension of the scale matrix

2082 %(_doc_default_callparams)s

2083

2084 Notes

2085 -----

2086 As this function does no argument checking, it should not be

2087 called directly; use 'mode' instead.

2088

2089 """

2090 if df >= dim + 1:

2091 out = (df-dim-1) * scale

2092 else:

2093 out = None

2094 return out

2095

2096 def mode(self, df, scale):

2097 """Mode of the Wishart distribution

2098

2099 Only valid if the degrees of freedom are greater than the dimension of

2100 the scale matrix.

2101

2102 Parameters

2103 ----------

2104 %(_doc_default_callparams)s

2105

2106 Returns

2107 -------

2108 mode : float or None

2109 The Mode of the distribution

2110 """

2111 dim, df, scale = self._process_parameters(df, scale)

2112 out = self._mode(dim, df, scale)

2113 return _squeeze_output(out) if out is not None else out

2114

2115 def _var(self, dim, df, scale):

2116 """Variance of the Wishart distribution.

2117

2118 Parameters

2119 ----------

2120 dim : int

2121 Dimension of the scale matrix

2122 %(_doc_default_callparams)s

2123

2124 Notes

2125 -----

2126 As this function does no argument checking, it should not be

2127 called directly; use 'var' instead.

2128

2129 """

2130 var = scale**2

2131 diag = scale.diagonal() # 1 x dim array

2132 var += np.outer(diag, diag)

2133 var *= df

2134 return var

2135

2136 def var(self, df, scale):

2137 """Variance of the Wishart distribution.

2138

2139 Parameters

2140 ----------

2141 %(_doc_default_callparams)s

2142

2143 Returns

2144 -------

2145 var : float

2146 The variance of the distribution

2147 """

2148 dim, df, scale = self._process_parameters(df, scale)

2149 out = self._var(dim, df, scale)

2150 return _squeeze_output(out)

2151

2152 def _standard_rvs(self, n, shape, dim, df, random_state):

2153 """

2154 Parameters

2155 ----------

2156 n : integer

2157 Number of variates to generate

2158 shape : iterable

2159 Shape of the variates to generate

2160 dim : int

2161 Dimension of the scale matrix

2162 df : int

2163 Degrees of freedom

2164 random_state : {None, int, `numpy.random.Generator`,

2165 `numpy.random.RandomState`}, optional

2166

2167 If `seed` is None (or `np.random`), the `numpy.random.RandomState`

2168 singleton is used.

2169 If `seed` is an int, a new ``RandomState`` instance is used,

2170 seeded with `seed`.

2171 If `seed` is already a ``Generator`` or ``RandomState`` instance

2172 then that instance is used.

2173

2174 Notes

2175 -----

2176 As this function does no argument checking, it should not be

2177 called directly; use 'rvs' instead.

2178

2179 """

2180 # Random normal variates for off-diagonal elements

2181 n_tril = dim * (dim-1) // 2

2182 covariances = random_state.normal(

2183 size=n*n_tril).reshape(shape+(n_tril,))

2184

2185 # Random chi-square variates for diagonal elements

2186 variances = (np.r_[[random_state.chisquare(df-(i+1)+1, size=n)**0.5

2187 for i in range(dim)]].reshape((dim,) +

2188 shape[::-1]).T)

2189

2190 # Create the A matri(ces) - lower triangular

2191 A = np.zeros(shape + (dim, dim))

2192

2193 # Input the covariances

2194 size_idx = tuple([slice(None, None, None)]*len(shape))

2195 tril_idx = np.tril_indices(dim, k=-1)

2196 A[size_idx + tril_idx] = covariances

2197

2198 # Input the variances

2199 diag_idx = np.diag_indices(dim)

2200 A[size_idx + diag_idx] = variances

2201

2202 return A

2203

2204 def _rvs(self, n, shape, dim, df, C, random_state):

2205 """Draw random samples from a Wishart distribution.

2206

2207 Parameters

2208 ----------

2209 n : integer

2210 Number of variates to generate

2211 shape : iterable

2212 Shape of the variates to generate

2213 dim : int

2214 Dimension of the scale matrix

2215 df : int

2216 Degrees of freedom

2217 C : ndarray

2218 Cholesky factorization of the scale matrix, lower triangular.

2219 %(_doc_random_state)s

2220

2221 Notes

2222 -----

2223 As this function does no argument checking, it should not be

2224 called directly; use 'rvs' instead.

2225

2226 """

2227 random_state = self._get_random_state(random_state)

2228 # Calculate the matrices A, which are actually lower triangular

2229 # Cholesky factorizations of a matrix B such that B ~ W(df, I)

2230 A = self._standard_rvs(n, shape, dim, df, random_state)

2231

2232 # Calculate SA = C A A' C', where SA ~ W(df, scale)

2233 # Note: this is the product of a (lower) (lower) (lower)' (lower)'

2234 # or, denoting B = AA', it is C B C' where C is the lower

2235 # triangular Cholesky factorization of the scale matrix.

2236 # this appears to conflict with the instructions in [1]_, which

2237 # suggest that it should be D' B D where D is the lower

2238 # triangular factorization of the scale matrix. However, it is

2239 # meant to refer to the Bartlett (1933) representation of a

2240 # Wishart random variate as L A A' L' where L is lower triangular

2241 # so it appears that understanding D' to be upper triangular

2242 # is either a typo in or misreading of [1]_.

2243 for index in np.ndindex(shape):

2244 CA = np.dot(C, A[index])

2245 A[index] = np.dot(CA, CA.T)

2246

2247 return A

2248

2249 def rvs(self, df, scale, size=1, random_state=None):

2250 """Draw random samples from a Wishart distribution.

2251

2252 Parameters

2253 ----------

2254 %(_doc_default_callparams)s

2255 size : integer or iterable of integers, optional

2256 Number of samples to draw (default 1).

2257 %(_doc_random_state)s

2258

2259 Returns

2260 -------

2261 rvs : ndarray

2262 Random variates of shape (`size`) + (`dim`, `dim), where `dim` is

2263 the dimension of the scale matrix.

2264

2265 Notes

2266 -----

2267 %(_doc_callparams_note)s

2268

2269 """

2270 n, shape = self._process_size(size)

2271 dim, df, scale = self._process_parameters(df, scale)

2272

2273 # Cholesky decomposition of scale

2274 C = scipy.linalg.cholesky(scale, lower=True)

2275

2276 out = self._rvs(n, shape, dim, df, C, random_state)

2277

2278 return _squeeze_output(out)

2279

2280 def _entropy(self, dim, df, log_det_scale):

2281 """Compute the differential entropy of the Wishart.

2282

2283 Parameters

2284 ----------

2285 dim : int

2286 Dimension of the scale matrix

2287 df : int

2288 Degrees of freedom

2289 log_det_scale : float

2290 Logarithm of the determinant of the scale matrix

2291

2292 Notes

2293 -----

2294 As this function does no argument checking, it should not be

2295 called directly; use 'entropy' instead.

2296

2297 """

2298 return (

2299 0.5 * (dim+1) * log_det_scale +

2300 0.5 * dim * (dim+1) * _LOG_2 +

2301 multigammaln(0.5*df, dim) -

2302 0.5 * (df - dim - 1) * np.sum(

2303 [psi(0.5*(df + 1 - (i+1))) for i in range(dim)]

2304 ) +

2305 0.5 * df * dim

2306 )

2307

2308 def entropy(self, df, scale):

2309 """Compute the differential entropy of the Wishart.

2310

2311 Parameters

2312 ----------

2313 %(_doc_default_callparams)s

2314

2315 Returns

2316 -------

2317 h : scalar

2318 Entropy of the Wishart distribution

2319

2320 Notes

2321 -----

2322 %(_doc_callparams_note)s

2323

2324 """

2325 dim, df, scale = self._process_parameters(df, scale)

2326 _, log_det_scale = self._cholesky_logdet(scale)

2327 return self._entropy(dim, df, log_det_scale)

2328

2329 def _cholesky_logdet(self, scale):

2330 """Compute Cholesky decomposition and determine (log(det(scale)).

2331

2332 Parameters

2333 ----------

2334 scale : ndarray

2335 Scale matrix.

2336

2337 Returns

2338 -------

2339 c_decomp : ndarray

2340 The Cholesky decomposition of `scale`.

2341 logdet : scalar

2342 The log of the determinant of `scale`.

2343

2344 Notes

2345 -----

2346 This computation of ``logdet`` is equivalent to

2347 ``np.linalg.slogdet(scale)``. It is ~2x faster though.

2348

2349 """

2350 c_decomp = scipy.linalg.cholesky(scale, lower=True)

2351 logdet = 2 * np.sum(np.log(c_decomp.diagonal()))

2352 return c_decomp, logdet

2353

2354

2355wishart = wishart_gen()

2356

2357

2358class wishart_frozen(multi_rv_frozen):

2359 """Create a frozen Wishart distribution.

2360

2361 Parameters

2362 ----------

2363 df : array_like

2364 Degrees of freedom of the distribution

2365 scale : array_like

2366 Scale matrix of the distribution

2367 seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional

2368 If `seed` is None (or `np.random`), the `numpy.random.RandomState`

2369 singleton is used.

2370 If `seed` is an int, a new ``RandomState`` instance is used,

2371 seeded with `seed`.

2372 If `seed` is already a ``Generator`` or ``RandomState`` instance then

2373 that instance is used.

2374

2375 """

2376 def __init__(self, df, scale, seed=None):

2377 self._dist = wishart_gen(seed)

2378 self.dim, self.df, self.scale = self._dist._process_parameters(

2379 df, scale)

2380 self.C, self.log_det_scale = self._dist._cholesky_logdet(self.scale)

2381

2382 def logpdf(self, x):

2383 x = self._dist._process_quantiles(x, self.dim)

2384

2385 out = self._dist._logpdf(x, self.dim, self.df, self.scale,

2386 self.log_det_scale, self.C)

2387 return _squeeze_output(out)

2388

2389 def pdf(self, x):

2390 return np.exp(self.logpdf(x))

2391

2392 def mean(self):

2393 out = self._dist._mean(self.dim, self.df, self.scale)

2394 return _squeeze_output(out)

2395

2396 def mode(self):

2397 out = self._dist._mode(self.dim, self.df, self.scale)

2398 return _squeeze_output(out) if out is not None else out

2399

2400 def var(self):

2401 out = self._dist._var(self.dim, self.df, self.scale)

2402 return _squeeze_output(out)

2403

2404 def rvs(self, size=1, random_state=None):

2405 n, shape = self._dist._process_size(size)

2406 out = self._dist._rvs(n, shape, self.dim, self.df,

2407 self.C, random_state)

2408 return _squeeze_output(out)

2409

2410 def entropy(self):

2411 return self._dist._entropy(self.dim, self.df, self.log_det_scale)

2412

2413

2414# Set frozen generator docstrings from corresponding docstrings in

2415# Wishart and fill in default strings in class docstrings

2416for name in ['logpdf', 'pdf', 'mean', 'mode', 'var', 'rvs', 'entropy']:

2417 method = wishart_gen.__dict__[name]

2418 method_frozen = wishart_frozen.__dict__[name]

2419 method_frozen.__doc__ = doccer.docformat(

2420 method.__doc__, wishart_docdict_noparams)

2421 method.__doc__ = doccer.docformat(method.__doc__, wishart_docdict_params)

2422

2423

2424def _cho_inv_batch(a, check_finite=True):

2425 """

2426 Invert the matrices a_i, using a Cholesky factorization of A, where

2427 a_i resides in the last two dimensions of a and the other indices describe

2428 the index i.

2429

2430 Overwrites the data in a.

2431

2432 Parameters

2433 ----------

2434 a : array

2435 Array of matrices to invert, where the matrices themselves are stored

2436 in the last two dimensions.

2437 check_finite : bool, optional

2438 Whether to check that the input matrices contain only finite numbers.

2439 Disabling may give a performance gain, but may result in problems

2440 (crashes, non-termination) if the inputs do contain infinities or NaNs.

2441

2442 Returns

2443 -------

2444 x : array

2445 Array of inverses of the matrices ``a_i``.

2446

2447 See Also

2448 --------

2449 scipy.linalg.cholesky : Cholesky factorization of a matrix

2450

2451 """

2452 if check_finite:

2453 a1 = asarray_chkfinite(a)

2454 else:

2455 a1 = asarray(a)

2456 if len(a1.shape) < 2 or a1.shape[-2] != a1.shape[-1]:

2457 raise ValueError('expected square matrix in last two dimensions')

2458

2459 potrf, potri = get_lapack_funcs(('potrf', 'potri'), (a1,))

2460

2461 triu_rows, triu_cols = np.triu_indices(a.shape[-2], k=1)

2462 for index in np.ndindex(a1.shape[:-2]):

2463

2464 # Cholesky decomposition

2465 a1[index], info = potrf(a1[index], lower=True, overwrite_a=False,

2466 clean=False)

2467 if info > 0:

2468 raise LinAlgError("%d-th leading minor not positive definite"

2469 % info)

2470 if info < 0:

2471 raise ValueError('illegal value in %d-th argument of internal'

2472 ' potrf' % -info)

2473 # Inversion

2474 a1[index], info = potri(a1[index], lower=True, overwrite_c=False)

2475 if info > 0:

2476 raise LinAlgError("the inverse could not be computed")

2477 if info < 0:

2478 raise ValueError('illegal value in %d-th argument of internal'

2479 ' potrf' % -info)

2480

2481 # Make symmetric (dpotri only fills in the lower triangle)

2482 a1[index][triu_rows, triu_cols] = a1[index][triu_cols, triu_rows]

2483

2484 return a1

2485

2486

2487class invwishart_gen(wishart_gen):

2488 r"""An inverse Wishart random variable.

2489

2490 The `df` keyword specifies the degrees of freedom. The `scale` keyword

2491 specifies the scale matrix, which must be symmetric and positive definite.

2492 In this context, the scale matrix is often interpreted in terms of a

2493 multivariate normal covariance matrix.

2494

2495 Methods

2496 -------

2497 pdf(x, df, scale)

2498 Probability density function.

2499 logpdf(x, df, scale)

2500 Log of the probability density function.

2501 rvs(df, scale, size=1, random_state=None)

2502 Draw random samples from an inverse Wishart distribution.

2503

2504 Parameters

2505 ----------

2506 %(_doc_default_callparams)s

2507 %(_doc_random_state)s

2508

2509 Raises

2510 ------

2511 scipy.linalg.LinAlgError

2512 If the scale matrix `scale` is not positive definite.

2513

2514 See Also

2515 --------

2516 wishart

2517

2518 Notes

2519 -----

2520 %(_doc_callparams_note)s

2521

2522 The scale matrix `scale` must be a symmetric positive definite

2523 matrix. Singular matrices, including the symmetric positive semi-definite

2524 case, are not supported. Symmetry is not checked; only the lower triangular

2525 portion is used.

2526

2527 The inverse Wishart distribution is often denoted

2528

2529 .. math::

2530

2531 W_p^{-1}(\nu, \Psi)

2532

2533 where :math:`\nu` is the degrees of freedom and :math:`\Psi` is the

2534 :math:`p \times p` scale matrix.

2535

2536 The probability density function for `invwishart` has support over positive

2537 definite matrices :math:`S`; if :math:`S \sim W^{-1}_p(\nu, \Sigma)`,

2538 then its PDF is given by:

2539

2540 .. math::

2541

2542 f(S) = \frac{|\Sigma|^\frac{\nu}{2}}{2^{ \frac{\nu p}{2} }

2543 |S|^{\frac{\nu + p + 1}{2}} \Gamma_p \left(\frac{\nu}{2} \right)}

2544 \exp\left( -tr(\Sigma S^{-1}) / 2 \right)

2545

2546 If :math:`S \sim W_p^{-1}(\nu, \Psi)` (inverse Wishart) then

2547 :math:`S^{-1} \sim W_p(\nu, \Psi^{-1})` (Wishart).

2548

2549 If the scale matrix is 1-dimensional and equal to one, then the inverse

2550 Wishart distribution :math:`W_1(\nu, 1)` collapses to the

2551 inverse Gamma distribution with parameters shape = :math:`\frac{\nu}{2}`

2552 and scale = :math:`\frac{1}{2}`.

2553

2554 .. versionadded:: 0.16.0

2555

2556 References

2557 ----------

2558 .. [1] M.L. Eaton, "Multivariate Statistics: A Vector Space Approach",

2559 Wiley, 1983.

2560 .. [2] M.C. Jones, "Generating Inverse Wishart Matrices", Communications

2561 in Statistics - Simulation and Computation, vol. 14.2, pp.511-514,

2562 1985.

2563

2564 Examples

2565 --------

2566 >>> import numpy as np

2567 >>> import matplotlib.pyplot as plt

2568 >>> from scipy.stats import invwishart, invgamma

2569 >>> x = np.linspace(0.01, 1, 100)

2570 >>> iw = invwishart.pdf(x, df=6, scale=1)

2571 >>> iw[:3]

2572 array([ 1.20546865e-15, 5.42497807e-06, 4.45813929e-03])

2573 >>> ig = invgamma.pdf(x, 6/2., scale=1./2)

2574 >>> ig[:3]

2575 array([ 1.20546865e-15, 5.42497807e-06, 4.45813929e-03])

2576 >>> plt.plot(x, iw)

2577 >>> plt.show()

2578

2579 The input quantiles can be any shape of array, as long as the last

2580 axis labels the components.

2581

2582 Alternatively, the object may be called (as a function) to fix the degrees

2583 of freedom and scale parameters, returning a "frozen" inverse Wishart

2584 random variable:

2585

2586 >>> rv = invwishart(df=1, scale=1)

2587 >>> # Frozen object with the same methods but holding the given

2588 >>> # degrees of freedom and scale fixed.

2589

2590 """

2591

2592 def __init__(self, seed=None):

2593 super().__init__(seed)

2594 self.__doc__ = doccer.docformat(self.__doc__, wishart_docdict_params)

2595

2596 def __call__(self, df=None, scale=None, seed=None):

2597 """Create a frozen inverse Wishart distribution.

2598

2599 See `invwishart_frozen` for more information.

2600

2601 """

2602 return invwishart_frozen(df, scale, seed)

2603

2604 def _logpdf(self, x, dim, df, scale, log_det_scale):

2605 """Log of the inverse Wishart probability density function.

2606

2607 Parameters

2608 ----------

2609 x : ndarray

2610 Points at which to evaluate the log of the probability

2611 density function.

2612 dim : int

2613 Dimension of the scale matrix

2614 df : int

2615 Degrees of freedom

2616 scale : ndarray

2617 Scale matrix

2618 log_det_scale : float

2619 Logarithm of the determinant of the scale matrix

2620

2621 Notes

2622 -----

2623 As this function does no argument checking, it should not be

2624 called directly; use 'logpdf' instead.

2625

2626 """

2627 log_det_x = np.empty(x.shape[-1])

2628 x_inv = np.copy(x).T

2629 if dim > 1:

2630 _cho_inv_batch(x_inv) # works in-place

2631 else:

2632 x_inv = 1./x_inv

2633 tr_scale_x_inv = np.empty(x.shape[-1])

2634

2635 for i in range(x.shape[-1]):

2636 C, lower = scipy.linalg.cho_factor(x[:, :, i], lower=True)

2637

2638 log_det_x[i] = 2 * np.sum(np.log(C.diagonal()))

2639

2640 tr_scale_x_inv[i] = np.dot(scale, x_inv[i]).trace()

2641

2642 # Log PDF

2643 out = ((0.5 * df * log_det_scale - 0.5 * tr_scale_x_inv) -

2644 (0.5 * df * dim * _LOG_2 + 0.5 * (df + dim + 1) * log_det_x) -

2645 multigammaln(0.5*df, dim))

2646

2647 return out

2648

2649 def logpdf(self, x, df, scale):

2650 """Log of the inverse Wishart probability density function.

2651

2652 Parameters

2653 ----------

2654 x : array_like

2655 Quantiles, with the last axis of `x` denoting the components.

2656 Each quantile must be a symmetric positive definite matrix.

2657 %(_doc_default_callparams)s

2658

2659 Returns

2660 -------

2661 pdf : ndarray

2662 Log of the probability density function evaluated at `x`

2663

2664 Notes

2665 -----

2666 %(_doc_callparams_note)s

2667

2668 """

2669 dim, df, scale = self._process_parameters(df, scale)

2670 x = self._process_quantiles(x, dim)

2671 _, log_det_scale = self._cholesky_logdet(scale)

2672 out = self._logpdf(x, dim, df, scale, log_det_scale)

2673 return _squeeze_output(out)

2674

2675 def pdf(self, x, df, scale):

2676 """Inverse Wishart probability density function.

2677

2678 Parameters

2679 ----------

2680 x : array_like

2681 Quantiles, with the last axis of `x` denoting the components.

2682 Each quantile must be a symmetric positive definite matrix.

2683 %(_doc_default_callparams)s

2684

2685 Returns

2686 -------

2687 pdf : ndarray

2688 Probability density function evaluated at `x`

2689

2690 Notes

2691 -----

2692 %(_doc_callparams_note)s

2693

2694 """

2695 return np.exp(self.logpdf(x, df, scale))

2696

2697 def _mean(self, dim, df, scale):

2698 """Mean of the inverse Wishart distribution.

2699

2700 Parameters

2701 ----------

2702 dim : int

2703 Dimension of the scale matrix

2704 %(_doc_default_callparams)s

2705

2706 Notes

2707 -----

2708 As this function does no argument checking, it should not be

2709 called directly; use 'mean' instead.

2710

2711 """

2712 if df > dim + 1:

2713 out = scale / (df - dim - 1)

2714 else:

2715 out = None

2716 return out

2717

2718 def mean(self, df, scale):

2719 """Mean of the inverse Wishart distribution.

2720

2721 Only valid if the degrees of freedom are greater than the dimension of

2722 the scale matrix plus one.

2723

2724 Parameters

2725 ----------

2726 %(_doc_default_callparams)s

2727

2728 Returns

2729 -------

2730 mean : float or None

2731 The mean of the distribution

2732

2733 """

2734 dim, df, scale = self._process_parameters(df, scale)

2735 out = self._mean(dim, df, scale)

2736 return _squeeze_output(out) if out is not None else out

2737

2738 def _mode(self, dim, df, scale):

2739 """Mode of the inverse Wishart distribution.

2740

2741 Parameters

2742 ----------

2743 dim : int

2744 Dimension of the scale matrix

2745 %(_doc_default_callparams)s

2746

2747 Notes

2748 -----

2749 As this function does no argument checking, it should not be

2750 called directly; use 'mode' instead.

2751

2752 """

2753 return scale / (df + dim + 1)

2754

2755 def mode(self, df, scale):

2756 """Mode of the inverse Wishart distribution.

2757

2758 Parameters

2759 ----------

2760 %(_doc_default_callparams)s

2761

2762 Returns

2763 -------

2764 mode : float

2765 The Mode of the distribution

2766

2767 """

2768 dim, df, scale = self._process_parameters(df, scale)

2769 out = self._mode(dim, df, scale)

2770 return _squeeze_output(out)

2771

2772 def _var(self, dim, df, scale):

2773 """Variance of the inverse Wishart distribution.

2774

2775 Parameters

2776 ----------

2777 dim : int

2778 Dimension of the scale matrix

2779 %(_doc_default_callparams)s

2780

2781 Notes

2782 -----

2783 As this function does no argument checking, it should not be

2784 called directly; use 'var' instead.

2785

2786 """

2787 if df > dim + 3:

2788 var = (df - dim + 1) * scale**2

2789 diag = scale.diagonal() # 1 x dim array

2790 var += (df - dim - 1) * np.outer(diag, diag)

2791 var /= (df - dim) * (df - dim - 1)**2 * (df - dim - 3)

2792 else:

2793 var = None

2794 return var

2795

2796 def var(self, df, scale):

2797 """Variance of the inverse Wishart distribution.

2798

2799 Only valid if the degrees of freedom are greater than the dimension of

2800 the scale matrix plus three.

2801

2802 Parameters

2803 ----------

2804 %(_doc_default_callparams)s

2805

2806 Returns

2807 -------

2808 var : float

2809 The variance of the distribution

2810 """

2811 dim, df, scale = self._process_parameters(df, scale)

2812 out = self._var(dim, df, scale)

2813 return _squeeze_output(out) if out is not None else out

2814

2815 def _rvs(self, n, shape, dim, df, C, random_state):

2816 """Draw random samples from an inverse Wishart distribution.

2817

2818 Parameters

2819 ----------

2820 n : integer

2821 Number of variates to generate

2822 shape : iterable

2823 Shape of the variates to generate

2824 dim : int

2825 Dimension of the scale matrix

2826 df : int

2827 Degrees of freedom

2828 C : ndarray

2829 Cholesky factorization of the scale matrix, lower triagular.

2830 %(_doc_random_state)s

2831

2832 Notes

2833 -----

2834 As this function does no argument checking, it should not be

2835 called directly; use 'rvs' instead.

2836

2837 """

2838 random_state = self._get_random_state(random_state)

2839 # Get random draws A such that A ~ W(df, I)

2840 A = super()._standard_rvs(n, shape, dim, df, random_state)

2841

2842 # Calculate SA = (CA)'^{-1} (CA)^{-1} ~ iW(df, scale)

2843 eye = np.eye(dim)

2844 trtrs = get_lapack_funcs(('trtrs'), (A,))

2845

2846 for index in np.ndindex(A.shape[:-2]):

2847 # Calculate CA

2848 CA = np.dot(C, A[index])

2849 # Get (C A)^{-1} via triangular solver

2850 if dim > 1:

2851 CA, info = trtrs(CA, eye, lower=True)

2852 if info > 0:

2853 raise LinAlgError("Singular matrix.")

2854 if info < 0:

2855 raise ValueError('Illegal value in %d-th argument of'

2856 ' internal trtrs' % -info)

2857 else:

2858 CA = 1. / CA

2859 # Get SA

2860 A[index] = np.dot(CA.T, CA)

2861

2862 return A

2863

2864 def rvs(self, df, scale, size=1, random_state=None):

2865 """Draw random samples from an inverse Wishart distribution.

2866

2867 Parameters

2868 ----------

2869 %(_doc_default_callparams)s

2870 size : integer or iterable of integers, optional

2871 Number of samples to draw (default 1).

2872 %(_doc_random_state)s

2873

2874 Returns

2875 -------

2876 rvs : ndarray

2877 Random variates of shape (`size`) + (`dim`, `dim), where `dim` is

2878 the dimension of the scale matrix.

2879

2880 Notes

2881 -----

2882 %(_doc_callparams_note)s

2883

2884 """

2885 n, shape = self._process_size(size)

2886 dim, df, scale = self._process_parameters(df, scale)

2887

2888 # Invert the scale

2889 eye = np.eye(dim)

2890 L, lower = scipy.linalg.cho_factor(scale, lower=True)

2891 inv_scale = scipy.linalg.cho_solve((L, lower), eye)

2892 # Cholesky decomposition of inverted scale

2893 C = scipy.linalg.cholesky(inv_scale, lower=True)

2894

2895 out = self._rvs(n, shape, dim, df, C, random_state)

2896

2897 return _squeeze_output(out)

2898

2899 def entropy(self):

2900 # Need to find reference for inverse Wishart entropy

2901 raise AttributeError

2902

2903

2904invwishart = invwishart_gen()

2905

2906

2907class invwishart_frozen(multi_rv_frozen):

2908 def __init__(self, df, scale, seed=None):

2909 """Create a frozen inverse Wishart distribution.

2910

2911 Parameters

2912 ----------

2913 df : array_like

2914 Degrees of freedom of the distribution

2915 scale : array_like

2916 Scale matrix of the distribution

2917 seed : {None, int, `numpy.random.Generator`}, optional

2918 If `seed` is None the `numpy.random.Generator` singleton is used.

2919 If `seed` is an int, a new ``Generator`` instance is used,

2920 seeded with `seed`.

2921 If `seed` is already a ``Generator`` instance then that instance is

2922 used.

2923

2924 """

2925 self._dist = invwishart_gen(seed)

2926 self.dim, self.df, self.scale = self._dist._process_parameters(

2927 df, scale

2928 )

2929

2930 # Get the determinant via Cholesky factorization

2931 C, lower = scipy.linalg.cho_factor(self.scale, lower=True)

2932 self.log_det_scale = 2 * np.sum(np.log(C.diagonal()))

2933

2934 # Get the inverse using the Cholesky factorization

2935 eye = np.eye(self.dim)

2936 self.inv_scale = scipy.linalg.cho_solve((C, lower), eye)

2937

2938 # Get the Cholesky factorization of the inverse scale

2939 self.C = scipy.linalg.cholesky(self.inv_scale, lower=True)

2940

2941 def logpdf(self, x):

2942 x = self._dist._process_quantiles(x, self.dim)

2943 out = self._dist._logpdf(x, self.dim, self.df, self.scale,

2944 self.log_det_scale)

2945 return _squeeze_output(out)

2946

2947 def pdf(self, x):

2948 return np.exp(self.logpdf(x))

2949

2950 def mean(self):

2951 out = self._dist._mean(self.dim, self.df, self.scale)

2952 return _squeeze_output(out) if out is not None else out

2953

2954 def mode(self):

2955 out = self._dist._mode(self.dim, self.df, self.scale)

2956 return _squeeze_output(out)

2957

2958 def var(self):

2959 out = self._dist._var(self.dim, self.df, self.scale)

2960 return _squeeze_output(out) if out is not None else out

2961

2962 def rvs(self, size=1, random_state=None):

2963 n, shape = self._dist._process_size(size)

2964

2965 out = self._dist._rvs(n, shape, self.dim, self.df,

2966 self.C, random_state)

2967

2968 return _squeeze_output(out)

2969

2970 def entropy(self):

2971 # Need to find reference for inverse Wishart entropy

2972 raise AttributeError

2973

2974

2975# Set frozen generator docstrings from corresponding docstrings in

2976# inverse Wishart and fill in default strings in class docstrings

2977for name in ['logpdf', 'pdf', 'mean', 'mode', 'var', 'rvs']:

2978 method = invwishart_gen.__dict__[name]

2979 method_frozen = wishart_frozen.__dict__[name]

2980 method_frozen.__doc__ = doccer.docformat(

2981 method.__doc__, wishart_docdict_noparams)

2982 method.__doc__ = doccer.docformat(method.__doc__, wishart_docdict_params)

2983

2984_multinomial_doc_default_callparams = """\

2985n : int

2986 Number of trials

2987p : array_like

2988 Probability of a trial falling into each category; should sum to 1

2989"""

2990

2991_multinomial_doc_callparams_note = """\

2992`n` should be a positive integer. Each element of `p` should be in the

2993interval :math:`[0,1]` and the elements should sum to 1. If they do not sum to

29941, the last element of the `p` array is not used and is replaced with the

2995remaining probability left over from the earlier elements.

2996"""

2997

2998_multinomial_doc_frozen_callparams = ""

2999

3000_multinomial_doc_frozen_callparams_note = """\

3001See class definition for a detailed description of parameters."""

3002

3003multinomial_docdict_params = {

3004 '_doc_default_callparams': _multinomial_doc_default_callparams,

3005 '_doc_callparams_note': _multinomial_doc_callparams_note,

3006 '_doc_random_state': _doc_random_state

3007}

3008

3009multinomial_docdict_noparams = {

3010 '_doc_default_callparams': _multinomial_doc_frozen_callparams,

3011 '_doc_callparams_note': _multinomial_doc_frozen_callparams_note,

3012 '_doc_random_state': _doc_random_state

3013}

3014

3015

3016class multinomial_gen(multi_rv_generic):

3017 r"""A multinomial random variable.

3018

3019 Methods

3020 -------

3021 pmf(x, n, p)

3022 Probability mass function.

3023 logpmf(x, n, p)

3024 Log of the probability mass function.

3025 rvs(n, p, size=1, random_state=None)

3026 Draw random samples from a multinomial distribution.

3027 entropy(n, p)

3028 Compute the entropy of the multinomial distribution.

3029 cov(n, p)

3030 Compute the covariance matrix of the multinomial distribution.

3031

3032 Parameters

3033 ----------

3034 %(_doc_default_callparams)s

3035 %(_doc_random_state)s

3036

3037 Notes

3038 -----

3039 %(_doc_callparams_note)s

3040

3041 The probability mass function for `multinomial` is

3042

3043 .. math::

3044

3045 f(x) = \frac{n!}{x_1! \cdots x_k!} p_1^{x_1} \cdots p_k^{x_k},

3046

3047 supported on :math:`x=(x_1, \ldots, x_k)` where each :math:`x_i` is a

3048 nonnegative integer and their sum is :math:`n`.

3049

3050 .. versionadded:: 0.19.0

3051

3052 Examples

3053 --------

3054

3055 >>> from scipy.stats import multinomial

3056 >>> rv = multinomial(8, [0.3, 0.2, 0.5])

3057 >>> rv.pmf([1, 3, 4])

3058 0.042000000000000072

3059

3060 The multinomial distribution for :math:`k=2` is identical to the

3061 corresponding binomial distribution (tiny numerical differences

3062 notwithstanding):

3063

3064 >>> from scipy.stats import binom

3065 >>> multinomial.pmf([3, 4], n=7, p=[0.4, 0.6])

3066 0.29030399999999973

3067 >>> binom.pmf(3, 7, 0.4)

3068 0.29030400000000012

3069

3070 The functions ``pmf``, ``logpmf``, ``entropy``, and ``cov`` support

3071 broadcasting, under the convention that the vector parameters (``x`` and

3072 ``p``) are interpreted as if each row along the last axis is a single

3073 object. For instance:

3074

3075 >>> multinomial.pmf([[3, 4], [3, 5]], n=[7, 8], p=[.3, .7])

3076 array([0.2268945, 0.25412184])

3077

3078 Here, ``x.shape == (2, 2)``, ``n.shape == (2,)``, and ``p.shape == (2,)``,

3079 but following the rules mentioned above they behave as if the rows

3080 ``[3, 4]`` and ``[3, 5]`` in ``x`` and ``[.3, .7]`` in ``p`` were a single

3081 object, and as if we had ``x.shape = (2,)``, ``n.shape = (2,)``, and

3082 ``p.shape = ()``. To obtain the individual elements without broadcasting,

3083 we would do this:

3084

3085 >>> multinomial.pmf([3, 4], n=7, p=[.3, .7])

3086 0.2268945

3087 >>> multinomial.pmf([3, 5], 8, p=[.3, .7])

3088 0.25412184

3089

3090 This broadcasting also works for ``cov``, where the output objects are

3091 square matrices of size ``p.shape[-1]``. For example:

3092

3093 >>> multinomial.cov([4, 5], [[.3, .7], [.4, .6]])

3094 array([[[ 0.84, -0.84],

3095 [-0.84, 0.84]],

3096 [[ 1.2 , -1.2 ],

3097 [-1.2 , 1.2 ]]])

3098

3099 In this example, ``n.shape == (2,)`` and ``p.shape == (2, 2)``, and

3100 following the rules above, these broadcast as if ``p.shape == (2,)``.

3101 Thus the result should also be of shape ``(2,)``, but since each output is

3102 a :math:`2 \times 2` matrix, the result in fact has shape ``(2, 2, 2)``,

3103 where ``result[0]`` is equal to ``multinomial.cov(n=4, p=[.3, .7])`` and

3104 ``result[1]`` is equal to ``multinomial.cov(n=5, p=[.4, .6])``.

3105

3106 Alternatively, the object may be called (as a function) to fix the `n` and

3107 `p` parameters, returning a "frozen" multinomial random variable:

3108

3109 >>> rv = multinomial(n=7, p=[.3, .7])

3110 >>> # Frozen object with the same methods but holding the given

3111 >>> # degrees of freedom and scale fixed.

3112

3113 See also

3114 --------

3115 scipy.stats.binom : The binomial distribution.

3116 numpy.random.Generator.multinomial : Sampling from the multinomial distribution.

3117 scipy.stats.multivariate_hypergeom :

3118 The multivariate hypergeometric distribution.

3119 """ # noqa: E501

3120

3121 def __init__(self, seed=None):

3122 super().__init__(seed)

3123 self.__doc__ = \

3124 doccer.docformat(self.__doc__, multinomial_docdict_params)

3125

3126 def __call__(self, n, p, seed=None):

3127 """Create a frozen multinomial distribution.

3128

3129 See `multinomial_frozen` for more information.

3130 """

3131 return multinomial_frozen(n, p, seed)

3132

3133 def _process_parameters(self, n, p, eps=1e-15):

3134 """Returns: n_, p_, npcond.

3135

3136 n_ and p_ are arrays of the correct shape; npcond is a boolean array

3137 flagging values out of the domain.

3138 """

3139 p = np.array(p, dtype=np.float64, copy=True)

3140 p_adjusted = 1. - p[..., :-1].sum(axis=-1)

3141 i_adjusted = np.abs(p_adjusted) > eps

3142 p[i_adjusted, -1] = p_adjusted[i_adjusted]

3143

3144 # true for bad p

3145 pcond = np.any(p < 0, axis=-1)

3146 pcond |= np.any(p > 1, axis=-1)

3147

3148 n = np.array(n, dtype=np.int_, copy=True)

3149

3150 # true for bad n

3151 ncond = n <= 0

3152

3153 return n, p, ncond | pcond

3154

3155 def _process_quantiles(self, x, n, p):

3156 """Returns: x_, xcond.

3157

3158 x_ is an int array; xcond is a boolean array flagging values out of the

3159 domain.

3160 """

3161 xx = np.asarray(x, dtype=np.int_)

3162

3163 if xx.ndim == 0:

3164 raise ValueError("x must be an array.")

3165

3166 if xx.size != 0 and not xx.shape[-1] == p.shape[-1]:

3167 raise ValueError("Size of each quantile should be size of p: "

3168 "received %d, but expected %d." %

3169 (xx.shape[-1], p.shape[-1]))

3170

3171 # true for x out of the domain

3172 cond = np.any(xx != x, axis=-1)

3173 cond |= np.any(xx < 0, axis=-1)

3174 cond = cond | (np.sum(xx, axis=-1) != n)

3175

3176 return xx, cond

3177

3178 def _checkresult(self, result, cond, bad_value):

3179 result = np.asarray(result)

3180

3181 if cond.ndim != 0:

3182 result[cond] = bad_value

3183 elif cond:

3184 if result.ndim == 0:

3185 return bad_value

3186 result[...] = bad_value

3187 return result

3188

3189 def _logpmf(self, x, n, p):

3190 return gammaln(n+1) + np.sum(xlogy(x, p) - gammaln(x+1), axis=-1)

3191

3192 def logpmf(self, x, n, p):

3193 """Log of the Multinomial probability mass function.

3194

3195 Parameters

3196 ----------

3197 x : array_like

3198 Quantiles, with the last axis of `x` denoting the components.

3199 %(_doc_default_callparams)s

3200

3201 Returns

3202 -------

3203 logpmf : ndarray or scalar

3204 Log of the probability mass function evaluated at `x`

3205

3206 Notes

3207 -----

3208 %(_doc_callparams_note)s

3209 """

3210 n, p, npcond = self._process_parameters(n, p)

3211 x, xcond = self._process_quantiles(x, n, p)

3212

3213 result = self._logpmf(x, n, p)

3214

3215 # replace values for which x was out of the domain; broadcast

3216 # xcond to the right shape

3217 xcond_ = xcond | np.zeros(npcond.shape, dtype=np.bool_)

3218 result = self._checkresult(result, xcond_, np.NINF)

3219

3220 # replace values bad for n or p; broadcast npcond to the right shape

3221 npcond_ = npcond | np.zeros(xcond.shape, dtype=np.bool_)

3222 return self._checkresult(result, npcond_, np.NAN)

3223

3224 def pmf(self, x, n, p):

3225 """Multinomial probability mass function.

3226

3227 Parameters

3228 ----------

3229 x : array_like

3230 Quantiles, with the last axis of `x` denoting the components.

3231 %(_doc_default_callparams)s

3232

3233 Returns

3234 -------

3235 pmf : ndarray or scalar

3236 Probability density function evaluated at `x`

3237

3238 Notes

3239 -----

3240 %(_doc_callparams_note)s

3241 """

3242 return np.exp(self.logpmf(x, n, p))

3243

3244 def mean(self, n, p):

3245 """Mean of the Multinomial distribution.

3246

3247 Parameters

3248 ----------

3249 %(_doc_default_callparams)s

3250

3251 Returns

3252 -------

3253 mean : float

3254 The mean of the distribution

3255 """

3256 n, p, npcond = self._process_parameters(n, p)

3257 result = n[..., np.newaxis]*p

3258 return self._checkresult(result, npcond, np.NAN)

3259

3260 def cov(self, n, p):

3261 """Covariance matrix of the multinomial distribution.

3262

3263 Parameters

3264 ----------

3265 %(_doc_default_callparams)s

3266

3267 Returns

3268 -------

3269 cov : ndarray

3270 The covariance matrix of the distribution

3271 """

3272 n, p, npcond = self._process_parameters(n, p)

3273

3274 nn = n[..., np.newaxis, np.newaxis]

3275 result = nn * np.einsum('...j,...k->...jk', -p, p)

3276

3277 # change the diagonal

3278 for i in range(p.shape[-1]):

3279 result[..., i, i] += n*p[..., i]

3280

3281 return self._checkresult(result, npcond, np.nan)

3282

3283 def entropy(self, n, p):

3284 r"""Compute the entropy of the multinomial distribution.

3285

3286 The entropy is computed using this expression:

3287

3288 .. math::

3289

3290 f(x) = - \log n! - n\sum_{i=1}^k p_i \log p_i +

3291 \sum_{i=1}^k \sum_{x=0}^n \binom n x p_i^x(1-p_i)^{n-x} \log x!

3292

3293 Parameters

3294 ----------

3295 %(_doc_default_callparams)s

3296

3297 Returns

3298 -------

3299 h : scalar

3300 Entropy of the Multinomial distribution

3301

3302 Notes

3303 -----

3304 %(_doc_callparams_note)s

3305 """

3306 n, p, npcond = self._process_parameters(n, p)

3307

3308 x = np.r_[1:np.max(n)+1]

3309

3310 term1 = n*np.sum(entr(p), axis=-1)

3311 term1 -= gammaln(n+1)

3312

3313 n = n[..., np.newaxis]

3314 new_axes_needed = max(p.ndim, n.ndim) - x.ndim + 1

3315 x.shape += (1,)*new_axes_needed

3316

3317 term2 = np.sum(binom.pmf(x, n, p)*gammaln(x+1),

3318 axis=(-1, -1-new_axes_needed))

3319

3320 return self._checkresult(term1 + term2, npcond, np.nan)

3321

3322 def rvs(self, n, p, size=None, random_state=None):

3323 """Draw random samples from a Multinomial distribution.

3324

3325 Parameters

3326 ----------

3327 %(_doc_default_callparams)s

3328 size : integer or iterable of integers, optional

3329 Number of samples to draw (default 1).

3330 %(_doc_random_state)s

3331

3332 Returns

3333 -------

3334 rvs : ndarray or scalar

3335 Random variates of shape (`size`, `len(p)`)

3336

3337 Notes

3338 -----

3339 %(_doc_callparams_note)s

3340 """

3341 n, p, npcond = self._process_parameters(n, p)

3342 random_state = self._get_random_state(random_state)

3343 return random_state.multinomial(n, p, size)

3344

3345

3346multinomial = multinomial_gen()

3347

3348

3349class multinomial_frozen(multi_rv_frozen):

3350 r"""Create a frozen Multinomial distribution.

3351

3352 Parameters

3353 ----------

3354 n : int

3355 number of trials

3356 p: array_like

3357 probability of a trial falling into each category; should sum to 1

3358 seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional

3359 If `seed` is None (or `np.random`), the `numpy.random.RandomState`

3360 singleton is used.

3361 If `seed` is an int, a new ``RandomState`` instance is used,

3362 seeded with `seed`.

3363 If `seed` is already a ``Generator`` or ``RandomState`` instance then

3364 that instance is used.

3365 """

3366 def __init__(self, n, p, seed=None):

3367 self._dist = multinomial_gen(seed)

3368 self.n, self.p, self.npcond = self._dist._process_parameters(n, p)

3369

3370 # monkey patch self._dist

3371 def _process_parameters(n, p):

3372 return self.n, self.p, self.npcond

3373

3374 self._dist._process_parameters = _process_parameters

3375

3376 def logpmf(self, x):

3377 return self._dist.logpmf(x, self.n, self.p)

3378

3379 def pmf(self, x):

3380 return self._dist.pmf(x, self.n, self.p)

3381

3382 def mean(self):

3383 return self._dist.mean(self.n, self.p)

3384

3385 def cov(self):

3386 return self._dist.cov(self.n, self.p)

3387

3388 def entropy(self):

3389 return self._dist.entropy(self.n, self.p)

3390

3391 def rvs(self, size=1, random_state=None):

3392 return self._dist.rvs(self.n, self.p, size, random_state)

3393

3394

3395# Set frozen generator docstrings from corresponding docstrings in

3396# multinomial and fill in default strings in class docstrings

3397for name in ['logpmf', 'pmf', 'mean', 'cov', 'rvs']:

3398 method = multinomial_gen.__dict__[name]

3399 method_frozen = multinomial_frozen.__dict__[name]

3400 method_frozen.__doc__ = doccer.docformat(

3401 method.__doc__, multinomial_docdict_noparams)

3402 method.__doc__ = doccer.docformat(method.__doc__,

3403 multinomial_docdict_params)

3404

3405

3406class special_ortho_group_gen(multi_rv_generic):

3407 r"""A Special Orthogonal matrix (SO(N)) random variable.

3408

3409 Return a random rotation matrix, drawn from the Haar distribution

3410 (the only uniform distribution on SO(N)) with a determinant of +1.

3411

3412 The `dim` keyword specifies the dimension N.

3413

3414 Methods

3415 -------

3416 rvs(dim=None, size=1, random_state=None)

3417 Draw random samples from SO(N).

3418

3419 Parameters

3420 ----------

3421 dim : scalar

3422 Dimension of matrices

3423 seed : {None, int, np.random.RandomState, np.random.Generator}, optional

3424 Used for drawing random variates.

3425 If `seed` is `None`, the `~np.random.RandomState` singleton is used.

3426 If `seed` is an int, a new ``RandomState`` instance is used, seeded

3427 with seed.

3428 If `seed` is already a ``RandomState`` or ``Generator`` instance,

3429 then that object is used.

3430 Default is `None`.

3431

3432 Notes

3433 -----

3434 This class is wrapping the random_rot code from the MDP Toolkit,

3435 https://github.com/mdp-toolkit/mdp-toolkit

3436

3437 Return a random rotation matrix, drawn from the Haar distribution

3438 (the only uniform distribution on SO(N)).

3439 The algorithm is described in the paper

3440 Stewart, G.W., "The efficient generation of random orthogonal

3441 matrices with an application to condition estimators", SIAM Journal

3442 on Numerical Analysis, 17(3), pp. 403-409, 1980.

3443 For more information see

3444 https://en.wikipedia.org/wiki/Orthogonal_matrix#Randomization

3445

3446 See also the similar `ortho_group`. For a random rotation in three

3447 dimensions, see `scipy.spatial.transform.Rotation.random`.

3448

3449 Examples

3450 --------

3451 >>> import numpy as np

3452 >>> from scipy.stats import special_ortho_group

3453 >>> x = special_ortho_group.rvs(3)

3454

3455 >>> np.dot(x, x.T)

3456 array([[ 1.00000000e+00, 1.13231364e-17, -2.86852790e-16],

3457 [ 1.13231364e-17, 1.00000000e+00, -1.46845020e-16],

3458 [ -2.86852790e-16, -1.46845020e-16, 1.00000000e+00]])

3459

3460 >>> import scipy.linalg

3461 >>> scipy.linalg.det(x)

3462 1.0

3463

3464 This generates one random matrix from SO(3). It is orthogonal and

3465 has a determinant of 1.

3466

3467 Alternatively, the object may be called (as a function) to fix the `dim`

3468 parameter, returning a "frozen" special_ortho_group random variable:

3469

3470 >>> rv = special_ortho_group(5)

3471 >>> # Frozen object with the same methods but holding the

3472 >>> # dimension parameter fixed.

3473

3474 See Also

3475 --------

3476 ortho_group, scipy.spatial.transform.Rotation.random

3477

3478 """

3479

3480 def __init__(self, seed=None):

3481 super().__init__(seed)

3482 self.__doc__ = doccer.docformat(self.__doc__)

3483

3484 def __call__(self, dim=None, seed=None):

3485 """Create a frozen SO(N) distribution.

3486

3487 See `special_ortho_group_frozen` for more information.

3488 """

3489 return special_ortho_group_frozen(dim, seed=seed)

3490

3491 def _process_parameters(self, dim):

3492 """Dimension N must be specified; it cannot be inferred."""

3493 if dim is None or not np.isscalar(dim) or dim <= 1 or dim != int(dim):

3494 raise ValueError("""Dimension of rotation must be specified,

3495 and must be a scalar greater than 1.""")

3496

3497 return dim

3498

3499 def rvs(self, dim, size=1, random_state=None):

3500 """Draw random samples from SO(N).

3501

3502 Parameters

3503 ----------

3504 dim : integer

3505 Dimension of rotation space (N).

3506 size : integer, optional

3507 Number of samples to draw (default 1).

3508

3509 Returns

3510 -------

3511 rvs : ndarray or scalar

3512 Random size N-dimensional matrices, dimension (size, dim, dim)

3513

3514 """

3515 random_state = self._get_random_state(random_state)

3516

3517 size = int(size)

3518 size = (size,) if size > 1 else ()

3519

3520 dim = self._process_parameters(dim)

3521

3522 # H represents a (dim, dim) matrix, while D represents the diagonal of

3523 # a (dim, dim) diagonal matrix. The algorithm that follows is

3524 # broadcasted on the leading shape in `size` to vectorize along

3525 # samples.

3526 H = np.empty(size + (dim, dim))

3527 H[..., :, :] = np.eye(dim)

3528 D = np.empty(size + (dim,))

3529

3530 for n in range(dim-1):

3531

3532 # x is a vector with length dim-n, xrow and xcol are views of it as

3533 # a row vector and column vector respectively. It's important they

3534 # are views and not copies because we are going to modify x

3535 # in-place.

3536 x = random_state.normal(size=size + (dim-n,))

3537 xrow = x[..., None, :]

3538 xcol = x[..., :, None]

3539

3540 # This is the squared norm of x, without vectorization it would be

3541 # dot(x, x), to have proper broadcasting we use matmul and squeeze

3542 # out (convert to scalar) the resulting 1x1 matrix

3543 norm2 = np.matmul(xrow, xcol).squeeze((-2, -1))

3544

3545 x0 = x[..., 0].copy()

3546 D[..., n] = np.where(x0 != 0, np.sign(x0), 1)

3547 x[..., 0] += D[..., n]*np.sqrt(norm2)

3548

3549 # In renormalizing x we have to append an additional axis with

3550 # [..., None] to broadcast the scalar against the vector x

3551 x /= np.sqrt((norm2 - x0**2 + x[..., 0]**2) / 2.)[..., None]

3552

3553 # Householder transformation, without vectorization the RHS can be

3554 # written as outer(H @ x, x) (apart from the slicing)

3555 H[..., :, n:] -= np.matmul(H[..., :, n:], xcol) * xrow

3556

3557 D[..., -1] = (-1)**(dim-1)*D[..., :-1].prod(axis=-1)

3558

3559 # Without vectorization this could be written as H = diag(D) @ H,

3560 # left-multiplication by a diagonal matrix amounts to multiplying each

3561 # row of H by an element of the diagonal, so we add a dummy axis for

3562 # the column index

3563 H *= D[..., :, None]

3564 return H

3565

3566

3567special_ortho_group = special_ortho_group_gen()

3568

3569

3570class special_ortho_group_frozen(multi_rv_frozen):

3571 def __init__(self, dim=None, seed=None):

3572 """Create a frozen SO(N) distribution.

3573

3574 Parameters

3575 ----------

3576 dim : scalar

3577 Dimension of matrices

3578 seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional

3579 If `seed` is None (or `np.random`), the `numpy.random.RandomState`

3580 singleton is used.

3581 If `seed` is an int, a new ``RandomState`` instance is used,

3582 seeded with `seed`.

3583 If `seed` is already a ``Generator`` or ``RandomState`` instance

3584 then that instance is used.

3585

3586 Examples

3587 --------

3588 >>> from scipy.stats import special_ortho_group

3589 >>> g = special_ortho_group(5)

3590 >>> x = g.rvs()

3591

3592 """

3593 self._dist = special_ortho_group_gen(seed)

3594 self.dim = self._dist._process_parameters(dim)

3595

3596 def rvs(self, size=1, random_state=None):

3597 return self._dist.rvs(self.dim, size, random_state)

3598

3599

3600class ortho_group_gen(multi_rv_generic):

3601 r"""An Orthogonal matrix (O(N)) random variable.

3602

3603 Return a random orthogonal matrix, drawn from the O(N) Haar

3604 distribution (the only uniform distribution on O(N)).

3605

3606 The `dim` keyword specifies the dimension N.

3607

3608 Methods

3609 -------

3610 rvs(dim=None, size=1, random_state=None)

3611 Draw random samples from O(N).

3612

3613 Parameters

3614 ----------

3615 dim : scalar

3616 Dimension of matrices

3617 seed : {None, int, np.random.RandomState, np.random.Generator}, optional

3618 Used for drawing random variates.

3619 If `seed` is `None`, the `~np.random.RandomState` singleton is used.

3620 If `seed` is an int, a new ``RandomState`` instance is used, seeded

3621 with seed.

3622 If `seed` is already a ``RandomState`` or ``Generator`` instance,

3623 then that object is used.

3624 Default is `None`.

3625

3626 Notes

3627 -----

3628 This class is closely related to `special_ortho_group`.

3629

3630 Some care is taken to avoid numerical error, as per the paper by Mezzadri.

3631

3632 References

3633 ----------

3634 .. [1] F. Mezzadri, "How to generate random matrices from the classical

3635 compact groups", :arXiv:`math-ph/0609050v2`.

3636

3637 Examples

3638 --------

3639 >>> import numpy as np

3640 >>> from scipy.stats import ortho_group

3641 >>> x = ortho_group.rvs(3)

3642

3643 >>> np.dot(x, x.T)

3644 array([[ 1.00000000e+00, 1.13231364e-17, -2.86852790e-16],

3645 [ 1.13231364e-17, 1.00000000e+00, -1.46845020e-16],

3646 [ -2.86852790e-16, -1.46845020e-16, 1.00000000e+00]])

3647

3648 >>> import scipy.linalg

3649 >>> np.fabs(scipy.linalg.det(x))

3650 1.0

3651

3652 This generates one random matrix from O(3). It is orthogonal and

3653 has a determinant of +1 or -1.

3654

3655 Alternatively, the object may be called (as a function) to fix the `dim`

3656 parameter, returning a "frozen" ortho_group random variable:

3657

3658 >>> rv = ortho_group(5)

3659 >>> # Frozen object with the same methods but holding the

3660 >>> # dimension parameter fixed.

3661

3662 See Also

3663 --------

3664 special_ortho_group

3665 """

3666

3667 def __init__(self, seed=None):

3668 super().__init__(seed)

3669 self.__doc__ = doccer.docformat(self.__doc__)

3670

3671 def __call__(self, dim=None, seed=None):

3672 """Create a frozen O(N) distribution.

3673

3674 See `ortho_group_frozen` for more information.

3675 """

3676 return ortho_group_frozen(dim, seed=seed)

3677

3678 def _process_parameters(self, dim):

3679 """Dimension N must be specified; it cannot be inferred."""

3680 if dim is None or not np.isscalar(dim) or dim <= 1 or dim != int(dim):

3681 raise ValueError("Dimension of rotation must be specified,"

3682 "and must be a scalar greater than 1.")

3683

3684 return dim

3685

3686 def rvs(self, dim, size=1, random_state=None):

3687 """Draw random samples from O(N).

3688

3689 Parameters

3690 ----------

3691 dim : integer

3692 Dimension of rotation space (N).

3693 size : integer, optional

3694 Number of samples to draw (default 1).

3695

3696 Returns

3697 -------

3698 rvs : ndarray or scalar

3699 Random size N-dimensional matrices, dimension (size, dim, dim)

3700

3701 """

3702 random_state = self._get_random_state(random_state)

3703

3704 size = int(size)

3705 if size > 1 and NumpyVersion(np.__version__) < '1.22.0':

3706 return np.array([self.rvs(dim, size=1, random_state=random_state)

3707 for i in range(size)])

3708

3709 dim = self._process_parameters(dim)

3710

3711 size = (size,) if size > 1 else ()

3712 z = random_state.normal(size=size + (dim, dim))

3713 q, r = np.linalg.qr(z)

3714 # The last two dimensions are the rows and columns of R matrices.

3715 # Extract the diagonals. Note that this eliminates a dimension.

3716 d = r.diagonal(offset=0, axis1=-2, axis2=-1)

3717 # Add back a dimension for proper broadcasting: we're dividing

3718 # each row of each R matrix by the diagonal of the R matrix.

3719 q *= (d/abs(d))[..., np.newaxis, :] # to broadcast properly

3720 return q

3721

3722

3723ortho_group = ortho_group_gen()

3724

3725

3726class ortho_group_frozen(multi_rv_frozen):

3727 def __init__(self, dim=None, seed=None):

3728 """Create a frozen O(N) distribution.

3729

3730 Parameters

3731 ----------

3732 dim : scalar

3733 Dimension of matrices

3734 seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional

3735 If `seed` is None (or `np.random`), the `numpy.random.RandomState`

3736 singleton is used.

3737 If `seed` is an int, a new ``RandomState`` instance is used,

3738 seeded with `seed`.

3739 If `seed` is already a ``Generator`` or ``RandomState`` instance

3740 then that instance is used.

3741

3742 Examples

3743 --------

3744 >>> from scipy.stats import ortho_group

3745 >>> g = ortho_group(5)

3746 >>> x = g.rvs()

3747

3748 """

3749 self._dist = ortho_group_gen(seed)

3750 self.dim = self._dist._process_parameters(dim)

3751

3752 def rvs(self, size=1, random_state=None):

3753 return self._dist.rvs(self.dim, size, random_state)

3754

3755

3756class random_correlation_gen(multi_rv_generic):

3757 r"""A random correlation matrix.

3758

3759 Return a random correlation matrix, given a vector of eigenvalues.

3760

3761 The `eigs` keyword specifies the eigenvalues of the correlation matrix,

3762 and implies the dimension.

3763

3764 Methods

3765 -------

3766 rvs(eigs=None, random_state=None)

3767 Draw random correlation matrices, all with eigenvalues eigs.

3768

3769 Parameters

3770 ----------

3771 eigs : 1d ndarray

3772 Eigenvalues of correlation matrix

3773 seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional

3774 If `seed` is None (or `np.random`), the `numpy.random.RandomState`

3775 singleton is used.

3776 If `seed` is an int, a new ``RandomState`` instance is used,

3777 seeded with `seed`.

3778 If `seed` is already a ``Generator`` or ``RandomState`` instance

3779 then that instance is used.

3780 tol : float, optional

3781 Tolerance for input parameter checks

3782 diag_tol : float, optional

3783 Tolerance for deviation of the diagonal of the resulting

3784 matrix. Default: 1e-7

3785

3786 Raises

3787 ------

3788 RuntimeError

3789 Floating point error prevented generating a valid correlation

3790 matrix.

3791

3792 Returns

3793 -------

3794 rvs : ndarray or scalar

3795 Random size N-dimensional matrices, dimension (size, dim, dim),

3796 each having eigenvalues eigs.

3797

3798 Notes

3799 -----

3800

3801 Generates a random correlation matrix following a numerically stable

3802 algorithm spelled out by Davies & Higham. This algorithm uses a single O(N)

3803 similarity transformation to construct a symmetric positive semi-definite

3804 matrix, and applies a series of Givens rotations to scale it to have ones

3805 on the diagonal.

3806

3807 References

3808 ----------

3809

3810 .. [1] Davies, Philip I; Higham, Nicholas J; "Numerically stable generation

3811 of correlation matrices and their factors", BIT 2000, Vol. 40,

3812 No. 4, pp. 640 651

3813

3814 Examples

3815 --------

3816 >>> import numpy as np

3817 >>> from scipy.stats import random_correlation

3818 >>> rng = np.random.default_rng()

3819 >>> x = random_correlation.rvs((.5, .8, 1.2, 1.5), random_state=rng)

3820 >>> x

3821 array([[ 1. , -0.02423399, 0.03130519, 0.4946965 ],

3822 [-0.02423399, 1. , 0.20334736, 0.04039817],

3823 [ 0.03130519, 0.20334736, 1. , 0.02694275],

3824 [ 0.4946965 , 0.04039817, 0.02694275, 1. ]])

3825 >>> import scipy.linalg

3826 >>> e, v = scipy.linalg.eigh(x)

3827 >>> e

3828 array([ 0.5, 0.8, 1.2, 1.5])

3829

3830 """

3831

3832 def __init__(self, seed=None):

3833 super().__init__(seed)

3834 self.__doc__ = doccer.docformat(self.__doc__)

3835

3836 def __call__(self, eigs, seed=None, tol=1e-13, diag_tol=1e-7):

3837 """Create a frozen random correlation matrix.

3838

3839 See `random_correlation_frozen` for more information.

3840 """

3841 return random_correlation_frozen(eigs, seed=seed, tol=tol,

3842 diag_tol=diag_tol)

3843

3844 def _process_parameters(self, eigs, tol):

3845 eigs = np.asarray(eigs, dtype=float)

3846 dim = eigs.size

3847

3848 if eigs.ndim != 1 or eigs.shape[0] != dim or dim <= 1:

3849 raise ValueError("Array 'eigs' must be a vector of length "

3850 "greater than 1.")

3851

3852 if np.fabs(np.sum(eigs) - dim) > tol:

3853 raise ValueError("Sum of eigenvalues must equal dimensionality.")

3854

3855 for x in eigs:

3856 if x < -tol:

3857 raise ValueError("All eigenvalues must be non-negative.")

3858

3859 return dim, eigs

3860

3861 def _givens_to_1(self, aii, ajj, aij):

3862 """Computes a 2x2 Givens matrix to put 1's on the diagonal.

3863

3864 The input matrix is a 2x2 symmetric matrix M = [ aii aij ; aij ajj ].

3865

3866 The output matrix g is a 2x2 anti-symmetric matrix of the form

3867 [ c s ; -s c ]; the elements c and s are returned.

3868

3869 Applying the output matrix to the input matrix (as b=g.T M g)

3870 results in a matrix with bii=1, provided tr(M) - det(M) >= 1

3871 and floating point issues do not occur. Otherwise, some other

3872 valid rotation is returned. When tr(M)==2, also bjj=1.

3873

3874 """

3875 aiid = aii - 1.

3876 ajjd = ajj - 1.

3877

3878 if ajjd == 0:

3879 # ajj==1, so swap aii and ajj to avoid division by zero

3880 return 0., 1.

3881

3882 dd = math.sqrt(max(aij**2 - aiid*ajjd, 0))

3883

3884 # The choice of t should be chosen to avoid cancellation [1]

3885 t = (aij + math.copysign(dd, aij)) / ajjd

3886 c = 1. / math.sqrt(1. + t*t)

3887 if c == 0:

3888 # Underflow

3889 s = 1.0

3890 else:

3891 s = c*t

3892 return c, s

3893

3894 def _to_corr(self, m):

3895 """

3896 Given a psd matrix m, rotate to put one's on the diagonal, turning it

3897 into a correlation matrix. This also requires the trace equal the

3898 dimensionality. Note: modifies input matrix

3899 """

3900 # Check requirements for in-place Givens

3901 if not (m.flags.c_contiguous and m.dtype == np.float64 and

3902 m.shape[0] == m.shape[1]):

3903 raise ValueError()

3904

3905 d = m.shape[0]

3906 for i in range(d-1):

3907 if m[i, i] == 1:

3908 continue

3909 elif m[i, i] > 1:

3910 for j in range(i+1, d):

3911 if m[j, j] < 1:

3912 break

3913 else:

3914 for j in range(i+1, d):

3915 if m[j, j] > 1:

3916 break

3917

3918 c, s = self._givens_to_1(m[i, i], m[j, j], m[i, j])

3919

3920 # Use BLAS to apply Givens rotations in-place. Equivalent to:

3921 # g = np.eye(d)

3922 # g[i, i] = g[j,j] = c

3923 # g[j, i] = -s; g[i, j] = s

3924 # m = np.dot(g.T, np.dot(m, g))

3925 mv = m.ravel()

3926 drot(mv, mv, c, -s, n=d,

3927 offx=i*d, incx=1, offy=j*d, incy=1,

3928 overwrite_x=True, overwrite_y=True)

3929 drot(mv, mv, c, -s, n=d,

3930 offx=i, incx=d, offy=j, incy=d,

3931 overwrite_x=True, overwrite_y=True)

3932

3933 return m

3934

3935 def rvs(self, eigs, random_state=None, tol=1e-13, diag_tol=1e-7):

3936 """Draw random correlation matrices.

3937

3938 Parameters

3939 ----------

3940 eigs : 1d ndarray

3941 Eigenvalues of correlation matrix

3942 tol : float, optional

3943 Tolerance for input parameter checks

3944 diag_tol : float, optional

3945 Tolerance for deviation of the diagonal of the resulting

3946 matrix. Default: 1e-7

3947

3948 Raises

3949 ------

3950 RuntimeError

3951 Floating point error prevented generating a valid correlation

3952 matrix.

3953

3954 Returns

3955 -------

3956 rvs : ndarray or scalar

3957 Random size N-dimensional matrices, dimension (size, dim, dim),

3958 each having eigenvalues eigs.

3959

3960 """

3961 dim, eigs = self._process_parameters(eigs, tol=tol)

3962

3963 random_state = self._get_random_state(random_state)

3964

3965 m = ortho_group.rvs(dim, random_state=random_state)

3966 m = np.dot(np.dot(m, np.diag(eigs)), m.T) # Set the trace of m

3967 m = self._to_corr(m) # Carefully rotate to unit diagonal

3968

3969 # Check diagonal

3970 if abs(m.diagonal() - 1).max() > diag_tol:

3971 raise RuntimeError("Failed to generate a valid correlation matrix")

3972

3973 return m

3974

3975

3976random_correlation = random_correlation_gen()

3977

3978

3979class random_correlation_frozen(multi_rv_frozen):

3980 def __init__(self, eigs, seed=None, tol=1e-13, diag_tol=1e-7):

3981 """Create a frozen random correlation matrix distribution.

3982

3983 Parameters

3984 ----------

3985 eigs : 1d ndarray

3986 Eigenvalues of correlation matrix

3987 seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional

3988 If `seed` is None (or `np.random`), the `numpy.random.RandomState`

3989 singleton is used.

3990 If `seed` is an int, a new ``RandomState`` instance is used,

3991 seeded with `seed`.

3992 If `seed` is already a ``Generator`` or ``RandomState`` instance

3993 then that instance is used.

3994 tol : float, optional

3995 Tolerance for input parameter checks

3996 diag_tol : float, optional

3997 Tolerance for deviation of the diagonal of the resulting

3998 matrix. Default: 1e-7

3999

4000 Raises

4001 ------

4002 RuntimeError

4003 Floating point error prevented generating a valid correlation

4004 matrix.

4005

4006 Returns

4007 -------

4008 rvs : ndarray or scalar

4009 Random size N-dimensional matrices, dimension (size, dim, dim),

4010 each having eigenvalues eigs.

4011 """

4012

4013 self._dist = random_correlation_gen(seed)

4014 self.tol = tol

4015 self.diag_tol = diag_tol

4016 _, self.eigs = self._dist._process_parameters(eigs, tol=self.tol)

4017

4018 def rvs(self, random_state=None):

4019 return self._dist.rvs(self.eigs, random_state=random_state,

4020 tol=self.tol, diag_tol=self.diag_tol)

4021

4022

4023class unitary_group_gen(multi_rv_generic):

4024 r"""A matrix-valued U(N) random variable.

4025

4026 Return a random unitary matrix.

4027

4028 The `dim` keyword specifies the dimension N.

4029

4030 Methods

4031 -------

4032 rvs(dim=None, size=1, random_state=None)

4033 Draw random samples from U(N).

4034

4035 Parameters

4036 ----------

4037 dim : scalar

4038 Dimension of matrices

4039 seed : {None, int, np.random.RandomState, np.random.Generator}, optional

4040 Used for drawing random variates.

4041 If `seed` is `None`, the `~np.random.RandomState` singleton is used.

4042 If `seed` is an int, a new ``RandomState`` instance is used, seeded

4043 with seed.

4044 If `seed` is already a ``RandomState`` or ``Generator`` instance,

4045 then that object is used.

4046 Default is `None`.

4047

4048 Notes

4049 -----

4050 This class is similar to `ortho_group`.

4051

4052 References

4053 ----------

4054 .. [1] F. Mezzadri, "How to generate random matrices from the classical

4055 compact groups", :arXiv:`math-ph/0609050v2`.

4056

4057 Examples

4058 --------

4059 >>> import numpy as np

4060 >>> from scipy.stats import unitary_group

4061 >>> x = unitary_group.rvs(3)

4062

4063 >>> np.dot(x, x.conj().T)

4064 array([[ 1.00000000e+00, 1.13231364e-17, -2.86852790e-16],

4065 [ 1.13231364e-17, 1.00000000e+00, -1.46845020e-16],

4066 [ -2.86852790e-16, -1.46845020e-16, 1.00000000e+00]])

4067

4068 This generates one random matrix from U(3). The dot product confirms that

4069 it is unitary up to machine precision.

4070

4071 Alternatively, the object may be called (as a function) to fix the `dim`

4072 parameter, return a "frozen" unitary_group random variable:

4073

4074 >>> rv = unitary_group(5)

4075

4076 See Also

4077 --------

4078 ortho_group

4079

4080 """

4081

4082 def __init__(self, seed=None):

4083 super().__init__(seed)

4084 self.__doc__ = doccer.docformat(self.__doc__)

4085

4086 def __call__(self, dim=None, seed=None):

4087 """Create a frozen (U(N)) n-dimensional unitary matrix distribution.

4088

4089 See `unitary_group_frozen` for more information.

4090 """

4091 return unitary_group_frozen(dim, seed=seed)

4092

4093 def _process_parameters(self, dim):

4094 """Dimension N must be specified; it cannot be inferred."""

4095 if dim is None or not np.isscalar(dim) or dim <= 1 or dim != int(dim):

4096 raise ValueError("Dimension of rotation must be specified,"

4097 "and must be a scalar greater than 1.")

4098

4099 return dim

4100

4101 def rvs(self, dim, size=1, random_state=None):

4102 """Draw random samples from U(N).

4103

4104 Parameters

4105 ----------

4106 dim : integer

4107 Dimension of space (N).

4108 size : integer, optional

4109 Number of samples to draw (default 1).

4110

4111 Returns

4112 -------

4113 rvs : ndarray or scalar

4114 Random size N-dimensional matrices, dimension (size, dim, dim)

4115

4116 """

4117 random_state = self._get_random_state(random_state)

4118

4119 size = int(size)

4120 if size > 1 and NumpyVersion(np.__version__) < '1.22.0':

4121 return np.array([self.rvs(dim, size=1, random_state=random_state)

4122 for i in range(size)])

4123

4124 dim = self._process_parameters(dim)

4125

4126 size = (size,) if size > 1 else ()

4127 z = 1/math.sqrt(2)*(random_state.normal(size=size + (dim, dim)) +

4128 1j*random_state.normal(size=size + (dim, dim)))

4129 q, r = np.linalg.qr(z)

4130 # The last two dimensions are the rows and columns of R matrices.

4131 # Extract the diagonals. Note that this eliminates a dimension.

4132 d = r.diagonal(offset=0, axis1=-2, axis2=-1)

4133 # Add back a dimension for proper broadcasting: we're dividing

4134 # each row of each R matrix by the diagonal of the R matrix.

4135 q *= (d/abs(d))[..., np.newaxis, :] # to broadcast properly

4136 return q

4137

4138

4139unitary_group = unitary_group_gen()

4140

4141

4142class unitary_group_frozen(multi_rv_frozen):

4143 def __init__(self, dim=None, seed=None):

4144 """Create a frozen (U(N)) n-dimensional unitary matrix distribution.

4145

4146 Parameters

4147 ----------

4148 dim : scalar

4149 Dimension of matrices

4150 seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional

4151 If `seed` is None (or `np.random`), the `numpy.random.RandomState`

4152 singleton is used.

4153 If `seed` is an int, a new ``RandomState`` instance is used,

4154 seeded with `seed`.

4155 If `seed` is already a ``Generator`` or ``RandomState`` instance

4156 then that instance is used.

4157

4158 Examples

4159 --------

4160 >>> from scipy.stats import unitary_group

4161 >>> x = unitary_group(3)

4162 >>> x.rvs()

4163

4164 """

4165 self._dist = unitary_group_gen(seed)

4166 self.dim = self._dist._process_parameters(dim)

4167

4168 def rvs(self, size=1, random_state=None):

4169 return self._dist.rvs(self.dim, size, random_state)

4170

4171

4172_mvt_doc_default_callparams = """\

4173loc : array_like, optional

4174 Location of the distribution. (default ``0``)

4175shape : array_like, optional

4176 Positive semidefinite matrix of the distribution. (default ``1``)

4177df : float, optional

4178 Degrees of freedom of the distribution; must be greater than zero.

4179 If ``np.inf`` then results are multivariate normal. The default is ``1``.

4180allow_singular : bool, optional

4181 Whether to allow a singular matrix. (default ``False``)

4182"""

4183

4184_mvt_doc_callparams_note = """\

4185Setting the parameter `loc` to ``None`` is equivalent to having `loc`

4186be the zero-vector. The parameter `shape` can be a scalar, in which case

4187the shape matrix is the identity times that value, a vector of

4188diagonal entries for the shape matrix, or a two-dimensional array_like.

4189"""

4190

4191_mvt_doc_frozen_callparams_note = """\

4192See class definition for a detailed description of parameters."""

4193

4194mvt_docdict_params = {

4195 '_mvt_doc_default_callparams': _mvt_doc_default_callparams,

4196 '_mvt_doc_callparams_note': _mvt_doc_callparams_note,

4197 '_doc_random_state': _doc_random_state

4198}

4199

4200mvt_docdict_noparams = {

4201 '_mvt_doc_default_callparams': "",

4202 '_mvt_doc_callparams_note': _mvt_doc_frozen_callparams_note,

4203 '_doc_random_state': _doc_random_state

4204}

4205

4206

4207class multivariate_t_gen(multi_rv_generic):

4208 r"""A multivariate t-distributed random variable.

4209

4210 The `loc` parameter specifies the location. The `shape` parameter specifies

4211 the positive semidefinite shape matrix. The `df` parameter specifies the

4212 degrees of freedom.

4213

4214 In addition to calling the methods below, the object itself may be called

4215 as a function to fix the location, shape matrix, and degrees of freedom

4216 parameters, returning a "frozen" multivariate t-distribution random.

4217

4218 Methods

4219 -------

4220 pdf(x, loc=None, shape=1, df=1, allow_singular=False)

4221 Probability density function.

4222 logpdf(x, loc=None, shape=1, df=1, allow_singular=False)

4223 Log of the probability density function.

4224 rvs(loc=None, shape=1, df=1, size=1, random_state=None)

4225 Draw random samples from a multivariate t-distribution.

4226

4227 Parameters

4228 ----------

4229 %(_mvt_doc_default_callparams)s

4230 %(_doc_random_state)s

4231

4232 Notes

4233 -----

4234 %(_mvt_doc_callparams_note)s

4235 The matrix `shape` must be a (symmetric) positive semidefinite matrix. The

4236 determinant and inverse of `shape` are computed as the pseudo-determinant

4237 and pseudo-inverse, respectively, so that `shape` does not need to have

4238 full rank.

4239

4240 The probability density function for `multivariate_t` is

4241

4242 .. math::

4243

4244 f(x) = \frac{\Gamma(\nu + p)/2}{\Gamma(\nu/2)\nu^{p/2}\pi^{p/2}|\Sigma|^{1/2}}

4245 \left[1 + \frac{1}{\nu} (\mathbf{x} - \boldsymbol{\mu})^{\top}

4246 \boldsymbol{\Sigma}^{-1}

4247 (\mathbf{x} - \boldsymbol{\mu}) \right]^{-(\nu + p)/2},

4248

4249 where :math:`p` is the dimension of :math:`\mathbf{x}`,

4250 :math:`\boldsymbol{\mu}` is the :math:`p`-dimensional location,

4251 :math:`\boldsymbol{\Sigma}` the :math:`p \times p`-dimensional shape

4252 matrix, and :math:`\nu` is the degrees of freedom.

4253

4254 .. versionadded:: 1.6.0

4255

4256 Examples

4257 --------

4258 The object may be called (as a function) to fix the `loc`, `shape`,

4259 `df`, and `allow_singular` parameters, returning a "frozen"

4260 multivariate_t random variable:

4261

4262 >>> import numpy as np

4263 >>> from scipy.stats import multivariate_t

4264 >>> rv = multivariate_t([1.0, -0.5], [[2.1, 0.3], [0.3, 1.5]], df=2)

4265 >>> # Frozen object with the same methods but holding the given location,

4266 >>> # scale, and degrees of freedom fixed.

4267

4268 Create a contour plot of the PDF.

4269

4270 >>> import matplotlib.pyplot as plt

4271 >>> x, y = np.mgrid[-1:3:.01, -2:1.5:.01]

4272 >>> pos = np.dstack((x, y))

4273 >>> fig, ax = plt.subplots(1, 1)

4274 >>> ax.set_aspect('equal')

4275 >>> plt.contourf(x, y, rv.pdf(pos))

4276

4277 """

4278

4279 def __init__(self, seed=None):

4280 """Initialize a multivariate t-distributed random variable.

4281

4282 Parameters

4283 ----------

4284 seed : Random state.

4285

4286 """

4287 super().__init__(seed)

4288 self.__doc__ = doccer.docformat(self.__doc__, mvt_docdict_params)

4289 self._random_state = check_random_state(seed)

4290

4291 def __call__(self, loc=None, shape=1, df=1, allow_singular=False,

4292 seed=None):

4293 """Create a frozen multivariate t-distribution.

4294

4295 See `multivariate_t_frozen` for parameters.

4296 """

4297 if df == np.inf:

4298 return multivariate_normal_frozen(mean=loc, cov=shape,

4299 allow_singular=allow_singular,

4300 seed=seed)

4301 return multivariate_t_frozen(loc=loc, shape=shape, df=df,

4302 allow_singular=allow_singular, seed=seed)

4303

4304 def pdf(self, x, loc=None, shape=1, df=1, allow_singular=False):

4305 """Multivariate t-distribution probability density function.

4306

4307 Parameters

4308 ----------

4309 x : array_like

4310 Points at which to evaluate the probability density function.

4311 %(_mvt_doc_default_callparams)s

4312

4313 Returns

4314 -------

4315 pdf : Probability density function evaluated at `x`.

4316

4317 Examples

4318 --------

4319 >>> from scipy.stats import multivariate_t

4320 >>> x = [0.4, 5]

4321 >>> loc = [0, 1]

4322 >>> shape = [[1, 0.1], [0.1, 1]]

4323 >>> df = 7

4324 >>> multivariate_t.pdf(x, loc, shape, df)

4325 array([0.00075713])

4326

4327 """

4328 dim, loc, shape, df = self._process_parameters(loc, shape, df)

4329 x = self._process_quantiles(x, dim)

4330 shape_info = _PSD(shape, allow_singular=allow_singular)

4331 logpdf = self._logpdf(x, loc, shape_info.U, shape_info.log_pdet, df,

4332 dim, shape_info.rank)

4333 return np.exp(logpdf)

4334

4335 def logpdf(self, x, loc=None, shape=1, df=1):

4336 """Log of the multivariate t-distribution probability density function.

4337

4338 Parameters

4339 ----------

4340 x : array_like

4341 Points at which to evaluate the log of the probability density

4342 function.

4343 %(_mvt_doc_default_callparams)s

4344

4345 Returns

4346 -------

4347 logpdf : Log of the probability density function evaluated at `x`.

4348

4349 Examples

4350 --------

4351 >>> from scipy.stats import multivariate_t

4352 >>> x = [0.4, 5]

4353 >>> loc = [0, 1]

4354 >>> shape = [[1, 0.1], [0.1, 1]]

4355 >>> df = 7

4356 >>> multivariate_t.logpdf(x, loc, shape, df)

4357 array([-7.1859802])

4358

4359 See Also

4360 --------

4361 pdf : Probability density function.

4362

4363 """

4364 dim, loc, shape, df = self._process_parameters(loc, shape, df)

4365 x = self._process_quantiles(x, dim)

4366 shape_info = _PSD(shape)

4367 return self._logpdf(x, loc, shape_info.U, shape_info.log_pdet, df, dim,

4368 shape_info.rank)

4369

4370 def _logpdf(self, x, loc, prec_U, log_pdet, df, dim, rank):

4371 """Utility method `pdf`, `logpdf` for parameters.

4372

4373 Parameters

4374 ----------

4375 x : ndarray

4376 Points at which to evaluate the log of the probability density

4377 function.

4378 loc : ndarray

4379 Location of the distribution.

4380 prec_U : ndarray

4381 A decomposition such that `np.dot(prec_U, prec_U.T)` is the inverse

4382 of the shape matrix.

4383 log_pdet : float

4384 Logarithm of the determinant of the shape matrix.

4385 df : float

4386 Degrees of freedom of the distribution.

4387 dim : int

4388 Dimension of the quantiles x.

4389 rank : int

4390 Rank of the shape matrix.

4391

4392 Notes

4393 -----

4394 As this function does no argument checking, it should not be called

4395 directly; use 'logpdf' instead.

4396

4397 """

4398 if df == np.inf:

4399 return multivariate_normal._logpdf(x, loc, prec_U, log_pdet, rank)

4400

4401 dev = x - loc

4402 maha = np.square(np.dot(dev, prec_U)).sum(axis=-1)

4403

4404 t = 0.5 * (df + dim)

4405 A = gammaln(t)

4406 B = gammaln(0.5 * df)

4407 C = dim/2. * np.log(df * np.pi)

4408 D = 0.5 * log_pdet

4409 E = -t * np.log(1 + (1./df) * maha)

4410

4411 return _squeeze_output(A - B - C - D + E)

4412

4413 def rvs(self, loc=None, shape=1, df=1, size=1, random_state=None):

4414 """Draw random samples from a multivariate t-distribution.

4415

4416 Parameters

4417 ----------

4418 %(_mvt_doc_default_callparams)s

4419 size : integer, optional

4420 Number of samples to draw (default 1).

4421 %(_doc_random_state)s

4422

4423 Returns

4424 -------

4425 rvs : ndarray or scalar

4426 Random variates of size (`size`, `P`), where `P` is the

4427 dimension of the random variable.

4428

4429 Examples

4430 --------

4431 >>> from scipy.stats import multivariate_t

4432 >>> x = [0.4, 5]

4433 >>> loc = [0, 1]

4434 >>> shape = [[1, 0.1], [0.1, 1]]

4435 >>> df = 7

4436 >>> multivariate_t.rvs(loc, shape, df)

4437 array([[0.93477495, 3.00408716]])

4438

4439 """

4440 # For implementation details, see equation (3):

4441 #

4442 # Hofert, "On Sampling from the Multivariatet Distribution", 2013

4443 # http://rjournal.github.io/archive/2013-2/hofert.pdf

4444 #

4445 dim, loc, shape, df = self._process_parameters(loc, shape, df)

4446 if random_state is not None:

4447 rng = check_random_state(random_state)

4448 else:

4449 rng = self._random_state

4450

4451 if np.isinf(df):

4452 x = np.ones(size)

4453 else:

4454 x = rng.chisquare(df, size=size) / df

4455

4456 z = rng.multivariate_normal(np.zeros(dim), shape, size=size)

4457 samples = loc + z / np.sqrt(x)[..., None]

4458 return _squeeze_output(samples)

4459

4460 def _process_quantiles(self, x, dim):

4461 """

4462 Adjust quantiles array so that last axis labels the components of

4463 each data point.

4464 """

4465 x = np.asarray(x, dtype=float)

4466 if x.ndim == 0:

4467 x = x[np.newaxis]

4468 elif x.ndim == 1:

4469 if dim == 1:

4470 x = x[:, np.newaxis]

4471 else:

4472 x = x[np.newaxis, :]

4473 return x

4474

4475 def _process_parameters(self, loc, shape, df):

4476 """

4477 Infer dimensionality from location array and shape matrix, handle

4478 defaults, and ensure compatible dimensions.

4479 """

4480 if loc is None and shape is None:

4481 loc = np.asarray(0, dtype=float)

4482 shape = np.asarray(1, dtype=float)

4483 dim = 1

4484 elif loc is None:

4485 shape = np.asarray(shape, dtype=float)

4486 if shape.ndim < 2:

4487 dim = 1

4488 else:

4489 dim = shape.shape[0]

4490 loc = np.zeros(dim)

4491 elif shape is None:

4492 loc = np.asarray(loc, dtype=float)

4493 dim = loc.size

4494 shape = np.eye(dim)

4495 else:

4496 shape = np.asarray(shape, dtype=float)

4497 loc = np.asarray(loc, dtype=float)

4498 dim = loc.size

4499

4500 if dim == 1:

4501 loc = loc.reshape(1)

4502 shape = shape.reshape(1, 1)

4503

4504 if loc.ndim != 1 or loc.shape[0] != dim:

4505 raise ValueError("Array 'loc' must be a vector of length %d." %

4506 dim)

4507 if shape.ndim == 0:

4508 shape = shape * np.eye(dim)

4509 elif shape.ndim == 1:

4510 shape = np.diag(shape)

4511 elif shape.ndim == 2 and shape.shape != (dim, dim):

4512 rows, cols = shape.shape

4513 if rows != cols:

4514 msg = ("Array 'cov' must be square if it is two dimensional,"

4515 " but cov.shape = %s." % str(shape.shape))

4516 else:

4517 msg = ("Dimension mismatch: array 'cov' is of shape %s,"

4518 " but 'loc' is a vector of length %d.")

4519 msg = msg % (str(shape.shape), len(loc))

4520 raise ValueError(msg)

4521 elif shape.ndim > 2:

4522 raise ValueError("Array 'cov' must be at most two-dimensional,"

4523 " but cov.ndim = %d" % shape.ndim)

4524

4525 # Process degrees of freedom.

4526 if df is None:

4527 df = 1

4528 elif df <= 0:

4529 raise ValueError("'df' must be greater than zero.")

4530 elif np.isnan(df):

4531 raise ValueError("'df' is 'nan' but must be greater than zero or 'np.inf'.")

4532

4533 return dim, loc, shape, df

4534

4535

4536class multivariate_t_frozen(multi_rv_frozen):

4537

4538 def __init__(self, loc=None, shape=1, df=1, allow_singular=False,

4539 seed=None):

4540 """Create a frozen multivariate t distribution.

4541

4542 Parameters

4543 ----------

4544 %(_mvt_doc_default_callparams)s

4545

4546 Examples

4547 --------

4548 >>> import numpy as np

4549 >>> loc = np.zeros(3)

4550 >>> shape = np.eye(3)

4551 >>> df = 10

4552 >>> dist = multivariate_t(loc, shape, df)

4553 >>> dist.rvs()

4554 array([[ 0.81412036, -1.53612361, 0.42199647]])

4555 >>> dist.pdf([1, 1, 1])

4556 array([0.01237803])

4557

4558 """

4559 self._dist = multivariate_t_gen(seed)

4560 dim, loc, shape, df = self._dist._process_parameters(loc, shape, df)

4561 self.dim, self.loc, self.shape, self.df = dim, loc, shape, df

4562 self.shape_info = _PSD(shape, allow_singular=allow_singular)

4563

4564 def logpdf(self, x):

4565 x = self._dist._process_quantiles(x, self.dim)

4566 U = self.shape_info.U

4567 log_pdet = self.shape_info.log_pdet

4568 return self._dist._logpdf(x, self.loc, U, log_pdet, self.df, self.dim,

4569 self.shape_info.rank)

4570

4571 def pdf(self, x):

4572 return np.exp(self.logpdf(x))

4573

4574 def rvs(self, size=1, random_state=None):

4575 return self._dist.rvs(loc=self.loc,

4576 shape=self.shape,

4577 df=self.df,

4578 size=size,

4579 random_state=random_state)

4580

4581

4582multivariate_t = multivariate_t_gen()

4583

4584

4585# Set frozen generator docstrings from corresponding docstrings in

4586# matrix_normal_gen and fill in default strings in class docstrings

4587for name in ['logpdf', 'pdf', 'rvs']:

4588 method = multivariate_t_gen.__dict__[name]

4589 method_frozen = multivariate_t_frozen.__dict__[name]

4590 method_frozen.__doc__ = doccer.docformat(method.__doc__,

4591 mvt_docdict_noparams)

4592 method.__doc__ = doccer.docformat(method.__doc__, mvt_docdict_params)

4593

4594

4595_mhg_doc_default_callparams = """\

4596m : array_like

4597 The number of each type of object in the population.

4598 That is, :math:`m[i]` is the number of objects of

4599 type :math:`i`.

4600n : array_like

4601 The number of samples taken from the population.

4602"""

4603

4604_mhg_doc_callparams_note = """\

4605`m` must be an array of positive integers. If the quantile

4606:math:`i` contains values out of the range :math:`[0, m_i]`

4607where :math:`m_i` is the number of objects of type :math:`i`

4608in the population or if the parameters are inconsistent with one

4609another (e.g. ``x.sum() != n``), methods return the appropriate

4610value (e.g. ``0`` for ``pmf``). If `m` or `n` contain negative

4611values, the result will contain ``nan`` there.

4612"""

4613

4614_mhg_doc_frozen_callparams = ""

4615

4616_mhg_doc_frozen_callparams_note = """\

4617See class definition for a detailed description of parameters."""

4618

4619mhg_docdict_params = {

4620 '_doc_default_callparams': _mhg_doc_default_callparams,

4621 '_doc_callparams_note': _mhg_doc_callparams_note,

4622 '_doc_random_state': _doc_random_state

4623}

4624

4625mhg_docdict_noparams = {

4626 '_doc_default_callparams': _mhg_doc_frozen_callparams,

4627 '_doc_callparams_note': _mhg_doc_frozen_callparams_note,

4628 '_doc_random_state': _doc_random_state

4629}

4630

4631

4632class multivariate_hypergeom_gen(multi_rv_generic):

4633 r"""A multivariate hypergeometric random variable.

4634

4635 Methods

4636 -------

4637 pmf(x, m, n)

4638 Probability mass function.

4639 logpmf(x, m, n)

4640 Log of the probability mass function.

4641 rvs(m, n, size=1, random_state=None)

4642 Draw random samples from a multivariate hypergeometric

4643 distribution.

4644 mean(m, n)

4645 Mean of the multivariate hypergeometric distribution.

4646 var(m, n)

4647 Variance of the multivariate hypergeometric distribution.

4648 cov(m, n)

4649 Compute the covariance matrix of the multivariate

4650 hypergeometric distribution.

4651

4652 Parameters

4653 ----------

4654 %(_doc_default_callparams)s

4655 %(_doc_random_state)s

4656

4657 Notes

4658 -----

4659 %(_doc_callparams_note)s

4660

4661 The probability mass function for `multivariate_hypergeom` is

4662

4663 .. math::

4664

4665 P(X_1 = x_1, X_2 = x_2, \ldots, X_k = x_k) = \frac{\binom{m_1}{x_1}

4666 \binom{m_2}{x_2} \cdots \binom{m_k}{x_k}}{\binom{M}{n}}, \\ \quad

4667 (x_1, x_2, \ldots, x_k) \in \mathbb{N}^k \text{ with }

4668 \sum_{i=1}^k x_i = n

4669

4670 where :math:`m_i` are the number of objects of type :math:`i`, :math:`M`

4671 is the total number of objects in the population (sum of all the

4672 :math:`m_i`), and :math:`n` is the size of the sample to be taken

4673 from the population.

4674

4675 .. versionadded:: 1.6.0

4676

4677 Examples

4678 --------

4679 To evaluate the probability mass function of the multivariate

4680 hypergeometric distribution, with a dichotomous population of size

4681 :math:`10` and :math:`20`, at a sample of size :math:`12` with

4682 :math:`8` objects of the first type and :math:`4` objects of the

4683 second type, use:

4684

4685 >>> from scipy.stats import multivariate_hypergeom

4686 >>> multivariate_hypergeom.pmf(x=[8, 4], m=[10, 20], n=12)

4687 0.0025207176631464523

4688

4689 The `multivariate_hypergeom` distribution is identical to the

4690 corresponding `hypergeom` distribution (tiny numerical differences

4691 notwithstanding) when only two types (good and bad) of objects

4692 are present in the population as in the example above. Consider

4693 another example for a comparison with the hypergeometric distribution:

4694

4695 >>> from scipy.stats import hypergeom

4696 >>> multivariate_hypergeom.pmf(x=[3, 1], m=[10, 5], n=4)

4697 0.4395604395604395

4698 >>> hypergeom.pmf(k=3, M=15, n=4, N=10)

4699 0.43956043956044005

4700

4701 The functions ``pmf``, ``logpmf``, ``mean``, ``var``, ``cov``, and ``rvs``

4702 support broadcasting, under the convention that the vector parameters

4703 (``x``, ``m``, and ``n``) are interpreted as if each row along the last

4704 axis is a single object. For instance, we can combine the previous two

4705 calls to `multivariate_hypergeom` as

4706

4707 >>> multivariate_hypergeom.pmf(x=[[8, 4], [3, 1]], m=[[10, 20], [10, 5]],

4708 ... n=[12, 4])

4709 array([0.00252072, 0.43956044])

4710

4711 This broadcasting also works for ``cov``, where the output objects are

4712 square matrices of size ``m.shape[-1]``. For example:

4713

4714 >>> multivariate_hypergeom.cov(m=[[7, 9], [10, 15]], n=[8, 12])

4715 array([[[ 1.05, -1.05],

4716 [-1.05, 1.05]],

4717 [[ 1.56, -1.56],

4718 [-1.56, 1.56]]])

4719

4720 That is, ``result[0]`` is equal to

4721 ``multivariate_hypergeom.cov(m=[7, 9], n=8)`` and ``result[1]`` is equal

4722 to ``multivariate_hypergeom.cov(m=[10, 15], n=12)``.

4723

4724 Alternatively, the object may be called (as a function) to fix the `m`

4725 and `n` parameters, returning a "frozen" multivariate hypergeometric

4726 random variable.

4727

4728 >>> rv = multivariate_hypergeom(m=[10, 20], n=12)

4729 >>> rv.pmf(x=[8, 4])

4730 0.0025207176631464523

4731

4732 See Also

4733 --------

4734 scipy.stats.hypergeom : The hypergeometric distribution.

4735 scipy.stats.multinomial : The multinomial distribution.

4736

4737 References

4738 ----------

4739 .. [1] The Multivariate Hypergeometric Distribution,

4740 http://www.randomservices.org/random/urn/MultiHypergeometric.html

4741 .. [2] Thomas J. Sargent and John Stachurski, 2020,

4742 Multivariate Hypergeometric Distribution

4743 https://python.quantecon.org/_downloads/pdf/multi_hyper.pdf

4744 """

4745 def __init__(self, seed=None):

4746 super().__init__(seed)

4747 self.__doc__ = doccer.docformat(self.__doc__, mhg_docdict_params)

4748

4749 def __call__(self, m, n, seed=None):

4750 """Create a frozen multivariate_hypergeom distribution.

4751

4752 See `multivariate_hypergeom_frozen` for more information.

4753 """

4754 return multivariate_hypergeom_frozen(m, n, seed=seed)

4755

4756 def _process_parameters(self, m, n):

4757 m = np.asarray(m)

4758 n = np.asarray(n)

4759 if m.size == 0:

4760 m = m.astype(int)

4761 if n.size == 0:

4762 n = n.astype(int)

4763 if not np.issubdtype(m.dtype, np.integer):

4764 raise TypeError("'m' must an array of integers.")

4765 if not np.issubdtype(n.dtype, np.integer):

4766 raise TypeError("'n' must an array of integers.")

4767 if m.ndim == 0:

4768 raise ValueError("'m' must be an array with"

4769 " at least one dimension.")

4770

4771 # check for empty arrays

4772 if m.size != 0:

4773 n = n[..., np.newaxis]

4774

4775 m, n = np.broadcast_arrays(m, n)

4776

4777 # check for empty arrays

4778 if m.size != 0:

4779 n = n[..., 0]

4780

4781 mcond = m < 0

4782

4783 M = m.sum(axis=-1)

4784

4785 ncond = (n < 0) | (n > M)

4786 return M, m, n, mcond, ncond, np.any(mcond, axis=-1) | ncond

4787

4788 def _process_quantiles(self, x, M, m, n):

4789 x = np.asarray(x)

4790 if not np.issubdtype(x.dtype, np.integer):

4791 raise TypeError("'x' must an array of integers.")

4792 if x.ndim == 0:

4793 raise ValueError("'x' must be an array with"

4794 " at least one dimension.")

4795 if not x.shape[-1] == m.shape[-1]:

4796 raise ValueError(f"Size of each quantile must be size of 'm': "

4797 f"received {x.shape[-1]}, "

4798 f"but expected {m.shape[-1]}.")

4799

4800 # check for empty arrays

4801 if m.size != 0:

4802 n = n[..., np.newaxis]

4803 M = M[..., np.newaxis]

4804

4805 x, m, n, M = np.broadcast_arrays(x, m, n, M)

4806

4807 # check for empty arrays

4808 if m.size != 0:

4809 n, M = n[..., 0], M[..., 0]

4810

4811 xcond = (x < 0) | (x > m)

4812 return (x, M, m, n, xcond,

4813 np.any(xcond, axis=-1) | (x.sum(axis=-1) != n))

4814

4815 def _checkresult(self, result, cond, bad_value):

4816 result = np.asarray(result)

4817 if cond.ndim != 0:

4818 result[cond] = bad_value

4819 elif cond:

4820 return bad_value

4821 if result.ndim == 0:

4822 return result[()]

4823 return result

4824

4825 def _logpmf(self, x, M, m, n, mxcond, ncond):

4826 # This equation of the pmf comes from the relation,

4827 # n combine r = beta(n+1, 1) / beta(r+1, n-r+1)

4828 num = np.zeros_like(m, dtype=np.float_)

4829 den = np.zeros_like(n, dtype=np.float_)

4830 m, x = m[~mxcond], x[~mxcond]

4831 M, n = M[~ncond], n[~ncond]

4832 num[~mxcond] = (betaln(m+1, 1) - betaln(x+1, m-x+1))

4833 den[~ncond] = (betaln(M+1, 1) - betaln(n+1, M-n+1))

4834 num[mxcond] = np.nan

4835 den[ncond] = np.nan

4836 num = num.sum(axis=-1)

4837 return num - den

4838

4839 def logpmf(self, x, m, n):

4840 """Log of the multivariate hypergeometric probability mass function.

4841

4842 Parameters

4843 ----------

4844 x : array_like

4845 Quantiles, with the last axis of `x` denoting the components.

4846 %(_doc_default_callparams)s

4847

4848 Returns

4849 -------

4850 logpmf : ndarray or scalar

4851 Log of the probability mass function evaluated at `x`

4852

4853 Notes

4854 -----

4855 %(_doc_callparams_note)s

4856 """

4857 M, m, n, mcond, ncond, mncond = self._process_parameters(m, n)

4858 (x, M, m, n, xcond,

4859 xcond_reduced) = self._process_quantiles(x, M, m, n)

4860 mxcond = mcond | xcond

4861 ncond = ncond | np.zeros(n.shape, dtype=np.bool_)

4862

4863 result = self._logpmf(x, M, m, n, mxcond, ncond)

4864

4865 # replace values for which x was out of the domain; broadcast

4866 # xcond to the right shape

4867 xcond_ = xcond_reduced | np.zeros(mncond.shape, dtype=np.bool_)

4868 result = self._checkresult(result, xcond_, np.NINF)

4869

4870 # replace values bad for n or m; broadcast

4871 # mncond to the right shape

4872 mncond_ = mncond | np.zeros(xcond_reduced.shape, dtype=np.bool_)

4873 return self._checkresult(result, mncond_, np.nan)

4874

4875 def pmf(self, x, m, n):

4876 """Multivariate hypergeometric probability mass function.

4877

4878 Parameters

4879 ----------

4880 x : array_like

4881 Quantiles, with the last axis of `x` denoting the components.

4882 %(_doc_default_callparams)s

4883

4884 Returns

4885 -------

4886 pmf : ndarray or scalar

4887 Probability density function evaluated at `x`

4888

4889 Notes

4890 -----

4891 %(_doc_callparams_note)s

4892 """

4893 out = np.exp(self.logpmf(x, m, n))

4894 return out

4895

4896 def mean(self, m, n):

4897 """Mean of the multivariate hypergeometric distribution.

4898

4899 Parameters

4900 ----------

4901 %(_doc_default_callparams)s

4902

4903 Returns

4904 -------

4905 mean : array_like or scalar

4906 The mean of the distribution

4907 """

4908 M, m, n, _, _, mncond = self._process_parameters(m, n)

4909 # check for empty arrays

4910 if m.size != 0:

4911 M, n = M[..., np.newaxis], n[..., np.newaxis]

4912 cond = (M == 0)

4913 M = np.ma.masked_array(M, mask=cond)

4914 mu = n*(m/M)

4915 if m.size != 0:

4916 mncond = (mncond[..., np.newaxis] |

4917 np.zeros(mu.shape, dtype=np.bool_))

4918 return self._checkresult(mu, mncond, np.nan)

4919

4920 def var(self, m, n):

4921 """Variance of the multivariate hypergeometric distribution.

4922

4923 Parameters

4924 ----------

4925 %(_doc_default_callparams)s

4926

4927 Returns

4928 -------

4929 array_like

4930 The variances of the components of the distribution. This is

4931 the diagonal of the covariance matrix of the distribution

4932 """

4933 M, m, n, _, _, mncond = self._process_parameters(m, n)

4934 # check for empty arrays

4935 if m.size != 0:

4936 M, n = M[..., np.newaxis], n[..., np.newaxis]

4937 cond = (M == 0) & (M-1 == 0)

4938 M = np.ma.masked_array(M, mask=cond)

4939 output = n * m/M * (M-m)/M * (M-n)/(M-1)

4940 if m.size != 0:

4941 mncond = (mncond[..., np.newaxis] |

4942 np.zeros(output.shape, dtype=np.bool_))

4943 return self._checkresult(output, mncond, np.nan)

4944

4945 def cov(self, m, n):

4946 """Covariance matrix of the multivariate hypergeometric distribution.

4947

4948 Parameters

4949 ----------

4950 %(_doc_default_callparams)s

4951

4952 Returns

4953 -------

4954 cov : array_like

4955 The covariance matrix of the distribution

4956 """

4957 # see [1]_ for the formula and [2]_ for implementation

4958 # cov( x_i,x_j ) = -n * (M-n)/(M-1) * (K_i*K_j) / (M**2)

4959 M, m, n, _, _, mncond = self._process_parameters(m, n)

4960 # check for empty arrays

4961 if m.size != 0:

4962 M = M[..., np.newaxis, np.newaxis]

4963 n = n[..., np.newaxis, np.newaxis]

4964 cond = (M == 0) & (M-1 == 0)

4965 M = np.ma.masked_array(M, mask=cond)

4966 output = (-n * (M-n)/(M-1) *

4967 np.einsum("...i,...j->...ij", m, m) / (M**2))

4968 # check for empty arrays

4969 if m.size != 0:

4970 M, n = M[..., 0, 0], n[..., 0, 0]

4971 cond = cond[..., 0, 0]

4972 dim = m.shape[-1]

4973 # diagonal entries need to be computed differently

4974 for i in range(dim):

4975 output[..., i, i] = (n * (M-n) * m[..., i]*(M-m[..., i]))

4976 output[..., i, i] = output[..., i, i] / (M-1)

4977 output[..., i, i] = output[..., i, i] / (M**2)

4978 if m.size != 0:

4979 mncond = (mncond[..., np.newaxis, np.newaxis] |

4980 np.zeros(output.shape, dtype=np.bool_))

4981 return self._checkresult(output, mncond, np.nan)

4982

4983 def rvs(self, m, n, size=None, random_state=None):

4984 """Draw random samples from a multivariate hypergeometric distribution.

4985

4986 Parameters

4987 ----------

4988 %(_doc_default_callparams)s

4989 size : integer or iterable of integers, optional

4990 Number of samples to draw. Default is ``None``, in which case a

4991 single variate is returned as an array with shape ``m.shape``.

4992 %(_doc_random_state)s

4993

4994 Returns

4995 -------

4996 rvs : array_like

4997 Random variates of shape ``size`` or ``m.shape``

4998 (if ``size=None``).

4999

5000 Notes

5001 -----

5002 %(_doc_callparams_note)s

5003

5004 Also note that NumPy's `multivariate_hypergeometric` sampler is not

5005 used as it doesn't support broadcasting.

5006 """

5007 M, m, n, _, _, _ = self._process_parameters(m, n)

5008

5009 random_state = self._get_random_state(random_state)

5010

5011 if size is not None and isinstance(size, int):

5012 size = (size, )

5013

5014 if size is None:

5015 rvs = np.empty(m.shape, dtype=m.dtype)

5016 else:

5017 rvs = np.empty(size + (m.shape[-1], ), dtype=m.dtype)

5018 rem = M

5019

5020 # This sampler has been taken from numpy gh-13794

5021 # https://github.com/numpy/numpy/pull/13794

5022 for c in range(m.shape[-1] - 1):

5023 rem = rem - m[..., c]

5024 n0mask = n == 0

5025 rvs[..., c] = (~n0mask *

5026 random_state.hypergeometric(m[..., c],

5027 rem + n0mask,

5028 n + n0mask,

5029 size=size))

5030 n = n - rvs[..., c]

5031 rvs[..., m.shape[-1] - 1] = n

5032

5033 return rvs

5034

5035

5036multivariate_hypergeom = multivariate_hypergeom_gen()

5037

5038

5039class multivariate_hypergeom_frozen(multi_rv_frozen):

5040 def __init__(self, m, n, seed=None):

5041 self._dist = multivariate_hypergeom_gen(seed)

5042 (self.M, self.m, self.n,

5043 self.mcond, self.ncond,

5044 self.mncond) = self._dist._process_parameters(m, n)

5045

5046 # monkey patch self._dist

5047 def _process_parameters(m, n):

5048 return (self.M, self.m, self.n,

5049 self.mcond, self.ncond,

5050 self.mncond)

5051 self._dist._process_parameters = _process_parameters

5052

5053 def logpmf(self, x):

5054 return self._dist.logpmf(x, self.m, self.n)

5055

5056 def pmf(self, x):

5057 return self._dist.pmf(x, self.m, self.n)

5058

5059 def mean(self):

5060 return self._dist.mean(self.m, self.n)

5061

5062 def var(self):

5063 return self._dist.var(self.m, self.n)

5064

5065 def cov(self):

5066 return self._dist.cov(self.m, self.n)

5067

5068 def rvs(self, size=1, random_state=None):

5069 return self._dist.rvs(self.m, self.n,

5070 size=size,

5071 random_state=random_state)

5072

5073

5074# Set frozen generator docstrings from corresponding docstrings in

5075# multivariate_hypergeom and fill in default strings in class docstrings

5076for name in ['logpmf', 'pmf', 'mean', 'var', 'cov', 'rvs']:

5077 method = multivariate_hypergeom_gen.__dict__[name]

5078 method_frozen = multivariate_hypergeom_frozen.__dict__[name]

5079 method_frozen.__doc__ = doccer.docformat(

5080 method.__doc__, mhg_docdict_noparams)

5081 method.__doc__ = doccer.docformat(method.__doc__,

5082 mhg_docdict_params)

5083

5084

5085class random_table_gen(multi_rv_generic):

5086 r"""Contingency tables from independent samples with fixed marginal sums.

5087

5088 This is the distribution of random tables with given row and column vector

5089 sums. This distribution represents the set of random tables under the null

5090 hypothesis that rows and columns are independent. It is used in hypothesis

5091 tests of independence.

5092

5093 Because of assumed independence, the expected frequency of each table

5094 element can be computed from the row and column sums, so that the

5095 distribution is completely determined by these two vectors.

5096

5097 Methods

5098 -------

5099 logpmf(x)

5100 Log-probability of table `x` to occur in the distribution.

5101 pmf(x)

5102 Probability of table `x` to occur in the distribution.

5103 mean(row, col)

5104 Mean table.

5105 rvs(row, col, size=None, method=None, random_state=None)

5106 Draw random tables with given row and column vector sums.

5107

5108 Parameters

5109 ----------

5110 %(_doc_row_col)s

5111 %(_doc_random_state)s

5112

5113 Notes

5114 -----

5115 %(_doc_row_col_note)s

5116

5117 Random elements from the distribution are generated either with Boyett's

5118 [1]_ or Patefield's algorithm [2]_. Boyett's algorithm has

5119 O(N) time and space complexity, where N is the total sum of entries in the

5120 table. Patefield's algorithm has O(K x log(N)) time complexity, where K is

5121 the number of cells in the table and requires only a small constant work

5122 space. By default, the `rvs` method selects the fastest algorithm based on

5123 the input, but you can specify the algorithm with the keyword `method`.

5124 Allowed values are "boyett" and "patefield".

5125

5126 .. versionadded:: 1.10.0

5127

5128 Examples

5129 --------

5130 >>> from scipy.stats import random_table

5131

5132 >>> row = [1, 5]

5133 >>> col = [2, 3, 1]

5134 >>> random_table.mean(row, col)

5135 array([[0.33333333, 0.5 , 0.16666667],

5136 [1.66666667, 2.5 , 0.83333333]])

5137

5138 Alternatively, the object may be called (as a function) to fix the row

5139 and column vector sums, returning a "frozen" distribution.

5140

5141 >>> dist = random_table(row, col)

5142 >>> dist.rvs(random_state=123)

5143 array([[1., 0., 0.],

5144 [1., 3., 1.]])

5145

5146 References

5147 ----------

5148 .. [1] J. Boyett, AS 144 Appl. Statist. 28 (1979) 329-332

5149 .. [2] W.M. Patefield, AS 159 Appl. Statist. 30 (1981) 91-97

5150 """

5151

5152 def __init__(self, seed=None):

5153 super().__init__(seed)

5154

5155 def __call__(self, row, col, *, seed=None):

5156 """Create a frozen distribution of tables with given marginals.

5157

5158 See `random_table_frozen` for more information.

5159 """

5160 return random_table_frozen(row, col, seed=seed)

5161

5162 def logpmf(self, x, row, col):

5163 """Log-probability of table to occur in the distribution.

5164

5165 Parameters

5166 ----------

5167 %(_doc_x)s

5168 %(_doc_row_col)s

5169

5170 Returns

5171 -------

5172 logpmf : ndarray or scalar

5173 Log of the probability mass function evaluated at `x`.

5174

5175 Notes

5176 -----

5177 %(_doc_row_col_note)s

5178

5179 If row and column marginals of `x` do not match `row` and `col`,

5180 negative infinity is returned.

5181

5182 Examples

5183 --------

5184 >>> from scipy.stats import random_table

5185 >>> import numpy as np

5186

5187 >>> x = [[1, 5, 1], [2, 3, 1]]

5188 >>> row = np.sum(x, axis=1)

5189 >>> col = np.sum(x, axis=0)

5190 >>> random_table.logpmf(x, row, col)

5191 -1.6306401200847027

5192

5193 Alternatively, the object may be called (as a function) to fix the row

5194 and column vector sums, returning a "frozen" distribution.

5195

5196 >>> d = random_table(row, col)

5197 >>> d.logpmf(x)

5198 -1.6306401200847027

5199 """

5200 r, c, n = self._process_parameters(row, col)

5201 x = np.asarray(x)

5202

5203 if x.ndim < 2:

5204 raise ValueError("`x` must be at least two-dimensional")

5205

5206 dtype_is_int = np.issubdtype(x.dtype, np.integer)

5207 with np.errstate(invalid='ignore'):

5208 if not dtype_is_int and not np.all(x.astype(int) == x):

5209 raise ValueError("`x` must contain only integral values")

5210

5211 # x does not contain NaN if we arrive here

5212 if np.any(x < 0):

5213 raise ValueError("`x` must contain only non-negative values")

5214

5215 r2 = np.sum(x, axis=-1)

5216 c2 = np.sum(x, axis=-2)

5217

5218 if r2.shape[-1] != len(r):

5219 raise ValueError("shape of `x` must agree with `row`")

5220

5221 if c2.shape[-1] != len(c):

5222 raise ValueError("shape of `x` must agree with `col`")

5223

5224 res = np.empty(x.shape[:-2])

5225

5226 mask = np.all(r2 == r, axis=-1) & np.all(c2 == c, axis=-1)

5227

5228 def lnfac(x):

5229 return gammaln(x + 1)

5230

5231 res[mask] = (np.sum(lnfac(r), axis=-1) + np.sum(lnfac(c), axis=-1)

5232 - lnfac(n) - np.sum(lnfac(x[mask]), axis=(-1, -2)))

5233 res[~mask] = -np.inf

5234

5235 return res[()]

5236

5237 def pmf(self, x, row, col):

5238 """Probability of table to occur in the distribution.

5239

5240 Parameters

5241 ----------

5242 %(_doc_x)s

5243 %(_doc_row_col)s

5244

5245 Returns

5246 -------

5247 pmf : ndarray or scalar

5248 Probability mass function evaluated at `x`.

5249

5250 Notes

5251 -----

5252 %(_doc_row_col_note)s

5253

5254 If row and column marginals of `x` do not match `row` and `col`,

5255 zero is returned.

5256

5257 Examples

5258 --------

5259 >>> from scipy.stats import random_table

5260 >>> import numpy as np

5261

5262 >>> x = [[1, 5, 1], [2, 3, 1]]

5263 >>> row = np.sum(x, axis=1)

5264 >>> col = np.sum(x, axis=0)

5265 >>> random_table.pmf(x, row, col)

5266 0.19580419580419592

5267

5268 Alternatively, the object may be called (as a function) to fix the row

5269 and column vector sums, returning a "frozen" distribution.

5270

5271 >>> d = random_table(row, col)

5272 >>> d.pmf(x)

5273 0.19580419580419592

5274 """

5275 return np.exp(self.logpmf(x, row, col))

5276

5277 def mean(self, row, col):

5278 """Mean of distribution of conditional tables.

5279 %(_doc_mean_params)s

5280

5281 Returns

5282 -------

5283 mean: ndarray

5284 Mean of the distribution.

5285

5286 Notes

5287 -----

5288 %(_doc_row_col_note)s

5289

5290 Examples

5291 --------

5292 >>> from scipy.stats import random_table

5293

5294 >>> row = [1, 5]

5295 >>> col = [2, 3, 1]

5296 >>> random_table.mean(row, col)

5297 array([[0.33333333, 0.5 , 0.16666667],

5298 [1.66666667, 2.5 , 0.83333333]])

5299

5300 Alternatively, the object may be called (as a function) to fix the row

5301 and column vector sums, returning a "frozen" distribution.

5302

5303 >>> d = random_table(row, col)

5304 >>> d.mean()

5305 array([[0.33333333, 0.5 , 0.16666667],

5306 [1.66666667, 2.5 , 0.83333333]])

5307 """

5308 r, c, n = self._process_parameters(row, col)

5309 return np.outer(r, c) / n

5310

5311 def rvs(self, row, col, *, size=None, method=None, random_state=None):

5312 """Draw random tables with fixed column and row marginals.

5313

5314 Parameters

5315 ----------

5316 %(_doc_row_col)s

5317 size : integer, optional

5318 Number of samples to draw (default 1).

5319 method : str, optional

5320 Which method to use, "boyett" or "patefield". If None (default),

5321 selects the fastest method for this input.

5322 %(_doc_random_state)s

5323

5324 Returns

5325 -------

5326 rvs : ndarray

5327 Random 2D tables of shape (`size`, `len(row)`, `len(col)`).

5328

5329 Notes

5330 -----

5331 %(_doc_row_col_note)s

5332

5333 Examples

5334 --------

5335 >>> from scipy.stats import random_table

5336

5337 >>> row = [1, 5]

5338 >>> col = [2, 3, 1]

5339 >>> random_table.rvs(row, col, random_state=123)

5340 array([[1., 0., 0.],

5341 [1., 3., 1.]])

5342

5343 Alternatively, the object may be called (as a function) to fix the row

5344 and column vector sums, returning a "frozen" distribution.

5345

5346 >>> d = random_table(row, col)

5347 >>> d.rvs(random_state=123)

5348 array([[1., 0., 0.],

5349 [1., 3., 1.]])

5350 """

5351 r, c, n = self._process_parameters(row, col)

5352 size, shape = self._process_size_shape(size, r, c)

5353

5354 random_state = self._get_random_state(random_state)

5355 meth = self._process_rvs_method(method, r, c, n)

5356

5357 return meth(r, c, n, size, random_state).reshape(shape)

5358

5359 @staticmethod

5360 def _process_parameters(row, col):

5361 """

5362 Check that row and column vectors are one-dimensional, that they do

5363 not contain negative or non-integer entries, and that the sums over

5364 both vectors are equal.

5365 """

5366 r = np.array(row, dtype=np.int64, copy=True)

5367 c = np.array(col, dtype=np.int64, copy=True)

5368

5369 if np.ndim(r) != 1:

5370 raise ValueError("`row` must be one-dimensional")

5371 if np.ndim(c) != 1:

5372 raise ValueError("`col` must be one-dimensional")

5373

5374 if np.any(r < 0):

5375 raise ValueError("each element of `row` must be non-negative")

5376 if np.any(c < 0):

5377 raise ValueError("each element of `col` must be non-negative")

5378

5379 n = np.sum(r)

5380 if n != np.sum(c):

5381 raise ValueError("sums over `row` and `col` must be equal")

5382

5383 if not np.all(r == np.asarray(row)):

5384 raise ValueError("each element of `row` must be an integer")

5385 if not np.all(c == np.asarray(col)):

5386 raise ValueError("each element of `col` must be an integer")

5387

5388 return r, c, n

5389

5390 @staticmethod

5391 def _process_size_shape(size, r, c):

5392 """

5393 Compute the number of samples to be drawn and the shape of the output

5394 """

5395 shape = (len(r), len(c))

5396

5397 if size is None:

5398 return 1, shape

5399

5400 size = np.atleast_1d(size)

5401 if not np.issubdtype(size.dtype, np.integer) or np.any(size < 0):

5402 raise ValueError("`size` must be a non-negative integer or `None`")

5403

5404 return np.prod(size), tuple(size) + shape

5405

5406 @classmethod

5407 def _process_rvs_method(cls, method, r, c, n):

5408 known_methods = {

5409 None: cls._rvs_select(r, c, n),

5410 "boyett": cls._rvs_boyett,

5411 "patefield": cls._rvs_patefield,

5412 }

5413 try:

5414 return known_methods[method]

5415 except KeyError:

5416 raise ValueError(f"'{method}' not recognized, "

5417 f"must be one of {set(known_methods)}")

5418

5419 @classmethod

5420 def _rvs_select(cls, r, c, n):

5421 fac = 1.0 # benchmarks show that this value is about 1

5422 k = len(r) * len(c) # number of cells

5423 # n + 1 guards against failure if n == 0

5424 if n > fac * np.log(n + 1) * k:

5425 return cls._rvs_patefield

5426 return cls._rvs_boyett

5427

5428 @staticmethod

5429 def _rvs_boyett(row, col, ntot, size, random_state):

5430 return _rcont.rvs_rcont1(row, col, ntot, size, random_state)

5431

5432 @staticmethod

5433 def _rvs_patefield(row, col, ntot, size, random_state):

5434 return _rcont.rvs_rcont2(row, col, ntot, size, random_state)

5435

5436

5437random_table = random_table_gen()

5438

5439

5440class random_table_frozen(multi_rv_frozen):

5441 def __init__(self, row, col, *, seed=None):

5442 self._dist = random_table_gen(seed)

5443 self._params = self._dist._process_parameters(row, col)

5444

5445 # monkey patch self._dist

5446 def _process_parameters(r, c):

5447 return self._params

5448 self._dist._process_parameters = _process_parameters

5449

5450 def logpmf(self, x):

5451 return self._dist.logpmf(x, None, None)

5452

5453 def pmf(self, x):

5454 return self._dist.pmf(x, None, None)

5455

5456 def mean(self):

5457 return self._dist.mean(None, None)

5458

5459 def rvs(self, size=None, method=None, random_state=None):

5460 # optimisations are possible here

5461 return self._dist.rvs(None, None, size=size, method=method,

5462 random_state=random_state)

5463

5464

5465_ctab_doc_row_col = """\

5466row : array_like

5467 Sum of table entries in each row.

5468col : array_like

5469 Sum of table entries in each column."""

5470

5471_ctab_doc_x = """\

5472x : array-like

5473 Two-dimensional table of non-negative integers, or a

5474 multi-dimensional array with the last two dimensions

5475 corresponding with the tables."""

5476

5477_ctab_doc_row_col_note = """\

5478The row and column vectors must be one-dimensional, not empty,

5479and each sum up to the same value. They cannot contain negative

5480or noninteger entries."""

5481

5482_ctab_doc_mean_params = f"""

5483Parameters

5484----------

5485{_ctab_doc_row_col}"""

5486

5487_ctab_doc_row_col_note_frozen = """\

5488See class definition for a detailed description of parameters."""

5489

5490_ctab_docdict = {

5491 "_doc_random_state": _doc_random_state,

5492 "_doc_row_col": _ctab_doc_row_col,

5493 "_doc_x": _ctab_doc_x,

5494 "_doc_mean_params": _ctab_doc_mean_params,

5495 "_doc_row_col_note": _ctab_doc_row_col_note,

5496}

5497

5498_ctab_docdict_frozen = _ctab_docdict.copy()

5499_ctab_docdict_frozen.update({

5500 "_doc_row_col": "",

5501 "_doc_mean_params": "",

5502 "_doc_row_col_note": _ctab_doc_row_col_note_frozen,

5503})

5504

5505

5506def _docfill(obj, docdict, template=None):

5507 obj.__doc__ = doccer.docformat(template or obj.__doc__, docdict)

5508

5509

5510# Set frozen generator docstrings from corresponding docstrings in

5511# random_table and fill in default strings in class docstrings

5512_docfill(random_table_gen, _ctab_docdict)

5513for name in ['logpmf', 'pmf', 'mean', 'rvs']:

5514 method = random_table_gen.__dict__[name]

5515 method_frozen = random_table_frozen.__dict__[name]

5516 _docfill(method_frozen, _ctab_docdict_frozen, method.__doc__)

5517 _docfill(method, _ctab_docdict)

5518

5519

5520class uniform_direction_gen(multi_rv_generic):

5521 r"""A vector-valued uniform direction.

5522

5523 Return a random direction (unit vector). The `dim` keyword specifies

5524 the dimensionality of the space.

5525

5526 Methods

5527 -------

5528 rvs(dim=None, size=1, random_state=None)

5529 Draw random directions.

5530

5531 Parameters

5532 ----------

5533 dim : scalar

5534 Dimension of directions.

5535 seed : {None, int, `numpy.random.Generator`,

5536 `numpy.random.RandomState`}, optional

5537

5538 Used for drawing random variates.

5539 If `seed` is `None`, the `~np.random.RandomState` singleton is used.

5540 If `seed` is an int, a new ``RandomState`` instance is used, seeded

5541 with seed.

5542 If `seed` is already a ``RandomState`` or ``Generator`` instance,

5543 then that object is used.

5544 Default is `None`.

5545

5546 Notes

5547 -----

5548 This distribution generates unit vectors uniformly distributed on

5549 the surface of a hypersphere. These can be interpreted as random

5550 directions.

5551 For example, if `dim` is 3, 3D vectors from the surface of :math:`S^2`

5552 will be sampled.

5553

5554 References

5555 ----------

5556 .. [1] Marsaglia, G. (1972). "Choosing a Point from the Surface of a

5557 Sphere". Annals of Mathematical Statistics. 43 (2): 645-646.

5558

5559 Examples

5560 --------

5561 >>> import numpy as np

5562 >>> from scipy.stats import uniform_direction

5563 >>> x = uniform_direction.rvs(3)

5564 >>> np.linalg.norm(x)

5565 1.

5566

5567 This generates one random direction, a vector on the surface of

5568 :math:`S^2`.

5569

5570 Alternatively, the object may be called (as a function) to return a frozen

5571 distribution with fixed `dim` parameter. Here,

5572 we create a `uniform_direction` with ``dim=3`` and draw 5 observations.

5573 The samples are then arranged in an array of shape 5x3.

5574

5575 >>> rng = np.random.default_rng()

5576 >>> uniform_sphere_dist = uniform_direction(3)

5577 >>> unit_vectors = uniform_sphere_dist.rvs(5, random_state=rng)

5578 >>> unit_vectors

5579 array([[ 0.56688642, -0.1332634 , -0.81294566],

5580 [-0.427126 , -0.74779278, 0.50830044],

5581 [ 0.3793989 , 0.92346629, 0.05715323],

5582 [ 0.36428383, -0.92449076, -0.11231259],

5583 [-0.27733285, 0.94410968, -0.17816678]])

5584 """

5585

5586 def __init__(self, seed=None):

5587 super().__init__(seed)

5588 self.__doc__ = doccer.docformat(self.__doc__)

5589

5590 def __call__(self, dim=None, seed=None):

5591 """Create a frozen n-dimensional uniform direction distribution.

5592

5593 See `uniform_direction` for more information.

5594 """

5595 return uniform_direction_frozen(dim, seed=seed)

5596

5597 def _process_parameters(self, dim):

5598 """Dimension N must be specified; it cannot be inferred."""

5599 if dim is None or not np.isscalar(dim) or dim < 1 or dim != int(dim):

5600 raise ValueError("Dimension of vector must be specified, "

5601 "and must be an integer greater than 0.")

5602

5603 return int(dim)

5604

5605 def rvs(self, dim, size=None, random_state=None):

5606 """Draw random samples from S(N-1).

5607

5608 Parameters

5609 ----------

5610 dim : integer

5611 Dimension of space (N).

5612 size : int or tuple of ints, optional

5613 Given a shape of, for example, (m,n,k), m*n*k samples are

5614 generated, and packed in an m-by-n-by-k arrangement.

5615 Because each sample is N-dimensional, the output shape

5616 is (m,n,k,N). If no shape is specified, a single (N-D)

5617 sample is returned.

5618 random_state : {None, int, `numpy.random.Generator`,

5619 `numpy.random.RandomState`}, optional

5620

5621 Pseudorandom number generator state used to generate resamples.

5622

5623 If `random_state` is ``None`` (or `np.random`), the

5624 `numpy.random.RandomState` singleton is used.

5625 If `random_state` is an int, a new ``RandomState`` instance is

5626 used, seeded with `random_state`.

5627 If `random_state` is already a ``Generator`` or ``RandomState``

5628 instance then that instance is used.

5629

5630 Returns

5631 -------

5632 rvs : ndarray

5633 Random direction vectors

5634

5635 """

5636 random_state = self._get_random_state(random_state)

5637 if size is None:

5638 size = np.array([], dtype=int)

5639 size = np.atleast_1d(size)

5640

5641 dim = self._process_parameters(dim)

5642

5643 samples = _sample_uniform_direction(dim, size, random_state)

5644 return samples

5645

5646

5647uniform_direction = uniform_direction_gen()

5648

5649

5650class uniform_direction_frozen(multi_rv_frozen):

5651 def __init__(self, dim=None, seed=None):

5652 """Create a frozen n-dimensional uniform direction distribution.

5653

5654 Parameters

5655 ----------

5656 dim : int

5657 Dimension of matrices

5658 seed : {None, int, `numpy.random.Generator`,

5659 `numpy.random.RandomState`}, optional

5660

5661 If `seed` is None (or `np.random`), the `numpy.random.RandomState`

5662 singleton is used.

5663 If `seed` is an int, a new ``RandomState`` instance is used,

5664 seeded with `seed`.

5665 If `seed` is already a ``Generator`` or ``RandomState`` instance

5666 then that instance is used.

5667

5668 Examples

5669 --------

5670 >>> from scipy.stats import uniform_direction

5671 >>> x = uniform_direction(3)

5672 >>> x.rvs()

5673

5674 """

5675 self._dist = uniform_direction_gen(seed)

5676 self.dim = self._dist._process_parameters(dim)

5677

5678 def rvs(self, size=None, random_state=None):

5679 return self._dist.rvs(self.dim, size, random_state)

5680

5681

5682def _sample_uniform_direction(dim, size, random_state):

5683 """

5684 Private method to generate uniform directions

5685 Reference: Marsaglia, G. (1972). "Choosing a Point from the Surface of a

5686 Sphere". Annals of Mathematical Statistics. 43 (2): 645-646.

5687 """

5688 samples_shape = np.append(size, dim)

5689 samples = random_state.standard_normal(samples_shape)

5690 samples /= np.linalg.norm(samples, axis=-1, keepdims=True)

5691 return samples

Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/scipy/stats/_multivariate.py: 28%

1497 statements