Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/scipy/stats/_multivariate.py: 28%

1497 statements  

coverage.py v7.3.2, created at 2023-12-12 06:31 +0000

1# 

2# Author: Joris Vankerschaver 2013 

3# 

4import math 

5import numpy as np 

6from numpy import asarray_chkfinite, asarray 

7from numpy.lib import NumpyVersion 

8import scipy.linalg 

9from scipy._lib import doccer 

10from scipy.special import gammaln, psi, multigammaln, xlogy, entr, betaln 

11from scipy._lib._util import check_random_state 

12from scipy.linalg.blas import drot 

13from scipy.linalg._misc import LinAlgError 

14from scipy.linalg.lapack import get_lapack_funcs 

15 

16from ._discrete_distns import binom 

17from . import _mvn, _covariance, _rcont 

18 

19__all__ = ['multivariate_normal', 

20 'matrix_normal', 

21 'dirichlet', 

22 'wishart', 

23 'invwishart', 

24 'multinomial', 

25 'special_ortho_group', 

26 'ortho_group', 

27 'random_correlation', 

28 'unitary_group', 

29 'multivariate_t', 

30 'multivariate_hypergeom', 

31 'random_table', 

32 'uniform_direction'] 

33 

34_LOG_2PI = np.log(2 * np.pi) 

35_LOG_2 = np.log(2) 

36_LOG_PI = np.log(np.pi) 

37 

38 

39_doc_random_state = """\ 

40seed : {None, int, np.random.RandomState, np.random.Generator}, optional 

41 Used for drawing random variates. 

42 If `seed` is `None`, the `~np.random.RandomState` singleton is used. 

43 If `seed` is an int, a new ``RandomState`` instance is used, seeded 

44 with `seed`.

45 If `seed` is already a ``RandomState`` or ``Generator`` instance, 

46 then that object is used. 

47 Default is `None`. 

48""" 

49 

50 

51def _squeeze_output(out): 

52 """ 

53 Remove single-dimensional entries from array and convert to scalar, 

54 if necessary. 

55 """ 

56 out = out.squeeze() 

57 if out.ndim == 0: 

58 out = out[()] 

59 return out 

60 
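# Illustrative sketch (not part of the SciPy sources): `_squeeze_output`
# drops singleton axes and unwraps 0-d arrays into plain scalars, e.g.
#
#     import numpy as np
#     _squeeze_output(np.array([[3.0]]))      # -> 3.0 (scalar)
#     _squeeze_output(np.array([1.0, 2.0]))   # -> array([1., 2.]), unchanged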

61 

62def _eigvalsh_to_eps(spectrum, cond=None, rcond=None): 

63 """Determine which eigenvalues are "small" given the spectrum. 

64 

65 This is for compatibility across various linear algebra functions 

66 that should agree about whether or not a Hermitian matrix is numerically 

67 singular and what its numerical matrix rank is. 

68 This is designed to be compatible with scipy.linalg.pinvh. 

69 

70 Parameters 

71 ---------- 

72 spectrum : 1d ndarray 

73 Array of eigenvalues of a Hermitian matrix. 

74 cond, rcond : float, optional 

75 Cutoff for small eigenvalues. 

76 Singular values smaller than rcond * largest_eigenvalue are 

77 considered zero. 

78 If None or -1, suitable machine precision is used. 

79 

80 Returns 

81 ------- 

82 eps : float 

83 Magnitude cutoff for numerical negligibility. 

84 

85 """ 

86 if rcond is not None: 

87 cond = rcond 

88 if cond in [None, -1]: 

89 t = spectrum.dtype.char.lower() 

90 factor = {'f': 1E3, 'd': 1E6} 

91 cond = factor[t] * np.finfo(t).eps 

92 eps = cond * np.max(abs(spectrum)) 

93 return eps 

94 

95 

96def _pinv_1d(v, eps=1e-5): 

97 """A helper function for computing the pseudoinverse. 

98 

99 Parameters 

100 ---------- 

101 v : iterable of numbers 

102 This may be thought of as a vector of eigenvalues or singular values. 

103 eps : float 

104 Values with magnitude no greater than eps are considered negligible. 

105 

106 Returns 

107 ------- 

108 v_pinv : 1d float ndarray 

109 A vector of pseudo-inverted numbers. 

110 

111 """ 

112 return np.array([0 if abs(x) <= eps else 1/x for x in v], dtype=float) 

113 

114 
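# Illustrative sketch (not part of the SciPy sources): how the two helpers
# above cooperate to build a pseudoinverse of a rank-deficient matrix.
# `A` is a hypothetical input chosen for the example.
#
#     import numpy as np
#     import scipy.linalg
#     A = np.array([[2., 0.], [0., 0.]])   # symmetric PSD, rank 1
#     s, u = scipy.linalg.eigh(A)          # s == array([0., 2.])
#     eps = _eigvalsh_to_eps(s)            # cutoff scaled by the largest eigenvalue
#     s_pinv = _pinv_1d(s, eps)            # -> array([0. , 0.5])
#     A_pinv = (u * s_pinv) @ u.T          # agrees with np.linalg.pinv(A)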

115class _PSD: 

116 """ 

117 Compute coordinated functions of a symmetric positive semidefinite matrix. 

118 

119 This class addresses two issues. Firstly, it allows the pseudoinverse, 

120 the logarithm of the pseudo-determinant, and the rank of the matrix 

121 to be computed using one call to eigh instead of three. 

122 Secondly, it allows these functions to be computed in a way 

123 that gives mutually compatible results. 

124 All of the functions are computed with a common understanding as to 

125 which of the eigenvalues are to be considered negligibly small. 

126 The functions are designed to coordinate with scipy.linalg.pinvh() 

127 but not necessarily with np.linalg.det() or with np.linalg.matrix_rank(). 

128 

129 Parameters 

130 ---------- 

131 M : array_like 

132 Symmetric positive semidefinite matrix (2-D). 

133 cond, rcond : float, optional 

134 Cutoff for small eigenvalues. 

135 Singular values smaller than rcond * largest_eigenvalue are 

136 considered zero. 

137 If None or -1, suitable machine precision is used. 

138 lower : bool, optional 

139 Whether the pertinent array data is taken from the lower 

140 or upper triangle of M. (Default: lower) 

141 check_finite : bool, optional 

142 Whether to check that the input matrices contain only finite 

143 numbers. Disabling may give a performance gain, but may result 

144 in problems (crashes, non-termination) if the inputs do contain 

145 infinities or NaNs. 

146 allow_singular : bool, optional 

147 Whether to allow a singular matrix. (Default: True) 

148 

149 Notes 

150 ----- 

151 The arguments are similar to those of scipy.linalg.pinvh(). 

152 

153 """ 

154 

155 def __init__(self, M, cond=None, rcond=None, lower=True, 

156 check_finite=True, allow_singular=True): 

157 self._M = np.asarray(M) 

158 

159 # Compute the symmetric eigendecomposition. 

160 # Note that eigh takes care of array conversion, chkfinite, 

161 # and assertion that the matrix is square. 

162 s, u = scipy.linalg.eigh(M, lower=lower, check_finite=check_finite) 

163 

164 eps = _eigvalsh_to_eps(s, cond, rcond) 

165 if np.min(s) < -eps: 

166 msg = "The input matrix must be symmetric positive semidefinite." 

167 raise ValueError(msg) 

168 d = s[s > eps] 

169 if len(d) < len(s) and not allow_singular: 

170 msg = ("When `allow_singular is False`, the input matrix must be " 

171 "symmetric positive definite.") 

172 raise np.linalg.LinAlgError(msg) 

173 s_pinv = _pinv_1d(s, eps) 

174 U = np.multiply(u, np.sqrt(s_pinv)) 

175 

176 # Save the eigenvector basis, and tolerance for testing support 

177 self.eps = 1e3*eps 

178 self.V = u[:, s <= eps] 

179 

180 # Initialize the eagerly precomputed attributes. 

181 self.rank = len(d) 

182 self.U = U 

183 self.log_pdet = np.sum(np.log(d)) 

184 

185 # Initialize attributes to be lazily computed. 

186 self._pinv = None 

187 

188 def _support_mask(self, x): 

189 """ 

190 Check whether x lies in the support of the distribution. 

191 """ 

192 residual = np.linalg.norm(x @ self.V, axis=-1) 

193 in_support = residual < self.eps 

194 return in_support 

195 

196 @property 

197 def pinv(self): 

198 if self._pinv is None: 

199 self._pinv = np.dot(self.U, self.U.T) 

200 return self._pinv 

201 

202 
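# Illustrative sketch (not part of the SciPy sources): `_PSD` applied to a
# singular covariance exposes the quantities the distributions below rely on.
#
#     import numpy as np
#     psd = _PSD(np.array([[1., 1.], [1., 1.]]))   # rank-1 matrix
#     psd.rank        # -> 1
#     psd.log_pdet    # -> log(2.0), log of the pseudo-determinant
#     psd.pinv        # -> array([[0.25, 0.25], [0.25, 0.25]]), i.e. U @ U.T
#     psd.V           # eigenvectors spanning the null space, used by _support_mask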

203class multi_rv_generic: 

204 """ 

205 Class which encapsulates common functionality between all multivariate 

206 distributions. 

207 """ 

208 def __init__(self, seed=None): 

209 super().__init__() 

210 self._random_state = check_random_state(seed) 

211 

212 @property 

213 def random_state(self): 

214 """ Get or set the Generator object for generating random variates. 

215 

216 If `seed` is None (or `np.random`), the `numpy.random.RandomState` 

217 singleton is used. 

218 If `seed` is an int, a new ``RandomState`` instance is used, 

219 seeded with `seed`. 

220 If `seed` is already a ``Generator`` or ``RandomState`` instance then 

221 that instance is used. 

222 

223 """ 

224 return self._random_state 

225 

226 @random_state.setter 

227 def random_state(self, seed): 

228 self._random_state = check_random_state(seed) 

229 

230 def _get_random_state(self, random_state): 

231 if random_state is not None: 

232 return check_random_state(random_state) 

233 else: 

234 return self._random_state 

235 
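# Illustrative sketch (not part of the SciPy sources): the seed forms
# described in the `random_state` docstring above, assuming a NumPy version
# that provides `default_rng`.
#
#     import numpy as np
#     from scipy.stats import multivariate_normal
#     multivariate_normal.rvs(size=2, random_state=123)                         # int seed
#     multivariate_normal.rvs(size=2, random_state=np.random.default_rng(123))  # Generator
#     multivariate_normal.random_state = np.random.RandomState(0)               # set on the object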

236 

237class multi_rv_frozen: 

238 """ 

239 Class which encapsulates common functionality between all frozen 

240 multivariate distributions. 

241 """ 

242 @property 

243 def random_state(self): 

244 return self._dist._random_state 

245 

246 @random_state.setter 

247 def random_state(self, seed): 

248 self._dist._random_state = check_random_state(seed) 

249 

250 

251_mvn_doc_default_callparams = """\ 

252mean : array_like, default: ``[0]`` 

253 Mean of the distribution. 

254cov : array_like or `Covariance`, default: ``[1]`` 

255 Symmetric positive (semi)definite covariance matrix of the distribution. 

256allow_singular : bool, default: ``False`` 

257 Whether to allow a singular covariance matrix. This is ignored if `cov` is 

258 a `Covariance` object. 

259""" 

260 

261_mvn_doc_callparams_note = """\ 

262Setting the parameter `mean` to `None` is equivalent to having `mean` 

263be the zero-vector. The parameter `cov` can be a scalar, in which case 

264the covariance matrix is the identity times that value, a vector of 

265diagonal entries for the covariance matrix, a two-dimensional array_like, 

266or a `Covariance` object. 

267""" 

268 
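# Illustrative sketch (not part of the SciPy sources): the equivalent `cov`
# forms described in the note above, assuming a SciPy version that exposes
# `scipy.stats.Covariance`.
#
#     import numpy as np
#     from scipy.stats import multivariate_normal, Covariance
#     x, m = [0.5, -0.5], [0.0, 0.0]
#     p1 = multivariate_normal.pdf(x, mean=m, cov=2)              # scalar: 2 * identity
#     p2 = multivariate_normal.pdf(x, mean=m, cov=[2, 2])         # diagonal entries
#     p3 = multivariate_normal.pdf(x, mean=m, cov=2 * np.eye(2))  # full matrix
#     p4 = multivariate_normal.pdf(x, mean=m,
#                                  cov=Covariance.from_diagonal([2, 2]))  # Covariance object
#     # p1, p2, p3 and p4 agree up to floating-point rounding.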

269_mvn_doc_frozen_callparams = "" 

270 

271_mvn_doc_frozen_callparams_note = """\ 

272See class definition for a detailed description of parameters.""" 

273 

274mvn_docdict_params = { 

275 '_mvn_doc_default_callparams': _mvn_doc_default_callparams, 

276 '_mvn_doc_callparams_note': _mvn_doc_callparams_note, 

277 '_doc_random_state': _doc_random_state 

278} 

279 

280mvn_docdict_noparams = { 

281 '_mvn_doc_default_callparams': _mvn_doc_frozen_callparams, 

282 '_mvn_doc_callparams_note': _mvn_doc_frozen_callparams_note, 

283 '_doc_random_state': _doc_random_state 

284} 

285 

286 

287class multivariate_normal_gen(multi_rv_generic): 

288 r"""A multivariate normal random variable. 

289 

290 The `mean` keyword specifies the mean. The `cov` keyword specifies the 

291 covariance matrix. 

292 

293 Methods 

294 ------- 

295 pdf(x, mean=None, cov=1, allow_singular=False) 

296 Probability density function. 

297 logpdf(x, mean=None, cov=1, allow_singular=False) 

298 Log of the probability density function. 

299 cdf(x, mean=None, cov=1, allow_singular=False, maxpts=1000000*dim, abseps=1e-5, releps=1e-5, lower_limit=None) # noqa 

300 Cumulative distribution function. 

301 logcdf(x, mean=None, cov=1, allow_singular=False, maxpts=1000000*dim, abseps=1e-5, releps=1e-5) 

302 Log of the cumulative distribution function. 

303 rvs(mean=None, cov=1, size=1, random_state=None) 

304 Draw random samples from a multivariate normal distribution. 

305 entropy() 

306 Compute the differential entropy of the multivariate normal. 

307 

308 Parameters 

309 ---------- 

310 %(_mvn_doc_default_callparams)s 

311 %(_doc_random_state)s 

312 

313 Notes 

314 ----- 

315 %(_mvn_doc_callparams_note)s 

316 

317 The covariance matrix `cov` may be an instance of a subclass of 

318 `Covariance`, e.g. `scipy.stats.CovViaPrecision`. If so, `allow_singular` 

319 is ignored. 

320 

321 Otherwise, `cov` must be a symmetric positive semidefinite 

322 matrix when `allow_singular` is True; it must be (strictly) positive 

323 definite when `allow_singular` is False. 

324 Symmetry is not checked; only the lower triangular portion is used. 

325 The determinant and inverse of `cov` are computed 

326 as the pseudo-determinant and pseudo-inverse, respectively, so 

327 that `cov` does not need to have full rank. 

328 

329 The probability density function for `multivariate_normal` is 

330 

331 .. math:: 

332 

333 f(x) = \frac{1}{\sqrt{(2 \pi)^k \det \Sigma}} 

334 \exp\left( -\frac{1}{2} (x - \mu)^T \Sigma^{-1} (x - \mu) \right), 

335 

336 where :math:`\mu` is the mean, :math:`\Sigma` the covariance matrix, 

337 :math:`k` the rank of :math:`\Sigma`. In case of singular :math:`\Sigma`, 

338 SciPy extends this definition according to [1]_. 

339 

340 .. versionadded:: 0.14.0 

341 

342 References 

343 ---------- 

344 .. [1] Multivariate Normal Distribution - Degenerate Case, Wikipedia, 

345 https://en.wikipedia.org/wiki/Multivariate_normal_distribution#Degenerate_case 

346 

347 Examples 

348 -------- 

349 >>> import numpy as np 

350 >>> import matplotlib.pyplot as plt 

351 >>> from scipy.stats import multivariate_normal 

352 

353 >>> x = np.linspace(0, 5, 10, endpoint=False) 

354 >>> y = multivariate_normal.pdf(x, mean=2.5, cov=0.5); y 

355 array([ 0.00108914, 0.01033349, 0.05946514, 0.20755375, 0.43939129, 

356 0.56418958, 0.43939129, 0.20755375, 0.05946514, 0.01033349]) 

357 >>> fig1 = plt.figure() 

358 >>> ax = fig1.add_subplot(111) 

359 >>> ax.plot(x, y) 

360 >>> plt.show() 

361 

362 Alternatively, the object may be called (as a function) to fix the mean 

363 and covariance parameters, returning a "frozen" multivariate normal 

364 random variable: 

365 

366 >>> rv = multivariate_normal(mean=None, cov=1, allow_singular=False) 

367 >>> # Frozen object with the same methods but holding the given 

368 >>> # mean and covariance fixed. 

369 

370 The input quantiles can be any shape of array, as long as the last 

371 axis labels the components. This allows us for instance to 

372 display the frozen pdf for a non-isotropic random variable in 2D as 

373 follows: 

374 

375 >>> x, y = np.mgrid[-1:1:.01, -1:1:.01] 

376 >>> pos = np.dstack((x, y)) 

377 >>> rv = multivariate_normal([0.5, -0.2], [[2.0, 0.3], [0.3, 0.5]]) 

378 >>> fig2 = plt.figure() 

379 >>> ax2 = fig2.add_subplot(111) 

380 >>> ax2.contourf(x, y, rv.pdf(pos)) 

381 

382 """ 

383 

384 def __init__(self, seed=None): 

385 super().__init__(seed) 

386 self.__doc__ = doccer.docformat(self.__doc__, mvn_docdict_params) 

387 

388 def __call__(self, mean=None, cov=1, allow_singular=False, seed=None): 

389 """Create a frozen multivariate normal distribution. 

390 

391 See `multivariate_normal_frozen` for more information. 

392 """ 

393 return multivariate_normal_frozen(mean, cov, 

394 allow_singular=allow_singular, 

395 seed=seed) 

396 

397 def _process_parameters(self, mean, cov, allow_singular=True): 

398 """ 

399 Infer dimensionality from mean or covariance matrix, ensure that 

400 mean and covariance are full vector resp. matrix. 

401 """ 

402 if isinstance(cov, _covariance.Covariance): 

403 return self._process_parameters_Covariance(mean, cov) 

404 else: 

405 # Before `Covariance` classes were introduced, 

406 # `multivariate_normal` accepted plain arrays as `cov` and used the 

407 # following input validation. To avoid disturbing the behavior of 

408 # `multivariate_normal` when plain arrays are used, we use the 

409 # original input validation here. 

410 dim, mean, cov = self._process_parameters_psd(None, mean, cov) 

411 # After input validation, some methods then processed the arrays 

412 # with a `_PSD` object and used that to perform computation. 

413 # To avoid branching statements in each method depending on whether 

414 # `cov` is an array or `Covariance` object, we always process the 

415 # array with `_PSD`, and then use wrapper that satisfies the 

416 # `Covariance` interface, `CovViaPSD`. 

417 psd = _PSD(cov, allow_singular=allow_singular) 

418 cov_object = _covariance.CovViaPSD(psd) 

419 return dim, mean, cov_object 

420 

421 def _process_parameters_Covariance(self, mean, cov): 

422 dim = cov.shape[-1] 

423 mean = np.array([0.]) if mean is None else mean 

424 message = (f"`cov` represents a covariance matrix in {dim} dimensions, " 

425 f"and so `mean` must be broadcastable to shape {(dim,)}") 

426 try: 

427 mean = np.broadcast_to(mean, dim) 

428 except ValueError as e: 

429 raise ValueError(message) from e 

430 return dim, mean, cov 

431 

432 def _process_parameters_psd(self, dim, mean, cov): 

433 # Try to infer dimensionality 

434 if dim is None: 

435 if mean is None: 

436 if cov is None: 

437 dim = 1 

438 else: 

439 cov = np.asarray(cov, dtype=float) 

440 if cov.ndim < 2: 

441 dim = 1 

442 else: 

443 dim = cov.shape[0] 

444 else: 

445 mean = np.asarray(mean, dtype=float) 

446 dim = mean.size 

447 else: 

448 if not np.isscalar(dim): 

449 raise ValueError("Dimension of random variable must be " 

450 "a scalar.") 

451 

452 # Check input sizes and return full arrays for mean and cov if 

453 # necessary 

454 if mean is None: 

455 mean = np.zeros(dim) 

456 mean = np.asarray(mean, dtype=float) 

457 

458 if cov is None: 

459 cov = 1.0 

460 cov = np.asarray(cov, dtype=float) 

461 

462 if dim == 1: 

463 mean = mean.reshape(1) 

464 cov = cov.reshape(1, 1) 

465 

466 if mean.ndim != 1 or mean.shape[0] != dim: 

467 raise ValueError("Array 'mean' must be a vector of length %d." % 

468 dim) 

469 if cov.ndim == 0: 

470 cov = cov * np.eye(dim) 

471 elif cov.ndim == 1: 

472 cov = np.diag(cov) 

473 elif cov.ndim == 2 and cov.shape != (dim, dim): 

474 rows, cols = cov.shape 

475 if rows != cols: 

476 msg = ("Array 'cov' must be square if it is two dimensional," 

477 " but cov.shape = %s." % str(cov.shape)) 

478 else: 

479 msg = ("Dimension mismatch: array 'cov' is of shape %s," 

480 " but 'mean' is a vector of length %d.") 

481 msg = msg % (str(cov.shape), len(mean)) 

482 raise ValueError(msg) 

483 elif cov.ndim > 2: 

484 raise ValueError("Array 'cov' must be at most two-dimensional," 

485 " but cov.ndim = %d" % cov.ndim) 

486 

487 return dim, mean, cov 

488 

489 def _process_quantiles(self, x, dim): 

490 """ 

491 Adjust quantiles array so that last axis labels the components of 

492 each data point. 

493 """ 

494 x = np.asarray(x, dtype=float) 

495 

496 if x.ndim == 0: 

497 x = x[np.newaxis] 

498 elif x.ndim == 1: 

499 if dim == 1: 

500 x = x[:, np.newaxis] 

501 else: 

502 x = x[np.newaxis, :] 

503 

504 return x 

505 

506 def _logpdf(self, x, mean, cov_object): 

507 """Log of the multivariate normal probability density function. 

508 

509 Parameters 

510 ---------- 

511 x : ndarray 

512 Points at which to evaluate the log of the probability 

513 density function 

514 mean : ndarray 

515 Mean of the distribution 

516 cov_object : Covariance 

517 An object representing the Covariance matrix 

518 

519 Notes 

520 ----- 

521 As this function does no argument checking, it should not be 

522 called directly; use 'logpdf' instead. 

523 

524 """ 

525 log_det_cov, rank = cov_object.log_pdet, cov_object.rank 

526 dev = x - mean 

527 if dev.ndim > 1: 

528 log_det_cov = log_det_cov[..., np.newaxis] 

529 rank = rank[..., np.newaxis] 

530 maha = np.sum(np.square(cov_object.whiten(dev)), axis=-1) 

531 return -0.5 * (rank * _LOG_2PI + log_det_cov + maha) 

532 
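# Illustrative sketch (not part of the SciPy sources): for a full-rank
# covariance, `_logpdf` reduces to the familiar closed form
# -0.5 * (k*log(2*pi) + log det(cov) + (x - m)^T cov^{-1} (x - m)):
#
#     import numpy as np
#     from scipy.stats import multivariate_normal
#     x, m = np.array([1.0, 2.0]), np.zeros(2)
#     cov = np.array([[2.0, 0.3], [0.3, 0.5]])
#     dev = x - m
#     manual = -0.5 * (2 * np.log(2 * np.pi) + np.log(np.linalg.det(cov))
#                      + dev @ np.linalg.inv(cov) @ dev)
#     # manual agrees with multivariate_normal.logpdf(x, m, cov)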

533 def logpdf(self, x, mean=None, cov=1, allow_singular=False): 

534 """Log of the multivariate normal probability density function. 

535 

536 Parameters 

537 ---------- 

538 x : array_like 

539 Quantiles, with the last axis of `x` denoting the components. 

540 %(_mvn_doc_default_callparams)s 

541 

542 Returns 

543 ------- 

544 pdf : ndarray or scalar 

545 Log of the probability density function evaluated at `x` 

546 

547 Notes 

548 ----- 

549 %(_mvn_doc_callparams_note)s 

550 

551 """ 

552 params = self._process_parameters(mean, cov, allow_singular) 

553 dim, mean, cov_object = params 

554 x = self._process_quantiles(x, dim) 

555 out = self._logpdf(x, mean, cov_object) 

556 if np.any(cov_object.rank < dim): 

557 out_of_bounds = ~cov_object._support_mask(x-mean) 

558 out[out_of_bounds] = -np.inf 

559 return _squeeze_output(out) 

560 

561 def pdf(self, x, mean=None, cov=1, allow_singular=False): 

562 """Multivariate normal probability density function. 

563 

564 Parameters 

565 ---------- 

566 x : array_like 

567 Quantiles, with the last axis of `x` denoting the components. 

568 %(_mvn_doc_default_callparams)s 

569 

570 Returns 

571 ------- 

572 pdf : ndarray or scalar 

573 Probability density function evaluated at `x` 

574 

575 Notes 

576 ----- 

577 %(_mvn_doc_callparams_note)s 

578 

579 """ 

580 params = self._process_parameters(mean, cov, allow_singular) 

581 dim, mean, cov_object = params 

582 x = self._process_quantiles(x, dim) 

583 out = np.exp(self._logpdf(x, mean, cov_object)) 

584 if np.any((cov_object.rank < dim)): 

585 out_of_bounds = ~cov_object._support_mask(x-mean) 

586 out[out_of_bounds] = 0.0 

587 return _squeeze_output(out) 

588 

589 def _cdf(self, x, mean, cov, maxpts, abseps, releps, lower_limit): 

590 """Multivariate normal cumulative distribution function. 

591 

592 Parameters 

593 ---------- 

594 x : ndarray 

595 Points at which to evaluate the cumulative distribution function. 

596 mean : ndarray 

597 Mean of the distribution 

598 cov : array_like 

599 Covariance matrix of the distribution 

600 maxpts : integer 

601 The maximum number of points to use for integration 

602 abseps : float 

603 Absolute error tolerance 

604 releps : float 

605 Relative error tolerance 

606 lower_limit : array_like, optional 

607 Lower limit of integration of the cumulative distribution function. 

608 Default is negative infinity. Must be broadcastable with `x`. 

609 

610 Notes 

611 ----- 

612 As this function does no argument checking, it should not be 

613 called directly; use 'cdf' instead. 

614 

615 

616 .. versionadded:: 1.0.0 

617 

618 """ 

619 lower = (np.full(mean.shape, -np.inf) 

620 if lower_limit is None else lower_limit) 

621 # In 2d, _mvn.mvnun accepts input in which `lower` bound elements 

622 # are greater than `x`. Not so in other dimensions. Fix this by 

623 # ensuring that lower bounds are indeed lower when passed, then 

624 # set signs of resulting CDF manually. 

625 b, a = np.broadcast_arrays(x, lower) 

626 i_swap = b < a 

627 signs = (-1)**(i_swap.sum(axis=-1)) # odd # of swaps -> negative 

628 a, b = a.copy(), b.copy() 

629 a[i_swap], b[i_swap] = b[i_swap], a[i_swap] 

630 n = x.shape[-1] 

631 limits = np.concatenate((a, b), axis=-1) 

632 

633 # mvnun expects 1-d arguments, so process points sequentially 

634 def func1d(limits): 

635 return _mvn.mvnun(limits[:n], limits[n:], mean, cov, 

636 maxpts, abseps, releps)[0] 

637 

638 out = np.apply_along_axis(func1d, -1, limits) * signs 

639 return _squeeze_output(out) 

640 
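# Illustrative sketch (not part of the SciPy sources): `lower_limit` turns
# the CDF into a box probability; when a lower bound exceeds the upper
# bound the limits are swapped and the sign flipped, as implemented above.
#
#     import numpy as np
#     from scipy.stats import multivariate_normal
#     rv = multivariate_normal(mean=[0, 0], cov=np.eye(2))
#     rv.cdf([1, 1], lower_limit=[-1, -1])   # P(-1 < X_i < 1, i = 1, 2)
#     # For independent standard normals this is (Phi(1) - Phi(-1))**2 ~ 0.466.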

641 def logcdf(self, x, mean=None, cov=1, allow_singular=False, maxpts=None, 

642 abseps=1e-5, releps=1e-5, *, lower_limit=None): 

643 """Log of the multivariate normal cumulative distribution function. 

644 

645 Parameters 

646 ---------- 

647 x : array_like 

648 Quantiles, with the last axis of `x` denoting the components. 

649 %(_mvn_doc_default_callparams)s 

650 maxpts : integer, optional 

651 The maximum number of points to use for integration 

652 (default `1000000*dim`) 

653 abseps : float, optional 

654 Absolute error tolerance (default 1e-5) 

655 releps : float, optional 

656 Relative error tolerance (default 1e-5) 

657 lower_limit : array_like, optional 

658 Lower limit of integration of the cumulative distribution function. 

659 Default is negative infinity. Must be broadcastable with `x`. 

660 

661 Returns 

662 ------- 

663 cdf : ndarray or scalar 

664 Log of the cumulative distribution function evaluated at `x` 

665 

666 Notes 

667 ----- 

668 %(_mvn_doc_callparams_note)s 

669 

670 .. versionadded:: 1.0.0 

671 

672 """ 

673 params = self._process_parameters(mean, cov, allow_singular) 

674 dim, mean, cov_object = params 

675 cov = cov_object.covariance 

676 x = self._process_quantiles(x, dim) 

677 if not maxpts: 

678 maxpts = 1000000 * dim 

679 cdf = self._cdf(x, mean, cov, maxpts, abseps, releps, lower_limit) 

680 # the log of a negative real is complex, and cdf can be negative 

681 # if lower limit is greater than upper limit 

682 cdf = cdf + 0j if np.any(cdf < 0) else cdf 

683 out = np.log(cdf) 

684 return out 

685 

686 def cdf(self, x, mean=None, cov=1, allow_singular=False, maxpts=None, 

687 abseps=1e-5, releps=1e-5, *, lower_limit=None): 

688 """Multivariate normal cumulative distribution function. 

689 

690 Parameters 

691 ---------- 

692 x : array_like 

693 Quantiles, with the last axis of `x` denoting the components. 

694 %(_mvn_doc_default_callparams)s 

695 maxpts : integer, optional 

696 The maximum number of points to use for integration 

697 (default `1000000*dim`) 

698 abseps : float, optional 

699 Absolute error tolerance (default 1e-5) 

700 releps : float, optional 

701 Relative error tolerance (default 1e-5) 

702 lower_limit : array_like, optional 

703 Lower limit of integration of the cumulative distribution function. 

704 Default is negative infinity. Must be broadcastable with `x`. 

705 

706 Returns 

707 ------- 

708 cdf : ndarray or scalar 

709 Cumulative distribution function evaluated at `x` 

710 

711 Notes 

712 ----- 

713 %(_mvn_doc_callparams_note)s 

714 

715 .. versionadded:: 1.0.0 

716 

717 """ 

718 params = self._process_parameters(mean, cov, allow_singular) 

719 dim, mean, cov_object = params 

720 cov = cov_object.covariance 

721 x = self._process_quantiles(x, dim) 

722 if not maxpts: 

723 maxpts = 1000000 * dim 

724 out = self._cdf(x, mean, cov, maxpts, abseps, releps, lower_limit) 

725 return out 

726 

727 def rvs(self, mean=None, cov=1, size=1, random_state=None): 

728 """Draw random samples from a multivariate normal distribution. 

729 

730 Parameters 

731 ---------- 

732 %(_mvn_doc_default_callparams)s 

733 size : integer, optional 

734 Number of samples to draw (default 1). 

735 %(_doc_random_state)s 

736 

737 Returns 

738 ------- 

739 rvs : ndarray or scalar 

740 Random variates of size (`size`, `N`), where `N` is the 

741 dimension of the random variable. 

742 

743 Notes 

744 ----- 

745 %(_mvn_doc_callparams_note)s 

746 

747 """ 

748 dim, mean, cov_object = self._process_parameters(mean, cov) 

749 random_state = self._get_random_state(random_state) 

750 

751 if isinstance(cov_object, _covariance.CovViaPSD): 

752 cov = cov_object.covariance 

753 out = random_state.multivariate_normal(mean, cov, size) 

754 out = _squeeze_output(out) 

755 else: 

756 size = size or tuple() 

757 if not np.iterable(size): 

758 size = (size,) 

759 shape = tuple(size) + (cov_object.shape[-1],) 

760 x = random_state.normal(size=shape) 

761 out = mean + cov_object.colorize(x) 

762 return out 

763 

764 def entropy(self, mean=None, cov=1): 

765 """Compute the differential entropy of the multivariate normal. 

766 

767 Parameters 

768 ---------- 

769 %(_mvn_doc_default_callparams)s 

770 

771 Returns 

772 ------- 

773 h : scalar 

774 Entropy of the multivariate normal distribution 

775 

776 Notes 

777 ----- 

778 %(_mvn_doc_callparams_note)s 

779 

780 """ 

781 dim, mean, cov_object = self._process_parameters(mean, cov) 

782 return 0.5 * (cov_object.rank * (_LOG_2PI + 1) + cov_object.log_pdet) 

783 
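# Illustrative sketch (not part of the SciPy sources): for a full-rank
# covariance the entropy above equals 0.5 * log((2*pi*e)**k * det(cov)).
#
#     import numpy as np
#     from scipy.stats import multivariate_normal
#     cov = np.array([[2.0, 0.3], [0.3, 0.5]])
#     h = multivariate_normal.entropy(mean=[0, 0], cov=cov)
#     manual = 0.5 * np.log((2 * np.pi * np.e) ** 2 * np.linalg.det(cov))
#     # h agrees with manual up to rounding.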

784 

785multivariate_normal = multivariate_normal_gen() 

786 

787 

788class multivariate_normal_frozen(multi_rv_frozen): 

789 def __init__(self, mean=None, cov=1, allow_singular=False, seed=None, 

790 maxpts=None, abseps=1e-5, releps=1e-5): 

791 """Create a frozen multivariate normal distribution. 

792 

793 Parameters 

794 ---------- 

795 mean : array_like, default: ``[0]`` 

796 Mean of the distribution. 

797 cov : array_like, default: ``[1]`` 

798 Symmetric positive (semi)definite covariance matrix of the 

799 distribution. 

800 allow_singular : bool, default: ``False`` 

801 Whether to allow a singular covariance matrix. 

802 seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional 

803 If `seed` is None (or `np.random`), the `numpy.random.RandomState` 

804 singleton is used. 

805 If `seed` is an int, a new ``RandomState`` instance is used, 

806 seeded with `seed`. 

807 If `seed` is already a ``Generator`` or ``RandomState`` instance 

808 then that instance is used. 

809 maxpts : integer, optional 

810 The maximum number of points to use for integration of the 

811 cumulative distribution function (default `1000000*dim`) 

812 abseps : float, optional 

813 Absolute error tolerance for the cumulative distribution function 

814 (default 1e-5) 

815 releps : float, optional 

816 Relative error tolerance for the cumulative distribution function 

817 (default 1e-5) 

818 

819 Examples 

820 -------- 

821 When called with the default parameters, this will create a 1D random 

822 variable with mean 0 and covariance 1: 

823 

824 >>> from scipy.stats import multivariate_normal 

825 >>> r = multivariate_normal() 

826 >>> r.mean 

827 array([ 0.]) 

828 >>> r.cov 

829 array([[1.]]) 

830 

831 """ 

832 self._dist = multivariate_normal_gen(seed) 

833 self.dim, self.mean, self.cov_object = ( 

834 self._dist._process_parameters(mean, cov, allow_singular)) 

835 self.allow_singular = allow_singular or self.cov_object._allow_singular 

836 if not maxpts: 

837 maxpts = 1000000 * self.dim 

838 self.maxpts = maxpts 

839 self.abseps = abseps 

840 self.releps = releps 

841 

842 @property 

843 def cov(self): 

844 return self.cov_object.covariance 

845 

846 def logpdf(self, x): 

847 x = self._dist._process_quantiles(x, self.dim) 

848 out = self._dist._logpdf(x, self.mean, self.cov_object) 

849 if np.any(self.cov_object.rank < self.dim): 

850 out_of_bounds = ~self.cov_object._support_mask(x-self.mean) 

851 out[out_of_bounds] = -np.inf 

852 return _squeeze_output(out) 

853 

854 def pdf(self, x): 

855 return np.exp(self.logpdf(x)) 

856 

857 def logcdf(self, x, *, lower_limit=None): 

858 cdf = self.cdf(x, lower_limit=lower_limit) 

859 # the log of a negative real is complex, and cdf can be negative 

860 # if lower limit is greater than upper limit 

861 cdf = cdf + 0j if np.any(cdf < 0) else cdf 

862 out = np.log(cdf) 

863 return out 

864 

865 def cdf(self, x, *, lower_limit=None): 

866 x = self._dist._process_quantiles(x, self.dim) 

867 out = self._dist._cdf(x, self.mean, self.cov_object.covariance, 

868 self.maxpts, self.abseps, self.releps, 

869 lower_limit) 

870 return _squeeze_output(out) 

871 

872 def rvs(self, size=1, random_state=None): 

873 return self._dist.rvs(self.mean, self.cov_object, size, random_state) 

874 

875 def entropy(self): 

876 """Computes the differential entropy of the multivariate normal. 

877 

878 Returns 

879 ------- 

880 h : scalar 

881 Entropy of the multivariate normal distribution 

882 

883 """ 

884 log_pdet = self.cov_object.log_pdet 

885 rank = self.cov_object.rank 

886 return 0.5 * (rank * (_LOG_2PI + 1) + log_pdet) 

887 
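# Illustrative sketch (not part of the SciPy sources): the frozen class
# simply caches the processed parameters, so its methods agree with the
# corresponding calls on the generator instance.
#
#     from scipy.stats import multivariate_normal
#     mean, cov = [0.5, -0.2], [[2.0, 0.3], [0.3, 0.5]]
#     frozen = multivariate_normal(mean, cov)
#     x = [0.0, 0.0]
#     # frozen.pdf(x) == multivariate_normal.pdf(x, mean, cov)
#     # frozen.entropy() == multivariate_normal.entropy(mean, cov)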

888 

889# Set frozen generator docstrings from corresponding docstrings in 

890# multivariate_normal_gen and fill in default strings in class docstrings 

891for name in ['logpdf', 'pdf', 'logcdf', 'cdf', 'rvs']: 

892 method = multivariate_normal_gen.__dict__[name] 

893 method_frozen = multivariate_normal_frozen.__dict__[name] 

894 method_frozen.__doc__ = doccer.docformat(method.__doc__, 

895 mvn_docdict_noparams) 

896 method.__doc__ = doccer.docformat(method.__doc__, mvn_docdict_params) 

897 

898_matnorm_doc_default_callparams = """\ 

899mean : array_like, optional 

900 Mean of the distribution (default: `None`) 

901rowcov : array_like, optional 

902 Among-row covariance matrix of the distribution (default: `1`) 

903colcov : array_like, optional 

904 Among-column covariance matrix of the distribution (default: `1`) 

905""" 

906 

907_matnorm_doc_callparams_note = """\ 

908If `mean` is set to `None` then a matrix of zeros is used for the mean. 

909The dimensions of this matrix are inferred from the shape of `rowcov` and 

910`colcov`, if these are provided, or set to `1` if ambiguous. 

911 

912`rowcov` and `colcov` can be two-dimensional array_likes specifying the 

913covariance matrices directly. Alternatively, a one-dimensional array will 

914be interpreted as the entries of a diagonal matrix, and a scalar or 

915zero-dimensional array will be interpreted as this value times the 

916identity matrix. 

917""" 

918 

919_matnorm_doc_frozen_callparams = "" 

920 

921_matnorm_doc_frozen_callparams_note = """\ 

922See class definition for a detailed description of parameters.""" 

923 

924matnorm_docdict_params = { 

925 '_matnorm_doc_default_callparams': _matnorm_doc_default_callparams, 

926 '_matnorm_doc_callparams_note': _matnorm_doc_callparams_note, 

927 '_doc_random_state': _doc_random_state 

928} 

929 

930matnorm_docdict_noparams = { 

931 '_matnorm_doc_default_callparams': _matnorm_doc_frozen_callparams, 

932 '_matnorm_doc_callparams_note': _matnorm_doc_frozen_callparams_note, 

933 '_doc_random_state': _doc_random_state 

934} 

935 

936 

937class matrix_normal_gen(multi_rv_generic): 

938 r"""A matrix normal random variable. 

939 

940 The `mean` keyword specifies the mean. The `rowcov` keyword specifies the 

941 among-row covariance matrix. The 'colcov' keyword specifies the 

942 among-column covariance matrix. 

943 

944 Methods 

945 ------- 

946 pdf(X, mean=None, rowcov=1, colcov=1) 

947 Probability density function. 

948 logpdf(X, mean=None, rowcov=1, colcov=1) 

949 Log of the probability density function. 

950 rvs(mean=None, rowcov=1, colcov=1, size=1, random_state=None) 

951 Draw random samples. 

952 

953 Parameters 

954 ---------- 

955 %(_matnorm_doc_default_callparams)s 

956 %(_doc_random_state)s 

957 

958 Notes 

959 ----- 

960 %(_matnorm_doc_callparams_note)s 

961 

962 The covariance matrices specified by `rowcov` and `colcov` must be 

963 (symmetric) positive definite. If the samples in `X` are 

964 :math:`m \times n`, then `rowcov` must be :math:`m \times m` and 

965 `colcov` must be :math:`n \times n`. `mean` must be the same shape as `X`. 

966 

967 The probability density function for `matrix_normal` is 

968 

969 .. math:: 

970 

971 f(X) = (2 \pi)^{-\frac{mn}{2}}|U|^{-\frac{n}{2}} |V|^{-\frac{m}{2}} 

972 \exp\left( -\frac{1}{2} \mathrm{Tr}\left[ U^{-1} (X-M) V^{-1} 

973 (X-M)^T \right] \right), 

974 

975 where :math:`M` is the mean, :math:`U` the among-row covariance matrix, 

976 :math:`V` the among-column covariance matrix. 

977 

978 The `allow_singular` behaviour of the `multivariate_normal` 

979 distribution is not currently supported. Covariance matrices must be 

980 full rank. 

981 

982 The `matrix_normal` distribution is closely related to the 

983 `multivariate_normal` distribution. Specifically, :math:`\mathrm{Vec}(X)` 

984 (the vector formed by concatenating the columns of :math:`X`) has a 

985 multivariate normal distribution with mean :math:`\mathrm{Vec}(M)` 

986 and covariance :math:`V \otimes U` (where :math:`\otimes` is the Kronecker 

987 product). Sampling and pdf evaluation are 

988 :math:`\mathcal{O}(m^3 + n^3 + m^2 n + m n^2)` for the matrix normal, but 

989 :math:`\mathcal{O}(m^3 n^3)` for the equivalent multivariate normal, 

990 making this equivalent form algorithmically inefficient. 

991 

992 .. versionadded:: 0.17.0 

993 

994 Examples 

995 -------- 

996 

997 >>> import numpy as np 

998 >>> from scipy.stats import matrix_normal 

999 

1000 >>> M = np.arange(6).reshape(3,2); M 

1001 array([[0, 1], 

1002 [2, 3], 

1003 [4, 5]]) 

1004 >>> U = np.diag([1,2,3]); U 

1005 array([[1, 0, 0], 

1006 [0, 2, 0], 

1007 [0, 0, 3]]) 

1008 >>> V = 0.3*np.identity(2); V 

1009 array([[ 0.3, 0. ], 

1010 [ 0. , 0.3]]) 

1011 >>> X = M + 0.1; X 

1012 array([[ 0.1, 1.1], 

1013 [ 2.1, 3.1], 

1014 [ 4.1, 5.1]]) 

1015 >>> matrix_normal.pdf(X, mean=M, rowcov=U, colcov=V) 

1016 0.023410202050005054 

1017 

1018 >>> # Equivalent multivariate normal 

1019 >>> from scipy.stats import multivariate_normal 

1020 >>> vectorised_X = X.T.flatten() 

1021 >>> equiv_mean = M.T.flatten() 

1022 >>> equiv_cov = np.kron(V,U) 

1023 >>> multivariate_normal.pdf(vectorised_X, mean=equiv_mean, cov=equiv_cov) 

1024 0.023410202050005054 

1025 

1026 Alternatively, the object may be called (as a function) to fix the mean 

1027 and covariance parameters, returning a "frozen" matrix normal 

1028 random variable: 

1029 

1030 >>> rv = matrix_normal(mean=None, rowcov=1, colcov=1) 

1031 >>> # Frozen object with the same methods but holding the given 

1032 >>> # mean and covariance fixed. 

1033 

1034 """ 

1035 

1036 def __init__(self, seed=None): 

1037 super().__init__(seed) 

1038 self.__doc__ = doccer.docformat(self.__doc__, matnorm_docdict_params) 

1039 

1040 def __call__(self, mean=None, rowcov=1, colcov=1, seed=None): 

1041 """Create a frozen matrix normal distribution. 

1042 

1043 See `matrix_normal_frozen` for more information. 

1044 

1045 """ 

1046 return matrix_normal_frozen(mean, rowcov, colcov, seed=seed) 

1047 

1048 def _process_parameters(self, mean, rowcov, colcov): 

1049 """ 

1050 Infer dimensionality from mean or covariance matrices. Handle 

1051 defaults. Ensure compatible dimensions. 

1052 """ 

1053 

1054 # Process mean 

1055 if mean is not None: 

1056 mean = np.asarray(mean, dtype=float) 

1057 meanshape = mean.shape 

1058 if len(meanshape) != 2: 

1059 raise ValueError("Array `mean` must be two dimensional.") 

1060 if 0 in meanshape: 

1061 raise ValueError("Array `mean` has invalid shape.") 

1062 

1063 # Process among-row covariance 

1064 rowcov = np.asarray(rowcov, dtype=float) 

1065 if rowcov.ndim == 0: 

1066 if mean is not None: 

1067 rowcov = rowcov * np.identity(meanshape[0]) 

1068 else: 

1069 rowcov = rowcov * np.identity(1) 

1070 elif rowcov.ndim == 1: 

1071 rowcov = np.diag(rowcov) 

1072 rowshape = rowcov.shape 

1073 if len(rowshape) != 2: 

1074 raise ValueError("`rowcov` must be a scalar or a 2D array.") 

1075 if rowshape[0] != rowshape[1]: 

1076 raise ValueError("Array `rowcov` must be square.") 

1077 if rowshape[0] == 0: 

1078 raise ValueError("Array `rowcov` has invalid shape.") 

1079 numrows = rowshape[0] 

1080 

1081 # Process among-column covariance 

1082 colcov = np.asarray(colcov, dtype=float) 

1083 if colcov.ndim == 0: 

1084 if mean is not None: 

1085 colcov = colcov * np.identity(meanshape[1]) 

1086 else: 

1087 colcov = colcov * np.identity(1) 

1088 elif colcov.ndim == 1: 

1089 colcov = np.diag(colcov) 

1090 colshape = colcov.shape 

1091 if len(colshape) != 2: 

1092 raise ValueError("`colcov` must be a scalar or a 2D array.") 

1093 if colshape[0] != colshape[1]: 

1094 raise ValueError("Array `colcov` must be square.") 

1095 if colshape[0] == 0: 

1096 raise ValueError("Array `colcov` has invalid shape.") 

1097 numcols = colshape[0] 

1098 

1099 # Ensure mean and covariances compatible 

1100 if mean is not None: 

1101 if meanshape[0] != numrows: 

1102 raise ValueError("Arrays `mean` and `rowcov` must have the " 

1103 "same number of rows.") 

1104 if meanshape[1] != numcols: 

1105 raise ValueError("Arrays `mean` and `colcov` must have the " 

1106 "same number of columns.") 

1107 else: 

1108 mean = np.zeros((numrows, numcols)) 

1109 

1110 dims = (numrows, numcols) 

1111 

1112 return dims, mean, rowcov, colcov 

1113 

1114 def _process_quantiles(self, X, dims): 

1115 """ 

1116 Adjust quantiles array so that the last two axes label the components of 

1117 each data point. 

1118 """ 

1119 X = np.asarray(X, dtype=float) 

1120 if X.ndim == 2: 

1121 X = X[np.newaxis, :] 

1122 if X.shape[-2:] != dims: 

1123 raise ValueError("The shape of array `X` is not compatible " 

1124 "with the distribution parameters.") 

1125 return X 

1126 

1127 def _logpdf(self, dims, X, mean, row_prec_rt, log_det_rowcov, 

1128 col_prec_rt, log_det_colcov): 

1129 """Log of the matrix normal probability density function. 

1130 

1131 Parameters 

1132 ---------- 

1133 dims : tuple 

1134 Dimensions of the matrix variates 

1135 X : ndarray 

1136 Points at which to evaluate the log of the probability 

1137 density function 

1138 mean : ndarray 

1139 Mean of the distribution 

1140 row_prec_rt : ndarray 

1141 A decomposition such that np.dot(row_prec_rt, row_prec_rt.T) 

1142 is the inverse of the among-row covariance matrix 

1143 log_det_rowcov : float 

1144 Logarithm of the determinant of the among-row covariance matrix 

1145 col_prec_rt : ndarray 

1146 A decomposition such that np.dot(col_prec_rt, col_prec_rt.T) 

1147 is the inverse of the among-column covariance matrix 

1148 log_det_colcov : float 

1149 Logarithm of the determinant of the among-column covariance matrix 

1150 

1151 Notes 

1152 ----- 

1153 As this function does no argument checking, it should not be 

1154 called directly; use 'logpdf' instead. 

1155 

1156 """ 

1157 numrows, numcols = dims 

1158 roll_dev = np.moveaxis(X-mean, -1, 0) 

1159 scale_dev = np.tensordot(col_prec_rt.T, 

1160 np.dot(roll_dev, row_prec_rt), 1) 

1161 maha = np.sum(np.sum(np.square(scale_dev), axis=-1), axis=0) 

1162 return -0.5 * (numrows*numcols*_LOG_2PI + numcols*log_det_rowcov 

1163 + numrows*log_det_colcov + maha) 

1164 

1165 def logpdf(self, X, mean=None, rowcov=1, colcov=1): 

1166 """Log of the matrix normal probability density function. 

1167 

1168 Parameters 

1169 ---------- 

1170 X : array_like 

1171 Quantiles, with the last two axes of `X` denoting the components. 

1172 %(_matnorm_doc_default_callparams)s 

1173 

1174 Returns 

1175 ------- 

1176 logpdf : ndarray 

1177 Log of the probability density function evaluated at `X` 

1178 

1179 Notes 

1180 ----- 

1181 %(_matnorm_doc_callparams_note)s 

1182 

1183 """ 

1184 dims, mean, rowcov, colcov = self._process_parameters(mean, rowcov, 

1185 colcov) 

1186 X = self._process_quantiles(X, dims) 

1187 rowpsd = _PSD(rowcov, allow_singular=False) 

1188 colpsd = _PSD(colcov, allow_singular=False) 

1189 out = self._logpdf(dims, X, mean, rowpsd.U, rowpsd.log_pdet, colpsd.U, 

1190 colpsd.log_pdet) 

1191 return _squeeze_output(out) 

1192 

1193 def pdf(self, X, mean=None, rowcov=1, colcov=1): 

1194 """Matrix normal probability density function. 

1195 

1196 Parameters 

1197 ---------- 

1198 X : array_like 

1199 Quantiles, with the last two axes of `X` denoting the components. 

1200 %(_matnorm_doc_default_callparams)s 

1201 

1202 Returns 

1203 ------- 

1204 pdf : ndarray 

1205 Probability density function evaluated at `X` 

1206 

1207 Notes 

1208 ----- 

1209 %(_matnorm_doc_callparams_note)s 

1210 

1211 """ 

1212 return np.exp(self.logpdf(X, mean, rowcov, colcov)) 

1213 

1214 def rvs(self, mean=None, rowcov=1, colcov=1, size=1, random_state=None): 

1215 """Draw random samples from a matrix normal distribution. 

1216 

1217 Parameters 

1218 ---------- 

1219 %(_matnorm_doc_default_callparams)s 

1220 size : integer, optional 

1221 Number of samples to draw (default 1). 

1222 %(_doc_random_state)s 

1223 

1224 Returns 

1225 ------- 

1226 rvs : ndarray or scalar 

1227 Random variates of size (`size`, `dims`), where `dims` is the 

1228 dimension of the random matrices. 

1229 

1230 Notes 

1231 ----- 

1232 %(_matnorm_doc_callparams_note)s 

1233 

1234 """ 

1235 size = int(size) 

1236 dims, mean, rowcov, colcov = self._process_parameters(mean, rowcov, 

1237 colcov) 

1238 rowchol = scipy.linalg.cholesky(rowcov, lower=True) 

1239 colchol = scipy.linalg.cholesky(colcov, lower=True) 

1240 random_state = self._get_random_state(random_state) 

1241 # We aren't generating standard normal variates with size=(size, 

1242 # dims[0], dims[1]) directly to ensure random variates remain backwards 

1243 # compatible. See https://github.com/scipy/scipy/pull/12312 for more 

1244 # details. 

1245 std_norm = random_state.standard_normal( 

1246 size=(dims[1], size, dims[0]) 

1247 ).transpose(1, 2, 0) 

1248 out = mean + np.einsum('jp,ipq,kq->ijk', 

1249 rowchol, std_norm, colchol, 

1250 optimize=True) 

1251 if size == 1: 

1252 out = out.reshape(mean.shape) 

1253 return out 

1254 
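# Illustrative sketch (not part of the SciPy sources): output shapes of
# `rvs` as implemented above. A single draw has the shape of `mean`, while
# `size > 1` prepends a sample axis.
#
#     import numpy as np
#     from scipy.stats import matrix_normal
#     M = np.zeros((3, 2))
#     matrix_normal.rvs(mean=M).shape            # -> (3, 2)
#     matrix_normal.rvs(mean=M, size=5).shape    # -> (5, 3, 2)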

1255 

1256matrix_normal = matrix_normal_gen() 

1257 

1258 

1259class matrix_normal_frozen(multi_rv_frozen): 

1260 """ 

1261 Create a frozen matrix normal distribution. 

1262 

1263 Parameters 

1264 ---------- 

1265 %(_matnorm_doc_default_callparams)s 

1266 seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional 

1267 If `seed` is `None`, the `~np.random.RandomState` singleton is used. 

1268 If `seed` is an int, a new ``RandomState`` instance is used, seeded 

1269 with `seed`. 

1270 If `seed` is already a ``RandomState`` or ``Generator`` instance, 

1271 then that object is used. 

1272 Default is `None`. 

1273 

1274 Examples 

1275 -------- 

1276 >>> import numpy as np 

1277 >>> from scipy.stats import matrix_normal 

1278 

1279 >>> distn = matrix_normal(mean=np.zeros((3,3))) 

1280 >>> X = distn.rvs(); X 

1281 array([[-0.02976962, 0.93339138, -0.09663178], 

1282 [ 0.67405524, 0.28250467, -0.93308929], 

1283 [-0.31144782, 0.74535536, 1.30412916]]) 

1284 >>> distn.pdf(X) 

1285 2.5160642368346784e-05 

1286 >>> distn.logpdf(X) 

1287 -10.590229595124615 

1288 """ 

1289 

1290 def __init__(self, mean=None, rowcov=1, colcov=1, seed=None): 

1291 self._dist = matrix_normal_gen(seed) 

1292 self.dims, self.mean, self.rowcov, self.colcov = \ 

1293 self._dist._process_parameters(mean, rowcov, colcov) 

1294 self.rowpsd = _PSD(self.rowcov, allow_singular=False) 

1295 self.colpsd = _PSD(self.colcov, allow_singular=False) 

1296 

1297 def logpdf(self, X): 

1298 X = self._dist._process_quantiles(X, self.dims) 

1299 out = self._dist._logpdf(self.dims, X, self.mean, self.rowpsd.U, 

1300 self.rowpsd.log_pdet, self.colpsd.U, 

1301 self.colpsd.log_pdet) 

1302 return _squeeze_output(out) 

1303 

1304 def pdf(self, X): 

1305 return np.exp(self.logpdf(X)) 

1306 

1307 def rvs(self, size=1, random_state=None): 

1308 return self._dist.rvs(self.mean, self.rowcov, self.colcov, size, 

1309 random_state) 

1310 

1311 

1312# Set frozen generator docstrings from corresponding docstrings in 

1313# matrix_normal_gen and fill in default strings in class docstrings 

1314for name in ['logpdf', 'pdf', 'rvs']: 

1315 method = matrix_normal_gen.__dict__[name] 

1316 method_frozen = matrix_normal_frozen.__dict__[name] 

1317 method_frozen.__doc__ = doccer.docformat(method.__doc__, 

1318 matnorm_docdict_noparams) 

1319 method.__doc__ = doccer.docformat(method.__doc__, matnorm_docdict_params) 

1320 

1321_dirichlet_doc_default_callparams = """\ 

1322alpha : array_like 

1323 The concentration parameters. The number of entries determines the 

1324 dimensionality of the distribution. 

1325""" 

1326_dirichlet_doc_frozen_callparams = "" 

1327 

1328_dirichlet_doc_frozen_callparams_note = """\ 

1329See class definition for a detailed description of parameters.""" 

1330 

1331dirichlet_docdict_params = { 

1332 '_dirichlet_doc_default_callparams': _dirichlet_doc_default_callparams, 

1333 '_doc_random_state': _doc_random_state 

1334} 

1335 

1336dirichlet_docdict_noparams = { 

1337 '_dirichlet_doc_default_callparams': _dirichlet_doc_frozen_callparams, 

1338 '_doc_random_state': _doc_random_state 

1339} 

1340 

1341 

1342def _dirichlet_check_parameters(alpha): 

1343 alpha = np.asarray(alpha) 

1344 if np.min(alpha) <= 0: 

1345 raise ValueError("All parameters must be greater than 0") 

1346 elif alpha.ndim != 1: 

1347 raise ValueError("Parameter vector 'a' must be one dimensional, " 

1348 "but a.shape = %s." % (alpha.shape, )) 

1349 return alpha 

1350 

1351 

1352def _dirichlet_check_input(alpha, x): 

1353 x = np.asarray(x) 

1354 

1355 if x.shape[0] + 1 != alpha.shape[0] and x.shape[0] != alpha.shape[0]: 

1356 raise ValueError("Vector 'x' must have either the same number " 

1357 "of entries as, or one entry fewer than, " 

1358 "parameter vector 'a', but alpha.shape = %s " 

1359 "and x.shape = %s." % (alpha.shape, x.shape)) 

1360 

1361 if x.shape[0] != alpha.shape[0]: 

1362 xk = np.array([1 - np.sum(x, 0)]) 

1363 if xk.ndim == 1: 

1364 x = np.append(x, xk) 

1365 elif xk.ndim == 2: 

1366 x = np.vstack((x, xk)) 

1367 else: 

1368 raise ValueError("The input must be one dimensional or a two " 

1369 "dimensional matrix containing the entries.") 

1370 

1371 if np.min(x) < 0: 

1372 raise ValueError("Each entry in 'x' must be greater than or equal " 

1373 "to zero.") 

1374 

1375 if np.max(x) > 1: 

1376 raise ValueError("Each entry in 'x' must be less than or equal to one.") 

1377 

1378 # Check x_i > 0 or alpha_i > 1 

1379 xeq0 = (x == 0) 

1380 alphalt1 = (alpha < 1) 

1381 if x.shape != alpha.shape: 

1382 alphalt1 = np.repeat(alphalt1, x.shape[-1], axis=-1).reshape(x.shape) 

1383 chk = np.logical_and(xeq0, alphalt1) 

1384 

1385 if np.sum(chk): 

1386 raise ValueError("Each entry in 'x' must be greater than zero if its " 

1387 "alpha is less than one.") 

1388 

1389 if (np.abs(np.sum(x, 0) - 1.0) > 10e-10).any(): 

1390 raise ValueError("The input vector 'x' must lie within the normal " 

1391 "simplex. but np.sum(x, 0) = %s." % np.sum(x, 0)) 

1392 

1393 return x 

1394 
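# Illustrative sketch (not part of the SciPy sources): as checked above, the
# quantile vector may omit the last simplex component, which is then filled
# in as 1 - sum(x).
#
#     import numpy as np
#     from scipy.stats import dirichlet
#     alpha = np.array([1.0, 2.0, 3.0])
#     # Both calls evaluate the density at the point (0.2, 0.3, 0.5):
#     # dirichlet.pdf([0.2, 0.3, 0.5], alpha) == dirichlet.pdf([0.2, 0.3], alpha)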

1395 

1396def _lnB(alpha): 

1397 r"""Internal helper function to compute the log of the useful quotient. 

1398 

1399 .. math:: 

1400 

1401 B(\alpha) = \frac{\prod_{i=1}^{K}\Gamma(\alpha_i)} 

1402 {\Gamma\left(\sum_{i=1}^{K} \alpha_i \right)} 

1403 

1404 Parameters 

1405 ---------- 

1406 %(_dirichlet_doc_default_callparams)s 

1407 

1408 Returns 

1409 ------- 

1410 B : scalar 

1411 Helper quotient, internal use only 

1412 

1413 """ 

1414 return np.sum(gammaln(alpha)) - gammaln(np.sum(alpha)) 

1415 
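# Illustrative sketch (not part of the SciPy sources): a worked value of
# `_lnB`. For alpha = (1, 1, 1) the quotient is Gamma(1)**3 / Gamma(3) = 1/2:
#
#     import numpy as np
#     _lnB(np.array([1.0, 1.0, 1.0]))   # -> -log(2) ~ -0.6931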

1416 

1417class dirichlet_gen(multi_rv_generic): 

1418 r"""A Dirichlet random variable. 

1419 

1420 The ``alpha`` keyword specifies the concentration parameters of the 

1421 distribution. 

1422 

1423 .. versionadded:: 0.15.0 

1424 

1425 Methods 

1426 ------- 

1427 pdf(x, alpha) 

1428 Probability density function. 

1429 logpdf(x, alpha) 

1430 Log of the probability density function. 

1431 rvs(alpha, size=1, random_state=None) 

1432 Draw random samples from a Dirichlet distribution. 

1433 mean(alpha) 

1434 The mean of the Dirichlet distribution 

1435 var(alpha) 

1436 The variance of the Dirichlet distribution 

1437 entropy(alpha) 

1438 Compute the differential entropy of the Dirichlet distribution. 

1439 

1440 Parameters 

1441 ---------- 

1442 %(_dirichlet_doc_default_callparams)s 

1443 %(_doc_random_state)s 

1444 

1445 Notes 

1446 ----- 

1447 Each :math:`\alpha` entry must be positive. The distribution has 

1448 support only on the simplex defined by 

1449 

1450 .. math:: 

1451 \sum_{i=1}^{K} x_i = 1 

1452 

1453 where :math:`0 < x_i < 1`. 

1454 

1455 If the quantiles don't lie within the simplex, a ValueError is raised. 

1456 

1457 The probability density function for `dirichlet` is 

1458 

1459 .. math:: 

1460 

1461 f(x) = \frac{1}{\mathrm{B}(\boldsymbol\alpha)} \prod_{i=1}^K x_i^{\alpha_i - 1} 

1462 

1463 where 

1464 

1465 .. math:: 

1466 

1467 \mathrm{B}(\boldsymbol\alpha) = \frac{\prod_{i=1}^K \Gamma(\alpha_i)} 

1468 {\Gamma\bigl(\sum_{i=1}^K \alpha_i\bigr)} 

1469 

1470 and :math:`\boldsymbol\alpha=(\alpha_1,\ldots,\alpha_K)` is the vector of 

1471 concentration parameters, and :math:`K` is the dimension of the space 

1472 where :math:`x` takes values. 

1473 

1474 Note that the `dirichlet` interface is somewhat inconsistent. 

1475 The array returned by the rvs function is transposed 

1476 with respect to the format expected by the pdf and logpdf. 

1477 

1478 Examples 

1479 -------- 

1480 >>> import numpy as np 

1481 >>> from scipy.stats import dirichlet 

1482 

1483 Generate a dirichlet random variable 

1484 

1485 >>> quantiles = np.array([0.2, 0.2, 0.6]) # specify quantiles 

1486 >>> alpha = np.array([0.4, 5, 15]) # specify concentration parameters 

1487 >>> dirichlet.pdf(quantiles, alpha) 

1488 0.2843831684937255 

1489 

1490 The same PDF but following a log scale 

1491 

1492 >>> dirichlet.logpdf(quantiles, alpha) 

1493 -1.2574327653159187 

1494 

1495 Once we specify the dirichlet distribution 

1496 we can then calculate quantities of interest 

1497 

1498 >>> dirichlet.mean(alpha) # get the mean of the distribution 

1499 array([0.01960784, 0.24509804, 0.73529412]) 

1500 >>> dirichlet.var(alpha) # get variance 

1501 array([0.00089829, 0.00864603, 0.00909517]) 

1502 >>> dirichlet.entropy(alpha) # calculate the differential entropy 

1503 -4.3280162474082715 

1504 

1505 We can also return random samples from the distribution 

1506 

1507 >>> dirichlet.rvs(alpha, size=1, random_state=1) 

1508 array([[0.00766178, 0.24670518, 0.74563305]]) 

1509 >>> dirichlet.rvs(alpha, size=2, random_state=2) 

1510 array([[0.01639427, 0.1292273 , 0.85437844], 

1511 [0.00156917, 0.19033695, 0.80809388]]) 

1512 

1513 Alternatively, the object may be called (as a function) to fix 

1514 concentration parameters, returning a "frozen" Dirichlet 

1515 random variable: 

1516 

1517 >>> rv = dirichlet(alpha) 

1518 >>> # Frozen object with the same methods but holding the given 

1519 >>> # concentration parameters fixed. 

1520 

1521 """ 

1522 

1523 def __init__(self, seed=None): 

1524 super().__init__(seed) 

1525 self.__doc__ = doccer.docformat(self.__doc__, dirichlet_docdict_params) 

1526 

1527 def __call__(self, alpha, seed=None): 

1528 return dirichlet_frozen(alpha, seed=seed) 

1529 

1530 def _logpdf(self, x, alpha): 

1531 """Log of the Dirichlet probability density function. 

1532 

1533 Parameters 

1534 ---------- 

1535 x : ndarray 

1536 Points at which to evaluate the log of the probability 

1537 density function 

1538 %(_dirichlet_doc_default_callparams)s 

1539 

1540 Notes 

1541 ----- 

1542 As this function does no argument checking, it should not be 

1543 called directly; use 'logpdf' instead. 

1544 

1545 """ 

1546 lnB = _lnB(alpha) 

1547 return - lnB + np.sum((xlogy(alpha - 1, x.T)).T, 0) 

1548 

1549 def logpdf(self, x, alpha): 

1550 """Log of the Dirichlet probability density function. 

1551 

1552 Parameters 

1553 ---------- 

1554 x : array_like 

1555 Quantiles, with the last axis of `x` denoting the components. 

1556 %(_dirichlet_doc_default_callparams)s 

1557 

1558 Returns 

1559 ------- 

1560 pdf : ndarray or scalar 

1561 Log of the probability density function evaluated at `x`. 

1562 

1563 """ 

1564 alpha = _dirichlet_check_parameters(alpha) 

1565 x = _dirichlet_check_input(alpha, x) 

1566 

1567 out = self._logpdf(x, alpha) 

1568 return _squeeze_output(out) 

1569 

1570 def pdf(self, x, alpha): 

1571 """The Dirichlet probability density function. 

1572 

1573 Parameters 

1574 ---------- 

1575 x : array_like 

1576 Quantiles, with the last axis of `x` denoting the components. 

1577 %(_dirichlet_doc_default_callparams)s 

1578 

1579 Returns 

1580 ------- 

1581 pdf : ndarray or scalar 

1582 The probability density function evaluated at `x`. 

1583 

1584 """ 

1585 alpha = _dirichlet_check_parameters(alpha) 

1586 x = _dirichlet_check_input(alpha, x) 

1587 

1588 out = np.exp(self._logpdf(x, alpha)) 

1589 return _squeeze_output(out) 

1590 

1591 def mean(self, alpha): 

1592 """Mean of the Dirichlet distribution. 

1593 

1594 Parameters 

1595 ---------- 

1596 %(_dirichlet_doc_default_callparams)s 

1597 

1598 Returns 

1599 ------- 

1600 mu : ndarray or scalar 

1601 Mean of the Dirichlet distribution. 

1602 

1603 """ 

1604 alpha = _dirichlet_check_parameters(alpha) 

1605 

1606 out = alpha / (np.sum(alpha)) 

1607 return _squeeze_output(out) 

1608 

1609 def var(self, alpha): 

1610 """Variance of the Dirichlet distribution. 

1611 

1612 Parameters 

1613 ---------- 

1614 %(_dirichlet_doc_default_callparams)s 

1615 

1616 Returns 

1617 ------- 

1618 v : ndarray or scalar 

1619 Variance of the Dirichlet distribution. 

1620 

1621 """ 

1622 

1623 alpha = _dirichlet_check_parameters(alpha) 

1624 

1625 alpha0 = np.sum(alpha) 

1626 out = (alpha * (alpha0 - alpha)) / ((alpha0 * alpha0) * (alpha0 + 1)) 

1627 return _squeeze_output(out) 

1628 

1629 def entropy(self, alpha): 

1630 """ 

1631 Differential entropy of the Dirichlet distribution. 

1632 

1633 Parameters 

1634 ---------- 

1635 %(_dirichlet_doc_default_callparams)s 

1636 

1637 Returns 

1638 ------- 

1639 h : scalar 

1640 Entropy of the Dirichlet distribution 

1641 

1642 """ 

1643 

1644 alpha = _dirichlet_check_parameters(alpha) 

1645 

1646 alpha0 = np.sum(alpha) 

1647 lnB = _lnB(alpha) 

1648 K = alpha.shape[0] 

1649 

1650 out = lnB + (alpha0 - K) * scipy.special.psi(alpha0) - np.sum( 

1651 (alpha - 1) * scipy.special.psi(alpha)) 

1652 return _squeeze_output(out) 

1653 

1654 def rvs(self, alpha, size=1, random_state=None): 

1655 """ 

1656 Draw random samples from a Dirichlet distribution. 

1657 

1658 Parameters 

1659 ---------- 

1660 %(_dirichlet_doc_default_callparams)s 

1661 size : int, optional 

1662 Number of samples to draw (default 1). 

1663 %(_doc_random_state)s 

1664 

1665 Returns 

1666 ------- 

1667 rvs : ndarray or scalar 

1668 Random variates of size (`size`, `N`), where `N` is the 

1669 dimension of the random variable. 

1670 

1671 """ 

1672 alpha = _dirichlet_check_parameters(alpha) 

1673 random_state = self._get_random_state(random_state) 

1674 return random_state.dirichlet(alpha, size=size) 

1675 

1676 

1677dirichlet = dirichlet_gen() 
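# Illustrative usage sketch (not part of the upstream SciPy source): a small,
# self-contained check of the public ``dirichlet`` object defined above. The
# helper name ``_demo_dirichlet_usage`` is hypothetical and is never called on
# import.
def _demo_dirichlet_usage():
    import numpy as np
    from scipy.stats import dirichlet

    alpha = np.array([1.5, 2.0, 3.5])
    a0 = alpha.sum()
    # Documented moments: mean = alpha/alpha0,
    # var = alpha*(alpha0 - alpha) / (alpha0**2 * (alpha0 + 1)).
    assert np.allclose(dirichlet.mean(alpha), alpha / a0)
    assert np.allclose(dirichlet.var(alpha),
                       alpha * (a0 - alpha) / (a0 ** 2 * (a0 + 1)))
    # pdf and logpdf agree on a point of the simplex.
    x = np.array([0.2, 0.3, 0.5])
    assert np.isclose(np.log(dirichlet.pdf(x, alpha)),
                      dirichlet.logpdf(x, alpha))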

1678 

1679 

1680class dirichlet_frozen(multi_rv_frozen): 

1681 def __init__(self, alpha, seed=None): 

1682 self.alpha = _dirichlet_check_parameters(alpha) 

1683 self._dist = dirichlet_gen(seed) 

1684 

1685 def logpdf(self, x): 

1686 return self._dist.logpdf(x, self.alpha) 

1687 

1688 def pdf(self, x): 

1689 return self._dist.pdf(x, self.alpha) 

1690 

1691 def mean(self): 

1692 return self._dist.mean(self.alpha) 

1693 

1694 def var(self): 

1695 return self._dist.var(self.alpha) 

1696 

1697 def entropy(self): 

1698 return self._dist.entropy(self.alpha) 

1699 

1700 def rvs(self, size=1, random_state=None): 

1701 return self._dist.rvs(self.alpha, size, random_state) 

1702 

1703 

1704# Set frozen generator docstrings from corresponding docstrings in 

1705# multivariate_normal_gen and fill in default strings in class docstrings 

1706for name in ['logpdf', 'pdf', 'rvs', 'mean', 'var', 'entropy']: 

1707 method = dirichlet_gen.__dict__[name] 

1708 method_frozen = dirichlet_frozen.__dict__[name] 

1709 method_frozen.__doc__ = doccer.docformat( 

1710 method.__doc__, dirichlet_docdict_noparams) 

1711 method.__doc__ = doccer.docformat(method.__doc__, dirichlet_docdict_params) 
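# Illustrative sketch (not part of the upstream SciPy source) of the
# ``doccer.docformat`` substitution used in the loop above: ``%(...)s``
# placeholders in a docstring are filled from a dictionary and re-indented to
# match the docstring. The template and key below are invented for the example.
def _demo_doccer_docformat():
    from scipy._lib import doccer

    template = """Toy docstring.

    Parameters
    ----------
    %(_doc_callparams)s
    """
    filled = doccer.docformat(
        template,
        {'_doc_callparams': 'alpha : array_like\n    Concentration parameters.'})
    assert 'alpha : array_like' in filled
    assert 'Concentration parameters.' in filled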

1712 

1713 

1714_wishart_doc_default_callparams = """\ 

1715df : int 

1716 Degrees of freedom, must be greater than or equal to the dimension of the

1717 scale matrix 

1718scale : array_like 

1719 Symmetric positive definite scale matrix of the distribution 

1720""" 

1721 

1722_wishart_doc_callparams_note = "" 

1723 

1724_wishart_doc_frozen_callparams = "" 

1725 

1726_wishart_doc_frozen_callparams_note = """\ 

1727See class definition for a detailed description of parameters.""" 

1728 

1729wishart_docdict_params = { 

1730 '_doc_default_callparams': _wishart_doc_default_callparams, 

1731 '_doc_callparams_note': _wishart_doc_callparams_note, 

1732 '_doc_random_state': _doc_random_state 

1733} 

1734 

1735wishart_docdict_noparams = { 

1736 '_doc_default_callparams': _wishart_doc_frozen_callparams, 

1737 '_doc_callparams_note': _wishart_doc_frozen_callparams_note, 

1738 '_doc_random_state': _doc_random_state 

1739} 

1740 

1741 

1742class wishart_gen(multi_rv_generic): 

1743 r"""A Wishart random variable. 

1744 

1745 The `df` keyword specifies the degrees of freedom. The `scale` keyword 

1746 specifies the scale matrix, which must be symmetric and positive definite. 

1747 In this context, the scale matrix is often interpreted in terms of a 

1748 multivariate normal precision matrix (the inverse of the covariance 

1749 matrix). These arguments must satisfy the relationship 

1750 ``df > p - 1``, where ``p`` is the dimension of the scale matrix, but see

1751 notes on using the `rvs` method with ``df < p``.

1752 

1753 Methods 

1754 ------- 

1755 pdf(x, df, scale) 

1756 Probability density function. 

1757 logpdf(x, df, scale) 

1758 Log of the probability density function. 

1759 rvs(df, scale, size=1, random_state=None) 

1760 Draw random samples from a Wishart distribution. 

1761 entropy() 

1762 Compute the differential entropy of the Wishart distribution. 

1763 

1764 Parameters 

1765 ---------- 

1766 %(_doc_default_callparams)s 

1767 %(_doc_random_state)s 

1768 

1769 Raises 

1770 ------ 

1771 scipy.linalg.LinAlgError 

1772 If the scale matrix `scale` is not positive definite. 

1773 

1774 See Also 

1775 -------- 

1776 invwishart, chi2 

1777 

1778 Notes 

1779 ----- 

1780 %(_doc_callparams_note)s 

1781 

1782 The scale matrix `scale` must be a symmetric positive definite 

1783 matrix. Singular matrices, including the symmetric positive semi-definite 

1784 case, are not supported. Symmetry is not checked; only the lower triangular 

1785 portion is used. 

1786 

1787 The Wishart distribution is often denoted 

1788 

1789 .. math:: 

1790 

1791 W_p(\nu, \Sigma) 

1792 

1793 where :math:`\nu` is the degrees of freedom and :math:`\Sigma` is the 

1794 :math:`p \times p` scale matrix. 

1795 

1796 The probability density function for `wishart` has support over positive 

1797 definite matrices :math:`S`; if :math:`S \sim W_p(\nu, \Sigma)`, then 

1798 its PDF is given by: 

1799 

1800 .. math:: 

1801 

1802 f(S) = \frac{|S|^{\frac{\nu - p - 1}{2}}}{2^{ \frac{\nu p}{2} } 

1803 |\Sigma|^\frac{\nu}{2} \Gamma_p \left ( \frac{\nu}{2} \right )} 

1804 \exp\left( -tr(\Sigma^{-1} S) / 2 \right) 

1805 

1806 If :math:`S \sim W_p(\nu, \Sigma)` (Wishart) then 

1807 :math:`S^{-1} \sim W_p^{-1}(\nu, \Sigma^{-1})` (inverse Wishart). 

1808 

1809 If the scale matrix is 1-dimensional and equal to one, then the Wishart 

1810 distribution :math:`W_1(\nu, 1)` collapses to the :math:`\chi^2(\nu)` 

1811 distribution. 

1812 

1813 The algorithm [2]_ implemented by the `rvs` method may 

1814 produce numerically singular matrices with :math:`p - 1 < \nu < p`; the 

1815 user may wish to check for this condition and generate replacement samples 

1816 as necessary. 

1817 

1818 

1819 .. versionadded:: 0.16.0 

1820 

1821 References 

1822 ---------- 

1823 .. [1] M.L. Eaton, "Multivariate Statistics: A Vector Space Approach", 

1824 Wiley, 1983. 

1825 .. [2] W.B. Smith and R.R. Hocking, "Algorithm AS 53: Wishart Variate 

1826 Generator", Applied Statistics, vol. 21, pp. 341-345, 1972. 

1827 

1828 Examples 

1829 -------- 

1830 >>> import numpy as np 

1831 >>> import matplotlib.pyplot as plt 

1832 >>> from scipy.stats import wishart, chi2 

1833 >>> x = np.linspace(1e-5, 8, 100) 

1834 >>> w = wishart.pdf(x, df=3, scale=1); w[:5] 

1835 array([ 0.00126156, 0.10892176, 0.14793434, 0.17400548, 0.1929669 ]) 

1836 >>> c = chi2.pdf(x, 3); c[:5] 

1837 array([ 0.00126156, 0.10892176, 0.14793434, 0.17400548, 0.1929669 ]) 

1838 >>> plt.plot(x, w) 

1839 >>> plt.show() 

1840 

1841 The input quantiles can be any shape of array, as long as the last 

1842 axis labels the components. 

1843 

1844 Alternatively, the object may be called (as a function) to fix the degrees 

1845 of freedom and scale parameters, returning a "frozen" Wishart random 

1846 variable: 

1847 

1848 >>> rv = wishart(df=1, scale=1) 

1849 >>> # Frozen object with the same methods but holding the given 

1850 >>> # degrees of freedom and scale fixed. 

1851 

1852 """ 

1853 

1854 def __init__(self, seed=None): 

1855 super().__init__(seed) 

1856 self.__doc__ = doccer.docformat(self.__doc__, wishart_docdict_params) 

1857 

1858 def __call__(self, df=None, scale=None, seed=None): 

1859 """Create a frozen Wishart distribution. 

1860 

1861 See `wishart_frozen` for more information. 

1862 """ 

1863 return wishart_frozen(df, scale, seed) 

1864 

1865 def _process_parameters(self, df, scale): 

1866 if scale is None: 

1867 scale = 1.0 

1868 scale = np.asarray(scale, dtype=float) 

1869 

1870 if scale.ndim == 0: 

1871 scale = scale[np.newaxis, np.newaxis] 

1872 elif scale.ndim == 1: 

1873 scale = np.diag(scale) 

1874 elif scale.ndim == 2 and not scale.shape[0] == scale.shape[1]: 

1875 raise ValueError("Array 'scale' must be square if it is two" 

1876 " dimensional, but scale.scale = %s." 

1877 % str(scale.shape)) 

1878 elif scale.ndim > 2: 

1879 raise ValueError("Array 'scale' must be at most two-dimensional," 

1880 " but scale.ndim = %d" % scale.ndim) 

1881 

1882 dim = scale.shape[0] 

1883 

1884 if df is None: 

1885 df = dim 

1886 elif not np.isscalar(df): 

1887 raise ValueError("Degrees of freedom must be a scalar.") 

1888 elif df <= dim - 1: 

1889 raise ValueError("Degrees of freedom must be greater than the " 

1890 "dimension of scale matrix minus 1.") 

1891 

1892 return dim, df, scale 
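# Illustrative doctest (not in the upstream SciPy source): the promotion rules
# above treat a scalar ``scale`` as a 1x1 matrix and a 1-D ``scale`` as a
# diagonal matrix, which is visible through ``wishart.mean`` (mean = df * scale):
#
# >>> import numpy as np
# >>> from scipy.stats import wishart
# >>> np.allclose(wishart.mean(df=3, scale=[2., 5.]), 3 * np.diag([2., 5.]))
# True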

1893 

1894 def _process_quantiles(self, x, dim): 

1895 """ 

1896 Adjust quantiles array so that last axis labels the components of 

1897 each data point. 

1898 """ 

1899 x = np.asarray(x, dtype=float) 

1900 

1901 if x.ndim == 0: 

1902 x = x * np.eye(dim)[:, :, np.newaxis] 

1903 if x.ndim == 1: 

1904 if dim == 1: 

1905 x = x[np.newaxis, np.newaxis, :] 

1906 else: 

1907 x = np.diag(x)[:, :, np.newaxis] 

1908 elif x.ndim == 2: 

1909 if not x.shape[0] == x.shape[1]: 

1910 raise ValueError("Quantiles must be square if they are two" 

1911 " dimensional, but x.shape = %s." 

1912 % str(x.shape)) 

1913 x = x[:, :, np.newaxis] 

1914 elif x.ndim == 3: 

1915 if not x.shape[0] == x.shape[1]: 

1916 raise ValueError("Quantiles must be square in the first two" 

1917 " dimensions if they are three dimensional" 

1918 ", but x.shape = %s." % str(x.shape)) 

1919 elif x.ndim > 3: 

1920 raise ValueError("Quantiles must be at most two-dimensional with" 

1921 " an additional dimension for multiple" 

1922 "components, but x.ndim = %d" % x.ndim) 

1923 

1924 # Now we have 3-dim array; should have shape [dim, dim, *] 

1925 if not x.shape[0:2] == (dim, dim): 

1926 raise ValueError('Quantiles have incompatible dimensions: should' 

1927 ' be %s, got %s.' % ((dim, dim), x.shape[0:2])) 

1928 

1929 return x 

1930 

1931 def _process_size(self, size): 

1932 size = np.asarray(size) 

1933 

1934 if size.ndim == 0: 

1935 size = size[np.newaxis] 

1936 elif size.ndim > 1: 

1937 raise ValueError('Size must be an integer or tuple of integers;' 

1938 ' thus must have dimension <= 1.' 

1939 ' Got size = %s.' % str(tuple(size)))

1940 n = size.prod() 

1941 shape = tuple(size) 

1942 

1943 return n, shape 

1944 

1945 def _logpdf(self, x, dim, df, scale, log_det_scale, C): 

1946 """Log of the Wishart probability density function. 

1947 

1948 Parameters 

1949 ---------- 

1950 x : ndarray 

1951 Points at which to evaluate the log of the probability 

1952 density function 

1953 dim : int 

1954 Dimension of the scale matrix 

1955 df : int 

1956 Degrees of freedom 

1957 scale : ndarray 

1958 Scale matrix 

1959 log_det_scale : float 

1960 Logarithm of the determinant of the scale matrix 

1961 C : ndarray 

1962 Cholesky factorization of the scale matrix, lower triangular.

1963 

1964 Notes 

1965 ----- 

1966 As this function does no argument checking, it should not be 

1967 called directly; use 'logpdf' instead. 

1968 

1969 """ 

1970 # log determinant of x 

1971 # Note: x has components along the last axis, so that x.T has 

1972 # components along the 0-th axis. Then since det(A) = det(A'), this

1973 # gives us a 1-dim vector of determinants 

1974 

1975 # Retrieve tr(scale^{-1} x) 

1976 log_det_x = np.empty(x.shape[-1]) 

1977 scale_inv_x = np.empty(x.shape) 

1978 tr_scale_inv_x = np.empty(x.shape[-1]) 

1979 for i in range(x.shape[-1]): 

1980 _, log_det_x[i] = self._cholesky_logdet(x[:, :, i]) 

1981 scale_inv_x[:, :, i] = scipy.linalg.cho_solve((C, True), x[:, :, i]) 

1982 tr_scale_inv_x[i] = scale_inv_x[:, :, i].trace() 

1983 

1984 # Log PDF 

1985 out = ((0.5 * (df - dim - 1) * log_det_x - 0.5 * tr_scale_inv_x) - 

1986 (0.5 * df * dim * _LOG_2 + 0.5 * df * log_det_scale + 

1987 multigammaln(0.5*df, dim))) 

1988 

1989 return out 

1990 

1991 def logpdf(self, x, df, scale): 

1992 """Log of the Wishart probability density function. 

1993 

1994 Parameters 

1995 ---------- 

1996 x : array_like 

1997 Quantiles, with the last axis of `x` denoting the components. 

1998 Each quantile must be a symmetric positive definite matrix. 

1999 %(_doc_default_callparams)s 

2000 

2001 Returns 

2002 ------- 

2003 logpdf : ndarray

2004 Log of the probability density function evaluated at `x` 

2005 

2006 Notes 

2007 ----- 

2008 %(_doc_callparams_note)s 

2009 

2010 """ 

2011 dim, df, scale = self._process_parameters(df, scale) 

2012 x = self._process_quantiles(x, dim) 

2013 

2014 # Cholesky decomposition of scale, get log(det(scale)) 

2015 C, log_det_scale = self._cholesky_logdet(scale) 

2016 

2017 out = self._logpdf(x, dim, df, scale, log_det_scale, C) 

2018 return _squeeze_output(out) 
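# Illustrative doctest (not in the upstream SciPy source): the log-density
# formula quoted in the class docstring, evaluated directly with NumPy and
# ``multigammaln``, matches ``wishart.logpdf``; the matrices below are
# arbitrary examples.
#
# >>> import numpy as np
# >>> from scipy.special import multigammaln
# >>> from scipy.stats import wishart
# >>> df, p = 5, 2
# >>> Sigma = np.array([[2.0, 0.3], [0.3, 1.0]])
# >>> S = np.array([[3.0, 0.5], [0.5, 4.0]])
# >>> manual = (0.5 * (df - p - 1) * np.linalg.slogdet(S)[1]
# ...           - 0.5 * np.trace(np.linalg.solve(Sigma, S))
# ...           - 0.5 * df * p * np.log(2)
# ...           - 0.5 * df * np.linalg.slogdet(Sigma)[1]
# ...           - multigammaln(0.5 * df, p))
# >>> bool(np.isclose(manual, wishart.logpdf(S, df, Sigma)))
# True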

2019 

2020 def pdf(self, x, df, scale): 

2021 """Wishart probability density function. 

2022 

2023 Parameters 

2024 ---------- 

2025 x : array_like 

2026 Quantiles, with the last axis of `x` denoting the components. 

2027 Each quantile must be a symmetric positive definite matrix. 

2028 %(_doc_default_callparams)s 

2029 

2030 Returns 

2031 ------- 

2032 pdf : ndarray 

2033 Probability density function evaluated at `x` 

2034 

2035 Notes 

2036 ----- 

2037 %(_doc_callparams_note)s 

2038 

2039 """ 

2040 return np.exp(self.logpdf(x, df, scale)) 

2041 

2042 def _mean(self, dim, df, scale): 

2043 """Mean of the Wishart distribution. 

2044 

2045 Parameters 

2046 ---------- 

2047 dim : int 

2048 Dimension of the scale matrix 

2049 %(_doc_default_callparams)s 

2050 

2051 Notes 

2052 ----- 

2053 As this function does no argument checking, it should not be 

2054 called directly; use 'mean' instead. 

2055 

2056 """ 

2057 return df * scale 

2058 

2059 def mean(self, df, scale): 

2060 """Mean of the Wishart distribution. 

2061 

2062 Parameters 

2063 ---------- 

2064 %(_doc_default_callparams)s 

2065 

2066 Returns 

2067 ------- 

2068 mean : float 

2069 The mean of the distribution 

2070 """ 

2071 dim, df, scale = self._process_parameters(df, scale) 

2072 out = self._mean(dim, df, scale) 

2073 return _squeeze_output(out) 

2074 

2075 def _mode(self, dim, df, scale): 

2076 """Mode of the Wishart distribution. 

2077 

2078 Parameters 

2079 ---------- 

2080 dim : int 

2081 Dimension of the scale matrix 

2082 %(_doc_default_callparams)s 

2083 

2084 Notes 

2085 ----- 

2086 As this function does no argument checking, it should not be 

2087 called directly; use 'mode' instead. 

2088 

2089 """ 

2090 if df >= dim + 1: 

2091 out = (df-dim-1) * scale 

2092 else: 

2093 out = None 

2094 return out 

2095 

2096 def mode(self, df, scale): 

2097 """Mode of the Wishart distribution 

2098 

2099 Only valid if the degrees of freedom are greater than the dimension of 

2100 the scale matrix. 

2101 

2102 Parameters 

2103 ---------- 

2104 %(_doc_default_callparams)s 

2105 

2106 Returns 

2107 ------- 

2108 mode : float or None 

2109 The mode of the distribution

2110 """ 

2111 dim, df, scale = self._process_parameters(df, scale) 

2112 out = self._mode(dim, df, scale) 

2113 return _squeeze_output(out) if out is not None else out 

2114 

2115 def _var(self, dim, df, scale): 

2116 """Variance of the Wishart distribution. 

2117 

2118 Parameters 

2119 ---------- 

2120 dim : int 

2121 Dimension of the scale matrix 

2122 %(_doc_default_callparams)s 

2123 

2124 Notes 

2125 ----- 

2126 As this function does no argument checking, it should not be 

2127 called directly; use 'var' instead. 

2128 

2129 """ 

2130 var = scale**2 

2131 diag = scale.diagonal() # 1 x dim array 

2132 var += np.outer(diag, diag) 

2133 var *= df 

2134 return var 

2135 

2136 def var(self, df, scale): 

2137 """Variance of the Wishart distribution. 

2138 

2139 Parameters 

2140 ---------- 

2141 %(_doc_default_callparams)s 

2142 

2143 Returns 

2144 ------- 

2145 var : float 

2146 The variance of the distribution 

2147 """ 

2148 dim, df, scale = self._process_parameters(df, scale) 

2149 out = self._var(dim, df, scale) 

2150 return _squeeze_output(out) 
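# Illustrative doctest (not in the upstream SciPy source): the element-wise
# variance assembled above equals df * (scale**2 + outer(diag, diag)).
#
# >>> import numpy as np
# >>> from scipy.stats import wishart
# >>> df, scale = 6, np.array([[2.0, 0.4], [0.4, 1.5]])
# >>> d = np.diag(scale)
# >>> np.allclose(wishart.var(df, scale), df * (scale**2 + np.outer(d, d)))
# True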

2151 

2152 def _standard_rvs(self, n, shape, dim, df, random_state): 

2153 """ 

2154 Parameters 

2155 ---------- 

2156 n : integer 

2157 Number of variates to generate 

2158 shape : iterable 

2159 Shape of the variates to generate 

2160 dim : int 

2161 Dimension of the scale matrix 

2162 df : int 

2163 Degrees of freedom 

2164 random_state : {None, int, `numpy.random.Generator`, 

2165 `numpy.random.RandomState`}, optional 

2166 

2167 If `seed` is None (or `np.random`), the `numpy.random.RandomState` 

2168 singleton is used. 

2169 If `seed` is an int, a new ``RandomState`` instance is used, 

2170 seeded with `seed`. 

2171 If `seed` is already a ``Generator`` or ``RandomState`` instance 

2172 then that instance is used. 

2173 

2174 Notes 

2175 ----- 

2176 As this function does no argument checking, it should not be 

2177 called directly; use 'rvs' instead. 

2178 

2179 """ 

2180 # Random normal variates for off-diagonal elements 

2181 n_tril = dim * (dim-1) // 2 

2182 covariances = random_state.normal( 

2183 size=n*n_tril).reshape(shape+(n_tril,)) 

2184 

2185 # Random chi-square variates for diagonal elements 

2186 variances = (np.r_[[random_state.chisquare(df-(i+1)+1, size=n)**0.5 

2187 for i in range(dim)]].reshape((dim,) + 

2188 shape[::-1]).T) 

2189 

2190 # Create the A matri(ces) - lower triangular 

2191 A = np.zeros(shape + (dim, dim)) 

2192 

2193 # Input the covariances 

2194 size_idx = tuple([slice(None, None, None)]*len(shape)) 

2195 tril_idx = np.tril_indices(dim, k=-1) 

2196 A[size_idx + tril_idx] = covariances 

2197 

2198 # Input the variances 

2199 diag_idx = np.diag_indices(dim) 

2200 A[size_idx + diag_idx] = variances 

2201 

2202 return A 
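# Illustrative doctest (not in the upstream SciPy source): the same Bartlett
# construction written with plain NumPy for a single draw; A @ A.T is then one
# sample from W(df, I) and is symmetric positive definite.
#
# >>> import numpy as np
# >>> rng = np.random.default_rng(1234)
# >>> dim, df = 3, 7
# >>> A = np.zeros((dim, dim))
# >>> A[np.tril_indices(dim, k=-1)] = rng.normal(size=dim * (dim - 1) // 2)
# >>> A[np.diag_indices(dim)] = np.sqrt([rng.chisquare(df - i) for i in range(dim)])
# >>> B = A @ A.T
# >>> bool(np.all(np.linalg.eigvalsh(B) > 0))
# True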

2203 

2204 def _rvs(self, n, shape, dim, df, C, random_state): 

2205 """Draw random samples from a Wishart distribution. 

2206 

2207 Parameters 

2208 ---------- 

2209 n : integer 

2210 Number of variates to generate 

2211 shape : iterable 

2212 Shape of the variates to generate 

2213 dim : int 

2214 Dimension of the scale matrix 

2215 df : int 

2216 Degrees of freedom 

2217 C : ndarray 

2218 Cholesky factorization of the scale matrix, lower triangular. 

2219 %(_doc_random_state)s 

2220 

2221 Notes 

2222 ----- 

2223 As this function does no argument checking, it should not be 

2224 called directly; use 'rvs' instead. 

2225 

2226 """ 

2227 random_state = self._get_random_state(random_state) 

2228 # Calculate the matrices A, which are actually lower triangular 

2229 # Cholesky factorizations of a matrix B such that B ~ W(df, I) 

2230 A = self._standard_rvs(n, shape, dim, df, random_state) 

2231 

2232 # Calculate SA = C A A' C', where SA ~ W(df, scale) 

2233 # Note: this is the product of a (lower) (lower) (lower)' (lower)' 

2234 # or, denoting B = AA', it is C B C' where C is the lower 

2235 # triangular Cholesky factorization of the scale matrix. 

2236 # this appears to conflict with the instructions in [1]_, which 

2237 # suggest that it should be D' B D where D is the lower 

2238 # triangular factorization of the scale matrix. However, it is 

2239 # meant to refer to the Bartlett (1933) representation of a 

2240 # Wishart random variate as L A A' L' where L is lower triangular 

2241 # so it appears that understanding D' to be upper triangular 

2242 # is either a typo in or misreading of [1]_. 

2243 for index in np.ndindex(shape): 

2244 CA = np.dot(C, A[index]) 

2245 A[index] = np.dot(CA, CA.T) 

2246 

2247 return A 

2248 

2249 def rvs(self, df, scale, size=1, random_state=None): 

2250 """Draw random samples from a Wishart distribution. 

2251 

2252 Parameters 

2253 ---------- 

2254 %(_doc_default_callparams)s 

2255 size : integer or iterable of integers, optional 

2256 Number of samples to draw (default 1). 

2257 %(_doc_random_state)s 

2258 

2259 Returns 

2260 ------- 

2261 rvs : ndarray 

2262 Random variates of shape (`size`) + (`dim`, `dim`), where `dim` is

2263 the dimension of the scale matrix. 

2264 

2265 Notes 

2266 ----- 

2267 %(_doc_callparams_note)s 

2268 

2269 """ 

2270 n, shape = self._process_size(size) 

2271 dim, df, scale = self._process_parameters(df, scale) 

2272 

2273 # Cholesky decomposition of scale 

2274 C = scipy.linalg.cholesky(scale, lower=True) 

2275 

2276 out = self._rvs(n, shape, dim, df, C, random_state) 

2277 

2278 return _squeeze_output(out) 

2279 

2280 def _entropy(self, dim, df, log_det_scale): 

2281 """Compute the differential entropy of the Wishart. 

2282 

2283 Parameters 

2284 ---------- 

2285 dim : int 

2286 Dimension of the scale matrix 

2287 df : int 

2288 Degrees of freedom 

2289 log_det_scale : float 

2290 Logarithm of the determinant of the scale matrix 

2291 

2292 Notes 

2293 ----- 

2294 As this function does no argument checking, it should not be 

2295 called directly; use 'entropy' instead. 

2296 

2297 """ 

2298 return ( 

2299 0.5 * (dim+1) * log_det_scale + 

2300 0.5 * dim * (dim+1) * _LOG_2 + 

2301 multigammaln(0.5*df, dim) - 

2302 0.5 * (df - dim - 1) * np.sum( 

2303 [psi(0.5*(df + 1 - (i+1))) for i in range(dim)] 

2304 ) + 

2305 0.5 * df * dim 

2306 ) 

2307 

2308 def entropy(self, df, scale): 

2309 """Compute the differential entropy of the Wishart. 

2310 

2311 Parameters 

2312 ---------- 

2313 %(_doc_default_callparams)s 

2314 

2315 Returns 

2316 ------- 

2317 h : scalar 

2318 Entropy of the Wishart distribution 

2319 

2320 Notes 

2321 ----- 

2322 %(_doc_callparams_note)s 

2323 

2324 """ 

2325 dim, df, scale = self._process_parameters(df, scale) 

2326 _, log_det_scale = self._cholesky_logdet(scale) 

2327 return self._entropy(dim, df, log_det_scale) 
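# Illustrative doctest (not in the upstream SciPy source): with a 1x1 identity
# scale, W_1(df, 1) is the chi-square distribution, so the differential
# entropies agree.
#
# >>> import numpy as np
# >>> from scipy.stats import wishart, chi2
# >>> bool(np.isclose(wishart.entropy(3, scale=1), chi2.entropy(3)))
# True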

2328 

2329 def _cholesky_logdet(self, scale): 

2330 """Compute Cholesky decomposition and determine (log(det(scale)). 

2331 

2332 Parameters 

2333 ---------- 

2334 scale : ndarray 

2335 Scale matrix. 

2336 

2337 Returns 

2338 ------- 

2339 c_decomp : ndarray 

2340 The Cholesky decomposition of `scale`. 

2341 logdet : scalar 

2342 The log of the determinant of `scale`. 

2343 

2344 Notes 

2345 ----- 

2346 This computation of ``logdet`` is equivalent to 

2347 ``np.linalg.slogdet(scale)``. It is ~2x faster though. 

2348 

2349 """ 

2350 c_decomp = scipy.linalg.cholesky(scale, lower=True) 

2351 logdet = 2 * np.sum(np.log(c_decomp.diagonal())) 

2352 return c_decomp, logdet 
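# Illustrative doctest (not in the upstream SciPy source) of the equivalence
# stated in the Notes above: twice the sum of the log of the Cholesky diagonal
# equals ``np.linalg.slogdet`` for a symmetric positive definite matrix.
#
# >>> import numpy as np
# >>> import scipy.linalg
# >>> rng = np.random.default_rng(42)
# >>> M = rng.normal(size=(4, 4))
# >>> spd = M @ M.T + 4 * np.eye(4)
# >>> L = scipy.linalg.cholesky(spd, lower=True)
# >>> bool(np.isclose(2 * np.sum(np.log(np.diag(L))), np.linalg.slogdet(spd)[1]))
# True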

2353 

2354 

2355wishart = wishart_gen() 

2356 

2357 

2358class wishart_frozen(multi_rv_frozen): 

2359 """Create a frozen Wishart distribution. 

2360 

2361 Parameters 

2362 ---------- 

2363 df : array_like 

2364 Degrees of freedom of the distribution 

2365 scale : array_like 

2366 Scale matrix of the distribution 

2367 seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional 

2368 If `seed` is None (or `np.random`), the `numpy.random.RandomState` 

2369 singleton is used. 

2370 If `seed` is an int, a new ``RandomState`` instance is used, 

2371 seeded with `seed`. 

2372 If `seed` is already a ``Generator`` or ``RandomState`` instance then 

2373 that instance is used. 

2374 

2375 """ 

2376 def __init__(self, df, scale, seed=None): 

2377 self._dist = wishart_gen(seed) 

2378 self.dim, self.df, self.scale = self._dist._process_parameters( 

2379 df, scale) 

2380 self.C, self.log_det_scale = self._dist._cholesky_logdet(self.scale) 

2381 

2382 def logpdf(self, x): 

2383 x = self._dist._process_quantiles(x, self.dim) 

2384 

2385 out = self._dist._logpdf(x, self.dim, self.df, self.scale, 

2386 self.log_det_scale, self.C) 

2387 return _squeeze_output(out) 

2388 

2389 def pdf(self, x): 

2390 return np.exp(self.logpdf(x)) 

2391 

2392 def mean(self): 

2393 out = self._dist._mean(self.dim, self.df, self.scale) 

2394 return _squeeze_output(out) 

2395 

2396 def mode(self): 

2397 out = self._dist._mode(self.dim, self.df, self.scale) 

2398 return _squeeze_output(out) if out is not None else out 

2399 

2400 def var(self): 

2401 out = self._dist._var(self.dim, self.df, self.scale) 

2402 return _squeeze_output(out) 

2403 

2404 def rvs(self, size=1, random_state=None): 

2405 n, shape = self._dist._process_size(size) 

2406 out = self._dist._rvs(n, shape, self.dim, self.df, 

2407 self.C, random_state) 

2408 return _squeeze_output(out) 

2409 

2410 def entropy(self): 

2411 return self._dist._entropy(self.dim, self.df, self.log_det_scale) 

2412 

2413 

2414# Set frozen generator docstrings from corresponding docstrings in 

2415# Wishart and fill in default strings in class docstrings 

2416for name in ['logpdf', 'pdf', 'mean', 'mode', 'var', 'rvs', 'entropy']: 

2417 method = wishart_gen.__dict__[name] 

2418 method_frozen = wishart_frozen.__dict__[name] 

2419 method_frozen.__doc__ = doccer.docformat( 

2420 method.__doc__, wishart_docdict_noparams) 

2421 method.__doc__ = doccer.docformat(method.__doc__, wishart_docdict_params) 

2422 

2423 

2424def _cho_inv_batch(a, check_finite=True): 

2425 """ 

2426 Invert the matrices a_i, using a Cholesky factorization of A, where 

2427 a_i resides in the last two dimensions of a and the other indices describe 

2428 the index i. 

2429 

2430 Overwrites the data in a. 

2431 

2432 Parameters 

2433 ---------- 

2434 a : array 

2435 Array of matrices to invert, where the matrices themselves are stored 

2436 in the last two dimensions. 

2437 check_finite : bool, optional 

2438 Whether to check that the input matrices contain only finite numbers. 

2439 Disabling may give a performance gain, but may result in problems 

2440 (crashes, non-termination) if the inputs do contain infinities or NaNs. 

2441 

2442 Returns 

2443 ------- 

2444 x : array 

2445 Array of inverses of the matrices ``a_i``. 

2446 

2447 See Also 

2448 -------- 

2449 scipy.linalg.cholesky : Cholesky factorization of a matrix 

2450 

2451 """ 

2452 if check_finite: 

2453 a1 = asarray_chkfinite(a) 

2454 else: 

2455 a1 = asarray(a) 

2456 if len(a1.shape) < 2 or a1.shape[-2] != a1.shape[-1]: 

2457 raise ValueError('expected square matrix in last two dimensions') 

2458 

2459 potrf, potri = get_lapack_funcs(('potrf', 'potri'), (a1,)) 

2460 

2461 triu_rows, triu_cols = np.triu_indices(a.shape[-2], k=1) 

2462 for index in np.ndindex(a1.shape[:-2]): 

2463 

2464 # Cholesky decomposition 

2465 a1[index], info = potrf(a1[index], lower=True, overwrite_a=False, 

2466 clean=False) 

2467 if info > 0: 

2468 raise LinAlgError("%d-th leading minor not positive definite" 

2469 % info) 

2470 if info < 0: 

2471 raise ValueError('illegal value in %d-th argument of internal' 

2472 ' potrf' % -info) 

2473 # Inversion 

2474 a1[index], info = potri(a1[index], lower=True, overwrite_c=False) 

2475 if info > 0: 

2476 raise LinAlgError("the inverse could not be computed") 

2477 if info < 0: 

2478 raise ValueError('illegal value in %d-th argument of internal' 

2479 ' potri' % -info)

2480 

2481 # Make symmetric (dpotri only fills in the lower triangle) 

2482 a1[index][triu_rows, triu_cols] = a1[index][triu_cols, triu_rows] 

2483 

2484 return a1 
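# Illustrative usage sketch (not part of the upstream SciPy source) for the
# private helper above: batched inversion of symmetric positive definite
# matrices agrees with ``np.linalg.inv``. The name ``_demo_cho_inv_batch`` is
# hypothetical and the function is never called on import.
def _demo_cho_inv_batch():
    import numpy as np
    from scipy.stats._multivariate import _cho_inv_batch  # the helper above

    rng = np.random.default_rng(7)
    mats = np.empty((3, 2, 2))
    for i in range(3):
        M = rng.normal(size=(2, 2))
        mats[i] = M @ M.T + np.eye(2)      # each 2x2 slice is SPD
    expected = np.linalg.inv(mats)         # reference batched inverse
    result = _cho_inv_batch(mats.copy())   # note: overwrites its argument
    assert np.allclose(result, expected)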

2485 

2486 

2487class invwishart_gen(wishart_gen): 

2488 r"""An inverse Wishart random variable. 

2489 

2490 The `df` keyword specifies the degrees of freedom. The `scale` keyword 

2491 specifies the scale matrix, which must be symmetric and positive definite. 

2492 In this context, the scale matrix is often interpreted in terms of a 

2493 multivariate normal covariance matrix. 

2494 

2495 Methods 

2496 ------- 

2497 pdf(x, df, scale) 

2498 Probability density function. 

2499 logpdf(x, df, scale) 

2500 Log of the probability density function. 

2501 rvs(df, scale, size=1, random_state=None) 

2502 Draw random samples from an inverse Wishart distribution. 

2503 

2504 Parameters 

2505 ---------- 

2506 %(_doc_default_callparams)s 

2507 %(_doc_random_state)s 

2508 

2509 Raises 

2510 ------ 

2511 scipy.linalg.LinAlgError 

2512 If the scale matrix `scale` is not positive definite. 

2513 

2514 See Also 

2515 -------- 

2516 wishart 

2517 

2518 Notes 

2519 ----- 

2520 %(_doc_callparams_note)s 

2521 

2522 The scale matrix `scale` must be a symmetric positive definite 

2523 matrix. Singular matrices, including the symmetric positive semi-definite 

2524 case, are not supported. Symmetry is not checked; only the lower triangular 

2525 portion is used. 

2526 

2527 The inverse Wishart distribution is often denoted 

2528 

2529 .. math:: 

2530 

2531 W_p^{-1}(\nu, \Psi) 

2532 

2533 where :math:`\nu` is the degrees of freedom and :math:`\Psi` is the 

2534 :math:`p \times p` scale matrix. 

2535 

2536 The probability density function for `invwishart` has support over positive 

2537 definite matrices :math:`S`; if :math:`S \sim W^{-1}_p(\nu, \Sigma)`, 

2538 then its PDF is given by: 

2539 

2540 .. math:: 

2541 

2542 f(S) = \frac{|\Sigma|^\frac{\nu}{2}}{2^{ \frac{\nu p}{2} } 

2543 |S|^{\frac{\nu + p + 1}{2}} \Gamma_p \left(\frac{\nu}{2} \right)} 

2544 \exp\left( -tr(\Sigma S^{-1}) / 2 \right) 

2545 

2546 If :math:`S \sim W_p^{-1}(\nu, \Psi)` (inverse Wishart) then 

2547 :math:`S^{-1} \sim W_p(\nu, \Psi^{-1})` (Wishart). 

2548 

2549 If the scale matrix is 1-dimensional and equal to one, then the inverse 

2550 Wishart distribution :math:`W_1^{-1}(\nu, 1)` collapses to the

2551 inverse Gamma distribution with parameters shape = :math:`\frac{\nu}{2}` 

2552 and scale = :math:`\frac{1}{2}`. 

2553 

2554 .. versionadded:: 0.16.0 

2555 

2556 References 

2557 ---------- 

2558 .. [1] M.L. Eaton, "Multivariate Statistics: A Vector Space Approach", 

2559 Wiley, 1983. 

2560 .. [2] M.C. Jones, "Generating Inverse Wishart Matrices", Communications 

2561 in Statistics - Simulation and Computation, vol. 14.2, pp.511-514, 

2562 1985. 

2563 

2564 Examples 

2565 -------- 

2566 >>> import numpy as np 

2567 >>> import matplotlib.pyplot as plt 

2568 >>> from scipy.stats import invwishart, invgamma 

2569 >>> x = np.linspace(0.01, 1, 100) 

2570 >>> iw = invwishart.pdf(x, df=6, scale=1) 

2571 >>> iw[:3] 

2572 array([ 1.20546865e-15, 5.42497807e-06, 4.45813929e-03]) 

2573 >>> ig = invgamma.pdf(x, 6/2., scale=1./2) 

2574 >>> ig[:3] 

2575 array([ 1.20546865e-15, 5.42497807e-06, 4.45813929e-03]) 

2576 >>> plt.plot(x, iw) 

2577 >>> plt.show() 

2578 

2579 The input quantiles can be any shape of array, as long as the last 

2580 axis labels the components. 

2581 

2582 Alternatively, the object may be called (as a function) to fix the degrees 

2583 of freedom and scale parameters, returning a "frozen" inverse Wishart 

2584 random variable: 

2585 

2586 >>> rv = invwishart(df=1, scale=1) 

2587 >>> # Frozen object with the same methods but holding the given 

2588 >>> # degrees of freedom and scale fixed. 

2589 

2590 """ 

2591 

2592 def __init__(self, seed=None): 

2593 super().__init__(seed) 

2594 self.__doc__ = doccer.docformat(self.__doc__, wishart_docdict_params) 

2595 

2596 def __call__(self, df=None, scale=None, seed=None): 

2597 """Create a frozen inverse Wishart distribution. 

2598 

2599 See `invwishart_frozen` for more information. 

2600 

2601 """ 

2602 return invwishart_frozen(df, scale, seed) 

2603 

2604 def _logpdf(self, x, dim, df, scale, log_det_scale): 

2605 """Log of the inverse Wishart probability density function. 

2606 

2607 Parameters 

2608 ---------- 

2609 x : ndarray 

2610 Points at which to evaluate the log of the probability 

2611 density function. 

2612 dim : int 

2613 Dimension of the scale matrix 

2614 df : int 

2615 Degrees of freedom 

2616 scale : ndarray 

2617 Scale matrix 

2618 log_det_scale : float 

2619 Logarithm of the determinant of the scale matrix 

2620 

2621 Notes 

2622 ----- 

2623 As this function does no argument checking, it should not be 

2624 called directly; use 'logpdf' instead. 

2625 

2626 """ 

2627 log_det_x = np.empty(x.shape[-1]) 

2628 x_inv = np.copy(x).T 

2629 if dim > 1: 

2630 _cho_inv_batch(x_inv) # works in-place 

2631 else: 

2632 x_inv = 1./x_inv 

2633 tr_scale_x_inv = np.empty(x.shape[-1]) 

2634 

2635 for i in range(x.shape[-1]): 

2636 C, lower = scipy.linalg.cho_factor(x[:, :, i], lower=True) 

2637 

2638 log_det_x[i] = 2 * np.sum(np.log(C.diagonal())) 

2639 

2640 tr_scale_x_inv[i] = np.dot(scale, x_inv[i]).trace() 

2641 

2642 # Log PDF 

2643 out = ((0.5 * df * log_det_scale - 0.5 * tr_scale_x_inv) - 

2644 (0.5 * df * dim * _LOG_2 + 0.5 * (df + dim + 1) * log_det_x) - 

2645 multigammaln(0.5*df, dim)) 

2646 

2647 return out 

2648 

2649 def logpdf(self, x, df, scale): 

2650 """Log of the inverse Wishart probability density function. 

2651 

2652 Parameters 

2653 ---------- 

2654 x : array_like 

2655 Quantiles, with the last axis of `x` denoting the components. 

2656 Each quantile must be a symmetric positive definite matrix. 

2657 %(_doc_default_callparams)s 

2658 

2659 Returns 

2660 ------- 

2661 logpdf : ndarray

2662 Log of the probability density function evaluated at `x` 

2663 

2664 Notes 

2665 ----- 

2666 %(_doc_callparams_note)s 

2667 

2668 """ 

2669 dim, df, scale = self._process_parameters(df, scale) 

2670 x = self._process_quantiles(x, dim) 

2671 _, log_det_scale = self._cholesky_logdet(scale) 

2672 out = self._logpdf(x, dim, df, scale, log_det_scale) 

2673 return _squeeze_output(out) 

2674 

2675 def pdf(self, x, df, scale): 

2676 """Inverse Wishart probability density function. 

2677 

2678 Parameters 

2679 ---------- 

2680 x : array_like 

2681 Quantiles, with the last axis of `x` denoting the components. 

2682 Each quantile must be a symmetric positive definite matrix. 

2683 %(_doc_default_callparams)s 

2684 

2685 Returns 

2686 ------- 

2687 pdf : ndarray 

2688 Probability density function evaluated at `x` 

2689 

2690 Notes 

2691 ----- 

2692 %(_doc_callparams_note)s 

2693 

2694 """ 

2695 return np.exp(self.logpdf(x, df, scale)) 

2696 

2697 def _mean(self, dim, df, scale): 

2698 """Mean of the inverse Wishart distribution. 

2699 

2700 Parameters 

2701 ---------- 

2702 dim : int 

2703 Dimension of the scale matrix 

2704 %(_doc_default_callparams)s 

2705 

2706 Notes 

2707 ----- 

2708 As this function does no argument checking, it should not be 

2709 called directly; use 'mean' instead. 

2710 

2711 """ 

2712 if df > dim + 1: 

2713 out = scale / (df - dim - 1) 

2714 else: 

2715 out = None 

2716 return out 

2717 

2718 def mean(self, df, scale): 

2719 """Mean of the inverse Wishart distribution. 

2720 

2721 Only valid if the degrees of freedom are greater than the dimension of 

2722 the scale matrix plus one. 

2723 

2724 Parameters 

2725 ---------- 

2726 %(_doc_default_callparams)s 

2727 

2728 Returns 

2729 ------- 

2730 mean : float or None 

2731 The mean of the distribution 

2732 

2733 """ 

2734 dim, df, scale = self._process_parameters(df, scale) 

2735 out = self._mean(dim, df, scale) 

2736 return _squeeze_output(out) if out is not None else out 

2737 

2738 def _mode(self, dim, df, scale): 

2739 """Mode of the inverse Wishart distribution. 

2740 

2741 Parameters 

2742 ---------- 

2743 dim : int 

2744 Dimension of the scale matrix 

2745 %(_doc_default_callparams)s 

2746 

2747 Notes 

2748 ----- 

2749 As this function does no argument checking, it should not be 

2750 called directly; use 'mode' instead. 

2751 

2752 """ 

2753 return scale / (df + dim + 1) 

2754 

2755 def mode(self, df, scale): 

2756 """Mode of the inverse Wishart distribution. 

2757 

2758 Parameters 

2759 ---------- 

2760 %(_doc_default_callparams)s 

2761 

2762 Returns 

2763 ------- 

2764 mode : float 

2765 The mode of the distribution

2766 

2767 """ 

2768 dim, df, scale = self._process_parameters(df, scale) 

2769 out = self._mode(dim, df, scale) 

2770 return _squeeze_output(out) 

2771 

2772 def _var(self, dim, df, scale): 

2773 """Variance of the inverse Wishart distribution. 

2774 

2775 Parameters 

2776 ---------- 

2777 dim : int 

2778 Dimension of the scale matrix 

2779 %(_doc_default_callparams)s 

2780 

2781 Notes 

2782 ----- 

2783 As this function does no argument checking, it should not be 

2784 called directly; use 'var' instead. 

2785 

2786 """ 

2787 if df > dim + 3: 

2788 var = (df - dim + 1) * scale**2 

2789 diag = scale.diagonal() # 1 x dim array 

2790 var += (df - dim - 1) * np.outer(diag, diag) 

2791 var /= (df - dim) * (df - dim - 1)**2 * (df - dim - 3) 

2792 else: 

2793 var = None 

2794 return var 

2795 

2796 def var(self, df, scale): 

2797 """Variance of the inverse Wishart distribution. 

2798 

2799 Only valid if the degrees of freedom are greater than the dimension of 

2800 the scale matrix plus three. 

2801 

2802 Parameters 

2803 ---------- 

2804 %(_doc_default_callparams)s 

2805 

2806 Returns 

2807 ------- 

2808 var : float 

2809 The variance of the distribution 

2810 """ 

2811 dim, df, scale = self._process_parameters(df, scale) 

2812 out = self._var(dim, df, scale) 

2813 return _squeeze_output(out) if out is not None else out 

2814 

2815 def _rvs(self, n, shape, dim, df, C, random_state): 

2816 """Draw random samples from an inverse Wishart distribution. 

2817 

2818 Parameters 

2819 ---------- 

2820 n : integer 

2821 Number of variates to generate 

2822 shape : iterable 

2823 Shape of the variates to generate 

2824 dim : int 

2825 Dimension of the scale matrix 

2826 df : int 

2827 Degrees of freedom 

2828 C : ndarray 

2829 Cholesky factorization of the scale matrix, lower triangular.

2830 %(_doc_random_state)s 

2831 

2832 Notes 

2833 ----- 

2834 As this function does no argument checking, it should not be 

2835 called directly; use 'rvs' instead. 

2836 

2837 """ 

2838 random_state = self._get_random_state(random_state) 

2839 # Get random draws A such that A ~ W(df, I) 

2840 A = super()._standard_rvs(n, shape, dim, df, random_state) 

2841 

2842 # Calculate SA = (CA)'^{-1} (CA)^{-1} ~ iW(df, scale) 

2843 eye = np.eye(dim) 

2844 trtrs = get_lapack_funcs(('trtrs'), (A,)) 

2845 

2846 for index in np.ndindex(A.shape[:-2]): 

2847 # Calculate CA 

2848 CA = np.dot(C, A[index]) 

2849 # Get (C A)^{-1} via triangular solver 

2850 if dim > 1: 

2851 CA, info = trtrs(CA, eye, lower=True) 

2852 if info > 0: 

2853 raise LinAlgError("Singular matrix.") 

2854 if info < 0: 

2855 raise ValueError('Illegal value in %d-th argument of' 

2856 ' internal trtrs' % -info) 

2857 else: 

2858 CA = 1. / CA 

2859 # Get SA 

2860 A[index] = np.dot(CA.T, CA) 

2861 

2862 return A 

2863 

2864 def rvs(self, df, scale, size=1, random_state=None): 

2865 """Draw random samples from an inverse Wishart distribution. 

2866 

2867 Parameters 

2868 ---------- 

2869 %(_doc_default_callparams)s 

2870 size : integer or iterable of integers, optional 

2871 Number of samples to draw (default 1). 

2872 %(_doc_random_state)s 

2873 

2874 Returns 

2875 ------- 

2876 rvs : ndarray 

2877 Random variates of shape (`size`) + (`dim`, `dim`), where `dim` is

2878 the dimension of the scale matrix. 

2879 

2880 Notes 

2881 ----- 

2882 %(_doc_callparams_note)s 

2883 

2884 """ 

2885 n, shape = self._process_size(size) 

2886 dim, df, scale = self._process_parameters(df, scale) 

2887 

2888 # Invert the scale 

2889 eye = np.eye(dim) 

2890 L, lower = scipy.linalg.cho_factor(scale, lower=True) 

2891 inv_scale = scipy.linalg.cho_solve((L, lower), eye) 

2892 # Cholesky decomposition of inverted scale 

2893 C = scipy.linalg.cholesky(inv_scale, lower=True) 

2894 

2895 out = self._rvs(n, shape, dim, df, C, random_state) 

2896 

2897 return _squeeze_output(out) 

2898 

2899 def entropy(self): 

2900 # Need to find reference for inverse Wishart entropy 

2901 raise AttributeError 

2902 

2903 

2904invwishart = invwishart_gen() 
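# Illustrative usage sketch (not part of the upstream SciPy source): checks of
# the documented inverse-Wishart moments and of the 1-D inverse-gamma special
# case mentioned in the class docstring. The helper name is hypothetical and
# the function is never called on import.
def _demo_invwishart_moments():
    import numpy as np
    from scipy.stats import invwishart, invgamma

    df, scale = 8, np.array([[2.0, 0.3], [0.3, 1.0]])
    p = scale.shape[0]
    # mean = scale / (df - p - 1) for df > p + 1; mode = scale / (df + p + 1).
    assert np.allclose(invwishart.mean(df, scale), scale / (df - p - 1))
    assert np.allclose(invwishart.mode(df, scale), scale / (df + p + 1))
    # With a 1x1 identity scale the density reduces to invgamma(df/2, scale=1/2).
    assert np.isclose(invwishart.pdf(0.3, df=6, scale=1),
                      invgamma.pdf(0.3, 6 / 2, scale=1 / 2))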

2905 

2906 

2907class invwishart_frozen(multi_rv_frozen): 

2908 def __init__(self, df, scale, seed=None): 

2909 """Create a frozen inverse Wishart distribution. 

2910 

2911 Parameters 

2912 ---------- 

2913 df : array_like 

2914 Degrees of freedom of the distribution 

2915 scale : array_like 

2916 Scale matrix of the distribution 

2917 seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional

2918 If `seed` is None (or `np.random`), the `numpy.random.RandomState`

2919 singleton is used.

2920 If `seed` is an int, a new ``RandomState`` instance is used, seeded

2921 with `seed`. If `seed` is already a ``Generator`` or ``RandomState``

2922 instance then that instance is used.

2923 

2924 """ 

2925 self._dist = invwishart_gen(seed) 

2926 self.dim, self.df, self.scale = self._dist._process_parameters( 

2927 df, scale 

2928 ) 

2929 

2930 # Get the determinant via Cholesky factorization 

2931 C, lower = scipy.linalg.cho_factor(self.scale, lower=True) 

2932 self.log_det_scale = 2 * np.sum(np.log(C.diagonal())) 

2933 

2934 # Get the inverse using the Cholesky factorization 

2935 eye = np.eye(self.dim) 

2936 self.inv_scale = scipy.linalg.cho_solve((C, lower), eye) 

2937 

2938 # Get the Cholesky factorization of the inverse scale 

2939 self.C = scipy.linalg.cholesky(self.inv_scale, lower=True) 

2940 

2941 def logpdf(self, x): 

2942 x = self._dist._process_quantiles(x, self.dim) 

2943 out = self._dist._logpdf(x, self.dim, self.df, self.scale, 

2944 self.log_det_scale) 

2945 return _squeeze_output(out) 

2946 

2947 def pdf(self, x): 

2948 return np.exp(self.logpdf(x)) 

2949 

2950 def mean(self): 

2951 out = self._dist._mean(self.dim, self.df, self.scale) 

2952 return _squeeze_output(out) if out is not None else out 

2953 

2954 def mode(self): 

2955 out = self._dist._mode(self.dim, self.df, self.scale) 

2956 return _squeeze_output(out) 

2957 

2958 def var(self): 

2959 out = self._dist._var(self.dim, self.df, self.scale) 

2960 return _squeeze_output(out) if out is not None else out 

2961 

2962 def rvs(self, size=1, random_state=None): 

2963 n, shape = self._dist._process_size(size) 

2964 

2965 out = self._dist._rvs(n, shape, self.dim, self.df, 

2966 self.C, random_state) 

2967 

2968 return _squeeze_output(out) 

2969 

2970 def entropy(self): 

2971 # Need to find reference for inverse Wishart entropy 

2972 raise AttributeError 

2973 

2974 

2975# Set frozen generator docstrings from corresponding docstrings in 

2976# inverse Wishart and fill in default strings in class docstrings 

2977for name in ['logpdf', 'pdf', 'mean', 'mode', 'var', 'rvs']: 

2978 method = invwishart_gen.__dict__[name] 

2979 method_frozen = invwishart_frozen.__dict__[name]

2980 method_frozen.__doc__ = doccer.docformat( 

2981 method.__doc__, wishart_docdict_noparams) 

2982 method.__doc__ = doccer.docformat(method.__doc__, wishart_docdict_params) 

2983 

2984_multinomial_doc_default_callparams = """\ 

2985n : int 

2986 Number of trials 

2987p : array_like 

2988 Probability of a trial falling into each category; should sum to 1 

2989""" 

2990 

2991_multinomial_doc_callparams_note = """\ 

2992`n` should be a positive integer. Each element of `p` should be in the 

2993interval :math:`[0,1]` and the elements should sum to 1. If they do not sum to 

29941, the last element of the `p` array is not used and is replaced with the 

2995remaining probability left over from the earlier elements. 

2996""" 

2997 

2998_multinomial_doc_frozen_callparams = "" 

2999 

3000_multinomial_doc_frozen_callparams_note = """\ 

3001See class definition for a detailed description of parameters.""" 

3002 

3003multinomial_docdict_params = { 

3004 '_doc_default_callparams': _multinomial_doc_default_callparams, 

3005 '_doc_callparams_note': _multinomial_doc_callparams_note, 

3006 '_doc_random_state': _doc_random_state 

3007} 

3008 

3009multinomial_docdict_noparams = { 

3010 '_doc_default_callparams': _multinomial_doc_frozen_callparams, 

3011 '_doc_callparams_note': _multinomial_doc_frozen_callparams_note, 

3012 '_doc_random_state': _doc_random_state 

3013} 

3014 

3015 

3016class multinomial_gen(multi_rv_generic): 

3017 r"""A multinomial random variable. 

3018 

3019 Methods 

3020 ------- 

3021 pmf(x, n, p) 

3022 Probability mass function. 

3023 logpmf(x, n, p) 

3024 Log of the probability mass function. 

3025 rvs(n, p, size=1, random_state=None) 

3026 Draw random samples from a multinomial distribution. 

3027 entropy(n, p) 

3028 Compute the entropy of the multinomial distribution. 

3029 cov(n, p) 

3030 Compute the covariance matrix of the multinomial distribution. 

3031 

3032 Parameters 

3033 ---------- 

3034 %(_doc_default_callparams)s 

3035 %(_doc_random_state)s 

3036 

3037 Notes 

3038 ----- 

3039 %(_doc_callparams_note)s 

3040 

3041 The probability mass function for `multinomial` is 

3042 

3043 .. math:: 

3044 

3045 f(x) = \frac{n!}{x_1! \cdots x_k!} p_1^{x_1} \cdots p_k^{x_k}, 

3046 

3047 supported on :math:`x=(x_1, \ldots, x_k)` where each :math:`x_i` is a 

3048 nonnegative integer and their sum is :math:`n`. 

3049 

3050 .. versionadded:: 0.19.0 

3051 

3052 Examples 

3053 -------- 

3054 

3055 >>> from scipy.stats import multinomial 

3056 >>> rv = multinomial(8, [0.3, 0.2, 0.5]) 

3057 >>> rv.pmf([1, 3, 4]) 

3058 0.042000000000000072 

3059 

3060 The multinomial distribution for :math:`k=2` is identical to the 

3061 corresponding binomial distribution (tiny numerical differences 

3062 notwithstanding): 

3063 

3064 >>> from scipy.stats import binom 

3065 >>> multinomial.pmf([3, 4], n=7, p=[0.4, 0.6]) 

3066 0.29030399999999973 

3067 >>> binom.pmf(3, 7, 0.4) 

3068 0.29030400000000012 

3069 

3070 The functions ``pmf``, ``logpmf``, ``entropy``, and ``cov`` support 

3071 broadcasting, under the convention that the vector parameters (``x`` and 

3072 ``p``) are interpreted as if each row along the last axis is a single 

3073 object. For instance: 

3074 

3075 >>> multinomial.pmf([[3, 4], [3, 5]], n=[7, 8], p=[.3, .7]) 

3076 array([0.2268945, 0.25412184]) 

3077 

3078 Here, ``x.shape == (2, 2)``, ``n.shape == (2,)``, and ``p.shape == (2,)``, 

3079 but following the rules mentioned above they behave as if the rows 

3080 ``[3, 4]`` and ``[3, 5]`` in ``x`` and ``[.3, .7]`` in ``p`` were a single 

3081 object, and as if we had ``x.shape = (2,)``, ``n.shape = (2,)``, and 

3082 ``p.shape = ()``. To obtain the individual elements without broadcasting, 

3083 we would do this: 

3084 

3085 >>> multinomial.pmf([3, 4], n=7, p=[.3, .7]) 

3086 0.2268945 

3087 >>> multinomial.pmf([3, 5], 8, p=[.3, .7]) 

3088 0.25412184 

3089 

3090 This broadcasting also works for ``cov``, where the output objects are 

3091 square matrices of size ``p.shape[-1]``. For example: 

3092 

3093 >>> multinomial.cov([4, 5], [[.3, .7], [.4, .6]]) 

3094 array([[[ 0.84, -0.84], 

3095 [-0.84, 0.84]], 

3096 [[ 1.2 , -1.2 ], 

3097 [-1.2 , 1.2 ]]]) 

3098 

3099 In this example, ``n.shape == (2,)`` and ``p.shape == (2, 2)``, and 

3100 following the rules above, these broadcast as if ``p.shape == (2,)``. 

3101 Thus the result should also be of shape ``(2,)``, but since each output is 

3102 a :math:`2 \times 2` matrix, the result in fact has shape ``(2, 2, 2)``, 

3103 where ``result[0]`` is equal to ``multinomial.cov(n=4, p=[.3, .7])`` and 

3104 ``result[1]`` is equal to ``multinomial.cov(n=5, p=[.4, .6])``. 

3105 

3106 Alternatively, the object may be called (as a function) to fix the `n` and 

3107 `p` parameters, returning a "frozen" multinomial random variable: 

3108 

3109 >>> rv = multinomial(n=7, p=[.3, .7]) 

3110 >>> # Frozen object with the same methods but holding the given 

3111 >>> # degrees of freedom and scale fixed. 

3112 

3113 See also 

3114 -------- 

3115 scipy.stats.binom : The binomial distribution. 

3116 numpy.random.Generator.multinomial : Sampling from the multinomial distribution. 

3117 scipy.stats.multivariate_hypergeom : 

3118 The multivariate hypergeometric distribution. 

3119 """ # noqa: E501 

3120 

3121 def __init__(self, seed=None): 

3122 super().__init__(seed) 

3123 self.__doc__ = \ 

3124 doccer.docformat(self.__doc__, multinomial_docdict_params) 

3125 

3126 def __call__(self, n, p, seed=None): 

3127 """Create a frozen multinomial distribution. 

3128 

3129 See `multinomial_frozen` for more information. 

3130 """ 

3131 return multinomial_frozen(n, p, seed) 

3132 

3133 def _process_parameters(self, n, p, eps=1e-15): 

3134 """Returns: n_, p_, npcond. 

3135 

3136 n_ and p_ are arrays of the correct shape; npcond is a boolean array 

3137 flagging values out of the domain. 

3138 """ 

3139 p = np.array(p, dtype=np.float64, copy=True) 

3140 p_adjusted = 1. - p[..., :-1].sum(axis=-1) 

3141 i_adjusted = np.abs(p_adjusted) > eps 

3142 p[i_adjusted, -1] = p_adjusted[i_adjusted] 

3143 

3144 # true for bad p 

3145 pcond = np.any(p < 0, axis=-1) 

3146 pcond |= np.any(p > 1, axis=-1) 

3147 

3148 n = np.array(n, dtype=np.int_, copy=True) 

3149 

3150 # true for bad n 

3151 ncond = n <= 0 

3152 

3153 return n, p, ncond | pcond 
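# Illustrative doctest (not in the upstream SciPy source): as described in the
# call-parameters note, the last entry of ``p`` is recomputed as
# 1 - sum(p[:-1]), so these two calls give the same probability.
#
# >>> import numpy as np
# >>> from scipy.stats import multinomial
# >>> bool(np.isclose(multinomial.pmf([3, 4], n=7, p=[0.4, 0.4]),
# ...                 multinomial.pmf([3, 4], n=7, p=[0.4, 0.6])))
# True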

3154 

3155 def _process_quantiles(self, x, n, p): 

3156 """Returns: x_, xcond. 

3157 

3158 x_ is an int array; xcond is a boolean array flagging values out of the 

3159 domain. 

3160 """ 

3161 xx = np.asarray(x, dtype=np.int_) 

3162 

3163 if xx.ndim == 0: 

3164 raise ValueError("x must be an array.") 

3165 

3166 if xx.size != 0 and not xx.shape[-1] == p.shape[-1]: 

3167 raise ValueError("Size of each quantile should be size of p: " 

3168 "received %d, but expected %d." % 

3169 (xx.shape[-1], p.shape[-1])) 

3170 

3171 # true for x out of the domain 

3172 cond = np.any(xx != x, axis=-1) 

3173 cond |= np.any(xx < 0, axis=-1) 

3174 cond = cond | (np.sum(xx, axis=-1) != n) 

3175 

3176 return xx, cond 

3177 

3178 def _checkresult(self, result, cond, bad_value): 

3179 result = np.asarray(result) 

3180 

3181 if cond.ndim != 0: 

3182 result[cond] = bad_value 

3183 elif cond: 

3184 if result.ndim == 0: 

3185 return bad_value 

3186 result[...] = bad_value 

3187 return result 

3188 

3189 def _logpmf(self, x, n, p): 

3190 return gammaln(n+1) + np.sum(xlogy(x, p) - gammaln(x+1), axis=-1) 

3191 

3192 def logpmf(self, x, n, p): 

3193 """Log of the Multinomial probability mass function. 

3194 

3195 Parameters 

3196 ---------- 

3197 x : array_like 

3198 Quantiles, with the last axis of `x` denoting the components. 

3199 %(_doc_default_callparams)s 

3200 

3201 Returns 

3202 ------- 

3203 logpmf : ndarray or scalar 

3204 Log of the probability mass function evaluated at `x` 

3205 

3206 Notes 

3207 ----- 

3208 %(_doc_callparams_note)s 

3209 """ 

3210 n, p, npcond = self._process_parameters(n, p) 

3211 x, xcond = self._process_quantiles(x, n, p) 

3212 

3213 result = self._logpmf(x, n, p) 

3214 

3215 # replace values for which x was out of the domain; broadcast 

3216 # xcond to the right shape 

3217 xcond_ = xcond | np.zeros(npcond.shape, dtype=np.bool_) 

3218 result = self._checkresult(result, xcond_, -np.inf)

3219 

3220 # replace values bad for n or p; broadcast npcond to the right shape 

3221 npcond_ = npcond | np.zeros(xcond.shape, dtype=np.bool_) 

3222 return self._checkresult(result, npcond_, np.nan)

3223 

3224 def pmf(self, x, n, p): 

3225 """Multinomial probability mass function. 

3226 

3227 Parameters 

3228 ---------- 

3229 x : array_like 

3230 Quantiles, with the last axis of `x` denoting the components. 

3231 %(_doc_default_callparams)s 

3232 

3233 Returns 

3234 ------- 

3235 pmf : ndarray or scalar 

3236 Probability mass function evaluated at `x`

3237 

3238 Notes 

3239 ----- 

3240 %(_doc_callparams_note)s 

3241 """ 

3242 return np.exp(self.logpmf(x, n, p)) 

3243 

3244 def mean(self, n, p): 

3245 """Mean of the Multinomial distribution. 

3246 

3247 Parameters 

3248 ---------- 

3249 %(_doc_default_callparams)s 

3250 

3251 Returns 

3252 ------- 

3253 mean : float 

3254 The mean of the distribution 

3255 """ 

3256 n, p, npcond = self._process_parameters(n, p) 

3257 result = n[..., np.newaxis]*p 

3258 return self._checkresult(result, npcond, np.nan)

3259 

3260 def cov(self, n, p): 

3261 """Covariance matrix of the multinomial distribution. 

3262 

3263 Parameters 

3264 ---------- 

3265 %(_doc_default_callparams)s 

3266 

3267 Returns 

3268 ------- 

3269 cov : ndarray 

3270 The covariance matrix of the distribution 

3271 """ 

3272 n, p, npcond = self._process_parameters(n, p) 

3273 

3274 nn = n[..., np.newaxis, np.newaxis] 

3275 result = nn * np.einsum('...j,...k->...jk', -p, p) 

3276 

3277 # change the diagonal 

3278 for i in range(p.shape[-1]): 

3279 result[..., i, i] += n*p[..., i] 

3280 

3281 return self._checkresult(result, npcond, np.nan) 
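# Illustrative doctest (not in the upstream SciPy source): the covariance
# assembled above equals the closed form n * (diag(p) - p p^T).
#
# >>> import numpy as np
# >>> from scipy.stats import multinomial
# >>> n, p = 7, np.array([0.3, 0.7])
# >>> np.allclose(multinomial.cov(n, p), n * (np.diag(p) - np.outer(p, p)))
# True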

3282 

3283 def entropy(self, n, p): 

3284 r"""Compute the entropy of the multinomial distribution. 

3285 

3286 The entropy is computed using this expression: 

3287 

3288 .. math:: 

3289 

3290 H = - \log n! - n\sum_{i=1}^k p_i \log p_i +

3291 \sum_{i=1}^k \sum_{x=0}^n \binom n x p_i^x(1-p_i)^{n-x} \log x! 

3292 

3293 Parameters 

3294 ---------- 

3295 %(_doc_default_callparams)s 

3296 

3297 Returns 

3298 ------- 

3299 h : scalar 

3300 Entropy of the Multinomial distribution 

3301 

3302 Notes 

3303 ----- 

3304 %(_doc_callparams_note)s 

3305 """ 

3306 n, p, npcond = self._process_parameters(n, p) 

3307 

3308 x = np.r_[1:np.max(n)+1] 

3309 

3310 term1 = n*np.sum(entr(p), axis=-1) 

3311 term1 -= gammaln(n+1) 

3312 

3313 n = n[..., np.newaxis] 

3314 new_axes_needed = max(p.ndim, n.ndim) - x.ndim + 1 

3315 x.shape += (1,)*new_axes_needed 

3316 

3317 term2 = np.sum(binom.pmf(x, n, p)*gammaln(x+1), 

3318 axis=(-1, -1-new_axes_needed)) 

3319 

3320 return self._checkresult(term1 + term2, npcond, np.nan) 
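# Illustrative doctest (not in the upstream SciPy source): for k = 2 the
# multinomial is a relabelled binomial, so its entropy matches
# ``binom.entropy``.
#
# >>> import numpy as np
# >>> from scipy.stats import multinomial, binom
# >>> bool(np.isclose(multinomial.entropy(7, [0.4, 0.6]),
# ...                 binom(7, 0.4).entropy()))
# True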

3321 

3322 def rvs(self, n, p, size=None, random_state=None): 

3323 """Draw random samples from a Multinomial distribution. 

3324 

3325 Parameters 

3326 ---------- 

3327 %(_doc_default_callparams)s 

3328 size : integer or iterable of integers, optional 

3329 Number of samples to draw (default 1). 

3330 %(_doc_random_state)s 

3331 

3332 Returns 

3333 ------- 

3334 rvs : ndarray or scalar 

3335 Random variates of shape (`size`, `len(p)`) 

3336 

3337 Notes 

3338 ----- 

3339 %(_doc_callparams_note)s 

3340 """ 

3341 n, p, npcond = self._process_parameters(n, p) 

3342 random_state = self._get_random_state(random_state) 

3343 return random_state.multinomial(n, p, size) 

3344 

3345 

3346multinomial = multinomial_gen() 
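# Illustrative usage sketch (not part of the upstream SciPy source): the pmf
# formula quoted in the class docstring, evaluated with plain factorials,
# matches ``multinomial.pmf``. The helper name is hypothetical and the
# function is never called on import.
def _demo_multinomial_pmf_formula():
    import math

    import numpy as np
    from scipy.stats import multinomial

    x, n, p = [1, 3, 4], 8, np.array([0.3, 0.2, 0.5])
    coeff = math.factorial(n) / np.prod([math.factorial(k) for k in x])
    manual = coeff * np.prod(p ** np.asarray(x))
    assert np.isclose(manual, multinomial.pmf(x, n, p))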

3347 

3348 

3349class multinomial_frozen(multi_rv_frozen): 

3350 r"""Create a frozen Multinomial distribution. 

3351 

3352 Parameters 

3353 ---------- 

3354 n : int 

3355 number of trials 

3356 p: array_like 

3357 probability of a trial falling into each category; should sum to 1 

3358 seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional 

3359 If `seed` is None (or `np.random`), the `numpy.random.RandomState` 

3360 singleton is used. 

3361 If `seed` is an int, a new ``RandomState`` instance is used, 

3362 seeded with `seed`. 

3363 If `seed` is already a ``Generator`` or ``RandomState`` instance then 

3364 that instance is used. 

3365 """ 

3366 def __init__(self, n, p, seed=None): 

3367 self._dist = multinomial_gen(seed) 

3368 self.n, self.p, self.npcond = self._dist._process_parameters(n, p) 

3369 

3370 # monkey patch self._dist 

3371 def _process_parameters(n, p): 

3372 return self.n, self.p, self.npcond 

3373 

3374 self._dist._process_parameters = _process_parameters 

3375 

3376 def logpmf(self, x): 

3377 return self._dist.logpmf(x, self.n, self.p) 

3378 

3379 def pmf(self, x): 

3380 return self._dist.pmf(x, self.n, self.p) 

3381 

3382 def mean(self): 

3383 return self._dist.mean(self.n, self.p) 

3384 

3385 def cov(self): 

3386 return self._dist.cov(self.n, self.p) 

3387 

3388 def entropy(self): 

3389 return self._dist.entropy(self.n, self.p) 

3390 

3391 def rvs(self, size=1, random_state=None): 

3392 return self._dist.rvs(self.n, self.p, size, random_state) 

3393 

3394 

3395# Set frozen generator docstrings from corresponding docstrings in 

3396# multinomial and fill in default strings in class docstrings 

3397for name in ['logpmf', 'pmf', 'mean', 'cov', 'rvs']: 

3398 method = multinomial_gen.__dict__[name] 

3399 method_frozen = multinomial_frozen.__dict__[name] 

3400 method_frozen.__doc__ = doccer.docformat( 

3401 method.__doc__, multinomial_docdict_noparams) 

3402 method.__doc__ = doccer.docformat(method.__doc__, 

3403 multinomial_docdict_params) 

3404 

3405 

3406class special_ortho_group_gen(multi_rv_generic): 

3407 r"""A Special Orthogonal matrix (SO(N)) random variable. 

3408 

3409 Return a random rotation matrix, drawn from the Haar distribution 

3410 (the only uniform distribution on SO(N)) with a determinant of +1. 

3411 

3412 The `dim` keyword specifies the dimension N. 

3413 

3414 Methods 

3415 ------- 

3416 rvs(dim=None, size=1, random_state=None) 

3417 Draw random samples from SO(N). 

3418 

3419 Parameters 

3420 ---------- 

3421 dim : scalar 

3422 Dimension of matrices 

3423 seed : {None, int, np.random.RandomState, np.random.Generator}, optional 

3424 Used for drawing random variates. 

3425 If `seed` is `None`, the `~np.random.RandomState` singleton is used. 

3426 If `seed` is an int, a new ``RandomState`` instance is used, seeded 

3427 with seed. 

3428 If `seed` is already a ``RandomState`` or ``Generator`` instance, 

3429 then that object is used. 

3430 Default is `None`. 

3431 

3432 Notes 

3433 ----- 

3434 This class wraps the random_rot code from the MDP Toolkit, 

3435 https://github.com/mdp-toolkit/mdp-toolkit 

3436 

3437 Return a random rotation matrix, drawn from the Haar distribution 

3438 (the only uniform distribution on SO(N)). 

3439 The algorithm is described in the paper 

3440 Stewart, G.W., "The efficient generation of random orthogonal 

3441 matrices with an application to condition estimators", SIAM Journal 

3442 on Numerical Analysis, 17(3), pp. 403-409, 1980. 

3443 For more information see 

3444 https://en.wikipedia.org/wiki/Orthogonal_matrix#Randomization 

3445 

3446 See also the similar `ortho_group`. For a random rotation in three 

3447 dimensions, see `scipy.spatial.transform.Rotation.random`. 

3448 

3449 Examples 

3450 -------- 

3451 >>> import numpy as np 

3452 >>> from scipy.stats import special_ortho_group 

3453 >>> x = special_ortho_group.rvs(3) 

3454 

3455 >>> np.dot(x, x.T) 

3456 array([[ 1.00000000e+00, 1.13231364e-17, -2.86852790e-16], 

3457 [ 1.13231364e-17, 1.00000000e+00, -1.46845020e-16], 

3458 [ -2.86852790e-16, -1.46845020e-16, 1.00000000e+00]]) 

3459 

3460 >>> import scipy.linalg 

3461 >>> scipy.linalg.det(x) 

3462 1.0 

3463 

3464 This generates one random matrix from SO(3). It is orthogonal and 

3465 has a determinant of 1. 

3466 

3467 Alternatively, the object may be called (as a function) to fix the `dim` 

3468 parameter, returning a "frozen" special_ortho_group random variable: 

3469 

3470 >>> rv = special_ortho_group(5) 

3471 >>> # Frozen object with the same methods but holding the 

3472 >>> # dimension parameter fixed. 

3473 

3474 See Also 

3475 -------- 

3476 ortho_group, scipy.spatial.transform.Rotation.random 

3477 

3478 """ 

3479 

3480 def __init__(self, seed=None): 

3481 super().__init__(seed) 

3482 self.__doc__ = doccer.docformat(self.__doc__) 

3483 

3484 def __call__(self, dim=None, seed=None): 

3485 """Create a frozen SO(N) distribution. 

3486 

3487 See `special_ortho_group_frozen` for more information. 

3488 """ 

3489 return special_ortho_group_frozen(dim, seed=seed) 

3490 

3491 def _process_parameters(self, dim): 

3492 """Dimension N must be specified; it cannot be inferred.""" 

3493 if dim is None or not np.isscalar(dim) or dim <= 1 or dim != int(dim): 

3494 raise ValueError("""Dimension of rotation must be specified, 

3495 and must be a scalar greater than 1.""") 

3496 

3497 return dim 

3498 

3499 def rvs(self, dim, size=1, random_state=None): 

3500 """Draw random samples from SO(N). 

3501 

3502 Parameters 

3503 ---------- 

3504 dim : integer 

3505 Dimension of rotation space (N). 

3506 size : integer, optional 

3507 Number of samples to draw (default 1). 

3508 

3509 Returns 

3510 ------- 

3511 rvs : ndarray or scalar 

3512 Random size N-dimensional matrices, dimension (size, dim, dim) 

3513 

3514 """ 

3515 random_state = self._get_random_state(random_state) 

3516 

3517 size = int(size) 

3518 size = (size,) if size > 1 else () 

3519 

3520 dim = self._process_parameters(dim) 

3521 

3522 # H represents a (dim, dim) matrix, while D represents the diagonal of 

3523 # a (dim, dim) diagonal matrix. The algorithm that follows is 

3524 # broadcasted on the leading shape in `size` to vectorize along 

3525 # samples. 

3526 H = np.empty(size + (dim, dim)) 

3527 H[..., :, :] = np.eye(dim) 

3528 D = np.empty(size + (dim,)) 

3529 

3530 for n in range(dim-1): 

3531 

3532 # x is a vector with length dim-n, xrow and xcol are views of it as 

3533 # a row vector and column vector respectively. It's important they 

3534 # are views and not copies because we are going to modify x 

3535 # in-place. 

3536 x = random_state.normal(size=size + (dim-n,)) 

3537 xrow = x[..., None, :] 

3538 xcol = x[..., :, None] 

3539 

3540 # This is the squared norm of x, without vectorization it would be 

3541 # dot(x, x), to have proper broadcasting we use matmul and squeeze 

3542 # out (convert to scalar) the resulting 1x1 matrix 

3543 norm2 = np.matmul(xrow, xcol).squeeze((-2, -1)) 

3544 

3545 x0 = x[..., 0].copy() 

3546 D[..., n] = np.where(x0 != 0, np.sign(x0), 1) 

3547 x[..., 0] += D[..., n]*np.sqrt(norm2) 

3548 

3549 # In renormalizing x we have to append an additional axis with 

3550 # [..., None] to broadcast the scalar against the vector x 

3551 x /= np.sqrt((norm2 - x0**2 + x[..., 0]**2) / 2.)[..., None] 

3552 

3553 # Householder transformation, without vectorization the RHS can be 

3554 # written as outer(H @ x, x) (apart from the slicing) 

3555 H[..., :, n:] -= np.matmul(H[..., :, n:], xcol) * xrow 

3556 

3557 D[..., -1] = (-1)**(dim-1)*D[..., :-1].prod(axis=-1) 

3558 

3559 # Without vectorization this could be written as H = diag(D) @ H, 

3560 # left-multiplication by a diagonal matrix amounts to multiplying each 

3561 # row of H by an element of the diagonal, so we add a dummy axis for 

3562 # the column index 

3563 H *= D[..., :, None] 

3564 return H 

3565 

3566 

3567special_ortho_group = special_ortho_group_gen() 
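# A de-vectorized sketch of the Householder construction implemented in
# `special_ortho_group_gen.rvs` above (one sample only, for readability).
# `_so_n_sketch` is a name invented for this example, not part of the module.
import numpy as np

def _so_n_sketch(dim, rng):
    H = np.eye(dim)
    D = np.empty(dim)
    for n in range(dim - 1):
        x = rng.normal(size=dim - n)
        norm2 = x @ x
        x0 = x[0]
        D[n] = np.sign(x0) if x0 != 0 else 1
        x[0] += D[n] * np.sqrt(norm2)
        x /= np.sqrt((norm2 - x0**2 + x[0]**2) / 2.)
        # apply the Householder reflection I - x x^T to the trailing columns
        H[:, n:] -= np.outer(H[:, n:] @ x, x)
    D[-1] = (-1)**(dim - 1) * D[:-1].prod()
    return D[:, None] * H    # same as diag(D) @ H

_rng = np.random.default_rng(42)
_R = _so_n_sketch(4, _rng)
assert np.allclose(_R @ _R.T, np.eye(4))
assert np.isclose(np.linalg.det(_R), 1.0)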

3568 

3569 

3570class special_ortho_group_frozen(multi_rv_frozen): 

3571 def __init__(self, dim=None, seed=None): 

3572 """Create a frozen SO(N) distribution. 

3573 

3574 Parameters 

3575 ---------- 

3576 dim : scalar 

3577 Dimension of matrices 

3578 seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional 

3579 If `seed` is None (or `np.random`), the `numpy.random.RandomState` 

3580 singleton is used. 

3581 If `seed` is an int, a new ``RandomState`` instance is used, 

3582 seeded with `seed`. 

3583 If `seed` is already a ``Generator`` or ``RandomState`` instance 

3584 then that instance is used. 

3585 

3586 Examples 

3587 -------- 

3588 >>> from scipy.stats import special_ortho_group 

3589 >>> g = special_ortho_group(5) 

3590 >>> x = g.rvs() 

3591 

3592 """ 

3593 self._dist = special_ortho_group_gen(seed) 

3594 self.dim = self._dist._process_parameters(dim) 

3595 

3596 def rvs(self, size=1, random_state=None): 

3597 return self._dist.rvs(self.dim, size, random_state) 

3598 

3599 

3600class ortho_group_gen(multi_rv_generic): 

3601 r"""An Orthogonal matrix (O(N)) random variable. 

3602 

3603 Return a random orthogonal matrix, drawn from the O(N) Haar 

3604 distribution (the only uniform distribution on O(N)). 

3605 

3606 The `dim` keyword specifies the dimension N. 

3607 

3608 Methods 

3609 ------- 

3610 rvs(dim=None, size=1, random_state=None) 

3611 Draw random samples from O(N). 

3612 

3613 Parameters 

3614 ---------- 

3615 dim : scalar 

3616 Dimension of matrices 

3617 seed : {None, int, np.random.RandomState, np.random.Generator}, optional 

3618 Used for drawing random variates. 

3619 If `seed` is `None`, the `~np.random.RandomState` singleton is used. 

3620 If `seed` is an int, a new ``RandomState`` instance is used, seeded 

3621 with seed. 

3622 If `seed` is already a ``RandomState`` or ``Generator`` instance, 

3623 then that object is used. 

3624 Default is `None`. 

3625 

3626 Notes 

3627 ----- 

3628 This class is closely related to `special_ortho_group`. 

3629 

3630 Some care is taken to avoid numerical error, as per the paper by Mezzadri. 

3631 

3632 References 

3633 ---------- 

3634 .. [1] F. Mezzadri, "How to generate random matrices from the classical 

3635 compact groups", :arXiv:`math-ph/0609050v2`. 

3636 

3637 Examples 

3638 -------- 

3639 >>> import numpy as np 

3640 >>> from scipy.stats import ortho_group 

3641 >>> x = ortho_group.rvs(3) 

3642 

3643 >>> np.dot(x, x.T) 

3644 array([[ 1.00000000e+00, 1.13231364e-17, -2.86852790e-16], 

3645 [ 1.13231364e-17, 1.00000000e+00, -1.46845020e-16], 

3646 [ -2.86852790e-16, -1.46845020e-16, 1.00000000e+00]]) 

3647 

3648 >>> import scipy.linalg 

3649 >>> np.fabs(scipy.linalg.det(x)) 

3650 1.0 

3651 

3652 This generates one random matrix from O(3). It is orthogonal and 

3653 has a determinant of +1 or -1. 

3654 

3655 Alternatively, the object may be called (as a function) to fix the `dim` 

3656 parameter, returning a "frozen" ortho_group random variable: 

3657 

3658 >>> rv = ortho_group(5) 

3659 >>> # Frozen object with the same methods but holding the 

3660 >>> # dimension parameter fixed. 

3661 

3662 See Also 

3663 -------- 

3664 special_ortho_group 

3665 """ 

3666 

3667 def __init__(self, seed=None): 

3668 super().__init__(seed) 

3669 self.__doc__ = doccer.docformat(self.__doc__) 

3670 

3671 def __call__(self, dim=None, seed=None): 

3672 """Create a frozen O(N) distribution. 

3673 

3674 See `ortho_group_frozen` for more information. 

3675 """ 

3676 return ortho_group_frozen(dim, seed=seed) 

3677 

3678 def _process_parameters(self, dim): 

3679 """Dimension N must be specified; it cannot be inferred.""" 

3680 if dim is None or not np.isscalar(dim) or dim <= 1 or dim != int(dim): 

3681 raise ValueError("Dimension of rotation must be specified," 

3682 "and must be a scalar greater than 1.") 

3683 

3684 return dim 

3685 

3686 def rvs(self, dim, size=1, random_state=None): 

3687 """Draw random samples from O(N). 

3688 

3689 Parameters 

3690 ---------- 

3691 dim : integer 

3692 Dimension of rotation space (N). 

3693 size : integer, optional 

3694 Number of samples to draw (default 1). 

3695 

3696 Returns 

3697 ------- 

3698 rvs : ndarray or scalar 

3699 Random size N-dimensional matrices, dimension (size, dim, dim) 

3700 

3701 """ 

3702 random_state = self._get_random_state(random_state) 

3703 

3704 size = int(size) 

3705 if size > 1 and NumpyVersion(np.__version__) < '1.22.0': 

3706 return np.array([self.rvs(dim, size=1, random_state=random_state) 

3707 for i in range(size)]) 

3708 

3709 dim = self._process_parameters(dim) 

3710 

3711 size = (size,) if size > 1 else () 

3712 z = random_state.normal(size=size + (dim, dim)) 

3713 q, r = np.linalg.qr(z) 

3714 # The last two dimensions are the rows and columns of R matrices. 

3715 # Extract the diagonals. Note that this eliminates a dimension. 

3716 d = r.diagonal(offset=0, axis1=-2, axis2=-1) 

3717 # Add back a dimension for proper broadcasting: we're dividing 

3718 # each row of each R matrix by the diagonal of the R matrix. 

3719 q *= (d/abs(d))[..., np.newaxis, :] # to broadcast properly 

3720 return q 

3721 

3722 

3723ortho_group = ortho_group_gen() 
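# A single-sample sketch of the QR construction used in `ortho_group_gen.rvs`
# above: orthonormalize a Gaussian matrix and rescale each column by the sign
# of the corresponding diagonal entry of R, which removes the sign bias of the
# plain QR factorization (Mezzadri [1]).  `_o_n_sketch` is illustrative only.
import numpy as np

def _o_n_sketch(dim, rng):
    z = rng.normal(size=(dim, dim))
    q, r = np.linalg.qr(z)
    d = np.diagonal(r)
    return q * (d / np.abs(d))

_rng = np.random.default_rng(7)
_Q = _o_n_sketch(5, _rng)
assert np.allclose(_Q @ _Q.T, np.eye(5))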

3724 

3725 

3726class ortho_group_frozen(multi_rv_frozen): 

3727 def __init__(self, dim=None, seed=None): 

3728 """Create a frozen O(N) distribution. 

3729 

3730 Parameters 

3731 ---------- 

3732 dim : scalar 

3733 Dimension of matrices 

3734 seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional 

3735 If `seed` is None (or `np.random`), the `numpy.random.RandomState` 

3736 singleton is used. 

3737 If `seed` is an int, a new ``RandomState`` instance is used, 

3738 seeded with `seed`. 

3739 If `seed` is already a ``Generator`` or ``RandomState`` instance 

3740 then that instance is used. 

3741 

3742 Examples 

3743 -------- 

3744 >>> from scipy.stats import ortho_group 

3745 >>> g = ortho_group(5) 

3746 >>> x = g.rvs() 

3747 

3748 """ 

3749 self._dist = ortho_group_gen(seed) 

3750 self.dim = self._dist._process_parameters(dim) 

3751 

3752 def rvs(self, size=1, random_state=None): 

3753 return self._dist.rvs(self.dim, size, random_state) 

3754 

3755 

3756class random_correlation_gen(multi_rv_generic): 

3757 r"""A random correlation matrix. 

3758 

3759 Return a random correlation matrix, given a vector of eigenvalues. 

3760 

3761 The `eigs` keyword specifies the eigenvalues of the correlation matrix, 

3762 and implies the dimension. 

3763 

3764 Methods 

3765 ------- 

3766 rvs(eigs=None, random_state=None) 

3767 Draw random correlation matrices, all with eigenvalues eigs. 

3768 

3769 Parameters 

3770 ---------- 

3771 eigs : 1d ndarray 

3772 Eigenvalues of correlation matrix 

3773 seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional 

3774 If `seed` is None (or `np.random`), the `numpy.random.RandomState` 

3775 singleton is used. 

3776 If `seed` is an int, a new ``RandomState`` instance is used, 

3777 seeded with `seed`. 

3778 If `seed` is already a ``Generator`` or ``RandomState`` instance 

3779 then that instance is used. 

3780 tol : float, optional 

3781 Tolerance for input parameter checks 

3782 diag_tol : float, optional 

3783 Tolerance for deviation of the diagonal of the resulting 

3784 matrix. Default: 1e-7 

3785 

3786 Raises 

3787 ------ 

3788 RuntimeError 

3789 Floating point error prevented generating a valid correlation 

3790 matrix. 

3791 

3792 Returns 

3793 ------- 

3794 rvs : ndarray 

3795 Random correlation matrix of shape (dim, dim), having 

3796 eigenvalues eigs. 

3797 

3798 Notes 

3799 ----- 

3800 

3801 Generates a random correlation matrix following a numerically stable 

3802 algorithm spelled out by Davies & Higham. This algorithm uses a single O(N) 

3803 similarity transformation to construct a symmetric positive semi-definite 

3804 matrix, and applies a series of Givens rotations to scale it to have ones 

3805 on the diagonal. 

3806 

3807 References 

3808 ---------- 

3809 

3810 .. [1] Davies, Philip I; Higham, Nicholas J; "Numerically stable generation 

3811 of correlation matrices and their factors", BIT 2000, Vol. 40, 

3812 No. 4, pp. 640-651 

3813 

3814 Examples 

3815 -------- 

3816 >>> import numpy as np 

3817 >>> from scipy.stats import random_correlation 

3818 >>> rng = np.random.default_rng() 

3819 >>> x = random_correlation.rvs((.5, .8, 1.2, 1.5), random_state=rng) 

3820 >>> x 

3821 array([[ 1. , -0.02423399, 0.03130519, 0.4946965 ], 

3822 [-0.02423399, 1. , 0.20334736, 0.04039817], 

3823 [ 0.03130519, 0.20334736, 1. , 0.02694275], 

3824 [ 0.4946965 , 0.04039817, 0.02694275, 1. ]]) 

3825 >>> import scipy.linalg 

3826 >>> e, v = scipy.linalg.eigh(x) 

3827 >>> e 

3828 array([ 0.5, 0.8, 1.2, 1.5]) 

3829 

3830 """ 

3831 

3832 def __init__(self, seed=None): 

3833 super().__init__(seed) 

3834 self.__doc__ = doccer.docformat(self.__doc__) 

3835 

3836 def __call__(self, eigs, seed=None, tol=1e-13, diag_tol=1e-7): 

3837 """Create a frozen random correlation matrix. 

3838 

3839 See `random_correlation_frozen` for more information. 

3840 """ 

3841 return random_correlation_frozen(eigs, seed=seed, tol=tol, 

3842 diag_tol=diag_tol) 

3843 

3844 def _process_parameters(self, eigs, tol): 

3845 eigs = np.asarray(eigs, dtype=float) 

3846 dim = eigs.size 

3847 

3848 if eigs.ndim != 1 or eigs.shape[0] != dim or dim <= 1: 

3849 raise ValueError("Array 'eigs' must be a vector of length " 

3850 "greater than 1.") 

3851 

3852 if np.fabs(np.sum(eigs) - dim) > tol: 

3853 raise ValueError("Sum of eigenvalues must equal dimensionality.") 

3854 

3855 for x in eigs: 

3856 if x < -tol: 

3857 raise ValueError("All eigenvalues must be non-negative.") 

3858 

3859 return dim, eigs 

3860 

3861 def _givens_to_1(self, aii, ajj, aij): 

3862 """Computes a 2x2 Givens matrix to put 1's on the diagonal. 

3863 

3864 The input matrix is a 2x2 symmetric matrix M = [ aii aij ; aij ajj ]. 

3865 

3866 The output matrix g is a 2x2 Givens rotation matrix of the form 

3867 [ c s ; -s c ]; the elements c and s are returned. 

3868 

3869 Applying the output matrix to the input matrix (as b=g.T M g) 

3870 results in a matrix with bii=1, provided tr(M) - det(M) >= 1 

3871 and floating point issues do not occur. Otherwise, some other 

3872 valid rotation is returned. When tr(M)==2, also bjj=1. 

3873 

3874 """ 

3875 aiid = aii - 1. 

3876 ajjd = ajj - 1. 

3877 

3878 if ajjd == 0: 

3879 # ajj==1, so swap aii and ajj to avoid division by zero 

3880 return 0., 1. 

3881 

3882 dd = math.sqrt(max(aij**2 - aiid*ajjd, 0)) 

3883 

3884 # The sign of the root is chosen to match aij so as to avoid cancellation [1] 

3885 t = (aij + math.copysign(dd, aij)) / ajjd 

3886 c = 1. / math.sqrt(1. + t*t) 

3887 if c == 0: 

3888 # Underflow 

3889 s = 1.0 

3890 else: 

3891 s = c*t 

3892 return c, s 

3893 

3894 def _to_corr(self, m): 

3895 """ 

3896 Given a psd matrix m, rotate to put ones on the diagonal, turning it 

3897 into a correlation matrix. This requires the trace to equal the 

3898 dimensionality. Note: modifies the input matrix in place. 

3899 """ 

3900 # Check requirements for in-place Givens 

3901 if not (m.flags.c_contiguous and m.dtype == np.float64 and 

3902 m.shape[0] == m.shape[1]): 

3903 raise ValueError() 

3904 

3905 d = m.shape[0] 

3906 for i in range(d-1): 

3907 if m[i, i] == 1: 

3908 continue 

3909 elif m[i, i] > 1: 

3910 for j in range(i+1, d): 

3911 if m[j, j] < 1: 

3912 break 

3913 else: 

3914 for j in range(i+1, d): 

3915 if m[j, j] > 1: 

3916 break 

3917 

3918 c, s = self._givens_to_1(m[i, i], m[j, j], m[i, j]) 

3919 

3920 # Use BLAS to apply Givens rotations in-place. Equivalent to: 

3921 # g = np.eye(d) 

3922 # g[i, i] = g[j,j] = c 

3923 # g[j, i] = -s; g[i, j] = s 

3924 # m = np.dot(g.T, np.dot(m, g)) 

3925 mv = m.ravel() 

3926 drot(mv, mv, c, -s, n=d, 

3927 offx=i*d, incx=1, offy=j*d, incy=1, 

3928 overwrite_x=True, overwrite_y=True) 

3929 drot(mv, mv, c, -s, n=d, 

3930 offx=i, incx=d, offy=j, incy=d, 

3931 overwrite_x=True, overwrite_y=True) 

3932 

3933 return m 

3934 

3935 def rvs(self, eigs, random_state=None, tol=1e-13, diag_tol=1e-7): 

3936 """Draw random correlation matrices. 

3937 

3938 Parameters 

3939 ---------- 

3940 eigs : 1d ndarray 

3941 Eigenvalues of correlation matrix 

3942 tol : float, optional 

3943 Tolerance for input parameter checks 

3944 diag_tol : float, optional 

3945 Tolerance for deviation of the diagonal of the resulting 

3946 matrix. Default: 1e-7 

3947 

3948 Raises 

3949 ------ 

3950 RuntimeError 

3951 Floating point error prevented generating a valid correlation 

3952 matrix. 

3953 

3954 Returns 

3955 ------- 

3956 rvs : ndarray 

3957 Random correlation matrix of shape (dim, dim), having 

3958 eigenvalues eigs. 

3959 

3960 """ 

3961 dim, eigs = self._process_parameters(eigs, tol=tol) 

3962 

3963 random_state = self._get_random_state(random_state) 

3964 

3965 m = ortho_group.rvs(dim, random_state=random_state) 

3966 m = np.dot(np.dot(m, np.diag(eigs)), m.T) # Set the trace of m 

3967 m = self._to_corr(m) # Carefully rotate to unit diagonal 

3968 

3969 # Check diagonal 

3970 if abs(m.diagonal() - 1).max() > diag_tol: 

3971 raise RuntimeError("Failed to generate a valid correlation matrix") 

3972 

3973 return m 

3974 

3975 

3976random_correlation = random_correlation_gen() 
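# A small numerical check of the construction above: the returned matrix has a
# unit diagonal and, because only orthogonal similarity transformations are
# applied, the requested eigenvalues up to rounding.  The underscored names
# are local to this example.
import numpy as np

_rng = np.random.default_rng(2023)
_eigs = (0.2, 0.8, 1.2, 1.8)                 # must sum to the dimension, 4
_C = random_correlation.rvs(_eigs, random_state=_rng)
assert np.allclose(np.diag(_C), 1.0)
assert np.allclose(np.sort(np.linalg.eigvalsh(_C)), np.sort(_eigs))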

3977 

3978 

3979class random_correlation_frozen(multi_rv_frozen): 

3980 def __init__(self, eigs, seed=None, tol=1e-13, diag_tol=1e-7): 

3981 """Create a frozen random correlation matrix distribution. 

3982 

3983 Parameters 

3984 ---------- 

3985 eigs : 1d ndarray 

3986 Eigenvalues of correlation matrix 

3987 seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional 

3988 If `seed` is None (or `np.random`), the `numpy.random.RandomState` 

3989 singleton is used. 

3990 If `seed` is an int, a new ``RandomState`` instance is used, 

3991 seeded with `seed`. 

3992 If `seed` is already a ``Generator`` or ``RandomState`` instance 

3993 then that instance is used. 

3994 tol : float, optional 

3995 Tolerance for input parameter checks 

3996 diag_tol : float, optional 

3997 Tolerance for deviation of the diagonal of the resulting 

3998 matrix. Default: 1e-7 

3999 

4000 Raises 

4001 ------ 

4002 RuntimeError 

4003 Floating point error prevented generating a valid correlation 

4004 matrix. 

4005 

4006 Returns 

4007 ------- 

4008 rvs : ndarray 

4009 Random correlation matrix of shape (dim, dim), having 

4010 eigenvalues eigs. 

4011 """ 

4012 

4013 self._dist = random_correlation_gen(seed) 

4014 self.tol = tol 

4015 self.diag_tol = diag_tol 

4016 _, self.eigs = self._dist._process_parameters(eigs, tol=self.tol) 

4017 

4018 def rvs(self, random_state=None): 

4019 return self._dist.rvs(self.eigs, random_state=random_state, 

4020 tol=self.tol, diag_tol=self.diag_tol) 

4021 

4022 

4023class unitary_group_gen(multi_rv_generic): 

4024 r"""A matrix-valued U(N) random variable. 

4025 

4026 Return a random unitary matrix. 

4027 

4028 The `dim` keyword specifies the dimension N. 

4029 

4030 Methods 

4031 ------- 

4032 rvs(dim=None, size=1, random_state=None) 

4033 Draw random samples from U(N). 

4034 

4035 Parameters 

4036 ---------- 

4037 dim : scalar 

4038 Dimension of matrices 

4039 seed : {None, int, np.random.RandomState, np.random.Generator}, optional 

4040 Used for drawing random variates. 

4041 If `seed` is `None`, the `~np.random.RandomState` singleton is used. 

4042 If `seed` is an int, a new ``RandomState`` instance is used, seeded 

4043 with seed. 

4044 If `seed` is already a ``RandomState`` or ``Generator`` instance, 

4045 then that object is used. 

4046 Default is `None`. 

4047 

4048 Notes 

4049 ----- 

4050 This class is similar to `ortho_group`. 

4051 

4052 References 

4053 ---------- 

4054 .. [1] F. Mezzadri, "How to generate random matrices from the classical 

4055 compact groups", :arXiv:`math-ph/0609050v2`. 

4056 

4057 Examples 

4058 -------- 

4059 >>> import numpy as np 

4060 >>> from scipy.stats import unitary_group 

4061 >>> x = unitary_group.rvs(3) 

4062 

4063 >>> np.dot(x, x.conj().T) 

4064 array([[ 1.00000000e+00, 1.13231364e-17, -2.86852790e-16], 

4065 [ 1.13231364e-17, 1.00000000e+00, -1.46845020e-16], 

4066 [ -2.86852790e-16, -1.46845020e-16, 1.00000000e+00]]) 

4067 

4068 This generates one random matrix from U(3). The dot product confirms that 

4069 it is unitary up to machine precision. 

4070 

4071 Alternatively, the object may be called (as a function) to fix the `dim` 

4072 parameter, returning a "frozen" unitary_group random variable: 

4073 

4074 >>> rv = unitary_group(5) 

4075 

4076 See Also 

4077 -------- 

4078 ortho_group 

4079 

4080 """ 

4081 

4082 def __init__(self, seed=None): 

4083 super().__init__(seed) 

4084 self.__doc__ = doccer.docformat(self.__doc__) 

4085 

4086 def __call__(self, dim=None, seed=None): 

4087 """Create a frozen (U(N)) n-dimensional unitary matrix distribution. 

4088 

4089 See `unitary_group_frozen` for more information. 

4090 """ 

4091 return unitary_group_frozen(dim, seed=seed) 

4092 

4093 def _process_parameters(self, dim): 

4094 """Dimension N must be specified; it cannot be inferred.""" 

4095 if dim is None or not np.isscalar(dim) or dim <= 1 or dim != int(dim): 

4096 raise ValueError("Dimension of rotation must be specified," 

4097 "and must be a scalar greater than 1.") 

4098 

4099 return dim 

4100 

4101 def rvs(self, dim, size=1, random_state=None): 

4102 """Draw random samples from U(N). 

4103 

4104 Parameters 

4105 ---------- 

4106 dim : integer 

4107 Dimension of space (N). 

4108 size : integer, optional 

4109 Number of samples to draw (default 1). 

4110 

4111 Returns 

4112 ------- 

4113 rvs : ndarray or scalar 

4114 Random size N-dimensional matrices, dimension (size, dim, dim) 

4115 

4116 """ 

4117 random_state = self._get_random_state(random_state) 

4118 

4119 size = int(size) 

4120 if size > 1 and NumpyVersion(np.__version__) < '1.22.0': 

4121 return np.array([self.rvs(dim, size=1, random_state=random_state) 

4122 for i in range(size)]) 

4123 

4124 dim = self._process_parameters(dim) 

4125 

4126 size = (size,) if size > 1 else () 

4127 z = 1/math.sqrt(2)*(random_state.normal(size=size + (dim, dim)) + 

4128 1j*random_state.normal(size=size + (dim, dim))) 

4129 q, r = np.linalg.qr(z) 

4130 # The last two dimensions are the rows and columns of R matrices. 

4131 # Extract the diagonals. Note that this eliminates a dimension. 

4132 d = r.diagonal(offset=0, axis1=-2, axis2=-1) 

4133 # Add back a dimension for proper broadcasting: we're dividing 

4134 # each row of each R matrix by the diagonal of the R matrix. 

4135 q *= (d/abs(d))[..., np.newaxis, :] # to broadcast properly 

4136 return q 

4137 

4138 

4139unitary_group = unitary_group_gen() 
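# A single-sample sketch of the complex QR construction used in
# `unitary_group_gen.rvs` above; dividing each column of Q by the phase of the
# matching diagonal entry of R yields the Haar measure on U(N) (Mezzadri [1]).
# `_u_n_sketch` is a name invented for this example.
import math
import numpy as np

def _u_n_sketch(dim, rng):
    z = (rng.normal(size=(dim, dim)) +
         1j * rng.normal(size=(dim, dim))) / math.sqrt(2)
    q, r = np.linalg.qr(z)
    d = np.diagonal(r)
    return q * (d / np.abs(d))

_rng = np.random.default_rng(11)
_U = _u_n_sketch(3, _rng)
assert np.allclose(_U @ _U.conj().T, np.eye(3))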

4140 

4141 

4142class unitary_group_frozen(multi_rv_frozen): 

4143 def __init__(self, dim=None, seed=None): 

4144 """Create a frozen (U(N)) n-dimensional unitary matrix distribution. 

4145 

4146 Parameters 

4147 ---------- 

4148 dim : scalar 

4149 Dimension of matrices 

4150 seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional 

4151 If `seed` is None (or `np.random`), the `numpy.random.RandomState` 

4152 singleton is used. 

4153 If `seed` is an int, a new ``RandomState`` instance is used, 

4154 seeded with `seed`. 

4155 If `seed` is already a ``Generator`` or ``RandomState`` instance 

4156 then that instance is used. 

4157 

4158 Examples 

4159 -------- 

4160 >>> from scipy.stats import unitary_group 

4161 >>> x = unitary_group(3) 

4162 >>> x.rvs() 

4163 

4164 """ 

4165 self._dist = unitary_group_gen(seed) 

4166 self.dim = self._dist._process_parameters(dim) 

4167 

4168 def rvs(self, size=1, random_state=None): 

4169 return self._dist.rvs(self.dim, size, random_state) 

4170 

4171 

4172_mvt_doc_default_callparams = """\ 

4173loc : array_like, optional 

4174 Location of the distribution. (default ``0``) 

4175shape : array_like, optional 

4176 Positive semidefinite matrix of the distribution. (default ``1``) 

4177df : float, optional 

4178 Degrees of freedom of the distribution; must be greater than zero. 

4179 If ``np.inf`` then results are multivariate normal. The default is ``1``. 

4180allow_singular : bool, optional 

4181 Whether to allow a singular matrix. (default ``False``) 

4182""" 

4183 

4184_mvt_doc_callparams_note = """\ 

4185Setting the parameter `loc` to ``None`` is equivalent to having `loc` 

4186be the zero-vector. The parameter `shape` can be a scalar, in which case 

4187the shape matrix is the identity times that value, a vector of 

4188diagonal entries for the shape matrix, or a two-dimensional array_like. 

4189""" 

4190 

4191_mvt_doc_frozen_callparams_note = """\ 

4192See class definition for a detailed description of parameters.""" 

4193 

4194mvt_docdict_params = { 

4195 '_mvt_doc_default_callparams': _mvt_doc_default_callparams, 

4196 '_mvt_doc_callparams_note': _mvt_doc_callparams_note, 

4197 '_doc_random_state': _doc_random_state 

4198} 

4199 

4200mvt_docdict_noparams = { 

4201 '_mvt_doc_default_callparams': "", 

4202 '_mvt_doc_callparams_note': _mvt_doc_frozen_callparams_note, 

4203 '_doc_random_state': _doc_random_state 

4204} 

4205 

4206 

4207class multivariate_t_gen(multi_rv_generic): 

4208 r"""A multivariate t-distributed random variable. 

4209 

4210 The `loc` parameter specifies the location. The `shape` parameter specifies 

4211 the positive semidefinite shape matrix. The `df` parameter specifies the 

4212 degrees of freedom. 

4213 

4214 In addition to calling the methods below, the object itself may be called 

4215 as a function to fix the location, shape matrix, and degrees of freedom 

4216 parameters, returning a "frozen" multivariate t-distribution random variable. 

4217 

4218 Methods 

4219 ------- 

4220 pdf(x, loc=None, shape=1, df=1, allow_singular=False) 

4221 Probability density function. 

4222 logpdf(x, loc=None, shape=1, df=1, allow_singular=False) 

4223 Log of the probability density function. 

4224 rvs(loc=None, shape=1, df=1, size=1, random_state=None) 

4225 Draw random samples from a multivariate t-distribution. 

4226 

4227 Parameters 

4228 ---------- 

4229 %(_mvt_doc_default_callparams)s 

4230 %(_doc_random_state)s 

4231 

4232 Notes 

4233 ----- 

4234 %(_mvt_doc_callparams_note)s 

4235 The matrix `shape` must be a (symmetric) positive semidefinite matrix. The 

4236 determinant and inverse of `shape` are computed as the pseudo-determinant 

4237 and pseudo-inverse, respectively, so that `shape` does not need to have 

4238 full rank. 

4239 

4240 The probability density function for `multivariate_t` is 

4241 

4242 .. math:: 

4243 

4244 f(x) = \frac{\Gamma((\nu + p)/2)}{\Gamma(\nu/2)\nu^{p/2}\pi^{p/2}|\Sigma|^{1/2}} 

4245 \left[1 + \frac{1}{\nu} (\mathbf{x} - \boldsymbol{\mu})^{\top} 

4246 \boldsymbol{\Sigma}^{-1} 

4247 (\mathbf{x} - \boldsymbol{\mu}) \right]^{-(\nu + p)/2}, 

4248 

4249 where :math:`p` is the dimension of :math:`\mathbf{x}`, 

4250 :math:`\boldsymbol{\mu}` is the :math:`p`-dimensional location, 

4251 :math:`\boldsymbol{\Sigma}` the :math:`p \times p`-dimensional shape 

4252 matrix, and :math:`\nu` is the degrees of freedom. 

4253 

4254 .. versionadded:: 1.6.0 

4255 

4256 Examples 

4257 -------- 

4258 The object may be called (as a function) to fix the `loc`, `shape`, 

4259 `df`, and `allow_singular` parameters, returning a "frozen" 

4260 multivariate_t random variable: 

4261 

4262 >>> import numpy as np 

4263 >>> from scipy.stats import multivariate_t 

4264 >>> rv = multivariate_t([1.0, -0.5], [[2.1, 0.3], [0.3, 1.5]], df=2) 

4265 >>> # Frozen object with the same methods but holding the given location, 

4266 >>> # scale, and degrees of freedom fixed. 

4267 

4268 Create a contour plot of the PDF. 

4269 

4270 >>> import matplotlib.pyplot as plt 

4271 >>> x, y = np.mgrid[-1:3:.01, -2:1.5:.01] 

4272 >>> pos = np.dstack((x, y)) 

4273 >>> fig, ax = plt.subplots(1, 1) 

4274 >>> ax.set_aspect('equal') 

4275 >>> plt.contourf(x, y, rv.pdf(pos)) 

4276 

4277 """ 

4278 

4279 def __init__(self, seed=None): 

4280 """Initialize a multivariate t-distributed random variable. 

4281 

4282 Parameters 

4283 ---------- 

4284 seed : Random state. 

4285 

4286 """ 

4287 super().__init__(seed) 

4288 self.__doc__ = doccer.docformat(self.__doc__, mvt_docdict_params) 

4289 self._random_state = check_random_state(seed) 

4290 

4291 def __call__(self, loc=None, shape=1, df=1, allow_singular=False, 

4292 seed=None): 

4293 """Create a frozen multivariate t-distribution. 

4294 

4295 See `multivariate_t_frozen` for parameters. 

4296 """ 

4297 if df == np.inf: 

4298 return multivariate_normal_frozen(mean=loc, cov=shape, 

4299 allow_singular=allow_singular, 

4300 seed=seed) 

4301 return multivariate_t_frozen(loc=loc, shape=shape, df=df, 

4302 allow_singular=allow_singular, seed=seed) 

4303 

4304 def pdf(self, x, loc=None, shape=1, df=1, allow_singular=False): 

4305 """Multivariate t-distribution probability density function. 

4306 

4307 Parameters 

4308 ---------- 

4309 x : array_like 

4310 Points at which to evaluate the probability density function. 

4311 %(_mvt_doc_default_callparams)s 

4312 

4313 Returns 

4314 ------- 

4315 pdf : Probability density function evaluated at `x`. 

4316 

4317 Examples 

4318 -------- 

4319 >>> from scipy.stats import multivariate_t 

4320 >>> x = [0.4, 5] 

4321 >>> loc = [0, 1] 

4322 >>> shape = [[1, 0.1], [0.1, 1]] 

4323 >>> df = 7 

4324 >>> multivariate_t.pdf(x, loc, shape, df) 

4325 array([0.00075713]) 

4326 

4327 """ 

4328 dim, loc, shape, df = self._process_parameters(loc, shape, df) 

4329 x = self._process_quantiles(x, dim) 

4330 shape_info = _PSD(shape, allow_singular=allow_singular) 

4331 logpdf = self._logpdf(x, loc, shape_info.U, shape_info.log_pdet, df, 

4332 dim, shape_info.rank) 

4333 return np.exp(logpdf) 

4334 

4335 def logpdf(self, x, loc=None, shape=1, df=1): 

4336 """Log of the multivariate t-distribution probability density function. 

4337 

4338 Parameters 

4339 ---------- 

4340 x : array_like 

4341 Points at which to evaluate the log of the probability density 

4342 function. 

4343 %(_mvt_doc_default_callparams)s 

4344 

4345 Returns 

4346 ------- 

4347 logpdf : Log of the probability density function evaluated at `x`. 

4348 

4349 Examples 

4350 -------- 

4351 >>> from scipy.stats import multivariate_t 

4352 >>> x = [0.4, 5] 

4353 >>> loc = [0, 1] 

4354 >>> shape = [[1, 0.1], [0.1, 1]] 

4355 >>> df = 7 

4356 >>> multivariate_t.logpdf(x, loc, shape, df) 

4357 array([-7.1859802]) 

4358 

4359 See Also 

4360 -------- 

4361 pdf : Probability density function. 

4362 

4363 """ 

4364 dim, loc, shape, df = self._process_parameters(loc, shape, df) 

4365 x = self._process_quantiles(x, dim) 

4366 shape_info = _PSD(shape) 

4367 return self._logpdf(x, loc, shape_info.U, shape_info.log_pdet, df, dim, 

4368 shape_info.rank) 

4369 

4370 def _logpdf(self, x, loc, prec_U, log_pdet, df, dim, rank): 

4371 """Utility method `pdf`, `logpdf` for parameters. 

4372 

4373 Parameters 

4374 ---------- 

4375 x : ndarray 

4376 Points at which to evaluate the log of the probability density 

4377 function. 

4378 loc : ndarray 

4379 Location of the distribution. 

4380 prec_U : ndarray 

4381 A decomposition such that `np.dot(prec_U, prec_U.T)` is the inverse 

4382 of the shape matrix. 

4383 log_pdet : float 

4384 Logarithm of the determinant of the shape matrix. 

4385 df : float 

4386 Degrees of freedom of the distribution. 

4387 dim : int 

4388 Dimension of the quantiles x. 

4389 rank : int 

4390 Rank of the shape matrix. 

4391 

4392 Notes 

4393 ----- 

4394 As this function does no argument checking, it should not be called 

4395 directly; use 'logpdf' instead. 

4396 

4397 """ 

4398 if df == np.inf: 

4399 return multivariate_normal._logpdf(x, loc, prec_U, log_pdet, rank) 

4400 

4401 dev = x - loc 

4402 maha = np.square(np.dot(dev, prec_U)).sum(axis=-1) 

4403 

4404 t = 0.5 * (df + dim) 

4405 A = gammaln(t) 

4406 B = gammaln(0.5 * df) 

4407 C = dim/2. * np.log(df * np.pi) 

4408 D = 0.5 * log_pdet 

4409 E = -t * np.log(1 + (1./df) * maha) 

4410 

4411 return _squeeze_output(A - B - C - D + E) 

4412 

4413 def rvs(self, loc=None, shape=1, df=1, size=1, random_state=None): 

4414 """Draw random samples from a multivariate t-distribution. 

4415 

4416 Parameters 

4417 ---------- 

4418 %(_mvt_doc_default_callparams)s 

4419 size : integer, optional 

4420 Number of samples to draw (default 1). 

4421 %(_doc_random_state)s 

4422 

4423 Returns 

4424 ------- 

4425 rvs : ndarray or scalar 

4426 Random variates of size (`size`, `P`), where `P` is the 

4427 dimension of the random variable. 

4428 

4429 Examples 

4430 -------- 

4431 >>> from scipy.stats import multivariate_t 

4432 >>> x = [0.4, 5] 

4433 >>> loc = [0, 1] 

4434 >>> shape = [[1, 0.1], [0.1, 1]] 

4435 >>> df = 7 

4436 >>> multivariate_t.rvs(loc, shape, df) 

4437 array([[0.93477495, 3.00408716]]) 

4438 

4439 """ 

4440 # For implementation details, see equation (3): 

4441 # 

4442 # Hofert, "On Sampling from the Multivariate t Distribution", 2013 

4443 # http://rjournal.github.io/archive/2013-2/hofert.pdf 

4444 # 

4445 dim, loc, shape, df = self._process_parameters(loc, shape, df) 

4446 if random_state is not None: 

4447 rng = check_random_state(random_state) 

4448 else: 

4449 rng = self._random_state 

4450 

4451 if np.isinf(df): 

4452 x = np.ones(size) 

4453 else: 

4454 x = rng.chisquare(df, size=size) / df 

4455 

4456 z = rng.multivariate_normal(np.zeros(dim), shape, size=size) 

4457 samples = loc + z / np.sqrt(x)[..., None] 

4458 return _squeeze_output(samples) 

4459 

4460 def _process_quantiles(self, x, dim): 

4461 """ 

4462 Adjust quantiles array so that last axis labels the components of 

4463 each data point. 

4464 """ 

4465 x = np.asarray(x, dtype=float) 

4466 if x.ndim == 0: 

4467 x = x[np.newaxis] 

4468 elif x.ndim == 1: 

4469 if dim == 1: 

4470 x = x[:, np.newaxis] 

4471 else: 

4472 x = x[np.newaxis, :] 

4473 return x 

4474 

4475 def _process_parameters(self, loc, shape, df): 

4476 """ 

4477 Infer dimensionality from location array and shape matrix, handle 

4478 defaults, and ensure compatible dimensions. 

4479 """ 

4480 if loc is None and shape is None: 

4481 loc = np.asarray(0, dtype=float) 

4482 shape = np.asarray(1, dtype=float) 

4483 dim = 1 

4484 elif loc is None: 

4485 shape = np.asarray(shape, dtype=float) 

4486 if shape.ndim < 2: 

4487 dim = 1 

4488 else: 

4489 dim = shape.shape[0] 

4490 loc = np.zeros(dim) 

4491 elif shape is None: 

4492 loc = np.asarray(loc, dtype=float) 

4493 dim = loc.size 

4494 shape = np.eye(dim) 

4495 else: 

4496 shape = np.asarray(shape, dtype=float) 

4497 loc = np.asarray(loc, dtype=float) 

4498 dim = loc.size 

4499 

4500 if dim == 1: 

4501 loc = loc.reshape(1) 

4502 shape = shape.reshape(1, 1) 

4503 

4504 if loc.ndim != 1 or loc.shape[0] != dim: 

4505 raise ValueError("Array 'loc' must be a vector of length %d." % 

4506 dim) 

4507 if shape.ndim == 0: 

4508 shape = shape * np.eye(dim) 

4509 elif shape.ndim == 1: 

4510 shape = np.diag(shape) 

4511 elif shape.ndim == 2 and shape.shape != (dim, dim): 

4512 rows, cols = shape.shape 

4513 if rows != cols: 

4514 msg = ("Array 'cov' must be square if it is two dimensional," 

4515 " but cov.shape = %s." % str(shape.shape)) 

4516 else: 

4517 msg = ("Dimension mismatch: array 'cov' is of shape %s," 

4518 " but 'loc' is a vector of length %d.") 

4519 msg = msg % (str(shape.shape), len(loc)) 

4520 raise ValueError(msg) 

4521 elif shape.ndim > 2: 

4522 raise ValueError("Array 'cov' must be at most two-dimensional," 

4523 " but cov.ndim = %d" % shape.ndim) 

4524 

4525 # Process degrees of freedom. 

4526 if df is None: 

4527 df = 1 

4528 elif df <= 0: 

4529 raise ValueError("'df' must be greater than zero.") 

4530 elif np.isnan(df): 

4531 raise ValueError("'df' is 'nan' but must be greater than zero or 'np.inf'.") 

4532 

4533 return dim, loc, shape, df 

4534 

4535 

4536class multivariate_t_frozen(multi_rv_frozen): 

4537 

4538 def __init__(self, loc=None, shape=1, df=1, allow_singular=False, 

4539 seed=None): 

4540 """Create a frozen multivariate t distribution. 

4541 

4542 Parameters 

4543 ---------- 

4544 %(_mvt_doc_default_callparams)s 

4545 

4546 Examples 

4547 -------- 

4548 >>> import numpy as np 

4549 >>> loc = np.zeros(3) 

4550 >>> shape = np.eye(3) 

4551 >>> df = 10 

4552 >>> dist = multivariate_t(loc, shape, df) 

4553 >>> dist.rvs() 

4554 array([[ 0.81412036, -1.53612361, 0.42199647]]) 

4555 >>> dist.pdf([1, 1, 1]) 

4556 array([0.01237803]) 

4557 

4558 """ 

4559 self._dist = multivariate_t_gen(seed) 

4560 dim, loc, shape, df = self._dist._process_parameters(loc, shape, df) 

4561 self.dim, self.loc, self.shape, self.df = dim, loc, shape, df 

4562 self.shape_info = _PSD(shape, allow_singular=allow_singular) 

4563 

4564 def logpdf(self, x): 

4565 x = self._dist._process_quantiles(x, self.dim) 

4566 U = self.shape_info.U 

4567 log_pdet = self.shape_info.log_pdet 

4568 return self._dist._logpdf(x, self.loc, U, log_pdet, self.df, self.dim, 

4569 self.shape_info.rank) 

4570 

4571 def pdf(self, x): 

4572 return np.exp(self.logpdf(x)) 

4573 

4574 def rvs(self, size=1, random_state=None): 

4575 return self._dist.rvs(loc=self.loc, 

4576 shape=self.shape, 

4577 df=self.df, 

4578 size=size, 

4579 random_state=random_state) 

4580 

4581 

4582multivariate_t = multivariate_t_gen() 
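# A by-hand check of the log-density decomposition A - B - C - D + E used in
# `multivariate_t_gen._logpdf` above, for a full-rank 2-D example (the
# underscored names are local to this check).
import numpy as np
from scipy.special import gammaln

_x = np.array([0.4, 5.0])
_loc = np.array([0.0, 1.0])
_shape = np.array([[1.0, 0.1], [0.1, 1.0]])
_df, _p = 7.0, 2
_dev = _x - _loc
_maha = _dev @ np.linalg.inv(_shape) @ _dev
_half = 0.5 * (_df + _p)
_by_hand = (gammaln(_half) - gammaln(0.5 * _df)
            - 0.5 * _p * np.log(_df * np.pi)
            - 0.5 * np.log(np.linalg.det(_shape))
            - _half * np.log1p(_maha / _df))
assert np.allclose(_by_hand, multivariate_t.logpdf(_x, _loc, _shape, _df))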

4583 

4584 

4585# Set frozen generator docstrings from corresponding docstrings in 

4586# matrix_normal_gen and fill in default strings in class docstrings 

4587for name in ['logpdf', 'pdf', 'rvs']: 

4588 method = multivariate_t_gen.__dict__[name] 

4589 method_frozen = multivariate_t_frozen.__dict__[name] 

4590 method_frozen.__doc__ = doccer.docformat(method.__doc__, 

4591 mvt_docdict_noparams) 

4592 method.__doc__ = doccer.docformat(method.__doc__, mvt_docdict_params) 
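# A sketch of the sampling recipe in `multivariate_t_gen.rvs` above (Hofert
# 2013, eq. (3)): scale a multivariate normal draw by the square root of an
# independent chi-square draw divided by df.  `_mvt_rvs_sketch` is a name
# invented for this example.
import numpy as np

def _mvt_rvs_sketch(loc, shape, df, size, rng):
    w = rng.chisquare(df, size=size) / df
    z = rng.multivariate_normal(np.zeros(len(loc)), shape, size=size)
    return np.asarray(loc) + z / np.sqrt(w)[..., None]

_rng = np.random.default_rng(3)
_draws = _mvt_rvs_sketch(loc=[1.0, -0.5], shape=[[2.1, 0.3], [0.3, 1.5]],
                         df=5.0, size=1000, rng=_rng)
assert _draws.shape == (1000, 2)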

4593 

4594 

4595_mhg_doc_default_callparams = """\ 

4596m : array_like 

4597 The number of each type of object in the population. 

4598 That is, :math:`m[i]` is the number of objects of 

4599 type :math:`i`. 

4600n : array_like 

4601 The number of samples taken from the population. 

4602""" 

4603 

4604_mhg_doc_callparams_note = """\ 

4605`m` must be an array of positive integers. If the quantile 

4606:math:`i` contains values out of the range :math:`[0, m_i]` 

4607where :math:`m_i` is the number of objects of type :math:`i` 

4608in the population or if the parameters are inconsistent with one 

4609another (e.g. ``x.sum() != n``), methods return the appropriate 

4610value (e.g. ``0`` for ``pmf``). If `m` or `n` contain negative 

4611values, the result will contain ``nan`` there. 

4612""" 

4613 

4614_mhg_doc_frozen_callparams = "" 

4615 

4616_mhg_doc_frozen_callparams_note = """\ 

4617See class definition for a detailed description of parameters.""" 

4618 

4619mhg_docdict_params = { 

4620 '_doc_default_callparams': _mhg_doc_default_callparams, 

4621 '_doc_callparams_note': _mhg_doc_callparams_note, 

4622 '_doc_random_state': _doc_random_state 

4623} 

4624 

4625mhg_docdict_noparams = { 

4626 '_doc_default_callparams': _mhg_doc_frozen_callparams, 

4627 '_doc_callparams_note': _mhg_doc_frozen_callparams_note, 

4628 '_doc_random_state': _doc_random_state 

4629} 

4630 

4631 

4632class multivariate_hypergeom_gen(multi_rv_generic): 

4633 r"""A multivariate hypergeometric random variable. 

4634 

4635 Methods 

4636 ------- 

4637 pmf(x, m, n) 

4638 Probability mass function. 

4639 logpmf(x, m, n) 

4640 Log of the probability mass function. 

4641 rvs(m, n, size=1, random_state=None) 

4642 Draw random samples from a multivariate hypergeometric 

4643 distribution. 

4644 mean(m, n) 

4645 Mean of the multivariate hypergeometric distribution. 

4646 var(m, n) 

4647 Variance of the multivariate hypergeometric distribution. 

4648 cov(m, n) 

4649 Compute the covariance matrix of the multivariate 

4650 hypergeometric distribution. 

4651 

4652 Parameters 

4653 ---------- 

4654 %(_doc_default_callparams)s 

4655 %(_doc_random_state)s 

4656 

4657 Notes 

4658 ----- 

4659 %(_doc_callparams_note)s 

4660 

4661 The probability mass function for `multivariate_hypergeom` is 

4662 

4663 .. math:: 

4664 

4665 P(X_1 = x_1, X_2 = x_2, \ldots, X_k = x_k) = \frac{\binom{m_1}{x_1} 

4666 \binom{m_2}{x_2} \cdots \binom{m_k}{x_k}}{\binom{M}{n}}, \\ \quad 

4667 (x_1, x_2, \ldots, x_k) \in \mathbb{N}^k \text{ with } 

4668 \sum_{i=1}^k x_i = n 

4669 

4670 where :math:`m_i` are the number of objects of type :math:`i`, :math:`M` 

4671 is the total number of objects in the population (sum of all the 

4672 :math:`m_i`), and :math:`n` is the size of the sample to be taken 

4673 from the population. 

4674 

4675 .. versionadded:: 1.6.0 

4676 

4677 Examples 

4678 -------- 

4679 To evaluate the probability mass function of the multivariate 

4680 hypergeometric distribution, with a dichotomous population of size 

4681 :math:`10` and :math:`20`, at a sample of size :math:`12` with 

4682 :math:`8` objects of the first type and :math:`4` objects of the 

4683 second type, use: 

4684 

4685 >>> from scipy.stats import multivariate_hypergeom 

4686 >>> multivariate_hypergeom.pmf(x=[8, 4], m=[10, 20], n=12) 

4687 0.0025207176631464523 

4688 

4689 The `multivariate_hypergeom` distribution is identical to the 

4690 corresponding `hypergeom` distribution (tiny numerical differences 

4691 notwithstanding) when only two types (good and bad) of objects 

4692 are present in the population as in the example above. Consider 

4693 another example for a comparison with the hypergeometric distribution: 

4694 

4695 >>> from scipy.stats import hypergeom 

4696 >>> multivariate_hypergeom.pmf(x=[3, 1], m=[10, 5], n=4) 

4697 0.4395604395604395 

4698 >>> hypergeom.pmf(k=3, M=15, n=4, N=10) 

4699 0.43956043956044005 

4700 

4701 The functions ``pmf``, ``logpmf``, ``mean``, ``var``, ``cov``, and ``rvs`` 

4702 support broadcasting, under the convention that the vector parameters 

4703 (``x``, ``m``, and ``n``) are interpreted as if each row along the last 

4704 axis is a single object. For instance, we can combine the previous two 

4705 calls to `multivariate_hypergeom` as 

4706 

4707 >>> multivariate_hypergeom.pmf(x=[[8, 4], [3, 1]], m=[[10, 20], [10, 5]], 

4708 ... n=[12, 4]) 

4709 array([0.00252072, 0.43956044]) 

4710 

4711 This broadcasting also works for ``cov``, where the output objects are 

4712 square matrices of size ``m.shape[-1]``. For example: 

4713 

4714 >>> multivariate_hypergeom.cov(m=[[7, 9], [10, 15]], n=[8, 12]) 

4715 array([[[ 1.05, -1.05], 

4716 [-1.05, 1.05]], 

4717 [[ 1.56, -1.56], 

4718 [-1.56, 1.56]]]) 

4719 

4720 That is, ``result[0]`` is equal to 

4721 ``multivariate_hypergeom.cov(m=[7, 9], n=8)`` and ``result[1]`` is equal 

4722 to ``multivariate_hypergeom.cov(m=[10, 15], n=12)``. 

4723 

4724 Alternatively, the object may be called (as a function) to fix the `m` 

4725 and `n` parameters, returning a "frozen" multivariate hypergeometric 

4726 random variable. 

4727 

4728 >>> rv = multivariate_hypergeom(m=[10, 20], n=12) 

4729 >>> rv.pmf(x=[8, 4]) 

4730 0.0025207176631464523 

4731 

4732 See Also 

4733 -------- 

4734 scipy.stats.hypergeom : The hypergeometric distribution. 

4735 scipy.stats.multinomial : The multinomial distribution. 

4736 

4737 References 

4738 ---------- 

4739 .. [1] The Multivariate Hypergeometric Distribution, 

4740 http://www.randomservices.org/random/urn/MultiHypergeometric.html 

4741 .. [2] Thomas J. Sargent and John Stachurski, 2020, 

4742 Multivariate Hypergeometric Distribution 

4743 https://python.quantecon.org/_downloads/pdf/multi_hyper.pdf 

4744 """ 

4745 def __init__(self, seed=None): 

4746 super().__init__(seed) 

4747 self.__doc__ = doccer.docformat(self.__doc__, mhg_docdict_params) 

4748 

4749 def __call__(self, m, n, seed=None): 

4750 """Create a frozen multivariate_hypergeom distribution. 

4751 

4752 See `multivariate_hypergeom_frozen` for more information. 

4753 """ 

4754 return multivariate_hypergeom_frozen(m, n, seed=seed) 

4755 

4756 def _process_parameters(self, m, n): 

4757 m = np.asarray(m) 

4758 n = np.asarray(n) 

4759 if m.size == 0: 

4760 m = m.astype(int) 

4761 if n.size == 0: 

4762 n = n.astype(int) 

4763 if not np.issubdtype(m.dtype, np.integer): 

4764 raise TypeError("'m' must an array of integers.") 

4765 if not np.issubdtype(n.dtype, np.integer): 

4766 raise TypeError("'n' must an array of integers.") 

4767 if m.ndim == 0: 

4768 raise ValueError("'m' must be an array with" 

4769 " at least one dimension.") 

4770 

4771 # check for empty arrays 

4772 if m.size != 0: 

4773 n = n[..., np.newaxis] 

4774 

4775 m, n = np.broadcast_arrays(m, n) 

4776 

4777 # check for empty arrays 

4778 if m.size != 0: 

4779 n = n[..., 0] 

4780 

4781 mcond = m < 0 

4782 

4783 M = m.sum(axis=-1) 

4784 

4785 ncond = (n < 0) | (n > M) 

4786 return M, m, n, mcond, ncond, np.any(mcond, axis=-1) | ncond 

4787 

4788 def _process_quantiles(self, x, M, m, n): 

4789 x = np.asarray(x) 

4790 if not np.issubdtype(x.dtype, np.integer): 

4791 raise TypeError("'x' must an array of integers.") 

4792 if x.ndim == 0: 

4793 raise ValueError("'x' must be an array with" 

4794 " at least one dimension.") 

4795 if not x.shape[-1] == m.shape[-1]: 

4796 raise ValueError(f"Size of each quantile must be size of 'm': " 

4797 f"received {x.shape[-1]}, " 

4798 f"but expected {m.shape[-1]}.") 

4799 

4800 # check for empty arrays 

4801 if m.size != 0: 

4802 n = n[..., np.newaxis] 

4803 M = M[..., np.newaxis] 

4804 

4805 x, m, n, M = np.broadcast_arrays(x, m, n, M) 

4806 

4807 # check for empty arrays 

4808 if m.size != 0: 

4809 n, M = n[..., 0], M[..., 0] 

4810 

4811 xcond = (x < 0) | (x > m) 

4812 return (x, M, m, n, xcond, 

4813 np.any(xcond, axis=-1) | (x.sum(axis=-1) != n)) 

4814 

4815 def _checkresult(self, result, cond, bad_value): 

4816 result = np.asarray(result) 

4817 if cond.ndim != 0: 

4818 result[cond] = bad_value 

4819 elif cond: 

4820 return bad_value 

4821 if result.ndim == 0: 

4822 return result[()] 

4823 return result 

4824 

4825 def _logpmf(self, x, M, m, n, mxcond, ncond): 

4826 # This form of the pmf uses the identity 

4827 # C(n, r) = beta(n+1, 1) / beta(r+1, n-r+1) 

4828 num = np.zeros_like(m, dtype=np.float_) 

4829 den = np.zeros_like(n, dtype=np.float_) 

4830 m, x = m[~mxcond], x[~mxcond] 

4831 M, n = M[~ncond], n[~ncond] 

4832 num[~mxcond] = (betaln(m+1, 1) - betaln(x+1, m-x+1)) 

4833 den[~ncond] = (betaln(M+1, 1) - betaln(n+1, M-n+1)) 

4834 num[mxcond] = np.nan 

4835 den[ncond] = np.nan 

4836 num = num.sum(axis=-1) 

4837 return num - den 

4838 

4839 def logpmf(self, x, m, n): 

4840 """Log of the multivariate hypergeometric probability mass function. 

4841 

4842 Parameters 

4843 ---------- 

4844 x : array_like 

4845 Quantiles, with the last axis of `x` denoting the components. 

4846 %(_doc_default_callparams)s 

4847 

4848 Returns 

4849 ------- 

4850 logpmf : ndarray or scalar 

4851 Log of the probability mass function evaluated at `x` 

4852 

4853 Notes 

4854 ----- 

4855 %(_doc_callparams_note)s 

4856 """ 

4857 M, m, n, mcond, ncond, mncond = self._process_parameters(m, n) 

4858 (x, M, m, n, xcond, 

4859 xcond_reduced) = self._process_quantiles(x, M, m, n) 

4860 mxcond = mcond | xcond 

4861 ncond = ncond | np.zeros(n.shape, dtype=np.bool_) 

4862 

4863 result = self._logpmf(x, M, m, n, mxcond, ncond) 

4864 

4865 # replace values for which x was out of the domain; broadcast 

4866 # xcond to the right shape 

4867 xcond_ = xcond_reduced | np.zeros(mncond.shape, dtype=np.bool_) 

4868 result = self._checkresult(result, xcond_, np.NINF) 

4869 

4870 # replace values bad for n or m; broadcast 

4871 # mncond to the right shape 

4872 mncond_ = mncond | np.zeros(xcond_reduced.shape, dtype=np.bool_) 

4873 return self._checkresult(result, mncond_, np.nan) 

4874 

4875 def pmf(self, x, m, n): 

4876 """Multivariate hypergeometric probability mass function. 

4877 

4878 Parameters 

4879 ---------- 

4880 x : array_like 

4881 Quantiles, with the last axis of `x` denoting the components. 

4882 %(_doc_default_callparams)s 

4883 

4884 Returns 

4885 ------- 

4886 pmf : ndarray or scalar 

4887 Probability mass function evaluated at `x` 

4888 

4889 Notes 

4890 ----- 

4891 %(_doc_callparams_note)s 

4892 """ 

4893 out = np.exp(self.logpmf(x, m, n)) 

4894 return out 

4895 

4896 def mean(self, m, n): 

4897 """Mean of the multivariate hypergeometric distribution. 

4898 

4899 Parameters 

4900 ---------- 

4901 %(_doc_default_callparams)s 

4902 

4903 Returns 

4904 ------- 

4905 mean : array_like or scalar 

4906 The mean of the distribution 

4907 """ 

4908 M, m, n, _, _, mncond = self._process_parameters(m, n) 

4909 # check for empty arrays 

4910 if m.size != 0: 

4911 M, n = M[..., np.newaxis], n[..., np.newaxis] 

4912 cond = (M == 0) 

4913 M = np.ma.masked_array(M, mask=cond) 

4914 mu = n*(m/M) 

4915 if m.size != 0: 

4916 mncond = (mncond[..., np.newaxis] | 

4917 np.zeros(mu.shape, dtype=np.bool_)) 

4918 return self._checkresult(mu, mncond, np.nan) 

4919 

4920 def var(self, m, n): 

4921 """Variance of the multivariate hypergeometric distribution. 

4922 

4923 Parameters 

4924 ---------- 

4925 %(_doc_default_callparams)s 

4926 

4927 Returns 

4928 ------- 

4929 array_like 

4930 The variances of the components of the distribution. This is 

4931 the diagonal of the covariance matrix of the distribution 

4932 """ 

4933 M, m, n, _, _, mncond = self._process_parameters(m, n) 

4934 # check for empty arrays 

4935 if m.size != 0: 

4936 M, n = M[..., np.newaxis], n[..., np.newaxis] 

4937 cond = (M == 0) & (M-1 == 0) 

4938 M = np.ma.masked_array(M, mask=cond) 

4939 output = n * m/M * (M-m)/M * (M-n)/(M-1) 

4940 if m.size != 0: 

4941 mncond = (mncond[..., np.newaxis] | 

4942 np.zeros(output.shape, dtype=np.bool_)) 

4943 return self._checkresult(output, mncond, np.nan) 

4944 

4945 def cov(self, m, n): 

4946 """Covariance matrix of the multivariate hypergeometric distribution. 

4947 

4948 Parameters 

4949 ---------- 

4950 %(_doc_default_callparams)s 

4951 

4952 Returns 

4953 ------- 

4954 cov : array_like 

4955 The covariance matrix of the distribution 

4956 """ 

4957 # see [1]_ for the formula and [2]_ for implementation 

4958 # cov( x_i,x_j ) = -n * (M-n)/(M-1) * (K_i*K_j) / (M**2) 

4959 M, m, n, _, _, mncond = self._process_parameters(m, n) 

4960 # check for empty arrays 

4961 if m.size != 0: 

4962 M = M[..., np.newaxis, np.newaxis] 

4963 n = n[..., np.newaxis, np.newaxis] 

4964 cond = (M == 0) & (M-1 == 0) 

4965 M = np.ma.masked_array(M, mask=cond) 

4966 output = (-n * (M-n)/(M-1) * 

4967 np.einsum("...i,...j->...ij", m, m) / (M**2)) 

4968 # check for empty arrays 

4969 if m.size != 0: 

4970 M, n = M[..., 0, 0], n[..., 0, 0] 

4971 cond = cond[..., 0, 0] 

4972 dim = m.shape[-1] 

4973 # diagonal entries need to be computed differently 

4974 for i in range(dim): 

4975 output[..., i, i] = (n * (M-n) * m[..., i]*(M-m[..., i])) 

4976 output[..., i, i] = output[..., i, i] / (M-1) 

4977 output[..., i, i] = output[..., i, i] / (M**2) 

4978 if m.size != 0: 

4979 mncond = (mncond[..., np.newaxis, np.newaxis] | 

4980 np.zeros(output.shape, dtype=np.bool_)) 

4981 return self._checkresult(output, mncond, np.nan) 

4982 

4983 def rvs(self, m, n, size=None, random_state=None): 

4984 """Draw random samples from a multivariate hypergeometric distribution. 

4985 

4986 Parameters 

4987 ---------- 

4988 %(_doc_default_callparams)s 

4989 size : integer or iterable of integers, optional 

4990 Number of samples to draw. Default is ``None``, in which case a 

4991 single variate is returned as an array with shape ``m.shape``. 

4992 %(_doc_random_state)s 

4993 

4994 Returns 

4995 ------- 

4996 rvs : array_like 

4997 Random variates of shape ``size`` or ``m.shape`` 

4998 (if ``size=None``). 

4999 

5000 Notes 

5001 ----- 

5002 %(_doc_callparams_note)s 

5003 

5004 Also note that NumPy's `multivariate_hypergeometric` sampler is not 

5005 used as it doesn't support broadcasting. 

5006 """ 

5007 M, m, n, _, _, _ = self._process_parameters(m, n) 

5008 

5009 random_state = self._get_random_state(random_state) 

5010 

5011 if size is not None and isinstance(size, int): 

5012 size = (size, ) 

5013 

5014 if size is None: 

5015 rvs = np.empty(m.shape, dtype=m.dtype) 

5016 else: 

5017 rvs = np.empty(size + (m.shape[-1], ), dtype=m.dtype) 

5018 rem = M 

5019 

5020 # This sampler has been taken from numpy gh-13794 

5021 # https://github.com/numpy/numpy/pull/13794 

5022 for c in range(m.shape[-1] - 1): 

5023 rem = rem - m[..., c] 

5024 n0mask = n == 0 

5025 rvs[..., c] = (~n0mask * 

5026 random_state.hypergeometric(m[..., c], 

5027 rem + n0mask, 

5028 n + n0mask, 

5029 size=size)) 

5030 n = n - rvs[..., c] 

5031 rvs[..., m.shape[-1] - 1] = n 

5032 

5033 return rvs 

5034 

5035 

5036multivariate_hypergeom = multivariate_hypergeom_gen() 
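# A numeric check of the identity that `_logpmf` above relies on:
# log C(n, r) = betaln(n + 1, 1) - betaln(r + 1, n - r + 1).
import numpy as np
from math import comb
from scipy.special import betaln

for _n, _r in [(5, 2), (12, 0), (30, 17)]:
    assert np.isclose(betaln(_n + 1, 1) - betaln(_r + 1, _n - _r + 1),
                      np.log(comb(_n, _r)))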

5037 

5038 

5039class multivariate_hypergeom_frozen(multi_rv_frozen): 

5040 def __init__(self, m, n, seed=None): 

5041 self._dist = multivariate_hypergeom_gen(seed) 

5042 (self.M, self.m, self.n, 

5043 self.mcond, self.ncond, 

5044 self.mncond) = self._dist._process_parameters(m, n) 

5045 

5046 # monkey patch self._dist 

5047 def _process_parameters(m, n): 

5048 return (self.M, self.m, self.n, 

5049 self.mcond, self.ncond, 

5050 self.mncond) 

5051 self._dist._process_parameters = _process_parameters 

5052 

5053 def logpmf(self, x): 

5054 return self._dist.logpmf(x, self.m, self.n) 

5055 

5056 def pmf(self, x): 

5057 return self._dist.pmf(x, self.m, self.n) 

5058 

5059 def mean(self): 

5060 return self._dist.mean(self.m, self.n) 

5061 

5062 def var(self): 

5063 return self._dist.var(self.m, self.n) 

5064 

5065 def cov(self): 

5066 return self._dist.cov(self.m, self.n) 

5067 

5068 def rvs(self, size=1, random_state=None): 

5069 return self._dist.rvs(self.m, self.n, 

5070 size=size, 

5071 random_state=random_state) 

5072 

5073 

5074# Set frozen generator docstrings from corresponding docstrings in 

5075# multivariate_hypergeom and fill in default strings in class docstrings 

5076for name in ['logpmf', 'pmf', 'mean', 'var', 'cov', 'rvs']: 

5077 method = multivariate_hypergeom_gen.__dict__[name] 

5078 method_frozen = multivariate_hypergeom_frozen.__dict__[name] 

5079 method_frozen.__doc__ = doccer.docformat( 

5080 method.__doc__, mhg_docdict_noparams) 

5081 method.__doc__ = doccer.docformat(method.__doc__, 

5082 mhg_docdict_params) 
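
For context, the docstring machinery above delegates to `scipy._lib.doccer.docformat`, which expands `%(name)s` placeholders from a dictionary. A minimal, self-contained sketch of that substitution follows; the template and dictionary names are hypothetical, chosen only for illustration.

from scipy._lib import doccer

_toy_docdict = {"_doc_extra": "extra : int\n    An illustrative parameter."}
_toy_template = """Toy summary line.

Parameters
----------
%(_doc_extra)s
"""

# docformat substitutes the placeholder, preserving its indentation.
print(doccer.docformat(_toy_template, _toy_docdict))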

5083 

5084 

5085class random_table_gen(multi_rv_generic): 

5086 r"""Contingency tables from independent samples with fixed marginal sums. 

5087 

5088 This is the distribution of random tables with given row and column vector 

5089 sums. This distribution represents the set of random tables under the null 

5090 hypothesis that rows and columns are independent. It is used in hypothesis 

5091 tests of independence. 

5092 

5093 Because of assumed independence, the expected frequency of each table 

5094 element can be computed from the row and column sums, so that the 

5095 distribution is completely determined by these two vectors. 

5096 

5097 Methods 

5098 ------- 

5099 logpmf(x, row, col)

5100 Log-probability of table `x` occurring in the distribution.

5101 pmf(x, row, col)

5102 Probability of table `x` occurring in the distribution.

5103 mean(row, col) 

5104 Mean table. 

5105 rvs(row, col, size=None, method=None, random_state=None) 

5106 Draw random tables with given row and column vector sums. 

5107 

5108 Parameters 

5109 ---------- 

5110 %(_doc_row_col)s 

5111 %(_doc_random_state)s 

5112 

5113 Notes 

5114 ----- 

5115 %(_doc_row_col_note)s 

5116 

5117 Random elements from the distribution are generated either with Boyett's 

5118 [1]_ or Patefield's algorithm [2]_. Boyett's algorithm has 

5119 O(N) time and space complexity, where N is the total sum of entries in the 

5120 table. Patefield's algorithm has O(K x log(N)) time complexity, where K is

5121 the number of cells in the table, and requires only a small, constant

5122 workspace. By default, the `rvs` method selects the fastest algorithm based on

5123 the input, but you can specify the algorithm with the keyword `method`. 

5124 Allowed values are "boyett" and "patefield". 

5125 

5126 .. versionadded:: 1.10.0 

5127 

5128 Examples 

5129 -------- 

5130 >>> from scipy.stats import random_table 

5131 

5132 >>> row = [1, 5] 

5133 >>> col = [2, 3, 1] 

5134 >>> random_table.mean(row, col) 

5135 array([[0.33333333, 0.5 , 0.16666667], 

5136 [1.66666667, 2.5 , 0.83333333]]) 

5137 

5138 Alternatively, the object may be called (as a function) to fix the row 

5139 and column vector sums, returning a "frozen" distribution. 

5140 

5141 >>> dist = random_table(row, col) 

5142 >>> dist.rvs(random_state=123) 

5143 array([[1., 0., 0.], 

5144 [1., 3., 1.]]) 

5145 

5146 References 

5147 ---------- 

5148 .. [1] J. Boyett, AS 144 Appl. Statist. 28 (1979) 329-332 

5149 .. [2] W.M. Patefield, AS 159 Appl. Statist. 30 (1981) 91-97 

5150 """ 

5151 

5152 def __init__(self, seed=None): 

5153 super().__init__(seed) 

5154 

5155 def __call__(self, row, col, *, seed=None): 

5156 """Create a frozen distribution of tables with given marginals. 

5157 

5158 See `random_table_frozen` for more information. 

5159 """ 

5160 return random_table_frozen(row, col, seed=seed) 

5161 

5162 def logpmf(self, x, row, col): 

5163 """Log-probability of table to occur in the distribution. 

5164 

5165 Parameters 

5166 ---------- 

5167 %(_doc_x)s 

5168 %(_doc_row_col)s 

5169 

5170 Returns 

5171 ------- 

5172 logpmf : ndarray or scalar 

5173 Log of the probability mass function evaluated at `x`. 

5174 

5175 Notes 

5176 ----- 

5177 %(_doc_row_col_note)s 

5178 

5179 If row and column marginals of `x` do not match `row` and `col`, 

5180 negative infinity is returned. 

5181 

5182 Examples 

5183 -------- 

5184 >>> from scipy.stats import random_table 

5185 >>> import numpy as np 

5186 

5187 >>> x = [[1, 5, 1], [2, 3, 1]] 

5188 >>> row = np.sum(x, axis=1) 

5189 >>> col = np.sum(x, axis=0) 

5190 >>> random_table.logpmf(x, row, col) 

5191 -1.6306401200847027 

5192 

5193 Alternatively, the object may be called (as a function) to fix the row 

5194 and column vector sums, returning a "frozen" distribution. 

5195 

5196 >>> d = random_table(row, col) 

5197 >>> d.logpmf(x) 

5198 -1.6306401200847027 

5199 """ 

5200 r, c, n = self._process_parameters(row, col) 

5201 x = np.asarray(x) 

5202 

5203 if x.ndim < 2: 

5204 raise ValueError("`x` must be at least two-dimensional") 

5205 

5206 dtype_is_int = np.issubdtype(x.dtype, np.integer) 

5207 with np.errstate(invalid='ignore'): 

5208 if not dtype_is_int and not np.all(x.astype(int) == x): 

5209 raise ValueError("`x` must contain only integral values") 

5210 

5211 # x does not contain NaN if we arrive here 

5212 if np.any(x < 0): 

5213 raise ValueError("`x` must contain only non-negative values") 

5214 

5215 r2 = np.sum(x, axis=-1) 

5216 c2 = np.sum(x, axis=-2) 

5217 

5218 if r2.shape[-1] != len(r): 

5219 raise ValueError("shape of `x` must agree with `row`") 

5220 

5221 if c2.shape[-1] != len(c): 

5222 raise ValueError("shape of `x` must agree with `col`") 

5223 

5224 res = np.empty(x.shape[:-2]) 

5225 

5226 mask = np.all(r2 == r, axis=-1) & np.all(c2 == c, axis=-1) 

5227 

5228 def lnfac(x): 

5229 return gammaln(x + 1) 

5230 

5231 res[mask] = (np.sum(lnfac(r), axis=-1) + np.sum(lnfac(c), axis=-1) 

5232 - lnfac(n) - np.sum(lnfac(x[mask]), axis=(-1, -2))) 

5233 res[~mask] = -np.inf 

5234 

5235 return res[()] 

5236 

5237 def pmf(self, x, row, col): 

5238 """Probability of table to occur in the distribution. 

5239 

5240 Parameters 

5241 ---------- 

5242 %(_doc_x)s 

5243 %(_doc_row_col)s 

5244 

5245 Returns 

5246 ------- 

5247 pmf : ndarray or scalar 

5248 Probability mass function evaluated at `x`. 

5249 

5250 Notes 

5251 ----- 

5252 %(_doc_row_col_note)s 

5253 

5254 If row and column marginals of `x` do not match `row` and `col`, 

5255 zero is returned. 

5256 

5257 Examples 

5258 -------- 

5259 >>> from scipy.stats import random_table 

5260 >>> import numpy as np 

5261 

5262 >>> x = [[1, 5, 1], [2, 3, 1]] 

5263 >>> row = np.sum(x, axis=1) 

5264 >>> col = np.sum(x, axis=0) 

5265 >>> random_table.pmf(x, row, col) 

5266 0.19580419580419592 

5267 

5268 Alternatively, the object may be called (as a function) to fix the row 

5269 and column vector sums, returning a "frozen" distribution. 

5270 

5271 >>> d = random_table(row, col) 

5272 >>> d.pmf(x) 

5273 0.19580419580419592 

5274 """ 

5275 return np.exp(self.logpmf(x, row, col)) 

5276 

5277 def mean(self, row, col): 

5278 """Mean of distribution of conditional tables. 

5279 %(_doc_mean_params)s 

5280 

5281 Returns 

5282 ------- 

5283 mean : ndarray

5284 Mean of the distribution. 

5285 

5286 Notes 

5287 ----- 

5288 %(_doc_row_col_note)s 

5289 

5290 Examples 

5291 -------- 

5292 >>> from scipy.stats import random_table 

5293 

5294 >>> row = [1, 5] 

5295 >>> col = [2, 3, 1] 

5296 >>> random_table.mean(row, col) 

5297 array([[0.33333333, 0.5 , 0.16666667], 

5298 [1.66666667, 2.5 , 0.83333333]]) 

5299 

5300 Alternatively, the object may be called (as a function) to fix the row 

5301 and column vector sums, returning a "frozen" distribution. 

5302 

5303 >>> d = random_table(row, col) 

5304 >>> d.mean() 

5305 array([[0.33333333, 0.5 , 0.16666667], 

5306 [1.66666667, 2.5 , 0.83333333]]) 

5307 """ 

5308 r, c, n = self._process_parameters(row, col) 

5309 return np.outer(r, c) / n 

5310 

5311 def rvs(self, row, col, *, size=None, method=None, random_state=None): 

5312 """Draw random tables with fixed column and row marginals. 

5313 

5314 Parameters 

5315 ---------- 

5316 %(_doc_row_col)s 

5317 size : integer, optional 

5318 Number of samples to draw (default 1). 

5319 method : str, optional 

5320 Which method to use, "boyett" or "patefield". If None (default), 

5321 selects the fastest method for this input. 

5322 %(_doc_random_state)s 

5323 

5324 Returns 

5325 ------- 

5326 rvs : ndarray 

5327 Random 2D tables of shape (`size`, `len(row)`, `len(col)`). 

5328 

5329 Notes 

5330 ----- 

5331 %(_doc_row_col_note)s 

5332 

5333 Examples 

5334 -------- 

5335 >>> from scipy.stats import random_table 

5336 

5337 >>> row = [1, 5] 

5338 >>> col = [2, 3, 1] 

5339 >>> random_table.rvs(row, col, random_state=123) 

5340 array([[1., 0., 0.], 

5341 [1., 3., 1.]]) 

5342 

5343 Alternatively, the object may be called (as a function) to fix the row 

5344 and column vector sums, returning a "frozen" distribution. 

5345 

5346 >>> d = random_table(row, col) 

5347 >>> d.rvs(random_state=123) 

5348 array([[1., 0., 0.], 

5349 [1., 3., 1.]]) 

5350 """ 

5351 r, c, n = self._process_parameters(row, col) 

5352 size, shape = self._process_size_shape(size, r, c) 

5353 

5354 random_state = self._get_random_state(random_state) 

5355 meth = self._process_rvs_method(method, r, c, n) 

5356 

5357 return meth(r, c, n, size, random_state).reshape(shape) 

5358 

5359 @staticmethod 

5360 def _process_parameters(row, col): 

5361 """ 

5362 Check that row and column vectors are one-dimensional, that they do 

5363 not contain negative or non-integer entries, and that the sums over 

5364 both vectors are equal. 

5365 """ 

5366 r = np.array(row, dtype=np.int64, copy=True) 

5367 c = np.array(col, dtype=np.int64, copy=True) 

5368 

5369 if np.ndim(r) != 1: 

5370 raise ValueError("`row` must be one-dimensional") 

5371 if np.ndim(c) != 1: 

5372 raise ValueError("`col` must be one-dimensional") 

5373 

5374 if np.any(r < 0): 

5375 raise ValueError("each element of `row` must be non-negative") 

5376 if np.any(c < 0): 

5377 raise ValueError("each element of `col` must be non-negative") 

5378 

5379 n = np.sum(r) 

5380 if n != np.sum(c): 

5381 raise ValueError("sums over `row` and `col` must be equal") 

5382 

5383 if not np.all(r == np.asarray(row)): 

5384 raise ValueError("each element of `row` must be an integer") 

5385 if not np.all(c == np.asarray(col)): 

5386 raise ValueError("each element of `col` must be an integer") 

5387 

5388 return r, c, n 

5389 

5390 @staticmethod 

5391 def _process_size_shape(size, r, c): 

5392 """ 

5393 Compute the number of samples to be drawn and the shape of the output 

5394 """ 

5395 shape = (len(r), len(c)) 

5396 

5397 if size is None: 

5398 return 1, shape 

5399 

5400 size = np.atleast_1d(size) 

5401 if not np.issubdtype(size.dtype, np.integer) or np.any(size < 0): 

5402 raise ValueError("`size` must be a non-negative integer or `None`") 

5403 

5404 return np.prod(size), tuple(size) + shape 

5405 

5406 @classmethod 

5407 def _process_rvs_method(cls, method, r, c, n): 

5408 known_methods = { 

5409 None: cls._rvs_select(r, c, n), 

5410 "boyett": cls._rvs_boyett, 

5411 "patefield": cls._rvs_patefield, 

5412 } 

5413 try: 

5414 return known_methods[method] 

5415 except KeyError: 

5416 raise ValueError(f"'{method}' not recognized, " 

5417 f"must be one of {set(known_methods)}") 

5418 

5419 @classmethod 

5420 def _rvs_select(cls, r, c, n): 

5421 fac = 1.0 # crossover factor; benchmarks suggest a value of about 1

5422 k = len(r) * len(c) # number of cells 

5423 # n + 1 guards against failure if n == 0 

5424 if n > fac * np.log(n + 1) * k: 

5425 return cls._rvs_patefield 

5426 return cls._rvs_boyett 

5427 

5428 @staticmethod 

5429 def _rvs_boyett(row, col, ntot, size, random_state): 

5430 return _rcont.rvs_rcont1(row, col, ntot, size, random_state) 

5431 

5432 @staticmethod 

5433 def _rvs_patefield(row, col, ntot, size, random_state): 

5434 return _rcont.rvs_rcont2(row, col, ntot, size, random_state) 

5435 

5436 

5437random_table = random_table_gen() 
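
As an editorial illustration (not part of the original module), the sketch below checks the `gammaln`-based log-pmf used in `logpmf` against a direct evaluation of the formula, then draws tables with each sampling backend; the table and seed are arbitrary.

import numpy as np
from scipy.special import gammaln
from scipy.stats import random_table

x = np.array([[1, 5, 1], [2, 3, 1]])
row, col, n = x.sum(axis=1), x.sum(axis=0), x.sum()

def lnfac(a):
    return gammaln(np.asarray(a) + 1)

# log pmf = sum(ln r_i!) + sum(ln c_j!) - ln n! - sum(ln x_ij!)
direct = lnfac(row).sum() + lnfac(col).sum() - lnfac(n) - lnfac(x).sum()
assert np.isclose(direct, random_table.logpmf(x, row, col))

# Both backends accept the same arguments; method=None picks one automatically.
t1 = random_table.rvs(row, col, size=3, method="boyett", random_state=123)
t2 = random_table.rvs(row, col, size=3, method="patefield", random_state=123)
assert t1.shape == t2.shape == (3, 2, 3)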

5438 

5439 

5440class random_table_frozen(multi_rv_frozen): 

5441 def __init__(self, row, col, *, seed=None): 

5442 self._dist = random_table_gen(seed) 

5443 self._params = self._dist._process_parameters(row, col) 

5444 

5445 # monkey patch self._dist 

5446 def _process_parameters(r, c): 

5447 return self._params 

5448 self._dist._process_parameters = _process_parameters 

5449 

5450 def logpmf(self, x): 

5451 return self._dist.logpmf(x, None, None) 

5452 

5453 def pmf(self, x): 

5454 return self._dist.pmf(x, None, None) 

5455 

5456 def mean(self): 

5457 return self._dist.mean(None, None) 

5458 

5459 def rvs(self, size=None, method=None, random_state=None): 

5460 # optimisations are possible here 

5461 return self._dist.rvs(None, None, size=size, method=method, 

5462 random_state=random_state) 

5463 

5464 

5465_ctab_doc_row_col = """\ 

5466row : array_like 

5467 Sum of table entries in each row. 

5468col : array_like 

5469 Sum of table entries in each column.""" 

5470 

5471_ctab_doc_x = """\ 

5472x : array_like

5473 Two-dimensional table of non-negative integers, or a

5474 multi-dimensional array whose last two dimensions

5475 correspond to the tables."""

5476 

5477_ctab_doc_row_col_note = """\ 

5478The row and column vectors must be one-dimensional and non-empty,

5479and must each sum to the same value. They cannot contain negative

5480or non-integer entries."""

5481 

5482_ctab_doc_mean_params = f""" 

5483Parameters 

5484---------- 

5485{_ctab_doc_row_col}""" 

5486 

5487_ctab_doc_row_col_note_frozen = """\ 

5488See class definition for a detailed description of parameters.""" 

5489 

5490_ctab_docdict = { 

5491 "_doc_random_state": _doc_random_state, 

5492 "_doc_row_col": _ctab_doc_row_col, 

5493 "_doc_x": _ctab_doc_x, 

5494 "_doc_mean_params": _ctab_doc_mean_params, 

5495 "_doc_row_col_note": _ctab_doc_row_col_note, 

5496} 

5497 

5498_ctab_docdict_frozen = _ctab_docdict.copy() 

5499_ctab_docdict_frozen.update({ 

5500 "_doc_row_col": "", 

5501 "_doc_mean_params": "", 

5502 "_doc_row_col_note": _ctab_doc_row_col_note_frozen, 

5503}) 

5504 

5505 

5506def _docfill(obj, docdict, template=None): 

5507 obj.__doc__ = doccer.docformat(template or obj.__doc__, docdict) 

5508 

5509 

5510# Set frozen generator docstrings from corresponding docstrings in 

5511# random_table and fill in default strings in class docstrings 

5512_docfill(random_table_gen, _ctab_docdict) 

5513for name in ['logpmf', 'pmf', 'mean', 'rvs']: 

5514 method = random_table_gen.__dict__[name] 

5515 method_frozen = random_table_frozen.__dict__[name] 

5516 _docfill(method_frozen, _ctab_docdict_frozen, method.__doc__) 

5517 _docfill(method, _ctab_docdict) 

5518 

5519 

5520class uniform_direction_gen(multi_rv_generic): 

5521 r"""A vector-valued uniform direction. 

5522 

5523 Return a random direction (unit vector). The `dim` keyword specifies 

5524 the dimensionality of the space. 

5525 

5526 Methods 

5527 ------- 

5528 rvs(dim=None, size=1, random_state=None) 

5529 Draw random directions. 

5530 

5531 Parameters 

5532 ---------- 

5533 dim : scalar 

5534 Dimension of directions. 

5535 seed : {None, int, `numpy.random.Generator`, 

5536 `numpy.random.RandomState`}, optional 

5537 

5538 Used for drawing random variates. 

5539 If `seed` is `None`, the `~np.random.RandomState` singleton is used. 

5540 If `seed` is an int, a new ``RandomState`` instance is used, seeded 

5541 with seed. 

5542 If `seed` is already a ``RandomState`` or ``Generator`` instance, 

5543 then that object is used. 

5544 Default is `None`. 

5545 

5546 Notes 

5547 ----- 

5548 This distribution generates unit vectors uniformly distributed on 

5549 the surface of a hypersphere. These can be interpreted as random 

5550 directions. 

5551 For example, if `dim` is 3, 3D vectors from the surface of :math:`S^2` 

5552 will be sampled. 

5553 

5554 References 

5555 ---------- 

5556 .. [1] Marsaglia, G. (1972). "Choosing a Point from the Surface of a 

5557 Sphere". Annals of Mathematical Statistics. 43 (2): 645-646. 

5558 

5559 Examples 

5560 -------- 

5561 >>> import numpy as np 

5562 >>> from scipy.stats import uniform_direction 

5563 >>> x = uniform_direction.rvs(3) 

5564 >>> np.linalg.norm(x) 

5565 1. 

5566 

5567 This generates one random direction, a vector on the surface of 

5568 :math:`S^2`. 

5569 

5570 Alternatively, the object may be called (as a function) to return a frozen 

5571 distribution with fixed `dim` parameter. Here, 

5572 we create a `uniform_direction` with ``dim=3`` and draw 5 observations. 

5573 The samples are then arranged in an array of shape 5x3. 

5574 

5575 >>> rng = np.random.default_rng() 

5576 >>> uniform_sphere_dist = uniform_direction(3) 

5577 >>> unit_vectors = uniform_sphere_dist.rvs(5, random_state=rng) 

5578 >>> unit_vectors 

5579 array([[ 0.56688642, -0.1332634 , -0.81294566], 

5580 [-0.427126 , -0.74779278, 0.50830044], 

5581 [ 0.3793989 , 0.92346629, 0.05715323], 

5582 [ 0.36428383, -0.92449076, -0.11231259], 

5583 [-0.27733285, 0.94410968, -0.17816678]]) 

5584 """ 

5585 

5586 def __init__(self, seed=None): 

5587 super().__init__(seed) 

5588 self.__doc__ = doccer.docformat(self.__doc__) 

5589 

5590 def __call__(self, dim=None, seed=None): 

5591 """Create a frozen n-dimensional uniform direction distribution. 

5592 

5593 See `uniform_direction` for more information. 

5594 """ 

5595 return uniform_direction_frozen(dim, seed=seed) 

5596 

5597 def _process_parameters(self, dim): 

5598 """Dimension N must be specified; it cannot be inferred.""" 

5599 if dim is None or not np.isscalar(dim) or dim < 1 or dim != int(dim): 

5600 raise ValueError("Dimension of vector must be specified, " 

5601 "and must be an integer greater than 0.") 

5602 

5603 return int(dim) 

5604 

5605 def rvs(self, dim, size=None, random_state=None): 

5606 """Draw random samples from S(N-1). 

5607 

5608 Parameters 

5609 ---------- 

5610 dim : integer 

5611 Dimension of space (N). 

5612 size : int or tuple of ints, optional 

5613 Given a shape of, for example, (m,n,k), m*n*k samples are 

5614 generated, and packed in an m-by-n-by-k arrangement. 

5615 Because each sample is N-dimensional, the output shape 

5616 is (m,n,k,N). If no shape is specified, a single (N-D) 

5617 sample is returned. 

5618 random_state : {None, int, `numpy.random.Generator`, 

5619 `numpy.random.RandomState`}, optional 

5620 

5621 Pseudorandom number generator state used to generate resamples. 

5622 

5623 If `random_state` is ``None`` (or `np.random`), the 

5624 `numpy.random.RandomState` singleton is used. 

5625 If `random_state` is an int, a new ``RandomState`` instance is 

5626 used, seeded with `random_state`. 

5627 If `random_state` is already a ``Generator`` or ``RandomState`` 

5628 instance then that instance is used. 

5629 

5630 Returns 

5631 ------- 

5632 rvs : ndarray 

5633 Random direction vectors 

5634 

5635 """ 

5636 random_state = self._get_random_state(random_state) 

5637 if size is None: 

5638 size = np.array([], dtype=int) 

5639 size = np.atleast_1d(size) 

5640 

5641 dim = self._process_parameters(dim) 

5642 

5643 samples = _sample_uniform_direction(dim, size, random_state) 

5644 return samples 

5645 

5646 

5647uniform_direction = uniform_direction_gen() 
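
A brief editorial sketch (not part of the original module) of the `uniform_direction` instance built above, checking that both the direct and the frozen call paths return unit vectors; the dimension, sizes, and seed are arbitrary.

import numpy as np
from scipy.stats import uniform_direction

rng = np.random.default_rng(2718)

# Direct call: three 4-dimensional unit vectors.
x = uniform_direction.rvs(4, size=3, random_state=rng)
assert x.shape == (3, 4)
assert np.allclose(np.linalg.norm(x, axis=-1), 1.0)

# Frozen form with the dimension fixed once.
dist = uniform_direction(4)
y = dist.rvs(size=(2, 5), random_state=rng)
assert y.shape == (2, 5, 4)
assert np.allclose(np.linalg.norm(y, axis=-1), 1.0)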

5648 

5649 

5650class uniform_direction_frozen(multi_rv_frozen): 

5651 def __init__(self, dim=None, seed=None): 

5652 """Create a frozen n-dimensional uniform direction distribution. 

5653 

5654 Parameters 

5655 ---------- 

5656 dim : int 

5657 Dimension of directions

5658 seed : {None, int, `numpy.random.Generator`, 

5659 `numpy.random.RandomState`}, optional 

5660 

5661 If `seed` is None (or `np.random`), the `numpy.random.RandomState` 

5662 singleton is used. 

5663 If `seed` is an int, a new ``RandomState`` instance is used, 

5664 seeded with `seed`. 

5665 If `seed` is already a ``Generator`` or ``RandomState`` instance 

5666 then that instance is used. 

5667 

5668 Examples 

5669 -------- 

5670 >>> from scipy.stats import uniform_direction 

5671 >>> x = uniform_direction(3) 

5672 >>> x.rvs() 

5673 

5674 """ 

5675 self._dist = uniform_direction_gen(seed) 

5676 self.dim = self._dist._process_parameters(dim) 

5677 

5678 def rvs(self, size=None, random_state=None): 

5679 return self._dist.rvs(self.dim, size, random_state) 

5680 

5681 

5682def _sample_uniform_direction(dim, size, random_state): 

5683 """ 

5684 Private helper to generate uniformly distributed unit direction vectors.

5685 Reference: Marsaglia, G. (1972). "Choosing a Point from the Surface of a 

5686 Sphere". Annals of Mathematical Statistics. 43 (2): 645-646. 

5687 """ 

5688 samples_shape = np.append(size, dim) 

5689 samples = random_state.standard_normal(samples_shape) 

5690 samples /= np.linalg.norm(samples, axis=-1, keepdims=True) 

5691 return samples
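
`_sample_uniform_direction` is Marsaglia's construction: draw i.i.d. standard normal coordinates and normalise, which is uniform on the sphere because the standard normal density is rotationally invariant. The standalone sketch below (an editorial addition; the function name and seed are illustrative) reproduces the same construction and sanity-checks it.

import numpy as np

def sample_direction(dim, size, rng):
    # Marsaglia (1972): normalise i.i.d. Gaussian coordinates.
    z = rng.standard_normal(tuple(np.atleast_1d(size)) + (dim,))
    return z / np.linalg.norm(z, axis=-1, keepdims=True)

rng = np.random.default_rng(0)
v = sample_direction(3, 1000, rng)
assert np.allclose(np.linalg.norm(v, axis=-1), 1.0)
# By symmetry the empirical mean is near the origin (very loose bound).
assert np.all(np.abs(v.mean(axis=0)) < 0.1)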