
#
# Author: Travis Oliphant  2002-2011 with contributions from
#         SciPy Developers 2004-2011
#
from functools import partial
import warnings

from scipy import special
from scipy.special import entr, logsumexp, betaln, gammaln as gamln, zeta
from scipy._lib._util import _lazywhere, rng_integers
from scipy.interpolate import interp1d

from numpy import floor, ceil, log, exp, sqrt, log1p, expm1, tanh, cosh, sinh

import numpy as np

from ._distn_infrastructure import (rv_discrete, get_distribution_names,
                                    _check_shape, _ShapeInfo)
import scipy.stats._boost as _boost
from ._biasedurn import (_PyFishersNCHypergeometric,
                         _PyWalleniusNCHypergeometric,
                         _PyStochasticLib3)


def _isintegral(x):
    return x == np.round(x)

class binom_gen(rv_discrete):
    r"""A binomial discrete random variable.

    %(before_notes)s

    Notes
    -----
    The probability mass function for `binom` is:

    .. math::

       f(k) = \binom{n}{k} p^k (1-p)^{n-k}

    for :math:`k \in \{0, 1, \dots, n\}`, :math:`0 \leq p \leq 1`

    `binom` takes :math:`n` and :math:`p` as shape parameters,
    where :math:`p` is the probability of a single success
    and :math:`1-p` is the probability of a single failure.

    %(after_notes)s

    %(example)s

    See Also
    --------
    hypergeom, nbinom, nhypergeom

    """
    def _shape_info(self):
        return [_ShapeInfo("n", True, (0, np.inf), (True, False)),
                _ShapeInfo("p", False, (0, 1), (True, True))]

    def _rvs(self, n, p, size=None, random_state=None):
        return random_state.binomial(n, p, size)

    def _argcheck(self, n, p):
        return (n >= 0) & _isintegral(n) & (p >= 0) & (p <= 1)

    def _get_support(self, n, p):
        return self.a, n

    def _logpmf(self, x, n, p):
        k = floor(x)
        combiln = (gamln(n+1) - (gamln(k+1) + gamln(n-k+1)))
        return combiln + special.xlogy(k, p) + special.xlog1py(n-k, -p)

    def _pmf(self, x, n, p):
        # binom.pmf(k) = choose(n, k) * p**k * (1-p)**(n-k)
        return _boost._binom_pdf(x, n, p)

    def _cdf(self, x, n, p):
        k = floor(x)
        return _boost._binom_cdf(k, n, p)

    def _sf(self, x, n, p):
        k = floor(x)
        return _boost._binom_sf(k, n, p)

    def _isf(self, x, n, p):
        return _boost._binom_isf(x, n, p)

    def _ppf(self, q, n, p):
        return _boost._binom_ppf(q, n, p)

    def _stats(self, n, p, moments='mv'):
        mu = _boost._binom_mean(n, p)
        var = _boost._binom_variance(n, p)
        g1, g2 = None, None
        if 's' in moments:
            g1 = _boost._binom_skewness(n, p)
        if 'k' in moments:
            g2 = _boost._binom_kurtosis_excess(n, p)
        return mu, var, g1, g2

    def _entropy(self, n, p):
        k = np.r_[0:n + 1]
        vals = self._pmf(k, n, p)
        return np.sum(entr(vals), axis=0)


binom = binom_gen(name='binom')

class bernoulli_gen(binom_gen):
    r"""A Bernoulli discrete random variable.

    %(before_notes)s

    Notes
    -----
    The probability mass function for `bernoulli` is:

    .. math::

       f(k) = \begin{cases}1-p  &\text{if } k = 0\\
                           p    &\text{if } k = 1\end{cases}

    for :math:`k` in :math:`\{0, 1\}`, :math:`0 \leq p \leq 1`

    `bernoulli` takes :math:`p` as shape parameter,
    where :math:`p` is the probability of a single success
    and :math:`1-p` is the probability of a single failure.

    %(after_notes)s

    %(example)s

    """
    def _shape_info(self):
        return [_ShapeInfo("p", False, (0, 1), (True, True))]

    def _rvs(self, p, size=None, random_state=None):
        return binom_gen._rvs(self, 1, p, size=size, random_state=random_state)

    def _argcheck(self, p):
        return (p >= 0) & (p <= 1)

    def _get_support(self, p):
        # Overrides binom_gen._get_support!
        return self.a, self.b

    def _logpmf(self, x, p):
        return binom._logpmf(x, 1, p)

    def _pmf(self, x, p):
        # bernoulli.pmf(k) = 1-p  if k = 0
        #                  = p    if k = 1
        return binom._pmf(x, 1, p)

    def _cdf(self, x, p):
        return binom._cdf(x, 1, p)

    def _sf(self, x, p):
        return binom._sf(x, 1, p)

    def _isf(self, x, p):
        return binom._isf(x, 1, p)

    def _ppf(self, q, p):
        return binom._ppf(q, 1, p)

    def _stats(self, p):
        return binom._stats(1, p)

    def _entropy(self, p):
        return entr(p) + entr(1-p)


bernoulli = bernoulli_gen(b=1, name='bernoulli')

class betabinom_gen(rv_discrete):
    r"""A beta-binomial discrete random variable.

    %(before_notes)s

    Notes
    -----
    The beta-binomial distribution is a binomial distribution with a
    probability of success `p` that follows a beta distribution.

    The probability mass function for `betabinom` is:

    .. math::

       f(k) = \binom{n}{k} \frac{B(k + a, n - k + b)}{B(a, b)}

    for :math:`k \in \{0, 1, \dots, n\}`, :math:`n \geq 0`, :math:`a > 0`,
    :math:`b > 0`, where :math:`B(a, b)` is the beta function.

    `betabinom` takes :math:`n`, :math:`a`, and :math:`b` as shape parameters.
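
    As an illustrative sanity check (not part of the defining properties
    above): for :math:`a = b = 1` the beta mixing density is uniform, so
    `betabinom` reduces to the discrete uniform distribution on
    :math:`\{0, 1, \dots, n\}`:

    >>> import numpy as np
    >>> from scipy.stats import betabinom
    >>> np.allclose(betabinom.pmf(np.arange(5), n=4, a=1, b=1), 1/5)
    True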

    References
    ----------
    .. [1] https://en.wikipedia.org/wiki/Beta-binomial_distribution

    %(after_notes)s

    .. versionadded:: 1.4.0

    See Also
    --------
    beta, binom

    %(example)s

    """
    def _shape_info(self):
        return [_ShapeInfo("n", True, (0, np.inf), (True, False)),
                _ShapeInfo("a", False, (0, np.inf), (False, False)),
                _ShapeInfo("b", False, (0, np.inf), (False, False))]

    def _rvs(self, n, a, b, size=None, random_state=None):
        p = random_state.beta(a, b, size)
        return random_state.binomial(n, p, size)

    def _get_support(self, n, a, b):
        return 0, n

    def _argcheck(self, n, a, b):
        return (n >= 0) & _isintegral(n) & (a > 0) & (b > 0)

    def _logpmf(self, x, n, a, b):
        k = floor(x)
        combiln = -log(n + 1) - betaln(n - k + 1, k + 1)
        return combiln + betaln(k + a, n - k + b) - betaln(a, b)

    def _pmf(self, x, n, a, b):
        return exp(self._logpmf(x, n, a, b))

    def _stats(self, n, a, b, moments='mv'):
        e_p = a / (a + b)
        e_q = 1 - e_p
        mu = n * e_p
        var = n * (a + b + n) * e_p * e_q / (a + b + 1)
        g1, g2 = None, None
        if 's' in moments:
            g1 = 1.0 / sqrt(var)
            g1 *= (a + b + 2 * n) * (b - a)
            g1 /= (a + b + 2) * (a + b)
        if 'k' in moments:
            g2 = a + b
            g2 *= (a + b - 1 + 6 * n)
            g2 += 3 * a * b * (n - 2)
            g2 += 6 * n ** 2
            g2 -= 3 * e_p * b * n * (6 - n)
            g2 -= 18 * e_p * e_q * n ** 2
            g2 *= (a + b) ** 2 * (1 + a + b)
            g2 /= (n * a * b * (a + b + 2) * (a + b + 3) * (a + b + n))
            g2 -= 3
        return mu, var, g1, g2


betabinom = betabinom_gen(name='betabinom')

class nbinom_gen(rv_discrete):
    r"""A negative binomial discrete random variable.

    %(before_notes)s

    Notes
    -----
    The negative binomial distribution describes a sequence of i.i.d.
    Bernoulli trials, repeated until a predefined, non-random number of
    successes occurs.

    The probability mass function of the number of failures for `nbinom` is:

    .. math::

       f(k) = \binom{k+n-1}{n-1} p^n (1-p)^k

    for :math:`k \ge 0`, :math:`0 < p \leq 1`

    `nbinom` takes :math:`n` and :math:`p` as shape parameters where :math:`n`
    is the number of successes, :math:`p` is the probability of a single
    success, and :math:`1-p` is the probability of a single failure.

    Another common parameterization of the negative binomial distribution is
    in terms of the mean number of failures :math:`\mu` to achieve :math:`n`
    successes. The mean :math:`\mu` is related to the probability of success
    as

    .. math::

       p = \frac{n}{n + \mu}

    The number of successes :math:`n` may also be specified in terms of a
    "dispersion", "heterogeneity", or "aggregation" parameter :math:`\alpha`,
    which relates the mean :math:`\mu` to the variance :math:`\sigma^2`,
    e.g. :math:`\sigma^2 = \mu + \alpha \mu^2`. Regardless of the convention
    used for :math:`\alpha`,

    .. math::

       p &= \frac{\mu}{\sigma^2} \\
       n &= \frac{\mu^2}{\sigma^2 - \mu}
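
    For example, under the convention :math:`\sigma^2 = \mu + \alpha \mu^2`,
    the illustrative values :math:`\mu = 5` and :math:`\alpha = 0.25` (chosen
    here only for demonstration) convert to ``p`` and ``n`` as:

    >>> mu, alpha = 5.0, 0.25
    >>> var = mu + alpha * mu**2
    >>> p, n = mu / var, mu**2 / (var - mu)
    >>> p, n
    (0.4444444444444444, 4.0)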

    %(after_notes)s

    %(example)s

    See Also
    --------
    hypergeom, binom, nhypergeom

    """
    def _shape_info(self):
        return [_ShapeInfo("n", True, (0, np.inf), (True, False)),
                _ShapeInfo("p", False, (0, 1), (True, True))]

    def _rvs(self, n, p, size=None, random_state=None):
        return random_state.negative_binomial(n, p, size)

    def _argcheck(self, n, p):
        return (n > 0) & (p > 0) & (p <= 1)

    def _pmf(self, x, n, p):
        # nbinom.pmf(k) = choose(k+n-1, n-1) * p**n * (1-p)**k
        return _boost._nbinom_pdf(x, n, p)

    def _logpmf(self, x, n, p):
        coeff = gamln(n+x) - gamln(x+1) - gamln(n)
        return coeff + n*log(p) + special.xlog1py(x, -p)

    def _cdf(self, x, n, p):
        k = floor(x)
        return _boost._nbinom_cdf(k, n, p)

    def _logcdf(self, x, n, p):
        k = floor(x)
        cdf = self._cdf(k, n, p)
        cond = cdf > 0.5

        def f1(k, n, p):
            return np.log1p(-special.betainc(k + 1, n, 1 - p))

        # do calc in place
        logcdf = cdf
        with np.errstate(divide='ignore'):
            logcdf[cond] = f1(k[cond], n[cond], p[cond])
            logcdf[~cond] = np.log(cdf[~cond])
        return logcdf

    def _sf(self, x, n, p):
        k = floor(x)
        return _boost._nbinom_sf(k, n, p)

    def _isf(self, x, n, p):
        with warnings.catch_warnings():
            # See gh-14901
            message = "overflow encountered in _nbinom_isf"
            warnings.filterwarnings('ignore', message=message)
            return _boost._nbinom_isf(x, n, p)

    def _ppf(self, q, n, p):
        with warnings.catch_warnings():
            message = "overflow encountered in _nbinom_ppf"
            warnings.filterwarnings('ignore', message=message)
            return _boost._nbinom_ppf(q, n, p)

    def _stats(self, n, p):
        return (
            _boost._nbinom_mean(n, p),
            _boost._nbinom_variance(n, p),
            _boost._nbinom_skewness(n, p),
            _boost._nbinom_kurtosis_excess(n, p),
        )


nbinom = nbinom_gen(name='nbinom')

class geom_gen(rv_discrete):
    r"""A geometric discrete random variable.

    %(before_notes)s

    Notes
    -----
    The probability mass function for `geom` is:

    .. math::

       f(k) = (1-p)^{k-1} p

    for :math:`k \ge 1`, :math:`0 < p \leq 1`

    `geom` takes :math:`p` as shape parameter,
    where :math:`p` is the probability of a single success
    and :math:`1-p` is the probability of a single failure.

    %(after_notes)s

    See Also
    --------
    planck

    %(example)s

    """

    def _shape_info(self):
        return [_ShapeInfo("p", False, (0, 1), (True, True))]

    def _rvs(self, p, size=None, random_state=None):
        return random_state.geometric(p, size=size)

    def _argcheck(self, p):
        return (p <= 1) & (p > 0)

    def _pmf(self, k, p):
        return np.power(1-p, k-1) * p

    def _logpmf(self, k, p):
        return special.xlog1py(k - 1, -p) + log(p)

    def _cdf(self, x, p):
        k = floor(x)
        return -expm1(log1p(-p)*k)

    def _sf(self, x, p):
        return np.exp(self._logsf(x, p))

    def _logsf(self, x, p):
        k = floor(x)
        return k*log1p(-p)

    def _ppf(self, q, p):
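        # Invert the CDF analytically, then step back by one where
        # rounding overshot: the quantile is the smallest admissible k
        # with cdf(k) >= q.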

        vals = ceil(log1p(-q) / log1p(-p))
        temp = self._cdf(vals-1, p)
        return np.where((temp >= q) & (vals > 0), vals-1, vals)

    def _stats(self, p):
        mu = 1.0/p
        qr = 1.0-p
        var = qr / p / p
        g1 = (2.0-p) / sqrt(qr)
        g2 = np.polyval([1, -6, 6], p)/(1.0-p)
        return mu, var, g1, g2


geom = geom_gen(a=1, name='geom', longname="A geometric")

class hypergeom_gen(rv_discrete):
    r"""A hypergeometric discrete random variable.

    The hypergeometric distribution models drawing objects from a bin.
    `M` is the total number of objects and `n` is the total number of
    Type I objects. The random variate represents the number of Type I
    objects in `N` drawn without replacement from the total population.

    %(before_notes)s

    Notes
    -----
    The symbols used to denote the shape parameters (`M`, `n`, and `N`) are not
    universally accepted. See the Examples for a clarification of the
    definitions used here.

    The probability mass function is defined as,

    .. math:: p(k, M, n, N) = \frac{\binom{n}{k} \binom{M - n}{N - k}}
                                   {\binom{M}{N}}

    for :math:`k \in [\max(0, N - M + n), \min(n, N)]`, where the binomial
    coefficients are defined as,

    .. math:: \binom{n}{k} \equiv \frac{n!}{k! (n - k)!}.

    %(after_notes)s

    Examples
    --------
    >>> import numpy as np
    >>> from scipy.stats import hypergeom
    >>> import matplotlib.pyplot as plt

    Suppose we have a collection of 20 animals, of which 7 are dogs. Then if
    we want to know the probability of finding a given number of dogs if we
    choose at random 12 of the 20 animals, we can initialize a frozen
    distribution and plot the probability mass function:

    >>> [M, n, N] = [20, 7, 12]
    >>> rv = hypergeom(M, n, N)
    >>> x = np.arange(0, n+1)
    >>> pmf_dogs = rv.pmf(x)

    >>> fig = plt.figure()
    >>> ax = fig.add_subplot(111)
    >>> ax.plot(x, pmf_dogs, 'bo')
    >>> ax.vlines(x, 0, pmf_dogs, lw=2)
    >>> ax.set_xlabel('# of dogs in our group of chosen animals')
    >>> ax.set_ylabel('hypergeom PMF')
    >>> plt.show()

    Instead of using a frozen distribution, we can also use the `hypergeom`
    methods directly. For example, to obtain the cumulative distribution
    function, use:

    >>> prb = hypergeom.cdf(x, M, n, N)

    And to generate random numbers:

    >>> R = hypergeom.rvs(M, n, N, size=10)

    See Also
    --------
    nhypergeom, binom, nbinom

    """
    def _shape_info(self):
        return [_ShapeInfo("M", True, (0, np.inf), (True, False)),
                _ShapeInfo("n", True, (0, np.inf), (True, False)),
                _ShapeInfo("N", True, (0, np.inf), (True, False))]

    def _rvs(self, M, n, N, size=None, random_state=None):
        return random_state.hypergeometric(n, M-n, N, size=size)

    def _get_support(self, M, n, N):
        return np.maximum(N-(M-n), 0), np.minimum(n, N)

    def _argcheck(self, M, n, N):
        cond = (M > 0) & (n >= 0) & (N >= 0)
        cond &= (n <= M) & (N <= M)
        cond &= _isintegral(M) & _isintegral(n) & _isintegral(N)
        return cond

    def _logpmf(self, k, M, n, N):
        tot, good = M, n
        bad = tot - good
        result = (betaln(good+1, 1) + betaln(bad+1, 1) + betaln(tot-N+1, N+1) -
                  betaln(k+1, good-k+1) - betaln(N-k+1, bad-N+k+1) -
                  betaln(tot+1, 1))
        return result

    def _pmf(self, k, M, n, N):
        return _boost._hypergeom_pdf(k, n, N, M)

    def _cdf(self, k, M, n, N):
        return _boost._hypergeom_cdf(k, n, N, M)

    def _stats(self, M, n, N):
        M, n, N = 1. * M, 1. * n, 1. * N
        m = M - n

        # Boost kurtosis_excess doesn't return the same as the value
        # computed here.
        g2 = M * (M + 1) - 6. * N * (M - N) - 6. * n * m
        g2 *= (M - 1) * M * M
        g2 += 6. * n * N * (M - N) * m * (5. * M - 6)
        g2 /= n * N * (M - N) * m * (M - 2.) * (M - 3.)
        return (
            _boost._hypergeom_mean(n, N, M),
            _boost._hypergeom_variance(n, N, M),
            _boost._hypergeom_skewness(n, N, M),
            g2,
        )

    def _entropy(self, M, n, N):
        k = np.r_[N - (M - n):min(n, N) + 1]
        vals = self.pmf(k, M, n, N)
        return np.sum(entr(vals), axis=0)

    def _sf(self, k, M, n, N):
        return _boost._hypergeom_sf(k, n, N, M)

    def _logsf(self, k, M, n, N):
        res = []
        for quant, tot, good, draw in zip(*np.broadcast_arrays(k, M, n, N)):
            if (quant + 0.5) * (tot + 0.5) < (good - 0.5) * (draw - 0.5):
                # Fewer terms to sum if we calculate log(1-cdf)
                res.append(log1p(-exp(self.logcdf(quant, tot, good, draw))))
            else:
                # Integration over probability mass function using logsumexp
                k2 = np.arange(quant + 1, draw + 1)
                res.append(logsumexp(self._logpmf(k2, tot, good, draw)))
        return np.asarray(res)

    def _logcdf(self, k, M, n, N):
        res = []
        for quant, tot, good, draw in zip(*np.broadcast_arrays(k, M, n, N)):
            if (quant + 0.5) * (tot + 0.5) > (good - 0.5) * (draw - 0.5):
                # Fewer terms to sum if we calculate log(1-sf)
                res.append(log1p(-exp(self.logsf(quant, tot, good, draw))))
            else:
                # Integration over probability mass function using logsumexp
                k2 = np.arange(0, quant + 1)
                res.append(logsumexp(self._logpmf(k2, tot, good, draw)))
        return np.asarray(res)


hypergeom = hypergeom_gen(name='hypergeom')

class nhypergeom_gen(rv_discrete):
    r"""A negative hypergeometric discrete random variable.

    Consider a box containing :math:`M` balls, :math:`n` red and
    :math:`M-n` blue. We randomly sample balls from the box, one
    at a time and *without* replacement, until we have picked :math:`r`
    blue balls. `nhypergeom` is the distribution of the number of
    red balls :math:`k` we have picked.

    %(before_notes)s

    Notes
    -----
    The symbols used to denote the shape parameters (`M`, `n`, and `r`) are not
    universally accepted. See the Examples for a clarification of the
    definitions used here.

    The probability mass function is defined as,

    .. math:: f(k; M, n, r) = \frac{{{k+r-1}\choose{k}}{{M-r-k}\choose{n-k}}}
                                   {{M \choose n}}

    for :math:`k \in [0, n]`, :math:`n \in [0, M]`, :math:`r \in [0, M-n]`,
    and the binomial coefficient is:

    .. math:: \binom{n}{k} \equiv \frac{n!}{k! (n - k)!}.

    It is equivalent to observing :math:`k` successes in :math:`k+r-1`
    samples with the :math:`(k+r)`-th sample being a failure. The former
    can be modelled as a hypergeometric distribution. The probability
    of the latter is simply the number of failures remaining
    :math:`M-n-(r-1)` divided by the size of the remaining population
    :math:`M-(k+r-1)`. This relationship can be shown as:

    .. math:: NHG(k;M,n,r) = HG(k;M,n,k+r-1)\frac{(M-n-(r-1))}{(M-(k+r-1))}

    where :math:`NHG` is the probability mass function (PMF) of the
    negative hypergeometric distribution and :math:`HG` is the
    PMF of the hypergeometric distribution.

    %(after_notes)s

    Examples
    --------
    >>> import numpy as np
    >>> from scipy.stats import nhypergeom
    >>> import matplotlib.pyplot as plt

    Suppose we have a collection of 20 animals, of which 7 are dogs.
    Then if we want to know the probability of finding a given number
    of dogs (successes) in a sample with exactly 12 animals that
    aren't dogs (failures), we can initialize a frozen distribution
    and plot the probability mass function:

    >>> M, n, r = [20, 7, 12]
    >>> rv = nhypergeom(M, n, r)
    >>> x = np.arange(0, n+2)
    >>> pmf_dogs = rv.pmf(x)

    >>> fig = plt.figure()
    >>> ax = fig.add_subplot(111)
    >>> ax.plot(x, pmf_dogs, 'bo')
    >>> ax.vlines(x, 0, pmf_dogs, lw=2)
    >>> ax.set_xlabel('# of dogs in our group with given 12 failures')
    >>> ax.set_ylabel('nhypergeom PMF')
    >>> plt.show()

    Instead of using a frozen distribution, we can also use the `nhypergeom`
    methods directly. For example, to obtain the probability mass
    function, use:

    >>> prb = nhypergeom.pmf(x, M, n, r)

    And to generate random numbers:

    >>> R = nhypergeom.rvs(M, n, r, size=10)

    To verify the relationship between `hypergeom` and `nhypergeom`, use:

    >>> from scipy.stats import hypergeom, nhypergeom
    >>> M, n, r = 45, 13, 8
    >>> k = 6
    >>> nhypergeom.pmf(k, M, n, r)
    0.06180776620271643
    >>> hypergeom.pmf(k, M, n, k+r-1) * (M - n - (r-1)) / (M - (k+r-1))
    0.06180776620271644

    See Also
    --------
    hypergeom, binom, nbinom

    References
    ----------
    .. [1] Negative Hypergeometric Distribution on Wikipedia
           https://en.wikipedia.org/wiki/Negative_hypergeometric_distribution

    .. [2] Negative Hypergeometric Distribution from
           http://www.math.wm.edu/~leemis/chart/UDR/PDFs/Negativehypergeometric.pdf

    """

    def _shape_info(self):
        return [_ShapeInfo("M", True, (0, np.inf), (True, False)),
                _ShapeInfo("n", True, (0, np.inf), (True, False)),
                _ShapeInfo("r", True, (0, np.inf), (True, False))]

    def _get_support(self, M, n, r):
        return 0, n

    def _argcheck(self, M, n, r):
        cond = (n >= 0) & (n <= M) & (r >= 0) & (r <= M-n)
        cond &= _isintegral(M) & _isintegral(n) & _isintegral(r)
        return cond

    def _rvs(self, M, n, r, size=None, random_state=None):

        @_vectorize_rvs_over_shapes
        def _rvs1(M, n, r, size, random_state):
            # invert cdf by calculating all values in support, scalar M, n, r
            a, b = self.support(M, n, r)
            ks = np.arange(a, b+1)
            cdf = self.cdf(ks, M, n, r)
            ppf = interp1d(cdf, ks, kind='next', fill_value='extrapolate')
            rvs = ppf(random_state.uniform(size=size)).astype(int)
            if size is None:
                return rvs.item()
            return rvs

        return _rvs1(M, n, r, size=size, random_state=random_state)

    def _logpmf(self, k, M, n, r):
        cond = ((r == 0) & (k == 0))
        result = _lazywhere(~cond, (k, M, n, r),
                            lambda k, M, n, r:
                                (-betaln(k+1, r) + betaln(k+r, 1) -
                                 betaln(n-k+1, M-r-n+1) + betaln(M-r-k+1, 1) +
                                 betaln(n+1, M-n+1) - betaln(M+1, 1)),
                            fillvalue=0.0)
        return result

    def _pmf(self, k, M, n, r):
        # same as the following but numerically more precise
        # return comb(k+r-1, k) * comb(M-r-k, n-k) / comb(M, n)
        return exp(self._logpmf(k, M, n, r))

    def _stats(self, M, n, r):
        # Promote the datatype to at least float
        # mu = rn / (M-n+1)
        M, n, r = 1.*M, 1.*n, 1.*r
        mu = r*n / (M-n+1)

        var = r*(M+1)*n / ((M-n+1)*(M-n+2)) * (1 - r / (M-n+1))

        # The skew and kurtosis are mathematically
        # intractable so return `None`. See [2]_.
        g1, g2 = None, None
        return mu, var, g1, g2


nhypergeom = nhypergeom_gen(name='nhypergeom')

# FIXME: Fails _cdfvec
class logser_gen(rv_discrete):
    r"""A Logarithmic (Log-Series, Series) discrete random variable.

    %(before_notes)s

    Notes
    -----
    The probability mass function for `logser` is:

    .. math::

       f(k) = - \frac{p^k}{k \log(1-p)}

    for :math:`k \ge 1`, :math:`0 < p < 1`

    `logser` takes :math:`p` as shape parameter,
    where :math:`p` is the probability of a single success
    and :math:`1-p` is the probability of a single failure.
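
    As a quick numerical sanity check (with an illustrative value of
    :math:`p`, not part of the definition above), the PMF sums to one
    over the support:

    >>> import numpy as np
    >>> from scipy.stats import logser
    >>> k = np.arange(1, 200)
    >>> np.isclose(logser.pmf(k, 0.6).sum(), 1.0)
    True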

    %(after_notes)s

    %(example)s

    """

    def _shape_info(self):
        return [_ShapeInfo("p", False, (0, 1), (True, True))]

    def _rvs(self, p, size=None, random_state=None):
        # looks wrong for p>0.5, too few k=1
        # trying to use generic is worse, no k=1 at all
        return random_state.logseries(p, size=size)

    def _argcheck(self, p):
        return (p > 0) & (p < 1)

    def _pmf(self, k, p):
        # logser.pmf(k) = - p**k / (k*log(1-p))
        return -np.power(p, k) * 1.0 / k / special.log1p(-p)

    def _stats(self, p):
        r = special.log1p(-p)
        mu = p / (p - 1.0) / r
        mu2p = -p / r / (p - 1.0)**2
        var = mu2p - mu*mu
        mu3p = -p / r * (1.0+p) / (1.0 - p)**3
        mu3 = mu3p - 3*mu*mu2p + 2*mu**3
        g1 = mu3 / np.power(var, 1.5)

        mu4p = -p / r * (
            1.0 / (p-1)**2 - 6*p / (p - 1)**3 + 6*p*p / (p-1)**4)
        mu4 = mu4p - 4*mu3p*mu + 6*mu2p*mu*mu - 3*mu**4
        g2 = mu4 / var**2 - 3.0
        return mu, var, g1, g2


logser = logser_gen(a=1, name='logser', longname='A logarithmic')

class poisson_gen(rv_discrete):
    r"""A Poisson discrete random variable.

    %(before_notes)s

    Notes
    -----
    The probability mass function for `poisson` is:

    .. math::

       f(k) = \exp(-\mu) \frac{\mu^k}{k!}

    for :math:`k \ge 0`.

    `poisson` takes :math:`\mu \geq 0` as shape parameter.
    When :math:`\mu = 0`, the ``pmf`` method
    returns ``1.0`` at quantile :math:`k = 0`.
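
    For instance:

    >>> from scipy.stats import poisson
    >>> float(poisson.pmf(0, mu=0))
    1.0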

    %(after_notes)s

    %(example)s

    """

    def _shape_info(self):
        return [_ShapeInfo("mu", False, (0, np.inf), (True, False))]

    # Override rv_discrete._argcheck to allow mu=0.
    def _argcheck(self, mu):
        return mu >= 0

    def _rvs(self, mu, size=None, random_state=None):
        return random_state.poisson(mu, size)

    def _logpmf(self, k, mu):
        Pk = special.xlogy(k, mu) - gamln(k + 1) - mu
        return Pk

    def _pmf(self, k, mu):
        # poisson.pmf(k) = exp(-mu) * mu**k / k!
        return exp(self._logpmf(k, mu))

    def _cdf(self, x, mu):
        k = floor(x)
        return special.pdtr(k, mu)

    def _sf(self, x, mu):
        k = floor(x)
        return special.pdtrc(k, mu)

    def _ppf(self, q, mu):
        vals = ceil(special.pdtrik(q, mu))
        vals1 = np.maximum(vals - 1, 0)
        temp = special.pdtr(vals1, mu)
        return np.where(temp >= q, vals1, vals)

    def _stats(self, mu):
        var = mu
        tmp = np.asarray(mu)
        mu_nonzero = tmp > 0
        g1 = _lazywhere(mu_nonzero, (tmp,), lambda x: sqrt(1.0/x), np.inf)
        g2 = _lazywhere(mu_nonzero, (tmp,), lambda x: 1.0/x, np.inf)
        return mu, var, g1, g2


poisson = poisson_gen(name="poisson", longname='A Poisson')

class planck_gen(rv_discrete):
    r"""A Planck discrete exponential random variable.

    %(before_notes)s

    Notes
    -----
    The probability mass function for `planck` is:

    .. math::

       f(k) = (1-\exp(-\lambda)) \exp(-\lambda k)

    for :math:`k \ge 0` and :math:`\lambda > 0`.

    `planck` takes :math:`\lambda` as shape parameter. The Planck distribution
    can be written as a geometric distribution (`geom`) with
    :math:`p = 1 - \exp(-\lambda)` shifted by ``loc = -1``.
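
    A quick numerical check of that relationship (the shape value is
    illustrative, not part of the definition above):

    >>> import numpy as np
    >>> from scipy.stats import planck, geom
    >>> lambda_ = 0.5
    >>> k = np.arange(10)
    >>> np.allclose(planck.pmf(k, lambda_),
    ...             geom.pmf(k, 1 - np.exp(-lambda_), loc=-1))
    True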

    %(after_notes)s

    See Also
    --------
    geom

    %(example)s

    """
    def _shape_info(self):
        return [_ShapeInfo("lambda", False, (0, np.inf), (False, False))]

    def _argcheck(self, lambda_):
        return lambda_ > 0

    def _pmf(self, k, lambda_):
        return -expm1(-lambda_)*exp(-lambda_*k)

    def _cdf(self, x, lambda_):
        k = floor(x)
        return -expm1(-lambda_*(k+1))

    def _sf(self, x, lambda_):
        return exp(self._logsf(x, lambda_))

    def _logsf(self, x, lambda_):
        k = floor(x)
        return -lambda_*(k+1)

    def _ppf(self, q, lambda_):
        vals = ceil(-1.0/lambda_ * log1p(-q)-1)
        vals1 = (vals-1).clip(*(self._get_support(lambda_)))
        temp = self._cdf(vals1, lambda_)
        return np.where(temp >= q, vals1, vals)

    def _rvs(self, lambda_, size=None, random_state=None):
        # use relation to geometric distribution for sampling
        p = -expm1(-lambda_)
        return random_state.geometric(p, size=size) - 1.0

    def _stats(self, lambda_):
        mu = 1/expm1(lambda_)
        var = exp(-lambda_)/(expm1(-lambda_))**2
        g1 = 2*cosh(lambda_/2.0)
        g2 = 4+2*cosh(lambda_)
        return mu, var, g1, g2

    def _entropy(self, lambda_):
        C = -expm1(-lambda_)
        return lambda_*exp(-lambda_)/C - log(C)


planck = planck_gen(a=0, name='planck', longname='A discrete exponential ')

class boltzmann_gen(rv_discrete):
    r"""A Boltzmann (Truncated Discrete Exponential) random variable.

    %(before_notes)s

    Notes
    -----
    The probability mass function for `boltzmann` is:

    .. math::

       f(k) = (1-\exp(-\lambda)) \exp(-\lambda k) / (1-\exp(-\lambda N))

    for :math:`k = 0,..., N-1`.

    `boltzmann` takes :math:`\lambda > 0` and :math:`N > 0` as shape parameters.
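
    The PMF above is the `planck` PMF truncated to :math:`\{0, \dots, N-1\}`
    and renormalized, which can be checked numerically (the parameter values
    are illustrative):

    >>> import numpy as np
    >>> from scipy.stats import boltzmann, planck
    >>> lam, N = 0.7, 8
    >>> k = np.arange(N)
    >>> np.allclose(boltzmann.pmf(k, lam, N),
    ...             planck.pmf(k, lam) / planck.cdf(N - 1, lam))
    True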

    %(after_notes)s

    %(example)s

    """
    def _shape_info(self):
        return [_ShapeInfo("lambda_", False, (0, np.inf), (False, False)),
                _ShapeInfo("N", True, (0, np.inf), (False, False))]

    def _argcheck(self, lambda_, N):
        return (lambda_ > 0) & (N > 0) & _isintegral(N)

    def _get_support(self, lambda_, N):
        return self.a, N - 1

    def _pmf(self, k, lambda_, N):
        # boltzmann.pmf(k) =
        #     (1-exp(-lambda_))*exp(-lambda_*k)/(1-exp(-lambda_*N))
        fact = (1-exp(-lambda_))/(1-exp(-lambda_*N))
        return fact*exp(-lambda_*k)

    def _cdf(self, x, lambda_, N):
        k = floor(x)
        return (1-exp(-lambda_*(k+1)))/(1-exp(-lambda_*N))

    def _ppf(self, q, lambda_, N):
        qnew = q*(1-exp(-lambda_*N))
        vals = ceil(-1.0/lambda_ * log(1-qnew)-1)
        vals1 = (vals-1).clip(0.0, np.inf)
        temp = self._cdf(vals1, lambda_, N)
        return np.where(temp >= q, vals1, vals)

    def _stats(self, lambda_, N):
        z = exp(-lambda_)
        zN = exp(-lambda_*N)
        mu = z/(1.0-z)-N*zN/(1-zN)
        var = z/(1.0-z)**2 - N*N*zN/(1-zN)**2
        trm = (1-zN)/(1-z)
        trm2 = (z*trm**2 - N*N*zN)
        g1 = z*(1+z)*trm**3 - N**3*zN*(1+zN)
        g1 = g1 / trm2**(1.5)
        g2 = z*(1+4*z+z*z)*trm**4 - N**4 * zN*(1+4*zN+zN*zN)
        g2 = g2 / trm2 / trm2
        return mu, var, g1, g2


boltzmann = boltzmann_gen(name='boltzmann', a=0,
                          longname='A truncated discrete exponential ')

class randint_gen(rv_discrete):
    r"""A uniform discrete random variable.

    %(before_notes)s

    Notes
    -----
    The probability mass function for `randint` is:

    .. math::

       f(k) = \frac{1}{\texttt{high} - \texttt{low}}

    for :math:`k \in \{\texttt{low}, \dots, \texttt{high} - 1\}`.

    `randint` takes :math:`\texttt{low}` and :math:`\texttt{high}` as shape
    parameters.

    %(after_notes)s

    %(example)s

    """

    def _shape_info(self):
        return [_ShapeInfo("low", True, (-np.inf, np.inf), (False, False)),
                _ShapeInfo("high", True, (-np.inf, np.inf), (False, False))]

    def _argcheck(self, low, high):
        return (high > low) & _isintegral(low) & _isintegral(high)

    def _get_support(self, low, high):
        return low, high-1

    def _pmf(self, k, low, high):
        # randint.pmf(k) = 1./(high - low)
        p = np.ones_like(k) / (high - low)
        return np.where((k >= low) & (k < high), p, 0.)

    def _cdf(self, x, low, high):
        k = floor(x)
        return (k - low + 1.) / (high - low)

    def _ppf(self, q, low, high):
        vals = ceil(q * (high - low) + low) - 1
        vals1 = (vals - 1).clip(low, high)
        temp = self._cdf(vals1, low, high)
        return np.where(temp >= q, vals1, vals)

    def _stats(self, low, high):
        m2, m1 = np.asarray(high), np.asarray(low)
        mu = (m2 + m1 - 1.0) / 2
        d = m2 - m1
        var = (d*d - 1) / 12.0
        g1 = 0.0
        g2 = -6.0/5.0 * (d*d + 1.0) / (d*d - 1.0)
        return mu, var, g1, g2

    def _rvs(self, low, high, size=None, random_state=None):
        """An array of *size* random integers >= ``low`` and < ``high``."""
        if np.asarray(low).size == 1 and np.asarray(high).size == 1:
            # no need to vectorize in that case
            return rng_integers(random_state, low, high, size=size)

        if size is not None:
            # NumPy's RandomState.randint() doesn't broadcast its arguments.
            # Use `broadcast_to()` to extend the shapes of low and high
            # up to size. Then we can use the numpy.vectorize'd
            # randint without needing to pass it a `size` argument.
            low = np.broadcast_to(low, size)
            high = np.broadcast_to(high, size)
        randint = np.vectorize(partial(rng_integers, random_state),
                               otypes=[np.int_])
        return randint(low, high)

    def _entropy(self, low, high):
        return log(high - low)


randint = randint_gen(name='randint', longname='A discrete uniform '
                                               '(random integer)')

# FIXME: problems sampling.
class zipf_gen(rv_discrete):
    r"""A Zipf (Zeta) discrete random variable.

    %(before_notes)s

    See Also
    --------
    zipfian

    Notes
    -----
    The probability mass function for `zipf` is:

    .. math::

       f(k, a) = \frac{1}{\zeta(a) k^a}

    for :math:`k \ge 1`, :math:`a > 1`.

    `zipf` takes :math:`a > 1` as shape parameter. :math:`\zeta` is the
    Riemann zeta function (`scipy.special.zeta`).

    The Zipf distribution is also known as the zeta distribution, which is
    a special case of the Zipfian distribution (`zipfian`).

    %(after_notes)s

    References
    ----------
    .. [1] "Zeta Distribution", Wikipedia,
           https://en.wikipedia.org/wiki/Zeta_distribution

    %(example)s

    Confirm that `zipf` is the large `n` limit of `zipfian`.

    >>> import numpy as np
    >>> from scipy.stats import zipfian
    >>> k = np.arange(11)
    >>> np.allclose(zipf.pmf(k, a), zipfian.pmf(k, a, n=10000000))
    True

    """

    def _shape_info(self):
        return [_ShapeInfo("a", False, (1, np.inf), (False, False))]

    def _rvs(self, a, size=None, random_state=None):
        return random_state.zipf(a, size=size)

    def _argcheck(self, a):
        return a > 1

    def _pmf(self, k, a):
        # zipf.pmf(k, a) = 1/(zeta(a) * k**a)
        Pk = 1.0 / special.zeta(a, 1) / k**a
        return Pk

    def _munp(self, n, a):
        return _lazywhere(
            a > n + 1, (a, n),
            lambda a, n: special.zeta(a - n, 1) / special.zeta(a, 1),
            np.inf)


zipf = zipf_gen(a=1, name='zipf', longname='A Zipf')


def _gen_harmonic_gt1(n, a):
    """Generalized harmonic number, a > 1"""
    # See https://en.wikipedia.org/wiki/Harmonic_number; search for "hurwitz"
    return zeta(a, 1) - zeta(a, n+1)


def _gen_harmonic_leq1(n, a):
    """Generalized harmonic number, a <= 1"""
    if not np.size(n):
        return n
    n_max = np.max(n)  # loop starts at maximum of all n
    out = np.zeros_like(a, dtype=float)
    # add terms of harmonic series; starting from smallest to avoid roundoff
    for i in np.arange(n_max, 0, -1, dtype=float):
        mask = i <= n  # don't add terms after nth
        out[mask] += 1/i**a[mask]
    return out


def _gen_harmonic(n, a):
    """Generalized harmonic number"""
    n, a = np.broadcast_arrays(n, a)
    return _lazywhere(a > 1, (n, a),
                      f=_gen_harmonic_gt1, f2=_gen_harmonic_leq1)

class zipfian_gen(rv_discrete):
    r"""A Zipfian discrete random variable.

    %(before_notes)s

    See Also
    --------
    zipf

    Notes
    -----
    The probability mass function for `zipfian` is:

    .. math::

       f(k, a, n) = \frac{1}{H_{n,a} k^a}

    for :math:`k \in \{1, 2, \dots, n-1, n\}`, :math:`a \ge 0`,
    :math:`n \in \{1, 2, 3, \dots\}`.

    `zipfian` takes :math:`a` and :math:`n` as shape parameters.
    :math:`H_{n,a}` is the :math:`n`:sup:`th` generalized harmonic
    number of order :math:`a`.

    The Zipfian distribution reduces to the Zipf (zeta) distribution as
    :math:`n \rightarrow \infty`.

    %(after_notes)s

    References
    ----------
    .. [1] "Zipf's Law", Wikipedia, https://en.wikipedia.org/wiki/Zipf's_law
    .. [2] Larry Leemis, "Zipf Distribution", Univariate Distribution
           Relationships. http://www.math.wm.edu/~leemis/chart/UDR/PDFs/Zipf.pdf

    %(example)s

    Confirm that `zipfian` reduces to `zipf` for large `n`, `a > 1`.

    >>> import numpy as np
    >>> from scipy.stats import zipf
    >>> k = np.arange(11)
    >>> np.allclose(zipfian.pmf(k, a=3.5, n=10000000), zipf.pmf(k, a=3.5))
    True

    """

    def _shape_info(self):
        return [_ShapeInfo("a", False, (0, np.inf), (True, False)),
                _ShapeInfo("n", True, (0, np.inf), (False, False))]

    def _argcheck(self, a, n):
        # we need np.asarray here because moment (maybe others) don't convert
        return (a >= 0) & (n > 0) & (n == np.asarray(n, dtype=int))

    def _get_support(self, a, n):
        return 1, n

    def _pmf(self, k, a, n):
        return 1.0 / _gen_harmonic(n, a) / k**a

    def _cdf(self, k, a, n):
        return _gen_harmonic(k, a) / _gen_harmonic(n, a)

    def _sf(self, k, a, n):
        k = k + 1  # to match SciPy convention
        # see http://www.math.wm.edu/~leemis/chart/UDR/PDFs/Zipf.pdf
        return ((k**a*(_gen_harmonic(n, a) - _gen_harmonic(k, a)) + 1)
                / (k**a*_gen_harmonic(n, a)))

    def _stats(self, a, n):
        # see http://www.math.wm.edu/~leemis/chart/UDR/PDFs/Zipf.pdf
        Hna = _gen_harmonic(n, a)
        Hna1 = _gen_harmonic(n, a-1)
        Hna2 = _gen_harmonic(n, a-2)
        Hna3 = _gen_harmonic(n, a-3)
        Hna4 = _gen_harmonic(n, a-4)
        mu1 = Hna1/Hna
        mu2n = (Hna2*Hna - Hna1**2)
        mu2d = Hna**2
        mu2 = mu2n / mu2d
        g1 = (Hna3/Hna - 3*Hna1*Hna2/Hna**2 + 2*Hna1**3/Hna**3)/mu2**(3/2)
        g2 = (Hna**3*Hna4 - 4*Hna**2*Hna1*Hna3 + 6*Hna*Hna1**2*Hna2
              - 3*Hna1**4) / mu2n**2
        g2 -= 3
        return mu1, mu2, g1, g2


zipfian = zipfian_gen(a=1, name='zipfian', longname='A Zipfian')

class dlaplace_gen(rv_discrete):
    r"""A Laplacian discrete random variable.

    %(before_notes)s

    Notes
    -----
    The probability mass function for `dlaplace` is:

    .. math::

       f(k) = \tanh(a/2) \exp(-a |k|)

    for integers :math:`k` and :math:`a > 0`.

    `dlaplace` takes :math:`a` as shape parameter.
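
    As a quick numerical sanity check (with an illustrative shape value,
    not part of the definition above), the PMF sums to one over the
    integers:

    >>> import numpy as np
    >>> from scipy.stats import dlaplace
    >>> k = np.arange(-50, 51)
    >>> np.isclose(dlaplace.pmf(k, 0.8).sum(), 1.0)
    True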

    %(after_notes)s

    %(example)s

    """

    def _shape_info(self):
        return [_ShapeInfo("a", False, (0, np.inf), (False, False))]

    def _pmf(self, k, a):
        # dlaplace.pmf(k) = tanh(a/2) * exp(-a*abs(k))
        return tanh(a/2.0) * exp(-a * abs(k))

    def _cdf(self, x, a):
        k = floor(x)

        def f(k, a):
            return 1.0 - exp(-a * k) / (exp(a) + 1)

        def f2(k, a):
            return exp(a * (k+1)) / (exp(a) + 1)

        return _lazywhere(k >= 0, (k, a), f=f, f2=f2)

    def _ppf(self, q, a):
        const = 1 + exp(a)
        vals = ceil(np.where(q < 1.0 / (1 + exp(-a)),
                             log(q*const) / a - 1,
                             -log((1-q) * const) / a))
        vals1 = vals - 1
        return np.where(self._cdf(vals1, a) >= q, vals1, vals)

    def _stats(self, a):
        ea = exp(a)
        mu2 = 2.*ea/(ea-1.)**2
        mu4 = 2.*ea*(ea**2+10.*ea+1.) / (ea-1.)**4
        return 0., mu2, 0., mu4/mu2**2 - 3.

    def _entropy(self, a):
        return a / sinh(a) - log(tanh(a/2.0))

    def _rvs(self, a, size=None, random_state=None):
        # The discrete Laplace is equivalent to the two-sided geometric
        # distribution with PMF:
        #   f(k) = (1 - alpha)/(1 + alpha) * alpha^abs(k)
        # Reference:
        #   https://www.sciencedirect.com/science/
        #   article/abs/pii/S0378375804003519
        # Furthermore, the two-sided geometric distribution is
        # equivalent to the difference between two iid geometric
        # distributions.
        # Reference (page 179):
        #   https://pdfs.semanticscholar.org/61b3/
        #   b99f466815808fd0d03f5d2791eea8b541a1.pdf
        # Thus, we can leverage the following:
        #   1) alpha = e^-a
        #   2) probability_of_success = 1 - alpha (Bernoulli trial)
        probOfSuccess = -np.expm1(-np.asarray(a))
        x = random_state.geometric(probOfSuccess, size=size)
        y = random_state.geometric(probOfSuccess, size=size)
        return x - y


dlaplace = dlaplace_gen(a=-np.inf,
                        name='dlaplace', longname='A discrete Laplacian')

class skellam_gen(rv_discrete):
    r"""A Skellam discrete random variable.

    %(before_notes)s

    Notes
    -----
    Probability distribution of the difference of two correlated or
    uncorrelated Poisson random variables.

    Let :math:`k_1` and :math:`k_2` be two Poisson-distributed random
    variables with expected values :math:`\lambda_1` and :math:`\lambda_2`.
    Then, :math:`k_1 - k_2` follows a Skellam distribution with parameters
    :math:`\mu_1 = \lambda_1 - \rho \sqrt{\lambda_1 \lambda_2}` and
    :math:`\mu_2 = \lambda_2 - \rho \sqrt{\lambda_1 \lambda_2}`, where
    :math:`\rho` is the correlation coefficient between :math:`k_1` and
    :math:`k_2`. If the two Poisson-distributed random variables are
    independent then :math:`\rho = 0`.

    Parameters :math:`\mu_1` and :math:`\mu_2` must be strictly positive.

    For details see: https://en.wikipedia.org/wiki/Skellam_distribution

    `skellam` takes :math:`\mu_1` and :math:`\mu_2` as shape parameters.
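
    For example (with illustrative values), the mean and variance follow
    directly from the shape parameters:

    >>> import numpy as np
    >>> from scipy.stats import skellam
    >>> mean, var = skellam.stats(3.0, 2.0, moments='mv')
    >>> np.allclose([mean, var], [3.0 - 2.0, 3.0 + 2.0])
    True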

    %(after_notes)s

    %(example)s

    """
    def _shape_info(self):
        return [_ShapeInfo("mu1", False, (0, np.inf), (False, False)),
                _ShapeInfo("mu2", False, (0, np.inf), (False, False))]

    def _rvs(self, mu1, mu2, size=None, random_state=None):
        n = size
        return (random_state.poisson(mu1, n) -
                random_state.poisson(mu2, n))

    def _pmf(self, x, mu1, mu2):
        with warnings.catch_warnings():
            message = "overflow encountered in _ncx2_pdf"
            warnings.filterwarnings("ignore", message=message)
            px = np.where(x < 0,
                          _boost._ncx2_pdf(2*mu2, 2*(1-x), 2*mu1)*2,
                          _boost._ncx2_pdf(2*mu1, 2*(1+x), 2*mu2)*2)
        # ncx2.pdf() returns nan's for extremely low probabilities
        return px

    def _cdf(self, x, mu1, mu2):
        x = floor(x)
        px = np.where(x < 0,
                      _boost._ncx2_cdf(2*mu2, -2*x, 2*mu1),
                      1 - _boost._ncx2_cdf(2*mu1, 2*(x+1), 2*mu2))
        return px

    def _stats(self, mu1, mu2):
        mean = mu1 - mu2
        var = mu1 + mu2
        g1 = mean / sqrt((var)**3)
        g2 = 1 / var
        return mean, var, g1, g2


skellam = skellam_gen(a=-np.inf, name="skellam", longname='A Skellam')

class yulesimon_gen(rv_discrete):
    r"""A Yule-Simon discrete random variable.

    %(before_notes)s

    Notes
    -----
    The probability mass function for `yulesimon` is:

    .. math::

       f(k) = \alpha B(k, \alpha+1)

    for :math:`k=1,2,3,...`, where :math:`\alpha>0`.
    Here :math:`B` refers to the `scipy.special.beta` function.

    The sampling of random variates is based on pg 553, Section 6.3 of [1]_.
    Our notation maps to the referenced logic via :math:`\alpha=a-1`.

    For details see the wikipedia entry [2]_.

    References
    ----------
    .. [1] Devroye, Luc. "Non-uniform Random Variate Generation",
           (1986) Springer, New York.

    .. [2] https://en.wikipedia.org/wiki/Yule-Simon_distribution

    %(after_notes)s

    %(example)s

    """
    def _shape_info(self):
        return [_ShapeInfo("alpha", False, (0, np.inf), (False, False))]

    def _rvs(self, alpha, size=None, random_state=None):
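        # Devroye (1986), Section 6.3: the Yule-Simon distribution is a
        # mixture of geometric distributions whose success probability
        # p = exp(-W) follows a Beta(alpha, 1) law when W = E2 / alpha is
        # exponential with rate alpha. The geometric variate is then
        # generated by inversion, with -E1 serving as the log of a
        # uniform variate.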

        E1 = random_state.standard_exponential(size)
        E2 = random_state.standard_exponential(size)
        ans = ceil(-E1 / log1p(-exp(-E2 / alpha)))
        return ans

    def _pmf(self, x, alpha):
        return alpha * special.beta(x, alpha + 1)

    def _argcheck(self, alpha):
        return (alpha > 0)

    def _logpmf(self, x, alpha):
        return log(alpha) + special.betaln(x, alpha + 1)

    def _cdf(self, x, alpha):
        return 1 - x * special.beta(x, alpha + 1)

    def _sf(self, x, alpha):
        return x * special.beta(x, alpha + 1)

    def _logsf(self, x, alpha):
        return log(x) + special.betaln(x, alpha + 1)

    def _stats(self, alpha):
        mu = np.where(alpha <= 1, np.inf, alpha / (alpha - 1))
        mu2 = np.where(alpha > 2,
                       alpha**2 / ((alpha - 2.0) * (alpha - 1)**2),
                       np.inf)
        mu2 = np.where(alpha <= 1, np.nan, mu2)
        g1 = np.where(alpha > 3,
                      sqrt(alpha - 2) * (alpha + 1)**2 / (alpha * (alpha - 3)),
                      np.inf)
        g1 = np.where(alpha <= 2, np.nan, g1)
        g2 = np.where(alpha > 4,
                      (alpha + 3) + (alpha**3 - 49 * alpha - 22) / (alpha *
                      (alpha - 4) * (alpha - 3)), np.inf)
        g2 = np.where(alpha <= 2, np.nan, g2)
        return mu, mu2, g1, g2


yulesimon = yulesimon_gen(name='yulesimon', a=1)

def _vectorize_rvs_over_shapes(_rvs1):
    """Decorator that vectorizes _rvs method to work on ndarray shapes"""
    # _rvs1 must be a _function_ that accepts _scalar_ args as positional
    # arguments, `size` and `random_state` as keyword arguments.
    # _rvs1 must return a random variate array with shape `size`. If `size` is
    # None, _rvs1 must return a scalar.
    # When applied to _rvs1, this decorator broadcasts ndarray args
    # and loops over them, calling _rvs1 for each set of scalar args.
    # For usage example, see _nchypergeom_gen
    def _rvs(*args, size, random_state):
        _rvs1_size, _rvs1_indices = _check_shape(args[0].shape, size)

        size = np.array(size)
        _rvs1_size = np.array(_rvs1_size)
        _rvs1_indices = np.array(_rvs1_indices)

        if np.all(_rvs1_indices):  # all args are scalars
            return _rvs1(*args, size, random_state)

        out = np.empty(size)

        # out.shape can mix dimensions associated with arg_shape and _rvs1_size
        # Sort them to arg_shape + _rvs1_size for easy indexing of dimensions
        # corresponding with the different sets of scalar args
        j0 = np.arange(out.ndim)
        j1 = np.hstack((j0[~_rvs1_indices], j0[_rvs1_indices]))
        out = np.moveaxis(out, j1, j0)

        for i in np.ndindex(*size[~_rvs1_indices]):
            # arg can be squeezed because singleton dimensions will be
            # associated with _rvs1_size, not arg_shape per _check_shape
            out[i] = _rvs1(*[np.squeeze(arg)[i] for arg in args],
                           _rvs1_size, random_state)

        return np.moveaxis(out, j0, j1)  # move axes back before returning
    return _rvs

class _nchypergeom_gen(rv_discrete):
    r"""A noncentral hypergeometric discrete random variable.

    For subclassing by nchypergeom_fisher_gen and nchypergeom_wallenius_gen.

    """

    rvs_name = None
    dist = None

    def _shape_info(self):
        return [_ShapeInfo("M", True, (0, np.inf), (True, False)),
                _ShapeInfo("n", True, (0, np.inf), (True, False)),
                _ShapeInfo("N", True, (0, np.inf), (True, False)),
                _ShapeInfo("odds", False, (0, np.inf), (False, False))]

    def _get_support(self, M, n, N, odds):
        N, m1, n = M, n, N  # follow Wikipedia notation
        m2 = N - m1
        x_min = np.maximum(0, n - m2)
        x_max = np.minimum(n, m1)
        return x_min, x_max

    def _argcheck(self, M, n, N, odds):
        M, n = np.asarray(M), np.asarray(n)
        N, odds = np.asarray(N), np.asarray(odds)
        cond1 = (M.astype(int) == M) & (M >= 0)
        cond2 = (n.astype(int) == n) & (n >= 0)
        cond3 = (N.astype(int) == N) & (N >= 0)
        cond4 = odds > 0
        cond5 = N <= M
        cond6 = n <= M
        return cond1 & cond2 & cond3 & cond4 & cond5 & cond6

    def _rvs(self, M, n, N, odds, size=None, random_state=None):

        @_vectorize_rvs_over_shapes
        def _rvs1(M, n, N, odds, size, random_state):
            length = np.prod(size)
            urn = _PyStochasticLib3()
            rv_gen = getattr(urn, self.rvs_name)
            rvs = rv_gen(N, n, M, odds, length, random_state)
            rvs = rvs.reshape(size)
            return rvs

        return _rvs1(M, n, N, odds, size=size, random_state=random_state)

    def _pmf(self, x, M, n, N, odds):

        x, M, n, N, odds = np.broadcast_arrays(x, M, n, N, odds)
        if x.size == 0:  # np.vectorize doesn't work with zero size input
            return np.empty_like(x)

        @np.vectorize
        def _pmf1(x, M, n, N, odds):
            urn = self.dist(N, n, M, odds, 1e-12)
            return urn.probability(x)

        return _pmf1(x, M, n, N, odds)

    def _stats(self, M, n, N, odds, moments):

        @np.vectorize
        def _moments1(M, n, N, odds):
            urn = self.dist(N, n, M, odds, 1e-12)
            return urn.moments()

        m, v = (_moments1(M, n, N, odds) if ("m" in moments or "v" in moments)
                else (None, None))
        s, k = None, None
        return m, v, s, k

class nchypergeom_fisher_gen(_nchypergeom_gen):
    r"""A Fisher's noncentral hypergeometric discrete random variable.

    Fisher's noncentral hypergeometric distribution models drawing objects of
    two types from a bin. `M` is the total number of objects, `n` is the
    number of Type I objects, and `odds` is the odds ratio: the odds of
    selecting a Type I object rather than a Type II object when there is only
    one object of each type.
    The random variate represents the number of Type I objects drawn if we
    take a handful of objects from the bin at once and find out afterwards
    that we took `N` objects.

    %(before_notes)s

    See Also
    --------
    nchypergeom_wallenius, hypergeom, nhypergeom

    Notes
    -----
    Let mathematical symbols :math:`N`, :math:`n`, and :math:`M` correspond
    with parameters `N`, `n`, and `M` (respectively) as defined above.

    The probability mass function is defined as

    .. math::

        p(x; M, n, N, \omega) =
        \frac{\binom{n}{x}\binom{M - n}{N-x}\omega^x}{P_0},

    for
    :math:`x \in [x_l, x_u]`,
    :math:`M \in {\mathbb N}`,
    :math:`n \in [0, M]`,
    :math:`N \in [0, M]`,
    :math:`\omega > 0`,
    where
    :math:`x_l = \max(0, N - (M - n))`,
    :math:`x_u = \min(N, n)`,

    .. math::

        P_0 = \sum_{y=x_l}^{x_u} \binom{n}{y}\binom{M - n}{N-y}\omega^y,

    and the binomial coefficients are defined as

    .. math:: \binom{n}{k} \equiv \frac{n!}{k! (n - k)!}.

    `nchypergeom_fisher` uses the BiasedUrn package by Agner Fog with
    permission for it to be distributed under SciPy's license.

    The symbols used to denote the shape parameters (`N`, `n`, and `M`) are not
    universally accepted; they are chosen for consistency with `hypergeom`.

    Note that Fisher's noncentral hypergeometric distribution is distinct
    from Wallenius' noncentral hypergeometric distribution, which models
    drawing a pre-determined number `N` of objects from a bin one by one.
    When the odds ratio is unity, however, both distributions reduce to the
    ordinary hypergeometric distribution.
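
    That reduction can be checked numerically; the parameter values below
    are chosen only for illustration:

    >>> import numpy as np
    >>> from scipy.stats import nchypergeom_fisher, hypergeom
    >>> M, n, N = 25, 10, 12
    >>> x = np.arange(13)
    >>> np.allclose(nchypergeom_fisher.pmf(x, M, n, N, odds=1),
    ...             hypergeom.pmf(x, M, n, N))
    True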

    %(after_notes)s

    References
    ----------
    .. [1] Agner Fog, "Biased Urn Theory".
           https://cran.r-project.org/web/packages/BiasedUrn/vignettes/UrnTheory.pdf

    .. [2] "Fisher's noncentral hypergeometric distribution", Wikipedia,
           https://en.wikipedia.org/wiki/Fisher's_noncentral_hypergeometric_distribution

    %(example)s

    """

    rvs_name = "rvs_fisher"
    dist = _PyFishersNCHypergeometric


nchypergeom_fisher = nchypergeom_fisher_gen(
    name='nchypergeom_fisher',
    longname="A Fisher's noncentral hypergeometric")

class nchypergeom_wallenius_gen(_nchypergeom_gen):
    r"""A Wallenius' noncentral hypergeometric discrete random variable.

    Wallenius' noncentral hypergeometric distribution models drawing objects
    of two types from a bin. `M` is the total number of objects, `n` is the
    number of Type I objects, and `odds` is the odds ratio: the odds of
    selecting a Type I object rather than a Type II object when there is only
    one object of each type.
    The random variate represents the number of Type I objects drawn if we
    draw a pre-determined number `N` of objects from a bin one by one.

    %(before_notes)s

    See Also
    --------
    nchypergeom_fisher, hypergeom, nhypergeom

    Notes
    -----
    Let mathematical symbols :math:`N`, :math:`n`, and :math:`M` correspond
    with parameters `N`, `n`, and `M` (respectively) as defined above.

    The probability mass function is defined as

    .. math::

        p(x; N, n, M) = \binom{n}{x} \binom{M - n}{N-x}
        \int_0^1 \left(1-t^{\omega/D}\right)^x\left(1-t^{1/D}\right)^{N-x} dt

    for
    :math:`x \in [x_l, x_u]`,
    :math:`M \in {\mathbb N}`,
    :math:`n \in [0, M]`,
    :math:`N \in [0, M]`,
    :math:`\omega > 0`,
    where
    :math:`x_l = \max(0, N - (M - n))`,
    :math:`x_u = \min(N, n)`,

    .. math::

        D = \omega(n - x) + ((M - n)-(N-x)),

    and the binomial coefficients are defined as

    .. math:: \binom{n}{k} \equiv \frac{n!}{k! (n - k)!}.

    `nchypergeom_wallenius` uses the BiasedUrn package by Agner Fog with
    permission for it to be distributed under SciPy's license.

    The symbols used to denote the shape parameters (`N`, `n`, and `M`) are not
    universally accepted; they are chosen for consistency with `hypergeom`.

    Note that Wallenius' noncentral hypergeometric distribution is distinct
    from Fisher's noncentral hypergeometric distribution, which models taking
    a handful of objects from the bin at once and finding out afterwards that
    `N` objects were taken.
    When the odds ratio is unity, however, both distributions reduce to the
    ordinary hypergeometric distribution.

    %(after_notes)s

    References
    ----------
    .. [1] Agner Fog, "Biased Urn Theory".
           https://cran.r-project.org/web/packages/BiasedUrn/vignettes/UrnTheory.pdf

    .. [2] "Wallenius' noncentral hypergeometric distribution", Wikipedia,
           https://en.wikipedia.org/wiki/Wallenius'_noncentral_hypergeometric_distribution

    %(example)s

    """

    rvs_name = "rvs_wallenius"
    dist = _PyWalleniusNCHypergeometric


nchypergeom_wallenius = nchypergeom_wallenius_gen(
    name='nchypergeom_wallenius',
    longname="A Wallenius' noncentral hypergeometric")

# Collect names of classes and objects in this module.
pairs = list(globals().copy().items())
_distn_names, _distn_gen_names = get_distribution_names(pairs, rv_discrete)

__all__ = _distn_names + _distn_gen_names