Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/dask/array/random.py: 1%

1from __future__ import annotations

3import contextlib

4import importlib

5import numbers

6from itertools import chain, product

7from numbers import Integral

8from operator import getitem

9from threading import Lock

11import numpy as np

13from dask.array.backends import array_creation_dispatch

14from dask.array.core import (

15 Array,

16 asarray,

17 broadcast_shapes,

18 broadcast_to,

19 normalize_chunks,

20 slices_from_chunks,

21)

22from dask.array.creation import arange

23from dask.array.utils import asarray_safe

24from dask.base import tokenize

25from dask.highlevelgraph import HighLevelGraph

26from dask.utils import derived_from, random_state_data, typename

class Generator:
    """
    Container for the BitGenerators.

    ``Generator`` offers a collection of methods that draw random numbers
    from many probability distributions, and it is meant to replace
    ``RandomState``. The key difference is that ``Generator`` delegates
    state management and raw random-bit production to a separate
    ``BitGenerator``; those bits are then turned into samples from the
    requested distribution. ``PCG64`` is the default ``BitGenerator``. A
    different one can be used by handing an instantiated ``BitGenerator``
    to ``Generator``.

    The recommended way to create a ``Generator`` is the function
    :func:`dask.array.random.default_rng`.

    .. warning::

       No Compatibility Guarantee.

       ``Generator`` does not provide a version compatibility guarantee. In
       particular, as better algorithms evolve the bit stream may change.

    Parameters
    ----------
    bit_generator : BitGenerator
        BitGenerator to use as the core generator.

    Notes
    -----
    Besides the distribution-specific arguments, every ``Generator``
    method accepts a keyword argument `size` whose default is ``None``.
    With ``None`` a single value is generated and returned; with an
    integer a 1-D array of generated values is returned; with a tuple an
    array of that shape is filled and returned.

    The Python stdlib module `random` contains a pseudo-random number
    generator with a number of methods similar to those available in
    ``Generator``. It uses Mersenne Twister, and this bit generator can
    be accessed using ``MT19937``. ``Generator``, besides being
    Dask-aware, has the advantage that it provides a much larger number
    of probability distributions to choose from.

    All ``Generator`` methods are identical to ``np.random.Generator`` except
    that they also take a `chunks=` keyword argument.

    ``Generator`` does not guarantee parity in the generated numbers
    with any third party library. In particular, numbers generated by
    `Dask` and `NumPy` will differ even if they use the same seed.

    Examples
    --------
    >>> from numpy.random import PCG64
    >>> from dask.array.random import Generator
    >>> rng = Generator(PCG64())
    >>> rng.standard_normal().compute() # doctest: +SKIP
    array(0.44595957) # random

    See Also
    --------
    default_rng : Recommended constructor for `Generator`.
    np.random.Generator
    """

    def __init__(self, bit_generator):
        self._bit_generator = bit_generator

    def __str__(self):
        own_name = self.__class__.__name__
        bitgen_name = self._bit_generator.__class__.__name__
        return f"{own_name}({bitgen_name})"

    @property
    def _backend_name(self):
        # Assumes typename(self._bit_generator) starts with an
        # array-library name (e.g. "numpy" or "cupy")
        qualified_name = typename(self._bit_generator)
        return qualified_name.split(".")[0]

    @property
    def _backend(self):
        # Assumes `self._backend_name` is an importable
        # array-library name (e.g. "numpy" or "cupy")
        return importlib.import_module(self._backend_name)

    @derived_from(np.random.Generator, skipblocks=1)
    def beta(self, a, b, size=None, chunks="auto", **kwargs):
        return _wrap_func(self, "beta", a, b, size=size, chunks=chunks, **kwargs)

    @derived_from(np.random.Generator, skipblocks=1)
    def binomial(self, n, p, size=None, chunks="auto", **kwargs):
        return _wrap_func(self, "binomial", n, p, size=size, chunks=chunks, **kwargs)

    @derived_from(np.random.Generator, skipblocks=1)
    def chisquare(self, df, size=None, chunks="auto", **kwargs):
        return _wrap_func(self, "chisquare", df, size=size, chunks=chunks, **kwargs)

    @derived_from(np.random.Generator, skipblocks=1)
    def choice(
        self,
        a,
        size=None,
        replace=True,
        p=None,
        axis=0,
        shuffle=True,
        chunks="auto",
    ):
        validated = _choice_validate_params(self, a, size, replace, p, axis, chunks)
        a, size, replace, p, axis, chunks, meta, dependencies = validated

        # One task per output chunk, each fed by its own spawned bit generator.
        chunk_shapes = list(product(*chunks))
        bitgens = _spawn_bitgens(self._bit_generator, len(chunk_shapes))

        token = tokenize(bitgens, size, chunks, a, replace, p, axis, shuffle)
        name = f"da.random.choice-{token}"
        keys = product([name], *(range(len(bd)) for bd in chunks))
        dsk = {
            key: (_choice_rng, chunk_bitgen, a, chunk_shape, replace, p, axis, shuffle)
            for key, chunk_bitgen, chunk_shape in zip(keys, bitgens, chunk_shapes)
        }

        graph = HighLevelGraph.from_collections(name, dsk, dependencies=dependencies)
        return Array(graph, name, chunks, meta=meta)

    @derived_from(np.random.Generator, skipblocks=1)
    def exponential(self, scale=1.0, size=None, chunks="auto", **kwargs):
        return _wrap_func(
            self, "exponential", scale, size=size, chunks=chunks, **kwargs
        )

    @derived_from(np.random.Generator, skipblocks=1)
    def f(self, dfnum, dfden, size=None, chunks="auto", **kwargs):
        return _wrap_func(self, "f", dfnum, dfden, size=size, chunks=chunks, **kwargs)

    @derived_from(np.random.Generator, skipblocks=1)
    def gamma(self, shape, scale=1.0, size=None, chunks="auto", **kwargs):
        return _wrap_func(
            self, "gamma", shape, scale, size=size, chunks=chunks, **kwargs
        )

    @derived_from(np.random.Generator, skipblocks=1)
    def geometric(self, p, size=None, chunks="auto", **kwargs):
        return _wrap_func(self, "geometric", p, size=size, chunks=chunks, **kwargs)

    @derived_from(np.random.Generator, skipblocks=1)
    def gumbel(self, loc=0.0, scale=1.0, size=None, chunks="auto", **kwargs):
        return _wrap_func(
            self, "gumbel", loc, scale, size=size, chunks=chunks, **kwargs
        )

    @derived_from(np.random.Generator, skipblocks=1)
    def hypergeometric(self, ngood, nbad, nsample, size=None, chunks="auto", **kwargs):
        return _wrap_func(
            self,
            "hypergeometric",
            ngood,
            nbad,
            nsample,
            size=size,
            chunks=chunks,
            **kwargs,
        )

    @derived_from(np.random.Generator, skipblocks=1)
    def integers(
        self,
        low,
        high=None,
        size=None,
        dtype=np.int64,
        endpoint=False,
        chunks="auto",
        **kwargs,
    ):
        return _wrap_func(
            self,
            "integers",
            low,
            high=high,
            size=size,
            dtype=dtype,
            endpoint=endpoint,
            chunks=chunks,
            **kwargs,
        )

    @derived_from(np.random.Generator, skipblocks=1)
    def laplace(self, loc=0.0, scale=1.0, size=None, chunks="auto", **kwargs):
        return _wrap_func(
            self, "laplace", loc, scale, size=size, chunks=chunks, **kwargs
        )

    @derived_from(np.random.Generator, skipblocks=1)
    def logistic(self, loc=0.0, scale=1.0, size=None, chunks="auto", **kwargs):
        return _wrap_func(
            self, "logistic", loc, scale, size=size, chunks=chunks, **kwargs
        )

    @derived_from(np.random.Generator, skipblocks=1)
    def lognormal(self, mean=0.0, sigma=1.0, size=None, chunks="auto", **kwargs):
        return _wrap_func(
            self, "lognormal", mean, sigma, size=size, chunks=chunks, **kwargs
        )

    @derived_from(np.random.Generator, skipblocks=1)
    def logseries(self, p, size=None, chunks="auto", **kwargs):
        return _wrap_func(self, "logseries", p, size=size, chunks=chunks, **kwargs)

    @derived_from(np.random.Generator, skipblocks=1)
    def multinomial(self, n, pvals, size=None, chunks="auto", **kwargs):
        # The trailing output axis has one entry per probability in `pvals`.
        return _wrap_func(
            self,
            "multinomial",
            n,
            pvals,
            size=size,
            chunks=chunks,
            extra_chunks=((len(pvals),),),
            **kwargs,
        )

    @derived_from(np.random.Generator, skipblocks=1)
    def multivariate_hypergeometric(
        self, colors, nsample, size=None, method="marginals", chunks="auto", **kwargs
    ):
        return _wrap_func(
            self,
            "multivariate_hypergeometric",
            colors,
            nsample,
            size=size,
            method=method,
            chunks=chunks,
            **kwargs,
        )

    @derived_from(np.random.Generator, skipblocks=1)
    def negative_binomial(self, n, p, size=None, chunks="auto", **kwargs):
        return _wrap_func(
            self, "negative_binomial", n, p, size=size, chunks=chunks, **kwargs
        )

    @derived_from(np.random.Generator, skipblocks=1)
    def noncentral_chisquare(self, df, nonc, size=None, chunks="auto", **kwargs):
        return _wrap_func(
            self, "noncentral_chisquare", df, nonc, size=size, chunks=chunks, **kwargs
        )

    @derived_from(np.random.Generator, skipblocks=1)
    def noncentral_f(self, dfnum, dfden, nonc, size=None, chunks="auto", **kwargs):
        return _wrap_func(
            self, "noncentral_f", dfnum, dfden, nonc, size=size, chunks=chunks, **kwargs
        )

    @derived_from(np.random.Generator, skipblocks=1)
    def normal(self, loc=0.0, scale=1.0, size=None, chunks="auto", **kwargs):
        return _wrap_func(
            self, "normal", loc, scale, size=size, chunks=chunks, **kwargs
        )

    @derived_from(np.random.Generator, skipblocks=1)
    def pareto(self, a, size=None, chunks="auto", **kwargs):
        return _wrap_func(self, "pareto", a, size=size, chunks=chunks, **kwargs)

    @derived_from(np.random.Generator, skipblocks=1)
    def permutation(self, x):
        from dask.array.slicing import shuffle_slice

        if self._backend_name == "cupy":
            raise NotImplementedError(
                "`Generator.permutation` not supported for cupy-backed "
                "Generator objects. Use the 'numpy' array backend to "
                "call `dask.array.random.default_rng`, or pass in "
                " `numpy.random.PCG64()`."
            )

        # A plain number means "permute the range of that length".
        if isinstance(x, numbers.Number):
            x = arange(x, chunks="auto")

        # Shuffle an index vector, then reorder `x` with it.
        index = self._backend.arange(len(x))
        _shuffle(self._bit_generator, index)
        return shuffle_slice(x, index)

    @derived_from(np.random.Generator, skipblocks=1)
    def poisson(self, lam=1.0, size=None, chunks="auto", **kwargs):
        return _wrap_func(self, "poisson", lam, size=size, chunks=chunks, **kwargs)

    @derived_from(np.random.Generator, skipblocks=1)
    def power(self, a, size=None, chunks="auto", **kwargs):
        return _wrap_func(self, "power", a, size=size, chunks=chunks, **kwargs)

    @derived_from(np.random.Generator, skipblocks=1)
    def random(self, size=None, dtype=np.float64, out=None, chunks="auto", **kwargs):
        return _wrap_func(
            self, "random", size=size, dtype=dtype, out=out, chunks=chunks, **kwargs
        )

    @derived_from(np.random.Generator, skipblocks=1)
    def rayleigh(self, scale=1.0, size=None, chunks="auto", **kwargs):
        return _wrap_func(self, "rayleigh", scale, size=size, chunks=chunks, **kwargs)

    @derived_from(np.random.Generator, skipblocks=1)
    def standard_cauchy(self, size=None, chunks="auto", **kwargs):
        return _wrap_func(self, "standard_cauchy", size=size, chunks=chunks, **kwargs)

    @derived_from(np.random.Generator, skipblocks=1)
    def standard_exponential(self, size=None, chunks="auto", **kwargs):
        return _wrap_func(
            self, "standard_exponential", size=size, chunks=chunks, **kwargs
        )

    @derived_from(np.random.Generator, skipblocks=1)
    def standard_gamma(self, shape, size=None, chunks="auto", **kwargs):
        return _wrap_func(
            self, "standard_gamma", shape, size=size, chunks=chunks, **kwargs
        )

    @derived_from(np.random.Generator, skipblocks=1)
    def standard_normal(self, size=None, chunks="auto", **kwargs):
        return _wrap_func(self, "standard_normal", size=size, chunks=chunks, **kwargs)

    @derived_from(np.random.Generator, skipblocks=1)
    def standard_t(self, df, size=None, chunks="auto", **kwargs):
        return _wrap_func(self, "standard_t", df, size=size, chunks=chunks, **kwargs)

    @derived_from(np.random.Generator, skipblocks=1)
    def triangular(self, left, mode, right, size=None, chunks="auto", **kwargs):
        return _wrap_func(
            self, "triangular", left, mode, right, size=size, chunks=chunks, **kwargs
        )

    @derived_from(np.random.Generator, skipblocks=1)
    def uniform(self, low=0.0, high=1.0, size=None, chunks="auto", **kwargs):
        return _wrap_func(
            self, "uniform", low, high, size=size, chunks=chunks, **kwargs
        )

    @derived_from(np.random.Generator, skipblocks=1)
    def vonmises(self, mu, kappa, size=None, chunks="auto", **kwargs):
        return _wrap_func(
            self, "vonmises", mu, kappa, size=size, chunks=chunks, **kwargs
        )

    @derived_from(np.random.Generator, skipblocks=1)
    def wald(self, mean, scale, size=None, chunks="auto", **kwargs):
        return _wrap_func(self, "wald", mean, scale, size=size, chunks=chunks, **kwargs)

    @derived_from(np.random.Generator, skipblocks=1)
    def weibull(self, a, size=None, chunks="auto", **kwargs):
        return _wrap_func(self, "weibull", a, size=size, chunks=chunks, **kwargs)

    @derived_from(np.random.Generator, skipblocks=1)
    def zipf(self, a, size=None, chunks="auto", **kwargs):
        return _wrap_func(self, "zipf", a, size=size, chunks=chunks, **kwargs)

393

394

def default_rng(seed=None):
    """
    Construct a new Generator with the default BitGenerator (PCG64).

    Parameters
    ----------
    seed : {None, int, array_like[ints], SeedSequence, BitGenerator, Generator}, optional
        A seed to initialize the `BitGenerator`. If None, then fresh,
        unpredictable entropy will be pulled from the OS. If an ``int`` or
        ``array_like[ints]`` is passed, then it will be passed to
        `SeedSequence` to derive the initial `BitGenerator` state. One may
        also pass in a `SeedSequence` instance.
        Additionally, when passed a `BitGenerator`, it will be wrapped by
        `Generator`. If passed a `Generator`, it will be returned unaltered.

    Returns
    -------
    Generator
        The initialized generator object.

    Notes
    -----
    If ``seed`` is not a `BitGenerator` or a `Generator`, a new
    `BitGenerator` is instantiated. This function does not manage a default
    global instance.

    Examples
    --------
    ``default_rng`` is the recommended constructor for the random number
    class ``Generator``. Here are several ways we can construct a random
    number generator using ``default_rng`` and the ``Generator`` class.

    Here we use ``default_rng`` to generate a random float:

    >>> import dask.array as da
    >>> rng = da.random.default_rng(12345)
    >>> print(rng)
    Generator(PCG64)
    >>> rfloat = rng.random().compute()
    >>> rfloat
    array(0.86999885)
    >>> type(rfloat)
    <class 'numpy.ndarray'>

    Here we use ``default_rng`` to generate 3 random integers between 0
    (inclusive) and 10 (exclusive):

    >>> import dask.array as da
    >>> rng = da.random.default_rng(12345)
    >>> rints = rng.integers(low=0, high=10, size=3).compute()
    >>> rints
    array([2, 8, 7])
    >>> type(rints[0])
    <class 'numpy.int64'>

    Here we specify a seed so that we have reproducible results:

    >>> import dask.array as da
    >>> rng = da.random.default_rng(seed=42)
    >>> print(rng)
    Generator(PCG64)
    >>> arr1 = rng.random((3, 3)).compute()
    >>> arr1
    array([[0.91674416, 0.91098667, 0.8765925 ],
           [0.30931841, 0.95465607, 0.17509458],
           [0.99662814, 0.75203348, 0.15038118]])

    If we exit and restart our Python interpreter, we'll see that we
    generate the same random numbers again:

    >>> import dask.array as da
    >>> rng = da.random.default_rng(seed=42)
    >>> arr2 = rng.random((3, 3)).compute()
    >>> arr2
    array([[0.91674416, 0.91098667, 0.8765925 ],
           [0.30931841, 0.95465607, 0.17509458],
           [0.99662814, 0.75203348, 0.15038118]])

    See Also
    --------
    np.random.default_rng
    """
    # The checks run in this fixed order; the first one that matches wins.
    if hasattr(seed, "capsule"):
        # A BitGenerator was handed in: wrap it as-is.
        return Generator(seed)
    if isinstance(seed, Generator):
        # Already one of our Generators: pass it straight through.
        return seed
    if hasattr(seed, "bit_generator"):
        # A Generator from another library: reuse its bit generator.
        return Generator(seed.bit_generator)
    # Anything else seeds the backend-default BitGenerator.
    backend_bitgen = array_creation_dispatch.default_bit_generator(seed)
    return Generator(backend_bitgen)

488

489

class RandomState:
    """
    Mersenne Twister pseudo-random number generator

    Holds the state needed to deterministically draw pseudo-random numbers
    from a variety of probability distributions. It is identical to
    ``np.random.RandomState`` except that all functions also take a
    ``chunks=`` keyword argument.

    Parameters
    ----------
    seed: Number
        Object to pass to RandomState to serve as deterministic seed
    RandomState: Callable[seed] -> RandomState
        A callable that, when provided with a ``seed`` keyword provides an
        object that operates identically to ``np.random.RandomState`` (the
        default). This might also be a function that returns a
        ``mkl_random``, or ``cupy.random.RandomState`` object.

    Examples
    --------
    >>> import dask.array as da
    >>> state = da.random.RandomState(1234)  # a seed
    >>> x = state.normal(10, 0.1, size=3, chunks=(2,))
    >>> x.compute()
    array([10.01867852, 10.04812289,  9.89649746])

    See Also
    --------
    np.random.RandomState
    """

    def __init__(self, seed=None, RandomState=None):
        # NumPy state held on the instance; `choice` and `permutation` draw from it.
        self._numpy_state = np.random.RandomState(seed)
        if RandomState is None:
            RandomState = array_creation_dispatch.RandomState
        self._RandomState = RandomState

    @property
    def _backend(self):
        # Assumes typename(self._RandomState) starts with
        # an importable array-library name (e.g. "numpy" or "cupy")
        library_name = typename(self._RandomState).split(".")[0]
        return importlib.import_module(library_name)

    def seed(self, seed=None):
        self._numpy_state.seed(seed)

    @derived_from(np.random.RandomState, skipblocks=1)
    def beta(self, a, b, size=None, chunks="auto", **kwargs):
        return _wrap_func(self, "beta", a, b, size=size, chunks=chunks, **kwargs)

    @derived_from(np.random.RandomState, skipblocks=1)
    def binomial(self, n, p, size=None, chunks="auto", **kwargs):
        return _wrap_func(self, "binomial", n, p, size=size, chunks=chunks, **kwargs)

    @derived_from(np.random.RandomState, skipblocks=1)
    def chisquare(self, df, size=None, chunks="auto", **kwargs):
        return _wrap_func(self, "chisquare", df, size=size, chunks=chunks, **kwargs)

    # An AttributeError raised while defining `choice` is swallowed, in which
    # case the method is simply left undefined on the class.
    with contextlib.suppress(AttributeError):

        @derived_from(np.random.RandomState, skipblocks=1)
        def choice(self, a, size=None, replace=True, p=None, chunks="auto"):
            validated = _choice_validate_params(self, a, size, replace, p, 0, chunks)
            # np.random.RandomState.choice does not use axis
            a, size, replace, p, _axis, chunks, meta, dependencies = validated

            # One task per output chunk, each seeded with its own state data.
            chunk_shapes = list(product(*chunks))
            state_data = random_state_data(len(chunk_shapes), self._numpy_state)

            token = tokenize(state_data, size, chunks, a, replace, p)
            name = f"da.random.choice-{token}"
            keys = product([name], *(range(len(bd)) for bd in chunks))
            dsk = {
                key: (_choice_rs, chunk_state, a, chunk_shape, replace, p)
                for key, chunk_state, chunk_shape in zip(keys, state_data, chunk_shapes)
            }

            graph = HighLevelGraph.from_collections(
                name, dsk, dependencies=dependencies
            )
            return Array(graph, name, chunks, meta=meta)

    @derived_from(np.random.RandomState, skipblocks=1)
    def exponential(self, scale=1.0, size=None, chunks="auto", **kwargs):
        return _wrap_func(
            self, "exponential", scale, size=size, chunks=chunks, **kwargs
        )

    @derived_from(np.random.RandomState, skipblocks=1)
    def f(self, dfnum, dfden, size=None, chunks="auto", **kwargs):
        return _wrap_func(self, "f", dfnum, dfden, size=size, chunks=chunks, **kwargs)

    @derived_from(np.random.RandomState, skipblocks=1)
    def gamma(self, shape, scale=1.0, size=None, chunks="auto", **kwargs):
        return _wrap_func(
            self, "gamma", shape, scale, size=size, chunks=chunks, **kwargs
        )

    @derived_from(np.random.RandomState, skipblocks=1)
    def geometric(self, p, size=None, chunks="auto", **kwargs):
        return _wrap_func(self, "geometric", p, size=size, chunks=chunks, **kwargs)

    @derived_from(np.random.RandomState, skipblocks=1)
    def gumbel(self, loc=0.0, scale=1.0, size=None, chunks="auto", **kwargs):
        return _wrap_func(
            self, "gumbel", loc, scale, size=size, chunks=chunks, **kwargs
        )

    @derived_from(np.random.RandomState, skipblocks=1)
    def hypergeometric(self, ngood, nbad, nsample, size=None, chunks="auto", **kwargs):
        return _wrap_func(
            self,
            "hypergeometric",
            ngood,
            nbad,
            nsample,
            size=size,
            chunks=chunks,
            **kwargs,
        )

    @derived_from(np.random.RandomState, skipblocks=1)
    def laplace(self, loc=0.0, scale=1.0, size=None, chunks="auto", **kwargs):
        return _wrap_func(
            self, "laplace", loc, scale, size=size, chunks=chunks, **kwargs
        )

    @derived_from(np.random.RandomState, skipblocks=1)
    def logistic(self, loc=0.0, scale=1.0, size=None, chunks="auto", **kwargs):
        return _wrap_func(
            self, "logistic", loc, scale, size=size, chunks=chunks, **kwargs
        )

    @derived_from(np.random.RandomState, skipblocks=1)
    def lognormal(self, mean=0.0, sigma=1.0, size=None, chunks="auto", **kwargs):
        return _wrap_func(
            self, "lognormal", mean, sigma, size=size, chunks=chunks, **kwargs
        )

    @derived_from(np.random.RandomState, skipblocks=1)
    def logseries(self, p, size=None, chunks="auto", **kwargs):
        return _wrap_func(self, "logseries", p, size=size, chunks=chunks, **kwargs)

    @derived_from(np.random.RandomState, skipblocks=1)
    def multinomial(self, n, pvals, size=None, chunks="auto", **kwargs):
        # The trailing output axis has one entry per probability in `pvals`.
        return _wrap_func(
            self,
            "multinomial",
            n,
            pvals,
            size=size,
            chunks=chunks,
            extra_chunks=((len(pvals),),),
            **kwargs,
        )

    @derived_from(np.random.RandomState, skipblocks=1)
    def negative_binomial(self, n, p, size=None, chunks="auto", **kwargs):
        return _wrap_func(
            self, "negative_binomial", n, p, size=size, chunks=chunks, **kwargs
        )

    @derived_from(np.random.RandomState, skipblocks=1)
    def noncentral_chisquare(self, df, nonc, size=None, chunks="auto", **kwargs):
        return _wrap_func(
            self, "noncentral_chisquare", df, nonc, size=size, chunks=chunks, **kwargs
        )

    @derived_from(np.random.RandomState, skipblocks=1)
    def noncentral_f(self, dfnum, dfden, nonc, size=None, chunks="auto", **kwargs):
        return _wrap_func(
            self, "noncentral_f", dfnum, dfden, nonc, size=size, chunks=chunks, **kwargs
        )

    @derived_from(np.random.RandomState, skipblocks=1)
    def normal(self, loc=0.0, scale=1.0, size=None, chunks="auto", **kwargs):
        return _wrap_func(
            self, "normal", loc, scale, size=size, chunks=chunks, **kwargs
        )

    @derived_from(np.random.RandomState, skipblocks=1)
    def pareto(self, a, size=None, chunks="auto", **kwargs):
        return _wrap_func(self, "pareto", a, size=size, chunks=chunks, **kwargs)

    @derived_from(np.random.RandomState, skipblocks=1)
    def permutation(self, x):
        from dask.array.slicing import shuffle_slice

        # A plain number means "permute the range of that length".
        if isinstance(x, numbers.Number):
            x = arange(x, chunks="auto")

        # Shuffle an index vector with the NumPy state, then reorder `x` with it.
        index = np.arange(len(x))
        self._numpy_state.shuffle(index)
        return shuffle_slice(x, index)

    @derived_from(np.random.RandomState, skipblocks=1)
    def poisson(self, lam=1.0, size=None, chunks="auto", **kwargs):
        return _wrap_func(self, "poisson", lam, size=size, chunks=chunks, **kwargs)

    @derived_from(np.random.RandomState, skipblocks=1)
    def power(self, a, size=None, chunks="auto", **kwargs):
        return _wrap_func(self, "power", a, size=size, chunks=chunks, **kwargs)

    @derived_from(np.random.RandomState, skipblocks=1)
    def randint(self, low, high=None, size=None, chunks="auto", dtype="l", **kwargs):
        return _wrap_func(
            self, "randint", low, high, size=size, chunks=chunks, dtype=dtype, **kwargs
        )

    @derived_from(np.random.RandomState, skipblocks=1)
    def random_integers(self, low, high=None, size=None, chunks="auto", **kwargs):
        return _wrap_func(
            self, "random_integers", low, high, size=size, chunks=chunks, **kwargs
        )

    @derived_from(np.random.RandomState, skipblocks=1)
    def random_sample(self, size=None, chunks="auto", **kwargs):
        return _wrap_func(self, "random_sample", size=size, chunks=chunks, **kwargs)

    # `random` is an alias for `random_sample`.
    random = random_sample

    @derived_from(np.random.RandomState, skipblocks=1)
    def rayleigh(self, scale=1.0, size=None, chunks="auto", **kwargs):
        return _wrap_func(self, "rayleigh", scale, size=size, chunks=chunks, **kwargs)

    @derived_from(np.random.RandomState, skipblocks=1)
    def standard_cauchy(self, size=None, chunks="auto", **kwargs):
        return _wrap_func(self, "standard_cauchy", size=size, chunks=chunks, **kwargs)

    @derived_from(np.random.RandomState, skipblocks=1)
    def standard_exponential(self, size=None, chunks="auto", **kwargs):
        return _wrap_func(
            self, "standard_exponential", size=size, chunks=chunks, **kwargs
        )

    @derived_from(np.random.RandomState, skipblocks=1)
    def standard_gamma(self, shape, size=None, chunks="auto", **kwargs):
        return _wrap_func(
            self, "standard_gamma", shape, size=size, chunks=chunks, **kwargs
        )

    @derived_from(np.random.RandomState, skipblocks=1)
    def standard_normal(self, size=None, chunks="auto", **kwargs):
        return _wrap_func(self, "standard_normal", size=size, chunks=chunks, **kwargs)

    @derived_from(np.random.RandomState, skipblocks=1)
    def standard_t(self, df, size=None, chunks="auto", **kwargs):
        return _wrap_func(self, "standard_t", df, size=size, chunks=chunks, **kwargs)

    @derived_from(np.random.RandomState, skipblocks=1)
    def tomaxint(self, size=None, chunks="auto", **kwargs):
        return _wrap_func(self, "tomaxint", size=size, chunks=chunks, **kwargs)

    @derived_from(np.random.RandomState, skipblocks=1)
    def triangular(self, left, mode, right, size=None, chunks="auto", **kwargs):
        return _wrap_func(
            self, "triangular", left, mode, right, size=size, chunks=chunks, **kwargs
        )

    @derived_from(np.random.RandomState, skipblocks=1)
    def uniform(self, low=0.0, high=1.0, size=None, chunks="auto", **kwargs):
        return _wrap_func(
            self, "uniform", low, high, size=size, chunks=chunks, **kwargs
        )

    @derived_from(np.random.RandomState, skipblocks=1)
    def vonmises(self, mu, kappa, size=None, chunks="auto", **kwargs):
        return _wrap_func(
            self, "vonmises", mu, kappa, size=size, chunks=chunks, **kwargs
        )

    @derived_from(np.random.RandomState, skipblocks=1)
    def wald(self, mean, scale, size=None, chunks="auto", **kwargs):
        return _wrap_func(self, "wald", mean, scale, size=size, chunks=chunks, **kwargs)

    @derived_from(np.random.RandomState, skipblocks=1)
    def weibull(self, a, size=None, chunks="auto", **kwargs):
        return _wrap_func(self, "weibull", a, size=size, chunks=chunks, **kwargs)

    @derived_from(np.random.RandomState, skipblocks=1)
    def zipf(self, a, size=None, chunks="auto", **kwargs):
        return _wrap_func(self, "zipf", a, size=size, chunks=chunks, **kwargs)

782

783

def _rng_from_bitgen(bitgen):
    """Return ``<library>.random.default_rng(bitgen)`` for the bit generator's library.

    The library is found by importing the first dotted component of
    ``typename(bitgen)``.
    """
    # Assumes typename(bitgen) starts with importable
    # library name (e.g. "numpy" or "cupy")
    library_name, *_ = typename(bitgen).split(".")
    library = importlib.import_module(library_name)
    return library.random.default_rng(bitgen)

790

791

def _shuffle(bit_generator, x, axis=0):
    """Shuffle ``x`` along ``axis`` using a copy of ``bit_generator``.

    A fresh bit generator of the same type is created and given the state
    of ``bit_generator``; the shuffle is driven by that copy rather than by
    the object passed in. Returns whatever the backend ``shuffle`` returns.
    """
    saved_state = bit_generator.state
    clone = type(bit_generator)()
    clone.state = saved_state
    return _rng_from_bitgen(clone).shuffle(x, axis=axis)

798

799

800def _spawn_bitgens(bitgen, n_bitgens):

801 seeds = bitgen._seed_seq.spawn(n_bitgens)

802 bitgens = [type(bitgen)(seed) for seed in seeds]

803 return bitgens

804

805

def _apply_random_func(rng, funcname, bitgen, size, args, kwargs):
    """Call the generator method ``funcname`` for one block of output.

    If ``bitgen`` is a ``np.random.SeedSequence`` it is first turned into a
    bit generator by calling ``rng(bitgen)``. A generator is then built
    around the bit generator and its ``funcname`` method is invoked with
    ``*args``, ``size=size`` and ``**kwargs``.
    """
    if isinstance(bitgen, np.random.SeedSequence):
        bitgen = rng(bitgen)
    generator = _rng_from_bitgen(bitgen)
    return getattr(generator, funcname)(*args, size=size, **kwargs)

813

814

815def _apply_random(RandomState, funcname, state_data, size, args, kwargs):

816 """Apply RandomState method with seed"""

817 if RandomState is None:

818 RandomState = array_creation_dispatch.RandomState

819 state = RandomState(state_data)

820 func = getattr(state, funcname)

821 return func(*args, size=size, **kwargs)

822

823

def _choice_rng(state_data, a, size, replace, p, axis, shuffle):
    """Run ``choice`` on a generator built around the bit generator ``state_data``."""
    generator = _rng_from_bitgen(state_data)
    return generator.choice(
        a, size=size, replace=replace, p=p, axis=axis, shuffle=shuffle
    )

827

828

def _choice_rs(state_data, a, size, replace, p):
    """Run ``choice`` on a backend ``RandomState`` built from ``state_data``."""
    random_state = array_creation_dispatch.RandomState(state_data)
    return random_state.choice(a, size=size, replace=replace, p=p)

832

833

def _choice_validate_params(state, a, size, replace, p, axis, chunks):
    """Normalize and validate the arguments shared by the ``choice`` methods.

    Parameters
    ----------
    state : Generator or RandomState
        The dask random object ``choice`` was called on; used to build the
        ``meta`` value when ``a`` is an integer.
    a : int or array-like
        Population to sample from. An integer is kept as-is; anything else
        is converted to a single-chunk dask array and replaced by the key
        of that chunk.
    size : None, int, tuple or list
        Requested output shape. ``None`` becomes ``()`` and a bare value
        becomes a 1-tuple.
    replace : bool
        Whether sampling is done with replacement.
    p : None or array-like
        Probabilities for the entries of ``a``. When given, it is converted
        to a single-chunk dask array and replaced by the key of that chunk.
    axis : int
        Must be ``0``.
    chunks : chunks specification
        Passed to ``normalize_chunks`` together with ``size``.

    Returns
    -------
    tuple
        ``(a, size, replace, p, axis, chunks, meta, dependencies)`` where
        ``dependencies`` lists the dask arrays created for ``a`` and ``p``.

    Raises
    ------
    ValueError
        For an unknown ``state`` class, negative integer ``a``, ``a`` or ``p``
        that is not one dimensional, ``p`` that does not sum to 1 or whose
        length differs from ``a``, or ``axis != 0``.
    NotImplementedError
        For an integer ``a`` with a cupy-backed ``Generator``, or when
        ``replace=False`` is combined with an output that has more than one
        chunk.
    """
    dependencies = []
    # Normalize and validate `a`
    if isinstance(a, Integral):
        if isinstance(state, Generator):
            if state._backend_name == "cupy":
                raise NotImplementedError(
                    "`choice` not supported for cupy-backed `Generator`."
                )
            meta = state._backend.random.default_rng().choice(1, size=(), p=None)
        elif isinstance(state, RandomState):
            # On windows the output dtype differs if p is provided or
            # absent, see https://github.com/numpy/numpy/issues/9867
            dummy_p = state._backend.array([1]) if p is not None else p
            meta = state._backend.random.RandomState().choice(1, size=(), p=dummy_p)
        else:
            raise ValueError("Unknown generator class")
        len_a = a
        if a < 0:
            raise ValueError("a must be greater than 0")
    else:
        a = asarray(a)
        # Collapse to a single chunk so each task receives the whole population.
        a = a.rechunk(a.shape)
        meta = a._meta
        if a.ndim != 1:
            raise ValueError("a must be one dimensional")
        len_a = len(a)
        dependencies.append(a)
        a = a.__dask_keys__()[0]

    # Normalize and validate `p`
    if p is not None:
        if not isinstance(p, Array):
            # If p is not a dask array, first check the sum is close
            # to 1 before converting.
            p = asarray_safe(p, like=p)
            if not np.isclose(p.sum(), 1, rtol=1e-7, atol=0):
                raise ValueError("probabilities do not sum to 1")
            p = asarray(p)
        else:
            p = p.rechunk(p.shape)

        if p.ndim != 1:
            raise ValueError("p must be one dimensional")
        if len(p) != len_a:
            raise ValueError("a and p must have the same size")

        dependencies.append(p)
        p = p.__dask_keys__()[0]

    if size is None:
        size = ()
    elif not isinstance(size, (tuple, list)):
        size = (size,)

    if axis != 0:
        raise ValueError("axis must be 0 since a is one dimensional")

    chunks = normalize_chunks(chunks, size, dtype=np.float64)
    # Each chunk is drawn independently, so sampling without replacement is
    # only valid when the whole output is a single chunk. Check every axis:
    # `chunks` is empty for scalar output, so indexing `chunks[0]` would fail,
    # and a split along any later axis is just as much a multi-chunk output.
    if not replace and any(len(axis_chunks) > 1 for axis_chunks in chunks):
        err_msg = (
            "replace=False is not currently supported for "
            "dask.array.choice with multi-chunk output "
            "arrays"
        )
        raise NotImplementedError(err_msg)

    return a, size, replace, p, axis, chunks, meta, dependencies

902

903

def _wrap_func(
    rng, funcname, *args, size=None, chunks="auto", extra_chunks=(), **kwargs
):
    """Build a dask ``Array`` whose blocks are produced by a random method.

    Parameters
    ----------
    rng : Generator or RandomState
        Source of the per-block seeds; any other type raises ``TypeError``.
    funcname : str
        Name handed to the per-block applier and used as the prefix of the
        output array's name.
    *args, **kwargs
        Arguments for the random method. Dask arrays and numpy arrays among
        them are broadcast to the output shape and fed to each block
        piecewise; everything else is passed to every block unchanged.
    size : None, int, tuple or list
        Requested output shape, broadcast against the array arguments.
    chunks : chunks specification
        Passed to ``normalize_chunks``.
    extra_chunks : tuple
        Chunks tuple appended to the end of the normalized chunks.

    Returns
    -------
    Array
        Array with chunks ``chunks + extra_chunks`` and a ``meta`` obtained
        by running the applier once with an all-zero size.
    """
    if not (size is None or isinstance(size, (tuple, list))):
        size = (size,)

    # Distinct shapes of all array-like arguments, positional and keyword.
    shapes = list(
        {
            value.shape
            for value in chain(args, kwargs.values())
            if isinstance(value, (Array, np.ndarray))
        }
    )
    if size is not None:
        shapes.append(size)
    # broadcast to the final size(shape)
    size = broadcast_shapes(*shapes)
    chunks = normalize_chunks(
        chunks,
        size,  # ideally would use dtype here
        dtype=kwargs.get("dtype", np.float64),
    )
    slices = slices_from_chunks(chunks)

    dsk = {}
    lookup = {}
    dependencies = []

    def _register(ident, value):
        """Broadcast an array argument, record it, and return its tiny form.

        Non-array values are returned untouched. ``ident`` is the positional
        index or keyword under which the broadcast result is remembered.
        """
        if not isinstance(value, (np.ndarray, Array)):
            return value
        if isinstance(value, Array):
            expanded = broadcast_to(value, size).rechunk(chunks)
            dependencies.append(expanded)
            lookup[ident] = expanded.name
        else:
            expanded = np.ascontiguousarray(np.broadcast_to(value, size))
            array_name = f"array-{tokenize(expanded)}"
            lookup[ident] = array_name
            dsk[array_name] = expanded
        # First element of the original argument, used only to compute meta.
        return value[(0,) * len(value.shape)]

    def _task_arg(ident, value, slc, block, err_msg):
        """Return the per-block task argument standing in for ``value``."""
        if ident not in lookup:
            return value
        if isinstance(value, Array):
            return (lookup[ident],) + block
        if isinstance(value, np.ndarray):
            return (getitem, lookup[ident], slc)
        raise TypeError(err_msg)

    # Broadcast all arguments, get tiny versions as well
    # Start adding the relevant bits to the graph
    small_args = [_register(pos, value) for pos, value in enumerate(args)]
    small_kwargs = {key: _register(key, value) for key, value in kwargs.items()}

    sizes = list(product(*chunks))
    if isinstance(rng, Generator):
        bitgens = _spawn_bitgens(rng._bit_generator, len(sizes))
        bitgen_token = tokenize(bitgens)
        bitgens = [_bitgen._seed_seq for _bitgen in bitgens]
        func_applier = _apply_random_func
        gen = type(rng._bit_generator)
    elif isinstance(rng, RandomState):
        bitgens = random_state_data(len(sizes), rng._numpy_state)
        bitgen_token = tokenize(bitgens)
        func_applier = _apply_random
        gen = rng._RandomState
    else:
        raise TypeError("Unknown object type: Not a Generator and Not a RandomState")
    token = tokenize(bitgen_token, size, chunks, args, kwargs)
    name = f"{funcname}-{token}"

    block_ranges = [range(len(bd)) for bd in chunks]
    keys = product([name], *(block_ranges + [[0]] * len(extra_chunks)))
    blocks = product(*block_ranges)

    tasks = []
    for block_seed, block_size, slc, block in zip(bitgens, sizes, slices, blocks):
        block_args = [
            _task_arg(pos, value, slc, block, "Unknown object type in args")
            for pos, value in enumerate(args)
        ]
        block_kwargs = {
            key: _task_arg(key, value, slc, block, "Unknown object type in kwargs")
            for key, value in kwargs.items()
        }
        tasks.append(
            (
                func_applier,
                gen,
                funcname,
                block_seed,
                block_size,
                block_args,
                block_kwargs,
            )
        )

    # meta is drawn with the seed and dimensionality of the last block
    # visited above, using a zero-length size and the tiny arguments.
    meta = func_applier(
        gen,
        funcname,
        block_seed,
        (0,) * len(block_size),
        small_args,
        small_kwargs,
    )

    dsk.update(zip(keys, tasks))

    graph = HighLevelGraph.from_collections(name, dsk, dependencies=dependencies)
    return Array(graph, name, chunks + extra_chunks, meta=meta)

1035

1036

1037"""

1038Lazy RNG-state machinery

1039

1040Many of the RandomState methods are exported as functions in da.random for

1041backward compatibility reasons. Their usage is discouraged.

1042Use da.random.default_rng() to get a Generator based rng and use its

1043methods instead.

1044"""

1045

# One lazily created ``RandomState`` per array-creation backend, shared by
# every module-level function produced by ``_make_api``.
_cached_states: dict[str, RandomState] = {}
# Held while ``_cached_states`` is read or populated.
_cached_states_lock = Lock()


def _make_api(attr):
    """Create a module-level function forwarding to ``RandomState.<attr>``.

    The returned function looks up (or creates, on first use) the cached
    ``RandomState`` for the currently active ``array_creation_dispatch``
    backend and calls its ``attr`` method with the given arguments. Its
    ``__name__`` and ``__doc__`` are copied from ``RandomState.<attr>``.
    """
    method = getattr(RandomState, attr)

    def wrapper(*args, **kwargs):
        backend = array_creation_dispatch.backend
        with _cached_states_lock:
            state = _cached_states.get(backend)
            if state is None:
                state = _cached_states[backend] = RandomState()
        # The lock only protects the cache; the call itself runs unlocked.
        return getattr(state, attr)(*args, **kwargs)

    wrapper.__name__ = method.__name__
    wrapper.__doc__ = method.__doc__
    return wrapper

1063

1064

1065"""

1066RandomState only

1067"""

1068

# Module-level convenience functions. Each binding is a wrapper built by
# ``_make_api`` that forwards to the named method of the cached
# ``RandomState`` for the active backend.
seed = _make_api("seed")

beta = _make_api("beta")
binomial = _make_api("binomial")
chisquare = _make_api("chisquare")
choice = _make_api("choice")
exponential = _make_api("exponential")
f = _make_api("f")
gamma = _make_api("gamma")
geometric = _make_api("geometric")
gumbel = _make_api("gumbel")
hypergeometric = _make_api("hypergeometric")
laplace = _make_api("laplace")
logistic = _make_api("logistic")
lognormal = _make_api("lognormal")
logseries = _make_api("logseries")
multinomial = _make_api("multinomial")
negative_binomial = _make_api("negative_binomial")
noncentral_chisquare = _make_api("noncentral_chisquare")
noncentral_f = _make_api("noncentral_f")
normal = _make_api("normal")
pareto = _make_api("pareto")
permutation = _make_api("permutation")
poisson = _make_api("poisson")
power = _make_api("power")
random_sample = _make_api("random_sample")
# ``random`` is an alias: it also forwards to ``random_sample``.
random = _make_api("random_sample")
randint = _make_api("randint")
random_integers = _make_api("random_integers")
rayleigh = _make_api("rayleigh")
standard_cauchy = _make_api("standard_cauchy")
standard_exponential = _make_api("standard_exponential")
standard_gamma = _make_api("standard_gamma")
standard_normal = _make_api("standard_normal")
standard_t = _make_api("standard_t")
triangular = _make_api("triangular")
uniform = _make_api("uniform")
vonmises = _make_api("vonmises")
wald = _make_api("wald")
weibull = _make_api("weibull")
zipf = _make_api("zipf")

1109zipf = _make_api("zipf")