Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/dask/array/random.py: 1%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1from __future__ import annotations
3import contextlib
4import importlib
5import numbers
6from itertools import chain, product
7from numbers import Integral
8from operator import getitem
9from threading import Lock
11import numpy as np
13from dask.array.backends import array_creation_dispatch
14from dask.array.core import (
15 Array,
16 asarray,
17 broadcast_shapes,
18 broadcast_to,
19 normalize_chunks,
20 slices_from_chunks,
21)
22from dask.array.creation import arange
23from dask.array.utils import asarray_safe
24from dask.base import tokenize
25from dask.highlevelgraph import HighLevelGraph
26from dask.utils import derived_from, random_state_data, typename
class Generator:
    """
    Container for the BitGenerators.

    ``Generator`` exposes a number of methods for generating random
    numbers drawn from a variety of probability distributions and serves
    as a replacement for ``RandomState``. The main difference between the
    two is that ``Generator`` relies on an additional ``BitGenerator`` to
    manage state and generate the random bits, which are then transformed
    into random values from useful distributions. The default ``BitGenerator``
    used by ``Generator`` is ``PCG64``. The ``BitGenerator`` can be changed
    by passing an instantiated ``BitGenerator`` to ``Generator``.

    The function :func:`dask.array.random.default_rng` is the recommended way
    to instantiate a ``Generator``.

    .. warning::

       No Compatibility Guarantee.

       ``Generator`` does not provide a version compatibility guarantee. In
       particular, as better algorithms evolve the bit stream may change.

    Parameters
    ----------
    bit_generator : BitGenerator
        BitGenerator to use as the core generator.

    Notes
    -----
    In addition to the distribution-specific arguments, each ``Generator``
    method takes a keyword argument `size` that defaults to ``None``. If
    `size` is ``None``, then a single value is generated and returned. If
    `size` is an integer, then a 1-D array filled with generated values is
    returned. If `size` is a tuple, then an array with that shape is
    filled and returned.

    The Python stdlib module `random` contains pseudo-random number generator
    with a number of methods that are similar to the ones available in
    ``Generator``. It uses Mersenne Twister, and this bit generator can
    be accessed using ``MT19937``. ``Generator``, besides being
    Dask-aware, has the advantage that it provides a much larger number
    of probability distributions to choose from.

    All ``Generator`` methods are identical to ``np.random.Generator`` except
    that they also take a `chunks=` keyword argument.

    ``Generator`` does not guarantee parity in the generated numbers
    with any third party library. In particular, numbers generated by
    `Dask` and `NumPy` will differ even if they use the same seed.

    Examples
    --------
    >>> from numpy.random import PCG64
    >>> from dask.array.random import Generator
    >>> rng = Generator(PCG64())
    >>> rng.standard_normal().compute() # doctest: +SKIP
    array(0.44595957) # random

    See Also
    --------
    default_rng : Recommended constructor for `Generator`.
    np.random.Generator
    """

    def __init__(self, bit_generator):
        self._bit_generator = bit_generator

    def __str__(self):
        # Rendered as "<ClassName>(<BitGeneratorClassName>)".
        own_name = self.__class__.__name__
        bitgen_name = self._bit_generator.__class__.__name__
        return f"{own_name}({bitgen_name})"

    @property
    def _backend_name(self):
        # Assumes typename(self._bit_generator) starts with an
        # array-library name (e.g. "numpy" or "cupy")
        qualified_name = typename(self._bit_generator)
        return qualified_name.split(".")[0]

    @property
    def _backend(self):
        # Assumes `self._backend_name` is an importable
        # array-library name (e.g. "numpy" or "cupy")
        return importlib.import_module(self._backend_name)

    @derived_from(np.random.Generator, skipblocks=1)
    def beta(self, a, b, size=None, chunks="auto", **kwargs):
        return _wrap_func(self, "beta", a, b, size=size, chunks=chunks, **kwargs)

    @derived_from(np.random.Generator, skipblocks=1)
    def binomial(self, n, p, size=None, chunks="auto", **kwargs):
        return _wrap_func(self, "binomial", n, p, size=size, chunks=chunks, **kwargs)

    @derived_from(np.random.Generator, skipblocks=1)
    def chisquare(self, df, size=None, chunks="auto", **kwargs):
        return _wrap_func(self, "chisquare", df, size=size, chunks=chunks, **kwargs)

    @derived_from(np.random.Generator, skipblocks=1)
    def choice(
        self,
        a,
        size=None,
        replace=True,
        p=None,
        axis=0,
        shuffle=True,
        chunks="auto",
    ):
        validated = _choice_validate_params(self, a, size, replace, p, axis, chunks)
        a, size, replace, p, axis, chunks, meta, dependencies = validated

        # One independent bit generator per output chunk.
        chunk_shapes = list(product(*chunks))
        bitgens = _spawn_bitgens(self._bit_generator, len(chunk_shapes))

        token = tokenize(bitgens, size, chunks, a, replace, p, axis, shuffle)
        name = "da.random.choice-%s" % token
        keys = product([name], *(range(len(bd)) for bd in chunks))

        dsk = {}
        for key, bitgen, chunk_shape in zip(keys, bitgens, chunk_shapes):
            dsk[key] = (
                _choice_rng,
                bitgen,
                a,
                chunk_shape,
                replace,
                p,
                axis,
                shuffle,
            )

        graph = HighLevelGraph.from_collections(name, dsk, dependencies=dependencies)
        return Array(graph, name, chunks, meta=meta)

    @derived_from(np.random.Generator, skipblocks=1)
    def exponential(self, scale=1.0, size=None, chunks="auto", **kwargs):
        return _wrap_func(
            self, "exponential", scale, size=size, chunks=chunks, **kwargs
        )

    @derived_from(np.random.Generator, skipblocks=1)
    def f(self, dfnum, dfden, size=None, chunks="auto", **kwargs):
        return _wrap_func(self, "f", dfnum, dfden, size=size, chunks=chunks, **kwargs)

    @derived_from(np.random.Generator, skipblocks=1)
    def gamma(self, shape, scale=1.0, size=None, chunks="auto", **kwargs):
        return _wrap_func(
            self, "gamma", shape, scale, size=size, chunks=chunks, **kwargs
        )

    @derived_from(np.random.Generator, skipblocks=1)
    def geometric(self, p, size=None, chunks="auto", **kwargs):
        return _wrap_func(self, "geometric", p, size=size, chunks=chunks, **kwargs)

    @derived_from(np.random.Generator, skipblocks=1)
    def gumbel(self, loc=0.0, scale=1.0, size=None, chunks="auto", **kwargs):
        return _wrap_func(
            self, "gumbel", loc, scale, size=size, chunks=chunks, **kwargs
        )

    @derived_from(np.random.Generator, skipblocks=1)
    def hypergeometric(self, ngood, nbad, nsample, size=None, chunks="auto", **kwargs):
        return _wrap_func(
            self,
            "hypergeometric",
            ngood,
            nbad,
            nsample,
            size=size,
            chunks=chunks,
            **kwargs,
        )

    @derived_from(np.random.Generator, skipblocks=1)
    def integers(
        self,
        low,
        high=None,
        size=None,
        dtype=np.int64,
        endpoint=False,
        chunks="auto",
        **kwargs,
    ):
        return _wrap_func(
            self,
            "integers",
            low,
            high=high,
            size=size,
            dtype=dtype,
            endpoint=endpoint,
            chunks=chunks,
            **kwargs,
        )

    @derived_from(np.random.Generator, skipblocks=1)
    def laplace(self, loc=0.0, scale=1.0, size=None, chunks="auto", **kwargs):
        return _wrap_func(
            self, "laplace", loc, scale, size=size, chunks=chunks, **kwargs
        )

    @derived_from(np.random.Generator, skipblocks=1)
    def logistic(self, loc=0.0, scale=1.0, size=None, chunks="auto", **kwargs):
        return _wrap_func(
            self, "logistic", loc, scale, size=size, chunks=chunks, **kwargs
        )

    @derived_from(np.random.Generator, skipblocks=1)
    def lognormal(self, mean=0.0, sigma=1.0, size=None, chunks="auto", **kwargs):
        return _wrap_func(
            self, "lognormal", mean, sigma, size=size, chunks=chunks, **kwargs
        )

    @derived_from(np.random.Generator, skipblocks=1)
    def logseries(self, p, size=None, chunks="auto", **kwargs):
        return _wrap_func(self, "logseries", p, size=size, chunks=chunks, **kwargs)

    @derived_from(np.random.Generator, skipblocks=1)
    def multinomial(self, n, pvals, size=None, chunks="auto", **kwargs):
        return _wrap_func(
            self,
            "multinomial",
            n,
            pvals,
            size=size,
            chunks=chunks,
            # Trailing output axis holds one entry per element of `pvals`.
            extra_chunks=((len(pvals),),),
            **kwargs,
        )

    @derived_from(np.random.Generator, skipblocks=1)
    def multivariate_hypergeometric(
        self, colors, nsample, size=None, method="marginals", chunks="auto", **kwargs
    ):
        return _wrap_func(
            self,
            "multivariate_hypergeometric",
            colors,
            nsample,
            size=size,
            method=method,
            chunks=chunks,
            **kwargs,
        )

    @derived_from(np.random.Generator, skipblocks=1)
    def negative_binomial(self, n, p, size=None, chunks="auto", **kwargs):
        return _wrap_func(
            self, "negative_binomial", n, p, size=size, chunks=chunks, **kwargs
        )

    @derived_from(np.random.Generator, skipblocks=1)
    def noncentral_chisquare(self, df, nonc, size=None, chunks="auto", **kwargs):
        return _wrap_func(
            self, "noncentral_chisquare", df, nonc, size=size, chunks=chunks, **kwargs
        )

    @derived_from(np.random.Generator, skipblocks=1)
    def noncentral_f(self, dfnum, dfden, nonc, size=None, chunks="auto", **kwargs):
        return _wrap_func(
            self, "noncentral_f", dfnum, dfden, nonc, size=size, chunks=chunks, **kwargs
        )

    @derived_from(np.random.Generator, skipblocks=1)
    def normal(self, loc=0.0, scale=1.0, size=None, chunks="auto", **kwargs):
        return _wrap_func(
            self, "normal", loc, scale, size=size, chunks=chunks, **kwargs
        )

    @derived_from(np.random.Generator, skipblocks=1)
    def pareto(self, a, size=None, chunks="auto", **kwargs):
        return _wrap_func(self, "pareto", a, size=size, chunks=chunks, **kwargs)

    @derived_from(np.random.Generator, skipblocks=1)
    def permutation(self, x):
        from dask.array.slicing import shuffle_slice

        if self._backend_name == "cupy":
            raise NotImplementedError(
                "`Generator.permutation` not supported for cupy-backed "
                "Generator objects. Use the 'numpy' array backend to "
                "call `dask.array.random.default_rng`, or pass in "
                " `numpy.random.PCG64()`."
            )

        # A plain number means "permute arange(x)".
        if isinstance(x, numbers.Number):
            x = arange(x, chunks="auto")

        # NOTE(review): `_shuffle` works on a fresh bit generator loaded with
        # this generator's state, so `self._bit_generator` does not appear to
        # be advanced by this call -- confirm that is intended.
        index = self._backend.arange(len(x))
        _shuffle(self._bit_generator, index)
        return shuffle_slice(x, index)

    @derived_from(np.random.Generator, skipblocks=1)
    def poisson(self, lam=1.0, size=None, chunks="auto", **kwargs):
        return _wrap_func(self, "poisson", lam, size=size, chunks=chunks, **kwargs)

    @derived_from(np.random.Generator, skipblocks=1)
    def power(self, a, size=None, chunks="auto", **kwargs):
        return _wrap_func(self, "power", a, size=size, chunks=chunks, **kwargs)

    @derived_from(np.random.Generator, skipblocks=1)
    def random(self, size=None, dtype=np.float64, out=None, chunks="auto", **kwargs):
        return _wrap_func(
            self, "random", size=size, dtype=dtype, out=out, chunks=chunks, **kwargs
        )

    @derived_from(np.random.Generator, skipblocks=1)
    def rayleigh(self, scale=1.0, size=None, chunks="auto", **kwargs):
        return _wrap_func(self, "rayleigh", scale, size=size, chunks=chunks, **kwargs)

    @derived_from(np.random.Generator, skipblocks=1)
    def standard_cauchy(self, size=None, chunks="auto", **kwargs):
        return _wrap_func(self, "standard_cauchy", size=size, chunks=chunks, **kwargs)

    @derived_from(np.random.Generator, skipblocks=1)
    def standard_exponential(self, size=None, chunks="auto", **kwargs):
        return _wrap_func(
            self, "standard_exponential", size=size, chunks=chunks, **kwargs
        )

    @derived_from(np.random.Generator, skipblocks=1)
    def standard_gamma(self, shape, size=None, chunks="auto", **kwargs):
        return _wrap_func(
            self, "standard_gamma", shape, size=size, chunks=chunks, **kwargs
        )

    @derived_from(np.random.Generator, skipblocks=1)
    def standard_normal(self, size=None, chunks="auto", **kwargs):
        return _wrap_func(self, "standard_normal", size=size, chunks=chunks, **kwargs)

    @derived_from(np.random.Generator, skipblocks=1)
    def standard_t(self, df, size=None, chunks="auto", **kwargs):
        return _wrap_func(self, "standard_t", df, size=size, chunks=chunks, **kwargs)

    @derived_from(np.random.Generator, skipblocks=1)
    def triangular(self, left, mode, right, size=None, chunks="auto", **kwargs):
        return _wrap_func(
            self, "triangular", left, mode, right, size=size, chunks=chunks, **kwargs
        )

    @derived_from(np.random.Generator, skipblocks=1)
    def uniform(self, low=0.0, high=1.0, size=None, chunks="auto", **kwargs):
        return _wrap_func(
            self, "uniform", low, high, size=size, chunks=chunks, **kwargs
        )

    @derived_from(np.random.Generator, skipblocks=1)
    def vonmises(self, mu, kappa, size=None, chunks="auto", **kwargs):
        return _wrap_func(
            self, "vonmises", mu, kappa, size=size, chunks=chunks, **kwargs
        )

    @derived_from(np.random.Generator, skipblocks=1)
    def wald(self, mean, scale, size=None, chunks="auto", **kwargs):
        return _wrap_func(self, "wald", mean, scale, size=size, chunks=chunks, **kwargs)

    @derived_from(np.random.Generator, skipblocks=1)
    def weibull(self, a, size=None, chunks="auto", **kwargs):
        return _wrap_func(self, "weibull", a, size=size, chunks=chunks, **kwargs)

    @derived_from(np.random.Generator, skipblocks=1)
    def zipf(self, a, size=None, chunks="auto", **kwargs):
        return _wrap_func(self, "zipf", a, size=size, chunks=chunks, **kwargs)
def default_rng(seed=None):
    """
    Construct a new Generator with the default BitGenerator (PCG64).

    Parameters
    ----------
    seed : {None, int, array_like[ints], SeedSequence, BitGenerator, Generator}, optional
        A seed to initialize the `BitGenerator`. If None, then fresh,
        unpredictable entropy will be pulled from the OS. If an ``int`` or
        ``array_like[ints]`` is passed, then it will be passed to
        `SeedSequence` to derive the initial `BitGenerator` state. One may
        also pass in a `SeedSequence` instance.
        Additionally, when passed a `BitGenerator`, it will be wrapped by
        `Generator`. If passed a `Generator`, it will be returned unaltered.

    Returns
    -------
    Generator
        The initialized generator object.

    Notes
    -----
    If ``seed`` is not a `BitGenerator` or a `Generator`, a new
    `BitGenerator` is instantiated. This function does not manage a default
    global instance.

    Examples
    --------
    ``default_rng`` is the recommended constructor for the random number
    class ``Generator``. Here are several ways we can construct a random
    number generator using ``default_rng`` and the ``Generator`` class.

    Here we use ``default_rng`` to generate a random float:

    >>> import dask.array as da
    >>> rng = da.random.default_rng(12345)
    >>> print(rng)
    Generator(PCG64)
    >>> rfloat = rng.random().compute()
    >>> rfloat
    array(0.86999885)
    >>> type(rfloat)
    <class 'numpy.ndarray'>

    Here we use ``default_rng`` to generate 3 random integers between 0
    (inclusive) and 10 (exclusive):

    >>> import dask.array as da
    >>> rng = da.random.default_rng(12345)
    >>> rints = rng.integers(low=0, high=10, size=3).compute()
    >>> rints
    array([2, 8, 7])
    >>> type(rints[0])
    <class 'numpy.int64'>

    Here we specify a seed so that we have reproducible results:

    >>> import dask.array as da
    >>> rng = da.random.default_rng(seed=42)
    >>> print(rng)
    Generator(PCG64)
    >>> arr1 = rng.random((3, 3)).compute()
    >>> arr1
    array([[0.91674416, 0.91098667, 0.8765925 ],
           [0.30931841, 0.95465607, 0.17509458],
           [0.99662814, 0.75203348, 0.15038118]])

    If we exit and restart our Python interpreter, we'll see that we
    generate the same random numbers again:

    >>> import dask.array as da
    >>> rng = da.random.default_rng(seed=42)
    >>> arr2 = rng.random((3, 3)).compute()
    >>> arr2
    array([[0.91674416, 0.91098667, 0.8765925 ],
           [0.30931841, 0.95465607, 0.17509458],
           [0.99662814, 0.75203348, 0.15038118]])

    See Also
    --------
    np.random.default_rng
    """
    # The checks are ordered; each one returns as soon as it matches.
    if hasattr(seed, "capsule"):
        # An object exposing `capsule` is treated as a BitGenerator: wrap it.
        return Generator(seed)
    if isinstance(seed, Generator):
        # Already one of our Generators: hand it back untouched.
        return seed
    if hasattr(seed, "bit_generator"):
        # Some other library's Generator: re-wrap its bit generator.
        return Generator(seed.bit_generator)

    # Anything else is handed to the backend-default BitGenerator as a seed.
    bit_generator = array_creation_dispatch.default_bit_generator(seed)
    return Generator(bit_generator)
class RandomState:
    """
    Mersenne Twister pseudo-random number generator

    This object contains state to deterministically generate pseudo-random
    numbers from a variety of probability distributions. It is identical to
    ``np.random.RandomState`` except that all functions also take a ``chunks=``
    keyword argument.

    Parameters
    ----------
    seed: Number
        Object to pass to RandomState to serve as deterministic seed
    RandomState: Callable[seed] -> RandomState
        A callable that, when provided with a ``seed`` keyword provides an
        object that operates identically to ``np.random.RandomState`` (the
        default). This might also be a function that returns a
        ``mkl_random``, or ``cupy.random.RandomState`` object.

    Examples
    --------
    >>> import dask.array as da
    >>> state = da.random.RandomState(1234)  # a seed
    >>> x = state.normal(10, 0.1, size=3, chunks=(2,))
    >>> x.compute()
    array([10.01867852, 10.04812289,  9.89649746])

    See Also
    --------
    np.random.RandomState
    """

    def __init__(self, seed=None, RandomState=None):
        # The NumPy state is always created here; `seed()` and
        # `permutation()` act on it, and `choice()` derives per-chunk state
        # data from it.
        self._numpy_state = np.random.RandomState(seed)
        if RandomState is None:
            RandomState = array_creation_dispatch.RandomState
        self._RandomState = RandomState

    @property
    def _backend(self):
        # Assumes typename(self._RandomState) starts with
        # an importable array-library name (e.g. "numpy" or "cupy")
        qualified_name = typename(self._RandomState)
        return importlib.import_module(qualified_name.split(".")[0])

    def seed(self, seed=None):
        self._numpy_state.seed(seed)

    @derived_from(np.random.RandomState, skipblocks=1)
    def beta(self, a, b, size=None, chunks="auto", **kwargs):
        return _wrap_func(self, "beta", a, b, size=size, chunks=chunks, **kwargs)

    @derived_from(np.random.RandomState, skipblocks=1)
    def binomial(self, n, p, size=None, chunks="auto", **kwargs):
        return _wrap_func(self, "binomial", n, p, size=size, chunks=chunks, **kwargs)

    @derived_from(np.random.RandomState, skipblocks=1)
    def chisquare(self, df, size=None, chunks="auto", **kwargs):
        return _wrap_func(self, "chisquare", df, size=size, chunks=chunks, **kwargs)

    # If defining `choice` raises AttributeError, the method is left out.
    with contextlib.suppress(AttributeError):

        @derived_from(np.random.RandomState, skipblocks=1)
        def choice(self, a, size=None, replace=True, p=None, chunks="auto"):
            # np.random.RandomState.choice does not use axis, so a fixed 0 is
            # passed in and the returned axis is discarded.
            validated = _choice_validate_params(self, a, size, replace, p, 0, chunks)
            a, size, replace, p, _axis, chunks, meta, dependencies = validated

            # One piece of state data per output chunk.
            chunk_shapes = list(product(*chunks))
            state_data = random_state_data(len(chunk_shapes), self._numpy_state)

            token = tokenize(state_data, size, chunks, a, replace, p)
            name = "da.random.choice-%s" % token
            keys = product([name], *(range(len(bd)) for bd in chunks))

            dsk = {}
            for key, state, chunk_shape in zip(keys, state_data, chunk_shapes):
                dsk[key] = (_choice_rs, state, a, chunk_shape, replace, p)

            graph = HighLevelGraph.from_collections(
                name, dsk, dependencies=dependencies
            )
            return Array(graph, name, chunks, meta=meta)

    @derived_from(np.random.RandomState, skipblocks=1)
    def exponential(self, scale=1.0, size=None, chunks="auto", **kwargs):
        return _wrap_func(
            self, "exponential", scale, size=size, chunks=chunks, **kwargs
        )

    @derived_from(np.random.RandomState, skipblocks=1)
    def f(self, dfnum, dfden, size=None, chunks="auto", **kwargs):
        return _wrap_func(self, "f", dfnum, dfden, size=size, chunks=chunks, **kwargs)

    @derived_from(np.random.RandomState, skipblocks=1)
    def gamma(self, shape, scale=1.0, size=None, chunks="auto", **kwargs):
        return _wrap_func(
            self, "gamma", shape, scale, size=size, chunks=chunks, **kwargs
        )

    @derived_from(np.random.RandomState, skipblocks=1)
    def geometric(self, p, size=None, chunks="auto", **kwargs):
        return _wrap_func(self, "geometric", p, size=size, chunks=chunks, **kwargs)

    @derived_from(np.random.RandomState, skipblocks=1)
    def gumbel(self, loc=0.0, scale=1.0, size=None, chunks="auto", **kwargs):
        return _wrap_func(
            self, "gumbel", loc, scale, size=size, chunks=chunks, **kwargs
        )

    @derived_from(np.random.RandomState, skipblocks=1)
    def hypergeometric(self, ngood, nbad, nsample, size=None, chunks="auto", **kwargs):
        return _wrap_func(
            self,
            "hypergeometric",
            ngood,
            nbad,
            nsample,
            size=size,
            chunks=chunks,
            **kwargs,
        )

    @derived_from(np.random.RandomState, skipblocks=1)
    def laplace(self, loc=0.0, scale=1.0, size=None, chunks="auto", **kwargs):
        return _wrap_func(
            self, "laplace", loc, scale, size=size, chunks=chunks, **kwargs
        )

    @derived_from(np.random.RandomState, skipblocks=1)
    def logistic(self, loc=0.0, scale=1.0, size=None, chunks="auto", **kwargs):
        return _wrap_func(
            self, "logistic", loc, scale, size=size, chunks=chunks, **kwargs
        )

    @derived_from(np.random.RandomState, skipblocks=1)
    def lognormal(self, mean=0.0, sigma=1.0, size=None, chunks="auto", **kwargs):
        return _wrap_func(
            self, "lognormal", mean, sigma, size=size, chunks=chunks, **kwargs
        )

    @derived_from(np.random.RandomState, skipblocks=1)
    def logseries(self, p, size=None, chunks="auto", **kwargs):
        return _wrap_func(self, "logseries", p, size=size, chunks=chunks, **kwargs)

    @derived_from(np.random.RandomState, skipblocks=1)
    def multinomial(self, n, pvals, size=None, chunks="auto", **kwargs):
        return _wrap_func(
            self,
            "multinomial",
            n,
            pvals,
            size=size,
            chunks=chunks,
            # Trailing output axis holds one entry per element of `pvals`.
            extra_chunks=((len(pvals),),),
            **kwargs,
        )

    @derived_from(np.random.RandomState, skipblocks=1)
    def negative_binomial(self, n, p, size=None, chunks="auto", **kwargs):
        return _wrap_func(
            self, "negative_binomial", n, p, size=size, chunks=chunks, **kwargs
        )

    @derived_from(np.random.RandomState, skipblocks=1)
    def noncentral_chisquare(self, df, nonc, size=None, chunks="auto", **kwargs):
        return _wrap_func(
            self, "noncentral_chisquare", df, nonc, size=size, chunks=chunks, **kwargs
        )

    @derived_from(np.random.RandomState, skipblocks=1)
    def noncentral_f(self, dfnum, dfden, nonc, size=None, chunks="auto", **kwargs):
        return _wrap_func(
            self, "noncentral_f", dfnum, dfden, nonc, size=size, chunks=chunks, **kwargs
        )

    @derived_from(np.random.RandomState, skipblocks=1)
    def normal(self, loc=0.0, scale=1.0, size=None, chunks="auto", **kwargs):
        return _wrap_func(
            self, "normal", loc, scale, size=size, chunks=chunks, **kwargs
        )

    @derived_from(np.random.RandomState, skipblocks=1)
    def pareto(self, a, size=None, chunks="auto", **kwargs):
        return _wrap_func(self, "pareto", a, size=size, chunks=chunks, **kwargs)

    @derived_from(np.random.RandomState, skipblocks=1)
    def permutation(self, x):
        from dask.array.slicing import shuffle_slice

        # A plain number means "permute arange(x)".
        if isinstance(x, numbers.Number):
            x = arange(x, chunks="auto")

        # Shuffle an index vector with the NumPy state, then reorder `x` by it.
        index = np.arange(len(x))
        self._numpy_state.shuffle(index)
        return shuffle_slice(x, index)

    @derived_from(np.random.RandomState, skipblocks=1)
    def poisson(self, lam=1.0, size=None, chunks="auto", **kwargs):
        return _wrap_func(self, "poisson", lam, size=size, chunks=chunks, **kwargs)

    @derived_from(np.random.RandomState, skipblocks=1)
    def power(self, a, size=None, chunks="auto", **kwargs):
        return _wrap_func(self, "power", a, size=size, chunks=chunks, **kwargs)

    @derived_from(np.random.RandomState, skipblocks=1)
    def randint(self, low, high=None, size=None, chunks="auto", dtype="l", **kwargs):
        return _wrap_func(
            self, "randint", low, high, size=size, chunks=chunks, dtype=dtype, **kwargs
        )

    @derived_from(np.random.RandomState, skipblocks=1)
    def random_integers(self, low, high=None, size=None, chunks="auto", **kwargs):
        return _wrap_func(
            self, "random_integers", low, high, size=size, chunks=chunks, **kwargs
        )

    @derived_from(np.random.RandomState, skipblocks=1)
    def random_sample(self, size=None, chunks="auto", **kwargs):
        return _wrap_func(self, "random_sample", size=size, chunks=chunks, **kwargs)

    # `random` is an alias of `random_sample`.
    random = random_sample

    @derived_from(np.random.RandomState, skipblocks=1)
    def rayleigh(self, scale=1.0, size=None, chunks="auto", **kwargs):
        return _wrap_func(self, "rayleigh", scale, size=size, chunks=chunks, **kwargs)

    @derived_from(np.random.RandomState, skipblocks=1)
    def standard_cauchy(self, size=None, chunks="auto", **kwargs):
        return _wrap_func(self, "standard_cauchy", size=size, chunks=chunks, **kwargs)

    @derived_from(np.random.RandomState, skipblocks=1)
    def standard_exponential(self, size=None, chunks="auto", **kwargs):
        return _wrap_func(
            self, "standard_exponential", size=size, chunks=chunks, **kwargs
        )

    @derived_from(np.random.RandomState, skipblocks=1)
    def standard_gamma(self, shape, size=None, chunks="auto", **kwargs):
        return _wrap_func(
            self, "standard_gamma", shape, size=size, chunks=chunks, **kwargs
        )

    @derived_from(np.random.RandomState, skipblocks=1)
    def standard_normal(self, size=None, chunks="auto", **kwargs):
        return _wrap_func(self, "standard_normal", size=size, chunks=chunks, **kwargs)

    @derived_from(np.random.RandomState, skipblocks=1)
    def standard_t(self, df, size=None, chunks="auto", **kwargs):
        return _wrap_func(self, "standard_t", df, size=size, chunks=chunks, **kwargs)

    @derived_from(np.random.RandomState, skipblocks=1)
    def tomaxint(self, size=None, chunks="auto", **kwargs):
        return _wrap_func(self, "tomaxint", size=size, chunks=chunks, **kwargs)

    @derived_from(np.random.RandomState, skipblocks=1)
    def triangular(self, left, mode, right, size=None, chunks="auto", **kwargs):
        return _wrap_func(
            self, "triangular", left, mode, right, size=size, chunks=chunks, **kwargs
        )

    @derived_from(np.random.RandomState, skipblocks=1)
    def uniform(self, low=0.0, high=1.0, size=None, chunks="auto", **kwargs):
        return _wrap_func(
            self, "uniform", low, high, size=size, chunks=chunks, **kwargs
        )

    @derived_from(np.random.RandomState, skipblocks=1)
    def vonmises(self, mu, kappa, size=None, chunks="auto", **kwargs):
        return _wrap_func(
            self, "vonmises", mu, kappa, size=size, chunks=chunks, **kwargs
        )

    @derived_from(np.random.RandomState, skipblocks=1)
    def wald(self, mean, scale, size=None, chunks="auto", **kwargs):
        return _wrap_func(self, "wald", mean, scale, size=size, chunks=chunks, **kwargs)

    @derived_from(np.random.RandomState, skipblocks=1)
    def weibull(self, a, size=None, chunks="auto", **kwargs):
        return _wrap_func(self, "weibull", a, size=size, chunks=chunks, **kwargs)

    @derived_from(np.random.RandomState, skipblocks=1)
    def zipf(self, a, size=None, chunks="auto", **kwargs):
        return _wrap_func(self, "zipf", a, size=size, chunks=chunks, **kwargs)
def _rng_from_bitgen(bitgen):
    """Build a backend ``Generator`` around ``bitgen``.

    The backend library is found by importing the first dotted component of
    ``typename(bitgen)``; this assumes that component is an importable
    library name (e.g. "numpy" or "cupy") exposing ``random.default_rng``.
    """
    top_level_package = typename(bitgen).split(".")[0]
    backend = importlib.import_module(top_level_package)
    return backend.random.default_rng(bitgen)
def _shuffle(bit_generator, x, axis=0):
    """Shuffle ``x`` along ``axis`` using a copy of ``bit_generator``.

    A new bit generator of the same type is created and loaded with the
    state read from ``bit_generator``; the shuffle is then performed by a
    generator built on that new instance. Returns whatever the backend's
    ``shuffle`` returns (presumably ``None``, with ``x`` modified in place --
    confirm against the backend).
    """
    saved_state = bit_generator.state
    clone = type(bit_generator)()
    clone.state = saved_state
    return _rng_from_bitgen(clone).shuffle(x, axis=axis)
def _spawn_bitgens(bitgen, n_bitgens):
    """Return ``n_bitgens`` new bit generators of the same type as ``bitgen``.

    Each one is constructed from a child seed spawned off
    ``bitgen._seed_seq``.
    """
    bitgen_cls = type(bitgen)
    child_seeds = bitgen._seed_seq.spawn(n_bitgens)
    return [bitgen_cls(child) for child in child_seeds]
def _apply_random_func(rng, funcname, bitgen, size, args, kwargs):
    """Apply random module method with seed

    When ``bitgen`` is a ``np.random.SeedSequence``, ``rng`` is called on it
    to obtain the bit generator. A backend generator is then built from the
    bit generator and its ``funcname`` method is invoked with ``args``,
    ``size`` and ``kwargs``.
    """
    if isinstance(bitgen, np.random.SeedSequence):
        bitgen = rng(bitgen)
    generator = _rng_from_bitgen(bitgen)
    method = getattr(generator, funcname)
    return method(*args, size=size, **kwargs)
def _apply_random(RandomState, funcname, state_data, size, args, kwargs):
    """Apply RandomState method with seed

    ``RandomState`` is called with ``state_data`` to build the state object;
    when it is ``None`` the dispatch default
    (``array_creation_dispatch.RandomState``) is used instead. The state's
    ``funcname`` method is then called with ``args``, ``size`` and ``kwargs``.
    """
    factory = array_creation_dispatch.RandomState if RandomState is None else RandomState
    seeded_state = factory(state_data)
    return getattr(seeded_state, funcname)(*args, size=size, **kwargs)
def _choice_rng(state_data, a, size, replace, p, axis, shuffle):
    """Run ``choice`` on a backend generator built from ``state_data``.

    ``state_data`` is handed to ``_rng_from_bitgen``, so it is expected to be
    a bit generator; all other arguments are forwarded to ``choice``.
    """
    generator = _rng_from_bitgen(state_data)
    return generator.choice(
        a, size=size, replace=replace, p=p, axis=axis, shuffle=shuffle
    )
def _choice_rs(state_data, a, size, replace, p):
    """Run ``choice`` on a dispatch-default ``RandomState`` seeded with ``state_data``."""
    random_state = array_creation_dispatch.RandomState(state_data)
    return random_state.choice(a, size=size, replace=replace, p=p)
def _choice_validate_params(state, a, size, replace, p, axis, chunks):
    """Normalize and validate the arguments shared by the ``choice`` methods.

    Parameters
    ----------
    state : Generator or RandomState
        The object ``choice`` was called on. It is only consulted when ``a``
        is an integer, to draw a throw-away sample used as ``meta``.
    a : Integral or array-like
        Population to sample from. Non-integer input is converted to a
        one-dimensional dask array, rechunked with ``a.shape`` as the chunk
        size, and replaced by its first key.
    size : None, int, tuple or list
        Requested output shape; ``None`` becomes ``()`` and any value that is
        not a tuple or list is wrapped in a 1-tuple.
    replace : bool
        Whether sampling is done with replacement.
    p : None or array-like
        Probabilities. When given, it must be one-dimensional and have the
        same length as ``a``; it is replaced by its first key.
    axis : int
        Must be 0.
    chunks
        Chunk specification, normalized against ``size``.

    Returns
    -------
    tuple
        ``(a, size, replace, p, axis, chunks, meta, dependencies)`` where
        ``dependencies`` lists the dask arrays whose keys were substituted
        for ``a`` and ``p``.

    Raises
    ------
    ValueError
        If ``a`` is a negative integer or not one-dimensional, ``p`` is not
        one-dimensional, does not sum to 1 (checked for non-dask ``p`` only)
        or differs in length from ``a``, ``axis`` is not 0, or ``state`` is
        of an unknown class.
    NotImplementedError
        For an integer ``a`` with a cupy-backed ``Generator``, or for
        ``replace=False`` when the output would have more than one chunk.
    """
    dependencies = []
    # Normalize and validate `a`
    if isinstance(a, Integral):
        if isinstance(state, Generator):
            if state._backend_name == "cupy":
                raise NotImplementedError(
                    "`choice` not supported for cupy-backed `Generator`."
                )
            meta = state._backend.random.default_rng().choice(1, size=(), p=None)
        elif isinstance(state, RandomState):
            # On windows the output dtype differs if p is provided or
            # absent, see https://github.com/numpy/numpy/issues/9867
            dummy_p = state._backend.array([1]) if p is not None else p
            meta = state._backend.random.RandomState().choice(1, size=(), p=dummy_p)
        else:
            raise ValueError("Unknown generator class")
        len_a = a
        if a < 0:
            raise ValueError("a must be greater than 0")
    else:
        a = asarray(a)
        a = a.rechunk(a.shape)
        meta = a._meta
        if a.ndim != 1:
            raise ValueError("a must be one dimensional")
        len_a = len(a)
        dependencies.append(a)
        # The task graph refers to the array through its first key.
        a = a.__dask_keys__()[0]

    # Normalize and validate `p`
    if p is not None:
        if not isinstance(p, Array):
            # If p is not a dask array, first check the sum is close
            # to 1 before converting.
            p = asarray_safe(p, like=p)
            if not np.isclose(p.sum(), 1, rtol=1e-7, atol=0):
                raise ValueError("probabilities do not sum to 1")
            p = asarray(p)
        else:
            p = p.rechunk(p.shape)

        if p.ndim != 1:
            raise ValueError("p must be one dimensional")
        if len(p) != len_a:
            raise ValueError("a and p must have the same size")

        dependencies.append(p)
        p = p.__dask_keys__()[0]

    if size is None:
        size = ()
    elif not isinstance(size, (tuple, list)):
        size = (size,)

    if axis != 0:
        raise ValueError("axis must be 0 since a is one dimensional")

    chunks = normalize_chunks(chunks, size, dtype=np.float64)
    # Each chunk samples independently, so sampling without replacement is
    # only valid when the whole output is a single chunk. Every axis is
    # checked: looking at `chunks[0]` alone fails for a scalar output (where
    # `chunks` is presumably empty) and misses splits along later axes.
    if not replace and any(len(bd) > 1 for bd in chunks):
        err_msg = (
            "replace=False is not currently supported for "
            "dask.array.choice with multi-chunk output "
            "arrays"
        )
        raise NotImplementedError(err_msg)

    return a, size, replace, p, axis, chunks, meta, dependencies
def _wrap_func(
    rng, funcname, *args, size=None, chunks="auto", extra_chunks=(), **kwargs
):
    """Wrap numpy random function to produce dask.array random function

    Parameters
    ----------
    rng : Generator or RandomState
        Source of the per-block random state. Any other type raises
        ``TypeError``.
    funcname : str
        Name of the random method. It is forwarded to the per-block applier
        and used as the prefix of the output array's name.
    *args, **kwargs
        Arguments for the random method. ``Array`` and ``np.ndarray`` values
        are broadcast to the output shape and handed to each block as the
        matching chunk/slice; everything else is passed through unchanged.
        A ``dtype`` keyword, when present, is also used for chunk
        normalization (``np.float64`` otherwise).
    size : int, tuple, list or None
        Requested output shape. A non-tuple/list value is wrapped in a
        1-tuple. The final shape is the broadcast of ``size`` with the shapes
        of all array-like arguments.
    chunks
        Chunk specification handed to ``normalize_chunks``.
    extra_chunks : tuple
        A chunks tuple to append to the end of ``chunks``.

    Returns
    -------
    Array
        Array with one task per output block.
    """
    if not (size is None or isinstance(size, (tuple, list))):
        size = (size,)

    array_types = (Array, np.ndarray)

    # Unique shapes of every array-like argument, plus the requested size.
    shapes = list(
        {
            operand.shape
            for operand in chain(args, kwargs.values())
            if isinstance(operand, array_types)
        }
    )
    if size is not None:
        shapes.append(size)
    # broadcast to the final size(shape)
    size = broadcast_shapes(*shapes)
    chunks = normalize_chunks(
        chunks,
        size,  # ideally would use dtype here
        dtype=kwargs.get("dtype", np.float64),
    )
    slices = slices_from_chunks(chunks)

    def _broadcast_any(ar, shape, chunks):
        if isinstance(ar, Array):
            return broadcast_to(ar, shape).rechunk(chunks)
        if isinstance(ar, np.ndarray):
            return np.ascontiguousarray(np.broadcast_to(ar, shape))
        raise TypeError("Unknown object type for broadcast")

    # Graph pieces that get filled in while the arguments are registered.
    dsk = {}
    lookup = {}
    dependencies = []

    def _register(slot, operand):
        """Record an array operand under ``slot``; return its tiny version.

        ``slot`` is the positional index for ``args`` and the keyword name
        for ``kwargs``. Non-array operands are returned untouched.
        """
        if not isinstance(operand, array_types):
            return operand
        expanded = _broadcast_any(operand, size, chunks)
        if isinstance(expanded, Array):
            dependencies.append(expanded)
            lookup[slot] = expanded.name
        elif isinstance(expanded, np.ndarray):
            literal_key = f"array-{tokenize(expanded)}"
            lookup[slot] = literal_key
            dsk[literal_key] = expanded
        # First element of the *original* operand, used only to compute meta.
        return operand[(0,) * len(operand.shape)]

    # Broadcast all arguments, get tiny versions as well
    small_args = [_register(position, value) for position, value in enumerate(args)]
    small_kwargs = {key: _register(key, value) for key, value in kwargs.items()}

    sizes = list(product(*chunks))
    n_blocks = len(sizes)
    if isinstance(rng, Generator):
        spawned = _spawn_bitgens(rng._bit_generator, n_blocks)
        bitgen_token = tokenize(spawned)
        bitgens = [child._seed_seq for child in spawned]
        func_applier = _apply_random_func
        gen = type(rng._bit_generator)
    elif isinstance(rng, RandomState):
        bitgens = random_state_data(n_blocks, rng._numpy_state)
        bitgen_token = tokenize(bitgens)
        func_applier = _apply_random
        gen = rng._RandomState
    else:
        raise TypeError("Unknown object type: Not a Generator and Not a RandomState")
    token = tokenize(bitgen_token, size, chunks, args, kwargs)
    name = f"{funcname}-{token}"

    block_ranges = [range(len(bd)) for bd in chunks]
    keys = product([name], *(block_ranges + [[0]] * len(extra_chunks)))
    blocks = product(*block_ranges)

    def _task_operand(slot, operand, slc, block, error_message):
        """Return what a single block's task should receive for ``operand``."""
        if slot not in lookup:
            return operand
        if isinstance(operand, Array):
            # Key of the matching chunk of the broadcast dask array.
            return (lookup[slot],) + block
        if isinstance(operand, np.ndarray):
            # Slice the broadcast numpy array stored in the graph.
            return (getitem, lookup[slot], slc)
        raise TypeError(error_message)

    vals = []
    for bitgen, block_shape, slc, block in zip(bitgens, sizes, slices, blocks):
        arg = [
            _task_operand(i, ar, slc, block, "Unknown object type in args")
            for i, ar in enumerate(args)
        ]
        kwrg = {
            k: _task_operand(k, ar, slc, block, "Unknown object type in kwargs")
            for k, ar in kwargs.items()
        }
        vals.append((func_applier, gen, funcname, bitgen, block_shape, arg, kwrg))

    # Meta comes from a zero-sized draw; this intentionally reuses the state
    # and dimensionality left over from the final loop iteration.
    meta = func_applier(
        gen,
        funcname,
        bitgen,
        (0,) * len(block_shape),
        small_args,
        small_kwargs,
    )

    dsk.update(zip(keys, vals))

    graph = HighLevelGraph.from_collections(name, dsk, dependencies=dependencies)
    return Array(graph, name, chunks + extra_chunks, meta=meta)
1037"""
1038Lazy RNG-state machinery
1040Many of the RandomState methods are exported as functions in da.random for
1041backward compatibility reasons. Their usage is discouraged.
1042Use da.random.default_rng() to get a Generator based rng and use its
1043methods instead.
1044"""
# Cache of lazily created ``RandomState`` instances, keyed by the value of
# ``array_creation_dispatch.backend`` at call time (see ``_make_api``).
_cached_states: dict[str, RandomState] = {}
# Held while looking up or inserting entries in ``_cached_states``.
_cached_states_lock = Lock()
def _make_api(attr):
    """Create a module-level function forwarding to ``RandomState.<attr>``.

    The returned function looks up (or creates and caches) the
    ``RandomState`` associated with the current
    ``array_creation_dispatch.backend`` and calls its ``attr`` method with
    the given arguments. Its ``__name__`` and ``__doc__`` are copied from the
    ``RandomState`` attribute.
    """
    target = getattr(RandomState, attr)

    def wrapper(*args, **kwargs):
        backend = array_creation_dispatch.backend
        # Only the cache access is done under the lock; the call itself is not.
        with _cached_states_lock:
            if backend not in _cached_states:
                _cached_states[backend] = RandomState()
            state = _cached_states[backend]
        bound = getattr(state, attr)
        return bound(*args, **kwargs)

    wrapper.__name__ = target.__name__
    wrapper.__doc__ = target.__doc__
    return wrapper
1065"""
1066RandomState only
1067"""
# Module-level functions built by ``_make_api``: each one forwards to the
# ``RandomState`` method named by the string argument.
seed = _make_api("seed")

beta = _make_api("beta")
binomial = _make_api("binomial")
chisquare = _make_api("chisquare")
choice = _make_api("choice")
exponential = _make_api("exponential")
f = _make_api("f")
gamma = _make_api("gamma")
geometric = _make_api("geometric")
gumbel = _make_api("gumbel")
hypergeometric = _make_api("hypergeometric")
laplace = _make_api("laplace")
logistic = _make_api("logistic")
lognormal = _make_api("lognormal")
logseries = _make_api("logseries")
multinomial = _make_api("multinomial")
negative_binomial = _make_api("negative_binomial")
noncentral_chisquare = _make_api("noncentral_chisquare")
noncentral_f = _make_api("noncentral_f")
normal = _make_api("normal")
pareto = _make_api("pareto")
permutation = _make_api("permutation")
poisson = _make_api("poisson")
power = _make_api("power")
random_sample = _make_api("random_sample")
# ``random`` is an alias: it also forwards to ``RandomState.random_sample``.
random = _make_api("random_sample")
randint = _make_api("randint")
random_integers = _make_api("random_integers")
rayleigh = _make_api("rayleigh")
standard_cauchy = _make_api("standard_cauchy")
standard_exponential = _make_api("standard_exponential")
standard_gamma = _make_api("standard_gamma")
standard_normal = _make_api("standard_normal")
standard_t = _make_api("standard_t")
triangular = _make_api("triangular")
uniform = _make_api("uniform")
vonmises = _make_api("vonmises")
wald = _make_api("wald")
weibull = _make_api("weibull")
zipf = _make_api("zipf")