# This file is part of Hypothesis, which may be found at
# https://github.com/HypothesisWorks/hypothesis/
#
# Copyright the Hypothesis Authors.
# Individual contributors are listed in AUTHORS.rst and the git log.
#
# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.

import contextlib
import gc
import random
import sys
import warnings
from collections.abc import Generator, Hashable
from itertools import count
from random import Random
from typing import TYPE_CHECKING, Any, Callable, Optional
from weakref import WeakValueDictionary

import hypothesis.core
from hypothesis.errors import HypothesisWarning, InvalidArgument
from hypothesis.internal.compat import FREE_THREADED_CPYTHON, GRAALPY, PYPY

if TYPE_CHECKING:
    from typing import Protocol

    # we can't use this at runtime until from_type supports
    # protocols -- breaks ghostwriter tests
    class RandomLike(Protocol):
        def seed(self, *args: Any, **kwargs: Any) -> Any: ...
        def getstate(self, *args: Any, **kwargs: Any) -> Any: ...
        def setstate(self, *args: Any, **kwargs: Any) -> Any: ...

else:  # pragma: no cover
    RandomLike = random.Random

_RKEY = count()
_global_random_rkey = next(_RKEY)
# This is effectively a WeakSet, which allows us to associate the saved states
# with their respective Random instances even as new ones are registered and old
# ones go out of scope and get garbage collected. Keys are ascending integers.
RANDOMS_TO_MANAGE: WeakValueDictionary[int, RandomLike] = WeakValueDictionary(
    {_global_random_rkey: random}
)
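# For illustration only (not executed here): because the values are held by
# weak references, an entry disappears as soon as its PRNG is collected, e.g.
#
#   r = random.Random()
#   key = next(_RKEY)
#   RANDOMS_TO_MANAGE[key] = r
#   del r
#   gc.collect()
#   assert key not in RANDOMS_TO_MANAGE  # prompt on CPython; may lag on PyPy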


class NumpyRandomWrapper:
    def __init__(self) -> None:
        assert "numpy" in sys.modules
        # This class provides a shim that matches the numpy random API to the
        # stdlib random module, and lets us avoid importing NumPy until it's
        # already in use.
        import numpy.random

        self.seed = numpy.random.seed
        self.getstate = numpy.random.get_state
        self.setstate = numpy.random.set_state


NP_RANDOM: Optional[RandomLike] = None


if not (PYPY or GRAALPY):

    def _get_platform_base_refcount(r: Any) -> int:
        return sys.getrefcount(r)

    # Determine the number of refcounts created by function scope for
    # the given platform / version of Python.
    _PLATFORM_REF_COUNT = _get_platform_base_refcount(object())
else:  # pragma: no cover
    # PYPY and GRAALPY don't have `sys.getrefcount`
    _PLATFORM_REF_COUNT = -1
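# For illustration only: `_PLATFORM_REF_COUNT` is the refcount reported for a
# temporary object seen through a single function call. In `register_random`
# below, an argument whose only reference is the call itself (e.g. the
# discouraged `register_random(MyRandom())` pattern) therefore reports a count
# at or below this baseline, which is what triggers the ReferenceError.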


def register_random(r: RandomLike) -> None:
    """Register (a weakref to) the given Random-like instance for management by
    Hypothesis.

    You can pass instances of structural subtypes of ``random.Random``
    (i.e., objects with seed, getstate, and setstate methods) to
    ``register_random(r)`` to have their states seeded and restored in the same
    way as the global PRNGs from the ``random`` and ``numpy.random`` modules.

    All global PRNGs, from e.g. simulation or scheduling frameworks, should
    be registered to prevent flaky tests. Hypothesis will ensure that the
    PRNG state is consistent across test runs: it seeds each registered PRNG
    to zero before the test and restores the previous state afterwards, or
    varies the seed reproducibly if you choose to use the
    :func:`~hypothesis.strategies.random_module` strategy.

    ``register_random`` only makes `weakrefs
    <https://docs.python.org/3/library/weakref.html#module-weakref>`_ to ``r``,
    so ``r`` will only be managed by Hypothesis as long as it has active
    references elsewhere at runtime. The pattern ``register_random(MyRandom())``
    will raise a ``ReferenceError`` to help protect users from this issue.
    This check does not occur on the PyPy or GraalPy interpreters. The following
    example illustrates the issue:

    .. code-block:: python


        def my_BROKEN_hook():
            r = MyRandomLike()

            # `r` will be garbage collected after the hook resolves,
            # and Hypothesis will 'forget' that it was registered
            register_random(r)  # Hypothesis will emit a warning


        rng = MyRandomLike()


        def my_WORKING_hook():
            register_random(rng)
    """
    if not (hasattr(r, "seed") and hasattr(r, "getstate") and hasattr(r, "setstate")):
        raise InvalidArgument(f"{r=} does not have all the required methods")

    if r in [
        existing
        for ref in RANDOMS_TO_MANAGE.data.copy().values()  # type: ignore
        if (existing := ref()) is not None
    ]:
        return

    if not (PYPY or GRAALPY):  # pragma: no branch
        # PYPY and GRAALPY do not have `sys.getrefcount`.
        gc.collect()
        if not gc.get_referrers(r):
            if sys.getrefcount(r) <= _PLATFORM_REF_COUNT:
                raise ReferenceError(
                    f"`register_random` was passed `r={r}` which will be "
                    "garbage collected immediately after `register_random` creates a "
                    "weakref to it. This will prevent Hypothesis from managing this "
                    "PRNG. See the docs for `register_random` for more "
                    "details."
                )
            elif not FREE_THREADED_CPYTHON:  # pragma: no branch
                # Skip the warning on the free-threaded CPython build:
                # gc.get_referrers() ignores objects with immortal refcounts,
                # and the Python 3.13 free-threading implementation immortalizes
                # objects at runtime, so this check would raise false alarms there.

                warnings.warn(
                    "It looks like `register_random` was passed an object that could "
                    "be garbage collected immediately after `register_random` creates "
                    "a weakref to it. This will prevent Hypothesis from managing this "
                    "PRNG. See the docs for `register_random` for more details.",
                    HypothesisWarning,
                    stacklevel=2,
                )

    RANDOMS_TO_MANAGE[next(_RKEY)] = r


# Used to make the warning issued by `deprecate_random_in_strategy` thread-safe,
# as well as to avoid warning on uses of st.randoms().
# Store just the hash to reduce memory consumption. This is an underapproximation
# of membership (distinct items might have the same hash), which is fine for the
# warning, as it results in missed alarms, not false alarms.
_known_random_state_hashes: set[Any] = set()
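# For illustration only (a hypothetical use site; the actual check lives in
# the engine/strategy code, not in this file):
#
#   if hash(current_state) not in _known_random_state_hashes:
#       warn_about_direct_use_of_the_random_module()  # hypothetical helper
#
# A hash collision can therefore only suppress that warning, never raise a
# spurious one.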


def get_seeder_and_restorer(
    seed: Hashable = 0,
) -> tuple[Callable[[], None], Callable[[], None]]:
    """Return a pair of functions which seed all registered PRNGs and restore
    their previous states, respectively.

    This is used by the core engine via `deterministic_PRNG`, and by users
    via `register_random`. We support registration of additional random.Random
    instances (or other objects with seed, getstate, and setstate methods)
    to force determinism on simulation or scheduling frameworks which avoid
    using the global random state. See e.g. #1709.
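
    A minimal usage sketch (illustrative only; Hypothesis normally pairs these
    calls for you inside `deterministic_PRNG`):

    .. code-block:: python

        seed_all, restore_all = get_seeder_and_restorer(seed=0)
        seed_all()
        try:
            ...  # run code that consumes randomness, reproducibly
        finally:
            restore_all()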
    """
    assert isinstance(seed, int)
    assert 0 <= seed < 2**32
    states: dict[int, object] = {}

    if "numpy" in sys.modules:
        global NP_RANDOM
        if NP_RANDOM is None:
            # Protect this from garbage-collection by adding it to global scope
            NP_RANDOM = RANDOMS_TO_MANAGE[next(_RKEY)] = NumpyRandomWrapper()

    def seed_all() -> None:
        assert not states
        # access .data.copy().items() instead of .items() to avoid a "dictionary
        # changed size during iteration" error under multithreading.
        #
        # I initially expected this to be fixed by
        # https://github.com/python/cpython/commit/96d37dbcd23e65a7a57819aeced9034296ef747e,
        # but I believe that is addressing the size change from weakrefs expiring
        # during gc, not from the user adding new elements to the dict.
        #
        # Since we're accessing .data, we have to manually handle checking for
        # expired ref instances during iteration. Normally WeakValueDictionary
        # handles this for us.
        #
        # At the time of writing, this command reproduces the error:
        # pytest hypothesis-python/tests/ -k test_intervals_are_equivalent_to_their_lists
        # --parallel-threads 2
        for k, ref in RANDOMS_TO_MANAGE.data.copy().items():  # type: ignore
            r = ref()
            if r is None:
                # i.e. the random instance has been gc'd
                continue  # pragma: no cover
            states[k] = r.getstate()
            if k == _global_random_rkey:
                # r.seed sets the random's state. We want to add the hash of
                # that state to _known_random_state_hashes before calling
                # r.seed, in case a thread switch occurs between the two. To
                # figure out the seed -> state mapping, set the seed on a dummy
                # random and add the hash of that state to
                # _known_random_state_hashes.
                #
                # we could use a global dummy random here, but then we'd have to
                # put a lock around it, and it's not clear to me if that's more
                # efficient than constructing a new instance each time.
                dummy_random = Random()
                dummy_random.seed(seed)
                _known_random_state_hashes.add(hash(dummy_random.getstate()))
                # we expect `assert r.getstate() == dummy_random.getstate()` to
                # hold here, but a thread switch means it might not.

            r.seed(seed)

    def restore_all() -> None:
        for k, state in states.items():
            r = RANDOMS_TO_MANAGE.get(k)
            if r is None:  # i.e., has been garbage-collected
                continue

            if k == _global_random_rkey:
                _known_random_state_hashes.add(hash(state))
            r.setstate(state)

        states.clear()

    return seed_all, restore_all


@contextlib.contextmanager
def deterministic_PRNG(seed: int = 0) -> Generator[None, None, None]:
    """Context manager that handles random.seed without polluting global state.

    See issue #1255 and PR #1295 for details and motivation - in short,
    leaving the global pseudo-random number generator (PRNG) seeded is a very
    bad idea in principle, and breaks all kinds of independence assumptions
    in practice.
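
    A minimal sketch of intended use (illustrative only; the engine wraps test
    execution in this context manager for you):

    .. code-block:: python

        with deterministic_PRNG(seed=0):
            # inside the block, the global `random` module behaves as if
            # freshly seeded with 0; its prior state is restored on exit
            print(random.random())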
    """
    if (
        hypothesis.core.threadlocal._hypothesis_global_random is None
    ):  # pragma: no cover
        hypothesis.core.threadlocal._hypothesis_global_random = Random()
        register_random(hypothesis.core.threadlocal._hypothesis_global_random)

    seed_all, restore_all = get_seeder_and_restorer(seed)
    seed_all()
    try:
        yield
    finally:
        restore_all()
        # TODO it would be nice to clean up _known_random_state_hashes when no
        # active deterministic_PRNG contexts remain, to free memory (see similar
        # logic in StackframeLimiter). But it's a bit annoying to get right, and
        # likely not a big deal.