1# This file is part of Hypothesis, which may be found at
2# https://github.com/HypothesisWorks/hypothesis/
3#
4# Copyright the Hypothesis Authors.
5# Individual contributors are listed in AUTHORS.rst and the git log.
6#
7# This Source Code Form is subject to the terms of the Mozilla Public License,
8# v. 2.0. If a copy of the MPL was not distributed with this file, You can
9# obtain one at https://mozilla.org/MPL/2.0/.
10
11import abc
12import contextlib
13import math
14import sys
15import warnings
16from collections.abc import Iterable
17from contextlib import AbstractContextManager, contextmanager
18from functools import cached_property
19from random import Random
20from sys import float_info
21from typing import (
22 TYPE_CHECKING,
23 Any,
24 ClassVar,
25 Literal,
26 Optional,
27 TypeAlias,
28 TypedDict,
29 TypeVar,
30)
31
32from sortedcontainers import SortedSet
33
34from hypothesis.errors import HypothesisWarning
35from hypothesis.internal.cache import LRUCache
36from hypothesis.internal.compat import WINDOWS, int_from_bytes
37from hypothesis.internal.conjecture.choice import (
38 ChoiceConstraintsT,
39 ChoiceT,
40 ChoiceTypeT,
41 FloatConstraints,
42 choice_constraints_key,
43 choice_permitted,
44)
45from hypothesis.internal.conjecture.floats import lex_to_float
46from hypothesis.internal.conjecture.junkdrawer import bits_to_bytes
47from hypothesis.internal.conjecture.utils import (
48 INT_SIZES,
49 INT_SIZES_SAMPLER,
50 Sampler,
51 many,
52)
53from hypothesis.internal.constants_ast import (
54 Constants,
55 constants_from_module,
56 is_local_module_file,
57)
58from hypothesis.internal.floats import (
59 SIGNALING_NAN,
60 float_to_int,
61 make_float_clamper,
62 next_down,
63 next_up,
64)
65from hypothesis.internal.intervalsets import IntervalSet
66from hypothesis.internal.observability import InfoObservationType, TestCaseObservation
67
68if TYPE_CHECKING:
69 from hypothesis.internal.conjecture.data import ConjectureData
70 from hypothesis.internal.constants_ast import ConstantT
71
72T = TypeVar("T")
73LifetimeT: TypeAlias = Literal["test_case", "test_function"]
74COLLECTION_DEFAULT_MAX_SIZE = 10**10 # "arbitrarily large"
75
76
77#: Registered Hypothesis backends. This is a dictionary where keys are the name
78#: to be used in |settings.backend|. The value of a key can be either:
79#:
80#: * A string corresponding to an importable absolute path of a
81#: |PrimitiveProvider| subclass
82#: * A |PrimitiveProvider| subclass (the class itself, not an instance of the
83#: class)
84#:
85#: Hypothesis will instantiate the corresponding |PrimitiveProvider| subclass
86#: when the backend is requested by a test's |settings.backend| value.
87#:
88#: For example, the default Hypothesis backend is registered as:
89#:
90#: .. code-block:: python
91#:
92#: from hypothesis.internal.conjecture.providers import AVAILABLE_PROVIDERS
93#:
94#: AVAILABLE_PROVIDERS["hypothesis"] = "hypothesis.internal.conjecture.providers.HypothesisProvider"
95#: # or
96#: AVAILABLE_PROVIDERS["hypothesis"] = HypothesisProvider
97#:
98#: And can be used with:
99#:
100#: .. code-block:: python
101#:
102#: from hypothesis import given, settings, strategies as st
103#:
104#: @given(st.integers())
105#: @settings(backend="hypothesis")
106#: def f(n):
107#: pass
108#:
109#: Though, as ``backend="hypothesis"`` is the default setting, the above would
110#: typically not have any effect.
111#:
112#: For third-party backend authors, we strongly encourage ensuring that
113#: ``import hypothesis`` does not automatically import the expensive parts of
114#: your package, by:
115#:
116#: - setting a string path here, instead of a provider class
117#: - ensuring the registered hypothesis plugin path references a path which just
118#: sets AVAILABLE_PROVIDERS and does not import your package
119AVAILABLE_PROVIDERS: dict[str, str | type["PrimitiveProvider"]] = {
120 "hypothesis": "hypothesis.internal.conjecture.providers.HypothesisProvider",
121 "hypothesis-urandom": "hypothesis.internal.conjecture.providers.URandomProvider",
122}
123# cache the choice_permitted constants for a particular set of constraints.
124CacheKeyT: TypeAlias = tuple[ChoiceTypeT, tuple[Any, ...]]
125CacheValueT: TypeAlias = tuple[tuple["ConstantT", ...], tuple["ConstantT", ...]]
126CONSTANTS_CACHE: LRUCache[CacheKeyT, CacheValueT] = LRUCache(1024)
127
128
129_constants_integers = (
130 # powers of 2
131 [2**n for n in range(16, 66)]
132 # powers of 10
133 + [10**n for n in range(5, 20)]
134 # factorials
135 + [math.factorial(n) for n in range(9, 21)]
136 # a few primorial numbers https://en.wikipedia.org/wiki/Primorial
137 + [
138 510510,
139 6469693230,
140 304250263527210,
141 32589158477190044730,
142 ]
143)
144_constants_integers.extend(
145 [n - 1 for n in _constants_integers] + [n + 1 for n in _constants_integers]
146)
147_constants_integers.extend([-x for x in _constants_integers])
148
149# arbitrary cutoffs to keep our list bounded
150assert all(50_000 <= abs(n) <= 2**66 for n in _constants_integers)
151
152_constant_floats = (
153 [
154 0.5,
155 1.1,
156 1.5,
157 1.9,
158 1.0 / 3,
159 10e6,
160 10e-6,
161 1.175494351e-38,
162 next_up(0.0),
163 float_info.min,
164 float_info.max,
165 3.402823466e38,
166 9007199254740992.0,
167 1 - 10e-6,
168 2 + 10e-6,
169 1.192092896e-07,
170 2.2204460492503131e-016,
171 ]
172 + [2.0**-n for n in (24, 14, 149, 126)] # minimum (sub)normals for float16,32
173 + [float_info.min / n for n in (2, 10, 1000, 100_000)] # subnormal in float64
174)
175_constant_floats.extend([-x for x in _constant_floats])
176assert all(isinstance(f, float) for f in _constant_floats)
177
178_constant_strings = {
179 # strings which can be interpreted as code / logic
180 "undefined",
181 "null",
182 "NULL",
183 "nil",
184 "NIL",
185 "true",
186 "false",
187 "True",
188 "False",
189 "TRUE",
190 "FALSE",
191 "None",
192 "none",
193 "if",
194 "then",
195 "else",
196 "__dict__",
197 "__proto__", # javascript
198 # strings which can be interpreted as a number
199 "0",
200 "1e100",
201 "0..0",
202 "0/0",
203 "1/0",
204 "+0.0",
205 "Infinity",
206 "-Infinity",
207 "Inf",
208 "INF",
209 "NaN",
210 "9" * 30,
211 # common ascii characters
212 ",./;'[]\\-=<>?:\"{}|_+!@#$%^&*()`~",
213 # common unicode characters
214 "Ω≈ç√∫˜µ≤≥÷åß∂ƒ©˙∆˚¬…æœ∑´®†¥¨ˆøπ“‘¡™£¢∞§¶•ªº–≠¸˛Ç◊ı˜Â¯˘¿ÅÍÎÏ˝ÓÔÒÚÆ☃Œ„´‰ˇÁ¨ˆØ∏”’`⁄€‹›fifl‡°·‚—±",
215 # characters which increase in length when lowercased
216 "Ⱥ",
217 "Ⱦ",
218 # ligatures
219 "æœÆŒffʤʨß",
220 # emoticons
221 "(╯°□°)╯︵ ┻━┻)",
222 # emojis
223 "😍",
224 "🇺🇸",
225 # emoji modifiers
226 "🏻", # U+1F3FB Light Skin Tone,
227 "👍🏻", # 👍 followed by U+1F3FB
228 # RTL text
229 "الكل في المجمو عة",
230 # Ogham text, which contains the only character in the Space Separators
231 # unicode category (Zs) that isn't visually blank: . # noqa: RUF003
232 "᚛ᚄᚓᚐᚋᚒᚄ ᚑᚄᚂᚑᚏᚅ᚜",
233 # readable variations on text (bolt/italic/script)
234 "𝐓𝐡𝐞 𝐪𝐮𝐢𝐜𝐤 𝐛𝐫𝐨𝐰𝐧 𝐟𝐨𝐱 𝐣𝐮𝐦𝐩𝐬 𝐨𝐯𝐞𝐫 𝐭𝐡𝐞 𝐥𝐚𝐳𝐲 𝐝𝐨𝐠",
235 "𝕿𝖍𝖊 𝖖𝖚𝖎𝖈𝖐 𝖇𝖗𝖔𝖜𝖓 𝖋𝖔𝖝 𝖏𝖚𝖒𝖕𝖘 𝖔𝖛𝖊𝖗 𝖙𝖍𝖊 𝖑𝖆𝖟𝖞 𝖉𝖔𝖌",
236 "𝑻𝒉𝒆 𝒒𝒖𝒊𝒄𝒌 𝒃𝒓𝒐𝒘𝒏 𝒇𝒐𝒙 𝒋𝒖𝒎𝒑𝒔 𝒐𝒗𝒆𝒓 𝒕𝒉𝒆 𝒍𝒂𝒛𝒚 𝒅𝒐𝒈",
237 "𝓣𝓱𝓮 𝓺𝓾𝓲𝓬𝓴 𝓫𝓻𝓸𝔀𝓷 𝓯𝓸𝔁 𝓳𝓾𝓶𝓹𝓼 𝓸𝓿𝓮𝓻 𝓽𝓱𝓮 𝓵𝓪𝔃𝔂 𝓭𝓸𝓰",
238 "𝕋𝕙𝕖 𝕢𝕦𝕚𝕔𝕜 𝕓𝕣𝕠𝕨𝕟 𝕗𝕠𝕩 𝕛𝕦𝕞𝕡𝕤 𝕠𝕧𝕖𝕣 𝕥𝕙𝕖 𝕝𝕒𝕫𝕪 𝕕𝕠𝕘",
239 # upsidown text
240 "ʇǝɯɐ ʇᴉs ɹolop ɯnsdᴉ ɯǝɹo˥",
241 # reserved strings in windows
242 "NUL",
243 "COM1",
244 "LPT1",
245 # scunthorpe problem
246 "Scunthorpe",
247 # zalgo text
248 "Ṱ̺̺̕o͞ ̷i̲̬͇̪͙n̝̗͕v̟̜̘̦͟o̶̙̰̠kè͚̮̺̪̹̱̤ ̖t̝͕̳̣̻̪͞h̼͓̲̦̳̘̲e͇̣̰̦̬͎ ̢̼̻̱̘h͚͎͙̜̣̲ͅi̦̲̣̰̤v̻͍e̺̭̳̪̰-m̢iͅn̖̺̞̲̯̰d̵̼̟͙̩̼̘̳ ̞̥̱̳̭r̛̗̘e͙p͠r̼̞̻̭̗e̺̠̣͟s̘͇̳͍̝͉e͉̥̯̞̲͚̬͜ǹ̬͎͎̟̖͇̤t͍̬̤͓̼̭͘ͅi̪̱n͠g̴͉ ͏͉ͅc̬̟h͡a̫̻̯͘o̫̟̖͍̙̝͉s̗̦̲.̨̹͈̣",
249 #
250 # examples from https://faultlore.com/blah/text-hates-you/
251 "मनीष منش",
252 "पन्ह पन्ह त्र र्च कृकृ ड्ड न्हृे إلا بسم الله",
253 "lorem لا بسم الله ipsum 你好1234你好",
254}
255
256
257# we don't actually care what order the constants are sorted in, just that the
258# ordering is deterministic.
259GLOBAL_CONSTANTS = Constants(
260 integers=SortedSet(_constants_integers),
261 floats=SortedSet(_constant_floats, key=float_to_int),
262 bytes=SortedSet(),
263 strings=SortedSet(_constant_strings),
264)
265
266_local_constants = Constants(
267 integers=SortedSet(),
268 floats=SortedSet(key=float_to_int),
269 bytes=SortedSet(),
270 strings=SortedSet(),
271)
272# modules that we've already seen and processed for local constants. These are
273# are all modules, not necessarily local ones. This lets us quickly see which
274# modules are new without an expensive path.resolve() or is_local_module_file
275# cache lookup.
276# We track by module object when hashable, falling back to the module name
277# (str key in sys.modules) for unhashable entries like SimpleNamespace.
278_seen_modules: set = set()
279_sys_modules_len: int | None = None
280
281
282def _get_local_constants() -> Constants:
283 global _sys_modules_len, _local_constants
284
285 if sys.platform == "emscripten": # pragma: no cover
286 # pyodide builds bundle the stdlib in a nonstandard location, like
287 # `/lib/python312.zip/heapq.py`. To avoid identifying the entirety of
288 # the stdlib as local code and slowing down on emscripten, instead return
289 # that nothing is local.
290 #
291 # pyodide may provide some way to distinguish stdlib/third-party/local
292 # code. I haven't looked into it. If they do, we should correctly implement
293 # ModuleLocation for pyodide instead of this.
294 return _local_constants
295
296 count_constants = len(_local_constants)
297 # We call this function once per HypothesisProvider instance, i.e. once per
298 # input, so it needs to be performant. The logic here is more complicated
299 # than necessary because of this.
300 #
301 # First, we check whether there are any new modules with a very cheap length
302 # check. This check can be fooled if a module is added while another module is
303 # removed, but the more correct check against tuple(sys.modules.keys()) is
304 # substantially more expensive. Such a new module would eventually be discovered
305 # if / when the length changes again in the future.
306 #
307 # If the length has changed, we find just modules we haven't seen before. Of
308 # those, we find the ones which correspond to local modules, and extract their
309 # constants.
310
311 # careful: store sys.modules length when we first check to avoid race conditions
312 # with other threads loading a module before we set _sys_modules_len.
313 if (sys_modules_len := len(sys.modules)) != _sys_modules_len:
314 new_modules = []
315 for name, module in list(sys.modules.items()):
316 try:
317 seen = module in _seen_modules
318 except TypeError:
319 # unhashable module (e.g. SimpleNamespace); fall back to name
320 seen = name in _seen_modules
321 if not seen:
322 new_modules.append((name, module))
323 # Repeated SortedSet unions are expensive. Do the initial unions on a
324 # set(), then do a one-time union with _local_constants after.
325 new_constants = Constants()
326 for name, module in new_modules:
327 if (
328 module_file := getattr(module, "__file__", None)
329 ) is not None and is_local_module_file(module_file):
330 new_constants |= constants_from_module(module)
331 try:
332 _seen_modules.add(module)
333 except TypeError:
334 _seen_modules.add(name)
335 _local_constants |= new_constants
336 _sys_modules_len = sys_modules_len
337
338 # if we add any new constant, invalidate the constant cache for permitted values.
339 # A more efficient approach would be invalidating just the keys with this
340 # choice_type.
341 if len(_local_constants) > count_constants:
342 CONSTANTS_CACHE.cache.clear()
343
344 return _local_constants
345
346
347@contextmanager
348def with_register_backend(name, provider_cls):
349 try:
350 AVAILABLE_PROVIDERS[name] = provider_cls
351 yield
352 finally:
353 del AVAILABLE_PROVIDERS[name]
354
355
356class _BackendInfoMsg(TypedDict):
357 type: InfoObservationType
358 title: str
359 content: str | dict[str, Any]
360
361
362# TODO_DOCS: link to choice sequence explanation page
363
364
365class PrimitiveProvider(abc.ABC):
366 """
367 |PrimitiveProvider| is the implementation interface of a
368 :ref:`Hypothesis backend <alternative-backends>`.
369
370 A |PrimitiveProvider| is required to implement the following five
371 ``draw_*`` methods:
372
373 * |PrimitiveProvider.draw_integer|
374 * |PrimitiveProvider.draw_boolean|
375 * |PrimitiveProvider.draw_float|
376 * |PrimitiveProvider.draw_string|
377 * |PrimitiveProvider.draw_bytes|
378
379 Each strategy in Hypothesis generates values by drawing a series of choices
380 from these five methods. By overriding them, a |PrimitiveProvider| can control
381 the distribution of inputs generated by Hypothesis.
382
383 For example, :pypi:`hypothesis-crosshair` implements a |PrimitiveProvider|
384 which uses an SMT solver to generate inputs that uncover new branches.
385
386 Once you implement a |PrimitiveProvider|, you can make it available for use
387 through |AVAILABLE_PROVIDERS|.
388 """
389
390 #: The lifetime of a |PrimitiveProvider| instance. Either ``test_function``
391 #: or ``test_case``.
392 #:
393 #: If ``test_function`` (the default), a single provider instance will be
394 #: instantiated and used for the entirety of each test function (i.e., roughly
395 #: one provider per |@given| annotation). This can be useful for tracking state
396 #: over the entirety of a test function.
397 #:
398 #: If ``test_case``, a new provider instance will be instantiated and used for
399 #: each input Hypothesis generates.
400 #:
401 #: The ``conjecturedata`` argument to ``PrimitiveProvider.__init__`` will
402 #: be ``None`` for a lifetime of ``test_function``, and an instance of
403 #: ``ConjectureData`` for a lifetime of ``test_case``.
404 #:
405 #: Third-party providers likely want to set a lifetime of ``test_function``.
406 lifetime: ClassVar[LifetimeT] = "test_function"
407
408 #: Solver-based backends such as ``hypothesis-crosshair`` use symbolic values
409 #: which record operations performed on them in order to discover new paths.
410 #: If ``avoid_realization`` is set to ``True``, hypothesis will avoid interacting
411 #: with symbolic choices returned by the provider in any way that would force
412 #: the solver to narrow the range of possible values for that symbolic.
413 #:
414 #: Setting this to ``True`` disables some hypothesis features and optimizations.
415 #: Only set this to ``True`` if it is necessary for your backend.
416 avoid_realization: ClassVar[bool] = False
417
418 #: If ``True``, |PrimitiveProvider.on_observation| will be added as a
419 #: callback via |add_observability_callback|, enabling observability during
420 # the lifetime of this provider. If ``False``, |PrimitiveProvider.on_observation|
421 #: will never be called by Hypothesis.
422 #:
423 #: The opt-in behavior of observability is because enabling observability
424 #: might increase runtime or memory usage.
425 add_observability_callback: ClassVar[bool] = False
426
427 def __init__(self, conjecturedata: Optional["ConjectureData"], /) -> None:
428 self._cd = conjecturedata
429
430 @abc.abstractmethod
431 def draw_boolean(
432 self,
433 p: float = 0.5,
434 ) -> bool:
435 """
436 Draw a boolean choice.
437
438 Parameters
439 ----------
440 p: float
441 The probability of returning ``True``. Between 0 and 1 inclusive.
442
443 Except for ``0`` and ``1``, the value of ``p`` is a hint provided by
444 Hypothesis, and may be ignored by the backend.
445
446 If ``0``, the provider must return ``False``. If ``1``, the provider
447 must return ``True``.
448 """
449 raise NotImplementedError
450
451 @abc.abstractmethod
452 def draw_integer(
453 self,
454 min_value: int | None = None,
455 max_value: int | None = None,
456 *,
457 weights: dict[int, float] | None = None,
458 shrink_towards: int = 0,
459 ) -> int:
460 """
461 Draw an integer choice.
462
463 Parameters
464 ----------
465 min_value : int | None
466 (Inclusive) lower bound on the integer value. If ``None``, there is
467 no lower bound.
468 max_value : int | None
469 (Inclusive) upper bound on the integer value. If ``None``, there is
470 no upper bound.
471 weights: dict[int, float] | None
472 Maps keys in the range [``min_value``, ``max_value``] to the probability
473 of returning that key.
474 shrink_towards: int
475 The integer to shrink towards. This is not used during generation and
476 can be ignored by backends.
477 """
478 raise NotImplementedError
479
480 @abc.abstractmethod
481 def draw_float(
482 self,
483 *,
484 min_value: float = -math.inf,
485 max_value: float = math.inf,
486 allow_nan: bool = True,
487 smallest_nonzero_magnitude: float,
488 ) -> float:
489 """
490 Draw a float choice.
491
492 Parameters
493 ----------
494 min_value : float
495 (Inclusive) lower bound on the float value.
496 max_value : float
497 (Inclusive) upper bound on the float value.
498 allow_nan : bool
499 If ``False``, it is invalid to return ``math.nan``.
500 smallest_nonzero_magnitude : float
501 The smallest allowed nonzero magnitude. ``draw_float`` should not
502 return a float ``f`` if ``abs(f) < smallest_nonzero_magnitude``.
503 """
504 raise NotImplementedError
505
506 @abc.abstractmethod
507 def draw_string(
508 self,
509 intervals: IntervalSet,
510 *,
511 min_size: int = 0,
512 max_size: int = COLLECTION_DEFAULT_MAX_SIZE,
513 ) -> str:
514 """
515 Draw a string choice.
516
517 Parameters
518 ----------
519 intervals : IntervalSet
520 The set of codepoints to sample from.
521 min_size : int
522 (Inclusive) lower bound on the string length.
523 max_size : int
524 (Inclusive) upper bound on the string length.
525 """
526 raise NotImplementedError
527
528 @abc.abstractmethod
529 def draw_bytes(
530 self,
531 min_size: int = 0,
532 max_size: int = COLLECTION_DEFAULT_MAX_SIZE,
533 ) -> bytes:
534 """
535 Draw a bytes choice.
536
537 Parameters
538 ----------
539 min_size : int
540 (Inclusive) lower bound on the bytes length.
541 max_size : int
542 (Inclusive) upper bound on the bytes length.
543 """
544 raise NotImplementedError
545
546 def per_test_case_context_manager(self) -> AbstractContextManager:
547 """
548 Returns a context manager which will be entered each time Hypothesis
549 starts generating and executing one test case, and exited when that test
550 case finishes generating and executing, including if any exception is
551 thrown.
552
553 In the lifecycle of a Hypothesis test, this is called before
554 generating strategy values for each test case. This is just before any
555 :ref:`custom executor <custom-function-execution>` is called.
556
557 Even if not returning a custom context manager, |PrimitiveProvider|
558 subclasses are welcome to override this method to know when Hypothesis
559 starts and ends the execution of a single test case.
560 """
561 return contextlib.nullcontext()
562
563 def realize(self, value: T, *, for_failure: bool = False) -> T:
564 """
565 Called whenever hypothesis requires a concrete (non-symbolic) value from
566 a potentially symbolic value. Hypothesis will not check that ``value`` is
567 symbolic before calling ``realize``, so you should handle the case where
568 ``value`` is non-symbolic.
569
570 The returned value should be non-symbolic. If you cannot provide a value,
571 raise |BackendCannotProceed| with a value of ``"discard_test_case"``.
572
573 If ``for_failure`` is ``True``, the value is associated with a failing example.
574 In this case, the backend should spend substantially more effort when
575 attempting to realize the value, since it is important to avoid discarding
576 failing examples. Backends may still raise |BackendCannotProceed| when
577 ``for_failure`` is ``True``, if realization is truly impossible or if
578 realization takes significantly longer than expected (say, 5 minutes).
579 """
580 return value
581
582 def replay_choices(self, choices: tuple[ChoiceT, ...]) -> None:
583 """
584 Called when Hypothesis has discovered a choice sequence which the provider
585 may wish to enqueue to replay under its own instrumentation when we next
586 ask to generate a test case, rather than generating one from scratch.
587
588 This is used to e.g. warm-start :pypi:`hypothesis-crosshair` with a corpus
589 of high-code-coverage inputs discovered by
590 `HypoFuzz <https://hypofuzz.com/>`_.
591 """
592 return None
593
594 def observe_test_case(self) -> dict[str, Any]:
595 """Called at the end of the test case when :ref:`observability
596 <observability>` is enabled.
597
598 The return value should be a non-symbolic json-encodable dictionary,
599 and will be included in observations as ``observation["metadata"]["backend"]``.
600 """
601 return {}
602
603 def observe_information_messages(
604 self, *, lifetime: LifetimeT
605 ) -> Iterable[_BackendInfoMsg]:
606 """Called at the end of each test case and again at end of the test function.
607
608 Return an iterable of ``{type: info/alert/error, title: str, content: str | dict}``
609 dictionaries to be delivered as individual information messages. Hypothesis
610 adds the ``run_start`` timestamp and ``property`` name for you.
611 """
612 assert lifetime in ("test_case", "test_function")
613 yield from []
614
615 def on_observation(self, observation: TestCaseObservation) -> None: # noqa: B027
616 """
617 Called at the end of each test case which uses this provider, with the same
618 ``observation["type"] == "test_case"`` observation that is passed to
619 other callbacks added via |add_observability_callback|. This method is not
620 called with ``observation["type"] in {"info", "alert", "error"}``
621 observations.
622
623 .. important::
624
625 For |PrimitiveProvider.on_observation| to be called by Hypothesis,
626 |PrimitiveProvider.add_observability_callback| must be set to ``True``.
627
628 |PrimitiveProvider.on_observation| is explicitly opt-in, as enabling
629 observability might increase runtime or memory usage.
630
631 Calls to this method are guaranteed to alternate with calls to
632 |PrimitiveProvider.per_test_case_context_manager|. For example:
633
634 .. code-block:: python
635
636 # test function starts
637 per_test_case_context_manager()
638 on_observation()
639 per_test_case_context_manager()
640 on_observation()
641 ...
642 # test function ends
643
644 Note that |PrimitiveProvider.on_observation| will not be called for test
645 cases which did not use this provider during generation, for example
646 during |Phase.reuse| or |Phase.shrink|, or because Hypothesis switched
647 to the standard Hypothesis backend after this backend raised too many
648 |BackendCannotProceed| exceptions.
649 """
650
651 def span_start(self, label: int, /) -> None: # noqa: B027 # non-abstract noop
652 """Marks the beginning of a semantically meaningful span of choices.
653
654 Spans are a depth-first tree structure. A span is opened by a call to
655 |PrimitiveProvider.span_start|, and a call to |PrimitiveProvider.span_end|
656 closes the most recently opened span. So the following sequence of calls:
657
658 .. code-block:: python
659
660 span_start(label=1)
661 n1 = draw_integer()
662 span_start(label=2)
663 b1 = draw_boolean()
664 n2 = draw_integer()
665 span_end()
666 f1 = draw_float()
667 span_end()
668
669 produces the following two spans of choices:
670
671 .. code-block::
672
673 1: [n1, b1, n2, f1]
674 2: [b1, n2]
675
676 Hypothesis uses spans to denote "semantically meaningful" sequences of
677 choices. For instance, Hypothesis opens a span for the sequence of choices
678 made while drawing from each strategy. Not every span corresponds to a
679 strategy; the generation of e.g. each element in |st.lists| is also marked
680 with a span, among others.
681
682 ``label`` is an opaque integer, which has no defined semantics.
683 The only guarantee made by Hypothesis is that all spans with the same
684 "meaning" will share the same ``label``. So all spans from the same
685 strategy will share the same label, as will e.g. the spans for |st.lists|
686 elements.
687
688 Providers can track calls to |PrimitiveProvider.span_start| and
689 |PrimitiveProvider.span_end| to learn something about the semantics of
690 the test's choice sequence. For instance, a provider could track the depth
691 of the span tree, or the number of unique labels, which says something about
692 the complexity of the choices being generated. Or a provider could track
693 the span tree across test cases in order to determine what strategies are
694 being used in what contexts.
695
696 It is possible for Hypothesis to start and immediately stop a span,
697 without calling a ``draw_*`` method in between. These spans contain zero
698 choices.
699
700 Hypothesis will always balance the number of calls to
701 |PrimitiveProvider.span_start| and |PrimitiveProvider.span_end|. A call
702 to |PrimitiveProvider.span_start| will always be followed by a call to
703 |PrimitiveProvider.span_end| before the end of the test case.
704
705 |PrimitiveProvider.span_start| is called from ``ConjectureData.start_span()``
706 internally.
707 """
708
709 def span_end(self, discard: bool, /) -> None: # noqa: B027
710 """Marks the end of a semantically meaningful span of choices.
711
712 ``discard`` is ``True`` when the draw was filtered out or otherwise marked
713 as unlikely to contribute to the input data as seen by the user's test.
714 Note however that side effects can make this determination unsound.
715
716 |PrimitiveProvider.span_end| is called from ``ConjectureData.stop_span()``
717 internally.
718 """
719
720
721class HypothesisProvider(PrimitiveProvider):
722 lifetime = "test_case"
723
724 def __init__(self, conjecturedata: Optional["ConjectureData"], /):
725 super().__init__(conjecturedata)
726 self._random = None if self._cd is None else self._cd._random
727
728 @cached_property
729 def _local_constants(self):
730 # defer computation of local constants until/if we need it
731 return _get_local_constants()
732
733 def _maybe_draw_constant(
734 self,
735 choice_type: ChoiceTypeT,
736 constraints: ChoiceConstraintsT,
737 *,
738 p: float = 0.05,
739 ) -> Optional["ConstantT"]:
740 assert self._random is not None
741 assert choice_type != "boolean"
742 # check whether we even want a constant before spending time computing
743 # and caching the allowed constants.
744 if self._random.random() > p:
745 return None
746
747 # note: this property access results in computation being done
748 assert self._local_constants is not None
749
750 key = (choice_type, choice_constraints_key(choice_type, constraints))
751 if key not in CONSTANTS_CACHE:
752 CONSTANTS_CACHE[key] = (
753 tuple(
754 choice
755 for choice in GLOBAL_CONSTANTS.set_for_type(choice_type)
756 if choice_permitted(choice, constraints)
757 ),
758 tuple(
759 choice
760 for choice in self._local_constants.set_for_type(choice_type)
761 if choice_permitted(choice, constraints)
762 ),
763 )
764
765 # split constants into two pools, so we still have a good chance to draw
766 # global constants even if there are many local constants.
767 global_constants, local_constants = CONSTANTS_CACHE[key]
768 constants_lists = ([global_constants] if global_constants else []) + (
769 [local_constants] if local_constants else []
770 )
771 if not constants_lists:
772 return None
773
774 # At this point, we've decided to use a constant. Now we select which pool
775 # to draw that constant from.
776 #
777 # Note that this approach has a different probability distribution than
778 # attempting a random.random for both global_constants and local_constants.
779 constants = self._random.choice(constants_lists)
780 return self._random.choice(constants)
781
782 def draw_boolean(
783 self,
784 p: float = 0.5,
785 ) -> bool:
786 assert self._random is not None
787
788 if p <= 0:
789 return False
790 if p >= 1:
791 return True
792
793 return self._random.random() < p
794
795 def draw_integer(
796 self,
797 min_value: int | None = None,
798 max_value: int | None = None,
799 *,
800 weights: dict[int, float] | None = None,
801 shrink_towards: int = 0,
802 ) -> int:
803 assert self._cd is not None
804 if (
805 constant := self._maybe_draw_constant(
806 "integer",
807 {
808 "min_value": min_value,
809 "max_value": max_value,
810 "weights": weights,
811 "shrink_towards": shrink_towards,
812 },
813 )
814 ) is not None:
815 assert isinstance(constant, int)
816 return constant
817
818 center = 0
819 if min_value is not None:
820 center = max(min_value, center)
821 if max_value is not None:
822 center = min(max_value, center)
823
824 if weights is not None:
825 assert min_value is not None
826 assert max_value is not None
827
828 # format of weights is a mapping of ints to p, where sum(p) < 1.
829 # The remaining probability mass is uniformly distributed over
830 # *all* ints (not just the unmapped ones; this is somewhat undesirable,
831 # but simplifies things).
832 #
833 # We assert that sum(p) is strictly less than 1 because it simplifies
834 # handling forced values when we can force into the unmapped probability
835 # mass. We should eventually remove this restriction.
836 sampler = Sampler(
837 [1 - sum(weights.values()), *weights.values()], observe=False
838 )
839 # if we're forcing, it's easiest to force into the unmapped probability
840 # mass and then force the drawn value after.
841 idx = sampler.sample(self._cd)
842
843 if idx == 0:
844 return self._draw_bounded_integer(min_value, max_value)
845 # implicit reliance on dicts being sorted for determinism
846 return list(weights)[idx - 1]
847
848 if min_value is None and max_value is None:
849 return self._draw_unbounded_integer()
850
851 if min_value is None:
852 assert max_value is not None
853 probe = max_value + 1
854 while max_value < probe:
855 probe = center + self._draw_unbounded_integer()
856 return probe
857
858 if max_value is None:
859 assert min_value is not None
860 probe = min_value - 1
861 while probe < min_value:
862 probe = center + self._draw_unbounded_integer()
863 return probe
864
865 return self._draw_bounded_integer(min_value, max_value)
866
867 def draw_float(
868 self,
869 *,
870 min_value: float = -math.inf,
871 max_value: float = math.inf,
872 allow_nan: bool = True,
873 smallest_nonzero_magnitude: float,
874 ) -> float:
875 assert self._random is not None
876
877 constraints: FloatConstraints = {
878 "min_value": min_value,
879 "max_value": max_value,
880 "allow_nan": allow_nan,
881 "smallest_nonzero_magnitude": smallest_nonzero_magnitude,
882 }
883 if (
884 constant := self._maybe_draw_constant("float", constraints, p=0.15)
885 ) is not None:
886 assert isinstance(constant, float)
887 return constant
888
889 # on top of the probability to draw a constant float, we independently
890 # upweight 0.0/-0.0, math.inf, -math.inf, nans, and boundary values.
891 weird_floats = [
892 f
893 for f in [
894 0.0,
895 -0.0,
896 math.inf,
897 -math.inf,
898 math.nan,
899 -math.nan,
900 SIGNALING_NAN,
901 -SIGNALING_NAN,
902 min_value,
903 next_up(min_value),
904 min_value + 1,
905 max_value - 1,
906 next_down(max_value),
907 max_value,
908 ]
909 if choice_permitted(f, constraints)
910 ]
911
912 if weird_floats and self._random.random() < 0.05:
913 return self._random.choice(weird_floats)
914
915 clamper = make_float_clamper(
916 min_value,
917 max_value,
918 smallest_nonzero_magnitude=smallest_nonzero_magnitude,
919 allow_nan=allow_nan,
920 )
921
922 result = self._draw_float()
923 if allow_nan and math.isnan(result):
924 clamped = result # pragma: no cover
925 else:
926 clamped = clamper(result)
927 if float_to_int(clamped) != float_to_int(result) and not (
928 math.isnan(result) and allow_nan
929 ):
930 result = clamped
931 return result
932
933 def draw_string(
934 self,
935 intervals: IntervalSet,
936 *,
937 min_size: int = 0,
938 max_size: int = COLLECTION_DEFAULT_MAX_SIZE,
939 ) -> str:
940 assert self._cd is not None
941 assert self._random is not None
942
943 if len(intervals) == 0:
944 return ""
945
946 if (
947 constant := self._maybe_draw_constant(
948 "string",
949 {"intervals": intervals, "min_size": min_size, "max_size": max_size},
950 )
951 ) is not None:
952 assert isinstance(constant, str)
953 return constant
954
955 average_size = min(
956 max(min_size * 2, min_size + 5),
957 0.5 * (min_size + max_size),
958 )
959
960 chars = []
961 elements = many(
962 self._cd,
963 min_size=min_size,
964 max_size=max_size,
965 average_size=average_size,
966 observe=False,
967 )
968 while elements.more():
969 if len(intervals) > 256:
970 if self.draw_boolean(0.2):
971 i = self._random.randint(256, len(intervals) - 1)
972 else:
973 i = self._random.randint(0, 255)
974 else:
975 i = self._random.randint(0, len(intervals) - 1)
976
977 chars.append(intervals.char_in_shrink_order(i))
978
979 return "".join(chars)
980
981 def draw_bytes(
982 self,
983 min_size: int = 0,
984 max_size: int = COLLECTION_DEFAULT_MAX_SIZE,
985 ) -> bytes:
986 assert self._cd is not None
987 assert self._random is not None
988
989 if (
990 constant := self._maybe_draw_constant(
991 "bytes", {"min_size": min_size, "max_size": max_size}
992 )
993 ) is not None:
994 assert isinstance(constant, bytes)
995 return constant
996
997 buf = bytearray()
998 average_size = min(
999 max(min_size * 2, min_size + 5),
1000 0.5 * (min_size + max_size),
1001 )
1002 elements = many(
1003 self._cd,
1004 min_size=min_size,
1005 max_size=max_size,
1006 average_size=average_size,
1007 observe=False,
1008 )
1009 while elements.more():
1010 buf += self._random.randbytes(1)
1011
1012 return bytes(buf)
1013
1014 def _draw_float(self) -> float:
1015 assert self._random is not None
1016
1017 f = lex_to_float(self._random.getrandbits(64))
1018 sign = 1 if self._random.getrandbits(1) else -1
1019 return sign * f
1020
1021 def _draw_unbounded_integer(self) -> int:
1022 assert self._cd is not None
1023 assert self._random is not None
1024
1025 size = INT_SIZES[INT_SIZES_SAMPLER.sample(self._cd)]
1026
1027 r = self._random.getrandbits(size)
1028 sign = r & 1
1029 r >>= 1
1030 if sign:
1031 r = -r
1032 return r
1033
1034 def _draw_bounded_integer(
1035 self,
1036 lower: int,
1037 upper: int,
1038 *,
1039 vary_size: bool = True,
1040 ) -> int:
1041 assert lower <= upper
1042 assert self._cd is not None
1043 assert self._random is not None
1044
1045 if lower == upper:
1046 return lower
1047
1048 bits = (upper - lower).bit_length()
1049 if bits > 24 and vary_size and self._random.random() < 7 / 8:
1050 # For large ranges, we combine the uniform random distribution
1051 # with a weighting scheme with moderate chance. Cutoff at 2 ** 24 so that our
1052 # choice of unicode characters is uniform but the 32bit distribution is not.
1053 idx = INT_SIZES_SAMPLER.sample(self._cd)
1054 cap_bits = min(bits, INT_SIZES[idx])
1055 upper = min(upper, lower + 2**cap_bits - 1)
1056 return self._random.randint(lower, upper)
1057
1058 return self._random.randint(lower, upper)
1059
1060
1061# Masks for masking off the first byte of an n-bit buffer.
1062# The appropriate mask is stored at position n % 8.
1063BYTE_MASKS = [(1 << n) - 1 for n in range(8)]
1064BYTE_MASKS[0] = 255
1065
1066
1067class BytestringProvider(PrimitiveProvider):
1068 lifetime = "test_case"
1069
1070 def __init__(
1071 self, conjecturedata: Optional["ConjectureData"], /, *, bytestring: bytes
1072 ):
1073 super().__init__(conjecturedata)
1074 self.bytestring = bytestring
1075 self.index = 0
1076 self.drawn = bytearray()
1077
1078 def _draw_bits(self, n):
1079 if n == 0: # pragma: no cover
1080 return 0
1081 n_bytes = bits_to_bytes(n)
1082 if self.index + n_bytes > len(self.bytestring):
1083 self._cd.mark_overrun()
1084 buf = bytearray(self.bytestring[self.index : self.index + n_bytes])
1085 self.index += n_bytes
1086
1087 buf[0] &= BYTE_MASKS[n % 8]
1088 buf = bytes(buf)
1089 self.drawn += buf
1090 return int_from_bytes(buf)
1091
1092 def draw_boolean(
1093 self,
1094 p: float = 0.5,
1095 ) -> bool:
1096 if p <= 0:
1097 return False
1098 if p >= 1:
1099 return True
1100
1101 # always use one byte for booleans to maintain constant draw size.
1102 # If a probability requires more than 8 bits to represent precisely,
1103 # the result will be slightly biased, but not badly.
1104 bits = 8
1105 size = 2**bits
1106 # always leave at least one value that can be true, even for very small
1107 # p.
1108 falsey = max(1, math.floor(size * (1 - p)))
1109 n = self._draw_bits(bits)
1110 return n >= falsey
1111
1112 def draw_integer(
1113 self,
1114 min_value: int | None = None,
1115 max_value: int | None = None,
1116 *,
1117 weights: dict[int, float] | None = None,
1118 shrink_towards: int = 0,
1119 ) -> int:
1120 assert self._cd is not None
1121
1122 # we explicitly ignore integer weights for now, as they are likely net
1123 # negative on fuzzer performance.
1124
1125 if min_value is None and max_value is None:
1126 min_value = -(2**127)
1127 max_value = 2**127 - 1
1128 elif min_value is None:
1129 assert max_value is not None
1130 min_value = max_value - 2**64
1131 elif max_value is None:
1132 assert min_value is not None
1133 max_value = min_value + 2**64
1134
1135 if min_value == max_value:
1136 return min_value
1137
1138 bits = (max_value - min_value).bit_length()
1139 value = self._draw_bits(bits)
1140 while not (min_value <= value <= max_value):
1141 value = self._draw_bits(bits)
1142 return value
1143
1144 def draw_float(
1145 self,
1146 *,
1147 min_value: float = -math.inf,
1148 max_value: float = math.inf,
1149 allow_nan: bool = True,
1150 smallest_nonzero_magnitude: float,
1151 ) -> float:
1152 n = self._draw_bits(64)
1153 sign = -1 if n >> 64 else 1
1154 f = sign * lex_to_float(n & ((1 << 64) - 1))
1155 clamper = make_float_clamper(
1156 min_value,
1157 max_value,
1158 smallest_nonzero_magnitude=smallest_nonzero_magnitude,
1159 allow_nan=allow_nan,
1160 )
1161 return clamper(f)
1162
1163 def _draw_collection(self, min_size, max_size, *, alphabet_size):
1164 average_size = min(
1165 max(min_size * 2, min_size + 5),
1166 0.5 * (min_size + max_size),
1167 )
1168 elements = many(
1169 self._cd,
1170 min_size=min_size,
1171 max_size=max_size,
1172 average_size=average_size,
1173 observe=False,
1174 )
1175 values = []
1176 while elements.more():
1177 values.append(self.draw_integer(0, alphabet_size - 1))
1178 return values
1179
1180 def draw_string(
1181 self,
1182 intervals: IntervalSet,
1183 *,
1184 min_size: int = 0,
1185 max_size: int = COLLECTION_DEFAULT_MAX_SIZE,
1186 ) -> str:
1187 values = self._draw_collection(min_size, max_size, alphabet_size=len(intervals))
1188 return "".join(chr(intervals[v]) for v in values)
1189
1190 def draw_bytes(
1191 self,
1192 min_size: int = 0,
1193 max_size: int = COLLECTION_DEFAULT_MAX_SIZE,
1194 ) -> bytes:
1195 values = self._draw_collection(min_size, max_size, alphabet_size=2**8)
1196 return bytes(values)
1197
1198
1199class URandom(Random):
1200 # we reimplement a Random instance instead of using SystemRandom, because
1201 # os.urandom is not guaranteed to read from /dev/urandom.
1202
1203 @staticmethod
1204 def _urandom(size: int) -> bytes:
1205 with open("/dev/urandom", "rb") as f:
1206 return f.read(size)
1207
1208 def getrandbits(self, k: int) -> int:
1209 assert k >= 0
1210 size = bits_to_bytes(k)
1211 n = int_from_bytes(self._urandom(size))
1212 # trim excess bits
1213 return n >> (size * 8 - k)
1214
1215 def random(self) -> float:
1216 # adapted from random.SystemRandom.random
1217 return (int_from_bytes(self._urandom(7)) >> 3) * (2**-53)
1218
1219
1220class URandomProvider(HypothesisProvider):
1221 # A provider which reads directly from /dev/urandom as its source of randomness.
1222 # This provider exists to provide better Hypothesis integration with Antithesis
1223 # (https://antithesis.com/), which interprets calls to /dev/urandom as the
1224 # randomness to mutate. This effectively gives Antithesis control over
1225 # the choices made by the URandomProvider.
1226 #
1227 # If you are not using Antithesis, you probably don't want to use this
1228 # provider.
1229
1230 def __init__(self, conjecturedata: Optional["ConjectureData"], /):
1231 super().__init__(conjecturedata)
1232 if WINDOWS: # pragma: no cover
1233 warnings.warn(
1234 "/dev/urandom is not available on windows. Falling back to "
1235 'standard PRNG generation (equivalent to backend="hypothesis").',
1236 HypothesisWarning,
1237 stacklevel=1,
1238 )
1239 # don't overwrite the HypothesisProvider self._random attribute in
1240 # this case
1241 else:
1242 self._random = URandom()