1# This file is part of Hypothesis, which may be found at
2# https://github.com/HypothesisWorks/hypothesis/
3#
4# Copyright the Hypothesis Authors.
5# Individual contributors are listed in AUTHORS.rst and the git log.
6#
7# This Source Code Form is subject to the terms of the Mozilla Public License,
8# v. 2.0. If a copy of the MPL was not distributed with this file, You can
9# obtain one at https://mozilla.org/MPL/2.0/.
10
11import abc
12import contextlib
13import math
14import sys
15import warnings
16from collections.abc import Iterable
17from contextlib import AbstractContextManager, contextmanager
18from functools import cached_property
19from random import Random
20from sys import float_info
21from typing import (
22 TYPE_CHECKING,
23 Any,
24 ClassVar,
25 Literal,
26 Optional,
27 TypeAlias,
28 TypedDict,
29 TypeVar,
30)
31
32from sortedcontainers import SortedSet
33
34from hypothesis.errors import HypothesisWarning
35from hypothesis.internal.cache import LRUCache
36from hypothesis.internal.compat import WINDOWS, int_from_bytes
37from hypothesis.internal.conjecture.choice import (
38 ChoiceConstraintsT,
39 ChoiceT,
40 ChoiceTypeT,
41 FloatConstraints,
42 choice_constraints_key,
43 choice_permitted,
44)
45from hypothesis.internal.conjecture.floats import lex_to_float
46from hypothesis.internal.conjecture.junkdrawer import bits_to_bytes
47from hypothesis.internal.conjecture.utils import (
48 INT_SIZES,
49 INT_SIZES_SAMPLER,
50 Sampler,
51 many,
52)
53from hypothesis.internal.constants_ast import (
54 Constants,
55 constants_from_module,
56 is_local_module_file,
57)
58from hypothesis.internal.floats import (
59 SIGNALING_NAN,
60 float_to_int,
61 make_float_clamper,
62 next_down,
63 next_up,
64)
65from hypothesis.internal.intervalsets import IntervalSet
66from hypothesis.internal.observability import InfoObservationType, TestCaseObservation
67
68if TYPE_CHECKING:
69 from hypothesis.internal.conjecture.data import ConjectureData
70 from hypothesis.internal.constants_ast import ConstantT
71
72T = TypeVar("T")
73LifetimeT: TypeAlias = Literal["test_case", "test_function"]
74COLLECTION_DEFAULT_MAX_SIZE = 10**10 # "arbitrarily large"
75
76
77#: Registered Hypothesis backends. This is a dictionary where keys are the name
78#: to be used in |settings.backend|. The value of a key can be either:
79#:
80#: * A string corresponding to an importable absolute path of a
81#: |PrimitiveProvider| subclass
82#: * A |PrimitiveProvider| subclass (the class itself, not an instance of the
83#: class)
84#:
85#: Hypothesis will instantiate the corresponding |PrimitiveProvider| subclass
86#: when the backend is requested by a test's |settings.backend| value.
87#:
88#: For example, the default Hypothesis backend is registered as:
89#:
90#: .. code-block:: python
91#:
92#: from hypothesis.internal.conjecture.providers import AVAILABLE_PROVIDERS
93#:
94#: AVAILABLE_PROVIDERS["hypothesis"] = "hypothesis.internal.conjecture.providers.HypothesisProvider"
95#: # or
96#: AVAILABLE_PROVIDERS["hypothesis"] = HypothesisProvider
97#:
98#: And can be used with:
99#:
100#: .. code-block:: python
101#:
102#: from hypothesis import given, settings, strategies as st
103#:
104#: @given(st.integers())
105#: @settings(backend="hypothesis")
106#: def f(n):
107#: pass
108#:
109#: Though, as ``backend="hypothesis"`` is the default setting, the above would
110#: typically not have any effect.
111#:
112#: For third-party backend authors, we strongly encourage ensuring that
113#: ``import hypothesis`` does not automatically import the expensive parts of
114#: your package, by:
115#:
116#: - setting a string path here, instead of a provider class
117#: - ensuring the registered hypothesis plugin path references a path which just
118#: sets AVAILABLE_PROVIDERS and does not import your package
119AVAILABLE_PROVIDERS: dict[str, str | type["PrimitiveProvider"]] = {
120 "hypothesis": "hypothesis.internal.conjecture.providers.HypothesisProvider",
121 "hypothesis-urandom": "hypothesis.internal.conjecture.providers.URandomProvider",
122}
123# cache the choice_permitted constants for a particular set of constraints.
124CacheKeyT: TypeAlias = tuple[ChoiceTypeT, tuple[Any, ...]]
125CacheValueT: TypeAlias = tuple[tuple["ConstantT", ...], tuple["ConstantT", ...]]
126CONSTANTS_CACHE: LRUCache[CacheKeyT, CacheValueT] = LRUCache(1024)
127
128
129_constants_integers = (
130 # powers of 2
131 [2**n for n in range(16, 66)]
132 # powers of 10
133 + [10**n for n in range(5, 20)]
134 # factorials
135 + [math.factorial(n) for n in range(9, 21)]
136 # a few primorial numbers https://en.wikipedia.org/wiki/Primorial
137 + [
138 510510,
139 6469693230,
140 304250263527210,
141 32589158477190044730,
142 ]
143)
144_constants_integers.extend(
145 [n - 1 for n in _constants_integers] + [n + 1 for n in _constants_integers]
146)
147_constants_integers.extend([-x for x in _constants_integers])
148
149# arbitrary cutoffs to keep our list bounded
150assert all(50_000 <= abs(n) <= 2**66 for n in _constants_integers)
151
152_constant_floats = (
153 [
154 0.5,
155 1.1,
156 1.5,
157 1.9,
158 1.0 / 3,
159 10e6,
160 10e-6,
161 1.175494351e-38,
162 next_up(0.0),
163 float_info.min,
164 float_info.max,
165 3.402823466e38,
166 9007199254740992.0,
167 1 - 10e-6,
168 2 + 10e-6,
169 1.192092896e-07,
170 2.2204460492503131e-016,
171 ]
172 + [2.0**-n for n in (24, 14, 149, 126)] # minimum (sub)normals for float16,32
173 + [float_info.min / n for n in (2, 10, 1000, 100_000)] # subnormal in float64
174)
175_constant_floats.extend([-x for x in _constant_floats])
176assert all(isinstance(f, float) for f in _constant_floats)
177
178_constant_strings = {
179 # strings which can be interpreted as code / logic
180 "undefined",
181 "null",
182 "NULL",
183 "nil",
184 "NIL",
185 "true",
186 "false",
187 "True",
188 "False",
189 "TRUE",
190 "FALSE",
191 "None",
192 "none",
193 "if",
194 "then",
195 "else",
196 "__dict__",
197 "__proto__", # javascript
198 # strings which can be interpreted as a number
199 "0",
200 "1e100",
201 "0..0",
202 "0/0",
203 "1/0",
204 "+0.0",
205 "Infinity",
206 "-Infinity",
207 "Inf",
208 "INF",
209 "NaN",
210 "9" * 30,
211 # common ascii characters
212 ",./;'[]\\-=<>?:\"{}|_+!@#$%^&*()`~",
213 # common unicode characters
214 "Ω≈ç√∫˜µ≤≥÷åß∂ƒ©˙∆˚¬…æœ∑´®†¥¨ˆøπ“‘¡™£¢∞§¶•ªº–≠¸˛Ç◊ı˜Â¯˘¿ÅÍÎÏ˝ÓÔÒÚÆ☃Œ„´‰ˇÁ¨ˆØ∏”’`⁄€‹›fifl‡°·‚—±",
215 # characters which increase in length when lowercased
216 "Ⱥ",
217 "Ⱦ",
218 # ligatures
219 "æœÆŒffʤʨß",
220 # emoticons
221 "(╯°□°)╯︵ ┻━┻)",
222 # emojis
223 "😍",
224 "🇺🇸",
225 # emoji modifiers
226 "🏻", # U+1F3FB Light Skin Tone,
227 "👍🏻", # 👍 followed by U+1F3FB
228 # RTL text
229 "الكل في المجمو عة",
230 # Ogham text, which contains the only character in the Space Separators
231 # unicode category (Zs) that isn't visually blank: . # noqa: RUF003
232 "᚛ᚄᚓᚐᚋᚒᚄ ᚑᚄᚂᚑᚏᚅ᚜",
233 # thai consonant + spacing vowel combinations, which have unusual visual combining behavior
234 "กา",
235 "ก ำกำ",
236 # readable variations on text (bolt/italic/script)
237 "𝐓𝐡𝐞 𝐪𝐮𝐢𝐜𝐤 𝐛𝐫𝐨𝐰𝐧 𝐟𝐨𝐱 𝐣𝐮𝐦𝐩𝐬 𝐨𝐯𝐞𝐫 𝐭𝐡𝐞 𝐥𝐚𝐳𝐲 𝐝𝐨𝐠",
238 "𝕿𝖍𝖊 𝖖𝖚𝖎𝖈𝖐 𝖇𝖗𝖔𝖜𝖓 𝖋𝖔𝖝 𝖏𝖚𝖒𝖕𝖘 𝖔𝖛𝖊𝖗 𝖙𝖍𝖊 𝖑𝖆𝖟𝖞 𝖉𝖔𝖌",
239 "𝑻𝒉𝒆 𝒒𝒖𝒊𝒄𝒌 𝒃𝒓𝒐𝒘𝒏 𝒇𝒐𝒙 𝒋𝒖𝒎𝒑𝒔 𝒐𝒗𝒆𝒓 𝒕𝒉𝒆 𝒍𝒂𝒛𝒚 𝒅𝒐𝒈",
240 "𝓣𝓱𝓮 𝓺𝓾𝓲𝓬𝓴 𝓫𝓻𝓸𝔀𝓷 𝓯𝓸𝔁 𝓳𝓾𝓶𝓹𝓼 𝓸𝓿𝓮𝓻 𝓽𝓱𝓮 𝓵𝓪𝔃𝔂 𝓭𝓸𝓰",
241 "𝕋𝕙𝕖 𝕢𝕦𝕚𝕔𝕜 𝕓𝕣𝕠𝕨𝕟 𝕗𝕠𝕩 𝕛𝕦𝕞𝕡𝕤 𝕠𝕧𝕖𝕣 𝕥𝕙𝕖 𝕝𝕒𝕫𝕪 𝕕𝕠𝕘",
242 # upsidown text
243 "ʇǝɯɐ ʇᴉs ɹolop ɯnsdᴉ ɯǝɹo˥",
244 # reserved strings in windows
245 "NUL",
246 "COM1",
247 "LPT1",
248 # scunthorpe problem
249 "Scunthorpe",
250 # zalgo text
251 "Ṱ̺̺̕o͞ ̷i̲̬͇̪͙n̝̗͕v̟̜̘̦͟o̶̙̰̠kè͚̮̺̪̹̱̤ ̖t̝͕̳̣̻̪͞h̼͓̲̦̳̘̲e͇̣̰̦̬͎ ̢̼̻̱̘h͚͎͙̜̣̲ͅi̦̲̣̰̤v̻͍e̺̭̳̪̰-m̢iͅn̖̺̞̲̯̰d̵̼̟͙̩̼̘̳ ̞̥̱̳̭r̛̗̘e͙p͠r̼̞̻̭̗e̺̠̣͟s̘͇̳͍̝͉e͉̥̯̞̲͚̬͜ǹ̬͎͎̟̖͇̤t͍̬̤͓̼̭͘ͅi̪̱n͠g̴͉ ͏͉ͅc̬̟h͡a̫̻̯͘o̫̟̖͍̙̝͉s̗̦̲.̨̹͈̣",
252 #
253 # examples from https://faultlore.com/blah/text-hates-you/
254 "मनीष منش",
255 "पन्ह पन्ह त्र र्च कृकृ ड्ड न्हृे إلا بسم الله",
256 "lorem لا بسم الله ipsum 你好1234你好",
257 # unicode charaacters causing unconditional line breaks, as defined by UAX #14:
258 # https://www.unicode.org/reports/tr14/.
259 #
260 # We've seen multiple bugs caused by assuming `str.splitlines` is equivalent to
261 # splitting over "\n", while it actually splits over all line breaks!
262 #
263 # We intersperse the line breaks with normal characters to increase the likelihood
264 # of triggering such a bug.
265 (
266 "a"
267 "\u000a" # line feed (class: LF)
268 "b"
269 "\u000d" # carriage return (class: CR)
270 "c"
271 "\u0085" # next line (class: NL)
272 "d"
273 "\u000b" # line tabulation (class: BK)
274 "e"
275 "\u000c" # form feed (class: BK)
276 "f"
277 "\u2028" # line separator (class: BK)
278 "g"
279 "\u2029" # paragraph separator (class: BK)
280 "h"
281 "\u000d\u000a" # CR+LF
282 "i"
283 ),
284}
285
286
287# we don't actually care what order the constants are sorted in, just that the
288# ordering is deterministic.
289GLOBAL_CONSTANTS = Constants(
290 integers=SortedSet(_constants_integers),
291 floats=SortedSet(_constant_floats, key=float_to_int),
292 bytes=SortedSet(),
293 strings=SortedSet(_constant_strings),
294)
295
296_local_constants = Constants(
297 integers=SortedSet(),
298 floats=SortedSet(key=float_to_int),
299 bytes=SortedSet(),
300 strings=SortedSet(),
301)
302# modules that we've already seen and processed for local constants. These are
303# are all modules, not necessarily local ones. This lets us quickly see which
304# modules are new without an expensive path.resolve() or is_local_module_file
305# cache lookup.
306# We track by module object when hashable, falling back to the module name
307# (str key in sys.modules) for unhashable entries like SimpleNamespace.
308_seen_modules: set = set()
309_sys_modules_len: int | None = None
310
311
312def _get_local_constants() -> Constants:
313 global _sys_modules_len, _local_constants
314
315 if sys.platform == "emscripten": # pragma: no cover
316 # pyodide builds bundle the stdlib in a nonstandard location, like
317 # `/lib/python312.zip/heapq.py`. To avoid identifying the entirety of
318 # the stdlib as local code and slowing down on emscripten, instead return
319 # that nothing is local.
320 #
321 # pyodide may provide some way to distinguish stdlib/third-party/local
322 # code. I haven't looked into it. If they do, we should correctly implement
323 # ModuleLocation for pyodide instead of this.
324 return _local_constants
325
326 count_constants = len(_local_constants)
327 # We call this function once per HypothesisProvider instance, i.e. once per
328 # input, so it needs to be performant. The logic here is more complicated
329 # than necessary because of this.
330 #
331 # First, we check whether there are any new modules with a very cheap length
332 # check. This check can be fooled if a module is added while another module is
333 # removed, but the more correct check against tuple(sys.modules.keys()) is
334 # substantially more expensive. Such a new module would eventually be discovered
335 # if / when the length changes again in the future.
336 #
337 # If the length has changed, we find just modules we haven't seen before. Of
338 # those, we find the ones which correspond to local modules, and extract their
339 # constants.
340
341 # careful: store sys.modules length when we first check to avoid race conditions
342 # with other threads loading a module before we set _sys_modules_len.
343 if (sys_modules_len := len(sys.modules)) != _sys_modules_len:
344 new_modules = []
345 for name, module in list(sys.modules.items()):
346 try:
347 seen = module in _seen_modules
348 except TypeError:
349 # unhashable module (e.g. SimpleNamespace); fall back to name
350 seen = name in _seen_modules
351 if not seen:
352 new_modules.append((name, module))
353 # Repeated SortedSet unions are expensive. Do the initial unions on a
354 # set(), then do a one-time union with _local_constants after.
355 new_constants = Constants()
356 for name, module in new_modules:
357 if (
358 module_file := getattr(module, "__file__", None)
359 ) is not None and is_local_module_file(module_file):
360 new_constants |= constants_from_module(module)
361 try:
362 _seen_modules.add(module)
363 except TypeError:
364 _seen_modules.add(name)
365 _local_constants |= new_constants
366 _sys_modules_len = sys_modules_len
367
368 # if we add any new constant, invalidate the constant cache for permitted values.
369 # A more efficient approach would be invalidating just the keys with this
370 # choice_type.
371 if len(_local_constants) > count_constants:
372 CONSTANTS_CACHE.cache.clear()
373
374 return _local_constants
375
376
377@contextmanager
378def with_register_backend(name, provider_cls):
379 try:
380 AVAILABLE_PROVIDERS[name] = provider_cls
381 yield
382 finally:
383 del AVAILABLE_PROVIDERS[name]
384
385
386class _BackendInfoMsg(TypedDict):
387 type: InfoObservationType
388 title: str
389 content: str | dict[str, Any]
390
391
392# TODO_DOCS: link to choice sequence explanation page
393
394
395class PrimitiveProvider(abc.ABC):
396 """
397 |PrimitiveProvider| is the implementation interface of a
398 :ref:`Hypothesis backend <alternative-backends>`.
399
400 A |PrimitiveProvider| is required to implement the following five
401 ``draw_*`` methods:
402
403 * |PrimitiveProvider.draw_integer|
404 * |PrimitiveProvider.draw_boolean|
405 * |PrimitiveProvider.draw_float|
406 * |PrimitiveProvider.draw_string|
407 * |PrimitiveProvider.draw_bytes|
408
409 Each strategy in Hypothesis generates values by drawing a series of choices
410 from these five methods. By overriding them, a |PrimitiveProvider| can control
411 the distribution of inputs generated by Hypothesis.
412
413 For example, :pypi:`hypothesis-crosshair` implements a |PrimitiveProvider|
414 which uses an SMT solver to generate inputs that uncover new branches.
415
416 Once you implement a |PrimitiveProvider|, you can make it available for use
417 through |AVAILABLE_PROVIDERS|.
418 """
419
420 #: The lifetime of a |PrimitiveProvider| instance. Either ``test_function``
421 #: or ``test_case``.
422 #:
423 #: If ``test_function`` (the default), a single provider instance will be
424 #: instantiated and used for the entirety of each test function (i.e., roughly
425 #: one provider per |@given| annotation). This can be useful for tracking state
426 #: over the entirety of a test function.
427 #:
428 #: If ``test_case``, a new provider instance will be instantiated and used for
429 #: each input Hypothesis generates.
430 #:
431 #: The ``conjecturedata`` argument to ``PrimitiveProvider.__init__`` will
432 #: be ``None`` for a lifetime of ``test_function``, and an instance of
433 #: ``ConjectureData`` for a lifetime of ``test_case``.
434 #:
435 #: Third-party providers likely want to set a lifetime of ``test_function``.
436 lifetime: ClassVar[LifetimeT] = "test_function"
437
438 #: Solver-based backends such as ``hypothesis-crosshair`` use symbolic values
439 #: which record operations performed on them in order to discover new paths.
440 #: If ``avoid_realization`` is set to ``True``, hypothesis will avoid interacting
441 #: with symbolic choices returned by the provider in any way that would force
442 #: the solver to narrow the range of possible values for that symbolic.
443 #:
444 #: Setting this to ``True`` disables some hypothesis features and optimizations.
445 #: Only set this to ``True`` if it is necessary for your backend.
446 avoid_realization: ClassVar[bool] = False
447
448 #: If ``True``, |PrimitiveProvider.on_observation| will be added as a
449 #: callback via |add_observability_callback|, enabling observability during
450 # the lifetime of this provider. If ``False``, |PrimitiveProvider.on_observation|
451 #: will never be called by Hypothesis.
452 #:
453 #: The opt-in behavior of observability is because enabling observability
454 #: might increase runtime or memory usage.
455 add_observability_callback: ClassVar[bool] = False
456
457 def __init__(self, conjecturedata: Optional["ConjectureData"], /) -> None:
458 self._cd = conjecturedata
459
460 @abc.abstractmethod
461 def draw_boolean(
462 self,
463 p: float = 0.5,
464 ) -> bool:
465 """
466 Draw a boolean choice.
467
468 Parameters
469 ----------
470 p: float
471 The probability of returning ``True``. Between 0 and 1 inclusive.
472
473 Except for ``0`` and ``1``, the value of ``p`` is a hint provided by
474 Hypothesis, and may be ignored by the backend.
475
476 If ``0``, the provider must return ``False``. If ``1``, the provider
477 must return ``True``.
478 """
479 raise NotImplementedError
480
481 @abc.abstractmethod
482 def draw_integer(
483 self,
484 min_value: int | None = None,
485 max_value: int | None = None,
486 *,
487 weights: dict[int, float] | None = None,
488 shrink_towards: int = 0,
489 ) -> int:
490 """
491 Draw an integer choice.
492
493 Parameters
494 ----------
495 min_value : int | None
496 (Inclusive) lower bound on the integer value. If ``None``, there is
497 no lower bound.
498 max_value : int | None
499 (Inclusive) upper bound on the integer value. If ``None``, there is
500 no upper bound.
501 weights: dict[int, float] | None
502 Maps keys in the range [``min_value``, ``max_value``] to the probability
503 of returning that key.
504 shrink_towards: int
505 The integer to shrink towards. This is not used during generation and
506 can be ignored by backends.
507 """
508 raise NotImplementedError
509
510 @abc.abstractmethod
511 def draw_float(
512 self,
513 *,
514 min_value: float = -math.inf,
515 max_value: float = math.inf,
516 allow_nan: bool = True,
517 smallest_nonzero_magnitude: float,
518 ) -> float:
519 """
520 Draw a float choice.
521
522 Parameters
523 ----------
524 min_value : float
525 (Inclusive) lower bound on the float value.
526 max_value : float
527 (Inclusive) upper bound on the float value.
528 allow_nan : bool
529 If ``False``, it is invalid to return ``math.nan``.
530 smallest_nonzero_magnitude : float
531 The smallest allowed nonzero magnitude. ``draw_float`` should not
532 return a float ``f`` if ``abs(f) < smallest_nonzero_magnitude``.
533 """
534 raise NotImplementedError
535
536 @abc.abstractmethod
537 def draw_string(
538 self,
539 intervals: IntervalSet,
540 *,
541 min_size: int = 0,
542 max_size: int = COLLECTION_DEFAULT_MAX_SIZE,
543 ) -> str:
544 """
545 Draw a string choice.
546
547 Parameters
548 ----------
549 intervals : IntervalSet
550 The set of codepoints to sample from.
551 min_size : int
552 (Inclusive) lower bound on the string length.
553 max_size : int
554 (Inclusive) upper bound on the string length.
555 """
556 raise NotImplementedError
557
558 @abc.abstractmethod
559 def draw_bytes(
560 self,
561 min_size: int = 0,
562 max_size: int = COLLECTION_DEFAULT_MAX_SIZE,
563 ) -> bytes:
564 """
565 Draw a bytes choice.
566
567 Parameters
568 ----------
569 min_size : int
570 (Inclusive) lower bound on the bytes length.
571 max_size : int
572 (Inclusive) upper bound on the bytes length.
573 """
574 raise NotImplementedError
575
576 def per_test_case_context_manager(self) -> AbstractContextManager:
577 """
578 Returns a context manager which will be entered each time Hypothesis
579 starts generating and executing one test case, and exited when that test
580 case finishes generating and executing, including if any exception is
581 thrown.
582
583 In the lifecycle of a Hypothesis test, this is called before
584 generating strategy values for each test case. This is just before any
585 :ref:`custom executor <custom-function-execution>` is called.
586
587 Even if not returning a custom context manager, |PrimitiveProvider|
588 subclasses are welcome to override this method to know when Hypothesis
589 starts and ends the execution of a single test case.
590 """
591 return contextlib.nullcontext()
592
593 def realize(self, value: T, *, for_failure: bool = False) -> T:
594 """
595 Called whenever hypothesis requires a concrete (non-symbolic) value from
596 a potentially symbolic value. Hypothesis will not check that ``value`` is
597 symbolic before calling ``realize``, so you should handle the case where
598 ``value`` is non-symbolic.
599
600 The returned value should be non-symbolic. If you cannot provide a value,
601 raise |BackendCannotProceed| with a value of ``"discard_test_case"``.
602
603 If ``for_failure`` is ``True``, the value is associated with a failing example.
604 In this case, the backend should spend substantially more effort when
605 attempting to realize the value, since it is important to avoid discarding
606 failing examples. Backends may still raise |BackendCannotProceed| when
607 ``for_failure`` is ``True``, if realization is truly impossible or if
608 realization takes significantly longer than expected (say, 5 minutes).
609 """
610 return value
611
612 def replay_choices(self, choices: tuple[ChoiceT, ...]) -> None:
613 """
614 Called when Hypothesis has discovered a choice sequence which the provider
615 may wish to enqueue to replay under its own instrumentation when we next
616 ask to generate a test case, rather than generating one from scratch.
617
618 This is used to e.g. warm-start :pypi:`hypothesis-crosshair` with a corpus
619 of high-code-coverage inputs discovered by
620 `HypoFuzz <https://hypofuzz.com/>`_.
621 """
622 return None
623
624 def observe_test_case(self) -> dict[str, Any]:
625 """Called at the end of the test case when :ref:`observability
626 <observability>` is enabled.
627
628 The return value should be a non-symbolic json-encodable dictionary,
629 and will be included in observations as ``observation["metadata"]["backend"]``.
630 """
631 return {}
632
633 def observe_information_messages(
634 self, *, lifetime: LifetimeT
635 ) -> Iterable[_BackendInfoMsg]:
636 """Called at the end of each test case and again at end of the test function.
637
638 Return an iterable of ``{type: info/alert/error, title: str, content: str | dict}``
639 dictionaries to be delivered as individual information messages. Hypothesis
640 adds the ``run_start`` timestamp and ``property`` name for you.
641 """
642 assert lifetime in ("test_case", "test_function")
643 yield from []
644
645 def on_observation(self, observation: TestCaseObservation) -> None: # noqa: B027
646 """
647 Called at the end of each test case which uses this provider, with the same
648 ``observation["type"] == "test_case"`` observation that is passed to
649 other callbacks added via |add_observability_callback|. This method is not
650 called with ``observation["type"] in {"info", "alert", "error"}``
651 observations.
652
653 .. important::
654
655 For |PrimitiveProvider.on_observation| to be called by Hypothesis,
656 |PrimitiveProvider.add_observability_callback| must be set to ``True``.
657
658 |PrimitiveProvider.on_observation| is explicitly opt-in, as enabling
659 observability might increase runtime or memory usage.
660
661 Calls to this method are guaranteed to alternate with calls to
662 |PrimitiveProvider.per_test_case_context_manager|. For example:
663
664 .. code-block:: python
665
666 # test function starts
667 per_test_case_context_manager()
668 on_observation()
669 per_test_case_context_manager()
670 on_observation()
671 ...
672 # test function ends
673
674 Note that |PrimitiveProvider.on_observation| will not be called for test
675 cases which did not use this provider during generation, for example
676 during |Phase.reuse| or |Phase.shrink|, or because Hypothesis switched
677 to the standard Hypothesis backend after this backend raised too many
678 |BackendCannotProceed| exceptions.
679 """
680
681 def span_start(self, label: int, /) -> None: # noqa: B027 # non-abstract noop
682 """Marks the beginning of a semantically meaningful span of choices.
683
684 Spans are a depth-first tree structure. A span is opened by a call to
685 |PrimitiveProvider.span_start|, and a call to |PrimitiveProvider.span_end|
686 closes the most recently opened span. So the following sequence of calls:
687
688 .. code-block:: python
689
690 span_start(label=1)
691 n1 = draw_integer()
692 span_start(label=2)
693 b1 = draw_boolean()
694 n2 = draw_integer()
695 span_end()
696 f1 = draw_float()
697 span_end()
698
699 produces the following two spans of choices:
700
701 .. code-block::
702
703 1: [n1, b1, n2, f1]
704 2: [b1, n2]
705
706 Hypothesis uses spans to denote "semantically meaningful" sequences of
707 choices. For instance, Hypothesis opens a span for the sequence of choices
708 made while drawing from each strategy. Not every span corresponds to a
709 strategy; the generation of e.g. each element in |st.lists| is also marked
710 with a span, among others.
711
712 ``label`` is an opaque integer, which has no defined semantics.
713 The only guarantee made by Hypothesis is that all spans with the same
714 "meaning" will share the same ``label``. So all spans from the same
715 strategy will share the same label, as will e.g. the spans for |st.lists|
716 elements.
717
718 Providers can track calls to |PrimitiveProvider.span_start| and
719 |PrimitiveProvider.span_end| to learn something about the semantics of
720 the test's choice sequence. For instance, a provider could track the depth
721 of the span tree, or the number of unique labels, which says something about
722 the complexity of the choices being generated. Or a provider could track
723 the span tree across test cases in order to determine what strategies are
724 being used in what contexts.
725
726 It is possible for Hypothesis to start and immediately stop a span,
727 without calling a ``draw_*`` method in between. These spans contain zero
728 choices.
729
730 Hypothesis will always balance the number of calls to
731 |PrimitiveProvider.span_start| and |PrimitiveProvider.span_end|. A call
732 to |PrimitiveProvider.span_start| will always be followed by a call to
733 |PrimitiveProvider.span_end| before the end of the test case.
734
735 |PrimitiveProvider.span_start| is called from ``ConjectureData.start_span()``
736 internally.
737 """
738
739 def span_end(self, discard: bool, /) -> None: # noqa: B027
740 """Marks the end of a semantically meaningful span of choices.
741
742 ``discard`` is ``True`` when the draw was filtered out or otherwise marked
743 as unlikely to contribute to the input data as seen by the user's test.
744 Note however that side effects can make this determination unsound.
745
746 |PrimitiveProvider.span_end| is called from ``ConjectureData.stop_span()``
747 internally.
748 """
749
750
751class HypothesisProvider(PrimitiveProvider):
752 lifetime = "test_case"
753
754 def __init__(self, conjecturedata: Optional["ConjectureData"], /):
755 super().__init__(conjecturedata)
756 self._random = None if self._cd is None else self._cd._random
757
758 @cached_property
759 def _local_constants(self):
760 # defer computation of local constants until/if we need it
761 return _get_local_constants()
762
763 def _maybe_draw_constant(
764 self,
765 choice_type: ChoiceTypeT,
766 constraints: ChoiceConstraintsT,
767 *,
768 p: float = 0.05,
769 ) -> Optional["ConstantT"]:
770 assert self._random is not None
771 assert choice_type != "boolean"
772 # check whether we even want a constant before spending time computing
773 # and caching the allowed constants.
774 if self._random.random() > p:
775 return None
776
777 # note: this property access results in computation being done
778 assert self._local_constants is not None
779
780 key = (choice_type, choice_constraints_key(choice_type, constraints))
781 if key not in CONSTANTS_CACHE:
782 CONSTANTS_CACHE[key] = (
783 tuple(
784 choice
785 for choice in GLOBAL_CONSTANTS.set_for_type(choice_type)
786 if choice_permitted(choice, constraints)
787 ),
788 tuple(
789 choice
790 for choice in self._local_constants.set_for_type(choice_type)
791 if choice_permitted(choice, constraints)
792 ),
793 )
794
795 # split constants into two pools, so we still have a good chance to draw
796 # global constants even if there are many local constants.
797 global_constants, local_constants = CONSTANTS_CACHE[key]
798 constants_lists = ([global_constants] if global_constants else []) + (
799 [local_constants] if local_constants else []
800 )
801 if not constants_lists:
802 return None
803
804 # At this point, we've decided to use a constant. Now we select which pool
805 # to draw that constant from.
806 #
807 # Note that this approach has a different probability distribution than
808 # attempting a random.random for both global_constants and local_constants.
809 constants = self._random.choice(constants_lists)
810 return self._random.choice(constants)
811
812 def draw_boolean(
813 self,
814 p: float = 0.5,
815 ) -> bool:
816 assert self._random is not None
817
818 if p <= 0:
819 return False
820 if p >= 1:
821 return True
822
823 return self._random.random() < p
824
825 def draw_integer(
826 self,
827 min_value: int | None = None,
828 max_value: int | None = None,
829 *,
830 weights: dict[int, float] | None = None,
831 shrink_towards: int = 0,
832 ) -> int:
833 assert self._cd is not None
834 if (
835 constant := self._maybe_draw_constant(
836 "integer",
837 {
838 "min_value": min_value,
839 "max_value": max_value,
840 "weights": weights,
841 "shrink_towards": shrink_towards,
842 },
843 )
844 ) is not None:
845 assert isinstance(constant, int)
846 return constant
847
848 center = 0
849 if min_value is not None:
850 center = max(min_value, center)
851 if max_value is not None:
852 center = min(max_value, center)
853
854 if weights is not None:
855 assert min_value is not None
856 assert max_value is not None
857
858 # format of weights is a mapping of ints to p, where sum(p) < 1.
859 # The remaining probability mass is uniformly distributed over
860 # *all* ints (not just the unmapped ones; this is somewhat undesirable,
861 # but simplifies things).
862 #
863 # We assert that sum(p) is strictly less than 1 because it simplifies
864 # handling forced values when we can force into the unmapped probability
865 # mass. We should eventually remove this restriction.
866 sampler = Sampler(
867 [1 - sum(weights.values()), *weights.values()], observe=False
868 )
869 # if we're forcing, it's easiest to force into the unmapped probability
870 # mass and then force the drawn value after.
871 idx = sampler.sample(self._cd)
872
873 if idx == 0:
874 return self._draw_bounded_integer(min_value, max_value)
875 # implicit reliance on dicts being sorted for determinism
876 return list(weights)[idx - 1]
877
878 if min_value is None and max_value is None:
879 return self._draw_unbounded_integer()
880
881 if min_value is None:
882 assert max_value is not None
883 probe = max_value + 1
884 while max_value < probe:
885 probe = center + self._draw_unbounded_integer()
886 return probe
887
888 if max_value is None:
889 assert min_value is not None
890 probe = min_value - 1
891 while probe < min_value:
892 probe = center + self._draw_unbounded_integer()
893 return probe
894
895 return self._draw_bounded_integer(min_value, max_value)
896
897 def draw_float(
898 self,
899 *,
900 min_value: float = -math.inf,
901 max_value: float = math.inf,
902 allow_nan: bool = True,
903 smallest_nonzero_magnitude: float,
904 ) -> float:
905 assert self._random is not None
906
907 constraints: FloatConstraints = {
908 "min_value": min_value,
909 "max_value": max_value,
910 "allow_nan": allow_nan,
911 "smallest_nonzero_magnitude": smallest_nonzero_magnitude,
912 }
913 if (
914 constant := self._maybe_draw_constant("float", constraints, p=0.15)
915 ) is not None:
916 assert isinstance(constant, float)
917 return constant
918
919 # on top of the probability to draw a constant float, we independently
920 # upweight 0.0/-0.0, math.inf, -math.inf, nans, and boundary values.
921 weird_floats = [
922 f
923 for f in [
924 0.0,
925 -0.0,
926 math.inf,
927 -math.inf,
928 math.nan,
929 -math.nan,
930 SIGNALING_NAN,
931 -SIGNALING_NAN,
932 min_value,
933 next_up(min_value),
934 min_value + 1,
935 max_value - 1,
936 next_down(max_value),
937 max_value,
938 ]
939 if choice_permitted(f, constraints)
940 ]
941
942 if weird_floats and self._random.random() < 0.05:
943 return self._random.choice(weird_floats)
944
945 clamper = make_float_clamper(
946 min_value,
947 max_value,
948 smallest_nonzero_magnitude=smallest_nonzero_magnitude,
949 allow_nan=allow_nan,
950 )
951
952 result = self._draw_float()
953 if allow_nan and math.isnan(result):
954 clamped = result # pragma: no cover
955 else:
956 clamped = clamper(result)
957 if float_to_int(clamped) != float_to_int(result) and not (
958 math.isnan(result) and allow_nan
959 ):
960 result = clamped
961 return result
962
963 def draw_string(
964 self,
965 intervals: IntervalSet,
966 *,
967 min_size: int = 0,
968 max_size: int = COLLECTION_DEFAULT_MAX_SIZE,
969 ) -> str:
970 assert self._cd is not None
971 assert self._random is not None
972
973 if len(intervals) == 0:
974 return ""
975
976 if (
977 constant := self._maybe_draw_constant(
978 "string",
979 {"intervals": intervals, "min_size": min_size, "max_size": max_size},
980 )
981 ) is not None:
982 assert isinstance(constant, str)
983 return constant
984
985 average_size = min(
986 max(min_size * 2, min_size + 5),
987 0.5 * (min_size + max_size),
988 )
989
990 chars = []
991 elements = many(
992 self._cd,
993 min_size=min_size,
994 max_size=max_size,
995 average_size=average_size,
996 observe=False,
997 )
998 while elements.more():
999 if len(intervals) > 256:
1000 if self.draw_boolean(0.2):
1001 i = self._random.randint(256, len(intervals) - 1)
1002 else:
1003 i = self._random.randint(0, 255)
1004 else:
1005 i = self._random.randint(0, len(intervals) - 1)
1006
1007 chars.append(intervals.char_in_shrink_order(i))
1008
1009 return "".join(chars)
1010
1011 def draw_bytes(
1012 self,
1013 min_size: int = 0,
1014 max_size: int = COLLECTION_DEFAULT_MAX_SIZE,
1015 ) -> bytes:
1016 assert self._cd is not None
1017 assert self._random is not None
1018
1019 if (
1020 constant := self._maybe_draw_constant(
1021 "bytes", {"min_size": min_size, "max_size": max_size}
1022 )
1023 ) is not None:
1024 assert isinstance(constant, bytes)
1025 return constant
1026
1027 buf = bytearray()
1028 average_size = min(
1029 max(min_size * 2, min_size + 5),
1030 0.5 * (min_size + max_size),
1031 )
1032 elements = many(
1033 self._cd,
1034 min_size=min_size,
1035 max_size=max_size,
1036 average_size=average_size,
1037 observe=False,
1038 )
1039 while elements.more():
1040 buf += self._random.randbytes(1)
1041
1042 return bytes(buf)
1043
1044 def _draw_float(self) -> float:
1045 assert self._random is not None
1046
1047 f = lex_to_float(self._random.getrandbits(64))
1048 sign = 1 if self._random.getrandbits(1) else -1
1049 return sign * f
1050
1051 def _draw_unbounded_integer(self) -> int:
1052 assert self._cd is not None
1053 assert self._random is not None
1054
1055 size = INT_SIZES[INT_SIZES_SAMPLER.sample(self._cd)]
1056
1057 r = self._random.getrandbits(size)
1058 sign = r & 1
1059 r >>= 1
1060 if sign:
1061 r = -r
1062 return r
1063
1064 def _draw_bounded_integer(
1065 self,
1066 lower: int,
1067 upper: int,
1068 *,
1069 vary_size: bool = True,
1070 ) -> int:
1071 assert lower <= upper
1072 assert self._cd is not None
1073 assert self._random is not None
1074
1075 if lower == upper:
1076 return lower
1077
1078 bits = (upper - lower).bit_length()
1079 if bits > 24 and vary_size and self._random.random() < 7 / 8:
1080 # For large ranges, we combine the uniform random distribution
1081 # with a weighting scheme with moderate chance. Cutoff at 2 ** 24 so that our
1082 # choice of unicode characters is uniform but the 32bit distribution is not.
1083 idx = INT_SIZES_SAMPLER.sample(self._cd)
1084 cap_bits = min(bits, INT_SIZES[idx])
1085 upper = min(upper, lower + 2**cap_bits - 1)
1086 return self._random.randint(lower, upper)
1087
1088 return self._random.randint(lower, upper)
1089
1090
1091# Masks for masking off the first byte of an n-bit buffer.
1092# The appropriate mask is stored at position n % 8.
1093BYTE_MASKS = [(1 << n) - 1 for n in range(8)]
1094BYTE_MASKS[0] = 255
1095
1096
1097class BytestringProvider(PrimitiveProvider):
1098 lifetime = "test_case"
1099
1100 def __init__(
1101 self, conjecturedata: Optional["ConjectureData"], /, *, bytestring: bytes
1102 ):
1103 super().__init__(conjecturedata)
1104 self.bytestring = bytestring
1105 self.index = 0
1106 self.drawn = bytearray()
1107
1108 def _draw_bits(self, n):
1109 if n == 0: # pragma: no cover
1110 return 0
1111 n_bytes = bits_to_bytes(n)
1112 if self.index + n_bytes > len(self.bytestring):
1113 self._cd.mark_overrun()
1114 buf = bytearray(self.bytestring[self.index : self.index + n_bytes])
1115 self.index += n_bytes
1116
1117 buf[0] &= BYTE_MASKS[n % 8]
1118 buf = bytes(buf)
1119 self.drawn += buf
1120 return int_from_bytes(buf)
1121
1122 def draw_boolean(
1123 self,
1124 p: float = 0.5,
1125 ) -> bool:
1126 if p <= 0:
1127 return False
1128 if p >= 1:
1129 return True
1130
1131 # always use one byte for booleans to maintain constant draw size.
1132 # If a probability requires more than 8 bits to represent precisely,
1133 # the result will be slightly biased, but not badly.
1134 bits = 8
1135 size = 2**bits
1136 # always leave at least one value that can be true, even for very small
1137 # p.
1138 falsey = max(1, math.floor(size * (1 - p)))
1139 n = self._draw_bits(bits)
1140 return n >= falsey
1141
1142 def draw_integer(
1143 self,
1144 min_value: int | None = None,
1145 max_value: int | None = None,
1146 *,
1147 weights: dict[int, float] | None = None,
1148 shrink_towards: int = 0,
1149 ) -> int:
1150 assert self._cd is not None
1151
1152 # we explicitly ignore integer weights for now, as they are likely net
1153 # negative on fuzzer performance.
1154
1155 if min_value is None and max_value is None:
1156 min_value = -(2**127)
1157 max_value = 2**127 - 1
1158 elif min_value is None:
1159 assert max_value is not None
1160 min_value = max_value - 2**64
1161 elif max_value is None:
1162 assert min_value is not None
1163 max_value = min_value + 2**64
1164
1165 if min_value == max_value:
1166 return min_value
1167
1168 bits = (max_value - min_value).bit_length()
1169 value = self._draw_bits(bits)
1170 while not (min_value <= value <= max_value):
1171 value = self._draw_bits(bits)
1172 return value
1173
1174 def draw_float(
1175 self,
1176 *,
1177 min_value: float = -math.inf,
1178 max_value: float = math.inf,
1179 allow_nan: bool = True,
1180 smallest_nonzero_magnitude: float,
1181 ) -> float:
1182 n = self._draw_bits(64)
1183 sign = -1 if n >> 64 else 1
1184 f = sign * lex_to_float(n & ((1 << 64) - 1))
1185 clamper = make_float_clamper(
1186 min_value,
1187 max_value,
1188 smallest_nonzero_magnitude=smallest_nonzero_magnitude,
1189 allow_nan=allow_nan,
1190 )
1191 return clamper(f)
1192
1193 def _draw_collection(self, min_size, max_size, *, alphabet_size):
1194 average_size = min(
1195 max(min_size * 2, min_size + 5),
1196 0.5 * (min_size + max_size),
1197 )
1198 elements = many(
1199 self._cd,
1200 min_size=min_size,
1201 max_size=max_size,
1202 average_size=average_size,
1203 observe=False,
1204 )
1205 values = []
1206 while elements.more():
1207 values.append(self.draw_integer(0, alphabet_size - 1))
1208 return values
1209
1210 def draw_string(
1211 self,
1212 intervals: IntervalSet,
1213 *,
1214 min_size: int = 0,
1215 max_size: int = COLLECTION_DEFAULT_MAX_SIZE,
1216 ) -> str:
1217 values = self._draw_collection(min_size, max_size, alphabet_size=len(intervals))
1218 return "".join(chr(intervals[v]) for v in values)
1219
1220 def draw_bytes(
1221 self,
1222 min_size: int = 0,
1223 max_size: int = COLLECTION_DEFAULT_MAX_SIZE,
1224 ) -> bytes:
1225 values = self._draw_collection(min_size, max_size, alphabet_size=2**8)
1226 return bytes(values)
1227
1228
1229class URandom(Random):
1230 # we reimplement a Random instance instead of using SystemRandom, because
1231 # os.urandom is not guaranteed to read from /dev/urandom.
1232
1233 @staticmethod
1234 def _urandom(size: int) -> bytes:
1235 # By default, we buffer more data from /dev/urandom than the actual `size` number
1236 # of bytes we requested. This trips up anyone (and particularly Antithesis)
1237 # hooking /dev/urandom reads to control randomness.
1238 #
1239 # buffering=0 disables this behavior.
1240 with open("/dev/urandom", "rb", buffering=0) as f:
1241 return f.read(size)
1242
1243 def getrandbits(self, k: int) -> int:
1244 assert k >= 0
1245 size = bits_to_bytes(k)
1246 n = int_from_bytes(self._urandom(size))
1247 # trim excess bits
1248 return n >> (size * 8 - k)
1249
1250 def random(self) -> float:
1251 # adapted from random.SystemRandom.random
1252 return (int_from_bytes(self._urandom(7)) >> 3) * (2**-53)
1253
1254
1255class URandomProvider(HypothesisProvider):
1256 # A provider which reads directly from /dev/urandom as its source of randomness.
1257 # This provider exists to provide better Hypothesis integration with Antithesis
1258 # (https://antithesis.com/), which interprets calls to /dev/urandom as the
1259 # randomness to mutate. This effectively gives Antithesis control over
1260 # the choices made by the URandomProvider.
1261 #
1262 # If you are not using Antithesis, you probably don't want to use this
1263 # provider.
1264
1265 def __init__(self, conjecturedata: Optional["ConjectureData"], /):
1266 super().__init__(conjecturedata)
1267 if WINDOWS: # pragma: no cover
1268 warnings.warn(
1269 "/dev/urandom is not available on windows. Falling back to "
1270 'standard PRNG generation (equivalent to backend="hypothesis").',
1271 HypothesisWarning,
1272 stacklevel=1,
1273 )
1274 # don't overwrite the HypothesisProvider self._random attribute in
1275 # this case
1276 else:
1277 self._random = URandom()