1# This file is part of Hypothesis, which may be found at
2# https://github.com/HypothesisWorks/hypothesis/
3#
4# Copyright the Hypothesis Authors.
5# Individual contributors are listed in AUTHORS.rst and the git log.
6#
7# This Source Code Form is subject to the terms of the Mozilla Public License,
8# v. 2.0. If a copy of the MPL was not distributed with this file, You can
9# obtain one at https://mozilla.org/MPL/2.0/.
10
11import abc
12import contextlib
13import math
14import sys
15import warnings
16from collections.abc import Iterable
17from contextlib import AbstractContextManager, contextmanager
18from functools import cached_property
19from random import Random
20from sys import float_info
21from typing import (
22 TYPE_CHECKING,
23 Any,
24 ClassVar,
25 Literal,
26 Optional,
27 TypeAlias,
28 TypedDict,
29 TypeVar,
30)
31
32from sortedcontainers import SortedSet
33
34from hypothesis.errors import HypothesisWarning
35from hypothesis.internal.cache import LRUCache
36from hypothesis.internal.compat import WINDOWS, int_from_bytes
37from hypothesis.internal.conjecture.choice import (
38 ChoiceConstraintsT,
39 ChoiceT,
40 ChoiceTypeT,
41 FloatConstraints,
42 choice_constraints_key,
43 choice_permitted,
44)
45from hypothesis.internal.conjecture.floats import lex_to_float
46from hypothesis.internal.conjecture.junkdrawer import bits_to_bytes
47from hypothesis.internal.conjecture.utils import (
48 Sampler,
49 many,
50)
51from hypothesis.internal.constants_ast import (
52 Constants,
53 constants_from_module,
54 is_local_module_file,
55)
56from hypothesis.internal.floats import (
57 SIGNALING_NAN,
58 clamp,
59 float_to_int,
60 make_float_clamper,
61 next_down,
62 next_up,
63)
64from hypothesis.internal.intervalsets import IntervalSet
65from hypothesis.internal.observability import InfoObservationType, TestCaseObservation
66from hypothesis.internal.statistics import (
67 LogStudentTDistribution,
68 PiecewiseDistribution,
69 UniformDistribution,
70)
71
72if TYPE_CHECKING:
73 from hypothesis.internal.conjecture.data import ConjectureData
74 from hypothesis.internal.constants_ast import ConstantT
75
76T = TypeVar("T")
77LifetimeT: TypeAlias = Literal["test_case", "test_function"]
78COLLECTION_DEFAULT_MAX_SIZE = 10**10 # "arbitrarily large"
79
80# see https://github.com/HypothesisWorks/hypothesis/pull/4728 for details
81INTEGERS_DISTRIBUTION = PiecewiseDistribution(
82 inner=UniformDistribution(half_width=256),
83 outer=LogStudentTDistribution(scale_bits=13, df=2),
84 switchover=256,
85)
86
87
88#: Registered Hypothesis backends. This is a dictionary where keys are the name
89#: to be used in |settings.backend|. The value of a key can be either:
90#:
91#: * A string corresponding to an importable absolute path of a
92#: |PrimitiveProvider| subclass
93#: * A |PrimitiveProvider| subclass (the class itself, not an instance of the
94#: class)
95#:
96#: Hypothesis will instantiate the corresponding |PrimitiveProvider| subclass
97#: when the backend is requested by a test's |settings.backend| value.
98#:
99#: For example, the default Hypothesis backend is registered as:
100#:
101#: .. code-block:: python
102#:
103#: from hypothesis.internal.conjecture.providers import AVAILABLE_PROVIDERS
104#:
105#: AVAILABLE_PROVIDERS["hypothesis"] = "hypothesis.internal.conjecture.providers.HypothesisProvider"
106#: # or
107#: AVAILABLE_PROVIDERS["hypothesis"] = HypothesisProvider
108#:
109#: And can be used with:
110#:
111#: .. code-block:: python
112#:
113#: from hypothesis import given, settings, strategies as st
114#:
115#: @given(st.integers())
116#: @settings(backend="hypothesis")
117#: def f(n):
118#: pass
119#:
120#: Though, as ``backend="hypothesis"`` is the default setting, the above would
121#: typically not have any effect.
122#:
123#: For third-party backend authors, we strongly encourage ensuring that
124#: ``import hypothesis`` does not automatically import the expensive parts of
125#: your package, by:
126#:
127#: - setting a string path here, instead of a provider class
128#: - ensuring the registered hypothesis plugin path references a path which just
129#: sets AVAILABLE_PROVIDERS and does not import your package
130AVAILABLE_PROVIDERS: dict[str, str | type["PrimitiveProvider"]] = {
131 "hypothesis": "hypothesis.internal.conjecture.providers.HypothesisProvider",
132 "hypothesis-urandom": "hypothesis.internal.conjecture.providers.URandomProvider",
133}
134# cache the choice_permitted constants for a particular set of constraints.
135CacheKeyT: TypeAlias = tuple[ChoiceTypeT, tuple[Any, ...]]
136CacheValueT: TypeAlias = tuple[tuple["ConstantT", ...], tuple["ConstantT", ...]]
137CONSTANTS_CACHE: LRUCache[CacheKeyT, CacheValueT] = LRUCache(1024)
138
139
140_constants_integers = (
141 # powers of 2
142 [2**n for n in range(16, 66)]
143 # powers of 10
144 + [10**n for n in range(5, 20)]
145 # factorials
146 + [math.factorial(n) for n in range(9, 21)]
147 # a few primorial numbers https://en.wikipedia.org/wiki/Primorial
148 + [
149 510510,
150 6469693230,
151 304250263527210,
152 32589158477190044730,
153 ]
154)
155_constants_integers.extend(
156 [n - 1 for n in _constants_integers] + [n + 1 for n in _constants_integers]
157)
158_constants_integers.extend([-x for x in _constants_integers])
159
160# arbitrary cutoffs to keep our list bounded
161assert all(50_000 <= abs(n) <= 2**66 for n in _constants_integers)
162
163_constant_floats = (
164 [
165 0.5,
166 1.1,
167 1.5,
168 1.9,
169 1.0 / 3,
170 10e6,
171 10e-6,
172 1.175494351e-38,
173 next_up(0.0),
174 float_info.min,
175 float_info.max,
176 3.402823466e38,
177 9007199254740992.0,
178 1 - 10e-6,
179 2 + 10e-6,
180 1.192092896e-07,
181 2.2204460492503131e-016,
182 ]
183 + [2.0**-n for n in (24, 14, 149, 126)] # minimum (sub)normals for float16,32
184 + [float_info.min / n for n in (2, 10, 1000, 100_000)] # subnormal in float64
185)
186_constant_floats.extend([-x for x in _constant_floats])
187assert all(isinstance(f, float) for f in _constant_floats)
188
189_constant_strings = {
190 # strings which can be interpreted as code / logic
191 "undefined",
192 "null",
193 "NULL",
194 "nil",
195 "NIL",
196 "true",
197 "false",
198 "True",
199 "False",
200 "TRUE",
201 "FALSE",
202 "None",
203 "none",
204 "if",
205 "then",
206 "else",
207 "__dict__",
208 "__proto__", # javascript
209 # strings which can be interpreted as a number
210 "0",
211 "1e100",
212 "0..0",
213 "0/0",
214 "1/0",
215 "+0.0",
216 "Infinity",
217 "-Infinity",
218 "Inf",
219 "INF",
220 "NaN",
221 "9" * 30,
222 # common ascii characters
223 ",./;'[]\\-=<>?:\"{}|_+!@#$%^&*()`~",
224 # common unicode characters
225 "Ω≈ç√∫˜µ≤≥÷åß∂ƒ©˙∆˚¬…æœ∑´®†¥¨ˆøπ“‘¡™£¢∞§¶•ªº–≠¸˛Ç◊ı˜Â¯˘¿ÅÍÎÏ˝ÓÔÒÚÆ☃Œ„´‰ˇÁ¨ˆØ∏”’`⁄€‹›fifl‡°·‚—±",
226 # characters which increase in length when lowercased
227 "Ⱥ",
228 "Ⱦ",
229 # ligatures
230 "æœÆŒffʤʨß",
231 # emoticons
232 "(╯°□°)╯︵ ┻━┻)",
233 # emojis
234 "😍",
235 "🇺🇸",
236 # emoji modifiers
237 "🏻", # U+1F3FB Light Skin Tone,
238 "👍🏻", # 👍 followed by U+1F3FB
239 # RTL text
240 "الكل في المجمو عة",
241 # Ogham text, which contains the only character in the Space Separators
242 # unicode category (Zs) that isn't visually blank: . # noqa: RUF003
243 "᚛ᚄᚓᚐᚋᚒᚄ ᚑᚄᚂᚑᚏᚅ᚜",
244 # thai consonant + spacing vowel combinations, which have unusual visual combining behavior
245 "กา",
246 "ก ำกำ",
247 # readable variations on text (bolt/italic/script)
248 "𝐓𝐡𝐞 𝐪𝐮𝐢𝐜𝐤 𝐛𝐫𝐨𝐰𝐧 𝐟𝐨𝐱 𝐣𝐮𝐦𝐩𝐬 𝐨𝐯𝐞𝐫 𝐭𝐡𝐞 𝐥𝐚𝐳𝐲 𝐝𝐨𝐠",
249 "𝕿𝖍𝖊 𝖖𝖚𝖎𝖈𝖐 𝖇𝖗𝖔𝖜𝖓 𝖋𝖔𝖝 𝖏𝖚𝖒𝖕𝖘 𝖔𝖛𝖊𝖗 𝖙𝖍𝖊 𝖑𝖆𝖟𝖞 𝖉𝖔𝖌",
250 "𝑻𝒉𝒆 𝒒𝒖𝒊𝒄𝒌 𝒃𝒓𝒐𝒘𝒏 𝒇𝒐𝒙 𝒋𝒖𝒎𝒑𝒔 𝒐𝒗𝒆𝒓 𝒕𝒉𝒆 𝒍𝒂𝒛𝒚 𝒅𝒐𝒈",
251 "𝓣𝓱𝓮 𝓺𝓾𝓲𝓬𝓴 𝓫𝓻𝓸𝔀𝓷 𝓯𝓸𝔁 𝓳𝓾𝓶𝓹𝓼 𝓸𝓿𝓮𝓻 𝓽𝓱𝓮 𝓵𝓪𝔃𝔂 𝓭𝓸𝓰",
252 "𝕋𝕙𝕖 𝕢𝕦𝕚𝕔𝕜 𝕓𝕣𝕠𝕨𝕟 𝕗𝕠𝕩 𝕛𝕦𝕞𝕡𝕤 𝕠𝕧𝕖𝕣 𝕥𝕙𝕖 𝕝𝕒𝕫𝕪 𝕕𝕠𝕘",
253 # upsidown text
254 "ʇǝɯɐ ʇᴉs ɹolop ɯnsdᴉ ɯǝɹo˥",
255 # reserved strings in windows
256 "NUL",
257 "COM1",
258 "LPT1",
259 # scunthorpe problem
260 "Scunthorpe",
261 # zalgo text
262 "Ṱ̺̺̕o͞ ̷i̲̬͇̪͙n̝̗͕v̟̜̘̦͟o̶̙̰̠kè͚̮̺̪̹̱̤ ̖t̝͕̳̣̻̪͞h̼͓̲̦̳̘̲e͇̣̰̦̬͎ ̢̼̻̱̘h͚͎͙̜̣̲ͅi̦̲̣̰̤v̻͍e̺̭̳̪̰-m̢iͅn̖̺̞̲̯̰d̵̼̟͙̩̼̘̳ ̞̥̱̳̭r̛̗̘e͙p͠r̼̞̻̭̗e̺̠̣͟s̘͇̳͍̝͉e͉̥̯̞̲͚̬͜ǹ̬͎͎̟̖͇̤t͍̬̤͓̼̭͘ͅi̪̱n͠g̴͉ ͏͉ͅc̬̟h͡a̫̻̯͘o̫̟̖͍̙̝͉s̗̦̲.̨̹͈̣",
263 #
264 # examples from https://faultlore.com/blah/text-hates-you/
265 "मनीष منش",
266 "पन्ह पन्ह त्र र्च कृकृ ड्ड न्हृे إلا بسم الله",
267 "lorem لا بسم الله ipsum 你好1234你好",
268 # unicode charaacters causing unconditional line breaks, as defined by UAX #14:
269 # https://www.unicode.org/reports/tr14/.
270 #
271 # We've seen multiple bugs caused by assuming `str.splitlines` is equivalent to
272 # splitting over "\n", while it actually splits over all line breaks!
273 #
274 # We intersperse the line breaks with normal characters to increase the likelihood
275 # of triggering such a bug.
276 (
277 "a"
278 "\u000a" # line feed (class: LF)
279 "b"
280 "\u000d" # carriage return (class: CR)
281 "c"
282 "\u0085" # next line (class: NL)
283 "d"
284 "\u000b" # line tabulation (class: BK)
285 "e"
286 "\u000c" # form feed (class: BK)
287 "f"
288 "\u2028" # line separator (class: BK)
289 "g"
290 "\u2029" # paragraph separator (class: BK)
291 "h"
292 "\u000d\u000a" # CR+LF
293 "i"
294 ),
295}
296
297
298# we don't actually care what order the constants are sorted in, just that the
299# ordering is deterministic.
300GLOBAL_CONSTANTS = Constants(
301 integers=SortedSet(_constants_integers),
302 floats=SortedSet(_constant_floats, key=float_to_int),
303 bytes=SortedSet(),
304 strings=SortedSet(_constant_strings),
305)
306
307_local_constants = Constants(
308 integers=SortedSet(),
309 floats=SortedSet(key=float_to_int),
310 bytes=SortedSet(),
311 strings=SortedSet(),
312)
313# modules that we've already seen and processed for local constants. These are
314# are all modules, not necessarily local ones. This lets us quickly see which
315# modules are new without an expensive path.resolve() or is_local_module_file
316# cache lookup.
317# We track by module object when hashable, falling back to the module name
318# (str key in sys.modules) for unhashable entries like SimpleNamespace.
319_seen_modules: set = set()
320_sys_modules_len: int | None = None
321
322
323def _get_local_constants() -> Constants:
324 global _sys_modules_len, _local_constants
325
326 if sys.platform == "emscripten": # pragma: no cover
327 # pyodide builds bundle the stdlib in a nonstandard location, like
328 # `/lib/python312.zip/heapq.py`. To avoid identifying the entirety of
329 # the stdlib as local code and slowing down on emscripten, instead return
330 # that nothing is local.
331 #
332 # pyodide may provide some way to distinguish stdlib/third-party/local
333 # code. I haven't looked into it. If they do, we should correctly implement
334 # ModuleLocation for pyodide instead of this.
335 return _local_constants
336
337 count_constants = len(_local_constants)
338 # We call this function once per HypothesisProvider instance, i.e. once per
339 # input, so it needs to be performant. The logic here is more complicated
340 # than necessary because of this.
341 #
342 # First, we check whether there are any new modules with a very cheap length
343 # check. This check can be fooled if a module is added while another module is
344 # removed, but the more correct check against tuple(sys.modules.keys()) is
345 # substantially more expensive. Such a new module would eventually be discovered
346 # if / when the length changes again in the future.
347 #
348 # If the length has changed, we find just modules we haven't seen before. Of
349 # those, we find the ones which correspond to local modules, and extract their
350 # constants.
351
352 # careful: store sys.modules length when we first check to avoid race conditions
353 # with other threads loading a module before we set _sys_modules_len.
354 if (sys_modules_len := len(sys.modules)) != _sys_modules_len:
355 new_modules = []
356 for name, module in list(sys.modules.items()):
357 try:
358 seen = module in _seen_modules
359 except TypeError:
360 # unhashable module (e.g. SimpleNamespace); fall back to name
361 seen = name in _seen_modules
362 if not seen:
363 new_modules.append((name, module))
364 # Repeated SortedSet unions are expensive. Do the initial unions on a
365 # set(), then do a one-time union with _local_constants after.
366 new_constants = Constants()
367 for name, module in new_modules:
368 if (
369 module_file := getattr(module, "__file__", None)
370 ) is not None and is_local_module_file(module_file):
371 new_constants |= constants_from_module(module)
372 try:
373 _seen_modules.add(module)
374 except TypeError:
375 _seen_modules.add(name)
376 _local_constants |= new_constants
377 _sys_modules_len = sys_modules_len
378
379 # if we add any new constant, invalidate the constant cache for permitted values.
380 # A more efficient approach would be invalidating just the keys with this
381 # choice_type.
382 if len(_local_constants) > count_constants:
383 CONSTANTS_CACHE.cache.clear()
384
385 return _local_constants
386
387
388@contextmanager
389def with_register_backend(name, provider_cls):
390 try:
391 AVAILABLE_PROVIDERS[name] = provider_cls
392 yield
393 finally:
394 del AVAILABLE_PROVIDERS[name]
395
396
397class _BackendInfoMsg(TypedDict):
398 type: InfoObservationType
399 title: str
400 content: str | dict[str, Any]
401
402
403# TODO_DOCS: link to choice sequence explanation page
404
405
406class PrimitiveProvider(abc.ABC):
407 """
408 |PrimitiveProvider| is the implementation interface of a
409 :ref:`Hypothesis backend <alternative-backends>`.
410
411 A |PrimitiveProvider| is required to implement the following five
412 ``draw_*`` methods:
413
414 * |PrimitiveProvider.draw_integer|
415 * |PrimitiveProvider.draw_boolean|
416 * |PrimitiveProvider.draw_float|
417 * |PrimitiveProvider.draw_string|
418 * |PrimitiveProvider.draw_bytes|
419
420 Each strategy in Hypothesis generates values by drawing a series of choices
421 from these five methods. By overriding them, a |PrimitiveProvider| can control
422 the distribution of inputs generated by Hypothesis.
423
424 For example, :pypi:`hypothesis-crosshair` implements a |PrimitiveProvider|
425 which uses an SMT solver to generate inputs that uncover new branches.
426
427 Once you implement a |PrimitiveProvider|, you can make it available for use
428 through |AVAILABLE_PROVIDERS|.
429 """
430
431 #: The lifetime of a |PrimitiveProvider| instance. Either ``test_function``
432 #: or ``test_case``.
433 #:
434 #: If ``test_function`` (the default), a single provider instance will be
435 #: instantiated and used for the entirety of each test function (i.e., roughly
436 #: one provider per |@given| annotation). This can be useful for tracking state
437 #: over the entirety of a test function.
438 #:
439 #: If ``test_case``, a new provider instance will be instantiated and used for
440 #: each input Hypothesis generates.
441 #:
442 #: The ``conjecturedata`` argument to ``PrimitiveProvider.__init__`` will
443 #: be ``None`` for a lifetime of ``test_function``, and an instance of
444 #: ``ConjectureData`` for a lifetime of ``test_case``.
445 #:
446 #: Third-party providers likely want to set a lifetime of ``test_function``.
447 lifetime: ClassVar[LifetimeT] = "test_function"
448
449 #: Solver-based backends such as ``hypothesis-crosshair`` use symbolic values
450 #: which record operations performed on them in order to discover new paths.
451 #: If ``avoid_realization`` is set to ``True``, hypothesis will avoid interacting
452 #: with symbolic choices returned by the provider in any way that would force
453 #: the solver to narrow the range of possible values for that symbolic.
454 #:
455 #: Setting this to ``True`` disables some hypothesis features and optimizations.
456 #: Only set this to ``True`` if it is necessary for your backend.
457 avoid_realization: ClassVar[bool] = False
458
459 #: If ``True``, |PrimitiveProvider.on_observation| will be added as a
460 #: callback via |add_observability_callback|, enabling observability during
461 # the lifetime of this provider. If ``False``, |PrimitiveProvider.on_observation|
462 #: will never be called by Hypothesis.
463 #:
464 #: The opt-in behavior of observability is because enabling observability
465 #: might increase runtime or memory usage.
466 add_observability_callback: ClassVar[bool] = False
467
468 def __init__(self, conjecturedata: Optional["ConjectureData"], /) -> None:
469 self._cd = conjecturedata
470
471 @abc.abstractmethod
472 def draw_boolean(
473 self,
474 p: float = 0.5,
475 ) -> bool:
476 """
477 Draw a boolean choice.
478
479 Parameters
480 ----------
481 p: float
482 The probability of returning ``True``. Between 0 and 1 inclusive.
483
484 Except for ``0`` and ``1``, the value of ``p`` is a hint provided by
485 Hypothesis, and may be ignored by the backend.
486
487 If ``0``, the provider must return ``False``. If ``1``, the provider
488 must return ``True``.
489 """
490 raise NotImplementedError
491
492 @abc.abstractmethod
493 def draw_integer(
494 self,
495 min_value: int | None = None,
496 max_value: int | None = None,
497 *,
498 weights: dict[int, float] | None = None,
499 shrink_towards: int = 0,
500 ) -> int:
501 """
502 Draw an integer choice.
503
504 Parameters
505 ----------
506 min_value : int | None
507 (Inclusive) lower bound on the integer value. If ``None``, there is
508 no lower bound.
509 max_value : int | None
510 (Inclusive) upper bound on the integer value. If ``None``, there is
511 no upper bound.
512 weights: dict[int, float] | None
513 Maps keys in the range [``min_value``, ``max_value``] to the probability
514 of returning that key.
515 shrink_towards: int
516 The integer to shrink towards. This is not used during generation and
517 can be ignored by backends.
518 """
519 raise NotImplementedError
520
521 @abc.abstractmethod
522 def draw_float(
523 self,
524 *,
525 min_value: float = -math.inf,
526 max_value: float = math.inf,
527 allow_nan: bool = True,
528 smallest_nonzero_magnitude: float,
529 ) -> float:
530 """
531 Draw a float choice.
532
533 Parameters
534 ----------
535 min_value : float
536 (Inclusive) lower bound on the float value.
537 max_value : float
538 (Inclusive) upper bound on the float value.
539 allow_nan : bool
540 If ``False``, it is invalid to return ``math.nan``.
541 smallest_nonzero_magnitude : float
542 The smallest allowed nonzero magnitude. ``draw_float`` should not
543 return a float ``f`` if ``abs(f) < smallest_nonzero_magnitude``.
544 """
545 raise NotImplementedError
546
547 @abc.abstractmethod
548 def draw_string(
549 self,
550 intervals: IntervalSet,
551 *,
552 min_size: int = 0,
553 max_size: int = COLLECTION_DEFAULT_MAX_SIZE,
554 ) -> str:
555 """
556 Draw a string choice.
557
558 Parameters
559 ----------
560 intervals : IntervalSet
561 The set of codepoints to sample from.
562 min_size : int
563 (Inclusive) lower bound on the string length.
564 max_size : int
565 (Inclusive) upper bound on the string length.
566 """
567 raise NotImplementedError
568
569 @abc.abstractmethod
570 def draw_bytes(
571 self,
572 min_size: int = 0,
573 max_size: int = COLLECTION_DEFAULT_MAX_SIZE,
574 ) -> bytes:
575 """
576 Draw a bytes choice.
577
578 Parameters
579 ----------
580 min_size : int
581 (Inclusive) lower bound on the bytes length.
582 max_size : int
583 (Inclusive) upper bound on the bytes length.
584 """
585 raise NotImplementedError
586
587 def per_test_case_context_manager(self) -> AbstractContextManager:
588 """
589 Returns a context manager which will be entered each time Hypothesis
590 starts generating and executing one test case, and exited when that test
591 case finishes generating and executing, including if any exception is
592 thrown.
593
594 In the lifecycle of a Hypothesis test, this is called before
595 generating strategy values for each test case. This is just before any
596 :ref:`custom executor <custom-function-execution>` is called.
597
598 Even if not returning a custom context manager, |PrimitiveProvider|
599 subclasses are welcome to override this method to know when Hypothesis
600 starts and ends the execution of a single test case.
601 """
602 return contextlib.nullcontext()
603
604 def realize(self, value: T, *, for_failure: bool = False) -> T:
605 """
606 Called whenever hypothesis requires a concrete (non-symbolic) value from
607 a potentially symbolic value. Hypothesis will not check that ``value`` is
608 symbolic before calling ``realize``, so you should handle the case where
609 ``value`` is non-symbolic.
610
611 The returned value should be non-symbolic. If you cannot provide a value,
612 raise |BackendCannotProceed| with a value of ``"discard_test_case"``.
613
614 If ``for_failure`` is ``True``, the value is associated with a failing example.
615 In this case, the backend should spend substantially more effort when
616 attempting to realize the value, since it is important to avoid discarding
617 failing examples. Backends may still raise |BackendCannotProceed| when
618 ``for_failure`` is ``True``, if realization is truly impossible or if
619 realization takes significantly longer than expected (say, 5 minutes).
620 """
621 return value
622
623 def replay_choices(self, choices: tuple[ChoiceT, ...]) -> None:
624 """
625 Called when Hypothesis has discovered a choice sequence which the provider
626 may wish to enqueue to replay under its own instrumentation when we next
627 ask to generate a test case, rather than generating one from scratch.
628
629 This is used to e.g. warm-start :pypi:`hypothesis-crosshair` with a corpus
630 of high-code-coverage inputs discovered by
631 `HypoFuzz <https://hypofuzz.com/>`_.
632 """
633 return None
634
635 def observe_test_case(self) -> dict[str, Any]:
636 """Called at the end of the test case when :ref:`observability
637 <observability>` is enabled.
638
639 The return value should be a non-symbolic json-encodable dictionary,
640 and will be included in observations as ``observation["metadata"]["backend"]``.
641 """
642 return {}
643
644 def observe_information_messages(
645 self, *, lifetime: LifetimeT
646 ) -> Iterable[_BackendInfoMsg]:
647 """Called at the end of each test case and again at end of the test function.
648
649 Return an iterable of ``{type: info/alert/error, title: str, content: str | dict}``
650 dictionaries to be delivered as individual information messages. Hypothesis
651 adds the ``run_start`` timestamp and ``property`` name for you.
652 """
653 assert lifetime in ("test_case", "test_function")
654 yield from []
655
656 def on_observation(self, observation: TestCaseObservation) -> None: # noqa: B027
657 """
658 Called at the end of each test case which uses this provider, with the same
659 ``observation["type"] == "test_case"`` observation that is passed to
660 other callbacks added via |add_observability_callback|. This method is not
661 called with ``observation["type"] in {"info", "alert", "error"}``
662 observations.
663
664 .. important::
665
666 For |PrimitiveProvider.on_observation| to be called by Hypothesis,
667 |PrimitiveProvider.add_observability_callback| must be set to ``True``.
668
669 |PrimitiveProvider.on_observation| is explicitly opt-in, as enabling
670 observability might increase runtime or memory usage.
671
672 Calls to this method are guaranteed to alternate with calls to
673 |PrimitiveProvider.per_test_case_context_manager|. For example:
674
675 .. code-block:: python
676
677 # test function starts
678 per_test_case_context_manager()
679 on_observation()
680 per_test_case_context_manager()
681 on_observation()
682 ...
683 # test function ends
684
685 Note that |PrimitiveProvider.on_observation| will not be called for test
686 cases which did not use this provider during generation, for example
687 during |Phase.reuse| or |Phase.shrink|, or because Hypothesis switched
688 to the standard Hypothesis backend after this backend raised too many
689 |BackendCannotProceed| exceptions.
690 """
691
692 def span_start(self, label: int, /) -> None: # noqa: B027 # non-abstract noop
693 """Marks the beginning of a semantically meaningful span of choices.
694
695 Spans are a depth-first tree structure. A span is opened by a call to
696 |PrimitiveProvider.span_start|, and a call to |PrimitiveProvider.span_end|
697 closes the most recently opened span. So the following sequence of calls:
698
699 .. code-block:: python
700
701 span_start(label=1)
702 n1 = draw_integer()
703 span_start(label=2)
704 b1 = draw_boolean()
705 n2 = draw_integer()
706 span_end()
707 f1 = draw_float()
708 span_end()
709
710 produces the following two spans of choices:
711
712 .. code-block::
713
714 1: [n1, b1, n2, f1]
715 2: [b1, n2]
716
717 Hypothesis uses spans to denote "semantically meaningful" sequences of
718 choices. For instance, Hypothesis opens a span for the sequence of choices
719 made while drawing from each strategy. Not every span corresponds to a
720 strategy; the generation of e.g. each element in |st.lists| is also marked
721 with a span, among others.
722
723 ``label`` is an opaque integer, which has no defined semantics.
724 The only guarantee made by Hypothesis is that all spans with the same
725 "meaning" will share the same ``label``. So all spans from the same
726 strategy will share the same label, as will e.g. the spans for |st.lists|
727 elements.
728
729 Providers can track calls to |PrimitiveProvider.span_start| and
730 |PrimitiveProvider.span_end| to learn something about the semantics of
731 the test's choice sequence. For instance, a provider could track the depth
732 of the span tree, or the number of unique labels, which says something about
733 the complexity of the choices being generated. Or a provider could track
734 the span tree across test cases in order to determine what strategies are
735 being used in what contexts.
736
737 It is possible for Hypothesis to start and immediately stop a span,
738 without calling a ``draw_*`` method in between. These spans contain zero
739 choices.
740
741 Hypothesis will always balance the number of calls to
742 |PrimitiveProvider.span_start| and |PrimitiveProvider.span_end|. A call
743 to |PrimitiveProvider.span_start| will always be followed by a call to
744 |PrimitiveProvider.span_end| before the end of the test case.
745
746 |PrimitiveProvider.span_start| is called from ``ConjectureData.start_span()``
747 internally.
748 """
749
750 def span_end(self, discard: bool, /) -> None: # noqa: B027
751 """Marks the end of a semantically meaningful span of choices.
752
753 ``discard`` is ``True`` when the draw was filtered out or otherwise marked
754 as unlikely to contribute to the input data as seen by the user's test.
755 Note however that side effects can make this determination unsound.
756
757 |PrimitiveProvider.span_end| is called from ``ConjectureData.stop_span()``
758 internally.
759 """
760
761
762class HypothesisProvider(PrimitiveProvider):
763 lifetime = "test_case"
764
765 def __init__(self, conjecturedata: Optional["ConjectureData"], /):
766 super().__init__(conjecturedata)
767 self._random = None if self._cd is None else self._cd._random
768
769 @cached_property
770 def _local_constants(self):
771 # defer computation of local constants until/if we need it
772 return _get_local_constants()
773
774 def _maybe_draw_constant(
775 self,
776 choice_type: ChoiceTypeT,
777 constraints: ChoiceConstraintsT,
778 *,
779 p: float = 0.05,
780 ) -> Optional["ConstantT"]:
781 assert self._random is not None
782 assert choice_type != "boolean"
783 # check whether we even want a constant before spending time computing
784 # and caching the allowed constants.
785 if self._random.random() > p:
786 return None
787
788 # note: this property access results in computation being done
789 assert self._local_constants is not None
790
791 key = (choice_type, choice_constraints_key(choice_type, constraints))
792 if key not in CONSTANTS_CACHE:
793 CONSTANTS_CACHE[key] = (
794 tuple(
795 choice
796 for choice in GLOBAL_CONSTANTS.set_for_type(choice_type)
797 if choice_permitted(choice, constraints)
798 ),
799 tuple(
800 choice
801 for choice in self._local_constants.set_for_type(choice_type)
802 if choice_permitted(choice, constraints)
803 ),
804 )
805
806 # split constants into two pools, so we still have a good chance to draw
807 # global constants even if there are many local constants.
808 global_constants, local_constants = CONSTANTS_CACHE[key]
809 constants_lists = ([global_constants] if global_constants else []) + (
810 [local_constants] if local_constants else []
811 )
812 if not constants_lists:
813 return None
814
815 # At this point, we've decided to use a constant. Now we select which pool
816 # to draw that constant from.
817 #
818 # Note that this approach has a different probability distribution than
819 # attempting a random.random for both global_constants and local_constants.
820 constants = self._random.choice(constants_lists)
821 return self._random.choice(constants)
822
823 def draw_boolean(
824 self,
825 p: float = 0.5,
826 ) -> bool:
827 assert self._random is not None
828
829 if p <= 0:
830 return False
831 if p >= 1:
832 return True
833
834 return self._random.random() < p
835
836 def draw_integer(
837 self,
838 min_value: int | None = None,
839 max_value: int | None = None,
840 *,
841 weights: dict[int, float] | None = None,
842 shrink_towards: int = 0,
843 ) -> int:
844 assert self._cd is not None
845 if (
846 constant := self._maybe_draw_constant(
847 "integer",
848 {
849 "min_value": min_value,
850 "max_value": max_value,
851 "weights": weights,
852 "shrink_towards": shrink_towards,
853 },
854 )
855 ) is not None:
856 assert isinstance(constant, int)
857 return constant
858
859 if weights is not None:
860 assert min_value is not None
861 assert max_value is not None
862
863 # format of weights is a mapping of ints to p, where sum(p) < 1.
864 # The remaining probability mass is uniformly distributed over
865 # *all* ints (not just the unmapped ones; this is somewhat undesirable,
866 # but simplifies things).
867 #
868 # We assert that sum(p) is strictly less than 1 because it simplifies
869 # handling forced values when we can force into the unmapped probability
870 # mass. We should eventually remove this restriction.
871 sampler = Sampler(
872 [1 - sum(weights.values()), *weights.values()], observe=False
873 )
874 # if we're forcing, it's easiest to force into the unmapped probability
875 # mass and then force the drawn value after.
876 idx = sampler.sample(self._cd)
877
878 if idx == 0:
879 return self._draw_integer_from_distribution(min_value, max_value)
880 # implicit reliance on dicts being sorted for determinism
881 return list(weights)[idx - 1]
882
883 return self._draw_integer_from_distribution(min_value, max_value)
884
885 def _draw_integer_from_distribution(
886 self, min_value: int | None, max_value: int | None
887 ) -> int:
888 assert self._random is not None
889 dist = INTEGERS_DISTRIBUTION
890
891 # Our integers distribution is defined over the full float64 range. If the user
892 # has not placed a lower and/or upper bound, we don't actually want to sample
893 # from this full range, because we might otherwise generate integers
894 # that are so large as to cause problems. For example python errors when passing
895 # a too-large integer to c APIs (common and implicit in some python stdlib APIs).
896 #
897 # * If the user asks for an unbounded range, we bound at [-2**128, 2**128].
898 # * If the user asks for a half-bounded range starting at n, we bound at
899 # [n, 2**128] for n < 2**127, and [n, 2n] otherwise, so we always support
900 # generating large integers if a user explicitly asks for integers around that
901 # magnitude. The choice of a factor of two here is arbitrary.
902 if min_value is None and max_value is None:
903 min_value = -(2**128)
904 max_value = 2**128
905 elif min_value is None:
906 assert max_value is not None
907 min_value = -max(2**128, 2 * abs(max_value))
908 elif max_value is None:
909 max_value = max(2**128, 2 * abs(min_value))
910
911 lo = 0.0
912 hi = 1.0
913 safe_bounds = True
914 try:
915 cdf_min = min_value - 0.5
916 cdf_max = max_value + 0.5
917 except OverflowError:
918 safe_bounds = False
919
920 if safe_bounds:
921 lo = dist.cdf(cdf_min)
922 hi = dist.cdf(cdf_max)
923
924 # if the bounds in CDF-space are too close together, there isn't enough room
925 # to stably sample between hi and lo. For example, in the extreme case of
926 # hi = lo (which can happen even if min_value != max_value due to either float
927 # imprecision or numerical instability), our sampling algorithm would collapse
928 # to always return the sample value.
929 #
930 # The choice of 1e-13 here is somewhat arbitrary. At 1.0, epsilon in float64
931 # is 2^-53 ~= 1e-16. We want some breathing room from epsilon, because we need
932 # some variation to actually sample in. I haven't put much thought into where
933 # the correct tradeoff lies. The downside risk to choosing a too-aggressive
934 # bound is some integers may literally not be representable in the `hi - lo`
935 # sampling space. The downside risk to a too-conservative bound is switching
936 # off our good distribution too early.
937 if hi - lo < 1e-13:
938 safe_bounds = False
939
940 if safe_bounds:
941 # inverse_cdf requires strictly 0 < p < 1. Resample until we get it.
942 while (p := lo + self._random.random() * (hi - lo)) in {0, 1}:
943 pass # pragma: no cover
944 n = round(dist.inverse_cdf(p))
945 # As an extra measure of safety, clamp our generated value to the requested
946 # range. It would of course be nice if we provably satisfied this by
947 # construction - I'm just not 100% confident that we do.
948 return clamp(min_value, n, max_value)
949
950 # We have determined the bounds [min_value, max_value] are not numerically safe
951 # to use. Fall back to a simpler and safer distribution: a uniform distribution
952 # on [min_value, max_value].
953 return self._random.randint(min_value, max_value)
954
955 def draw_float(
956 self,
957 *,
958 min_value: float = -math.inf,
959 max_value: float = math.inf,
960 allow_nan: bool = True,
961 smallest_nonzero_magnitude: float,
962 ) -> float:
963 assert self._random is not None
964
965 constraints: FloatConstraints = {
966 "min_value": min_value,
967 "max_value": max_value,
968 "allow_nan": allow_nan,
969 "smallest_nonzero_magnitude": smallest_nonzero_magnitude,
970 }
971 if (
972 constant := self._maybe_draw_constant("float", constraints, p=0.15)
973 ) is not None:
974 assert isinstance(constant, float)
975 return constant
976
977 # on top of the probability to draw a constant float, we independently
978 # upweight 0.0/-0.0, math.inf, -math.inf, nans, and boundary values.
979 weird_floats = [
980 f
981 for f in [
982 0.0,
983 -0.0,
984 math.inf,
985 -math.inf,
986 math.nan,
987 -math.nan,
988 SIGNALING_NAN,
989 -SIGNALING_NAN,
990 min_value,
991 next_up(min_value),
992 min_value + 1,
993 max_value - 1,
994 next_down(max_value),
995 max_value,
996 ]
997 if choice_permitted(f, constraints)
998 ]
999
1000 if weird_floats and self._random.random() < 0.05:
1001 return self._random.choice(weird_floats)
1002
1003 clamper = make_float_clamper(
1004 min_value,
1005 max_value,
1006 smallest_nonzero_magnitude=smallest_nonzero_magnitude,
1007 allow_nan=allow_nan,
1008 )
1009
1010 result = self._draw_float()
1011 if allow_nan and math.isnan(result):
1012 clamped = result # pragma: no cover
1013 else:
1014 clamped = clamper(result)
1015 if float_to_int(clamped) != float_to_int(result) and not (
1016 math.isnan(result) and allow_nan
1017 ):
1018 result = clamped
1019 return result
1020
1021 def draw_string(
1022 self,
1023 intervals: IntervalSet,
1024 *,
1025 min_size: int = 0,
1026 max_size: int = COLLECTION_DEFAULT_MAX_SIZE,
1027 ) -> str:
1028 assert self._cd is not None
1029 assert self._random is not None
1030
1031 if len(intervals) == 0:
1032 return ""
1033
1034 if (
1035 constant := self._maybe_draw_constant(
1036 "string",
1037 {"intervals": intervals, "min_size": min_size, "max_size": max_size},
1038 )
1039 ) is not None:
1040 assert isinstance(constant, str)
1041 return constant
1042
1043 average_size = min(
1044 max(min_size * 2, min_size + 5),
1045 0.5 * (min_size + max_size),
1046 )
1047
1048 chars = []
1049 elements = many(
1050 self._cd,
1051 min_size=min_size,
1052 max_size=max_size,
1053 average_size=average_size,
1054 observe=False,
1055 )
1056 while elements.more():
1057 if len(intervals) > 256:
1058 if self.draw_boolean(0.2):
1059 i = self._random.randint(256, len(intervals) - 1)
1060 else:
1061 i = self._random.randint(0, 255)
1062 else:
1063 i = self._random.randint(0, len(intervals) - 1)
1064
1065 chars.append(intervals.char_in_shrink_order(i))
1066
1067 return "".join(chars)
1068
1069 def draw_bytes(
1070 self,
1071 min_size: int = 0,
1072 max_size: int = COLLECTION_DEFAULT_MAX_SIZE,
1073 ) -> bytes:
1074 assert self._cd is not None
1075 assert self._random is not None
1076
1077 if (
1078 constant := self._maybe_draw_constant(
1079 "bytes", {"min_size": min_size, "max_size": max_size}
1080 )
1081 ) is not None:
1082 assert isinstance(constant, bytes)
1083 return constant
1084
1085 buf = bytearray()
1086 average_size = min(
1087 max(min_size * 2, min_size + 5),
1088 0.5 * (min_size + max_size),
1089 )
1090 elements = many(
1091 self._cd,
1092 min_size=min_size,
1093 max_size=max_size,
1094 average_size=average_size,
1095 observe=False,
1096 )
1097 while elements.more():
1098 buf += self._random.randbytes(1)
1099
1100 return bytes(buf)
1101
1102 def _draw_float(self) -> float:
1103 assert self._random is not None
1104
1105 f = lex_to_float(self._random.getrandbits(64))
1106 sign = 1 if self._random.getrandbits(1) else -1
1107 return sign * f
1108
1109
1110# Masks for masking off the first byte of an n-bit buffer.
1111# The appropriate mask is stored at position n % 8.
1112BYTE_MASKS = [(1 << n) - 1 for n in range(8)]
1113BYTE_MASKS[0] = 255
1114
1115
1116class BytestringProvider(PrimitiveProvider):
1117 lifetime = "test_case"
1118
1119 def __init__(
1120 self, conjecturedata: Optional["ConjectureData"], /, *, bytestring: bytes
1121 ):
1122 super().__init__(conjecturedata)
1123 self.bytestring = bytestring
1124 self.index = 0
1125 self.drawn = bytearray()
1126
1127 def _draw_bits(self, n):
1128 if n == 0: # pragma: no cover
1129 return 0
1130 n_bytes = bits_to_bytes(n)
1131 if self.index + n_bytes > len(self.bytestring):
1132 self._cd.mark_overrun()
1133 buf = bytearray(self.bytestring[self.index : self.index + n_bytes])
1134 self.index += n_bytes
1135
1136 buf[0] &= BYTE_MASKS[n % 8]
1137 buf = bytes(buf)
1138 self.drawn += buf
1139 return int_from_bytes(buf)
1140
1141 def draw_boolean(
1142 self,
1143 p: float = 0.5,
1144 ) -> bool:
1145 if p <= 0:
1146 return False
1147 if p >= 1:
1148 return True
1149
1150 # always use one byte for booleans to maintain constant draw size.
1151 # If a probability requires more than 8 bits to represent precisely,
1152 # the result will be slightly biased, but not badly.
1153 bits = 8
1154 size = 2**bits
1155 # always leave at least one value that can be true, even for very small
1156 # p.
1157 falsey = max(1, math.floor(size * (1 - p)))
1158 n = self._draw_bits(bits)
1159 return n >= falsey
1160
1161 def draw_integer(
1162 self,
1163 min_value: int | None = None,
1164 max_value: int | None = None,
1165 *,
1166 weights: dict[int, float] | None = None,
1167 shrink_towards: int = 0,
1168 ) -> int:
1169 assert self._cd is not None
1170
1171 # we explicitly ignore integer weights for now, as they are likely net
1172 # negative on fuzzer performance.
1173
1174 if min_value is None and max_value is None:
1175 min_value = -(2**127)
1176 max_value = 2**127 - 1
1177 elif min_value is None:
1178 assert max_value is not None
1179 min_value = max_value - 2**64
1180 elif max_value is None:
1181 assert min_value is not None
1182 max_value = min_value + 2**64
1183
1184 if min_value == max_value:
1185 return min_value
1186
1187 bits = (max_value - min_value).bit_length()
1188 value = self._draw_bits(bits)
1189 while not (min_value <= value <= max_value):
1190 value = self._draw_bits(bits)
1191 return value
1192
1193 def draw_float(
1194 self,
1195 *,
1196 min_value: float = -math.inf,
1197 max_value: float = math.inf,
1198 allow_nan: bool = True,
1199 smallest_nonzero_magnitude: float,
1200 ) -> float:
1201 n = self._draw_bits(64)
1202 sign = -1 if n >> 64 else 1
1203 f = sign * lex_to_float(n & ((1 << 64) - 1))
1204 clamper = make_float_clamper(
1205 min_value,
1206 max_value,
1207 smallest_nonzero_magnitude=smallest_nonzero_magnitude,
1208 allow_nan=allow_nan,
1209 )
1210 return clamper(f)
1211
1212 def _draw_collection(self, min_size, max_size, *, alphabet_size):
1213 average_size = min(
1214 max(min_size * 2, min_size + 5),
1215 0.5 * (min_size + max_size),
1216 )
1217 elements = many(
1218 self._cd,
1219 min_size=min_size,
1220 max_size=max_size,
1221 average_size=average_size,
1222 observe=False,
1223 )
1224 values = []
1225 while elements.more():
1226 values.append(self.draw_integer(0, alphabet_size - 1))
1227 return values
1228
1229 def draw_string(
1230 self,
1231 intervals: IntervalSet,
1232 *,
1233 min_size: int = 0,
1234 max_size: int = COLLECTION_DEFAULT_MAX_SIZE,
1235 ) -> str:
1236 values = self._draw_collection(min_size, max_size, alphabet_size=len(intervals))
1237 return "".join(chr(intervals[v]) for v in values)
1238
1239 def draw_bytes(
1240 self,
1241 min_size: int = 0,
1242 max_size: int = COLLECTION_DEFAULT_MAX_SIZE,
1243 ) -> bytes:
1244 values = self._draw_collection(min_size, max_size, alphabet_size=2**8)
1245 return bytes(values)
1246
1247
1248class URandom(Random):
1249 # we reimplement a Random instance instead of using SystemRandom, because
1250 # os.urandom is not guaranteed to read from /dev/urandom.
1251
1252 @staticmethod
1253 def _urandom(size: int) -> bytes:
1254 # By default, we buffer more data from /dev/urandom than the actual `size` number
1255 # of bytes we requested. This trips up anyone (and particularly Antithesis)
1256 # hooking /dev/urandom reads to control randomness.
1257 #
1258 # buffering=0 disables this behavior.
1259 with open("/dev/urandom", "rb", buffering=0) as f:
1260 return f.read(size)
1261
1262 def getrandbits(self, k: int) -> int:
1263 assert k >= 0
1264 size = bits_to_bytes(k)
1265 n = int_from_bytes(self._urandom(size))
1266 # trim excess bits
1267 return n >> (size * 8 - k)
1268
1269 def random(self) -> float:
1270 # adapted from random.SystemRandom.random
1271 return (int_from_bytes(self._urandom(7)) >> 3) * (2**-53)
1272
1273
1274class URandomProvider(HypothesisProvider):
1275 # A provider which reads directly from /dev/urandom as its source of randomness.
1276 # This provider exists to provide better Hypothesis integration with Antithesis
1277 # (https://antithesis.com/), which interprets calls to /dev/urandom as the
1278 # randomness to mutate. This effectively gives Antithesis control over
1279 # the choices made by the URandomProvider.
1280 #
1281 # If you are not using Antithesis, you probably don't want to use this
1282 # provider.
1283
1284 def __init__(self, conjecturedata: Optional["ConjectureData"], /):
1285 super().__init__(conjecturedata)
1286 if WINDOWS: # pragma: no cover
1287 warnings.warn(
1288 "/dev/urandom is not available on windows. Falling back to "
1289 'standard PRNG generation (equivalent to backend="hypothesis").',
1290 HypothesisWarning,
1291 stacklevel=1,
1292 )
1293 # don't overwrite the HypothesisProvider self._random attribute in
1294 # this case
1295 else:
1296 self._random = URandom()