1# This file is part of Hypothesis, which may be found at
2# https://github.com/HypothesisWorks/hypothesis/
3#
4# Copyright the Hypothesis Authors.
5# Individual contributors are listed in AUTHORS.rst and the git log.
6#
7# This Source Code Form is subject to the terms of the Mozilla Public License,
8# v. 2.0. If a copy of the MPL was not distributed with this file, You can
9# obtain one at https://mozilla.org/MPL/2.0/.
10
11import abc
12import contextlib
13import math
14import sys
15import warnings
16from collections.abc import Iterable
17from contextlib import AbstractContextManager, contextmanager
18from functools import cached_property
19from random import Random
20from sys import float_info
21from typing import (
22 TYPE_CHECKING,
23 Any,
24 ClassVar,
25 Literal,
26 Optional,
27 TypeAlias,
28 TypedDict,
29 TypeVar,
30)
31
32from sortedcontainers import SortedSet
33
34from hypothesis.errors import HypothesisWarning
35from hypothesis.internal.cache import LRUCache
36from hypothesis.internal.compat import WINDOWS, int_from_bytes
37from hypothesis.internal.conjecture.choice import (
38 ChoiceConstraintsT,
39 ChoiceT,
40 ChoiceTypeT,
41 FloatConstraints,
42 choice_constraints_key,
43 choice_permitted,
44)
45from hypothesis.internal.conjecture.floats import lex_to_float
46from hypothesis.internal.conjecture.junkdrawer import bits_to_bytes
47from hypothesis.internal.conjecture.utils import (
48 Sampler,
49 many,
50)
51from hypothesis.internal.constants_ast import (
52 Constants,
53 constants_from_module,
54 is_local_module_file,
55)
56from hypothesis.internal.floats import (
57 SIGNALING_NAN,
58 clamp,
59 float_to_int,
60 make_float_clamper,
61 next_down,
62 next_up,
63)
64from hypothesis.internal.intervalsets import IntervalSet
65from hypothesis.internal.observability import InfoObservationType, TestCaseObservation
66from hypothesis.internal.statistics import (
67 LogStudentTDistribution,
68 PiecewiseDistribution,
69 UniformDistribution,
70)
71
72if TYPE_CHECKING:
73 from hypothesis.internal.conjecture.data import ConjectureData
74 from hypothesis.internal.constants_ast import ConstantT
75
76T = TypeVar("T")
77LifetimeT: TypeAlias = Literal["test_case", "test_function"]
78COLLECTION_DEFAULT_MAX_SIZE = 10**10 # "arbitrarily large"
79
80# see https://github.com/HypothesisWorks/hypothesis/pull/4728 for details
81INTEGERS_DISTRIBUTION = PiecewiseDistribution(
82 inner=UniformDistribution(half_width=256),
83 outer=LogStudentTDistribution(scale_bits=13, df=2),
84 switchover=256,
85)
86
87
88#: Registered Hypothesis backends. This is a dictionary where keys are the name
89#: to be used in |settings.backend|. The value of a key can be either:
90#:
91#: * A string corresponding to an importable absolute path of a
92#: |PrimitiveProvider| subclass
93#: * A |PrimitiveProvider| subclass (the class itself, not an instance of the
94#: class)
95#:
96#: Hypothesis will instantiate the corresponding |PrimitiveProvider| subclass
97#: when the backend is requested by a test's |settings.backend| value.
98#:
99#: For example, the default Hypothesis backend is registered as:
100#:
101#: .. code-block:: python
102#:
103#: from hypothesis.internal.conjecture.providers import AVAILABLE_PROVIDERS
104#:
105#: AVAILABLE_PROVIDERS["hypothesis"] = "hypothesis.internal.conjecture.providers.HypothesisProvider"
106#: # or
107#: AVAILABLE_PROVIDERS["hypothesis"] = HypothesisProvider
108#:
109#: And can be used with:
110#:
111#: .. code-block:: python
112#:
113#: from hypothesis import given, settings, strategies as st
114#:
115#: @given(st.integers())
116#: @settings(backend="hypothesis")
117#: def f(n):
118#: pass
119#:
120#: Though, as ``backend="hypothesis"`` is the default setting, the above would
121#: typically not have any effect.
122#:
123#: For third-party backend authors, we strongly encourage ensuring that
124#: ``import hypothesis`` does not automatically import the expensive parts of
125#: your package, by:
126#:
127#: - setting a string path here, instead of a provider class
128#: - ensuring the registered hypothesis plugin path references a path which just
129#: sets AVAILABLE_PROVIDERS and does not import your package
130AVAILABLE_PROVIDERS: dict[str, str | type["PrimitiveProvider"]] = {
131 "hypothesis": "hypothesis.internal.conjecture.providers.HypothesisProvider",
132 "hypothesis-urandom": "hypothesis.internal.conjecture.providers.URandomProvider",
133}
134# cache the choice_permitted constants for a particular set of constraints.
135CacheKeyT: TypeAlias = tuple[ChoiceTypeT, tuple[Any, ...]]
136CacheValueT: TypeAlias = tuple[tuple["ConstantT", ...], tuple["ConstantT", ...]]
137CONSTANTS_CACHE: LRUCache[CacheKeyT, CacheValueT] = LRUCache(1024)
138
139
140_constants_integers = (
141 # powers of 2
142 [2**n for n in range(16, 66)]
143 # powers of 10
144 + [10**n for n in range(5, 20)]
145 # factorials
146 + [math.factorial(n) for n in range(9, 21)]
147 # a few primorial numbers https://en.wikipedia.org/wiki/Primorial
148 + [
149 510510,
150 6469693230,
151 304250263527210,
152 32589158477190044730,
153 ]
154)
155_constants_integers.extend(
156 [n - 1 for n in _constants_integers] + [n + 1 for n in _constants_integers]
157)
158_constants_integers.extend([-x for x in _constants_integers])
159
160# arbitrary cutoffs to keep our list bounded
161assert all(50_000 <= abs(n) <= 2**66 for n in _constants_integers)
162
163_constant_floats = (
164 [
165 0.5,
166 1.1,
167 1.5,
168 1.9,
169 1.0 / 3,
170 10e6,
171 10e-6,
172 1.175494351e-38,
173 next_up(0.0),
174 float_info.min,
175 float_info.max,
176 3.402823466e38,
177 9007199254740992.0,
178 1 - 10e-6,
179 2 + 10e-6,
180 1.192092896e-07,
181 2.2204460492503131e-016,
182 ]
183 + [2.0**-n for n in (24, 14, 149, 126)] # minimum (sub)normals for float16,32
184 + [float_info.min / n for n in (2, 10, 1000, 100_000)] # subnormal in float64
185)
186_constant_floats.extend([-x for x in _constant_floats])
187assert all(isinstance(f, float) for f in _constant_floats)
188
189_constant_strings = {
190 # strings which can be interpreted as code / logic
191 "undefined",
192 "null",
193 "NULL",
194 "nil",
195 "NIL",
196 "true",
197 "false",
198 "True",
199 "False",
200 "TRUE",
201 "FALSE",
202 "None",
203 "none",
204 "if",
205 "then",
206 "else",
207 "__dict__",
208 "__proto__", # javascript
209 # strings which can be interpreted as a number
210 "0",
211 "1e100",
212 "0..0",
213 "0/0",
214 "1/0",
215 "+0.0",
216 "Infinity",
217 "-Infinity",
218 "Inf",
219 "INF",
220 "NaN",
221 "9" * 30,
222 # common ascii characters
223 ",./;'[]\\-=<>?:\"{}|_+!@#$%^&*()`~",
224 # common unicode characters
225 "Ω≈ç√∫˜µ≤≥÷åß∂ƒ©˙∆˚¬…æœ∑´®†¥¨ˆøπ“‘¡™£¢∞§¶•ªº–≠¸˛Ç◊ı˜Â¯˘¿ÅÍÎÏ˝ÓÔÒÚÆ☃Œ„´‰ˇÁ¨ˆØ∏”’`⁄€‹›fifl‡°·‚—±",
226 # characters which increase in length when lowercased
227 "Ⱥ",
228 "Ⱦ",
229 # ligatures
230 "æœÆŒffʤʨß",
231 # emoticons
232 "(╯°□°)╯︵ ┻━┻)",
233 # emojis
234 "😍",
235 "🇺🇸",
236 # emoji modifiers
237 "🏻", # U+1F3FB Light Skin Tone,
238 "👍🏻", # 👍 followed by U+1F3FB
239 # RTL text
240 "الكل في المجمو عة",
241 # Ogham text, which contains the only character in the Space Separators
242 # unicode category (Zs) that isn't visually blank: . # noqa: RUF003
243 "᚛ᚄᚓᚐᚋᚒᚄ ᚑᚄᚂᚑᚏᚅ᚜",
244 # thai consonant + spacing vowel combinations, which have unusual visual combining behavior
245 "กา",
246 "ก ำกำ",
247 # readable variations on text (bolt/italic/script)
248 "𝐓𝐡𝐞 𝐪𝐮𝐢𝐜𝐤 𝐛𝐫𝐨𝐰𝐧 𝐟𝐨𝐱 𝐣𝐮𝐦𝐩𝐬 𝐨𝐯𝐞𝐫 𝐭𝐡𝐞 𝐥𝐚𝐳𝐲 𝐝𝐨𝐠",
249 "𝕿𝖍𝖊 𝖖𝖚𝖎𝖈𝖐 𝖇𝖗𝖔𝖜𝖓 𝖋𝖔𝖝 𝖏𝖚𝖒𝖕𝖘 𝖔𝖛𝖊𝖗 𝖙𝖍𝖊 𝖑𝖆𝖟𝖞 𝖉𝖔𝖌",
250 "𝑻𝒉𝒆 𝒒𝒖𝒊𝒄𝒌 𝒃𝒓𝒐𝒘𝒏 𝒇𝒐𝒙 𝒋𝒖𝒎𝒑𝒔 𝒐𝒗𝒆𝒓 𝒕𝒉𝒆 𝒍𝒂𝒛𝒚 𝒅𝒐𝒈",
251 "𝓣𝓱𝓮 𝓺𝓾𝓲𝓬𝓴 𝓫𝓻𝓸𝔀𝓷 𝓯𝓸𝔁 𝓳𝓾𝓶𝓹𝓼 𝓸𝓿𝓮𝓻 𝓽𝓱𝓮 𝓵𝓪𝔃𝔂 𝓭𝓸𝓰",
252 "𝕋𝕙𝕖 𝕢𝕦𝕚𝕔𝕜 𝕓𝕣𝕠𝕨𝕟 𝕗𝕠𝕩 𝕛𝕦𝕞𝕡𝕤 𝕠𝕧𝕖𝕣 𝕥𝕙𝕖 𝕝𝕒𝕫𝕪 𝕕𝕠𝕘",
253 # upsidown text
254 "ʇǝɯɐ ʇᴉs ɹolop ɯnsdᴉ ɯǝɹo˥",
255 # reserved strings in windows
256 "NUL",
257 "COM1",
258 "LPT1",
259 # scunthorpe problem
260 "Scunthorpe",
261 # zalgo text
262 "Ṱ̺̺̕o͞ ̷i̲̬͇̪͙n̝̗͕v̟̜̘̦͟o̶̙̰̠kè͚̮̺̪̹̱̤ ̖t̝͕̳̣̻̪͞h̼͓̲̦̳̘̲e͇̣̰̦̬͎ ̢̼̻̱̘h͚͎͙̜̣̲ͅi̦̲̣̰̤v̻͍e̺̭̳̪̰-m̢iͅn̖̺̞̲̯̰d̵̼̟͙̩̼̘̳ ̞̥̱̳̭r̛̗̘e͙p͠r̼̞̻̭̗e̺̠̣͟s̘͇̳͍̝͉e͉̥̯̞̲͚̬͜ǹ̬͎͎̟̖͇̤t͍̬̤͓̼̭͘ͅi̪̱n͠g̴͉ ͏͉ͅc̬̟h͡a̫̻̯͘o̫̟̖͍̙̝͉s̗̦̲.̨̹͈̣",
263 #
264 # examples from https://faultlore.com/blah/text-hates-you/
265 "मनीष منش",
266 "पन्ह पन्ह त्र र्च कृकृ ड्ड न्हृे إلا بسم الله",
267 "lorem لا بسم الله ipsum 你好1234你好",
268 # unicode charaacters causing unconditional line breaks, as defined by UAX #14:
269 # https://www.unicode.org/reports/tr14/.
270 #
271 # We've seen multiple bugs caused by assuming `str.splitlines` is equivalent to
272 # splitting over "\n", while it actually splits over all line breaks!
273 #
274 # We intersperse the line breaks with normal characters to increase the likelihood
275 # of triggering such a bug.
276 (
277 "a"
278 "\u000a" # line feed (class: LF)
279 "b"
280 "\u000d" # carriage return (class: CR)
281 "c"
282 "\u0085" # next line (class: NL)
283 "d"
284 "\u000b" # line tabulation (class: BK)
285 "e"
286 "\u000c" # form feed (class: BK)
287 "f"
288 "\u2028" # line separator (class: BK)
289 "g"
290 "\u2029" # paragraph separator (class: BK)
291 "h"
292 "\u000d\u000a" # CR+LF
293 "i"
294 ),
295}
296
297
298# we don't actually care what order the constants are sorted in, just that the
299# ordering is deterministic.
300GLOBAL_CONSTANTS = Constants(
301 integers=SortedSet(_constants_integers),
302 floats=SortedSet(_constant_floats, key=float_to_int),
303 bytes=SortedSet(),
304 strings=SortedSet(_constant_strings),
305)
306
307_local_constants = Constants(
308 integers=SortedSet(),
309 floats=SortedSet(key=float_to_int),
310 bytes=SortedSet(),
311 strings=SortedSet(),
312)
313# Modules that we've already seen and processed for local constants. These are
314# are all modules, not necessarily local ones. This lets us quickly see which
315# modules are new without an expensive path.resolve() or is_local_module_file
316# cache lookup.
317#
318# We track by id so we can handle users that put unhashable types like SimpleNamespace
319# into sys.modules. ModuleType.__hash__ falls back to id, so this is equivalent
320# in the standard case.
321_seen_modules: set[int] = set()
322_sys_modules_len: int | None = None
323
324
325def _get_local_constants() -> Constants:
326 global _sys_modules_len, _local_constants
327
328 if sys.platform == "emscripten": # pragma: no cover
329 # pyodide builds bundle the stdlib in a nonstandard location, like
330 # `/lib/python312.zip/heapq.py`. To avoid identifying the entirety of
331 # the stdlib as local code and slowing down on emscripten, instead return
332 # that nothing is local.
333 #
334 # pyodide may provide some way to distinguish stdlib/third-party/local
335 # code. I haven't looked into it. If they do, we should correctly implement
336 # ModuleLocation for pyodide instead of this.
337 return _local_constants
338
339 count_constants = len(_local_constants)
340 # We call this function once per HypothesisProvider instance, i.e. once per
341 # input, so it needs to be performant. The logic here is more complicated
342 # than necessary because of this.
343 #
344 # First, we check whether there are any new modules with a very cheap length
345 # check. This check can be fooled if a module is added while another module is
346 # removed, but the more correct check against tuple(sys.modules.keys()) is
347 # substantially more expensive. Such a new module would eventually be discovered
348 # if / when the length changes again in the future.
349 #
350 # If the length has changed, we find just modules we haven't seen before. Of
351 # those, we find the ones which correspond to local modules, and extract their
352 # constants.
353
354 # careful: store sys.modules length when we first check to avoid race conditions
355 # with other threads loading a module before we set _sys_modules_len.
356 if (sys_modules_len := len(sys.modules)) != _sys_modules_len:
357 new_modules = [
358 module
359 for module in list(sys.modules.values())
360 if id(module) not in _seen_modules
361 ]
362 # Repeated SortedSet unions are expensive. Do the initial unions on a
363 # set(), then do a one-time union with _local_constants after.
364 new_constants = Constants()
365 for module in new_modules:
366 if (
367 module_file := getattr(module, "__file__", None)
368 ) is not None and is_local_module_file(module_file):
369 new_constants |= constants_from_module(module)
370 _seen_modules.add(id(module))
371 _local_constants |= new_constants
372 _sys_modules_len = sys_modules_len
373
374 # if we add any new constant, invalidate the constant cache for permitted values.
375 # A more efficient approach would be invalidating just the keys with this
376 # choice_type.
377 if len(_local_constants) > count_constants:
378 CONSTANTS_CACHE.cache.clear()
379
380 return _local_constants
381
382
383@contextmanager
384def with_register_backend(name, provider_cls):
385 try:
386 AVAILABLE_PROVIDERS[name] = provider_cls
387 yield
388 finally:
389 del AVAILABLE_PROVIDERS[name]
390
391
392class _BackendInfoMsg(TypedDict):
393 type: InfoObservationType
394 title: str
395 content: str | dict[str, Any]
396
397
398# TODO_DOCS: link to choice sequence explanation page
399
400
401class PrimitiveProvider(abc.ABC):
402 """
403 |PrimitiveProvider| is the implementation interface of a
404 :ref:`Hypothesis backend <alternative-backends>`.
405
406 A |PrimitiveProvider| is required to implement the following five
407 ``draw_*`` methods:
408
409 * |PrimitiveProvider.draw_integer|
410 * |PrimitiveProvider.draw_boolean|
411 * |PrimitiveProvider.draw_float|
412 * |PrimitiveProvider.draw_string|
413 * |PrimitiveProvider.draw_bytes|
414
415 Each strategy in Hypothesis generates values by drawing a series of choices
416 from these five methods. By overriding them, a |PrimitiveProvider| can control
417 the distribution of inputs generated by Hypothesis.
418
419 For example, :pypi:`hypothesis-crosshair` implements a |PrimitiveProvider|
420 which uses an SMT solver to generate inputs that uncover new branches.
421
422 Once you implement a |PrimitiveProvider|, you can make it available for use
423 through |AVAILABLE_PROVIDERS|.
424 """
425
426 #: The lifetime of a |PrimitiveProvider| instance. Either ``test_function``
427 #: or ``test_case``.
428 #:
429 #: If ``test_function`` (the default), a single provider instance will be
430 #: instantiated and used for the entirety of each test function (i.e., roughly
431 #: one provider per |@given| annotation). This can be useful for tracking state
432 #: over the entirety of a test function.
433 #:
434 #: If ``test_case``, a new provider instance will be instantiated and used for
435 #: each input Hypothesis generates.
436 #:
437 #: The ``conjecturedata`` argument to ``PrimitiveProvider.__init__`` will
438 #: be ``None`` for a lifetime of ``test_function``, and an instance of
439 #: ``ConjectureData`` for a lifetime of ``test_case``.
440 #:
441 #: Third-party providers likely want to set a lifetime of ``test_function``.
442 lifetime: ClassVar[LifetimeT] = "test_function"
443
444 #: Solver-based backends such as ``hypothesis-crosshair`` use symbolic values
445 #: which record operations performed on them in order to discover new paths.
446 #: If ``avoid_realization`` is set to ``True``, hypothesis will avoid interacting
447 #: with symbolic choices returned by the provider in any way that would force
448 #: the solver to narrow the range of possible values for that symbolic.
449 #:
450 #: Setting this to ``True`` disables some hypothesis features and optimizations.
451 #: Only set this to ``True`` if it is necessary for your backend.
452 avoid_realization: ClassVar[bool] = False
453
454 #: If ``True``, |PrimitiveProvider.on_observation| will be added as a
455 #: callback via |add_observability_callback|, enabling observability during
456 # the lifetime of this provider. If ``False``, |PrimitiveProvider.on_observation|
457 #: will never be called by Hypothesis.
458 #:
459 #: The opt-in behavior of observability is because enabling observability
460 #: might increase runtime or memory usage.
461 add_observability_callback: ClassVar[bool] = False
462
463 def __init__(self, conjecturedata: Optional["ConjectureData"], /) -> None:
464 self._cd = conjecturedata
465
466 @abc.abstractmethod
467 def draw_boolean(
468 self,
469 p: float = 0.5,
470 ) -> bool:
471 """
472 Draw a boolean choice.
473
474 Parameters
475 ----------
476 p: float
477 The probability of returning ``True``. Between 0 and 1 inclusive.
478
479 Except for ``0`` and ``1``, the value of ``p`` is a hint provided by
480 Hypothesis, and may be ignored by the backend.
481
482 If ``0``, the provider must return ``False``. If ``1``, the provider
483 must return ``True``.
484 """
485 raise NotImplementedError
486
487 @abc.abstractmethod
488 def draw_integer(
489 self,
490 min_value: int | None = None,
491 max_value: int | None = None,
492 *,
493 weights: dict[int, float] | None = None,
494 shrink_towards: int = 0,
495 ) -> int:
496 """
497 Draw an integer choice.
498
499 Parameters
500 ----------
501 min_value : int | None
502 (Inclusive) lower bound on the integer value. If ``None``, there is
503 no lower bound.
504 max_value : int | None
505 (Inclusive) upper bound on the integer value. If ``None``, there is
506 no upper bound.
507 weights: dict[int, float] | None
508 Maps keys in the range [``min_value``, ``max_value``] to the probability
509 of returning that key.
510 shrink_towards: int
511 The integer to shrink towards. This is not used during generation and
512 can be ignored by backends.
513 """
514 raise NotImplementedError
515
516 @abc.abstractmethod
517 def draw_float(
518 self,
519 *,
520 min_value: float = -math.inf,
521 max_value: float = math.inf,
522 allow_nan: bool = True,
523 smallest_nonzero_magnitude: float,
524 ) -> float:
525 """
526 Draw a float choice.
527
528 Parameters
529 ----------
530 min_value : float
531 (Inclusive) lower bound on the float value.
532 max_value : float
533 (Inclusive) upper bound on the float value.
534 allow_nan : bool
535 If ``False``, it is invalid to return ``math.nan``.
536 smallest_nonzero_magnitude : float
537 The smallest allowed nonzero magnitude. ``draw_float`` should not
538 return a float ``f`` if ``abs(f) < smallest_nonzero_magnitude``.
539 """
540 raise NotImplementedError
541
542 @abc.abstractmethod
543 def draw_string(
544 self,
545 intervals: IntervalSet,
546 *,
547 min_size: int = 0,
548 max_size: int = COLLECTION_DEFAULT_MAX_SIZE,
549 ) -> str:
550 """
551 Draw a string choice.
552
553 Parameters
554 ----------
555 intervals : IntervalSet
556 The set of codepoints to sample from.
557 min_size : int
558 (Inclusive) lower bound on the string length.
559 max_size : int
560 (Inclusive) upper bound on the string length.
561 """
562 raise NotImplementedError
563
564 @abc.abstractmethod
565 def draw_bytes(
566 self,
567 min_size: int = 0,
568 max_size: int = COLLECTION_DEFAULT_MAX_SIZE,
569 ) -> bytes:
570 """
571 Draw a bytes choice.
572
573 Parameters
574 ----------
575 min_size : int
576 (Inclusive) lower bound on the bytes length.
577 max_size : int
578 (Inclusive) upper bound on the bytes length.
579 """
580 raise NotImplementedError
581
582 def per_test_case_context_manager(self) -> AbstractContextManager:
583 """
584 Returns a context manager which will be entered each time Hypothesis
585 starts generating and executing one test case, and exited when that test
586 case finishes generating and executing, including if any exception is
587 thrown.
588
589 In the lifecycle of a Hypothesis test, this is called before
590 generating strategy values for each test case. This is just before any
591 :ref:`custom executor <custom-function-execution>` is called.
592
593 Even if not returning a custom context manager, |PrimitiveProvider|
594 subclasses are welcome to override this method to know when Hypothesis
595 starts and ends the execution of a single test case.
596 """
597 return contextlib.nullcontext()
598
599 def realize(self, value: T, *, for_failure: bool = False) -> T:
600 """
601 Called whenever hypothesis requires a concrete (non-symbolic) value from
602 a potentially symbolic value. Hypothesis will not check that ``value`` is
603 symbolic before calling ``realize``, so you should handle the case where
604 ``value`` is non-symbolic.
605
606 The returned value should be non-symbolic. If you cannot provide a value,
607 raise |BackendCannotProceed| with a value of ``"discard_test_case"``.
608
609 If ``for_failure`` is ``True``, the value is associated with a failing example.
610 In this case, the backend should spend substantially more effort when
611 attempting to realize the value, since it is important to avoid discarding
612 failing examples. Backends may still raise |BackendCannotProceed| when
613 ``for_failure`` is ``True``, if realization is truly impossible or if
614 realization takes significantly longer than expected (say, 5 minutes).
615 """
616 return value
617
618 def replay_choices(self, choices: tuple[ChoiceT, ...]) -> None:
619 """
620 Called when Hypothesis has discovered a choice sequence which the provider
621 may wish to enqueue to replay under its own instrumentation when we next
622 ask to generate a test case, rather than generating one from scratch.
623
624 This is used to e.g. warm-start :pypi:`hypothesis-crosshair` with a corpus
625 of high-code-coverage inputs discovered by
626 `HypoFuzz <https://hypofuzz.com/>`_.
627 """
628 return None
629
630 def observe_test_case(self) -> dict[str, Any]:
631 """Called at the end of the test case when :ref:`observability
632 <observability>` is enabled.
633
634 The return value should be a non-symbolic json-encodable dictionary,
635 and will be included in observations as ``observation["metadata"]["backend"]``.
636 """
637 return {}
638
639 def observe_information_messages(
640 self, *, lifetime: LifetimeT
641 ) -> Iterable[_BackendInfoMsg]:
642 """Called at the end of each test case and again at end of the test function.
643
644 Return an iterable of ``{type: info/alert/error, title: str, content: str | dict}``
645 dictionaries to be delivered as individual information messages. Hypothesis
646 adds the ``run_start`` timestamp and ``property`` name for you.
647 """
648 assert lifetime in ("test_case", "test_function")
649 yield from []
650
651 def on_observation(self, observation: TestCaseObservation) -> None: # noqa: B027
652 """
653 Called at the end of each test case which uses this provider, with the same
654 ``observation["type"] == "test_case"`` observation that is passed to
655 other callbacks added via |add_observability_callback|. This method is not
656 called with ``observation["type"] in {"info", "alert", "error"}``
657 observations.
658
659 .. important::
660
661 For |PrimitiveProvider.on_observation| to be called by Hypothesis,
662 |PrimitiveProvider.add_observability_callback| must be set to ``True``.
663
664 |PrimitiveProvider.on_observation| is explicitly opt-in, as enabling
665 observability might increase runtime or memory usage.
666
667 Calls to this method are guaranteed to alternate with calls to
668 |PrimitiveProvider.per_test_case_context_manager|. For example:
669
670 .. code-block:: python
671
672 # test function starts
673 per_test_case_context_manager()
674 on_observation()
675 per_test_case_context_manager()
676 on_observation()
677 ...
678 # test function ends
679
680 Note that |PrimitiveProvider.on_observation| will not be called for test
681 cases which did not use this provider during generation, for example
682 during |Phase.reuse| or |Phase.shrink|, or because Hypothesis switched
683 to the standard Hypothesis backend after this backend raised too many
684 |BackendCannotProceed| exceptions.
685 """
686
687 def span_start(self, label: int, /) -> None: # noqa: B027 # non-abstract noop
688 """Marks the beginning of a semantically meaningful span of choices.
689
690 Spans are a depth-first tree structure. A span is opened by a call to
691 |PrimitiveProvider.span_start|, and a call to |PrimitiveProvider.span_end|
692 closes the most recently opened span. So the following sequence of calls:
693
694 .. code-block:: python
695
696 span_start(label=1)
697 n1 = draw_integer()
698 span_start(label=2)
699 b1 = draw_boolean()
700 n2 = draw_integer()
701 span_end()
702 f1 = draw_float()
703 span_end()
704
705 produces the following two spans of choices:
706
707 .. code-block::
708
709 1: [n1, b1, n2, f1]
710 2: [b1, n2]
711
712 Hypothesis uses spans to denote "semantically meaningful" sequences of
713 choices. For instance, Hypothesis opens a span for the sequence of choices
714 made while drawing from each strategy. Not every span corresponds to a
715 strategy; the generation of e.g. each element in |st.lists| is also marked
716 with a span, among others.
717
718 ``label`` is an opaque integer, which has no defined semantics.
719 The only guarantee made by Hypothesis is that all spans with the same
720 "meaning" will share the same ``label``. So all spans from the same
721 strategy will share the same label, as will e.g. the spans for |st.lists|
722 elements.
723
724 Providers can track calls to |PrimitiveProvider.span_start| and
725 |PrimitiveProvider.span_end| to learn something about the semantics of
726 the test's choice sequence. For instance, a provider could track the depth
727 of the span tree, or the number of unique labels, which says something about
728 the complexity of the choices being generated. Or a provider could track
729 the span tree across test cases in order to determine what strategies are
730 being used in what contexts.
731
732 It is possible for Hypothesis to start and immediately stop a span,
733 without calling a ``draw_*`` method in between. These spans contain zero
734 choices.
735
736 Hypothesis will always balance the number of calls to
737 |PrimitiveProvider.span_start| and |PrimitiveProvider.span_end|. A call
738 to |PrimitiveProvider.span_start| will always be followed by a call to
739 |PrimitiveProvider.span_end| before the end of the test case.
740
741 |PrimitiveProvider.span_start| is called from ``ConjectureData.start_span()``
742 internally.
743 """
744
745 def span_end(self, discard: bool, /) -> None: # noqa: B027
746 """Marks the end of a semantically meaningful span of choices.
747
748 ``discard`` is ``True`` when the draw was filtered out or otherwise marked
749 as unlikely to contribute to the input data as seen by the user's test.
750 Note however that side effects can make this determination unsound.
751
752 |PrimitiveProvider.span_end| is called from ``ConjectureData.stop_span()``
753 internally.
754 """
755
756
757class HypothesisProvider(PrimitiveProvider):
758 lifetime = "test_case"
759
760 def __init__(self, conjecturedata: Optional["ConjectureData"], /):
761 super().__init__(conjecturedata)
762 self._random = None if self._cd is None else self._cd._random
763
764 @cached_property
765 def _local_constants(self):
766 # defer computation of local constants until/if we need it
767 return _get_local_constants()
768
769 def _maybe_draw_constant(
770 self,
771 choice_type: ChoiceTypeT,
772 constraints: ChoiceConstraintsT,
773 *,
774 p: float = 0.05,
775 ) -> Optional["ConstantT"]:
776 assert self._random is not None
777 assert choice_type != "boolean"
778 # check whether we even want a constant before spending time computing
779 # and caching the allowed constants.
780 if self._random.random() > p:
781 return None
782
783 # note: this property access results in computation being done
784 assert self._local_constants is not None
785
786 key = (choice_type, choice_constraints_key(choice_type, constraints))
787 if key not in CONSTANTS_CACHE:
788 CONSTANTS_CACHE[key] = (
789 tuple(
790 choice
791 for choice in GLOBAL_CONSTANTS.set_for_type(choice_type)
792 if choice_permitted(choice, constraints)
793 ),
794 tuple(
795 choice
796 for choice in self._local_constants.set_for_type(choice_type)
797 if choice_permitted(choice, constraints)
798 ),
799 )
800
801 # split constants into two pools, so we still have a good chance to draw
802 # global constants even if there are many local constants.
803 global_constants, local_constants = CONSTANTS_CACHE[key]
804 constants_lists = ([global_constants] if global_constants else []) + (
805 [local_constants] if local_constants else []
806 )
807 if not constants_lists:
808 return None
809
810 # At this point, we've decided to use a constant. Now we select which pool
811 # to draw that constant from.
812 #
813 # Note that this approach has a different probability distribution than
814 # attempting a random.random for both global_constants and local_constants.
815 constants = self._random.choice(constants_lists)
816 return self._random.choice(constants)
817
818 def draw_boolean(
819 self,
820 p: float = 0.5,
821 ) -> bool:
822 assert self._random is not None
823
824 if p <= 0:
825 return False
826 if p >= 1:
827 return True
828
829 return self._random.random() < p
830
831 def draw_integer(
832 self,
833 min_value: int | None = None,
834 max_value: int | None = None,
835 *,
836 weights: dict[int, float] | None = None,
837 shrink_towards: int = 0,
838 ) -> int:
839 assert self._cd is not None
840 if (
841 constant := self._maybe_draw_constant(
842 "integer",
843 {
844 "min_value": min_value,
845 "max_value": max_value,
846 "weights": weights,
847 "shrink_towards": shrink_towards,
848 },
849 )
850 ) is not None:
851 assert isinstance(constant, int)
852 return constant
853
854 if weights is not None:
855 assert min_value is not None
856 assert max_value is not None
857
858 # format of weights is a mapping of ints to p, where sum(p) < 1.
859 # The remaining probability mass is uniformly distributed over
860 # *all* ints (not just the unmapped ones; this is somewhat undesirable,
861 # but simplifies things).
862 #
863 # We assert that sum(p) is strictly less than 1 because it simplifies
864 # handling forced values when we can force into the unmapped probability
865 # mass. We should eventually remove this restriction.
866 sampler = Sampler(
867 [1 - sum(weights.values()), *weights.values()], observe=False
868 )
869 # if we're forcing, it's easiest to force into the unmapped probability
870 # mass and then force the drawn value after.
871 idx = sampler.sample(self._cd)
872
873 if idx == 0:
874 return self._draw_integer_from_distribution(min_value, max_value)
875 # implicit reliance on dicts being sorted for determinism
876 return list(weights)[idx - 1]
877
878 return self._draw_integer_from_distribution(min_value, max_value)
879
880 def _draw_integer_from_distribution(
881 self, min_value: int | None, max_value: int | None
882 ) -> int:
883 assert self._random is not None
884 dist = INTEGERS_DISTRIBUTION
885
886 # Our integers distribution is defined over the full float64 range. If the user
887 # has not placed a lower and/or upper bound, we don't actually want to sample
888 # from this full range, because we might otherwise generate integers
889 # that are so large as to cause problems. For example python errors when passing
890 # a too-large integer to c APIs (common and implicit in some python stdlib APIs).
891 #
892 # * If the user asks for an unbounded range, we bound at [-2**128, 2**128].
893 # * If the user asks for a half-bounded range starting at n, we bound at
894 # [n, 2**128] for n < 2**127, and [n, 2n] otherwise, so we always support
895 # generating large integers if a user explicitly asks for integers around that
896 # magnitude. The choice of a factor of two here is arbitrary.
897 if min_value is None and max_value is None:
898 min_value = -(2**128)
899 max_value = 2**128
900 elif min_value is None:
901 assert max_value is not None
902 min_value = -max(2**128, 2 * abs(max_value))
903 elif max_value is None:
904 max_value = max(2**128, 2 * abs(min_value))
905
906 lo = 0.0
907 hi = 1.0
908 safe_bounds = True
909 try:
910 cdf_min = min_value - 0.5
911 cdf_max = max_value + 0.5
912 except OverflowError:
913 safe_bounds = False
914
915 if safe_bounds:
916 lo = dist.cdf(cdf_min)
917 hi = dist.cdf(cdf_max)
918
919 # if the bounds in CDF-space are too close together, there isn't enough room
920 # to stably sample between hi and lo. For example, in the extreme case of
921 # hi = lo (which can happen even if min_value != max_value due to either float
922 # imprecision or numerical instability), our sampling algorithm would collapse
923 # to always return the sample value.
924 #
925 # The choice of 1e-13 here is somewhat arbitrary. At 1.0, epsilon in float64
926 # is 2^-53 ~= 1e-16. We want some breathing room from epsilon, because we need
927 # some variation to actually sample in. I haven't put much thought into where
928 # the correct tradeoff lies. The downside risk to choosing a too-aggressive
929 # bound is some integers may literally not be representable in the `hi - lo`
930 # sampling space. The downside risk to a too-conservative bound is switching
931 # off our good distribution too early.
932 if hi - lo < 1e-13:
933 safe_bounds = False
934
935 if safe_bounds:
936 # inverse_cdf requires strictly 0 < p < 1. Resample until we get it.
937 while (p := lo + self._random.random() * (hi - lo)) in {0, 1}:
938 pass # pragma: no cover
939 n = round(dist.inverse_cdf(p))
940 # As an extra measure of safety, clamp our generated value to the requested
941 # range. It would of course be nice if we provably satisfied this by
942 # construction - I'm just not 100% confident that we do.
943 return clamp(min_value, n, max_value)
944
945 # We have determined the bounds [min_value, max_value] are not numerically safe
946 # to use. Fall back to a simpler and safer distribution: a uniform distribution
947 # on [min_value, max_value].
948 return self._random.randint(min_value, max_value)
949
950 def draw_float(
951 self,
952 *,
953 min_value: float = -math.inf,
954 max_value: float = math.inf,
955 allow_nan: bool = True,
956 smallest_nonzero_magnitude: float,
957 ) -> float:
958 assert self._random is not None
959
960 constraints: FloatConstraints = {
961 "min_value": min_value,
962 "max_value": max_value,
963 "allow_nan": allow_nan,
964 "smallest_nonzero_magnitude": smallest_nonzero_magnitude,
965 }
966 if (
967 constant := self._maybe_draw_constant("float", constraints, p=0.15)
968 ) is not None:
969 assert isinstance(constant, float)
970 return constant
971
972 # on top of the probability to draw a constant float, we independently
973 # upweight 0.0/-0.0, math.inf, -math.inf, nans, and boundary values.
974 weird_floats = [
975 f
976 for f in [
977 0.0,
978 -0.0,
979 math.inf,
980 -math.inf,
981 math.nan,
982 -math.nan,
983 SIGNALING_NAN,
984 -SIGNALING_NAN,
985 min_value,
986 next_up(min_value),
987 min_value + 1,
988 max_value - 1,
989 next_down(max_value),
990 max_value,
991 ]
992 if choice_permitted(f, constraints)
993 ]
994
995 if weird_floats and self._random.random() < 0.05:
996 return self._random.choice(weird_floats)
997
998 clamper = make_float_clamper(
999 min_value,
1000 max_value,
1001 smallest_nonzero_magnitude=smallest_nonzero_magnitude,
1002 allow_nan=allow_nan,
1003 )
1004
1005 result = self._draw_float()
1006 if allow_nan and math.isnan(result):
1007 clamped = result # pragma: no cover
1008 else:
1009 clamped = clamper(result)
1010 if float_to_int(clamped) != float_to_int(result) and not (
1011 math.isnan(result) and allow_nan
1012 ):
1013 result = clamped
1014 return result
1015
1016 def draw_string(
1017 self,
1018 intervals: IntervalSet,
1019 *,
1020 min_size: int = 0,
1021 max_size: int = COLLECTION_DEFAULT_MAX_SIZE,
1022 ) -> str:
1023 assert self._cd is not None
1024 assert self._random is not None
1025
1026 if len(intervals) == 0:
1027 return ""
1028
1029 if (
1030 constant := self._maybe_draw_constant(
1031 "string",
1032 {"intervals": intervals, "min_size": min_size, "max_size": max_size},
1033 )
1034 ) is not None:
1035 assert isinstance(constant, str)
1036 return constant
1037
1038 average_size = min(
1039 max(min_size * 2, min_size + 5),
1040 0.5 * (min_size + max_size),
1041 )
1042
1043 chars = []
1044 elements = many(
1045 self._cd,
1046 min_size=min_size,
1047 max_size=max_size,
1048 average_size=average_size,
1049 observe=False,
1050 )
1051 while elements.more():
1052 if len(intervals) > 256:
1053 if self.draw_boolean(0.2):
1054 i = self._random.randint(256, len(intervals) - 1)
1055 else:
1056 i = self._random.randint(0, 255)
1057 else:
1058 i = self._random.randint(0, len(intervals) - 1)
1059
1060 chars.append(intervals.char_in_shrink_order(i))
1061
1062 return "".join(chars)
1063
1064 def draw_bytes(
1065 self,
1066 min_size: int = 0,
1067 max_size: int = COLLECTION_DEFAULT_MAX_SIZE,
1068 ) -> bytes:
1069 assert self._cd is not None
1070 assert self._random is not None
1071
1072 if (
1073 constant := self._maybe_draw_constant(
1074 "bytes", {"min_size": min_size, "max_size": max_size}
1075 )
1076 ) is not None:
1077 assert isinstance(constant, bytes)
1078 return constant
1079
1080 buf = bytearray()
1081 average_size = min(
1082 max(min_size * 2, min_size + 5),
1083 0.5 * (min_size + max_size),
1084 )
1085 elements = many(
1086 self._cd,
1087 min_size=min_size,
1088 max_size=max_size,
1089 average_size=average_size,
1090 observe=False,
1091 )
1092 while elements.more():
1093 buf += self._random.randbytes(1)
1094
1095 return bytes(buf)
1096
1097 def _draw_float(self) -> float:
1098 assert self._random is not None
1099
1100 f = lex_to_float(self._random.getrandbits(64))
1101 sign = 1 if self._random.getrandbits(1) else -1
1102 return sign * f
1103
1104
1105# Masks for masking off the first byte of an n-bit buffer.
1106# The appropriate mask is stored at position n % 8.
1107BYTE_MASKS = [(1 << n) - 1 for n in range(8)]
1108BYTE_MASKS[0] = 255
1109
1110
1111class BytestringProvider(PrimitiveProvider):
1112 lifetime = "test_case"
1113
1114 def __init__(
1115 self, conjecturedata: Optional["ConjectureData"], /, *, bytestring: bytes
1116 ):
1117 super().__init__(conjecturedata)
1118 self.bytestring = bytestring
1119 self.index = 0
1120 self.drawn = bytearray()
1121
1122 def _draw_bits(self, n):
1123 if n == 0: # pragma: no cover
1124 return 0
1125 n_bytes = bits_to_bytes(n)
1126 if self.index + n_bytes > len(self.bytestring):
1127 self._cd.mark_overrun()
1128 buf = bytearray(self.bytestring[self.index : self.index + n_bytes])
1129 self.index += n_bytes
1130
1131 buf[0] &= BYTE_MASKS[n % 8]
1132 buf = bytes(buf)
1133 self.drawn += buf
1134 return int_from_bytes(buf)
1135
1136 def draw_boolean(
1137 self,
1138 p: float = 0.5,
1139 ) -> bool:
1140 if p <= 0:
1141 return False
1142 if p >= 1:
1143 return True
1144
1145 # always use one byte for booleans to maintain constant draw size.
1146 # If a probability requires more than 8 bits to represent precisely,
1147 # the result will be slightly biased, but not badly.
1148 bits = 8
1149 size = 2**bits
1150 # always leave at least one value that can be true, even for very small
1151 # p.
1152 falsey = max(1, math.floor(size * (1 - p)))
1153 n = self._draw_bits(bits)
1154 return n >= falsey
1155
1156 def draw_integer(
1157 self,
1158 min_value: int | None = None,
1159 max_value: int | None = None,
1160 *,
1161 weights: dict[int, float] | None = None,
1162 shrink_towards: int = 0,
1163 ) -> int:
1164 assert self._cd is not None
1165
1166 # we explicitly ignore integer weights for now, as they are likely net
1167 # negative on fuzzer performance.
1168
1169 if min_value is None and max_value is None:
1170 min_value = -(2**127)
1171 max_value = 2**127 - 1
1172 elif min_value is None:
1173 assert max_value is not None
1174 min_value = max_value - 2**64
1175 elif max_value is None:
1176 assert min_value is not None
1177 max_value = min_value + 2**64
1178
1179 if min_value == max_value:
1180 return min_value
1181
1182 bits = (max_value - min_value).bit_length()
1183 value = self._draw_bits(bits)
1184 while not (min_value <= value <= max_value):
1185 value = self._draw_bits(bits)
1186 return value
1187
1188 def draw_float(
1189 self,
1190 *,
1191 min_value: float = -math.inf,
1192 max_value: float = math.inf,
1193 allow_nan: bool = True,
1194 smallest_nonzero_magnitude: float,
1195 ) -> float:
1196 n = self._draw_bits(64)
1197 sign = -1 if n >> 64 else 1
1198 f = sign * lex_to_float(n & ((1 << 64) - 1))
1199 clamper = make_float_clamper(
1200 min_value,
1201 max_value,
1202 smallest_nonzero_magnitude=smallest_nonzero_magnitude,
1203 allow_nan=allow_nan,
1204 )
1205 return clamper(f)
1206
1207 def _draw_collection(self, min_size, max_size, *, alphabet_size):
1208 average_size = min(
1209 max(min_size * 2, min_size + 5),
1210 0.5 * (min_size + max_size),
1211 )
1212 elements = many(
1213 self._cd,
1214 min_size=min_size,
1215 max_size=max_size,
1216 average_size=average_size,
1217 observe=False,
1218 )
1219 values = []
1220 while elements.more():
1221 values.append(self.draw_integer(0, alphabet_size - 1))
1222 return values
1223
1224 def draw_string(
1225 self,
1226 intervals: IntervalSet,
1227 *,
1228 min_size: int = 0,
1229 max_size: int = COLLECTION_DEFAULT_MAX_SIZE,
1230 ) -> str:
1231 values = self._draw_collection(min_size, max_size, alphabet_size=len(intervals))
1232 return "".join(chr(intervals[v]) for v in values)
1233
1234 def draw_bytes(
1235 self,
1236 min_size: int = 0,
1237 max_size: int = COLLECTION_DEFAULT_MAX_SIZE,
1238 ) -> bytes:
1239 values = self._draw_collection(min_size, max_size, alphabet_size=2**8)
1240 return bytes(values)
1241
1242
1243class URandom(Random):
1244 # we reimplement a Random instance instead of using SystemRandom, because
1245 # os.urandom is not guaranteed to read from /dev/urandom.
1246
1247 @staticmethod
1248 def _urandom(size: int) -> bytes:
1249 # By default, we buffer more data from /dev/urandom than the actual `size` number
1250 # of bytes we requested. This trips up anyone (and particularly Antithesis)
1251 # hooking /dev/urandom reads to control randomness.
1252 #
1253 # buffering=0 disables this behavior.
1254 with open("/dev/urandom", "rb", buffering=0) as f:
1255 return f.read(size)
1256
1257 def getrandbits(self, k: int) -> int:
1258 assert k >= 0
1259 size = bits_to_bytes(k)
1260 n = int_from_bytes(self._urandom(size))
1261 # trim excess bits
1262 return n >> (size * 8 - k)
1263
1264 def random(self) -> float:
1265 # adapted from random.SystemRandom.random
1266 return (int_from_bytes(self._urandom(7)) >> 3) * (2**-53)
1267
1268
1269class URandomProvider(HypothesisProvider):
1270 # A provider which reads directly from /dev/urandom as its source of randomness.
1271 # This provider exists to provide better Hypothesis integration with Antithesis
1272 # (https://antithesis.com/), which interprets calls to /dev/urandom as the
1273 # randomness to mutate. This effectively gives Antithesis control over
1274 # the choices made by the URandomProvider.
1275 #
1276 # If you are not using Antithesis, you probably don't want to use this
1277 # provider.
1278
1279 def __init__(self, conjecturedata: Optional["ConjectureData"], /):
1280 super().__init__(conjecturedata)
1281 if WINDOWS: # pragma: no cover
1282 warnings.warn(
1283 "/dev/urandom is not available on windows. Falling back to "
1284 'standard PRNG generation (equivalent to backend="hypothesis").',
1285 HypothesisWarning,
1286 stacklevel=1,
1287 )
1288 # don't overwrite the HypothesisProvider self._random attribute in
1289 # this case
1290 else:
1291 self._random = URandom()