1# This file is part of Hypothesis, which may be found at
2# https://github.com/HypothesisWorks/hypothesis/
3#
4# Copyright the Hypothesis Authors.
5# Individual contributors are listed in AUTHORS.rst and the git log.
6#
7# This Source Code Form is subject to the terms of the Mozilla Public License,
8# v. 2.0. If a copy of the MPL was not distributed with this file, You can
9# obtain one at https://mozilla.org/MPL/2.0/.
10
11import abc
12import contextlib
13import math
14import sys
15import warnings
16from collections.abc import Iterable
17from contextlib import AbstractContextManager, contextmanager
18from functools import cached_property
19from random import Random
20from sys import float_info
21from types import ModuleType
22from typing import (
23 TYPE_CHECKING,
24 Any,
25 ClassVar,
26 Literal,
27 Optional,
28 TypeAlias,
29 TypedDict,
30 TypeVar,
31)
32
33from sortedcontainers import SortedSet
34
35from hypothesis.errors import HypothesisWarning
36from hypothesis.internal.cache import LRUCache
37from hypothesis.internal.compat import WINDOWS, int_from_bytes
38from hypothesis.internal.conjecture.choice import (
39 ChoiceConstraintsT,
40 ChoiceT,
41 ChoiceTypeT,
42 FloatConstraints,
43 choice_constraints_key,
44 choice_permitted,
45)
46from hypothesis.internal.conjecture.floats import lex_to_float
47from hypothesis.internal.conjecture.junkdrawer import bits_to_bytes
48from hypothesis.internal.conjecture.utils import (
49 INT_SIZES,
50 INT_SIZES_SAMPLER,
51 Sampler,
52 many,
53)
54from hypothesis.internal.constants_ast import (
55 Constants,
56 constants_from_module,
57 is_local_module_file,
58)
59from hypothesis.internal.floats import (
60 SIGNALING_NAN,
61 float_to_int,
62 make_float_clamper,
63 next_down,
64 next_up,
65)
66from hypothesis.internal.intervalsets import IntervalSet
67from hypothesis.internal.observability import InfoObservationType, TestCaseObservation
68
69if TYPE_CHECKING:
70 from hypothesis.internal.conjecture.data import ConjectureData
71 from hypothesis.internal.constants_ast import ConstantT
72
73T = TypeVar("T")
74LifetimeT: TypeAlias = Literal["test_case", "test_function"]
75COLLECTION_DEFAULT_MAX_SIZE = 10**10 # "arbitrarily large"
76
77
78#: Registered Hypothesis backends. This is a dictionary where keys are the name
79#: to be used in |settings.backend|. The value of a key can be either:
80#:
81#: * A string corresponding to an importable absolute path of a
82#: |PrimitiveProvider| subclass
83#: * A |PrimitiveProvider| subclass (the class itself, not an instance of the
84#: class)
85#:
86#: Hypothesis will instantiate the corresponding |PrimitiveProvider| subclass
87#: when the backend is requested by a test's |settings.backend| value.
88#:
89#: For example, the default Hypothesis backend is registered as:
90#:
91#: .. code-block:: python
92#:
93#: from hypothesis.internal.conjecture.providers import AVAILABLE_PROVIDERS
94#:
95#: AVAILABLE_PROVIDERS["hypothesis"] = "hypothesis.internal.conjecture.providers.HypothesisProvider"
96#: # or
97#: AVAILABLE_PROVIDERS["hypothesis"] = HypothesisProvider
98#:
99#: And can be used with:
100#:
101#: .. code-block:: python
102#:
103#: from hypothesis import given, settings, strategies as st
104#:
105#: @given(st.integers())
106#: @settings(backend="hypothesis")
107#: def f(n):
108#: pass
109#:
110#: Though, as ``backend="hypothesis"`` is the default setting, the above would
111#: typically not have any effect.
112#:
113#: For third-party backend authors, we strongly encourage ensuring that
114#: ``import hypothesis`` does not automatically import the expensive parts of
115#: your package, by:
116#:
117#: - setting a string path here, instead of a provider class
118#: - ensuring the registered hypothesis plugin path references a path which just
119#: sets AVAILABLE_PROVIDERS and does not import your package
120AVAILABLE_PROVIDERS: dict[str, str | type["PrimitiveProvider"]] = {
121 "hypothesis": "hypothesis.internal.conjecture.providers.HypothesisProvider",
122 "hypothesis-urandom": "hypothesis.internal.conjecture.providers.URandomProvider",
123}
124# cache the choice_permitted constants for a particular set of constraints.
125CacheKeyT: TypeAlias = tuple[ChoiceTypeT, tuple[Any, ...]]
126CacheValueT: TypeAlias = tuple[tuple["ConstantT", ...], tuple["ConstantT", ...]]
127CONSTANTS_CACHE: LRUCache[CacheKeyT, CacheValueT] = LRUCache(1024)
128
129_constant_floats = (
130 [
131 0.5,
132 1.1,
133 1.5,
134 1.9,
135 1.0 / 3,
136 10e6,
137 10e-6,
138 1.175494351e-38,
139 next_up(0.0),
140 float_info.min,
141 float_info.max,
142 3.402823466e38,
143 9007199254740992.0,
144 1 - 10e-6,
145 2 + 10e-6,
146 1.192092896e-07,
147 2.2204460492503131e-016,
148 ]
149 + [2.0**-n for n in (24, 14, 149, 126)] # minimum (sub)normals for float16,32
150 + [float_info.min / n for n in (2, 10, 1000, 100_000)] # subnormal in float64
151)
152_constant_floats.extend([-x for x in _constant_floats])
153assert all(isinstance(f, float) for f in _constant_floats)
154
155_constant_strings = {
156 # strings which can be interpreted as code / logic
157 "undefined",
158 "null",
159 "NULL",
160 "nil",
161 "NIL",
162 "true",
163 "false",
164 "True",
165 "False",
166 "TRUE",
167 "FALSE",
168 "None",
169 "none",
170 "if",
171 "then",
172 "else",
173 # strings which can be interpreted as a number
174 "0",
175 "1e100",
176 "0..0",
177 "0/0",
178 "1/0",
179 "+0.0",
180 "Infinity",
181 "-Infinity",
182 "Inf",
183 "INF",
184 "NaN",
185 "9" * 30,
186 # common ascii characters
187 ",./;'[]\\-=<>?:\"{}|_+!@#$%^&*()`~",
188 # common unicode characters
189 "Ω≈ç√∫˜µ≤≥÷åß∂ƒ©˙∆˚¬…æœ∑´®†¥¨ˆøπ“‘¡™£¢∞§¶•ªº–≠¸˛Ç◊ı˜Â¯˘¿ÅÍÎÏ˝ÓÔÒÚÆ☃Œ„´‰ˇÁ¨ˆØ∏”’`⁄€‹›fifl‡°·‚—±",
190 # characters which increase in length when lowercased
191 "Ⱥ",
192 "Ⱦ",
193 # ligatures
194 "æœÆŒffʤʨß"
195 # emoticons
196 "(╯°□°)╯︵ ┻━┻)",
197 # emojis
198 "😍",
199 "🇺🇸",
200 # emoji modifiers
201 "🏻" # U+1F3FB Light Skin Tone,
202 "👍🏻", # 👍 followed by U+1F3FB
203 # RTL text
204 "الكل في المجمو عة",
205 # Ogham text, which contains the only character in the Space Separators
206 # unicode category (Zs) that isn't visually blank: . # noqa: RUF003
207 "᚛ᚄᚓᚐᚋᚒᚄ ᚑᚄᚂᚑᚏᚅ᚜",
208 # readable variations on text (bolt/italic/script)
209 "𝐓𝐡𝐞 𝐪𝐮𝐢𝐜𝐤 𝐛𝐫𝐨𝐰𝐧 𝐟𝐨𝐱 𝐣𝐮𝐦𝐩𝐬 𝐨𝐯𝐞𝐫 𝐭𝐡𝐞 𝐥𝐚𝐳𝐲 𝐝𝐨𝐠",
210 "𝕿𝖍𝖊 𝖖𝖚𝖎𝖈𝖐 𝖇𝖗𝖔𝖜𝖓 𝖋𝖔𝖝 𝖏𝖚𝖒𝖕𝖘 𝖔𝖛𝖊𝖗 𝖙𝖍𝖊 𝖑𝖆𝖟𝖞 𝖉𝖔𝖌",
211 "𝑻𝒉𝒆 𝒒𝒖𝒊𝒄𝒌 𝒃𝒓𝒐𝒘𝒏 𝒇𝒐𝒙 𝒋𝒖𝒎𝒑𝒔 𝒐𝒗𝒆𝒓 𝒕𝒉𝒆 𝒍𝒂𝒛𝒚 𝒅𝒐𝒈",
212 "𝓣𝓱𝓮 𝓺𝓾𝓲𝓬𝓴 𝓫𝓻𝓸𝔀𝓷 𝓯𝓸𝔁 𝓳𝓾𝓶𝓹𝓼 𝓸𝓿𝓮𝓻 𝓽𝓱𝓮 𝓵𝓪𝔃𝔂 𝓭𝓸𝓰",
213 "𝕋𝕙𝕖 𝕢𝕦𝕚𝕔𝕜 𝕓𝕣𝕠𝕨𝕟 𝕗𝕠𝕩 𝕛𝕦𝕞𝕡𝕤 𝕠𝕧𝕖𝕣 𝕥𝕙𝕖 𝕝𝕒𝕫𝕪 𝕕𝕠𝕘",
214 # upsidown text
215 "ʇǝɯɐ ʇᴉs ɹolop ɯnsdᴉ ɯǝɹo˥",
216 # reserved strings in windows
217 "NUL",
218 "COM1",
219 "LPT1",
220 # scunthorpe problem
221 "Scunthorpe",
222 # zalgo text
223 "Ṱ̺̺̕o͞ ̷i̲̬͇̪͙n̝̗͕v̟̜̘̦͟o̶̙̰̠kè͚̮̺̪̹̱̤ ̖t̝͕̳̣̻̪͞h̼͓̲̦̳̘̲e͇̣̰̦̬͎ ̢̼̻̱̘h͚͎͙̜̣̲ͅi̦̲̣̰̤v̻͍e̺̭̳̪̰-m̢iͅn̖̺̞̲̯̰d̵̼̟͙̩̼̘̳ ̞̥̱̳̭r̛̗̘e͙p͠r̼̞̻̭̗e̺̠̣͟s̘͇̳͍̝͉e͉̥̯̞̲͚̬͜ǹ̬͎͎̟̖͇̤t͍̬̤͓̼̭͘ͅi̪̱n͠g̴͉ ͏͉ͅc̬̟h͡a̫̻̯͘o̫̟̖͍̙̝͉s̗̦̲.̨̹͈̣",
224 #
225 # examples from https://faultlore.com/blah/text-hates-you/
226 "मनीष منش",
227 "पन्ह पन्ह त्र र्च कृकृ ड्ड न्हृे إلا بسم الله",
228 "lorem لا بسم الله ipsum 你好1234你好",
229}
230
231
232# we don't actually care what order the constants are sorted in, just that the
233# ordering is deterministic.
234GLOBAL_CONSTANTS = Constants(
235 integers=SortedSet(),
236 floats=SortedSet(_constant_floats, key=float_to_int),
237 bytes=SortedSet(),
238 strings=SortedSet(_constant_strings),
239)
240
241_local_constants = Constants(
242 integers=SortedSet(),
243 floats=SortedSet(key=float_to_int),
244 bytes=SortedSet(),
245 strings=SortedSet(),
246)
247# modules that we've already seen and processed for local constants. These are
248# are all modules, not necessarily local ones. This lets us quickly see which
249# modules are new without an expensive path.resolve() or is_local_module_file
250# cache lookup.
251_seen_modules: set[ModuleType] = set()
252_sys_modules_len: int | None = None
253
254
255def _get_local_constants() -> Constants:
256 global _sys_modules_len, _local_constants
257
258 if sys.platform == "emscripten": # pragma: no cover
259 # pyodide builds bundle the stdlib in a nonstandard location, like
260 # `/lib/python312.zip/heapq.py`. To avoid identifying the entirety of
261 # the stdlib as local code and slowing down on emscripten, instead return
262 # that nothing is local.
263 #
264 # pyodide may provide some way to distinguish stdlib/third-party/local
265 # code. I haven't looked into it. If they do, we should correctly implement
266 # ModuleLocation for pyodide instead of this.
267 return _local_constants
268
269 count_constants = len(_local_constants)
270 # We call this function once per HypothesisProvider instance, i.e. once per
271 # input, so it needs to be performant. The logic here is more complicated
272 # than necessary because of this.
273 #
274 # First, we check whether there are any new modules with a very cheap length
275 # check. This check can be fooled if a module is added while another module is
276 # removed, but the more correct check against tuple(sys.modules.keys()) is
277 # substantially more expensive. Such a new module would eventually be discovered
278 # if / when the length changes again in the future.
279 #
280 # If the length has changed, we find just modules we haven't seen before. Of
281 # those, we find the ones which correspond to local modules, and extract their
282 # constants.
283
284 # careful: store sys.modules length when we first check to avoid race conditions
285 # with other threads loading a module before we set _sys_modules_len.
286 if (sys_modules_len := len(sys.modules)) != _sys_modules_len:
287 # set(_seen_modules) shouldn't typically be required, but I have run into
288 # a "set changed size during iteration" error here when running
289 # test_provider_conformance_crosshair.
290 new_modules = set(sys.modules.values()) - set(_seen_modules)
291 # Repeated SortedSet unions are expensive. Do the initial unions on a
292 # set(), then do a one-time union with _local_constants after.
293 new_constants = Constants()
294 for module in new_modules:
295 if (
296 module_file := getattr(module, "__file__", None)
297 ) is not None and is_local_module_file(module_file):
298 new_constants |= constants_from_module(module)
299 _local_constants |= new_constants
300 _seen_modules.update(new_modules)
301 _sys_modules_len = sys_modules_len
302
303 # if we add any new constant, invalidate the constant cache for permitted values.
304 # A more efficient approach would be invalidating just the keys with this
305 # choice_type.
306 if len(_local_constants) > count_constants:
307 CONSTANTS_CACHE.cache.clear()
308
309 return _local_constants
310
311
312@contextmanager
313def with_register_backend(name, provider_cls):
314 try:
315 AVAILABLE_PROVIDERS[name] = provider_cls
316 yield
317 finally:
318 del AVAILABLE_PROVIDERS[name]
319
320
321class _BackendInfoMsg(TypedDict):
322 type: InfoObservationType
323 title: str
324 content: str | dict[str, Any]
325
326
327# TODO_DOCS: link to choice sequence explanation page
328
329
330class PrimitiveProvider(abc.ABC):
331 """
332 |PrimitiveProvider| is the implementation interface of a
333 :ref:`Hypothesis backend <alternative-backends>`.
334
335 A |PrimitiveProvider| is required to implement the following five
336 ``draw_*`` methods:
337
338 * |PrimitiveProvider.draw_integer|
339 * |PrimitiveProvider.draw_boolean|
340 * |PrimitiveProvider.draw_float|
341 * |PrimitiveProvider.draw_string|
342 * |PrimitiveProvider.draw_bytes|
343
344 Each strategy in Hypothesis generates values by drawing a series of choices
345 from these five methods. By overriding them, a |PrimitiveProvider| can control
346 the distribution of inputs generated by Hypothesis.
347
348 For example, :pypi:`hypothesis-crosshair` implements a |PrimitiveProvider|
349 which uses an SMT solver to generate inputs that uncover new branches.
350
351 Once you implement a |PrimitiveProvider|, you can make it available for use
352 through |AVAILABLE_PROVIDERS|.
353 """
354
355 #: The lifetime of a |PrimitiveProvider| instance. Either ``test_function``
356 #: or ``test_case``.
357 #:
358 #: If ``test_function`` (the default), a single provider instance will be
359 #: instantiated and used for the entirety of each test function (i.e., roughly
360 #: one provider per |@given| annotation). This can be useful for tracking state
361 #: over the entirety of a test function.
362 #:
363 #: If ``test_case``, a new provider instance will be instantiated and used for
364 #: each input Hypothesis generates.
365 #:
366 #: The ``conjecturedata`` argument to ``PrimitiveProvider.__init__`` will
367 #: be ``None`` for a lifetime of ``test_function``, and an instance of
368 #: ``ConjectureData`` for a lifetime of ``test_case``.
369 #:
370 #: Third-party providers likely want to set a lifetime of ``test_function``.
371 lifetime: ClassVar[LifetimeT] = "test_function"
372
373 #: Solver-based backends such as ``hypothesis-crosshair`` use symbolic values
374 #: which record operations performed on them in order to discover new paths.
375 #: If ``avoid_realization`` is set to ``True``, hypothesis will avoid interacting
376 #: with symbolic choices returned by the provider in any way that would force
377 #: the solver to narrow the range of possible values for that symbolic.
378 #:
379 #: Setting this to ``True`` disables some hypothesis features and optimizations.
380 #: Only set this to ``True`` if it is necessary for your backend.
381 avoid_realization: ClassVar[bool] = False
382
383 #: If ``True``, |PrimitiveProvider.on_observation| will be added as a
384 #: callback via |add_observability_callback|, enabling observability during
385 # the lifetime of this provider. If ``False``, |PrimitiveProvider.on_observation|
386 #: will never be called by Hypothesis.
387 #:
388 #: The opt-in behavior of observability is because enabling observability
389 #: might increase runtime or memory usage.
390 add_observability_callback: ClassVar[bool] = False
391
392 def __init__(self, conjecturedata: Optional["ConjectureData"], /) -> None:
393 self._cd = conjecturedata
394
395 @abc.abstractmethod
396 def draw_boolean(
397 self,
398 p: float = 0.5,
399 ) -> bool:
400 """
401 Draw a boolean choice.
402
403 Parameters
404 ----------
405 p: float
406 The probability of returning ``True``. Between 0 and 1 inclusive.
407
408 Except for ``0`` and ``1``, the value of ``p`` is a hint provided by
409 Hypothesis, and may be ignored by the backend.
410
411 If ``0``, the provider must return ``False``. If ``1``, the provider
412 must return ``True``.
413 """
414 raise NotImplementedError
415
416 @abc.abstractmethod
417 def draw_integer(
418 self,
419 min_value: int | None = None,
420 max_value: int | None = None,
421 *,
422 weights: dict[int, float] | None = None,
423 shrink_towards: int = 0,
424 ) -> int:
425 """
426 Draw an integer choice.
427
428 Parameters
429 ----------
430 min_value : int | None
431 (Inclusive) lower bound on the integer value. If ``None``, there is
432 no lower bound.
433 max_value : int | None
434 (Inclusive) upper bound on the integer value. If ``None``, there is
435 no upper bound.
436 weights: dict[int, float] | None
437 Maps keys in the range [``min_value``, ``max_value``] to the probability
438 of returning that key.
439 shrink_towards: int
440 The integer to shrink towards. This is not used during generation and
441 can be ignored by backends.
442 """
443 raise NotImplementedError
444
445 @abc.abstractmethod
446 def draw_float(
447 self,
448 *,
449 min_value: float = -math.inf,
450 max_value: float = math.inf,
451 allow_nan: bool = True,
452 smallest_nonzero_magnitude: float,
453 ) -> float:
454 """
455 Draw a float choice.
456
457 Parameters
458 ----------
459 min_value : float
460 (Inclusive) lower bound on the float value.
461 max_value : float
462 (Inclusive) upper bound on the float value.
463 allow_nan : bool
464 If ``False``, it is invalid to return ``math.nan``.
465 smallest_nonzero_magnitude : float
466 The smallest allowed nonzero magnitude. ``draw_float`` should not
467 return a float ``f`` if ``abs(f) < smallest_nonzero_magnitude``.
468 """
469 raise NotImplementedError
470
471 @abc.abstractmethod
472 def draw_string(
473 self,
474 intervals: IntervalSet,
475 *,
476 min_size: int = 0,
477 max_size: int = COLLECTION_DEFAULT_MAX_SIZE,
478 ) -> str:
479 """
480 Draw a string choice.
481
482 Parameters
483 ----------
484 intervals : IntervalSet
485 The set of codepoints to sample from.
486 min_size : int
487 (Inclusive) lower bound on the string length.
488 max_size : int
489 (Inclusive) upper bound on the string length.
490 """
491 raise NotImplementedError
492
493 @abc.abstractmethod
494 def draw_bytes(
495 self,
496 min_size: int = 0,
497 max_size: int = COLLECTION_DEFAULT_MAX_SIZE,
498 ) -> bytes:
499 """
500 Draw a bytes choice.
501
502 Parameters
503 ----------
504 min_size : int
505 (Inclusive) lower bound on the bytes length.
506 max_size : int
507 (Inclusive) upper bound on the bytes length.
508 """
509 raise NotImplementedError
510
511 def per_test_case_context_manager(self) -> AbstractContextManager:
512 """
513 Returns a context manager which will be entered each time Hypothesis
514 starts generating and executing one test case, and exited when that test
515 case finishes generating and executing, including if any exception is
516 thrown.
517
518 In the lifecycle of a Hypothesis test, this is called before
519 generating strategy values for each test case. This is just before any
520 :ref:`custom executor <custom-function-execution>` is called.
521
522 Even if not returning a custom context manager, |PrimitiveProvider|
523 subclasses are welcome to override this method to know when Hypothesis
524 starts and ends the execution of a single test case.
525 """
526 return contextlib.nullcontext()
527
528 def realize(self, value: T, *, for_failure: bool = False) -> T:
529 """
530 Called whenever hypothesis requires a concrete (non-symbolic) value from
531 a potentially symbolic value. Hypothesis will not check that ``value`` is
532 symbolic before calling ``realize``, so you should handle the case where
533 ``value`` is non-symbolic.
534
535 The returned value should be non-symbolic. If you cannot provide a value,
536 raise |BackendCannotProceed| with a value of ``"discard_test_case"``.
537
538 If ``for_failure`` is ``True``, the value is associated with a failing example.
539 In this case, the backend should spend substantially more effort when
540 attempting to realize the value, since it is important to avoid discarding
541 failing examples. Backends may still raise |BackendCannotProceed| when
542 ``for_failure`` is ``True``, if realization is truly impossible or if
543 realization takes significantly longer than expected (say, 5 minutes).
544 """
545 return value
546
547 def replay_choices(self, choices: tuple[ChoiceT, ...]) -> None:
548 """
549 Called when Hypothesis has discovered a choice sequence which the provider
550 may wish to enqueue to replay under its own instrumentation when we next
551 ask to generate a test case, rather than generating one from scratch.
552
553 This is used to e.g. warm-start :pypi:`hypothesis-crosshair` with a corpus
554 of high-code-coverage inputs discovered by
555 `HypoFuzz <https://hypofuzz.com/>`_.
556 """
557 return None
558
559 def observe_test_case(self) -> dict[str, Any]:
560 """Called at the end of the test case when :ref:`observability
561 <observability>` is enabled.
562
563 The return value should be a non-symbolic json-encodable dictionary,
564 and will be included in observations as ``observation["metadata"]["backend"]``.
565 """
566 return {}
567
568 def observe_information_messages(
569 self, *, lifetime: LifetimeT
570 ) -> Iterable[_BackendInfoMsg]:
571 """Called at the end of each test case and again at end of the test function.
572
573 Return an iterable of ``{type: info/alert/error, title: str, content: str | dict}``
574 dictionaries to be delivered as individual information messages. Hypothesis
575 adds the ``run_start`` timestamp and ``property`` name for you.
576 """
577 assert lifetime in ("test_case", "test_function")
578 yield from []
579
580 def on_observation(self, observation: TestCaseObservation) -> None: # noqa: B027
581 """
582 Called at the end of each test case which uses this provider, with the same
583 ``observation["type"] == "test_case"`` observation that is passed to
584 other callbacks added via |add_observability_callback|. This method is not
585 called with ``observation["type"] in {"info", "alert", "error"}``
586 observations.
587
588 .. important::
589
590 For |PrimitiveProvider.on_observation| to be called by Hypothesis,
591 |PrimitiveProvider.add_observability_callback| must be set to ``True``.
592
593 |PrimitiveProvider.on_observation| is explicitly opt-in, as enabling
594 observability might increase runtime or memory usage.
595
596 Calls to this method are guaranteed to alternate with calls to
597 |PrimitiveProvider.per_test_case_context_manager|. For example:
598
599 .. code-block:: python
600
601 # test function starts
602 per_test_case_context_manager()
603 on_observation()
604 per_test_case_context_manager()
605 on_observation()
606 ...
607 # test function ends
608
609 Note that |PrimitiveProvider.on_observation| will not be called for test
610 cases which did not use this provider during generation, for example
611 during |Phase.reuse| or |Phase.shrink|, or because Hypothesis switched
612 to the standard Hypothesis backend after this backend raised too many
613 |BackendCannotProceed| exceptions.
614 """
615
616 def span_start(self, label: int, /) -> None: # noqa: B027 # non-abstract noop
617 """Marks the beginning of a semantically meaningful span of choices.
618
619 Spans are a depth-first tree structure. A span is opened by a call to
620 |PrimitiveProvider.span_start|, and a call to |PrimitiveProvider.span_end|
621 closes the most recently opened span. So the following sequence of calls:
622
623 .. code-block:: python
624
625 span_start(label=1)
626 n1 = draw_integer()
627 span_start(label=2)
628 b1 = draw_boolean()
629 n2 = draw_integer()
630 span_end()
631 f1 = draw_float()
632 span_end()
633
634 produces the following two spans of choices:
635
636 .. code-block::
637
638 1: [n1, b1, n2, f1]
639 2: [b1, n2]
640
641 Hypothesis uses spans to denote "semantically meaningful" sequences of
642 choices. For instance, Hypothesis opens a span for the sequence of choices
643 made while drawing from each strategy. Not every span corresponds to a
644 strategy; the generation of e.g. each element in |st.lists| is also marked
645 with a span, among others.
646
647 ``label`` is an opaque integer, which has no defined semantics.
648 The only guarantee made by Hypothesis is that all spans with the same
649 "meaning" will share the same ``label``. So all spans from the same
650 strategy will share the same label, as will e.g. the spans for |st.lists|
651 elements.
652
653 Providers can track calls to |PrimitiveProvider.span_start| and
654 |PrimitiveProvider.span_end| to learn something about the semantics of
655 the test's choice sequence. For instance, a provider could track the depth
656 of the span tree, or the number of unique labels, which says something about
657 the complexity of the choices being generated. Or a provider could track
658 the span tree across test cases in order to determine what strategies are
659 being used in what contexts.
660
661 It is possible for Hypothesis to start and immediately stop a span,
662 without calling a ``draw_*`` method in between. These spans contain zero
663 choices.
664
665 Hypothesis will always balance the number of calls to
666 |PrimitiveProvider.span_start| and |PrimitiveProvider.span_end|. A call
667 to |PrimitiveProvider.span_start| will always be followed by a call to
668 |PrimitiveProvider.span_end| before the end of the test case.
669
670 |PrimitiveProvider.span_start| is called from ``ConjectureData.start_span()``
671 internally.
672 """
673
674 def span_end(self, discard: bool, /) -> None: # noqa: B027
675 """Marks the end of a semantically meaningful span of choices.
676
677 ``discard`` is ``True`` when the draw was filtered out or otherwise marked
678 as unlikely to contribute to the input data as seen by the user's test.
679 Note however that side effects can make this determination unsound.
680
681 |PrimitiveProvider.span_end| is called from ``ConjectureData.stop_span()``
682 internally.
683 """
684
685
686class HypothesisProvider(PrimitiveProvider):
687 lifetime = "test_case"
688
689 def __init__(self, conjecturedata: Optional["ConjectureData"], /):
690 super().__init__(conjecturedata)
691 self._random = None if self._cd is None else self._cd._random
692
693 @cached_property
694 def _local_constants(self):
695 # defer computation of local constants until/if we need it
696 return _get_local_constants()
697
698 def _maybe_draw_constant(
699 self,
700 choice_type: ChoiceTypeT,
701 constraints: ChoiceConstraintsT,
702 *,
703 p: float = 0.05,
704 ) -> Optional["ConstantT"]:
705 assert self._random is not None
706 assert choice_type != "boolean"
707 # check whether we even want a constant before spending time computing
708 # and caching the allowed constants.
709 if self._random.random() > p:
710 return None
711
712 # note: this property access results in computation being done
713 assert self._local_constants is not None
714
715 key = (choice_type, choice_constraints_key(choice_type, constraints))
716 if key not in CONSTANTS_CACHE:
717 CONSTANTS_CACHE[key] = (
718 tuple(
719 choice
720 for choice in GLOBAL_CONSTANTS.set_for_type(choice_type)
721 if choice_permitted(choice, constraints)
722 ),
723 tuple(
724 choice
725 for choice in self._local_constants.set_for_type(choice_type)
726 if choice_permitted(choice, constraints)
727 ),
728 )
729
730 # split constants into two pools, so we still have a good chance to draw
731 # global constants even if there are many local constants.
732 (global_constants, local_constants) = CONSTANTS_CACHE[key]
733 constants_lists = ([global_constants] if global_constants else []) + (
734 [local_constants] if local_constants else []
735 )
736 if not constants_lists:
737 return None
738
739 # At this point, we've decided to use a constant. Now we select which pool
740 # to draw that constant from.
741 #
742 # Note that this approach has a different probability distribution than
743 # attempting a random.random for both global_constants and local_constants.
744 constants = self._random.choice(constants_lists)
745 return self._random.choice(constants)
746
747 def draw_boolean(
748 self,
749 p: float = 0.5,
750 ) -> bool:
751 assert self._random is not None
752
753 if p <= 0:
754 return False
755 if p >= 1:
756 return True
757
758 return self._random.random() < p
759
760 def draw_integer(
761 self,
762 min_value: int | None = None,
763 max_value: int | None = None,
764 *,
765 weights: dict[int, float] | None = None,
766 shrink_towards: int = 0,
767 ) -> int:
768 assert self._cd is not None
769 if (
770 constant := self._maybe_draw_constant(
771 "integer",
772 {
773 "min_value": min_value,
774 "max_value": max_value,
775 "weights": weights,
776 "shrink_towards": shrink_towards,
777 },
778 )
779 ) is not None:
780 assert isinstance(constant, int)
781 return constant
782
783 center = 0
784 if min_value is not None:
785 center = max(min_value, center)
786 if max_value is not None:
787 center = min(max_value, center)
788
789 if weights is not None:
790 assert min_value is not None
791 assert max_value is not None
792
793 # format of weights is a mapping of ints to p, where sum(p) < 1.
794 # The remaining probability mass is uniformly distributed over
795 # *all* ints (not just the unmapped ones; this is somewhat undesirable,
796 # but simplifies things).
797 #
798 # We assert that sum(p) is strictly less than 1 because it simplifies
799 # handling forced values when we can force into the unmapped probability
800 # mass. We should eventually remove this restriction.
801 sampler = Sampler(
802 [1 - sum(weights.values()), *weights.values()], observe=False
803 )
804 # if we're forcing, it's easiest to force into the unmapped probability
805 # mass and then force the drawn value after.
806 idx = sampler.sample(self._cd)
807
808 if idx == 0:
809 return self._draw_bounded_integer(min_value, max_value)
810 # implicit reliance on dicts being sorted for determinism
811 return list(weights)[idx - 1]
812
813 if min_value is None and max_value is None:
814 return self._draw_unbounded_integer()
815
816 if min_value is None:
817 assert max_value is not None
818 probe = max_value + 1
819 while max_value < probe:
820 probe = center + self._draw_unbounded_integer()
821 return probe
822
823 if max_value is None:
824 assert min_value is not None
825 probe = min_value - 1
826 while probe < min_value:
827 probe = center + self._draw_unbounded_integer()
828 return probe
829
830 return self._draw_bounded_integer(min_value, max_value)
831
832 def draw_float(
833 self,
834 *,
835 min_value: float = -math.inf,
836 max_value: float = math.inf,
837 allow_nan: bool = True,
838 smallest_nonzero_magnitude: float,
839 ) -> float:
840 assert self._random is not None
841
842 constraints: FloatConstraints = {
843 "min_value": min_value,
844 "max_value": max_value,
845 "allow_nan": allow_nan,
846 "smallest_nonzero_magnitude": smallest_nonzero_magnitude,
847 }
848 if (
849 constant := self._maybe_draw_constant("float", constraints, p=0.15)
850 ) is not None:
851 assert isinstance(constant, float)
852 return constant
853
854 # on top of the probability to draw a constant float, we independently
855 # upweight 0.0/-0.0, math.inf, -math.inf, nans, and boundary values.
856 weird_floats = [
857 f
858 for f in [
859 0.0,
860 -0.0,
861 math.inf,
862 -math.inf,
863 math.nan,
864 -math.nan,
865 SIGNALING_NAN,
866 -SIGNALING_NAN,
867 min_value,
868 next_up(min_value),
869 min_value + 1,
870 max_value - 1,
871 next_down(max_value),
872 max_value,
873 ]
874 if choice_permitted(f, constraints)
875 ]
876
877 if weird_floats and self._random.random() < 0.05:
878 return self._random.choice(weird_floats)
879
880 clamper = make_float_clamper(
881 min_value,
882 max_value,
883 smallest_nonzero_magnitude=smallest_nonzero_magnitude,
884 allow_nan=allow_nan,
885 )
886
887 result = self._draw_float()
888 if allow_nan and math.isnan(result):
889 clamped = result # pragma: no cover
890 else:
891 clamped = clamper(result)
892 if float_to_int(clamped) != float_to_int(result) and not (
893 math.isnan(result) and allow_nan
894 ):
895 result = clamped
896 return result
897
898 def draw_string(
899 self,
900 intervals: IntervalSet,
901 *,
902 min_size: int = 0,
903 max_size: int = COLLECTION_DEFAULT_MAX_SIZE,
904 ) -> str:
905 assert self._cd is not None
906 assert self._random is not None
907
908 if len(intervals) == 0:
909 return ""
910
911 if (
912 constant := self._maybe_draw_constant(
913 "string",
914 {"intervals": intervals, "min_size": min_size, "max_size": max_size},
915 )
916 ) is not None:
917 assert isinstance(constant, str)
918 return constant
919
920 average_size = min(
921 max(min_size * 2, min_size + 5),
922 0.5 * (min_size + max_size),
923 )
924
925 chars = []
926 elements = many(
927 self._cd,
928 min_size=min_size,
929 max_size=max_size,
930 average_size=average_size,
931 observe=False,
932 )
933 while elements.more():
934 if len(intervals) > 256:
935 if self.draw_boolean(0.2):
936 i = self._random.randint(256, len(intervals) - 1)
937 else:
938 i = self._random.randint(0, 255)
939 else:
940 i = self._random.randint(0, len(intervals) - 1)
941
942 chars.append(intervals.char_in_shrink_order(i))
943
944 return "".join(chars)
945
946 def draw_bytes(
947 self,
948 min_size: int = 0,
949 max_size: int = COLLECTION_DEFAULT_MAX_SIZE,
950 ) -> bytes:
951 assert self._cd is not None
952 assert self._random is not None
953
954 if (
955 constant := self._maybe_draw_constant(
956 "bytes", {"min_size": min_size, "max_size": max_size}
957 )
958 ) is not None:
959 assert isinstance(constant, bytes)
960 return constant
961
962 buf = bytearray()
963 average_size = min(
964 max(min_size * 2, min_size + 5),
965 0.5 * (min_size + max_size),
966 )
967 elements = many(
968 self._cd,
969 min_size=min_size,
970 max_size=max_size,
971 average_size=average_size,
972 observe=False,
973 )
974 while elements.more():
975 buf += self._random.randbytes(1)
976
977 return bytes(buf)
978
979 def _draw_float(self) -> float:
980 assert self._random is not None
981
982 f = lex_to_float(self._random.getrandbits(64))
983 sign = 1 if self._random.getrandbits(1) else -1
984 return sign * f
985
986 def _draw_unbounded_integer(self) -> int:
987 assert self._cd is not None
988 assert self._random is not None
989
990 size = INT_SIZES[INT_SIZES_SAMPLER.sample(self._cd)]
991
992 r = self._random.getrandbits(size)
993 sign = r & 1
994 r >>= 1
995 if sign:
996 r = -r
997 return r
998
999 def _draw_bounded_integer(
1000 self,
1001 lower: int,
1002 upper: int,
1003 *,
1004 vary_size: bool = True,
1005 ) -> int:
1006 assert lower <= upper
1007 assert self._cd is not None
1008 assert self._random is not None
1009
1010 if lower == upper:
1011 return lower
1012
1013 bits = (upper - lower).bit_length()
1014 if bits > 24 and vary_size and self._random.random() < 7 / 8:
1015 # For large ranges, we combine the uniform random distribution
1016 # with a weighting scheme with moderate chance. Cutoff at 2 ** 24 so that our
1017 # choice of unicode characters is uniform but the 32bit distribution is not.
1018 idx = INT_SIZES_SAMPLER.sample(self._cd)
1019 cap_bits = min(bits, INT_SIZES[idx])
1020 upper = min(upper, lower + 2**cap_bits - 1)
1021 return self._random.randint(lower, upper)
1022
1023 return self._random.randint(lower, upper)
1024
1025
1026# Masks for masking off the first byte of an n-bit buffer.
1027# The appropriate mask is stored at position n % 8.
1028BYTE_MASKS = [(1 << n) - 1 for n in range(8)]
1029BYTE_MASKS[0] = 255
1030
1031
1032class BytestringProvider(PrimitiveProvider):
1033 lifetime = "test_case"
1034
1035 def __init__(
1036 self, conjecturedata: Optional["ConjectureData"], /, *, bytestring: bytes
1037 ):
1038 super().__init__(conjecturedata)
1039 self.bytestring = bytestring
1040 self.index = 0
1041 self.drawn = bytearray()
1042
1043 def _draw_bits(self, n):
1044 if n == 0: # pragma: no cover
1045 return 0
1046 n_bytes = bits_to_bytes(n)
1047 if self.index + n_bytes > len(self.bytestring):
1048 self._cd.mark_overrun()
1049 buf = bytearray(self.bytestring[self.index : self.index + n_bytes])
1050 self.index += n_bytes
1051
1052 buf[0] &= BYTE_MASKS[n % 8]
1053 buf = bytes(buf)
1054 self.drawn += buf
1055 return int_from_bytes(buf)
1056
1057 def draw_boolean(
1058 self,
1059 p: float = 0.5,
1060 ) -> bool:
1061 if p <= 0:
1062 return False
1063 if p >= 1:
1064 return True
1065
1066 # always use one byte for booleans to maintain constant draw size.
1067 # If a probability requires more than 8 bits to represent precisely,
1068 # the result will be slightly biased, but not badly.
1069 bits = 8
1070 size = 2**bits
1071 # always leave at least one value that can be true, even for very small
1072 # p.
1073 falsey = max(1, math.floor(size * (1 - p)))
1074 n = self._draw_bits(bits)
1075 return n >= falsey
1076
1077 def draw_integer(
1078 self,
1079 min_value: int | None = None,
1080 max_value: int | None = None,
1081 *,
1082 weights: dict[int, float] | None = None,
1083 shrink_towards: int = 0,
1084 ) -> int:
1085 assert self._cd is not None
1086
1087 # we explicitly ignore integer weights for now, as they are likely net
1088 # negative on fuzzer performance.
1089
1090 if min_value is None and max_value is None:
1091 min_value = -(2**127)
1092 max_value = 2**127 - 1
1093 elif min_value is None:
1094 assert max_value is not None
1095 min_value = max_value - 2**64
1096 elif max_value is None:
1097 assert min_value is not None
1098 max_value = min_value + 2**64
1099
1100 if min_value == max_value:
1101 return min_value
1102
1103 bits = (max_value - min_value).bit_length()
1104 value = self._draw_bits(bits)
1105 while not (min_value <= value <= max_value):
1106 value = self._draw_bits(bits)
1107 return value
1108
1109 def draw_float(
1110 self,
1111 *,
1112 min_value: float = -math.inf,
1113 max_value: float = math.inf,
1114 allow_nan: bool = True,
1115 smallest_nonzero_magnitude: float,
1116 ) -> float:
1117 n = self._draw_bits(64)
1118 sign = -1 if n >> 64 else 1
1119 f = sign * lex_to_float(n & ((1 << 64) - 1))
1120 clamper = make_float_clamper(
1121 min_value,
1122 max_value,
1123 smallest_nonzero_magnitude=smallest_nonzero_magnitude,
1124 allow_nan=allow_nan,
1125 )
1126 return clamper(f)
1127
1128 def _draw_collection(self, min_size, max_size, *, alphabet_size):
1129 average_size = min(
1130 max(min_size * 2, min_size + 5),
1131 0.5 * (min_size + max_size),
1132 )
1133 elements = many(
1134 self._cd,
1135 min_size=min_size,
1136 max_size=max_size,
1137 average_size=average_size,
1138 observe=False,
1139 )
1140 values = []
1141 while elements.more():
1142 values.append(self.draw_integer(0, alphabet_size - 1))
1143 return values
1144
1145 def draw_string(
1146 self,
1147 intervals: IntervalSet,
1148 *,
1149 min_size: int = 0,
1150 max_size: int = COLLECTION_DEFAULT_MAX_SIZE,
1151 ) -> str:
1152 values = self._draw_collection(min_size, max_size, alphabet_size=len(intervals))
1153 return "".join(chr(intervals[v]) for v in values)
1154
1155 def draw_bytes(
1156 self,
1157 min_size: int = 0,
1158 max_size: int = COLLECTION_DEFAULT_MAX_SIZE,
1159 ) -> bytes:
1160 values = self._draw_collection(min_size, max_size, alphabet_size=2**8)
1161 return bytes(values)
1162
1163
1164class URandom(Random):
1165 # we reimplement a Random instance instead of using SystemRandom, because
1166 # os.urandom is not guaranteed to read from /dev/urandom.
1167
1168 @staticmethod
1169 def _urandom(size: int) -> bytes:
1170 with open("/dev/urandom", "rb") as f:
1171 return f.read(size)
1172
1173 def getrandbits(self, k: int) -> int:
1174 assert k >= 0
1175 size = bits_to_bytes(k)
1176 n = int_from_bytes(self._urandom(size))
1177 # trim excess bits
1178 return n >> (size * 8 - k)
1179
1180 def random(self) -> float:
1181 # adapted from random.SystemRandom.random
1182 return (int_from_bytes(self._urandom(7)) >> 3) * (2**-53)
1183
1184
1185class URandomProvider(HypothesisProvider):
1186 # A provider which reads directly from /dev/urandom as its source of randomness.
1187 # This provider exists to provide better Hypothesis integration with Antithesis
1188 # (https://antithesis.com/), which interprets calls to /dev/urandom as the
1189 # randomness to mutate. This effectively gives Antithesis control over
1190 # the choices made by the URandomProvider.
1191 #
1192 # If you are not using Antithesis, you probably don't want to use this
1193 # provider.
1194
1195 def __init__(self, conjecturedata: Optional["ConjectureData"], /):
1196 super().__init__(conjecturedata)
1197 if WINDOWS: # pragma: no cover
1198 warnings.warn(
1199 "/dev/urandom is not available on windows. Falling back to "
1200 'standard PRNG generation (equivalent to backend="hypothesis").',
1201 HypothesisWarning,
1202 stacklevel=1,
1203 )
1204 # don't overwrite the HypothesisProvider self._random attribute in
1205 # this case
1206 else:
1207 self._random = URandom()