1# This file is part of Hypothesis, which may be found at
2# https://github.com/HypothesisWorks/hypothesis/
3#
4# Copyright the Hypothesis Authors.
5# Individual contributors are listed in AUTHORS.rst and the git log.
6#
7# This Source Code Form is subject to the terms of the Mozilla Public License,
8# v. 2.0. If a copy of the MPL was not distributed with this file, You can
9# obtain one at https://mozilla.org/MPL/2.0/.
10
11"""This file can approximately be considered the collection of hypothesis going
12to really unreasonable lengths to produce pretty output."""
13
14import ast
15import hashlib
16import inspect
17import linecache
18import os
19import re
20import sys
21import textwrap
22import types
23import warnings
24from collections.abc import MutableMapping, Sequence
25from functools import partial, wraps
26from inspect import Parameter, Signature
27from io import StringIO
28from keyword import iskeyword
29from random import _inst as global_random_instance
30from tokenize import COMMENT, detect_encoding, generate_tokens, untokenize
31from types import ModuleType
32from typing import Any, Callable, Optional, TypeVar
33from unittest.mock import _patch as PatchType
34from weakref import WeakKeyDictionary
35
36from hypothesis.errors import HypothesisWarning
37from hypothesis.internal.compat import is_typed_named_tuple
38from hypothesis.utils.conventions import not_set
39from hypothesis.vendor.pretty import pretty
40
# Generic type variable; `proxies` uses it to type the result as its target.
T = TypeVar("T")

# True only when the READTHEDOCS environment variable is exactly "True".
READTHEDOCS = os.environ.get("READTHEDOCS", None) == "True"
# Memo of extracted lambda source text, used by extract_lambda_source().
# Keys are held weakly, so a cached entry does not keep its function alive.
LAMBDA_SOURCE_CACHE: MutableMapping[Callable, str] = WeakKeyDictionary()
45
46
def is_mock(obj: object) -> bool:
    """Report whether ``obj`` appears to be a mock object."""
    # Mocks answer "yes" to (almost) any attribute lookup, so rather than
    # checking types we probe for an attribute name that nothing would
    # implement by accident, and that anyone implementing it on purpose
    # should understand the consequences of.
    marker = "hypothesis_internal_is_this_a_mock_check"
    return hasattr(obj, marker)
57
58
59def _clean_source(src: str) -> bytes:
60 """Return the source code as bytes, without decorators or comments.
61
62 Because this is part of our database key, we reduce the cache invalidation
63 rate by ignoring decorators, comments, trailing whitespace, and empty lines.
64 We can't just use the (dumped) AST directly because it changes between Python
65 versions (e.g. ast.Constant)
66 """
67 # Get the (one-indexed) line number of the function definition, and drop preceding
68 # lines - i.e. any decorators, so that adding `@example()`s keeps the same key.
69 try:
70 funcdef = ast.parse(src).body[0]
71 src = "".join(src.splitlines(keepends=True)[funcdef.lineno - 1 :])
72 except Exception:
73 pass
74 # Remove blank lines and use the tokenize module to strip out comments,
75 # so that those can be changed without changing the database key.
76 try:
77 src = untokenize(
78 t for t in generate_tokens(StringIO(src).readline) if t.type != COMMENT
79 )
80 except Exception:
81 pass
82 # Finally, remove any trailing whitespace and empty lines as a last cleanup.
83 return "\n".join(x.rstrip() for x in src.splitlines() if x.rstrip()).encode()
84
85
def function_digest(function: Any) -> bytes:
    """Return a SHA-384 digest (as bytes) identifying ``function``.

    The digest is stable across multiple invocations and multiple processes,
    and is prone to changing significantly in response to minor changes to
    the function.

    Uniqueness is not guaranteed, though it is usual. Digest collisions lead
    to unfortunate but not fatal problems during database replay.
    """
    hasher = hashlib.sha384()

    try:
        src = inspect.getsource(function)
    except (OSError, TypeError):
        src = None

    if src is not None:
        hasher.update(_clean_source(src))
    else:
        # No source available, so fall back to the name if there is one.
        # NOTE: We might want to change this to always adding
        # function.__qualname__, to differentiate for example two classes
        # having the same function implementation with class-dependent
        # behaviour.
        try:
            hasher.update(function.__name__.encode())
        except AttributeError:
            pass

    # The signature is hashed in addition to the source because it can
    # include the effects of decorators, or of post-hoc assignment to the
    # .__signature__ attribute.
    try:
        signature_bytes = repr(get_signature(function)).encode()
    except Exception:
        pass
    else:
        hasher.update(signature_bytes)

    # This attribute is set in order to distinguish e.g.
    # @pytest.mark.parametrize cases.
    try:
        extra = function._hypothesis_internal_add_digest
    except AttributeError:
        pass
    else:
        hasher.update(extra)

    return hasher.digest()
120
121
def check_signature(sig: Signature) -> None:
    """Raise ValueError if ``sig`` has a parameter whose name is a keyword.

    Positional-only parameters are exempt from the check.
    """
    # Backport from Python 3.11; see https://github.com/python/cpython/pull/92065
    for param in sig.parameters.values():
        if param.kind is param.POSITIONAL_ONLY or not iskeyword(param.name):
            continue
        raise ValueError(
            f"Signature {sig!r} contains a parameter named {param.name!r}, "
            f"but this is a SyntaxError because `{param.name}` is a keyword. "
            "You, or a library you use, must have manually created an "
            "invalid signature - this will be an error in Python 3.11+"
        )
132
133
def get_signature(
    target: Any, *, follow_wrapped: bool = True, eval_str: bool = False
) -> Signature:
    """Return a validated Signature for ``target``.

    Three cases are handled, in this order:

    1. ``target.patchings`` is a list of ``unittest.mock`` patch objects:
       return a fixed ``(*args, **keywargs)`` signature.
    2. ``target.__signature__`` is a Signature: validate and return it,
       dropping a leading required positional ``self`` when ``target`` is a
       class or a method.
    3. Otherwise defer to ``inspect.signature`` and validate the result.

    Validation is done by check_signature(), which may raise ValueError.
    """
    # Special case for use of `@unittest.mock.patch` decorator, mimicking the
    # behaviour of getfullargspec instead of reporting unusable arguments.
    patches = getattr(target, "patchings", None)
    if isinstance(patches, list) and all(isinstance(p, PatchType) for p in patches):
        var_positional = Parameter("args", Parameter.VAR_POSITIONAL)
        var_keyword = Parameter("keywargs", Parameter.VAR_KEYWORD)
        return Signature([var_positional, var_keyword])

    # This special case covers unusual codegen like Pydantic models.
    custom = getattr(target, "__signature__", None)
    if isinstance(custom, Signature):
        check_signature(custom)
        # Ignore the `self` argument if that's been (incorrectly) included
        # in the custom signature of a class or method.
        if custom.parameters and (inspect.isclass(target) or inspect.ismethod(target)):
            first, *rest = custom.parameters.values()
            looks_like_self = (
                first.name == "self"
                and first.default is Parameter.empty
                and first.kind.name.startswith("POSITIONAL_")
            )
            if looks_like_self:
                return custom.replace(parameters=rest)
        return custom

    # eval_str is only supported by Python 3.10 and newer, so only pass it
    # through on those versions.
    extra = {"eval_str": eval_str} if sys.version_info[:2] >= (3, 10) else {}
    sig = inspect.signature(target, follow_wrapped=follow_wrapped, **extra)
    check_signature(sig)
    return sig
176
177
def arg_is_required(param: Parameter) -> bool:
    """Return True if ``param`` has no default and can be passed by keyword.

    That is, its kind is POSITIONAL_OR_KEYWORD or KEYWORD_ONLY.
    """
    if param.default is not Parameter.empty:
        return False
    nameable_kinds = (Parameter.POSITIONAL_OR_KEYWORD, Parameter.KEYWORD_ONLY)
    return param.kind in nameable_kinds
183
184
def required_args(target, args=(), kwargs=()):
    """Return the set of names of required arguments to ``target`` which are
    not covered by ``args`` or ``kwargs``.

    builds() uses this to decide which arguments to try to fill from type
    hints. ``target`` may be any callable (including classes and bound
    methods); ``args`` and ``kwargs`` should be as they are passed to
    builds() - a tuple of values and a dict of names: values.

    If no signature can be determined for ``target`` (ValueError or
    TypeError), an empty set is returned.
    """
    # Workaround for typed NamedTuples, which don't have nice inits: work
    # directly from the declared field names.
    if inspect.isclass(target) and is_typed_named_tuple(target):
        fields = target._fields
        supplied = set(kwargs) | set(fields[: len(args)])
        return set(fields) - supplied

    # Otherwise, do the right thing with the signature if there is one.
    try:
        sig = get_signature(target)
    except (ValueError, TypeError):
        return set()

    # Parameters consumed by the positional args are skipped by position.
    not_positionally_filled = list(sig.parameters.items())[len(args) :]
    missing = set()
    for name, param in not_positionally_filled:
        if arg_is_required(param) and name not in kwargs:
            missing.add(name)
    return missing
208
209
def convert_keyword_arguments(
    function: Any, args: Sequence[object], kwargs: dict[str, object]
) -> tuple[tuple[object, ...], dict[str, object]]:
    """Bind ``args`` and ``kwargs`` to ``function`` and return the equivalent
    ``(positional, keyword)`` pair with as much as possible made positional.

    The returned dictionary is empty unless ``function`` has keyword-only
    arguments or ``**kwargs``. The signature is inspected without following
    ``__wrapped__``.
    """
    bound = inspect.signature(function, follow_wrapped=False).bind(*args, **kwargs)
    positional, keyword = bound.args, bound.kwargs
    return positional, keyword
220
221
def convert_positional_arguments(
    function: Any, args: Sequence[object], kwargs: dict[str, object]
) -> tuple[tuple[object, ...], dict[str, object]]:
    """Bind ``args`` and ``kwargs`` to ``function`` and return a pair
    ``(new_args, new_kwargs)`` with every possible argument moved to kwargs.

    ``new_args`` is only non-empty if ``function`` has positional-only
    arguments or ``*args``. The signature is inspected without following
    ``__wrapped__``.
    """
    sig = inspect.signature(function, follow_wrapped=False)
    by_name = dict(sig.bind(*args, **kwargs).arguments)
    positional: list[object] = []

    for param in sig.parameters.values():
        if param.name not in by_name:
            continue
        kind = param.kind
        if kind is Parameter.POSITIONAL_ONLY:
            # Cannot be passed by name, so it has to stay positional.
            positional.append(by_name.pop(param.name))
        elif kind is Parameter.VAR_POSITIONAL:
            # Splice the captured *args tuple into the positional list.
            positional.extend(by_name.pop(param.name))
        elif kind is Parameter.VAR_KEYWORD:
            # Flatten the captured **kwargs mapping into the keyword dict;
            # its keys must not clash with any other bound name.
            extra = by_name.pop(param.name)
            assert set(extra).isdisjoint(by_name)
            by_name.update(extra)

    return tuple(positional), by_name
244
245
def ast_arguments_matches_signature(args: ast.arguments, sig: Signature) -> bool:
    """Return True if the AST argument list ``args`` declares exactly the
    parameter names and kinds, in the same order, as ``sig``.

    Defaults and annotations are not compared.
    """
    expected: list[tuple[str, int]] = [
        (node.arg, Parameter.POSITIONAL_ONLY) for node in args.posonlyargs
    ]
    expected += [(node.arg, Parameter.POSITIONAL_OR_KEYWORD) for node in args.args]
    if args.vararg is not None:
        expected.append((args.vararg.arg, Parameter.VAR_POSITIONAL))
    expected += [(node.arg, Parameter.KEYWORD_ONLY) for node in args.kwonlyargs]
    if args.kwarg is not None:
        expected.append((args.kwarg.arg, Parameter.VAR_KEYWORD))

    actual = [(param.name, param.kind) for param in sig.parameters.values()]
    return expected == actual
259
260
def is_first_param_referenced_in_function(f: Any) -> bool:
    """Is the first parameter of ``f`` read anywhere within its source?

    Returns True as well when the source of ``f`` cannot be fetched or
    parsed, because in that case we cannot show that it is unused.
    """
    try:
        tree = ast.parse(textwrap.dedent(inspect.getsource(f)))
    except Exception:
        return True  # Assume it's OK unless we know otherwise

    first_param = next(iter(get_signature(f).parameters))
    for node in ast.walk(tree):
        if not isinstance(node, ast.Name):
            continue
        # Only loads count as a reference; stores and deletes do not.
        if node.id == first_param and isinstance(node.ctx, ast.Load):
            return True
    return False
274
275
def extract_all_lambdas(tree, matching_signature):
    """Return the lambda nodes in ``tree`` whose argument list matches
    ``matching_signature``, in depth-first source order.

    The search never descends into a lambda, whether or not it matched, so
    lambdas nested inside another lambda are not reported.
    """
    found = []
    # Explicit stack for a pre-order walk; children are pushed reversed so
    # that they are popped in their original field order.
    pending = [tree]
    while pending:
        node = pending.pop()
        if isinstance(node, ast.Lambda):
            if ast_arguments_matches_signature(node.args, matching_signature):
                found.append(node)
            continue
        pending.extend(reversed(list(ast.iter_child_nodes(node))))
    return found
287
288
# Patterns used by _extract_lambda_source() to normalise source text.
# A backslash immediately followed by a newline (explicit line continuation).
LINE_CONTINUATION = re.compile(r"\\\n")
# Any run of one or more whitespace characters.
WHITESPACE = re.compile(r"\s+")
# Heuristic for a trailing comment: a "#" followed only by characters that
# are not quotes, up to the end of the string.
PROBABLY_A_COMMENT = re.compile("""#[^'"]*$""")
# An opening parenthesis followed by a space, and a space followed by a
# closing parenthesis.
SPACE_FOLLOWS_OPEN_BRACKET = re.compile(r"\( ")
SPACE_PRECEDES_CLOSE_BRACKET = re.compile(r" \)")
294
295
def _extract_lambda_source(f):
    """Extracts a single lambda expression from the string source. Returns a
    string indicating an unknown body if it gets confused in any way.

    The placeholder has the form ``lambda <params>: <unknown>``, built from
    the signature of ``f``. On success the result is the lambda's source
    text with whitespace collapsed onto a single line.

    This is not a good function and I am sorry for it. Forgive me my
    sins, oh lord
    """
    # You might be wondering how a lambda can have a return-type annotation?
    # The answer is that we add this at runtime, in new_given_signature(),
    # and we do support strange choices as applying @given() to a lambda.
    sig = inspect.signature(f)
    assert sig.return_annotation in (Parameter.empty, None), sig

    # Using pytest-xdist on Python 3.13, there's an entry in the linecache for
    # file "<string>", which then returns nonsense to getsource. Discard it.
    linecache.cache.pop("<string>", None)

    # Fallback text returned from every "gave up" path below. str(sig) is
    # wrapped in parentheses, which [1:-1] removes.
    if sig.parameters:
        if_confused = f"lambda {str(sig)[1:-1]}: <unknown>"
    else:
        if_confused = "lambda: <unknown>"
    try:
        source = inspect.getsource(f)
    except OSError:
        return if_confused

    # Collapse the source onto one line: continuations and all whitespace
    # runs (including newlines) become single spaces.
    source = LINE_CONTINUATION.sub(" ", source)
    source = WHITESPACE.sub(" ", source)
    source = source.strip()
    if "lambda" not in source and sys.platform == "emscripten":  # pragma: no cover
        return if_confused  # work around Pyodide bug in inspect.getsource()
    assert "lambda" in source, source

    tree = None

    # First try to parse the text as-is. If that fails, drop characters from
    # the end one at a time until a prefix parses, giving up on this strategy
    # once the prefix no longer contains "lambda".
    try:
        tree = ast.parse(source)
    except SyntaxError:
        for i in range(len(source) - 1, len("lambda"), -1):
            prefix = source[:i]
            if "lambda" not in prefix:
                break
            try:
                tree = ast.parse(prefix)
                source = prefix
                break
            except SyntaxError:
                continue
    # Second strategy, for text starting with "@" or ".": skip that first
    # character and grow the candidate from the front until it parses.
    if tree is None and source.startswith(("@", ".")):
        # This will always eventually find a valid expression because the
        # decorator or chained operator must be a valid Python function call,
        # so will eventually be syntactically valid and break out of the loop.
        # Thus, this loop can never terminate normally.
        for i in range(len(source) + 1):
            p = source[1:i]
            if "lambda" in p:
                try:
                    tree = ast.parse(p)
                    source = p
                    break
                except SyntaxError:
                    pass
        else:
            raise NotImplementedError("expected to be unreachable")

    if tree is None:
        return if_confused

    # Only proceed if exactly one lambda in the parsed text has an argument
    # list matching the signature of `f`; otherwise we cannot tell which
    # lambda is ours.
    aligned_lambdas = extract_all_lambdas(tree, matching_signature=sig)
    if len(aligned_lambdas) != 1:
        return if_confused
    lambda_ast = aligned_lambdas[0]
    # The source was collapsed to a single line above.
    assert lambda_ast.lineno == 1

    # If the source code contains Unicode characters, the bytes of the original
    # file don't line up with the string indexes, and `col_offset` doesn't match
    # the string we're using. We need to convert the source code into bytes
    # before slicing.
    #
    # Under the hood, the inspect module is using `tokenize.detect_encoding` to
    # detect the encoding of the original source file. We'll use the same
    # approach to get the source code as bytes.
    #
    # See https://github.com/HypothesisWorks/hypothesis/issues/1700 for an
    # example of what happens if you don't correct for this.
    #
    # Note: if the code doesn't come from a file (but, for example, a doctest),
    # `getsourcefile` will return `None` and the `open()` call will fail with
    # an OSError. Or if `f` is a built-in function, in which case we get a
    # TypeError. In both cases, fall back to splitting the Unicode string.
    # It's not perfect, but it's the best we can do.
    try:
        with open(inspect.getsourcefile(f), "rb") as src_f:
            encoding, _ = detect_encoding(src_f.readline)

        source_bytes = source.encode(encoding)
        source_bytes = source_bytes[lambda_ast.col_offset :].strip()
        source = source_bytes.decode(encoding)
    except (OSError, TypeError):
        source = source[lambda_ast.col_offset :].strip()

    # This ValueError can be thrown in Python 3 if:
    #
    # - There's a Unicode character in the line before the Lambda, and
    # - For some reason we can't detect the source encoding of the file
    #
    # because slicing on `lambda_ast.col_offset` will account for bytes, but
    # the slice will be on Unicode characters.
    #
    # In practice this seems relatively rare, so we just give up rather than
    # trying to recover.
    try:
        source = source[source.index("lambda") :]
    except ValueError:
        return if_confused

    # The text now starts at "lambda" but may have trailing code after the
    # lambda. Try successively shorter prefixes, longest first, and keep the
    # first one that parses to a single lambda expression.
    for i in range(len(source), len("lambda"), -1):  # pragma: no branch
        try:
            parsed = ast.parse(source[:i])
            assert len(parsed.body) == 1
            assert parsed.body
            if isinstance(parsed.body[0].value, ast.Lambda):
                source = source[:i]
                break
        except SyntaxError:
            pass
    # Drop anything that looks like a trailing comment on each line.
    lines = source.split("\n")
    lines = [PROBABLY_A_COMMENT.sub("", l) for l in lines]
    source = "\n".join(lines)

    # Final tidy-up: single spaces only, and no padding inside parentheses.
    source = WHITESPACE.sub(" ", source)
    source = SPACE_FOLLOWS_OPEN_BRACKET.sub("(", source)
    source = SPACE_PRECEDES_CLOSE_BRACKET.sub(")", source)
    return source.strip()
430
431
def extract_lambda_source(f):
    """Return the source text of the lambda ``f``, memoised per function.

    Results are stored in LAMBDA_SOURCE_CACHE, so the extraction in
    _extract_lambda_source() runs at most once per cached function object.
    """
    cached = LAMBDA_SOURCE_CACHE.get(f)
    if cached is not None:
        return cached

    extracted = _extract_lambda_source(f)
    LAMBDA_SOURCE_CACHE[f] = extracted
    return extracted
441
442
def get_pretty_function_description(f: object) -> str:
    """Return a short human-readable description of the callable ``f``.

    - ``functools.partial`` objects are rendered by ``pretty``.
    - Objects without a ``__name__`` are rendered with ``repr``.
    - Lambdas are rendered as their extracted source.
    - Methods bound to an instance are rendered as ``<repr of self>.<name>``,
      or ``random.<name>`` for methods of the global random instance.
    - Attributes of ``dict`` itself are rendered as ``dict.<name>``.
    - Anything else is rendered as its bare ``__name__``.
    """
    if isinstance(f, partial):
        return pretty(f)
    if not hasattr(f, "__name__"):
        return repr(f)

    name = f.__name__  # type: ignore # validated by hasattr above
    if name == "<lambda>":
        return extract_lambda_source(f)

    if isinstance(f, (types.MethodType, types.BuiltinMethodType)):
        owner = f.__self__
        # Some objects, like `builtins.abs` are of BuiltinMethodType but have
        # their module as __self__. This might include c-extensions generally?
        # Only a genuine instance gets the "<owner>.<name>" treatment.
        bound_to_instance = (
            owner is not None
            and not inspect.isclass(owner)
            and not inspect.ismodule(owner)
        )
        if not bound_to_instance:
            return name
        if owner is global_random_instance:
            return f"random.{name}"
        return f"{owner!r}.{name}"

    if isinstance(name, str) and getattr(dict, name, object()) is f:
        # special case for keys/values views in from_type() / ghostwriter output
        return f"dict.{name}"
    return name
463
464
def nicerepr(v: Any) -> str:
    """Return a readable representation of ``v`` for use in reports.

    Plain Python functions get their pretty function description, classes
    are shown by ``__name__`` alone, and everything else goes through
    ``pretty`` with the ``~`` marker removed from bracketed type variables.
    """
    if inspect.isfunction(v):
        return get_pretty_function_description(v)
    if isinstance(v, type):
        return v.__name__
    # With TypeVar T, show List[T] instead of TypeError on List[~T]
    rendered = pretty(v)
    return re.sub(r"(\[)~([A-Z][a-z]*\])", r"\g<1>\g<2>", rendered)
473
474
def repr_call(
    f: Any, args: Sequence[object], kwargs: dict[str, object], *, reorder: bool = True
) -> str:
    """Return a single-line string that looks like the call ``f(*args, **kwargs)``.

    With ``reorder=True`` (the default) the arguments are first normalised by
    convert_positional_arguments(), so everything that can be passed by name
    is shown as a keyword. Keyword arguments matching named parameters of
    ``f`` are shown in signature order; any others follow, sorted by name.

    A lambda description is wrapped in parentheses so that the result still
    reads as a call. The caller's ``args`` and ``kwargs`` are never modified.

    Emits a HypothesisWarning if the approximate length of the result
    exceeds 30000 characters.
    """
    # Note: for multi-line pretty-printing, see RepresentationPrinter.repr_call()
    if reorder:
        args, kwargs = convert_positional_arguments(f, args, kwargs)
    else:
        # Entries are popped from `kwargs` below. convert_positional_arguments
        # already returns a fresh dict, but without reordering this would be
        # the caller's own dict, so take a private copy first.
        kwargs = dict(kwargs)

    bits = [nicerepr(x) for x in args]

    # Named (non-variadic) parameters first, in signature order.
    for p in get_signature(f).parameters.values():
        if p.name in kwargs and not p.kind.name.startswith("VAR_"):
            bits.append(f"{p.name}={nicerepr(kwargs.pop(p.name))}")
    # Whatever is left was not matched to a named parameter.
    bits.extend(f"{a}={nicerepr(kwargs[a])}" for a in sorted(kwargs))

    rep = nicerepr(f)
    if rep.startswith("lambda") and ":" in rep:
        rep = f"({rep})"
    repr_len = len(rep) + sum(len(b) for b in bits)  # approx
    if repr_len > 30000:
        warnings.warn(
            "Generating overly large repr. This is an expensive operation, and with "
            f"a length of {repr_len//1000} kB is unlikely to be useful. Use -Wignore "
            "to ignore the warning, or -Werror to get a traceback.",
            HypothesisWarning,
            stacklevel=2,
        )
    return rep + "(" + ", ".join(bits) + ")"
504
505
def check_valid_identifier(identifier: str) -> None:
    """Raise ValueError unless ``identifier.isidentifier()`` is true."""
    if identifier.isidentifier():
        return
    raise ValueError(f"{identifier!r} is not a valid python identifier")
509
510
# Modules already built by source_exec_as_module(), keyed by source text.
eval_cache: dict[str, ModuleType] = {}


def source_exec_as_module(source: str) -> ModuleType:
    """Execute ``source`` in a fresh module namespace and return the module.

    The module is named after the SHA-384 hex digest of the source. Results
    are memoised in ``eval_cache``, so the same source string is executed
    only once and always yields the same module object.
    """
    module = eval_cache.get(source)
    if module is not None:
        return module

    digest = hashlib.sha384(source.encode()).hexdigest()
    module = ModuleType("hypothesis_temporary_module_" + digest)
    assert isinstance(source, str)
    exec(source, module.__dict__)
    # Only cache once the source has executed without raising.
    eval_cache[source] = module
    return module
526
527
# Source template filled in by define_function_signature() via str.format.
# The generated module defines `accept({funcname})`, which returns an inner
# function called `{name}` with parameter list `{signature}` that forwards to
# `{funcname}({invocation})`. The leading newline is removed by .lstrip().
COPY_SIGNATURE_SCRIPT = """
from hypothesis.utils.conventions import not_set

def accept({funcname}):
    def {name}{signature}:
        return {funcname}({invocation})
    return {name}
""".lstrip()
536
537
def get_varargs(
    sig: Signature, kind: int = Parameter.VAR_POSITIONAL
) -> Optional[Parameter]:
    """Return the first parameter of ``sig`` with the given ``kind``, or
    None if there is no such parameter.

    With the default ``kind`` this finds the ``*args`` parameter; pass
    ``Parameter.VAR_KEYWORD`` to find ``**kwargs`` instead.
    """
    matching = (param for param in sig.parameters.values() if param.kind is kind)
    return next(matching, None)
545
546
def define_function_signature(name, docstring, signature):
    """A decorator which sets the name, signature and docstring of the function
    passed into it.

    The returned decorator does not modify the function it is given.
    Instead it generates (from COPY_SIGNATURE_SCRIPT) a new function with
    the requested name and parameter list which forwards its arguments to
    the decorated function, and returns that new function.

    Raises ValueError (via check_valid_identifier) if ``name`` or any
    parameter name is not a valid identifier. The name ``<lambda>`` is
    replaced by ``_lambda_`` before that check.
    """
    if name == "<lambda>":
        name = "_lambda_"
    check_valid_identifier(name)
    for a in signature.parameters:
        check_valid_identifier(a)

    # Names the generated code must not shadow when naming the wrapped function.
    used_names = {*signature.parameters, name}

    # Signature as it will appear in the generated source: annotations are
    # removed, and every default is replaced by the `not_set` placeholder,
    # which the template imports. The real defaults and annotations are
    # attached to the generated function afterwards, in accept() below.
    newsig = signature.replace(
        parameters=[
            p if p.default is signature.empty else p.replace(default=not_set)
            for p in (
                p.replace(annotation=signature.empty)
                for p in signature.parameters.values()
            )
        ],
        return_annotation=signature.empty,
    )

    # POSITIONAL_ONLY and POSITIONAL_OR_KEYWORD parameters.
    pos_args = [
        p
        for p in signature.parameters.values()
        if p.kind.name.startswith("POSITIONAL_")
    ]

    def accept(f):
        # Build the argument list of the forwarding call. The order of the
        # parts matters, since they are joined into a single call expression.
        fsig = inspect.signature(f, follow_wrapped=False)
        must_pass_as_kwargs = []
        invocation_parts = []
        for p in pos_args:
            # A name that `f` does not declare can only be passed
            # positionally if `f` accepts *args; otherwise pass it by keyword.
            if p.name not in fsig.parameters and get_varargs(fsig) is None:
                must_pass_as_kwargs.append(p.name)
            else:
                invocation_parts.append(p.name)
        if get_varargs(signature) is not None:
            invocation_parts.append("*" + get_varargs(signature).name)
        for k in must_pass_as_kwargs:
            invocation_parts.append(f"{k}={k}")
        for p in signature.parameters.values():
            if p.kind is p.KEYWORD_ONLY:
                invocation_parts.append(f"{p.name}={p.name}")
        varkw = get_varargs(signature, kind=Parameter.VAR_KEYWORD)
        if varkw:
            invocation_parts.append("**" + varkw.name)

        # Pick a name for the wrapped function that is not already used by
        # the generated function or its parameters. There is one more
        # candidate than there are used names, so one is always free.
        candidate_names = ["f"] + [f"f_{i}" for i in range(1, len(used_names) + 2)]

        for funcname in candidate_names:  # pragma: no branch
            if funcname not in used_names:
                break

        source = COPY_SIGNATURE_SCRIPT.format(
            name=name,
            funcname=funcname,
            signature=str(newsig),
            invocation=", ".join(invocation_parts),
        )
        result = source_exec_as_module(source).accept(f)
        result.__doc__ = docstring
        # Swap the `not_set` placeholders for the real default values.
        result.__defaults__ = tuple(
            p.default
            for p in signature.parameters.values()
            if p.default is not signature.empty and "POSITIONAL" in p.kind.name
        )
        kwdefaults = {
            p.name: p.default
            for p in signature.parameters.values()
            if p.default is not signature.empty and p.kind is p.KEYWORD_ONLY
        }
        if kwdefaults:
            result.__kwdefaults__ = kwdefaults
        # Re-attach the annotations that were removed from the generated source.
        annotations = {
            p.name: p.annotation
            for p in signature.parameters.values()
            if p.annotation is not signature.empty
        }
        if signature.return_annotation is not signature.empty:
            annotations["return"] = signature.return_annotation
        if annotations:
            result.__annotations__ = annotations
        return result

    return accept
633
634
def impersonate(target):
    """Decorator that makes a function look like ``target`` to external
    introspectors.

    The decorated function takes on the target's code filename and first
    line number, plus its ``__name__``, ``__module__`` and ``__doc__``.

    Note that this updates the function in place and returns that same
    function, rather than creating a new one.
    """

    def accept(f):
        # Lie shamelessly about where this code comes from, to hide the
        # hypothesis internals from pytest, ipython, and other runtime
        # introspection.
        donor = target.__code__
        f.__code__ = f.__code__.replace(
            co_filename=donor.co_filename,
            co_firstlineno=donor.co_firstlineno,
        )
        for attr in ("__name__", "__module__", "__doc__"):
            setattr(f, attr, getattr(target, attr))
        # Flag the globals of `f` so that its frames can be hidden from
        # tracebacks.
        f.__globals__["__hypothesistracebackhide__"] = True
        return f

    return accept
657
658
def proxies(target: T) -> Callable[[Callable], T]:
    """Return a decorator which dresses up a proxy function as ``target``.

    The decorated proxy is given the name, docstring and signature of
    ``target`` (read without following ``__wrapped__``), then passed through
    ``functools.wraps`` and ``impersonate``.
    """
    proxy_name = target.__name__.replace("<lambda>", "_lambda_")  # type: ignore
    proxy_doc = target.__doc__
    proxy_sig = get_signature(target, follow_wrapped=False)
    replace_sig = define_function_signature(proxy_name, proxy_doc, proxy_sig)

    def accept(proxy):
        resigned = replace_sig(proxy)
        wrapped = wraps(target)(resigned)
        return impersonate(target)(wrapped)

    return accept
670
671
def is_identity_function(f: object) -> bool:
    """Return True if ``f`` is described as a lambda returning its single
    argument unchanged, i.e. ``lambda x: x`` for some name ``x``."""
    # TODO: pattern-match the AST to handle `def ...` identity functions too
    description = get_pretty_function_description(f)
    return re.fullmatch(r"lambda (\w+): \1", description) is not None