1# This file is part of Hypothesis, which may be found at
2# https://github.com/HypothesisWorks/hypothesis/
3#
4# Copyright the Hypothesis Authors.
5# Individual contributors are listed in AUTHORS.rst and the git log.
6#
7# This Source Code Form is subject to the terms of the Mozilla Public License,
8# v. 2.0. If a copy of the MPL was not distributed with this file, You can
9# obtain one at https://mozilla.org/MPL/2.0/.
10
11import math
12from collections.abc import Hashable, Iterable, Sequence
13from typing import (
14 TYPE_CHECKING,
15 Callable,
16 Literal,
17 Optional,
18 TypedDict,
19 TypeVar,
20 Union,
21 cast,
22)
23
24import attr
25
26from hypothesis.errors import ChoiceTooLarge
27from hypothesis.internal.conjecture.floats import float_to_lex, lex_to_float
28from hypothesis.internal.conjecture.utils import identity
29from hypothesis.internal.floats import float_to_int, make_float_clamper, sign_aware_lte
30from hypothesis.internal.intervalsets import IntervalSet
31
32T = TypeVar("T")
33
34if TYPE_CHECKING:
35 from typing import TypeAlias
36
37
38class IntegerConstraints(TypedDict):
39 min_value: Optional[int]
40 max_value: Optional[int]
41 weights: Optional[dict[int, float]]
42 shrink_towards: int
43
44
45class FloatConstraints(TypedDict):
46 min_value: float
47 max_value: float
48 allow_nan: bool
49 smallest_nonzero_magnitude: float
50
51
52class StringConstraints(TypedDict):
53 intervals: IntervalSet
54 min_size: int
55 max_size: int
56
57
58class BytesConstraints(TypedDict):
59 min_size: int
60 max_size: int
61
62
63class BooleanConstraints(TypedDict):
64 p: float
65
66
67ChoiceT: "TypeAlias" = Union[int, str, bool, float, bytes]
68ChoiceConstraintsT: "TypeAlias" = Union[
69 IntegerConstraints,
70 FloatConstraints,
71 StringConstraints,
72 BytesConstraints,
73 BooleanConstraints,
74]
75ChoiceTypeT: "TypeAlias" = Literal["integer", "string", "boolean", "float", "bytes"]
76ChoiceKeyT: "TypeAlias" = Union[
77 int, str, bytes, tuple[Literal["bool"], bool], tuple[Literal["float"], int]
78]
79
80
81@attr.s(slots=True)
82class ChoiceTemplate:
83 type: Literal["simplest"] = attr.ib()
84 count: Optional[int] = attr.ib()
85
86 def __attrs_post_init__(self) -> None:
87 if self.count is not None:
88 assert self.count > 0
89
90
91@attr.s(slots=True, repr=False, eq=False)
92class ChoiceNode:
93 type: ChoiceTypeT = attr.ib()
94 value: ChoiceT = attr.ib()
95 constraints: ChoiceConstraintsT = attr.ib()
96 was_forced: bool = attr.ib()
97 index: Optional[int] = attr.ib(default=None)
98
99 def copy(
100 self,
101 *,
102 with_value: Optional[ChoiceT] = None,
103 with_constraints: Optional[ChoiceConstraintsT] = None,
104 ) -> "ChoiceNode":
105 # we may want to allow this combination in the future, but for now it's
106 # a footgun.
107 if self.was_forced:
108 assert with_value is None, "modifying a forced node doesn't make sense"
109 # explicitly not copying index. node indices are only assigned via
110 # ExampleRecord. This prevents footguns with relying on stale indices
111 # after copying.
112 return ChoiceNode(
113 type=self.type,
114 value=self.value if with_value is None else with_value,
115 constraints=(
116 self.constraints if with_constraints is None else with_constraints
117 ),
118 was_forced=self.was_forced,
119 )
120
121 @property
122 def trivial(self) -> bool:
123 """
124 A node is trivial if it cannot be simplified any further. This does not
125 mean that modifying a trivial node can't produce simpler test cases when
126 viewing the tree as a whole. Just that when viewing this node in
127 isolation, this is the simplest the node can get.
128 """
129 if self.was_forced:
130 return True
131
132 if self.type != "float":
133 zero_value = choice_from_index(0, self.type, self.constraints)
134 return choice_equal(self.value, zero_value)
135 else:
136 constraints = cast(FloatConstraints, self.constraints)
137 min_value = constraints["min_value"]
138 max_value = constraints["max_value"]
139 shrink_towards = 0.0
140
141 if min_value == -math.inf and max_value == math.inf:
142 return choice_equal(self.value, shrink_towards)
143
144 if (
145 not math.isinf(min_value)
146 and not math.isinf(max_value)
147 and math.ceil(min_value) <= math.floor(max_value)
148 ):
149 # the interval contains an integer. the simplest integer is the
150 # one closest to shrink_towards
151 shrink_towards = max(math.ceil(min_value), shrink_towards)
152 shrink_towards = min(math.floor(max_value), shrink_towards)
153 return choice_equal(self.value, float(shrink_towards))
154
155 # the real answer here is "the value in [min_value, max_value] with
156 # the lowest denominator when represented as a fraction".
157 # It would be good to compute this correctly in the future, but it's
158 # also not incorrect to be conservative here.
159 return False
160
161 def __eq__(self, other: object) -> bool:
162 if not isinstance(other, ChoiceNode):
163 return NotImplemented
164
165 return (
166 self.type == other.type
167 and choice_equal(self.value, other.value)
168 and choice_constraints_equal(self.type, self.constraints, other.constraints)
169 and self.was_forced == other.was_forced
170 )
171
172 def __hash__(self) -> int:
173 return hash(
174 (
175 self.type,
176 choice_key(self.value),
177 choice_constraints_key(self.type, self.constraints),
178 self.was_forced,
179 )
180 )
181
182 def __repr__(self) -> str:
183 forced_marker = " [forced]" if self.was_forced else ""
184 return f"{self.type} {self.value!r}{forced_marker} {self.constraints!r}"
185
186
187def _size_to_index(size: int, *, alphabet_size: int) -> int:
188 # this is the closed form of this geometric series:
189 # for i in range(size):
190 # index += alphabet_size**i
191 if alphabet_size <= 0:
192 assert size == 0
193 return 0
194 if alphabet_size == 1:
195 return size
196 v = (alphabet_size**size - 1) // (alphabet_size - 1)
197 # mypy thinks (m: int) // (n: int) -> Any. assert it back to int.
198 return cast(int, v)
199
200
201def _index_to_size(index: int, alphabet_size: int) -> int:
202 if alphabet_size == 0:
203 return 0
204 elif alphabet_size == 1:
205 # there is only one string of each size, so the size is equal to its
206 # ordering.
207 return index
208
209 # the closed-form inverse of _size_to_index is
210 # size = math.floor(math.log(index * (alphabet_size - 1) + 1, alphabet_size))
211 # which is fast, but suffers from float precision errors. As performance is
212 # relatively critical here, we'll use this formula by default, but fall back to
213 # a much slower integer-only logarithm when the calculation is too close for
214 # comfort.
215 total = index * (alphabet_size - 1) + 1
216 size = math.log(total, alphabet_size)
217
218 # if this computation is close enough that it could have been affected by
219 # floating point errors, use a much slower integer-only logarithm instead,
220 # which is guaranteed to be precise.
221 if 0 < math.ceil(size) - size < 1e-7:
222 s = 0
223 while total >= alphabet_size:
224 total //= alphabet_size
225 s += 1
226 return s
227 return math.floor(size)
228
229
230def collection_index(
231 choice: Sequence[T],
232 *,
233 min_size: int,
234 alphabet_size: int,
235 to_order: Callable[[T], int],
236) -> int:
237 # Collections are ordered by counting the number of values of each size,
238 # starting with min_size. alphabet_size indicates how many options there
239 # are for a single element. to_order orders an element by returning an n ≥ 0.
240
241 # we start by adding the size to the index, relative to min_size.
242 index = _size_to_index(len(choice), alphabet_size=alphabet_size) - _size_to_index(
243 min_size, alphabet_size=alphabet_size
244 )
245 # We then add each element c to the index, starting from the end (so "ab" is
246 # simpler than "ba"). Each loop takes c at position i in the sequence and
247 # computes the number of sequences of size i which come before it in the ordering.
248
249 # this running_exp computation is equivalent to doing
250 # index += (alphabet_size**i) * n
251 # but reuses intermediate exponentiation steps for efficiency.
252 running_exp = 1
253 for c in reversed(choice):
254 index += running_exp * to_order(c)
255 running_exp *= alphabet_size
256 return index
257
258
259def collection_value(
260 index: int,
261 *,
262 min_size: int,
263 alphabet_size: int,
264 from_order: Callable[[int], T],
265) -> list[T]:
266 from hypothesis.internal.conjecture.engine import BUFFER_SIZE
267
268 # this function is probably easiest to make sense of as an inverse of
269 # collection_index, tracking ~corresponding lines of code between the two.
270
271 index += _size_to_index(min_size, alphabet_size=alphabet_size)
272 size = _index_to_size(index, alphabet_size=alphabet_size)
273 # index -> value computation can be arbitrarily expensive for arbitrarily
274 # large min_size collections. short-circuit if the resulting size would be
275 # obviously-too-large. callers will generally turn this into a .mark_overrun().
276 if size >= BUFFER_SIZE:
277 raise ChoiceTooLarge
278
279 # subtract out the amount responsible for the size
280 index -= _size_to_index(size, alphabet_size=alphabet_size)
281 vals: list[T] = []
282 for i in reversed(range(size)):
283 # optimization for common case when we hit index 0. Exponentiation
284 # on large integers is expensive!
285 if index == 0:
286 n = 0
287 else:
288 n = index // (alphabet_size**i)
289 # subtract out the nearest multiple of alphabet_size**i
290 index -= n * (alphabet_size**i)
291 vals.append(from_order(n))
292 return vals
293
294
295def zigzag_index(value: int, *, shrink_towards: int) -> int:
296 # value | 0 1 -1 2 -2 3 -3 4
297 # index | 0 1 2 3 4 5 6 7
298 index = 2 * abs(shrink_towards - value)
299 if value > shrink_towards:
300 index -= 1
301 return index
302
303
304def zigzag_value(index: int, *, shrink_towards: int) -> int:
305 assert index >= 0
306 # count how many "steps" away from shrink_towards we are.
307 n = (index + 1) // 2
308 # now check if we're stepping up or down from shrink_towards.
309 if (index % 2) == 0:
310 n *= -1
311 return shrink_towards + n
312
313
314def choice_to_index(choice: ChoiceT, constraints: ChoiceConstraintsT) -> int:
315 # This function takes a choice in the choice sequence and returns the
316 # complexity index of that choice from among its possible values, where 0
317 # is the simplest.
318 #
319 # Note that the index of a choice depends on its constraints. The simplest value
320 # (at index 0) for {"min_value": None, "max_value": None} is 0, while for
321 # {"min_value": 1, "max_value": None} the simplest value is 1.
322 #
323 # choice_from_index inverts this function. An invariant on both functions is
324 # that they must be injective. Unfortunately, floats do not currently respect
325 # this. That's not *good*, but nothing has blown up - yet. And ordering
326 # floats in a sane manner is quite hard, so I've left it for another day.
327
328 if isinstance(choice, int) and not isinstance(choice, bool):
329 # Let a = shrink_towards.
330 # * Unbounded: Ordered by (|a - x|, sgn(a - x)). Think of a zigzag.
331 # [a, a + 1, a - 1, a + 2, a - 2, ...]
332 # * Semi-bounded: Same as unbounded, except stop on one side when you hit
333 # {min, max}_value. so min_value=-1 a=0 has order
334 # [0, 1, -1, 2, 3, 4, ...]
335 # * Bounded: Same as unbounded and semibounded, except stop on each side
336 # when you hit {min, max}_value.
337 #
338 # To simplify and gain intuition about this ordering, you can think about
339 # the most common case where 0 is first (a = 0). We deviate from this only
340 # rarely, e.g. for datetimes, where we generally want year 2000 to be
341 # simpler than year 0.
342 constraints = cast(IntegerConstraints, constraints)
343 shrink_towards = constraints["shrink_towards"]
344 min_value = constraints["min_value"]
345 max_value = constraints["max_value"]
346
347 if min_value is not None:
348 shrink_towards = max(min_value, shrink_towards)
349 if max_value is not None:
350 shrink_towards = min(max_value, shrink_towards)
351
352 if min_value is None and max_value is None:
353 # case: unbounded
354 return zigzag_index(choice, shrink_towards=shrink_towards)
355 elif min_value is not None and max_value is None:
356 # case: semibounded below
357
358 # min_value = -2
359 # index | 0 1 2 3 4 5 6 7
360 # v | 0 1 -1 2 -2 3 4 5
361 if abs(choice - shrink_towards) <= (shrink_towards - min_value):
362 return zigzag_index(choice, shrink_towards=shrink_towards)
363 return choice - min_value
364 elif max_value is not None and min_value is None:
365 # case: semibounded above
366 if abs(choice - shrink_towards) <= (max_value - shrink_towards):
367 return zigzag_index(choice, shrink_towards=shrink_towards)
368 return max_value - choice
369 else:
370 # case: bounded
371
372 # range = [-2, 5]
373 # shrink_towards = 2
374 # index | 0 1 2 3 4 5 6 7
375 # v | 2 3 1 4 0 5 -1 -2
376 #
377 # ^ with zero weights at index = [0, 2, 6]
378 # index | 0 1 2 3 4
379 # v | 3 4 0 5 -2
380
381 assert min_value is not None
382 assert max_value is not None
383 assert constraints["weights"] is None or all(
384 w > 0 for w in constraints["weights"].values()
385 ), "technically possible but really annoying to support zero weights"
386
387 # check which side gets exhausted first
388 if (shrink_towards - min_value) < (max_value - shrink_towards):
389 # Below shrink_towards gets exhausted first. Equivalent to
390 # semibounded below
391 if abs(choice - shrink_towards) <= (shrink_towards - min_value):
392 return zigzag_index(choice, shrink_towards=shrink_towards)
393 return choice - min_value
394 else:
395 # Above shrink_towards gets exhausted first. Equivalent to semibounded
396 # above
397 if abs(choice - shrink_towards) <= (max_value - shrink_towards):
398 return zigzag_index(choice, shrink_towards=shrink_towards)
399 return max_value - choice
400 elif isinstance(choice, bool):
401 constraints = cast(BooleanConstraints, constraints)
402 # Ordered by [False, True].
403 p = constraints["p"]
404 if not (2 ** (-64) < p < (1 - 2 ** (-64))):
405 # only one option is possible, so whatever it is is first.
406 return 0
407 return int(choice)
408 elif isinstance(choice, bytes):
409 constraints = cast(BytesConstraints, constraints)
410 return collection_index(
411 list(choice),
412 min_size=constraints["min_size"],
413 alphabet_size=2**8,
414 to_order=identity,
415 )
416 elif isinstance(choice, str):
417 constraints = cast(StringConstraints, constraints)
418 intervals = constraints["intervals"]
419 return collection_index(
420 choice,
421 min_size=constraints["min_size"],
422 alphabet_size=len(intervals),
423 to_order=intervals.index_from_char_in_shrink_order,
424 )
425 elif isinstance(choice, float):
426 sign = int(math.copysign(1.0, choice) < 0)
427 return (sign << 64) | float_to_lex(abs(choice))
428 else:
429 raise NotImplementedError
430
431
432def choice_from_index(
433 index: int, choice_type: ChoiceTypeT, constraints: ChoiceConstraintsT
434) -> ChoiceT:
435 assert index >= 0
436 if choice_type == "integer":
437 constraints = cast(IntegerConstraints, constraints)
438 shrink_towards = constraints["shrink_towards"]
439 min_value = constraints["min_value"]
440 max_value = constraints["max_value"]
441
442 if min_value is not None:
443 shrink_towards = max(min_value, shrink_towards)
444 if max_value is not None:
445 shrink_towards = min(max_value, shrink_towards)
446
447 if min_value is None and max_value is None:
448 # case: unbounded
449 return zigzag_value(index, shrink_towards=shrink_towards)
450 elif min_value is not None and max_value is None:
451 # case: semibounded below
452 if index <= zigzag_index(min_value, shrink_towards=shrink_towards):
453 return zigzag_value(index, shrink_towards=shrink_towards)
454 return index + min_value
455 elif max_value is not None and min_value is None:
456 # case: semibounded above
457 if index <= zigzag_index(max_value, shrink_towards=shrink_towards):
458 return zigzag_value(index, shrink_towards=shrink_towards)
459 return max_value - index
460 else:
461 # case: bounded
462 assert min_value is not None
463 assert max_value is not None
464 assert constraints["weights"] is None or all(
465 w > 0 for w in constraints["weights"].values()
466 ), "possible but really annoying to support zero weights"
467
468 if (shrink_towards - min_value) < (max_value - shrink_towards):
469 # equivalent to semibounded below case
470 if index <= zigzag_index(min_value, shrink_towards=shrink_towards):
471 return zigzag_value(index, shrink_towards=shrink_towards)
472 return index + min_value
473 else:
474 # equivalent to semibounded above case
475 if index <= zigzag_index(max_value, shrink_towards=shrink_towards):
476 return zigzag_value(index, shrink_towards=shrink_towards)
477 return max_value - index
478 elif choice_type == "boolean":
479 constraints = cast(BooleanConstraints, constraints)
480 # Ordered by [False, True].
481 p = constraints["p"]
482 only = None
483 if p <= 2 ** (-64):
484 only = False
485 elif p >= (1 - 2 ** (-64)):
486 only = True
487
488 assert index in {0, 1}
489 if only is not None:
490 # only one choice
491 assert index == 0
492 return only
493 return bool(index)
494 elif choice_type == "bytes":
495 constraints = cast(BytesConstraints, constraints)
496 value_b = collection_value(
497 index,
498 min_size=constraints["min_size"],
499 alphabet_size=2**8,
500 from_order=identity,
501 )
502 return bytes(value_b)
503 elif choice_type == "string":
504 constraints = cast(StringConstraints, constraints)
505 intervals = constraints["intervals"]
506 # _s because mypy is unhappy with reusing different-typed names in branches,
507 # even if the branches are disjoint.
508 value_s = collection_value(
509 index,
510 min_size=constraints["min_size"],
511 alphabet_size=len(intervals),
512 from_order=intervals.char_in_shrink_order,
513 )
514 return "".join(value_s)
515 elif choice_type == "float":
516 constraints = cast(FloatConstraints, constraints)
517 sign = -1 if index >> 64 else 1
518 result = sign * lex_to_float(index & ((1 << 64) - 1))
519
520 clamper = make_float_clamper(
521 min_value=constraints["min_value"],
522 max_value=constraints["max_value"],
523 smallest_nonzero_magnitude=constraints["smallest_nonzero_magnitude"],
524 allow_nan=constraints["allow_nan"],
525 )
526 return clamper(result)
527 else:
528 raise NotImplementedError
529
530
531def choice_permitted(choice: ChoiceT, constraints: ChoiceConstraintsT) -> bool:
532 if isinstance(choice, int) and not isinstance(choice, bool):
533 constraints = cast(IntegerConstraints, constraints)
534 min_value = constraints["min_value"]
535 max_value = constraints["max_value"]
536 if min_value is not None and choice < min_value:
537 return False
538 return not (max_value is not None and choice > max_value)
539 elif isinstance(choice, float):
540 constraints = cast(FloatConstraints, constraints)
541 if math.isnan(choice):
542 return constraints["allow_nan"]
543 if 0 < abs(choice) < constraints["smallest_nonzero_magnitude"]:
544 return False
545 return sign_aware_lte(constraints["min_value"], choice) and sign_aware_lte(
546 choice, constraints["max_value"]
547 )
548 elif isinstance(choice, str):
549 constraints = cast(StringConstraints, constraints)
550 if len(choice) < constraints["min_size"]:
551 return False
552 if (
553 constraints["max_size"] is not None
554 and len(choice) > constraints["max_size"]
555 ):
556 return False
557 return all(ord(c) in constraints["intervals"] for c in choice)
558 elif isinstance(choice, bytes):
559 constraints = cast(BytesConstraints, constraints)
560 if len(choice) < constraints["min_size"]:
561 return False
562 return constraints["max_size"] is None or len(choice) <= constraints["max_size"]
563 elif isinstance(choice, bool):
564 constraints = cast(BooleanConstraints, constraints)
565 if constraints["p"] <= 0:
566 return choice is False
567 if constraints["p"] >= 1:
568 return choice is True
569 return True
570 else:
571 raise NotImplementedError(f"unhandled type {type(choice)} with value {choice}")
572
573
574def choices_key(choices: Sequence[ChoiceT]) -> tuple[ChoiceKeyT, ...]:
575 return tuple(choice_key(choice) for choice in choices)
576
577
578def choice_key(choice: ChoiceT) -> ChoiceKeyT:
579 if isinstance(choice, float):
580 # float_to_int to distinguish -0.0/0.0, signaling/nonsignaling nans, etc,
581 # and then add a "float" key to avoid colliding with actual integers.
582 return ("float", float_to_int(choice))
583 if isinstance(choice, bool):
584 # avoid choice_key(0) == choice_key(False)
585 return ("bool", choice)
586 return choice
587
588
589def choice_equal(choice1: ChoiceT, choice2: ChoiceT) -> bool:
590 assert type(choice1) is type(choice2), (choice1, choice2)
591 return choice_key(choice1) == choice_key(choice2)
592
593
594def choice_constraints_equal(
595 choice_type: ChoiceTypeT,
596 constraints1: ChoiceConstraintsT,
597 constraints2: ChoiceConstraintsT,
598) -> bool:
599 return choice_constraints_key(choice_type, constraints1) == choice_constraints_key(
600 choice_type, constraints2
601 )
602
603
604def choice_constraints_key(
605 choice_type: ChoiceTypeT, constraints: ChoiceConstraintsT
606) -> tuple[Hashable, ...]:
607 if choice_type == "float":
608 constraints = cast(FloatConstraints, constraints)
609 return (
610 float_to_int(constraints["min_value"]),
611 float_to_int(constraints["max_value"]),
612 constraints["allow_nan"],
613 constraints["smallest_nonzero_magnitude"],
614 )
615 if choice_type == "integer":
616 constraints = cast(IntegerConstraints, constraints)
617 return (
618 constraints["min_value"],
619 constraints["max_value"],
620 None if constraints["weights"] is None else tuple(constraints["weights"]),
621 constraints["shrink_towards"],
622 )
623 return tuple(constraints[key] for key in sorted(constraints)) # type: ignore
624
625
626def choices_size(choices: Iterable[ChoiceT]) -> int:
627 from hypothesis.database import choices_to_bytes
628
629 return len(choices_to_bytes(choices))