1# This file is part of Hypothesis, which may be found at
2# https://github.com/HypothesisWorks/hypothesis/
3#
4# Copyright the Hypothesis Authors.
5# Individual contributors are listed in AUTHORS.rst and the git log.
6#
7# This Source Code Form is subject to the terms of the Mozilla Public License,
8# v. 2.0. If a copy of the MPL was not distributed with this file, You can
9# obtain one at https://mozilla.org/MPL/2.0/.
10
11import math
12from collections.abc import Callable, Hashable, Iterable, Sequence
13from dataclasses import dataclass
14from typing import (
15 Literal,
16 TypeAlias,
17 TypedDict,
18 TypeVar,
19 cast,
20)
21
22from hypothesis.errors import ChoiceTooLarge
23from hypothesis.internal.conjecture.floats import float_to_lex, lex_to_float
24from hypothesis.internal.conjecture.utils import identity
25from hypothesis.internal.floats import float_to_int, make_float_clamper, sign_aware_lte
26from hypothesis.internal.intervalsets import IntervalSet
27
28T = TypeVar("T")
29
30
31class IntegerConstraints(TypedDict):
32 min_value: int | None
33 max_value: int | None
34 weights: dict[int, float] | None
35 shrink_towards: int
36
37
38class FloatConstraints(TypedDict):
39 min_value: float
40 max_value: float
41 allow_nan: bool
42 smallest_nonzero_magnitude: float
43
44
45class StringConstraints(TypedDict):
46 intervals: IntervalSet
47 min_size: int
48 max_size: int
49
50
51class BytesConstraints(TypedDict):
52 min_size: int
53 max_size: int
54
55
56class BooleanConstraints(TypedDict):
57 p: float
58
59
60ChoiceT: TypeAlias = int | str | bool | float | bytes
61ChoiceConstraintsT: TypeAlias = (
62 IntegerConstraints
63 | FloatConstraints
64 | StringConstraints
65 | BytesConstraints
66 | BooleanConstraints
67)
68ChoiceTypeT: TypeAlias = Literal["integer", "string", "boolean", "float", "bytes"]
69ChoiceKeyT: TypeAlias = (
70 int | str | bytes | tuple[Literal["bool"], bool] | tuple[Literal["float"], int]
71)
72
73
74@dataclass(slots=True, frozen=False)
75class ChoiceTemplate:
76 type: Literal["simplest"]
77 count: int | None
78
79 def __post_init__(self) -> None:
80 if self.count is not None:
81 assert self.count > 0
82
83
84@dataclass(slots=True, frozen=False)
85class ChoiceNode:
86 type: ChoiceTypeT
87 value: ChoiceT
88 constraints: ChoiceConstraintsT
89 was_forced: bool
90 index: int | None = None
91
92 def copy(
93 self,
94 *,
95 with_value: ChoiceT | None = None,
96 with_constraints: ChoiceConstraintsT | None = None,
97 ) -> "ChoiceNode":
98 # we may want to allow this combination in the future, but for now it's
99 # a footgun.
100 if self.was_forced:
101 assert with_value is None, "modifying a forced node doesn't make sense"
102 # explicitly not copying index. node indices are only assigned via
103 # ExampleRecord. This prevents footguns with relying on stale indices
104 # after copying.
105 return ChoiceNode(
106 type=self.type,
107 value=self.value if with_value is None else with_value,
108 constraints=(
109 self.constraints if with_constraints is None else with_constraints
110 ),
111 was_forced=self.was_forced,
112 )
113
114 @property
115 def trivial(self) -> bool:
116 """
117 A node is trivial if it cannot be simplified any further. This does not
118 mean that modifying a trivial node can't produce simpler test cases when
119 viewing the tree as a whole. Just that when viewing this node in
120 isolation, this is the simplest the node can get.
121 """
122 if self.was_forced:
123 return True
124
125 if self.type != "float":
126 zero_value = choice_from_index(0, self.type, self.constraints)
127 return choice_equal(self.value, zero_value)
128 else:
129 constraints = cast(FloatConstraints, self.constraints)
130 min_value = constraints["min_value"]
131 max_value = constraints["max_value"]
132 shrink_towards = 0.0
133
134 if min_value == -math.inf and max_value == math.inf:
135 return choice_equal(self.value, shrink_towards)
136
137 if (
138 not math.isinf(min_value)
139 and not math.isinf(max_value)
140 and math.ceil(min_value) <= math.floor(max_value)
141 ):
142 # the interval contains an integer. the simplest integer is the
143 # one closest to shrink_towards
144 shrink_towards = max(math.ceil(min_value), shrink_towards)
145 shrink_towards = min(math.floor(max_value), shrink_towards)
146 return choice_equal(self.value, float(shrink_towards))
147
148 # the real answer here is "the value in [min_value, max_value] with
149 # the lowest denominator when represented as a fraction".
150 # It would be good to compute this correctly in the future, but it's
151 # also not incorrect to be conservative here.
152 return False
153
154 def __eq__(self, other: object) -> bool:
155 if not isinstance(other, ChoiceNode):
156 return NotImplemented
157
158 return (
159 self.type == other.type
160 and choice_equal(self.value, other.value)
161 and choice_constraints_equal(self.type, self.constraints, other.constraints)
162 and self.was_forced == other.was_forced
163 )
164
165 def __hash__(self) -> int:
166 return hash(
167 (
168 self.type,
169 choice_key(self.value),
170 choice_constraints_key(self.type, self.constraints),
171 self.was_forced,
172 )
173 )
174
175 def __repr__(self) -> str:
176 forced_marker = " [forced]" if self.was_forced else ""
177 return f"{self.type} {self.value!r}{forced_marker} {self.constraints!r}"
178
179
180def _size_to_index(size: int, *, alphabet_size: int) -> int:
181 # this is the closed form of this geometric series:
182 # for i in range(size):
183 # index += alphabet_size**i
184 if alphabet_size <= 0:
185 assert size == 0
186 return 0
187 if alphabet_size == 1:
188 return size
189 v = (alphabet_size**size - 1) // (alphabet_size - 1)
190 # mypy thinks (m: int) // (n: int) -> Any. assert it back to int.
191 return cast(int, v)
192
193
194def _index_to_size(index: int, alphabet_size: int) -> int:
195 if alphabet_size == 0:
196 return 0
197 elif alphabet_size == 1:
198 # there is only one string of each size, so the size is equal to its
199 # ordering.
200 return index
201
202 # the closed-form inverse of _size_to_index is
203 # size = math.floor(math.log(index * (alphabet_size - 1) + 1, alphabet_size))
204 # which is fast, but suffers from float precision errors. As performance is
205 # relatively critical here, we'll use this formula by default, but fall back to
206 # a much slower integer-only logarithm when the calculation is too close for
207 # comfort.
208 total = index * (alphabet_size - 1) + 1
209 size = math.log(total, alphabet_size)
210
211 # if this computation is close enough that it could have been affected by
212 # floating point errors, use a much slower integer-only logarithm instead,
213 # which is guaranteed to be precise.
214 if 0 < math.ceil(size) - size < 1e-7:
215 s = 0
216 while total >= alphabet_size:
217 total //= alphabet_size
218 s += 1
219 return s
220 return math.floor(size)
221
222
223def collection_index(
224 choice: Sequence[T],
225 *,
226 min_size: int,
227 alphabet_size: int,
228 to_order: Callable[[T], int],
229) -> int:
230 # Collections are ordered by counting the number of values of each size,
231 # starting with min_size. alphabet_size indicates how many options there
232 # are for a single element. to_order orders an element by returning an n ≥ 0.
233
234 # we start by adding the size to the index, relative to min_size.
235 index = _size_to_index(len(choice), alphabet_size=alphabet_size) - _size_to_index(
236 min_size, alphabet_size=alphabet_size
237 )
238 # We then add each element c to the index, starting from the end (so "ab" is
239 # simpler than "ba"). Each loop takes c at position i in the sequence and
240 # computes the number of sequences of size i which come before it in the ordering.
241
242 # this running_exp computation is equivalent to doing
243 # index += (alphabet_size**i) * n
244 # but reuses intermediate exponentiation steps for efficiency.
245 running_exp = 1
246 for c in reversed(choice):
247 index += running_exp * to_order(c)
248 running_exp *= alphabet_size
249 return index
250
251
252def collection_value(
253 index: int,
254 *,
255 min_size: int,
256 alphabet_size: int,
257 from_order: Callable[[int], T],
258) -> list[T]:
259 from hypothesis.internal.conjecture.engine import BUFFER_SIZE
260
261 # this function is probably easiest to make sense of as an inverse of
262 # collection_index, tracking ~corresponding lines of code between the two.
263
264 index += _size_to_index(min_size, alphabet_size=alphabet_size)
265 size = _index_to_size(index, alphabet_size=alphabet_size)
266 # index -> value computation can be arbitrarily expensive for arbitrarily
267 # large min_size collections. short-circuit if the resulting size would be
268 # obviously-too-large. callers will generally turn this into a .mark_overrun().
269 if size >= BUFFER_SIZE:
270 raise ChoiceTooLarge
271
272 # subtract out the amount responsible for the size
273 index -= _size_to_index(size, alphabet_size=alphabet_size)
274 vals: list[T] = []
275 for i in reversed(range(size)):
276 # optimization for common case when we hit index 0. Exponentiation
277 # on large integers is expensive!
278 if index == 0:
279 n = 0
280 else:
281 n = index // (alphabet_size**i)
282 # subtract out the nearest multiple of alphabet_size**i
283 index -= n * (alphabet_size**i)
284 vals.append(from_order(n))
285 return vals
286
287
288def zigzag_index(value: int, *, shrink_towards: int) -> int:
289 # value | 0 1 -1 2 -2 3 -3 4
290 # index | 0 1 2 3 4 5 6 7
291 index = 2 * abs(shrink_towards - value)
292 if value > shrink_towards:
293 index -= 1
294 return index
295
296
297def zigzag_value(index: int, *, shrink_towards: int) -> int:
298 assert index >= 0
299 # count how many "steps" away from shrink_towards we are.
300 n = (index + 1) // 2
301 # now check if we're stepping up or down from shrink_towards.
302 if (index % 2) == 0:
303 n *= -1
304 return shrink_towards + n
305
306
307def choice_to_index(choice: ChoiceT, constraints: ChoiceConstraintsT) -> int:
308 # This function takes a choice in the choice sequence and returns the
309 # complexity index of that choice from among its possible values, where 0
310 # is the simplest.
311 #
312 # Note that the index of a choice depends on its constraints. The simplest value
313 # (at index 0) for {"min_value": None, "max_value": None} is 0, while for
314 # {"min_value": 1, "max_value": None} the simplest value is 1.
315 #
316 # choice_from_index inverts this function. An invariant on both functions is
317 # that they must be injective. Unfortunately, floats do not currently respect
318 # this. That's not *good*, but nothing has blown up - yet. And ordering
319 # floats in a sane manner is quite hard, so I've left it for another day.
320
321 if isinstance(choice, int) and not isinstance(choice, bool):
322 # Let a = shrink_towards.
323 # * Unbounded: Ordered by (|a - x|, sgn(a - x)). Think of a zigzag.
324 # [a, a + 1, a - 1, a + 2, a - 2, ...]
325 # * Semi-bounded: Same as unbounded, except stop on one side when you hit
326 # {min, max}_value. so min_value=-1 a=0 has order
327 # [0, 1, -1, 2, 3, 4, ...]
328 # * Bounded: Same as unbounded and semibounded, except stop on each side
329 # when you hit {min, max}_value.
330 #
331 # To simplify and gain intuition about this ordering, you can think about
332 # the most common case where 0 is first (a = 0). We deviate from this only
333 # rarely, e.g. for datetimes, where we generally want year 2000 to be
334 # simpler than year 0.
335 constraints = cast(IntegerConstraints, constraints)
336 shrink_towards = constraints["shrink_towards"]
337 min_value = constraints["min_value"]
338 max_value = constraints["max_value"]
339
340 if min_value is not None:
341 shrink_towards = max(min_value, shrink_towards)
342 if max_value is not None:
343 shrink_towards = min(max_value, shrink_towards)
344
345 if min_value is None and max_value is None:
346 # case: unbounded
347 return zigzag_index(choice, shrink_towards=shrink_towards)
348 elif min_value is not None and max_value is None:
349 # case: semibounded below
350
351 # min_value = -2
352 # index | 0 1 2 3 4 5 6 7
353 # v | 0 1 -1 2 -2 3 4 5
354 if abs(choice - shrink_towards) <= (shrink_towards - min_value):
355 return zigzag_index(choice, shrink_towards=shrink_towards)
356 return choice - min_value
357 elif max_value is not None and min_value is None:
358 # case: semibounded above
359 if abs(choice - shrink_towards) <= (max_value - shrink_towards):
360 return zigzag_index(choice, shrink_towards=shrink_towards)
361 return max_value - choice
362 else:
363 # case: bounded
364
365 # range = [-2, 5]
366 # shrink_towards = 2
367 # index | 0 1 2 3 4 5 6 7
368 # v | 2 3 1 4 0 5 -1 -2
369 #
370 # ^ with zero weights at index = [0, 2, 6]
371 # index | 0 1 2 3 4
372 # v | 3 4 0 5 -2
373
374 assert min_value is not None
375 assert max_value is not None
376 assert constraints["weights"] is None or all(
377 w > 0 for w in constraints["weights"].values()
378 ), "technically possible but really annoying to support zero weights"
379
380 # check which side gets exhausted first
381 if (shrink_towards - min_value) < (max_value - shrink_towards):
382 # Below shrink_towards gets exhausted first. Equivalent to
383 # semibounded below
384 if abs(choice - shrink_towards) <= (shrink_towards - min_value):
385 return zigzag_index(choice, shrink_towards=shrink_towards)
386 return choice - min_value
387 else:
388 # Above shrink_towards gets exhausted first. Equivalent to semibounded
389 # above
390 if abs(choice - shrink_towards) <= (max_value - shrink_towards):
391 return zigzag_index(choice, shrink_towards=shrink_towards)
392 return max_value - choice
393 elif isinstance(choice, bool):
394 constraints = cast(BooleanConstraints, constraints)
395 # Ordered by [False, True].
396 p = constraints["p"]
397 if not (2 ** (-64) < p < (1 - 2 ** (-64))):
398 # only one option is possible, so whatever it is is first.
399 return 0
400 return int(choice)
401 elif isinstance(choice, bytes):
402 constraints = cast(BytesConstraints, constraints)
403 return collection_index(
404 list(choice),
405 min_size=constraints["min_size"],
406 alphabet_size=2**8,
407 to_order=identity,
408 )
409 elif isinstance(choice, str):
410 constraints = cast(StringConstraints, constraints)
411 intervals = constraints["intervals"]
412 return collection_index(
413 choice,
414 min_size=constraints["min_size"],
415 alphabet_size=len(intervals),
416 to_order=intervals.index_from_char_in_shrink_order,
417 )
418 elif isinstance(choice, float):
419 sign = int(math.copysign(1.0, choice) < 0)
420 return (sign << 64) | float_to_lex(abs(choice))
421 else:
422 raise NotImplementedError
423
424
425def choice_from_index(
426 index: int, choice_type: ChoiceTypeT, constraints: ChoiceConstraintsT
427) -> ChoiceT:
428 assert index >= 0
429 if choice_type == "integer":
430 constraints = cast(IntegerConstraints, constraints)
431 shrink_towards = constraints["shrink_towards"]
432 min_value = constraints["min_value"]
433 max_value = constraints["max_value"]
434
435 if min_value is not None:
436 shrink_towards = max(min_value, shrink_towards)
437 if max_value is not None:
438 shrink_towards = min(max_value, shrink_towards)
439
440 if min_value is None and max_value is None:
441 # case: unbounded
442 return zigzag_value(index, shrink_towards=shrink_towards)
443 elif min_value is not None and max_value is None:
444 # case: semibounded below
445 if index <= zigzag_index(min_value, shrink_towards=shrink_towards):
446 return zigzag_value(index, shrink_towards=shrink_towards)
447 return index + min_value
448 elif max_value is not None and min_value is None:
449 # case: semibounded above
450 if index <= zigzag_index(max_value, shrink_towards=shrink_towards):
451 return zigzag_value(index, shrink_towards=shrink_towards)
452 return max_value - index
453 else:
454 # case: bounded
455 assert min_value is not None
456 assert max_value is not None
457 assert constraints["weights"] is None or all(
458 w > 0 for w in constraints["weights"].values()
459 ), "possible but really annoying to support zero weights"
460
461 if (shrink_towards - min_value) < (max_value - shrink_towards):
462 # equivalent to semibounded below case
463 if index <= zigzag_index(min_value, shrink_towards=shrink_towards):
464 return zigzag_value(index, shrink_towards=shrink_towards)
465 return index + min_value
466 else:
467 # equivalent to semibounded above case
468 if index <= zigzag_index(max_value, shrink_towards=shrink_towards):
469 return zigzag_value(index, shrink_towards=shrink_towards)
470 return max_value - index
471 elif choice_type == "boolean":
472 constraints = cast(BooleanConstraints, constraints)
473 # Ordered by [False, True].
474 p = constraints["p"]
475 only = None
476 if p <= 2 ** (-64):
477 only = False
478 elif p >= (1 - 2 ** (-64)):
479 only = True
480
481 assert index in {0, 1}
482 if only is not None:
483 # only one choice
484 assert index == 0
485 return only
486 return bool(index)
487 elif choice_type == "bytes":
488 constraints = cast(BytesConstraints, constraints)
489 value_b = collection_value(
490 index,
491 min_size=constraints["min_size"],
492 alphabet_size=2**8,
493 from_order=identity,
494 )
495 return bytes(value_b)
496 elif choice_type == "string":
497 constraints = cast(StringConstraints, constraints)
498 intervals = constraints["intervals"]
499 # _s because mypy is unhappy with reusing different-typed names in branches,
500 # even if the branches are disjoint.
501 value_s = collection_value(
502 index,
503 min_size=constraints["min_size"],
504 alphabet_size=len(intervals),
505 from_order=intervals.char_in_shrink_order,
506 )
507 return "".join(value_s)
508 elif choice_type == "float":
509 constraints = cast(FloatConstraints, constraints)
510 sign = -1 if index >> 64 else 1
511 result = sign * lex_to_float(index & ((1 << 64) - 1))
512
513 clamper = make_float_clamper(
514 min_value=constraints["min_value"],
515 max_value=constraints["max_value"],
516 smallest_nonzero_magnitude=constraints["smallest_nonzero_magnitude"],
517 allow_nan=constraints["allow_nan"],
518 )
519 return clamper(result)
520 else:
521 raise NotImplementedError
522
523
524def choice_permitted(choice: ChoiceT, constraints: ChoiceConstraintsT) -> bool:
525 if isinstance(choice, int) and not isinstance(choice, bool):
526 constraints = cast(IntegerConstraints, constraints)
527 min_value = constraints["min_value"]
528 max_value = constraints["max_value"]
529 if min_value is not None and choice < min_value:
530 return False
531 return not (max_value is not None and choice > max_value)
532 elif isinstance(choice, float):
533 constraints = cast(FloatConstraints, constraints)
534 if math.isnan(choice):
535 return constraints["allow_nan"]
536 if 0 < abs(choice) < constraints["smallest_nonzero_magnitude"]:
537 return False
538 return sign_aware_lte(constraints["min_value"], choice) and sign_aware_lte(
539 choice, constraints["max_value"]
540 )
541 elif isinstance(choice, str):
542 constraints = cast(StringConstraints, constraints)
543 if len(choice) < constraints["min_size"]:
544 return False
545 if (
546 constraints["max_size"] is not None
547 and len(choice) > constraints["max_size"]
548 ):
549 return False
550 return all(ord(c) in constraints["intervals"] for c in choice)
551 elif isinstance(choice, bytes):
552 constraints = cast(BytesConstraints, constraints)
553 if len(choice) < constraints["min_size"]:
554 return False
555 return constraints["max_size"] is None or len(choice) <= constraints["max_size"]
556 elif isinstance(choice, bool):
557 constraints = cast(BooleanConstraints, constraints)
558 if constraints["p"] <= 0:
559 return choice is False
560 if constraints["p"] >= 1:
561 return choice is True
562 return True
563 else:
564 raise NotImplementedError(f"unhandled type {type(choice)} with value {choice}")
565
566
567def choices_key(choices: Sequence[ChoiceT]) -> tuple[ChoiceKeyT, ...]:
568 return tuple(choice_key(choice) for choice in choices)
569
570
571def choice_key(choice: ChoiceT) -> ChoiceKeyT:
572 if isinstance(choice, float):
573 # float_to_int to distinguish -0.0/0.0, signaling/nonsignaling nans, etc,
574 # and then add a "float" key to avoid colliding with actual integers.
575 return ("float", float_to_int(choice))
576 if isinstance(choice, bool):
577 # avoid choice_key(0) == choice_key(False)
578 return ("bool", choice)
579 return choice
580
581
582def choice_equal(choice1: ChoiceT, choice2: ChoiceT) -> bool:
583 assert type(choice1) is type(choice2), (choice1, choice2)
584 return choice_key(choice1) == choice_key(choice2)
585
586
587def choice_constraints_equal(
588 choice_type: ChoiceTypeT,
589 constraints1: ChoiceConstraintsT,
590 constraints2: ChoiceConstraintsT,
591) -> bool:
592 return choice_constraints_key(choice_type, constraints1) == choice_constraints_key(
593 choice_type, constraints2
594 )
595
596
597def choice_constraints_key(
598 choice_type: ChoiceTypeT, constraints: ChoiceConstraintsT
599) -> tuple[Hashable, ...]:
600 if choice_type == "float":
601 constraints = cast(FloatConstraints, constraints)
602 return (
603 float_to_int(constraints["min_value"]),
604 float_to_int(constraints["max_value"]),
605 constraints["allow_nan"],
606 constraints["smallest_nonzero_magnitude"],
607 )
608 if choice_type == "integer":
609 constraints = cast(IntegerConstraints, constraints)
610 return (
611 constraints["min_value"],
612 constraints["max_value"],
613 None if constraints["weights"] is None else tuple(constraints["weights"]),
614 constraints["shrink_towards"],
615 )
616 return tuple(constraints[key] for key in sorted(constraints)) # type: ignore
617
618
619def choices_size(choices: Iterable[ChoiceT]) -> int:
620 from hypothesis.database import choices_to_bytes
621
622 return len(choices_to_bytes(choices))