1# This file is part of Hypothesis, which may be found at
2# https://github.com/HypothesisWorks/hypothesis/
3#
4# Copyright the Hypothesis Authors.
5# Individual contributors are listed in AUTHORS.rst and the git log.
6#
7# This Source Code Form is subject to the terms of the Mozilla Public License,
8# v. 2.0. If a copy of the MPL was not distributed with this file, You can
9# obtain one at https://mozilla.org/MPL/2.0/.
10
11import math
12from collections import defaultdict
13from collections.abc import Callable, Sequence
14from dataclasses import dataclass
15from typing import (
16 TYPE_CHECKING,
17 Any,
18 Literal,
19 TypeAlias,
20 cast,
21)
22
23from hypothesis.internal.conjecture.choice import (
24 ChoiceNode,
25 ChoiceT,
26 choice_equal,
27 choice_from_index,
28 choice_key,
29 choice_permitted,
30 choice_to_index,
31)
32from hypothesis.internal.conjecture.data import (
33 ConjectureData,
34 ConjectureResult,
35 Spans,
36 Status,
37 _Overrun,
38 draw_choice,
39)
40from hypothesis.internal.conjecture.junkdrawer import (
41 endswith,
42 find_integer,
43 replace_all,
44 startswith,
45)
46from hypothesis.internal.conjecture.shrinking import (
47 Bytes,
48 Float,
49 Integer,
50 Ordering,
51 String,
52)
53from hypothesis.internal.conjecture.shrinking.choicetree import (
54 ChoiceTree,
55 prefix_selection_order,
56 random_selection_order,
57)
58from hypothesis.internal.floats import MAX_PRECISE_INTEGER
59
60if TYPE_CHECKING:
61 from random import Random
62
63 from hypothesis.internal.conjecture.engine import ConjectureRunner
64
65ShrinkPredicateT: TypeAlias = Callable[[ConjectureResult | _Overrun], bool]
66
67
68def sort_key(nodes: Sequence[ChoiceNode]) -> tuple[int, tuple[int, ...]]:
69 """Returns a sort key such that "simpler" choice sequences are smaller than
70 "more complicated" ones.
71
72 We define sort_key so that x is simpler than y if x is shorter than y or if
73 they have the same length and map(choice_to_index, x) < map(choice_to_index, y).
74
75 The reason for using this ordering is:
76
77 1. If x is shorter than y then that means we had to make fewer decisions
78 in constructing the test case when we ran x than we did when we ran y.
79 2. If x is the same length as y then replacing a choice with a lower index
80 choice corresponds to replacing it with a simpler/smaller choice.
81 3. Because choices drawn early in generation potentially get used in more
82 places they potentially have a more significant impact on the final
83 result, so it makes sense to prioritise reducing earlier choices over
84 later ones.
85 """
86 return (
87 len(nodes),
88 tuple(choice_to_index(node.value, node.constraints) for node in nodes),
89 )
90
91
92@dataclass(slots=True, frozen=False)
93class ShrinkPass:
94 function: Any
95 name: str | None = None
96 last_prefix: Any = ()
97
98 # some execution statistics
99 calls: int = 0
100 misaligned: int = 0
101 shrinks: int = 0
102 deletions: int = 0
103
104 def __post_init__(self):
105 if self.name is None:
106 self.name = self.function.__name__
107
108 def __hash__(self):
109 return hash(self.name)
110
111
112class StopShrinking(Exception):
113 pass
114
115
116class Shrinker:
117 """A shrinker is a child object of a ConjectureRunner which is designed to
118 manage the associated state of a particular shrink problem. That is, we
119 have some initial ConjectureData object and some property of interest
120 that it satisfies, and we want to find a ConjectureData object with a
121 shortlex (see sort_key above) smaller choice sequence that exhibits the same
122 property.
123
124 Currently the only property of interest we use is that the status is
125 INTERESTING and the interesting_origin takes on some fixed value, but we
126 may potentially be interested in other use cases later.
127 However we assume that data with a status < VALID never satisfies the predicate.
128
129 The shrinker keeps track of a value shrink_target which represents the
130 current best known ConjectureData object satisfying the predicate.
131 It refines this value by repeatedly running *shrink passes*, which are
132 methods that perform a series of transformations to the current shrink_target
133 and evaluate the underlying test function to find new ConjectureData
134 objects. If any of these satisfy the predicate, the shrink_target
135 is updated automatically. Shrinking runs until no shrink pass can
136 improve the shrink_target, at which point it stops. It may also be
137 terminated if the underlying engine throws RunIsComplete, but that
138 is handled by the calling code rather than the Shrinker.
139
140 =======================
141 Designing Shrink Passes
142 =======================
143
144 Generally a shrink pass is just any function that calls
145 cached_test_function and/or consider_new_nodes a number of times,
146 but there are a couple of useful things to bear in mind.
147
148 A shrink pass *makes progress* if running it changes self.shrink_target
149 (i.e. it tries a shortlex smaller ConjectureData object satisfying
150 the predicate). The desired end state of shrinking is to find a
151 value such that no shrink pass can make progress, i.e. that we
152 are at a local minimum for each shrink pass.
153
154 In aid of this goal, the main invariant that a shrink pass much
155 satisfy is that whether it makes progress must be deterministic.
156 It is fine (encouraged even) for the specific progress it makes
157 to be non-deterministic, but if you run a shrink pass, it makes
158 no progress, and then you immediately run it again, it should
159 never succeed on the second time. This allows us to stop as soon
160 as we have run each shrink pass and seen no progress on any of
161 them.
162
163 This means that e.g. it's fine to try each of N deletions
164 or replacements in a random order, but it's not OK to try N random
165 deletions (unless you have already shrunk at least once, though we
166 don't currently take advantage of this loophole).
167
168 Shrink passes need to be written so as to be robust against
169 change in the underlying shrink target. It is generally safe
170 to assume that the shrink target does not change prior to the
171 point of first modification - e.g. if you change no bytes at
172 index ``i``, all spans whose start is ``<= i`` still exist,
173 as do all blocks, and the data object is still of length
174 ``>= i + 1``. This can only be violated by bad user code which
175 relies on an external source of non-determinism.
176
177 When the underlying shrink_target changes, shrink
178 passes should not run substantially more test_function calls
179 on success than they do on failure. Say, no more than a constant
180 factor more. In particular shrink passes should not iterate to a
181 fixed point.
182
183 This means that shrink passes are often written with loops that
184 are carefully designed to do the right thing in the case that no
185 shrinks occurred and try to adapt to any changes to do a reasonable
186 job. e.g. say we wanted to write a shrink pass that tried deleting
187 each individual choice (this isn't an especially good pass,
188 but it leads to a simple illustrative example), we might do it
189 by iterating over the choice sequence like so:
190
191 .. code-block:: python
192
193 i = 0
194 while i < len(self.shrink_target.nodes):
195 if not self.consider_new_nodes(
196 self.shrink_target.nodes[:i] + self.shrink_target.nodes[i + 1 :]
197 ):
198 i += 1
199
200 The reason for writing the loop this way is that i is always a
201 valid index into the current choice sequence, even if the current sequence
202 changes as a result of our actions. When the choice sequence changes,
203 we leave the index where it is rather than restarting from the
204 beginning, and carry on. This means that the number of steps we
205 run in this case is always bounded above by the number of steps
206 we would run if nothing works.
207
208 Another thing to bear in mind about shrink pass design is that
209 they should prioritise *progress*. If you have N operations that
210 you need to run, you should try to order them in such a way as
211 to avoid stalling, where you have long periods of test function
212 invocations where no shrinks happen. This is bad because whenever
213 we shrink we reduce the amount of work the shrinker has to do
214 in future, and often speed up the test function, so we ideally
215 wanted those shrinks to happen much earlier in the process.
216
217 Sometimes stalls are inevitable of course - e.g. if the pass
218 makes no progress, then the entire thing is just one long stall,
219 but it's helpful to design it so that stalls are less likely
220 in typical behaviour.
221
222 The two easiest ways to do this are:
223
224 * Just run the N steps in random order. As long as a
225 reasonably large proportion of the operations succeed, this
226 guarantees the expected stall length is quite short. The
227 book keeping for making sure this does the right thing when
228 it succeeds can be quite annoying.
229 * When you have any sort of nested loop, loop in such a way
230 that both loop variables change each time. This prevents
231 stalls which occur when one particular value for the outer
232 loop is impossible to make progress on, rendering the entire
233 inner loop into a stall.
234
235 However, although progress is good, too much progress can be
236 a bad sign! If you're *only* seeing successful reductions,
237 that's probably a sign that you are making changes that are
238 too timid. Two useful things to offset this:
239
240 * It's worth writing shrink passes which are *adaptive*, in
241 the sense that when operations seem to be working really
242 well we try to bundle multiple of them together. This can
243 often be used to turn what would be O(m) successful calls
244 into O(log(m)).
245 * It's often worth trying one or two special minimal values
246 before trying anything more fine grained (e.g. replacing
247 the whole thing with zero).
248
249 """
250
251 def derived_value(fn):
252 """It's useful during shrinking to have access to derived values of
253 the current shrink target.
254
255 This decorator allows you to define these as cached properties. They
256 are calculated once, then cached until the shrink target changes, then
257 recalculated the next time they are used."""
258
259 def accept(self):
260 try:
261 return self.__derived_values[fn.__name__]
262 except KeyError:
263 return self.__derived_values.setdefault(fn.__name__, fn(self))
264
265 accept.__name__ = fn.__name__
266 return property(accept)
267
268 def __init__(
269 self,
270 engine: "ConjectureRunner",
271 initial: ConjectureData | ConjectureResult,
272 predicate: ShrinkPredicateT | None,
273 *,
274 allow_transition: (
275 Callable[[ConjectureData | ConjectureResult, ConjectureData], bool] | None
276 ),
277 explain: bool,
278 in_target_phase: bool = False,
279 ):
280 """Create a shrinker for a particular engine, with a given starting
281 point and predicate. When shrink() is called it will attempt to find an
282 example for which predicate is True and which is strictly smaller than
283 initial.
284
285 Note that initial is a ConjectureData object, and predicate
286 takes ConjectureData objects.
287 """
288 assert predicate is not None or allow_transition is not None
289 self.engine = engine
290 self.__predicate = predicate or (lambda data: True)
291 self.__allow_transition = allow_transition or (lambda source, destination: True)
292 self.__derived_values: dict = {}
293
294 self.initial_size = len(initial.choices)
295 # We keep track of the current best example on the shrink_target
296 # attribute.
297 self.shrink_target = initial
298 self.clear_change_tracking()
299 self.shrinks = 0
300
301 # We terminate shrinks that seem to have reached their logical
302 # conclusion: If we've called the underlying test function at
303 # least self.max_stall times since the last time we shrunk,
304 # it's time to stop shrinking.
305 self.max_stall = 200
306 self.initial_calls = self.engine.call_count
307 self.initial_misaligned = self.engine.misaligned_count
308 self.calls_at_last_shrink = self.initial_calls
309
310 self.shrink_passes: list[ShrinkPass] = [
311 ShrinkPass(self.try_trivial_spans),
312 self.node_program("X" * 5),
313 self.node_program("X" * 4),
314 self.node_program("X" * 3),
315 self.node_program("X" * 2),
316 self.node_program("X" * 1),
317 ShrinkPass(self.pass_to_descendant),
318 ShrinkPass(self.reorder_spans),
319 ShrinkPass(self.minimize_duplicated_choices),
320 ShrinkPass(self.minimize_individual_choices),
321 ShrinkPass(self.redistribute_numeric_pairs),
322 ShrinkPass(self.lower_integers_together),
323 ShrinkPass(self.lower_duplicated_characters),
324 ]
325
326 # Because the shrinker is also used to `pareto_optimise` in the target phase,
327 # we sometimes want to allow extending buffers instead of aborting at the end.
328 self.__extend: Literal["full"] | int = "full" if in_target_phase else 0
329 self.should_explain = explain
330
331 @derived_value # type: ignore
332 def cached_calculations(self):
333 return {}
334
335 def cached(self, *keys):
336 def accept(f):
337 cache_key = (f.__name__, *keys)
338 try:
339 return self.cached_calculations[cache_key]
340 except KeyError:
341 return self.cached_calculations.setdefault(cache_key, f())
342
343 return accept
344
345 @property
346 def calls(self) -> int:
347 """Return the number of calls that have been made to the underlying
348 test function."""
349 return self.engine.call_count
350
351 @property
352 def misaligned(self) -> int:
353 return self.engine.misaligned_count
354
355 def check_calls(self) -> None:
356 if self.calls - self.calls_at_last_shrink >= self.max_stall:
357 raise StopShrinking
358
359 def cached_test_function(
360 self, nodes: Sequence[ChoiceNode]
361 ) -> tuple[bool, ConjectureResult | _Overrun | None]:
362 nodes = nodes[: len(self.nodes)]
363
364 if startswith(nodes, self.nodes):
365 return (True, None)
366
367 if sort_key(self.nodes) < sort_key(nodes):
368 return (False, None)
369
370 # sometimes our shrinking passes try obviously invalid things. We handle
371 # discarding them in one place here.
372 if any(not choice_permitted(node.value, node.constraints) for node in nodes):
373 return (False, None)
374
375 result = self.engine.cached_test_function(
376 [n.value for n in nodes], extend=self.__extend
377 )
378 previous = self.shrink_target
379 self.incorporate_test_data(result)
380 self.check_calls()
381 return (previous is not self.shrink_target, result)
382
383 def consider_new_nodes(self, nodes: Sequence[ChoiceNode]) -> bool:
384 return self.cached_test_function(nodes)[0]
385
386 def incorporate_test_data(self, data):
387 """Takes a ConjectureData or Overrun object updates the current
388 shrink_target if this data represents an improvement over it."""
389 if data.status < Status.VALID or data is self.shrink_target:
390 return
391 if (
392 self.__predicate(data)
393 and sort_key(data.nodes) < sort_key(self.shrink_target.nodes)
394 and self.__allow_transition(self.shrink_target, data)
395 ):
396 self.update_shrink_target(data)
397
398 def debug(self, msg: str) -> None:
399 self.engine.debug(msg)
400
401 @property
402 def random(self) -> "Random":
403 return self.engine.random
404
405 def shrink(self) -> None:
406 """Run the full set of shrinks and update shrink_target.
407
408 This method is "mostly idempotent" - calling it twice is unlikely to
409 have any effect, though it has a non-zero probability of doing so.
410 """
411
412 try:
413 self.initial_coarse_reduction()
414 self.greedy_shrink()
415 except StopShrinking:
416 # If we stopped shrinking because we're making slow progress (instead of
417 # reaching a local optimum), don't run the explain-phase logic.
418 self.should_explain = False
419 finally:
420 if self.engine.report_debug_info:
421
422 def s(n):
423 return "s" if n != 1 else ""
424
425 total_deleted = self.initial_size - len(self.shrink_target.choices)
426 calls = self.engine.call_count - self.initial_calls
427 misaligned = self.engine.misaligned_count - self.initial_misaligned
428
429 self.debug(
430 "---------------------\n"
431 "Shrink pass profiling\n"
432 "---------------------\n\n"
433 f"Shrinking made a total of {calls} call{s(calls)} of which "
434 f"{self.shrinks} shrank and {misaligned} were misaligned. This "
435 f"deleted {total_deleted} choices out of {self.initial_size}."
436 )
437 for useful in [True, False]:
438 self.debug("")
439 if useful:
440 self.debug("Useful passes:")
441 else:
442 self.debug("Useless passes:")
443 self.debug("")
444 for pass_ in sorted(
445 self.shrink_passes,
446 key=lambda t: (-t.calls, t.deletions, t.shrinks),
447 ):
448 if pass_.calls == 0:
449 continue
450 if (pass_.shrinks != 0) != useful:
451 continue
452
453 self.debug(
454 f" * {pass_.name} made {pass_.calls} call{s(pass_.calls)} of which "
455 f"{pass_.shrinks} shrank and {pass_.misaligned} were misaligned, "
456 f"deleting {pass_.deletions} choice{s(pass_.deletions)}."
457 )
458 self.debug("")
459 self.explain()
460
461 def explain(self) -> None:
462
463 if not self.should_explain or not self.shrink_target.arg_slices:
464 return
465
466 self.max_stall = 2**100
467 shrink_target = self.shrink_target
468 nodes = self.nodes
469 choices = self.choices
470 chunks: dict[tuple[int, int], list[tuple[ChoiceT, ...]]] = defaultdict(list)
471
472 # Before we start running experiments, let's check for known inputs which would
473 # make them redundant. The shrinking process means that we've already tried many
474 # variations on the minimal example, so this can save a lot of time.
475 seen_passing_seq = self.engine.passing_choice_sequences(
476 prefix=self.nodes[: min(self.shrink_target.arg_slices)[0]]
477 )
478
479 # Now that we've shrunk to a minimal failing example, it's time to try
480 # varying each part that we've noted will go in the final report. Consider
481 # slices in largest-first order
482 for start, end in sorted(
483 self.shrink_target.arg_slices, key=lambda x: (-(x[1] - x[0]), x)
484 ):
485 # Check for any previous examples that match the prefix and suffix,
486 # so we can skip if we found a passing example while shrinking.
487 if any(
488 startswith(seen, nodes[:start]) and endswith(seen, nodes[end:])
489 for seen in seen_passing_seq
490 ):
491 continue
492
493 # Run our experiments
494 n_same_failures = 0
495 note = "or any other generated value"
496 # TODO: is 100 same-failures out of 500 attempts a good heuristic?
497 for n_attempt in range(500): # pragma: no branch
498 # no-branch here because we don't coverage-test the abort-at-500 logic.
499
500 if n_attempt - 10 > n_same_failures * 5:
501 # stop early if we're seeing mostly invalid examples
502 break # pragma: no cover
503
504 # replace start:end with random values
505 replacement = []
506 for i in range(start, end):
507 node = nodes[i]
508 if not node.was_forced:
509 value = draw_choice(
510 node.type, node.constraints, random=self.random
511 )
512 node = node.copy(with_value=value)
513 replacement.append(node.value)
514
515 attempt = choices[:start] + tuple(replacement) + choices[end:]
516 result = self.engine.cached_test_function(attempt, extend="full")
517
518 if result.status is Status.OVERRUN:
519 continue # pragma: no cover # flakily covered
520 result = cast(ConjectureResult, result)
521 if not (
522 len(attempt) == len(result.choices)
523 and endswith(result.nodes, nodes[end:])
524 ):
525 # Turns out this was a variable-length part, so grab the infix...
526 for span1, span2 in zip(
527 shrink_target.spans, result.spans, strict=False
528 ):
529 assert span1.start == span2.start
530 assert span1.start <= start
531 assert span1.label == span2.label
532 if span1.start == start and span1.end == end:
533 result_end = span2.end
534 break
535 else:
536 raise NotImplementedError("Expected matching prefixes")
537
538 attempt = (
539 choices[:start]
540 + result.choices[start:result_end]
541 + choices[end:]
542 )
543 chunks[(start, end)].append(result.choices[start:result_end])
544 result = self.engine.cached_test_function(attempt)
545
546 if result.status is Status.OVERRUN:
547 continue # pragma: no cover # flakily covered
548 result = cast(ConjectureResult, result)
549 else:
550 chunks[(start, end)].append(result.choices[start:end])
551
552 if shrink_target is not self.shrink_target: # pragma: no cover
553 # If we've shrunk further without meaning to, bail out.
554 self.shrink_target.slice_comments.clear()
555 return
556 if result.status is Status.VALID:
557 # The test passed, indicating that this param can't vary freely.
558 # However, it's really hard to write a simple and reliable covering
559 # test, because of our `seen_passing_buffers` check above.
560 break # pragma: no cover
561 if self.__predicate(result): # pragma: no branch
562 n_same_failures += 1
563 if n_same_failures >= 100:
564 self.shrink_target.slice_comments[(start, end)] = note
565 break
566
567 # Finally, if we've found multiple independently-variable parts, check whether
568 # they can all be varied together.
569 if len(self.shrink_target.slice_comments) <= 1:
570 return
571 n_same_failures_together = 0
572 chunks_by_start_index = sorted(chunks.items())
573 for _ in range(500): # pragma: no branch
574 # no-branch here because we don't coverage-test the abort-at-500 logic.
575 new_choices: list[ChoiceT] = []
576 prev_end = 0
577 for (start, end), ls in chunks_by_start_index:
578 assert prev_end <= start < end, "these chunks must be nonoverlapping"
579 new_choices.extend(choices[prev_end:start])
580 new_choices.extend(self.random.choice(ls))
581 prev_end = end
582
583 result = self.engine.cached_test_function(new_choices)
584
585 # This *can't* be a shrink because none of the components were.
586 assert shrink_target is self.shrink_target
587 if result.status == Status.VALID:
588 self.shrink_target.slice_comments[(0, 0)] = (
589 "The test sometimes passed when commented parts were varied together."
590 )
591 break # Test passed, this param can't vary freely.
592 if self.__predicate(result): # pragma: no branch
593 n_same_failures_together += 1
594 if n_same_failures_together >= 100:
595 self.shrink_target.slice_comments[(0, 0)] = (
596 "The test always failed when commented parts were varied together."
597 )
598 break
599
600 def greedy_shrink(self) -> None:
601 """Run a full set of greedy shrinks (that is, ones that will only ever
602 move to a better target) and update shrink_target appropriately.
603
604 This method iterates to a fixed point and so is idempontent - calling
605 it twice will have exactly the same effect as calling it once.
606 """
607 self.fixate_shrink_passes(self.shrink_passes)
608
609 def initial_coarse_reduction(self):
610 """Performs some preliminary reductions that should not be
611 repeated as part of the main shrink passes.
612
613 The main reason why these can't be included as part of shrink
614 passes is that they have much more ability to make the test
615 case "worse". e.g. they might rerandomise part of it, significantly
616 increasing the value of individual nodes, which works in direct
617 opposition to the lexical shrinking and will frequently undo
618 its work.
619 """
620 self.reduce_each_alternative()
621
622 @derived_value # type: ignore
623 def spans_starting_at(self):
624 result = [[] for _ in self.shrink_target.nodes]
625 for i, ex in enumerate(self.spans):
626 # We can have zero-length spans that start at the end
627 if ex.start < len(result):
628 result[ex.start].append(i)
629 return tuple(map(tuple, result))
630
631 def reduce_each_alternative(self):
632 """This is a pass that is designed to rerandomise use of the
633 one_of strategy or things that look like it, in order to try
634 to move from later strategies to earlier ones in the branch
635 order.
636
637 It does this by trying to systematically lower each value it
638 finds that looks like it might be the branch decision for
639 one_of, and then attempts to repair any changes in shape that
640 this causes.
641 """
642 i = 0
643 while i < len(self.shrink_target.nodes):
644 nodes = self.shrink_target.nodes
645 node = nodes[i]
646 if (
647 node.type == "integer"
648 and not node.was_forced
649 and node.value <= 10
650 and node.constraints["min_value"] == 0
651 ):
652 assert isinstance(node.value, int)
653
654 # We've found a plausible candidate for a ``one_of`` choice.
655 # We now want to see if the shape of the test case actually depends
656 # on it. If it doesn't, then we don't need to do this (comparatively
657 # costly) pass, and can let much simpler lexicographic reduction
658 # handle it later.
659 #
660 # We test this by trying to set the value to zero and seeing if the
661 # shape changes, as measured by either changing the number of subsequent
662 # nodes, or changing the nodes in such a way as to cause one of the
663 # previous values to no longer be valid in its position.
664 zero_attempt = self.cached_test_function(
665 nodes[:i] + (nodes[i].copy(with_value=0),) + nodes[i + 1 :]
666 )[1]
667 if (
668 zero_attempt is not self.shrink_target
669 and zero_attempt is not None
670 and zero_attempt.status >= Status.VALID
671 ):
672 changed_shape = len(zero_attempt.nodes) != len(nodes)
673
674 if not changed_shape:
675 for j in range(i + 1, len(nodes)):
676 zero_node = zero_attempt.nodes[j]
677 orig_node = nodes[j]
678 if (
679 zero_node.type != orig_node.type
680 or not choice_permitted(
681 orig_node.value, zero_node.constraints
682 )
683 ):
684 changed_shape = True
685 break
686 if changed_shape:
687 for v in range(node.value):
688 if self.try_lower_node_as_alternative(i, v):
689 break
690 i += 1
691
692 def try_lower_node_as_alternative(self, i, v):
693 """Attempt to lower `self.shrink_target.nodes[i]` to `v`,
694 while rerandomising and attempting to repair any subsequent
695 changes to the shape of the test case that this causes."""
696 nodes = self.shrink_target.nodes
697 if self.consider_new_nodes(
698 nodes[:i] + (nodes[i].copy(with_value=v),) + nodes[i + 1 :]
699 ):
700 return True
701
702 prefix = nodes[:i] + (nodes[i].copy(with_value=v),)
703 initial = self.shrink_target
704 spans = self.spans_starting_at[i]
705 for _ in range(3):
706 random_attempt = self.engine.cached_test_function(
707 [n.value for n in prefix], extend=len(nodes)
708 )
709 if random_attempt.status < Status.VALID:
710 continue
711 self.incorporate_test_data(random_attempt)
712 for j in spans:
713 initial_span = initial.spans[j]
714 attempt_span = random_attempt.spans[j]
715 contents = random_attempt.nodes[attempt_span.start : attempt_span.end]
716 self.consider_new_nodes(
717 nodes[:i] + contents + nodes[initial_span.end :]
718 )
719 if initial is not self.shrink_target:
720 return True
721 return False
722
723 @derived_value # type: ignore
724 def shrink_pass_choice_trees(self) -> dict[Any, ChoiceTree]:
725 return defaultdict(ChoiceTree)
726
727 def step(self, shrink_pass: ShrinkPass, *, random_order: bool = False) -> bool:
728 tree = self.shrink_pass_choice_trees[shrink_pass]
729 if tree.exhausted:
730 return False
731
732 initial_shrinks = self.shrinks
733 initial_calls = self.calls
734 initial_misaligned = self.misaligned
735 size = len(self.shrink_target.choices)
736 assert shrink_pass.name is not None
737 self.engine.explain_next_call_as(shrink_pass.name)
738
739 if random_order:
740 selection_order = random_selection_order(self.random)
741 else:
742 selection_order = prefix_selection_order(shrink_pass.last_prefix)
743
744 try:
745 shrink_pass.last_prefix = tree.step(
746 selection_order,
747 lambda chooser: shrink_pass.function(chooser),
748 )
749 finally:
750 shrink_pass.calls += self.calls - initial_calls
751 shrink_pass.misaligned += self.misaligned - initial_misaligned
752 shrink_pass.shrinks += self.shrinks - initial_shrinks
753 shrink_pass.deletions += size - len(self.shrink_target.choices)
754 self.engine.clear_call_explanation()
755 return True
756
757 def fixate_shrink_passes(self, passes: list[ShrinkPass]) -> None:
758 """Run steps from each pass in ``passes`` until the current shrink target
759 is a fixed point of all of them."""
760 any_ran = True
761 while any_ran:
762 any_ran = False
763
764 reordering = {}
765
766 # We run remove_discarded after every pass to do cleanup
767 # keeping track of whether that actually works. Either there is
768 # no discarded data and it is basically free, or it reliably works
769 # and deletes data, or it doesn't work. In that latter case we turn
770 # it off for the rest of this loop through the passes, but will
771 # try again once all of the passes have been run.
772 can_discard = self.remove_discarded()
773
774 calls_at_loop_start = self.calls
775
776 # We keep track of how many calls can be made by a single step
777 # without making progress and use this to test how much to pad
778 # out self.max_stall by as we go along.
779 max_calls_per_failing_step = 1
780
781 for sp in passes:
782 if can_discard:
783 can_discard = self.remove_discarded()
784
785 before_sp = self.shrink_target
786
787 # Run the shrink pass until it fails to make any progress
788 # max_failures times in a row. This implicitly boosts shrink
789 # passes that are more likely to work.
790 failures = 0
791 max_failures = 20
792 while failures < max_failures:
793 # We don't allow more than max_stall consecutive failures
794 # to shrink, but this means that if we're unlucky and the
795 # shrink passes are in a bad order where only the ones at
796 # the end are useful, if we're not careful this heuristic
797 # might stop us before we've tried everything. In order to
798 # avoid that happening, we make sure that there's always
799 # plenty of breathing room to make it through a single
800 # iteration of the fixate_shrink_passes loop.
801 self.max_stall = max(
802 self.max_stall,
803 2 * max_calls_per_failing_step
804 + (self.calls - calls_at_loop_start),
805 )
806
807 prev = self.shrink_target
808 initial_calls = self.calls
809 # It's better for us to run shrink passes in a deterministic
810 # order, to avoid repeat work, but this can cause us to create
811 # long stalls when there are a lot of steps which fail to do
812 # anything useful. In order to avoid this, once we've noticed
813 # we're in a stall (i.e. half of max_failures calls have failed
814 # to do anything) we switch to randomly jumping around. If we
815 # find a success then we'll resume deterministic order from
816 # there which, with any luck, is in a new good region.
817 if not self.step(sp, random_order=failures >= max_failures // 2):
818 # step returns False when there is nothing to do because
819 # the entire choice tree is exhausted. If this happens
820 # we break because we literally can't run this pass any
821 # more than we already have until something else makes
822 # progress.
823 break
824 any_ran = True
825
826 # Don't count steps that didn't actually try to do
827 # anything as failures. Otherwise, this call is a failure
828 # if it failed to make any changes to the shrink target.
829 if initial_calls != self.calls:
830 if prev is not self.shrink_target:
831 failures = 0
832 else:
833 max_calls_per_failing_step = max(
834 max_calls_per_failing_step, self.calls - initial_calls
835 )
836 failures += 1
837
838 # We reorder the shrink passes so that on our next run through
839 # we try good ones first. The rule is that shrink passes that
840 # did nothing useful are the worst, shrink passes that reduced
841 # the length are the best.
842 if self.shrink_target is before_sp:
843 reordering[sp] = 1
844 elif len(self.choices) < len(before_sp.choices):
845 reordering[sp] = -1
846 else:
847 reordering[sp] = 0
848
849 passes.sort(key=reordering.__getitem__)
850
851 @property
852 def nodes(self) -> tuple[ChoiceNode, ...]:
853 return self.shrink_target.nodes
854
855 @property
856 def choices(self) -> tuple[ChoiceT, ...]:
857 return self.shrink_target.choices
858
859 @property
860 def spans(self) -> Spans:
861 return self.shrink_target.spans
862
863 @derived_value # type: ignore
864 def spans_by_label(self):
865 """
866 A mapping of labels to a list of spans with that label. Spans in the list
867 are ordered by their normal index order.
868 """
869
870 spans_by_label = defaultdict(list)
871 for ex in self.spans:
872 spans_by_label[ex.label].append(ex)
873 return dict(spans_by_label)
874
875 @derived_value # type: ignore
876 def distinct_labels(self):
877 return sorted(self.spans_by_label, key=str)
878
879 def pass_to_descendant(self, chooser):
880 """Attempt to replace each span with a descendant span.
881
882 This is designed to deal with strategies that call themselves
883 recursively. For example, suppose we had:
884
885 binary_tree = st.deferred(
886 lambda: st.one_of(
887 st.integers(), st.tuples(binary_tree, binary_tree)))
888
889 This pass guarantees that we can replace any binary tree with one of
890 its subtrees - each of those will create an interval that the parent
891 could validly be replaced with, and this pass will try doing that.
892
893 This is pretty expensive - it takes O(len(intervals)^2) - so we run it
894 late in the process when we've got the number of intervals as far down
895 as possible.
896 """
897
898 label = chooser.choose(
899 self.distinct_labels, lambda l: len(self.spans_by_label[l]) >= 2
900 )
901
902 spans = self.spans_by_label[label]
903 i = chooser.choose(range(len(spans) - 1))
904 ancestor = spans[i]
905
906 if i + 1 == len(spans) or spans[i + 1].start >= ancestor.end:
907 return
908
909 @self.cached(label, i)
910 def descendants():
911 lo = i + 1
912 hi = len(spans)
913 while lo + 1 < hi:
914 mid = (lo + hi) // 2
915 if spans[mid].start >= ancestor.end:
916 hi = mid
917 else:
918 lo = mid
919 return [
920 span
921 for span in spans[i + 1 : hi]
922 if span.choice_count < ancestor.choice_count
923 ]
924
925 descendant = chooser.choose(descendants, lambda ex: ex.choice_count > 0)
926
927 assert ancestor.start <= descendant.start
928 assert ancestor.end >= descendant.end
929 assert descendant.choice_count < ancestor.choice_count
930
931 self.consider_new_nodes(
932 self.nodes[: ancestor.start]
933 + self.nodes[descendant.start : descendant.end]
934 + self.nodes[ancestor.end :]
935 )
936
937 def lower_common_node_offset(self):
938 """Sometimes we find ourselves in a situation where changes to one part
939 of the choice sequence unlock changes to other parts. Sometimes this is
940 good, but sometimes this can cause us to exhibit exponential slow
941 downs!
942
943 e.g. suppose we had the following:
944
945 m = draw(integers(min_value=0))
946 n = draw(integers(min_value=0))
947 assert abs(m - n) > 1
948
949 If this fails then we'll end up with a loop where on each iteration we
950 reduce each of m and n by 2 - m can't go lower because of n, then n
951 can't go lower because of m.
952
953 This will take us O(m) iterations to complete, which is exponential in
954 the data size, as we gradually zig zag our way towards zero.
955
956 This can only happen if we're failing to reduce the size of the choice
957 sequence: The number of iterations that reduce the length of the choice
958 sequence is bounded by that length.
959
960 So what we do is this: We keep track of which nodes are changing, and
961 then if there's some non-zero common offset to them we try and minimize
962 them all at once by lowering that offset.
963
964 This may not work, and it definitely won't get us out of all possible
965 exponential slow downs (an example of where it doesn't is where the
966 shape of the nodes changes as a result of this bouncing behaviour),
967 but it fails fast when it doesn't work and gets us out of a really
968 nastily slow case when it does.
969 """
970 if len(self.__changed_nodes) <= 1:
971 return
972
973 changed = []
974 for i in sorted(self.__changed_nodes):
975 node = self.nodes[i]
976 if node.trivial or node.type != "integer":
977 continue
978 changed.append(node)
979
980 if not changed:
981 return
982
983 ints = [
984 abs(node.value - node.constraints["shrink_towards"]) for node in changed
985 ]
986 offset = min(ints)
987 assert offset > 0
988
989 for i in range(len(ints)):
990 ints[i] -= offset
991
992 st = self.shrink_target
993
994 def offset_node(node, n):
995 return (
996 node.index,
997 node.index + 1,
998 [node.copy(with_value=node.constraints["shrink_towards"] + n)],
999 )
1000
1001 def consider(n, sign):
1002 return self.consider_new_nodes(
1003 replace_all(
1004 st.nodes,
1005 [
1006 offset_node(node, sign * (n + v))
1007 for node, v in zip(changed, ints, strict=False)
1008 ],
1009 )
1010 )
1011
1012 # shrink from both sides
1013 Integer.shrink(offset, lambda n: consider(n, 1))
1014 Integer.shrink(offset, lambda n: consider(n, -1))
1015 self.clear_change_tracking()
1016
1017 def clear_change_tracking(self):
1018 self.__last_checked_changed_at = self.shrink_target
1019 self.__all_changed_nodes = set()
1020
1021 def mark_changed(self, i):
1022 self.__changed_nodes.add(i)
1023
1024 @property
1025 def __changed_nodes(self) -> set[int]:
1026 if self.__last_checked_changed_at is self.shrink_target:
1027 return self.__all_changed_nodes
1028
1029 prev_target = self.__last_checked_changed_at
1030 new_target = self.shrink_target
1031 assert prev_target is not new_target
1032 prev_nodes = prev_target.nodes
1033 new_nodes = new_target.nodes
1034 assert sort_key(new_target.nodes) < sort_key(prev_target.nodes)
1035
1036 if len(prev_nodes) != len(new_nodes) or any(
1037 n1.type != n2.type for n1, n2 in zip(prev_nodes, new_nodes, strict=True)
1038 ):
1039 # should we check constraints are equal as well?
1040 self.__all_changed_nodes = set()
1041 else:
1042 assert len(prev_nodes) == len(new_nodes)
1043 for i, (n1, n2) in enumerate(zip(prev_nodes, new_nodes, strict=True)):
1044 assert n1.type == n2.type
1045 if not choice_equal(n1.value, n2.value):
1046 self.__all_changed_nodes.add(i)
1047
1048 return self.__all_changed_nodes
1049
1050 def update_shrink_target(self, new_target):
1051 assert isinstance(new_target, ConjectureResult)
1052 self.shrinks += 1
1053 # If we are just taking a long time to shrink we don't want to
1054 # trigger this heuristic, so whenever we shrink successfully
1055 # we give ourselves a bit of breathing room to make sure we
1056 # would find a shrink that took that long to find the next time.
1057 # The case where we're taking a long time but making steady
1058 # progress is handled by `finish_shrinking_deadline` in engine.py
1059 self.max_stall = max(
1060 self.max_stall, (self.calls - self.calls_at_last_shrink) * 2
1061 )
1062 self.calls_at_last_shrink = self.calls
1063 self.shrink_target = new_target
1064 self.__derived_values = {}
1065
1066 def try_shrinking_nodes(self, nodes, n):
1067 """Attempts to replace each node in the nodes list with n. Returns
1068 True if it succeeded (which may include some additional modifications
1069 to shrink_target).
1070
1071 In current usage it is expected that each of the nodes currently have
1072 the same value and choice_type, although this is not essential. Note that
1073 n must be < the node at min(nodes) or this is not a valid shrink.
1074
1075 This method will attempt to do some small amount of work to delete data
1076 that occurs after the end of the nodes. This is useful for cases where
1077 there is some size dependency on the value of a node.
1078 """
1079 # If the length of the shrink target has changed from under us such that
1080 # the indices are out of bounds, give up on the replacement.
1081 # TODO_BETTER_SHRINK: we probably want to narrow down the root cause here at some point.
1082 if any(node.index >= len(self.nodes) for node in nodes):
1083 return # pragma: no cover
1084
1085 initial_attempt = replace_all(
1086 self.nodes,
1087 [(node.index, node.index + 1, [node.copy(with_value=n)]) for node in nodes],
1088 )
1089
1090 attempt = self.cached_test_function(initial_attempt)[1]
1091
1092 if attempt is None:
1093 return False
1094
1095 if attempt is self.shrink_target:
1096 # if the initial shrink was a success, try lowering offsets.
1097 self.lower_common_node_offset()
1098 return True
1099
1100 # If this produced something completely invalid we ditch it
1101 # here rather than trying to persevere.
1102 if attempt.status is Status.OVERRUN:
1103 return False
1104
1105 if attempt.status is Status.INVALID:
1106 return False
1107
1108 if attempt.misaligned_at is not None:
1109 # we're invalid due to a misalignment in the tree. We'll try to fix
1110 # a very specific type of misalignment here: where we have a node of
1111 # {"size": n} and tried to draw the same node, but with {"size": m < n}.
1112 # This can occur with eg
1113 #
1114 # n = data.draw_integer()
1115 # s = data.draw_string(min_size=n)
1116 #
1117 # where we try lowering n, resulting in the test_function drawing a lower
1118 # min_size than our attempt had for the draw_string node.
1119 #
1120 # We'll now try realigning this tree by:
1121 # * replacing the constraints in our attempt with what test_function tried
1122 # to draw in practice
1123 # * truncating the value of that node to match min_size
1124 #
1125 # This helps in the specific case of drawing a value and then drawing
1126 # a collection of that size...and not much else. In practice this
1127 # helps because this antipattern is fairly common.
1128
1129 # TODO we'll probably want to apply the same trick as in the valid
1130 # case of this function of preserving from the right instead of
1131 # preserving from the left. see test_can_shrink_variable_string_draws.
1132
1133 (index, attempt_choice_type, attempt_constraints, _attempt_forced) = (
1134 attempt.misaligned_at
1135 )
1136 node = self.nodes[index]
1137 if node.type != attempt_choice_type:
1138 return False # pragma: no cover
1139 if node.was_forced:
1140 return False # pragma: no cover
1141
1142 if node.type in {"string", "bytes"}:
1143 # if the size *increased*, we would have to guess what to pad with
1144 # in order to try fixing up this attempt. Just give up.
1145 if node.constraints["min_size"] <= attempt_constraints["min_size"]:
1146 # attempts which increase min_size tend to overrun rather than
1147 # be misaligned, making a covering case difficult.
1148 return False # pragma: no cover
1149 # the size decreased in our attempt. Try again, but truncate the value
1150 # to that size by removing any elements past min_size.
1151 return self.consider_new_nodes(
1152 initial_attempt[: node.index]
1153 + [
1154 initial_attempt[node.index].copy(
1155 with_constraints=attempt_constraints,
1156 with_value=initial_attempt[node.index].value[
1157 : attempt_constraints["min_size"]
1158 ],
1159 )
1160 ]
1161 + initial_attempt[node.index :]
1162 )
1163
1164 lost_nodes = len(self.nodes) - len(attempt.nodes)
1165 if lost_nodes <= 0:
1166 return False
1167
1168 start = nodes[0].index
1169 end = nodes[-1].index + 1
1170 # We now look for contiguous regions to delete that might help fix up
1171 # this failed shrink. We only look for contiguous regions of the right
1172 # lengths because doing anything more than that starts to get very
1173 # expensive. See minimize_individual_choices for where we
1174 # try to be more aggressive.
1175 regions_to_delete = {(end, end + lost_nodes)}
1176
1177 for ex in self.spans:
1178 if ex.start > start:
1179 continue
1180 if ex.end <= end:
1181 continue
1182
1183 if ex.index >= len(attempt.spans):
1184 continue # pragma: no cover
1185
1186 replacement = attempt.spans[ex.index]
1187 in_original = [c for c in ex.children if c.start >= end]
1188 in_replaced = [c for c in replacement.children if c.start >= end]
1189
1190 if len(in_replaced) >= len(in_original) or not in_replaced:
1191 continue
1192
1193 # We've found a span where some of the children went missing
1194 # as a result of this change, and just replacing it with the data
1195 # it would have had and removing the spillover didn't work. This
1196 # means that some of its children towards the right must be
1197 # important, so we try to arrange it so that it retains its
1198 # rightmost children instead of its leftmost.
1199 regions_to_delete.add(
1200 (in_original[0].start, in_original[-len(in_replaced)].start)
1201 )
1202
1203 for u, v in sorted(regions_to_delete, key=lambda x: x[1] - x[0], reverse=True):
1204 try_with_deleted = initial_attempt[:u] + initial_attempt[v:]
1205 if self.consider_new_nodes(try_with_deleted):
1206 return True
1207
1208 return False
1209
1210 def remove_discarded(self):
1211 """Try removing all bytes marked as discarded.
1212
1213 This is primarily to deal with data that has been ignored while
1214 doing rejection sampling - e.g. as a result of an integer range, or a
1215 filtered strategy.
1216
1217 Such data will also be handled by the adaptive_example_deletion pass,
1218 but that pass is necessarily more conservative and will try deleting
1219 each interval individually. The common case is that all data drawn and
1220 rejected can just be thrown away immediately in one block, so this pass
1221 will be much faster than trying each one individually when it works.
1222
1223 returns False if there is discarded data and removing it does not work,
1224 otherwise returns True.
1225 """
1226 while self.shrink_target.has_discards:
1227 discarded = []
1228
1229 for ex in self.shrink_target.spans:
1230 if (
1231 ex.choice_count > 0
1232 and ex.discarded
1233 and (not discarded or ex.start >= discarded[-1][-1])
1234 ):
1235 discarded.append((ex.start, ex.end))
1236
1237 # This can happen if we have discards but they are all of
1238 # zero length. This shouldn't happen very often so it's
1239 # faster to check for it here than at the point of example
1240 # generation.
1241 if not discarded:
1242 break
1243
1244 attempt = list(self.nodes)
1245 for u, v in reversed(discarded):
1246 del attempt[u:v]
1247
1248 if not self.consider_new_nodes(tuple(attempt)):
1249 return False
1250 return True
1251
1252 @derived_value # type: ignore
1253 def duplicated_nodes(self):
1254 """Returns a list of nodes grouped (choice_type, value)."""
1255 duplicates = defaultdict(list)
1256 for node in self.nodes:
1257 duplicates[(node.type, choice_key(node.value))].append(node)
1258 return list(duplicates.values())
1259
1260 def node_program(self, program: str) -> ShrinkPass:
1261 return ShrinkPass(
1262 lambda chooser: self._node_program(chooser, program),
1263 name=f"node_program_{program}",
1264 )
1265
1266 def _node_program(self, chooser, program):
1267 n = len(program)
1268 # Adaptively attempt to run the node program at the current
1269 # index. If this successfully applies the node program ``k`` times
1270 # then this runs in ``O(log(k))`` test function calls.
1271 i = chooser.choose(range(len(self.nodes) - n + 1))
1272
1273 # First, run the node program at the chosen index. If this fails,
1274 # don't do any extra work, so that failure is as cheap as possible.
1275 if not self.run_node_program(i, program, original=self.shrink_target):
1276 return
1277
1278 # Because we run in a random order we will often find ourselves in the middle
1279 # of a region where we could run the node program. We thus start by moving
1280 # left to the beginning of that region if possible in order to to start from
1281 # the beginning of that region.
1282 def offset_left(k):
1283 return i - k * n
1284
1285 i = offset_left(
1286 find_integer(
1287 lambda k: self.run_node_program(
1288 offset_left(k), program, original=self.shrink_target
1289 )
1290 )
1291 )
1292
1293 original = self.shrink_target
1294 # Now try to run the node program multiple times here.
1295 find_integer(
1296 lambda k: self.run_node_program(i, program, original=original, repeats=k)
1297 )
1298
1299 def minimize_duplicated_choices(self, chooser):
1300 """Find choices that have been duplicated in multiple places and attempt
1301 to minimize all of the duplicates simultaneously.
1302
1303 This lets us handle cases where two values can't be shrunk
1304 independently of each other but can easily be shrunk together.
1305 For example if we had something like:
1306
1307 ls = data.draw(lists(integers()))
1308 y = data.draw(integers())
1309 assert y not in ls
1310
1311 Suppose we drew y = 3 and after shrinking we have ls = [3]. If we were
1312 to replace both 3s with 0, this would be a valid shrink, but if we were
1313 to replace either 3 with 0 on its own the test would start passing.
1314
1315 It is also useful for when that duplication is accidental and the value
1316 of the choices don't matter very much because it allows us to replace
1317 more values at once.
1318 """
1319 nodes = chooser.choose(self.duplicated_nodes)
1320 # we can't lower any nodes which are trivial. try proceeding with the
1321 # remaining nodes.
1322 nodes = [node for node in nodes if not node.trivial]
1323 if len(nodes) <= 1:
1324 return
1325
1326 self.minimize_nodes(nodes)
1327
1328 def redistribute_numeric_pairs(self, chooser):
1329 """If there is a sum of generated numbers that we need their sum
1330 to exceed some bound, lowering one of them requires raising the
1331 other. This pass enables that."""
1332
1333 # look for a pair of nodes (node1, node2) which are both numeric
1334 # and aren't separated by too many other nodes. We'll decrease node1 and
1335 # increase node2 (note that the other way around doesn't make sense as
1336 # it's strictly worse in the ordering).
1337 def can_choose_node(node):
1338 # don't choose nan, inf, or floats above the threshold where f + 1 > f
1339 # (which is not necessarily true for floats above MAX_PRECISE_INTEGER).
1340 # The motivation for the last condition is to avoid trying weird
1341 # non-shrinks where we raise one node and think we lowered another
1342 # (but didn't).
1343 return node.type in {"integer", "float"} and not (
1344 node.type == "float"
1345 and (math.isnan(node.value) or abs(node.value) >= MAX_PRECISE_INTEGER)
1346 )
1347
1348 node1 = chooser.choose(
1349 self.nodes,
1350 lambda node: can_choose_node(node) and not node.trivial,
1351 )
1352 node2 = chooser.choose(
1353 self.nodes,
1354 lambda node: can_choose_node(node)
1355 # Note that it's fine for node2 to be trivial, because we're going to
1356 # explicitly make it *not* trivial by adding to its value.
1357 and not node.was_forced
1358 # to avoid quadratic behavior, scan ahead only a small amount for
1359 # the related node.
1360 and node1.index < node.index <= node1.index + 4,
1361 )
1362
1363 m: int | float = node1.value
1364 n: int | float = node2.value
1365
1366 def boost(k: int) -> bool:
1367 # floats always shrink towards 0
1368 shrink_towards = (
1369 node1.constraints["shrink_towards"] if node1.type == "integer" else 0
1370 )
1371 if k > abs(m - shrink_towards):
1372 return False
1373
1374 # We are trying to move node1 (m) closer to shrink_towards, and node2
1375 # (n) farther away from shrink_towards. If m is below shrink_towards,
1376 # we want to add to m and subtract from n, and vice versa if above
1377 # shrink_towards.
1378 if m < shrink_towards:
1379 k = -k
1380
1381 try:
1382 v1 = m - k
1383 v2 = n + k
1384 except OverflowError: # pragma: no cover
1385 # if n or m is a float and k is over sys.float_info.max, coercing
1386 # k to a float will overflow.
1387 return False
1388
1389 # if we've increased node2 to the point that we're past max precision,
1390 # give up - things have become too unstable.
1391 if node1.type == "float" and abs(v2) >= MAX_PRECISE_INTEGER:
1392 return False
1393
1394 return self.consider_new_nodes(
1395 self.nodes[: node1.index]
1396 + (node1.copy(with_value=v1),)
1397 + self.nodes[node1.index + 1 : node2.index]
1398 + (node2.copy(with_value=v2),)
1399 + self.nodes[node2.index + 1 :]
1400 )
1401
1402 find_integer(boost)
1403
1404 def lower_integers_together(self, chooser):
1405 node1 = chooser.choose(
1406 self.nodes, lambda n: n.type == "integer" and not n.trivial
1407 )
1408 # Search up to 3 nodes ahead, to avoid quadratic time.
1409 node2 = self.nodes[
1410 chooser.choose(
1411 range(node1.index + 1, min(len(self.nodes), node1.index + 3 + 1)),
1412 lambda i: self.nodes[i].type == "integer"
1413 and not self.nodes[i].was_forced,
1414 )
1415 ]
1416
1417 # one might expect us to require node2 to be nontrivial, and to minimize
1418 # the node which is closer to its shrink_towards, rather than node1
1419 # unconditionally. In reality, it's acceptable for us to transition node2
1420 # from trivial to nontrivial, because the shrink ordering is dominated by
1421 # the complexity of the earlier node1. What matters is minimizing node1.
1422 shrink_towards = node1.constraints["shrink_towards"]
1423
1424 def consider(n):
1425 return self.consider_new_nodes(
1426 self.nodes[: node1.index]
1427 + (node1.copy(with_value=node1.value - n),)
1428 + self.nodes[node1.index + 1 : node2.index]
1429 + (node2.copy(with_value=node2.value - n),)
1430 + self.nodes[node2.index + 1 :]
1431 )
1432
1433 find_integer(lambda n: consider(shrink_towards - n))
1434 find_integer(lambda n: consider(n - shrink_towards))
1435
1436 def lower_duplicated_characters(self, chooser):
1437 """
1438 Select two string choices no more than 4 choices apart and simultaneously
1439 lower characters which appear in both strings. This helps cases where the
1440 same character must appear in two strings, but the actual value of the
1441 character is not relevant.
1442
1443 This shrinking pass currently only tries lowering *all* instances of the
1444 duplicated character in both strings. So for instance, given two choices:
1445
1446 "bbac"
1447 "abbb"
1448
1449 we would try lowering all five of the b characters simultaneously. This
1450 may fail to shrink some cases where only certain character indices are
1451 correlated, for instance if only the b at index 1 could be lowered
1452 simultaneously and the rest did in fact actually have to be a `b`.
1453
1454 It would be nice to try shrinking that case as well, but we would need good
1455 safeguards because it could get very expensive to try all combinations.
1456 I expect lowering all duplicates to handle most cases in the meantime.
1457 """
1458 node1 = chooser.choose(
1459 self.nodes, lambda n: n.type == "string" and not n.trivial
1460 )
1461
1462 # limit search to up to 4 choices ahead, to avoid quadratic behavior
1463 node2 = self.nodes[
1464 chooser.choose(
1465 range(node1.index + 1, min(len(self.nodes), node1.index + 1 + 4)),
1466 lambda i: self.nodes[i].type == "string" and not self.nodes[i].trivial
1467 # select nodes which have at least one of the same character present
1468 and set(node1.value) & set(self.nodes[i].value),
1469 )
1470 ]
1471
1472 duplicated_characters = set(node1.value) & set(node2.value)
1473 # deterministic ordering
1474 char = chooser.choose(sorted(duplicated_characters))
1475 intervals = node1.constraints["intervals"]
1476
1477 def copy_node(node, n):
1478 # replace all duplicate characters in each string. This might miss
1479 # some shrinks compared to only replacing some, but trying all possible
1480 # combinations of indices could get expensive if done without some
1481 # thought.
1482 return node.copy(
1483 with_value=node.value.replace(char, intervals.char_in_shrink_order(n))
1484 )
1485
1486 Integer.shrink(
1487 intervals.index_from_char_in_shrink_order(char),
1488 lambda n: self.consider_new_nodes(
1489 self.nodes[: node1.index]
1490 + (copy_node(node1, n),)
1491 + self.nodes[node1.index + 1 : node2.index]
1492 + (copy_node(node2, n),)
1493 + self.nodes[node2.index + 1 :]
1494 ),
1495 )
1496
1497 def minimize_nodes(self, nodes):
1498 choice_type = nodes[0].type
1499 value = nodes[0].value
1500 # unlike choice_type and value, constraints are *not* guaranteed to be equal among all
1501 # passed nodes. We arbitrarily use the constraints of the first node. I think
1502 # this is unsound (= leads to us trying shrinks that could not have been
1503 # generated), but those get discarded at test-time, and this enables useful
1504 # slips where constraints are not equal but are close enough that doing the
1505 # same operation on both basically just works.
1506 constraints = nodes[0].constraints
1507 assert all(
1508 node.type == choice_type and choice_equal(node.value, value)
1509 for node in nodes
1510 )
1511
1512 if choice_type == "integer":
1513 shrink_towards = constraints["shrink_towards"]
1514 # try shrinking from both sides towards shrink_towards.
1515 # we're starting from n = abs(shrink_towards - value). Because the
1516 # shrinker will not check its starting value, we need to try
1517 # shrinking to n first.
1518 self.try_shrinking_nodes(nodes, abs(shrink_towards - value))
1519 Integer.shrink(
1520 abs(shrink_towards - value),
1521 lambda n: self.try_shrinking_nodes(nodes, shrink_towards + n),
1522 )
1523 Integer.shrink(
1524 abs(shrink_towards - value),
1525 lambda n: self.try_shrinking_nodes(nodes, shrink_towards - n),
1526 )
1527 elif choice_type == "float":
1528 self.try_shrinking_nodes(nodes, abs(value))
1529 Float.shrink(
1530 abs(value),
1531 lambda val: self.try_shrinking_nodes(nodes, val),
1532 )
1533 Float.shrink(
1534 abs(value),
1535 lambda val: self.try_shrinking_nodes(nodes, -val),
1536 )
1537 elif choice_type == "boolean":
1538 # must be True, otherwise would be trivial and not selected.
1539 assert value is True
1540 # only one thing to try: false!
1541 self.try_shrinking_nodes(nodes, False)
1542 elif choice_type == "bytes":
1543 Bytes.shrink(
1544 value,
1545 lambda val: self.try_shrinking_nodes(nodes, val),
1546 min_size=constraints["min_size"],
1547 )
1548 elif choice_type == "string":
1549 String.shrink(
1550 value,
1551 lambda val: self.try_shrinking_nodes(nodes, val),
1552 intervals=constraints["intervals"],
1553 min_size=constraints["min_size"],
1554 )
1555 else:
1556 raise NotImplementedError
1557
1558 def try_trivial_spans(self, chooser):
1559 i = chooser.choose(range(len(self.spans)))
1560
1561 prev = self.shrink_target
1562 nodes = self.shrink_target.nodes
1563 span = self.spans[i]
1564 prefix = nodes[: span.start]
1565 replacement = tuple(
1566 [
1567 (
1568 node
1569 if node.was_forced
1570 else node.copy(
1571 with_value=choice_from_index(0, node.type, node.constraints)
1572 )
1573 )
1574 for node in nodes[span.start : span.end]
1575 ]
1576 )
1577 suffix = nodes[span.end :]
1578 attempt = self.cached_test_function(prefix + replacement + suffix)[1]
1579
1580 if self.shrink_target is not prev:
1581 return
1582
1583 if isinstance(attempt, ConjectureResult):
1584 new_span = attempt.spans[i]
1585 new_replacement = attempt.nodes[new_span.start : new_span.end]
1586 self.consider_new_nodes(prefix + new_replacement + suffix)
1587
1588 def minimize_individual_choices(self, chooser):
1589 """Attempt to minimize each choice in sequence.
1590
1591 This is the pass that ensures that e.g. each integer we draw is a
1592 minimum value. So it's the part that guarantees that if we e.g. do
1593
1594 x = data.draw(integers())
1595 assert x < 10
1596
1597 then in our shrunk example, x = 10 rather than say 97.
1598
1599 If we are unsuccessful at minimizing a choice of interest we then
1600 check if that's because it's changing the size of the test case and,
1601 if so, we also make an attempt to delete parts of the test case to
1602 see if that fixes it.
1603
1604 We handle most of the common cases in try_shrinking_nodes which is
1605 pretty good at clearing out large contiguous blocks of dead space,
1606 but it fails when there is data that has to stay in particular places
1607 in the list.
1608 """
1609 node = chooser.choose(self.nodes, lambda node: not node.trivial)
1610 initial_target = self.shrink_target
1611
1612 self.minimize_nodes([node])
1613 if self.shrink_target is not initial_target:
1614 # the shrink target changed, so our shrink worked. Defer doing
1615 # anything more intelligent until this shrink fails.
1616 return
1617
1618 # the shrink failed. One particularly common case where minimizing a
1619 # node can fail is the antipattern of drawing a size and then drawing a
1620 # collection of that size, or more generally when there is a size
1621 # dependency on some single node. We'll explicitly try and fix up this
1622 # common case here: if decreasing an integer node by one would reduce
1623 # the size of the generated input, we'll try deleting things after that
1624 # node and see if the resulting attempt works.
1625
1626 if node.type != "integer":
1627 # Only try this fixup logic on integer draws. Almost all size
1628 # dependencies are on integer draws, and if it's not, it's doing
1629 # something convoluted enough that it is unlikely to shrink well anyway.
1630 # TODO: extent to floats? we probably currently fail on the following,
1631 # albeit convoluted example:
1632 # n = int(data.draw(st.floats()))
1633 # s = data.draw(st.lists(st.integers(), min_size=n, max_size=n))
1634 return
1635
1636 lowered = (
1637 self.nodes[: node.index]
1638 + (node.copy(with_value=node.value - 1),)
1639 + self.nodes[node.index + 1 :]
1640 )
1641 attempt = self.cached_test_function(lowered)[1]
1642 if (
1643 attempt is None
1644 or attempt.status < Status.VALID
1645 or len(attempt.nodes) == len(self.nodes)
1646 or len(attempt.nodes) == node.index + 1
1647 ):
1648 # no point in trying our size-dependency-logic if our attempt at
1649 # lowering the node resulted in:
1650 # * an invalid conjecture data
1651 # * the same number of nodes as before
1652 # * no nodes beyond the lowered node (nothing to try to delete afterwards)
1653 return
1654
1655 # If it were then the original shrink should have worked and we could
1656 # never have got here.
1657 assert attempt is not self.shrink_target
1658
1659 @self.cached(node.index)
1660 def first_span_after_node():
1661 lo = 0
1662 hi = len(self.spans)
1663 while lo + 1 < hi:
1664 mid = (lo + hi) // 2
1665 span = self.spans[mid]
1666 if span.start >= node.index:
1667 hi = mid
1668 else:
1669 lo = mid
1670 return hi
1671
1672 # we try deleting both entire spans, and single nodes.
1673 # If we wanted to get more aggressive, we could try deleting n
1674 # consecutive nodes (that don't cross a span boundary) for say
1675 # n <= 2 or n <= 3.
1676 if chooser.choose([True, False]):
1677 span = self.spans[
1678 chooser.choose(
1679 range(first_span_after_node, len(self.spans)),
1680 lambda i: self.spans[i].choice_count > 0,
1681 )
1682 ]
1683 self.consider_new_nodes(lowered[: span.start] + lowered[span.end :])
1684 else:
1685 node = self.nodes[chooser.choose(range(node.index + 1, len(self.nodes)))]
1686 self.consider_new_nodes(lowered[: node.index] + lowered[node.index + 1 :])
1687
1688 def reorder_spans(self, chooser):
1689 """This pass allows us to reorder the children of each span.
1690
1691 For example, consider the following:
1692
1693 .. code-block:: python
1694
1695 import hypothesis.strategies as st
1696 from hypothesis import given
1697
1698
1699 @given(st.text(), st.text())
1700 def test_not_equal(x, y):
1701 assert x != y
1702
1703 Without the ability to reorder x and y this could fail either with
1704 ``x=""``, ``y="0"``, or the other way around. With reordering it will
1705 reliably fail with ``x=""``, ``y="0"``.
1706 """
1707 span = chooser.choose(self.spans)
1708
1709 label = chooser.choose(span.children).label
1710 spans = [c for c in span.children if c.label == label]
1711 if len(spans) <= 1:
1712 return
1713
1714 endpoints = [(span.start, span.end) for span in spans]
1715 st = self.shrink_target
1716
1717 Ordering.shrink(
1718 range(len(spans)),
1719 lambda indices: self.consider_new_nodes(
1720 replace_all(
1721 st.nodes,
1722 [
1723 (
1724 u,
1725 v,
1726 st.nodes[spans[i].start : spans[i].end],
1727 )
1728 for (u, v), i in zip(endpoints, indices, strict=True)
1729 ],
1730 )
1731 ),
1732 key=lambda i: sort_key(st.nodes[spans[i].start : spans[i].end]),
1733 )
1734
1735 def run_node_program(self, i, program, original, repeats=1):
1736 """Node programs are a mini-DSL for node rewriting, defined as a sequence
1737 of commands that can be run at some index into the nodes
1738
1739 Commands are:
1740
1741 * "X", delete this node
1742
1743 This method runs the node program in ``program`` at node index
1744 ``i`` on the ConjectureData ``original``. If ``repeats > 1`` then it
1745 will attempt to approximate the results of running it that many times.
1746
1747 Returns True if this successfully changes the underlying shrink target,
1748 else False.
1749 """
1750 if i + len(program) > len(original.nodes) or i < 0:
1751 return False
1752 attempt = list(original.nodes)
1753 for _ in range(repeats):
1754 for k, command in reversed(list(enumerate(program))):
1755 j = i + k
1756 if j >= len(attempt):
1757 return False
1758
1759 if command == "X":
1760 del attempt[j]
1761 else:
1762 raise NotImplementedError(f"Unrecognised command {command!r}")
1763
1764 return self.consider_new_nodes(attempt)