1# This file is part of Hypothesis, which may be found at
2# https://github.com/HypothesisWorks/hypothesis/
3#
4# Copyright the Hypothesis Authors.
5# Individual contributors are listed in AUTHORS.rst and the git log.
6#
7# This Source Code Form is subject to the terms of the Mozilla Public License,
8# v. 2.0. If a copy of the MPL was not distributed with this file, You can
9# obtain one at https://mozilla.org/MPL/2.0/.
10
11import math
12from collections.abc import Generator, Set
13from dataclasses import dataclass, field
14from random import Random
15from typing import TYPE_CHECKING, Final, TypeAlias, cast
16
17from hypothesis.errors import (
18 FlakyReplay,
19 FlakyStrategyDefinition,
20 HypothesisException,
21 StopTest,
22)
23from hypothesis.internal import floats as flt
24from hypothesis.internal.conjecture.choice import (
25 BooleanConstraints,
26 BytesConstraints,
27 ChoiceConstraintsT,
28 ChoiceT,
29 ChoiceTypeT,
30 FloatConstraints,
31 IntegerConstraints,
32 StringConstraints,
33 choice_from_index,
34)
35from hypothesis.internal.conjecture.data import ConjectureData, DataObserver, Status
36from hypothesis.internal.escalation import InterestingOrigin
37from hypothesis.internal.floats import (
38 count_between_floats,
39 float_to_int,
40 int_to_float,
41 sign_aware_lte,
42)
43
44if TYPE_CHECKING:
45 from hypothesis.vendor.pretty import RepresentationPrinter
46
47ChildrenCacheValueT: TypeAlias = tuple[
48 Generator[ChoiceT, None, None], list[ChoiceT], set[ChoiceT]
49]
50
51
class PreviouslyUnseenBehaviour(HypothesisException):
    """Raised when replaying choices against the tree reaches behaviour
    that the tree has never recorded before."""
54
55
# Error message used whenever the tree observes different generation behaviour
# across runs that replayed identical choices (see FlakyStrategyDefinition
# usages below, e.g. in TreeNode.split_at).
_FLAKY_STRAT_MSG = (
    "Inconsistent data generation! Data generation behaved differently "
    "between different runs. Is your data generation depending on external "
    "state?"
)


# Shared immutable empty set, returned by TreeNode.forced when no indices have
# been forced. Avoids allocating a fresh set on every access.
EMPTY: frozenset[int] = frozenset()
64
65
66@dataclass(slots=True, frozen=True)
67class Killed:
68 """Represents a transition to part of the tree which has been marked as
69 "killed", meaning we want to treat it as not worth exploring, so it will
70 be treated as if it were completely explored for the purposes of
71 exhaustion."""
72
73 next_node: "TreeNode"
74
75 def _repr_pretty_(self, p: "RepresentationPrinter", cycle: bool) -> None:
76 assert cycle is False
77 p.text("Killed")
78
79
def _node_pretty(
    choice_type: ChoiceTypeT,
    value: ChoiceT,
    constraints: ChoiceConstraintsT,
    *,
    forced: bool,
) -> str:
    """Render one drawn node as a single-line summary for pretty-printing."""
    suffix = " [forced]" if forced else ""
    return f"{choice_type} {value!r}{suffix} {constraints}"
89
90
@dataclass(slots=True, frozen=False)
class Branch:
    """A transition where several different values have been drawn, with one
    child subtree per distinct value seen so far."""

    constraints: ChoiceConstraintsT
    choice_type: ChoiceTypeT
    children: dict[ChoiceT, "TreeNode"] = field(repr=False)

    @property
    def max_children(self) -> int:
        """Total number of distinct values this draw could ever produce."""
        n = compute_max_children(self.choice_type, self.constraints)
        assert n > 0
        return n

    def _repr_pretty_(self, p: "RepresentationPrinter", cycle: bool) -> None:
        assert cycle is False
        first = True
        for value, child in self.children.items():
            # separate siblings with a line break (but not before the first).
            if not first:
                p.break_()
            first = False
            p.text(
                _node_pretty(self.choice_type, value, self.constraints, forced=False)
            )
            with p.indent(2):
                p.break_()
                p.pretty(child)
117
118
@dataclass(slots=True, frozen=True)
class Conclusion:
    """Represents a transition to a finished state."""

    status: Status
    interesting_origin: InterestingOrigin | None

    def _repr_pretty_(self, p: "RepresentationPrinter", cycle: bool) -> None:
        assert cycle is False
        origin = ""
        if (o := self.interesting_origin) is not None:
            # avoid str(o), which can include multiple lines of context
            origin = f", {o.exc_type.__name__} at {o.filename}:{o.lineno}"
        p.text(f"Conclusion ({self.status!r}{origin})")
134
135
136# The number of max children where, beyond this, it is practically impossible
137# for hypothesis to saturate / explore all children nodes in a reasonable time
138# frame. We use this to bail out of expensive max children computations early,
139# where the numbers involved are so large that we know they will be larger than
140# this number.
141#
142# Note that it's ok for us to underestimate the number of max children of a node
143# by using this. We just may think the node is exhausted when in fact it has more
144# possible children to be explored. This has the potential to finish generation
145# early due to exhausting the entire tree, but that is quite unlikely: (1) the
146# number of examples would have to be quite high, and (2) the tree would have to
147# contain only one or two nodes, or generate_novel_prefix would simply switch to
148# exploring another non-exhausted node.
149#
150# Also note that we may sometimes compute max children above this value. In other
151# words, this is *not* a hard maximum on the computed max children. It's the point
152# where further computation is not beneficial - but sometimes doing that computation
153# unconditionally is cheaper than estimating against this value.
154#
155# The one case where this may be detrimental is fuzzing, where the throughput of
156# examples is so high that it really may saturate important nodes. We'll cross
157# that bridge when we come to it.
158MAX_CHILDREN_EFFECTIVELY_INFINITE: Final[int] = 10_000_000
159
160
161def _count_distinct_strings(*, alphabet_size: int, min_size: int, max_size: int) -> int:
162 # We want to estimate if we're going to have more children than
163 # MAX_CHILDREN_EFFECTIVELY_INFINITE, without computing a potentially
164 # extremely expensive pow. We'll check the two extreme cases - if the
165 # number of strings in the largest string size alone is enough to put us
166 # over this limit (at alphabet_size >= 2), and if the variation in sizes
167 # (at alphabet_size == 1) is enough. If neither result in an early return,
168 # the exact result should be reasonably cheap to compute.
169 if alphabet_size == 0:
170 # Special-case the empty string, avoid error in math.log(0).
171 return 1
172 elif alphabet_size == 1:
173 # Special-case the constant alphabet, invalid in the geom-series sum.
174 return max_size - min_size + 1
175 else:
176 # Estimate against log, which is cheaper than computing a pow.
177 #
178 # m = max_size
179 # a = alphabet_size
180 # N = MAX_CHILDREN_EFFECTIVELY_INFINITE
181 #
182 # a**m > N
183 # <=> m * log(a) > log(N)
184 log_max_sized_children = max_size * math.log(alphabet_size)
185 if log_max_sized_children > math.log(MAX_CHILDREN_EFFECTIVELY_INFINITE):
186 return MAX_CHILDREN_EFFECTIVELY_INFINITE
187
188 # The sum of a geometric series is given by (ref: wikipedia):
189 # ᵐ∑ₖ₌₀ aᵏ = (aᵐ⁺¹ - 1) / (a - 1)
190 # = S(m) / S(0)
191 # assuming a != 1 and using the definition
192 # S(m) := aᵐ⁺¹ - 1.
193 # The sum we want, starting from a number n [0 <= n <= m] rather than zero, is
194 # ᵐ∑ₖ₌ₙ aᵏ = ᵐ∑ₖ₌₀ aᵏ - ⁿ⁻¹∑ₖ₌₀ aᵏ = S(m) / S(0) - S(n - 1) / S(0)
195 def S(n):
196 return alphabet_size ** (n + 1) - 1
197
198 return (S(max_size) - S(min_size - 1)) // S(0)
199
200
def compute_max_children(
    choice_type: ChoiceTypeT, constraints: ChoiceConstraintsT
) -> int:
    """Return the number of distinct values a draw of ``choice_type`` under
    ``constraints`` can produce.

    May return the estimate MAX_CHILDREN_EFFECTIVELY_INFINITE instead of the
    exact count when the latter would be expensive to compute; see the comment
    on that constant.

    Raises NotImplementedError for an unknown ``choice_type``.
    """
    if choice_type == "integer":
        constraints = cast(IntegerConstraints, constraints)
        min_value = constraints["min_value"]
        max_value = constraints["max_value"]

        if min_value is None and max_value is None:
            # full 128 bit range.
            return 2**128 - 1
        if min_value is not None and max_value is not None:
            # count between min/max value.
            return max_value - min_value + 1

        # hard case: only one bound was specified. Here we probe either upwards
        # or downwards with our full 128 bit generation, but only half of these
        # (plus one for the case of generating zero) result in a probe in the
        # direction we want. ((2**128 - 1) // 2) + 1 == 2 ** 127
        assert (min_value is None) != (max_value is None)
        return 2**127
    elif choice_type == "boolean":
        constraints = cast(BooleanConstraints, constraints)
        p = constraints["p"]
        # probabilities of 0 or 1 (or effectively 0 or 1) only have one choice.
        if p <= 2 ** (-64) or p >= (1 - 2 ** (-64)):
            return 1
        return 2
    elif choice_type == "bytes":
        constraints = cast(BytesConstraints, constraints)
        # bytes are strings over the 256-element alphabet of byte values.
        return _count_distinct_strings(
            alphabet_size=2**8,
            min_size=constraints["min_size"],
            max_size=constraints["max_size"],
        )
    elif choice_type == "string":
        constraints = cast(StringConstraints, constraints)
        min_size = constraints["min_size"]
        max_size = constraints["max_size"]
        intervals = constraints["intervals"]

        # len(intervals) is the number of distinct permitted codepoints.
        return _count_distinct_strings(
            alphabet_size=len(intervals), min_size=min_size, max_size=max_size
        )
    elif choice_type == "float":
        constraints = cast(FloatConstraints, constraints)
        min_value_f = constraints["min_value"]
        max_value_f = constraints["max_value"]
        smallest_nonzero_magnitude = constraints["smallest_nonzero_magnitude"]

        count = count_between_floats(min_value_f, max_value_f)

        # we have two intervals:
        # a. [min_value, max_value]
        # b. [-smallest_nonzero_magnitude, smallest_nonzero_magnitude]
        #
        # which could be subsets (in either order), overlapping, or disjoint. We
        # want the interval difference a - b.

        # next_down because endpoints are ok with smallest_nonzero_magnitude
        min_point = max(min_value_f, -flt.next_down(smallest_nonzero_magnitude))
        max_point = min(max_value_f, flt.next_down(smallest_nonzero_magnitude))

        if min_point > max_point:
            # case: disjoint intervals.
            return count

        # Subtract the excluded magnitude band, then add back the signed
        # zeroes, which are always permitted when they lie inside [min, max].
        count -= count_between_floats(min_point, max_point)
        if sign_aware_lte(min_value_f, -0.0) and sign_aware_lte(-0.0, max_value_f):
            # account for -0.0
            count += 1
        if sign_aware_lte(min_value_f, 0.0) and sign_aware_lte(0.0, max_value_f):
            # account for 0.0
            count += 1
        return count

    raise NotImplementedError(f"unhandled choice_type {choice_type}")
278
279
# In theory, all_children is a strict superset of the functionality of
# compute_max_children;
#
#   assert len(all_children(choice_type, constraints)) == compute_max_children(choice_type, constraints)
#
# In practice we keep two separate implementations for efficiency and space
# reasons: when only the *number* of children is needed, computing it directly
# is far cheaper than reifying the list of children just to throw it away.
def _floats_between(a: float, b: float) -> Generator[float, None, None]:
    """Yield every float from ``a`` to ``b`` inclusive, ordered by their
    integer bits representation."""
    yield from map(int_to_float, range(float_to_int(a), float_to_int(b) + 1))
291
292
def all_children(
    choice_type: ChoiceTypeT, constraints: ChoiceConstraintsT
) -> Generator[ChoiceT, None, None]:
    """Yield every possible distinct value for a draw of ``choice_type``
    under ``constraints``, without duplicates.

    Note that floats are yielded as actual floats here, not as their bits
    representation (callers convert as needed).
    """
    if choice_type != "float":
        for index in range(compute_max_children(choice_type, constraints)):
            yield choice_from_index(index, choice_type, constraints)
    else:
        constraints = cast(FloatConstraints, constraints)
        # the float ordering is not injective (because of resampling
        # out-of-bounds values), so using choice_from_index would result in
        # duplicates. This violates invariants in datatree about being able
        # to draw unique new children using all_children.
        #
        # We instead maintain a separate implementation for floats.
        # TODO_IR write a better (bijective) ordering for floats and remove this!
        min_value = constraints["min_value"]
        max_value = constraints["max_value"]
        smallest_nonzero_magnitude = constraints["smallest_nonzero_magnitude"]

        # handle zeroes separately so smallest_nonzero_magnitude can think of
        # itself as a complete interval (instead of a hole at ±0).
        if sign_aware_lte(min_value, -0.0) and sign_aware_lte(-0.0, max_value):
            yield -0.0
        if sign_aware_lte(min_value, 0.0) and sign_aware_lte(0.0, max_value):
            yield 0.0

        if flt.is_negative(min_value):
            if flt.is_negative(max_value):
                # case: both negative.
                max_point = min(max_value, -smallest_nonzero_magnitude)
                # float_to_int increases as negative magnitude increases, so
                # invert order.
                yield from _floats_between(max_point, min_value)
            else:
                # case: straddles midpoint (which is between -0.0 and 0.0).
                # Enumerate the negative side (in inverted bit order, as
                # above), then the positive side.
                yield from _floats_between(-smallest_nonzero_magnitude, min_value)
                yield from _floats_between(smallest_nonzero_magnitude, max_value)
        else:
            # case: both positive.
            min_point = max(min_value, smallest_nonzero_magnitude)
            yield from _floats_between(min_point, max_value)
334
335
@dataclass(slots=True, frozen=False)
class TreeNode:
    """
    A node, or collection of directly descended nodes, in a DataTree.

    We store the DataTree as a radix tree (https://en.wikipedia.org/wiki/Radix_tree),
    which means that nodes that are the only child of their parent are collapsed
    into their parent to save space.

    Conceptually, you can unfold a single TreeNode storing n values in its lists
    into a sequence of n nodes, each a child of the last. In other words,
    (constraints[i], values[i], choice_types[i]) corresponds to the single node at index
    i.

    Note that if a TreeNode represents a choice (i.e. the nodes cannot be compacted
    via the radix tree definition), then its lists will be empty and it will
    store a `Branch` representing that choice in its `transition`.

    Examples
    --------

    Consider sequentially drawing a boolean, then an integer.

        data.draw_boolean()
        data.draw_integer(1, 3)

    If we draw True and then 2, the tree may conceptually look like this.

        ┌──────┐
        │ root │
        └──┬───┘
        ┌──┴───┐
        │ True │
        └──┬───┘
        ┌──┴───┐
        │  2   │
        └──────┘

    But since 2 is the only child of True, we will compact these nodes and store
    them as a single TreeNode.

        ┌──────┐
        │ root │
        └──┬───┘
      ┌────┴──────┐
      │ [True, 2] │
      └───────────┘

    If we then draw True and then 3, True will have multiple children and we
    can no longer store this compacted representation. We would call split_at(0)
    on the [True, 2] node to indicate that we need to add a choice at 0-index
    node (True).

        ┌──────┐
        │ root │
        └──┬───┘
        ┌──┴───┐
      ┌─┤ True ├─┐
      │ └──────┘ │
    ┌─┴─┐      ┌─┴─┐
    │ 2 │      │ 3 │
    └───┘      └───┘
    """

    # The constraints, value, and choice_types of the nodes stored here. These always
    # have the same length. The values at index i belong to node i.
    constraints: list[ChoiceConstraintsT] = field(default_factory=list)
    values: list[ChoiceT] = field(default_factory=list)
    choice_types: list[ChoiceTypeT] = field(default_factory=list)

    # The indices of nodes which had forced values.
    #
    # Stored as None if no indices have been forced, purely for space saving
    # reasons (we force quite rarely).
    __forced: set[int] | None = field(default=None, init=False)

    # What happens next after drawing these nodes. (conceptually, "what is the
    # child/children of the last node stored here").
    #
    # One of:
    # - None (we don't know yet)
    # - Branch (we have seen multiple possible outcomes here)
    # - Conclusion (ConjectureData.conclude_test was called here)
    # - Killed (this branch is valid and may even have children, but should not
    #   be explored when generating novel prefixes)
    transition: None | Branch | Conclusion | Killed = None

    # A tree node is exhausted if every possible sequence of draws below it has
    # been explored. We only update this when performing operations that could
    # change the answer.
    #
    # See also TreeNode.check_exhausted.
    is_exhausted: bool = field(default=False, init=False)

    @property
    def forced(self) -> Set[int]:
        """The set of indices of nodes in this TreeNode whose values were
        forced (read-only; use mark_forced to add to it)."""
        if not self.__forced:
            # shared immutable empty set, to avoid allocating in the common
            # case where nothing was forced.
            return EMPTY
        return self.__forced

    def mark_forced(self, i: int) -> None:
        """
        Note that the draw at node i was forced.
        """
        assert 0 <= i < len(self.values)
        if self.__forced is None:
            # lazily allocate; __forced stays None until first use.
            self.__forced = set()
        self.__forced.add(i)

    def split_at(self, i: int) -> None:
        """
        Splits the tree so that it can incorporate a decision at the draw call
        corresponding to the node at position i.

        Raises FlakyStrategyDefinition if node i was forced.
        """

        if i in self.forced:
            # A forced node can only ever take a single value, so observing a
            # different value here means generation is nondeterministic.
            raise FlakyStrategyDefinition(_FLAKY_STRAT_MSG)

        assert not self.is_exhausted

        # the value previously drawn at position i; becomes the key under
        # which the remainder of this node is stored in the new Branch.
        key = self.values[i]

        # everything after position i moves into a new child node, which
        # inherits this node's transition.
        child = TreeNode(
            choice_types=self.choice_types[i + 1 :],
            constraints=self.constraints[i + 1 :],
            values=self.values[i + 1 :],
            transition=self.transition,
        )
        # this node's transition becomes a Branch at position i, initially
        # with only the previously-seen value leading to the child.
        self.transition = Branch(
            constraints=self.constraints[i],
            choice_type=self.choice_types[i],
            children={key: child},
        )
        if self.__forced is not None:
            # split the forced indices between the two nodes, re-indexing the
            # child's relative to its own (shorter) lists. Index i itself is
            # never forced here (checked above).
            child.__forced = {j - i - 1 for j in self.__forced if j > i}
            self.__forced = {j for j in self.__forced if j < i}
        child.check_exhausted()
        # truncate this node's lists at i; position i now lives in the Branch.
        del self.choice_types[i:]
        del self.values[i:]
        del self.constraints[i:]
        assert len(self.values) == len(self.constraints) == len(self.choice_types) == i

    def check_exhausted(self) -> bool:
        """
        Recalculates is_exhausted if necessary, and then returns it.

        A node is exhausted if:
        - Its transition is Conclusion or Killed
        - It has the maximum number of children (i.e. we have found all of its
          possible children), and all its children are exhausted

        Therefore, we only need to compute this for a node when:
        - We first create it in split_at
        - We set its transition to either Conclusion or Killed
          (TreeRecordingObserver.conclude_test or TreeRecordingObserver.kill_branch)
        - We exhaust any of its children
        """

        if (
            # a node cannot go from is_exhausted -> not is_exhausted.
            not self.is_exhausted
            # if we don't know what happens after this node, we don't have
            # enough information to tell if it's exhausted.
            and self.transition is not None
            # if there are still any nodes left which are the only child of their
            # parent (len(self.values) > 0), then this TreeNode must be not
            # exhausted, unless all of those nodes were forced.
            #
            # This is because we maintain an invariant of only adding nodes to
            # DataTree which have at least 2 possible values, so we know that if
            # they do not have any siblings that we still have more choices to
            # discover.
            #
            # (We actually *do* currently add single-valued nodes to the tree,
            # but immediately split them into a transition to avoid falsifying
            # this check. this is a bit of a hack.)
            and len(self.forced) == len(self.values)
        ):
            if isinstance(self.transition, (Conclusion, Killed)):
                self.is_exhausted = True
            elif len(self.transition.children) == self.transition.max_children:
                self.is_exhausted = all(
                    v.is_exhausted for v in self.transition.children.values()
                )
        return self.is_exhausted

    def _repr_pretty_(self, p: "RepresentationPrinter", cycle: bool) -> None:
        assert cycle is False
        # render each compacted node on its own line, indenting two spaces
        # further for each one to suggest the unfolded parent/child chain.
        indent = 0
        for i, (choice_type, constraints, value) in enumerate(
            zip(self.choice_types, self.constraints, self.values, strict=True)
        ):
            with p.indent(indent):
                if i > 0:
                    p.break_()
                p.text(
                    _node_pretty(
                        choice_type, value, constraints, forced=i in self.forced
                    )
                )
            indent += 2

        with p.indent(indent):
            if len(self.values) > 0:
                p.break_()
            if self.transition is not None:
                p.pretty(self.transition)
            else:
                # transition is None: we don't yet know what follows.
                p.text("unknown")
547
548
549class DataTree:
550 """
551 A DataTree tracks the structured history of draws in some test function,
552 across multiple ConjectureData objects.
553
554 This information is used by ConjectureRunner to generate novel prefixes of
555 this tree (see generate_novel_prefix). A novel prefix is a sequence of draws
556 which the tree has not seen before, and therefore the ConjectureRunner has
557 not generated as an input to the test function before.
558
559 DataTree tracks the following:
560
561 - Drawn choices in the choice sequence
562 - ConjectureData.draw_integer()
563 - ConjectureData.draw_float()
564 - ConjectureData.draw_string()
565 - ConjectureData.draw_boolean()
566 - ConjectureData.draw_bytes()
567 - Test conclusions (with some Status, e.g. Status.VALID)
568 - ConjectureData.conclude_test()
569
570 A DataTree is — surprise — a *tree*. A node in this tree is either a choice draw
571 with some value, a test conclusion with some Status, or a special `Killed` value,
572 which denotes that further draws may exist beyond this node but should not be
573 considered worth exploring when generating novel prefixes. A node is a leaf
574 iff it is a conclusion or Killed.
575
576 A branch from node A to node B indicates that we have previously seen some
577 sequence (a, b) of draws, where a and b are the values in nodes A and B.
578 Similar intuition holds for conclusion and Killed nodes.
579
580 Examples
581 --------
582
583 To see how a DataTree gets built through successive sets of draws, consider
    the following code that calls through to some ConjectureData object `data`.
585 The first call can be either True or False, and the second call can be any
586 integer in the range [1, 3].
587
588 data.draw_boolean()
589 data.draw_integer(1, 3)
590
591 To start, the corresponding DataTree object is completely empty.
592
593 ┌──────┐
594 │ root │
595 └──────┘
596
597 We happen to draw True and then 2 in the above code. The tree tracks this.
598 (2 also connects to a child Conclusion node with Status.VALID since it's the
599 final draw in the code. I'll omit Conclusion nodes in diagrams for brevity.)
600
601 ┌──────┐
602 │ root │
603 └──┬───┘
604 ┌──┴───┐
605 │ True │
606 └──┬───┘
607 ┌──┴───┐
608 │ 2 │
609 └──────┘
610
611 This is a very boring tree so far! But now we happen to draw False and
612 then 1. This causes a split in the tree. Remember, DataTree tracks history
613 over all invocations of a function, not just one. The end goal is to know
614 what invocations haven't been tried yet, after all.
615
616 ┌──────┐
617 ┌───┤ root ├───┐
618 │ └──────┘ │
619 ┌──┴───┐ ┌─┴─────┐
620 │ True │ │ False │
621 └──┬───┘ └──┬────┘
622 ┌─┴─┐ ┌─┴─┐
623 │ 2 │ │ 1 │
624 └───┘ └───┘
625
626 If we were to ask DataTree for a novel prefix at this point, it might
627 generate any of (True, 1), (True, 3), (False, 2), or (False, 3).
628
629 Note that the novel prefix stops as soon as it generates a novel node. For
630 instance, if we had generated a novel prefix back when the tree was only
631 root -> True -> 2, we could have gotten any of (True, 1), (True, 3), or
632 (False). But we could *not* have gotten (False, n), because both False and
633 n were novel at that point, and we stop at the first novel node — False.
634
635 I won't belabor this example. Here's what the tree looks like when fully
636 explored:
637
638 ┌──────┐
639 ┌──────┤ root ├──────┐
640 │ └──────┘ │
641 ┌──┴───┐ ┌─┴─────┐
642 ┌──┤ True ├──┐ ┌───┤ False ├──┐
643 │ └──┬───┘ │ │ └──┬────┘ │
644 ┌─┴─┐ ┌─┴─┐ ┌─┴─┐ ┌─┴─┐ ┌─┴─┐ ┌─┴─┐
645 │ 1 │ │ 2 │ │ 3 │ │ 1 │ │ 2 │ │ 3 │
646 └───┘ └───┘ └───┘ └───┘ └───┘ └───┘
647
648 You could imagine much more complicated trees than this arising in practice,
649 and indeed they do. In particular, the tree need not be balanced or 'nice'
650 like the tree above. For instance,
651
652 b = data.draw_boolean()
653 if b:
654 data.draw_integer(1, 3)
655
656 results in a tree with the entire right part lopped off, and False leading
657 straight to a conclusion node with Status.VALID. As another example,
658
659 n = data.draw_integers()
660 assume(n >= 3)
661 data.draw_string()
662
663 results in a tree with the 0, 1, and 2 nodes leading straight to a
664 conclusion node with Status.INVALID, and the rest branching off into all
665 the possibilities of draw_string.
666
667 Notes
668 -----
669
670 The above examples are slightly simplified and are intended to convey
671 intuition. In practice, there are some implementation details to be aware
672 of.
673
674 - In draw nodes, we store the constraints used in addition to the value drawn.
675 E.g. the node corresponding to data.draw_float(min_value=1.0, max_value=1.5)
676 would store {"min_value": 1.0, "max_value": 1.5, ...} (default values for
677 other constraints omitted).
678
679 The constraints parameters have the potential to change both the range of
680 possible outputs of a node, and the probability distribution within that
681 range, so we need to use these when drawing in DataTree as well. We draw
682 values using these constraints when (1) generating a novel value for a node
683 and (2) choosing a random child when traversing the tree.
684
685 - For space efficiency, rather than tracking the full tree structure, we
686 store DataTree as a radix tree. This is conceptually equivalent (radix
687 trees can always be "unfolded" to the full tree) but it means the internal
688 representation may differ in practice.
689
690 See TreeNode for more information.
691 """
692
    def __init__(self) -> None:
        # Root of the radix tree; starts empty and fills in as the runner
        # observes test executions.
        self.root: TreeNode = TreeNode()
        # Per-node/branch cache of child-enumeration state, keyed by id() of
        # the node or branch being drawn for. See _get_children_cache.
        self._children_cache: dict[ChoiceT, ChildrenCacheValueT] = {}
696
    @property
    def is_exhausted(self) -> bool:
        """
        Returns True if every node is exhausted, and therefore the tree has
        been fully explored.
        """
        # exhaustion propagates upwards via TreeNode.check_exhausted, so
        # checking the root is sufficient.
        return self.root.is_exhausted
704
    def generate_novel_prefix(self, random: Random) -> tuple[ChoiceT, ...]:
        """Generate a short random string that (after rewriting) is not
        a prefix of any choice sequence previously added to the tree.

        The resulting prefix is essentially arbitrary - it would be nice
        for it to be uniform at random, but previous attempts to do that
        have proven too expensive.
        """
        assert not self.is_exhausted
        prefix = []

        def append_choice(choice_type: ChoiceTypeT, choice: ChoiceT) -> None:
            # _draw / _draw_from_cache return floats in their bits form (see
            # DataTree._draw); convert back to an actual float before
            # recording it in the prefix.
            if choice_type == "float":
                assert isinstance(choice, int)
                choice = int_to_float(choice)
            prefix.append(choice)

        current_node = self.root
        while True:
            assert not current_node.is_exhausted
            # walk the compacted nodes stored inside current_node, looking
            # for the first non-forced position where we can deviate.
            for i, (choice_type, constraints, value) in enumerate(
                zip(
                    current_node.choice_types,
                    current_node.constraints,
                    current_node.values,
                    strict=True,
                )
            ):
                if i in current_node.forced:
                    append_choice(choice_type, value)
                else:
                    attempts = 0
                    while True:
                        # try random draws first; after 10 failures, fall
                        # back to enumerating not-yet-rejected children.
                        if attempts <= 10:
                            try:
                                node_value = self._draw(
                                    choice_type, constraints, random=random
                                )
                            except StopTest:  # pragma: no cover
                                # it is possible that drawing from a fresh data can
                                # overrun BUFFER_SIZE, due to eg unlucky rejection sampling
                                # of integer probes. Retry these cases.
                                attempts += 1
                                continue
                        else:
                            node_value = self._draw_from_cache(
                                choice_type,
                                constraints,
                                key=id(current_node),
                                random=random,
                            )

                        if node_value != value:
                            append_choice(choice_type, node_value)
                            break
                        # drew the already-known value; reject it so the
                        # cache-based fallback won't offer it again.
                        attempts += 1
                        self._reject_child(
                            choice_type,
                            constraints,
                            child=node_value,
                            key=id(current_node),
                        )
                    # We've now found a value that is allowed to
                    # vary, so what follows is not fixed.
                    return tuple(prefix)

            assert not isinstance(current_node.transition, (Conclusion, Killed))
            if current_node.transition is None:
                return tuple(prefix)
            branch = current_node.transition
            assert isinstance(branch, Branch)

            attempts = 0
            while True:
                # same random-then-cache strategy as above, but now drawing
                # a child of an explicit Branch.
                if attempts <= 10:
                    try:
                        node_value = self._draw(
                            branch.choice_type, branch.constraints, random=random
                        )
                    except StopTest:  # pragma: no cover
                        attempts += 1
                        continue
                else:
                    node_value = self._draw_from_cache(
                        branch.choice_type,
                        branch.constraints,
                        key=id(branch),
                        random=random,
                    )
                try:
                    child = branch.children[node_value]
                except KeyError:
                    # never-before-seen child: the prefix is now novel.
                    append_choice(branch.choice_type, node_value)
                    return tuple(prefix)
                if not child.is_exhausted:
                    # known child with unexplored descendants: descend.
                    append_choice(branch.choice_type, node_value)
                    current_node = child
                    break
                # known but exhausted child: reject it and try another.
                attempts += 1
                self._reject_child(
                    branch.choice_type,
                    branch.constraints,
                    child=node_value,
                    key=id(branch),
                )

                # We don't expect this assertion to ever fire, but coverage
                # wants the loop inside to run if you have branch checking
                # on, hence the pragma.
                assert (  # pragma: no cover
                    attempts != 1000
                    or len(branch.children) < branch.max_children
                    or any(not v.is_exhausted for v in branch.children.values())
                )
819
    def rewrite(self, choices):
        """Use previously seen ConjectureData objects to return a tuple of
        the rewritten choice sequence and the status we would get from running
        that with the test function. If the status cannot be predicted
        from the existing values it will be None."""
        data = ConjectureData.for_choices(choices)
        try:
            self.simulate_test_function(data)
            # simulation completed: this exact path has been seen before, so
            # both the (possibly rewritten) choices and the status are known.
            return (data.choices, data.status)
        except PreviouslyUnseenBehaviour:
            # the tree has no record of this path; leave choices untouched
            # and report the status as unpredictable.
            return (choices, None)
831
    def simulate_test_function(self, data: ConjectureData) -> None:
        """Run a simulated version of the test function recorded by
        this tree. Note that this does not currently call ``stop_span``
        or ``start_span`` as these are not currently recorded in the
        tree. This will likely change in future.

        Raises PreviouslyUnseenBehaviour if the simulation walks off the
        recorded tree at any point.
        """
        node = self.root

        def draw(choice_type, constraints, *, forced=None, convert_forced=True):
            # forced float values arrive in bits form (as stored in
            # node.values); convert to an actual float before drawing.
            if choice_type == "float" and forced is not None and convert_forced:
                forced = int_to_float(forced)

            draw_func = getattr(data, f"draw_{choice_type}")
            value = draw_func(**constraints, forced=forced)

            # return floats in bits form, to match how the tree stores and
            # keys float values (see DataTree._draw).
            if choice_type == "float":
                value = float_to_int(value)
            return value

        try:
            while True:
                # replay the compacted nodes; any mismatch with the recorded
                # value means we are on an unrecorded path.
                for i, (choice_type, constraints, previous) in enumerate(
                    zip(node.choice_types, node.constraints, node.values, strict=True)
                ):
                    v = draw(
                        choice_type,
                        constraints,
                        forced=previous if i in node.forced else None,
                    )
                    if v != previous:
                        raise PreviouslyUnseenBehaviour
                if isinstance(node.transition, Conclusion):
                    t = node.transition
                    data.conclude_test(t.status, t.interesting_origin)
                elif node.transition is None:
                    raise PreviouslyUnseenBehaviour
                elif isinstance(node.transition, Branch):
                    v = draw(node.transition.choice_type, node.transition.constraints)
                    try:
                        node = node.transition.children[v]
                    except KeyError as err:
                        raise PreviouslyUnseenBehaviour from err
                else:
                    assert isinstance(node.transition, Killed)
                    data.observer.kill_branch()
                    node = node.transition.next_node
        except StopTest:
            # conclude_test raises StopTest; reaching here means the
            # simulation ran to a recorded conclusion.
            pass
879
    def new_observer(self) -> "TreeRecordingObserver":
        """Return a fresh observer which records draws into this tree."""
        return TreeRecordingObserver(self)
882
883 def _draw(
884 self,
885 choice_type: ChoiceTypeT,
886 constraints: ChoiceConstraintsT,
887 *,
888 random: Random,
889 ) -> ChoiceT:
890 from hypothesis.internal.conjecture.data import draw_choice
891
892 value = draw_choice(choice_type, constraints, random=random)
893 # using floats as keys into branch.children breaks things, because
894 # e.g. hash(0.0) == hash(-0.0) would collide as keys when they are
895 # in fact distinct child branches.
896 # To distinguish floats here we'll use their bits representation. This
897 # entails some bookkeeping such that we're careful about when the
898 # float key is in its bits form (as a key into branch.children) and
899 # when it is in its float form (as a value we want to write to the
900 # choice sequence), and converting between the two forms as appropriate.
901 if choice_type == "float":
902 assert isinstance(value, float)
903 value = float_to_int(value)
904 return value
905
906 def _get_children_cache(
907 self, choice_type: ChoiceTypeT, constraints: ChoiceConstraintsT, *, key: ChoiceT
908 ) -> ChildrenCacheValueT:
909 # cache the state of the children generator per node/branch (passed as
910 # `key` here), such that we track which children we've already tried
911 # for this branch across draws.
912 # We take advantage of python generators here as one-way iterables,
913 # so each time we iterate we implicitly store our position in the
914 # children generator and don't re-draw children. `children` is the
915 # concrete list of children draw from the generator that we will work
916 # with. Whenever we need to top up this list, we will draw a new value
917 # from the generator.
918 if key not in self._children_cache:
919 generator = all_children(choice_type, constraints)
920 children: list[ChoiceT] = []
921 rejected: set[ChoiceT] = set()
922 self._children_cache[key] = (generator, children, rejected)
923
924 return self._children_cache[key]
925
926 def _draw_from_cache(
927 self,
928 choice_type: ChoiceTypeT,
929 constraints: ChoiceConstraintsT,
930 *,
931 key: ChoiceT,
932 random: Random,
933 ) -> ChoiceT:
934 (generator, children, rejected) = self._get_children_cache(
935 choice_type, constraints, key=key
936 )
937 # Keep a stock of 100 potentially-valid children at all times.
938 # This number is chosen to balance memory/speed vs randomness. Ideally
939 # we would sample uniformly from all not-yet-rejected children, but
940 # computing and storing said children is not free.
941 # no-branch because coverage of the fall-through case here is a bit
942 # annoying.
943 if len(children) < 100: # pragma: no branch
944 for v in generator:
945 if choice_type == "float":
946 assert isinstance(v, float)
947 v = float_to_int(v)
948 if v in rejected:
949 continue
950 children.append(v)
951 if len(children) >= 100:
952 break
953
954 return random.choice(children)
955
956 def _reject_child(
957 self,
958 choice_type: ChoiceTypeT,
959 constraints: ChoiceConstraintsT,
960 *,
961 child: ChoiceT,
962 key: ChoiceT,
963 ) -> None:
964 (_generator, children, rejected) = self._get_children_cache(
965 choice_type, constraints, key=key
966 )
967 rejected.add(child)
968 # we remove a child from the list of possible children *only* when it is
969 # rejected, and not when it is initially drawn in _draw_from_cache. The
970 # reason is that a child being drawn does not guarantee that child will
971 # be used in a way such that it is written back to the tree, so it needs
972 # to be available for future draws until we are certain it has been
973 # used.
974 #
975 # For instance, if we generated novel prefixes in a loop (but never used
976 # those prefixes to generate new values!) then we don't want to remove
977 # the drawn children from the available pool until they are actually
978 # used.
979 #
980 # This does result in a small inefficiency: we may draw a child,
981 # immediately use it (so we know it cannot be drawn again), but still
982 # wait to draw and reject it here, because DataTree cannot guarantee
983 # the drawn child has been used.
984 if child in children:
985 children.remove(child)
986
987 def _repr_pretty_(self, p: "RepresentationPrinter", cycle: bool) -> None:
988 assert cycle is False
989 p.pretty(self.root)
990
991
class TreeRecordingObserver(DataObserver):
    """Records a test execution into a DataTree as it happens.

    Each ``draw_*`` call walks the tree in lockstep with the test's choice
    sequence, extending recorded history where the execution runs past it
    and splitting nodes into branches where the drawn value diverges.
    ``conclude_test`` records the final status. If replaying a known path
    produces draws or conclusions that disagree with what the tree has
    previously recorded, FlakyStrategyDefinition or FlakyReplay is raised.
    """

    def __init__(self, tree: DataTree):
        # this attr isn't read, but is very useful for local debugging flaky
        # errors, with
        # `from hypothesis.vendor import pretty; print(pretty.pretty(self._root))`
        self._root = tree.root
        # The node whose recorded draws we are currently walking/extending.
        self._current_node: TreeNode = tree.root
        # Position of the next draw within self._current_node's values.
        self._index_in_current_node: int = 0
        # All nodes visited so far, in order; walked right-to-left by
        # __update_exhausted.
        self._trail: list[TreeNode] = [self._current_node]
        # Set by kill_branch; suppresses exhaustion updates in conclude_test.
        self.killed: bool = False

    def draw_integer(
        self, value: int, *, was_forced: bool, constraints: IntegerConstraints
    ) -> None:
        """Record an integer draw; see draw_value."""
        self.draw_value(
            "integer", value, was_forced=was_forced, constraints=constraints
        )

    def draw_float(
        self, value: float, *, was_forced: bool, constraints: FloatConstraints
    ) -> None:
        """Record a float draw; see draw_value."""
        self.draw_value("float", value, was_forced=was_forced, constraints=constraints)

    def draw_string(
        self, value: str, *, was_forced: bool, constraints: StringConstraints
    ) -> None:
        """Record a string draw; see draw_value."""
        self.draw_value("string", value, was_forced=was_forced, constraints=constraints)

    def draw_bytes(
        self, value: bytes, *, was_forced: bool, constraints: BytesConstraints
    ) -> None:
        """Record a bytes draw; see draw_value."""
        self.draw_value("bytes", value, was_forced=was_forced, constraints=constraints)

    def draw_boolean(
        self, value: bool, *, was_forced: bool, constraints: BooleanConstraints
    ) -> None:
        """Record a boolean draw; see draw_value."""
        self.draw_value(
            "boolean", value, was_forced=was_forced, constraints=constraints
        )

    def draw_value(
        self,
        choice_type: ChoiceTypeT,
        value: ChoiceT,
        *,
        was_forced: bool,
        constraints: ChoiceConstraintsT,
    ) -> None:
        """Record a single draw of ``value``, advancing one step through the tree.

        While replaying within a node's recorded history, the draw must agree
        with what was previously seen (choice type, constraints, forcedness),
        or FlakyStrategyDefinition is raised; a differing *value* instead
        splits the node into a Branch. Past the end of recorded history, the
        draw either extends the node, follows an existing Branch, or — if the
        tree says the test should already have concluded — is flaky.
        """
        i = self._index_in_current_node
        self._index_in_current_node += 1
        node = self._current_node

        # Floats are keyed in the tree by their bits representation, since
        # e.g. hash(0.0) == hash(-0.0) would collide distinct children.
        if isinstance(value, float):
            value = float_to_int(value)

        assert len(node.constraints) == len(node.values) == len(node.choice_types)
        if i < len(node.values):
            # Replaying within this node's recorded draws.
            if (
                choice_type != node.choice_types[i]
                or constraints != node.constraints[i]
            ):
                raise FlakyStrategyDefinition(_FLAKY_STRAT_MSG)
            # Note that we don't check whether a previously
            # forced value is now free. That will be caught
            # if we ever split the node there, but otherwise
            # may pass silently. This is acceptable because it
            # means we skip a hash set lookup on every
            # draw and that's a pretty niche failure mode.
            if was_forced and i not in node.forced:
                raise FlakyStrategyDefinition(_FLAKY_STRAT_MSG)
            if value != node.values[i]:
                # The draw diverged from recorded history: split this node at
                # i and start a fresh child for the newly-seen value.
                node.split_at(i)
                assert i == len(node.values)
                new_node = TreeNode()
                assert isinstance(node.transition, Branch)
                node.transition.children[value] = new_node
                self._current_node = new_node
                self._index_in_current_node = 0
        else:
            trans = node.transition
            if trans is None:
                # We've run past this node's recorded history: extend it.
                node.choice_types.append(choice_type)
                node.constraints.append(constraints)
                node.values.append(value)
                if was_forced:
                    node.mark_forced(i)
                # generate_novel_prefix assumes the following invariant: any one
                # of the series of draws in a particular node can vary, i.e. the
                # max number of children is at least 2. However, some draws are
                # pseudo-choices and only have a single value, such as
                # integers(0, 0).
                #
                # Currently, we address this by forcefully splitting such
                # single-valued nodes into a transition when we see them. An
                # exception to this is if it was forced: forced pseudo-choices
                # do not cause the above issue because they inherently cannot
                # vary, and moreover they trip other invariants about never
                # splitting forced nodes.
                #
                # An alternative is not writing such choices to the tree at
                # all, and thus guaranteeing that each node has at least 2 max
                # children.
                if (
                    compute_max_children(choice_type, constraints) == 1
                    and not was_forced
                ):
                    node.split_at(i)
                    assert isinstance(node.transition, Branch)
                    self._current_node = node.transition.children[value]
                    self._index_in_current_node = 0
            elif isinstance(trans, Conclusion):
                assert trans.status != Status.OVERRUN
                # We tried to draw where history says we should have
                # stopped
                raise FlakyStrategyDefinition(_FLAKY_STRAT_MSG)
            else:
                assert isinstance(trans, Branch), trans
                if choice_type != trans.choice_type or constraints != trans.constraints:
                    raise FlakyStrategyDefinition(_FLAKY_STRAT_MSG)
                try:
                    self._current_node = trans.children[value]
                except KeyError:
                    # First time this value has been seen at this branch.
                    self._current_node = trans.children.setdefault(value, TreeNode())
                self._index_in_current_node = 0
        if self._trail[-1] is not self._current_node:
            self._trail.append(self._current_node)

    def kill_branch(self) -> None:
        """Mark this part of the tree as not worth re-exploring."""
        if self.killed:
            return

        self.killed = True

        # Killing must happen exactly at the end of this node's recorded
        # draws, and any existing transition here must itself be a Killed;
        # anything else means history disagrees with this execution.
        if self._index_in_current_node < len(self._current_node.values) or (
            self._current_node.transition is not None
            and not isinstance(self._current_node.transition, Killed)
        ):
            raise FlakyStrategyDefinition(_FLAKY_STRAT_MSG)

        if self._current_node.transition is None:
            self._current_node.transition = Killed(TreeNode())
            self.__update_exhausted()

        self._current_node = self._current_node.transition.next_node
        self._index_in_current_node = 0
        self._trail.append(self._current_node)

    def conclude_test(
        self, status: Status, interesting_origin: InterestingOrigin | None
    ) -> None:
        """Says that ``status`` occurred at node ``node``. This updates the
        node if necessary and checks for consistency."""
        # Overruns are never recorded in the tree; just bail out.
        if status == Status.OVERRUN:
            return
        i = self._index_in_current_node
        node = self._current_node

        # A conclusion mid-way through recorded draws, or where a Branch
        # exists, contradicts recorded history.
        if i < len(node.values) or isinstance(node.transition, Branch):
            raise FlakyStrategyDefinition(_FLAKY_STRAT_MSG)

        new_transition = Conclusion(status, interesting_origin)

        if node.transition is not None and node.transition != new_transition:
            # As an, I'm afraid, horrible bodge, we deliberately ignore flakiness
            # where tests go from interesting to valid, because it's much easier
            # to produce good error messages for these further up the stack.
            if isinstance(node.transition, Conclusion) and (
                node.transition.status != Status.INTERESTING
                or new_transition.status != Status.VALID
            ):
                old_origin = node.transition.interesting_origin
                new_origin = new_transition.interesting_origin
                raise FlakyReplay(
                    f"Inconsistent results from replaying a test case!\n"
                    f"  last: {node.transition.status.name} from {old_origin}\n"
                    f"  this: {new_transition.status.name} from {new_origin}",
                    (old_origin, new_origin),
                )
        else:
            node.transition = new_transition

        assert node is self._trail[-1]
        node.check_exhausted()
        assert len(node.values) > 0 or node.check_exhausted()

        if not self.killed:
            self.__update_exhausted()

    def __update_exhausted(self) -> None:
        """Propagate exhaustion from the current node back up the trail."""
        for t in reversed(self._trail):
            # Any node we've traversed might have now become exhausted.
            # We check from the right. As soon as we hit a node that
            # isn't exhausted, this automatically implies that all of
            # its parents are not exhausted, so we stop.
            if not t.check_exhausted():
                break