Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/hypothesis/internal/conjecture/datatree.py: 19%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

416 statements  

1# This file is part of Hypothesis, which may be found at 

2# https://github.com/HypothesisWorks/hypothesis/ 

3# 

4# Copyright the Hypothesis Authors. 

5# Individual contributors are listed in AUTHORS.rst and the git log. 

6# 

7# This Source Code Form is subject to the terms of the Mozilla Public License, 

8# v. 2.0. If a copy of the MPL was not distributed with this file, You can 

9# obtain one at https://mozilla.org/MPL/2.0/. 

10 

11import math 

12from collections.abc import Generator, Set 

13from dataclasses import dataclass, field 

14from random import Random 

15from typing import TYPE_CHECKING, Final, TypeAlias, cast 

16 

17from hypothesis.errors import ( 

18 FlakyReplay, 

19 FlakyStrategyDefinition, 

20 HypothesisException, 

21 StopTest, 

22) 

23from hypothesis.internal import floats as flt 

24from hypothesis.internal.conjecture.choice import ( 

25 BooleanConstraints, 

26 BytesConstraints, 

27 ChoiceConstraintsT, 

28 ChoiceT, 

29 ChoiceTypeT, 

30 FloatConstraints, 

31 IntegerConstraints, 

32 StringConstraints, 

33 choice_from_index, 

34) 

35from hypothesis.internal.conjecture.data import ConjectureData, DataObserver, Status 

36from hypothesis.internal.escalation import InterestingOrigin 

37from hypothesis.internal.floats import ( 

38 count_between_floats, 

39 float_to_int, 

40 int_to_float, 

41 sign_aware_lte, 

42) 

43 

44if TYPE_CHECKING: 

45 from hypothesis.vendor.pretty import RepresentationPrinter 

46 

47ChildrenCacheValueT: TypeAlias = tuple[ 

48 Generator[ChoiceT, None, None], list[ChoiceT], set[ChoiceT] 

49] 

50 

51 

52class PreviouslyUnseenBehaviour(HypothesisException): 

53 pass 

54 

55 

56EMPTY: frozenset[int] = frozenset() 

57 

58 

59@dataclass(slots=True, frozen=True) 

60class Killed: 

61 """Represents a transition to part of the tree which has been marked as 

62 "killed", meaning we want to treat it as not worth exploring, so it will 

63 be treated as if it were completely explored for the purposes of 

64 exhaustion.""" 

65 

66 next_node: "TreeNode" 

67 

68 def _repr_pretty_(self, p: "RepresentationPrinter", cycle: bool) -> None: 

69 assert cycle is False 

70 p.text("Killed") 

71 

72 

73def _node_pretty( 

74 choice_type: ChoiceTypeT, 

75 value: ChoiceT, 

76 constraints: ChoiceConstraintsT, 

77 *, 

78 forced: bool, 

79) -> str: 

80 forced_marker = " [forced]" if forced else "" 

81 return f"{choice_type} {value!r}{forced_marker} {constraints}" 

82 

83 

84@dataclass(slots=True, frozen=False) 

85class Branch: 

86 """Represents a transition where multiple choices can be made as to what 

87 to drawn.""" 

88 

89 constraints: ChoiceConstraintsT 

90 choice_type: ChoiceTypeT 

91 children: dict[ChoiceT, "TreeNode"] = field(repr=False) 

92 

93 @property 

94 def max_children(self) -> int: 

95 max_children = compute_max_children(self.choice_type, self.constraints) 

96 assert max_children > 0 

97 return max_children 

98 

99 def _repr_pretty_(self, p: "RepresentationPrinter", cycle: bool) -> None: 

100 assert cycle is False 

101 for i, (value, child) in enumerate(self.children.items()): 

102 if i > 0: 

103 p.break_() 

104 p.text( 

105 _node_pretty(self.choice_type, value, self.constraints, forced=False) 

106 ) 

107 with p.indent(2): 

108 p.break_() 

109 p.pretty(child) 

110 

111 

112@dataclass(slots=True, frozen=True) 

113class Conclusion: 

114 """Represents a transition to a finished state.""" 

115 

116 status: Status 

117 interesting_origin: InterestingOrigin | None 

118 

119 def _repr_pretty_(self, p: "RepresentationPrinter", cycle: bool) -> None: 

120 assert cycle is False 

121 o = self.interesting_origin 

122 # avoid str(o), which can include multiple lines of context 

123 origin = ( 

124 "" if o is None else f", {o.exc_type.__name__} at {o.filename}:{o.lineno}" 

125 ) 

126 p.text(f"Conclusion ({self.status!r}{origin})") 

127 

128 

129# The number of max children where, beyond this, it is practically impossible 

130# for hypothesis to saturate / explore all children nodes in a reasonable time 

131# frame. We use this to bail out of expensive max children computations early, 

132# where the numbers involved are so large that we know they will be larger than 

133# this number. 

134# 

135# Note that it's ok for us to underestimate the number of max children of a node 

136# by using this. We just may think the node is exhausted when in fact it has more 

137# possible children to be explored. This has the potential to finish generation 

138# early due to exhausting the entire tree, but that is quite unlikely: (1) the 

139# number of examples would have to be quite high, and (2) the tree would have to 

140# contain only one or two nodes, or generate_novel_prefix would simply switch to 

141# exploring another non-exhausted node. 

142# 

143# Also note that we may sometimes compute max children above this value. In other 

144# words, this is *not* a hard maximum on the computed max children. It's the point 

145# where further computation is not beneficial - but sometimes doing that computation 

146# unconditionally is cheaper than estimating against this value. 

147# 

148# The one case where this may be detrimental is fuzzing, where the throughput of 

149# examples is so high that it really may saturate important nodes. We'll cross 

150# that bridge when we come to it. 

151MAX_CHILDREN_EFFECTIVELY_INFINITE: Final[int] = 10_000_000 

152 

153 

154def _count_distinct_strings(*, alphabet_size: int, min_size: int, max_size: int) -> int: 

155 # We want to estimate if we're going to have more children than 

156 # MAX_CHILDREN_EFFECTIVELY_INFINITE, without computing a potentially 

157 # extremely expensive pow. We'll check the two extreme cases - if the 

158 # number of strings in the largest string size alone is enough to put us 

159 # over this limit (at alphabet_size >= 2), and if the variation in sizes 

160 # (at alphabet_size == 1) is enough. If neither result in an early return, 

161 # the exact result should be reasonably cheap to compute. 

162 if alphabet_size == 0: 

163 # Special-case the empty string, avoid error in math.log(0). 

164 return 1 

165 elif alphabet_size == 1: 

166 # Special-case the constant alphabet, invalid in the geom-series sum. 

167 return max_size - min_size + 1 

168 else: 

169 # Estimate against log, which is cheaper than computing a pow. 

170 # 

171 # m = max_size 

172 # a = alphabet_size 

173 # N = MAX_CHILDREN_EFFECTIVELY_INFINITE 

174 # 

175 # a**m > N 

176 # <=> m * log(a) > log(N) 

177 log_max_sized_children = max_size * math.log(alphabet_size) 

178 if log_max_sized_children > math.log(MAX_CHILDREN_EFFECTIVELY_INFINITE): 

179 return MAX_CHILDREN_EFFECTIVELY_INFINITE 

180 

181 # The sum of a geometric series is given by (ref: wikipedia): 

182 # ᵐ∑ₖ₌₀ aᵏ = (aᵐ⁺¹ - 1) / (a - 1) 

183 # = S(m) / S(0) 

184 # assuming a != 1 and using the definition 

185 # S(m) := aᵐ⁺¹ - 1. 

186 # The sum we want, starting from a number n [0 <= n <= m] rather than zero, is 

187 # ᵐ∑ₖ₌ₙ aᵏ = ᵐ∑ₖ₌₀ aᵏ - ⁿ⁻¹∑ₖ₌₀ aᵏ = S(m) / S(0) - S(n - 1) / S(0) 

188 def S(n): 

189 return alphabet_size ** (n + 1) - 1 

190 

191 return (S(max_size) - S(min_size - 1)) // S(0) 

192 

193 

194def compute_max_children( 

195 choice_type: ChoiceTypeT, constraints: ChoiceConstraintsT 

196) -> int: 

197 if choice_type == "integer": 

198 constraints = cast(IntegerConstraints, constraints) 

199 min_value = constraints["min_value"] 

200 max_value = constraints["max_value"] 

201 

202 if min_value is None and max_value is None: 

203 # full 128 bit range. 

204 return 2**128 - 1 

205 if min_value is not None and max_value is not None: 

206 # count between min/max value. 

207 return max_value - min_value + 1 

208 

209 # hard case: only one bound was specified. Here we probe either upwards 

210 # or downwards with our full 128 bit generation, but only half of these 

211 # (plus one for the case of generating zero) result in a probe in the 

212 # direction we want. ((2**128 - 1) // 2) + 1 == 2 ** 127 

213 assert (min_value is None) != (max_value is None) 

214 return 2**127 

215 elif choice_type == "boolean": 

216 constraints = cast(BooleanConstraints, constraints) 

217 p = constraints["p"] 

218 # probabilities of 0 or 1 (or effectively 0 or 1) only have one choice. 

219 if p <= 2 ** (-64) or p >= (1 - 2 ** (-64)): 

220 return 1 

221 return 2 

222 elif choice_type == "bytes": 

223 constraints = cast(BytesConstraints, constraints) 

224 return _count_distinct_strings( 

225 alphabet_size=2**8, 

226 min_size=constraints["min_size"], 

227 max_size=constraints["max_size"], 

228 ) 

229 elif choice_type == "string": 

230 constraints = cast(StringConstraints, constraints) 

231 min_size = constraints["min_size"] 

232 max_size = constraints["max_size"] 

233 intervals = constraints["intervals"] 

234 

235 return _count_distinct_strings( 

236 alphabet_size=len(intervals), min_size=min_size, max_size=max_size 

237 ) 

238 elif choice_type == "float": 

239 constraints = cast(FloatConstraints, constraints) 

240 min_value_f = constraints["min_value"] 

241 max_value_f = constraints["max_value"] 

242 smallest_nonzero_magnitude = constraints["smallest_nonzero_magnitude"] 

243 

244 count = count_between_floats(min_value_f, max_value_f) 

245 

246 # we have two intervals: 

247 # a. [min_value, max_value] 

248 # b. [-smallest_nonzero_magnitude, smallest_nonzero_magnitude] 

249 # 

250 # which could be subsets (in either order), overlapping, or disjoint. We 

251 # want the interval difference a - b. 

252 

253 # next_down because endpoints are ok with smallest_nonzero_magnitude 

254 min_point = max(min_value_f, -flt.next_down(smallest_nonzero_magnitude)) 

255 max_point = min(max_value_f, flt.next_down(smallest_nonzero_magnitude)) 

256 

257 if min_point > max_point: 

258 # case: disjoint intervals. 

259 return count 

260 

261 count -= count_between_floats(min_point, max_point) 

262 if sign_aware_lte(min_value_f, -0.0) and sign_aware_lte(-0.0, max_value_f): 

263 # account for -0.0 

264 count += 1 

265 if sign_aware_lte(min_value_f, 0.0) and sign_aware_lte(0.0, max_value_f): 

266 # account for 0.0 

267 count += 1 

268 return count 

269 

270 raise NotImplementedError(f"unhandled choice_type {choice_type}") 

271 

272 

273# In theory, this is a strict superset of the functionality of compute_max_children; 

274# 

275# assert len(all_children(choice_type, constraints)) == compute_max_children(choice_type, constraints) 

276# 

277# In practice, we maintain two distinct implementations for efficiency and space 

278# reasons. If you just need the number of children, it is cheaper to use 

279# compute_max_children than to reify the list of children (only to immediately 

280# throw it away). 

281def _floats_between(a: float, b: float) -> Generator[float, None, None]: 

282 for n in range(float_to_int(a), float_to_int(b) + 1): 

283 yield int_to_float(n) 

284 

285 

286def all_children( 

287 choice_type: ChoiceTypeT, constraints: ChoiceConstraintsT 

288) -> Generator[ChoiceT, None, None]: 

289 if choice_type != "float": 

290 for index in range(compute_max_children(choice_type, constraints)): 

291 yield choice_from_index(index, choice_type, constraints) 

292 else: 

293 constraints = cast(FloatConstraints, constraints) 

294 # the float ordering is not injective (because of resampling 

295 # out-of-bounds values), so using choice_from_index would result in 

296 # duplicates. This violates invariants in datatree about being able 

297 # to draw unique new children using all_children. 

298 # 

299 # We instead maintain a separate implementation for floats. 

300 # TODO_IR write a better (bijective) ordering for floats and remove this! 

301 min_value = constraints["min_value"] 

302 max_value = constraints["max_value"] 

303 smallest_nonzero_magnitude = constraints["smallest_nonzero_magnitude"] 

304 

305 # handle zeroes separately so smallest_nonzero_magnitude can think of 

306 # itself as a complete interval (instead of a hole at ±0). 

307 if sign_aware_lte(min_value, -0.0) and sign_aware_lte(-0.0, max_value): 

308 yield -0.0 

309 if sign_aware_lte(min_value, 0.0) and sign_aware_lte(0.0, max_value): 

310 yield 0.0 

311 

312 if flt.is_negative(min_value): 

313 if flt.is_negative(max_value): 

314 # case: both negative. 

315 max_point = min(max_value, -smallest_nonzero_magnitude) 

316 # float_to_int increases as negative magnitude increases, so 

317 # invert order. 

318 yield from _floats_between(max_point, min_value) 

319 else: 

320 # case: straddles midpoint (which is between -0.0 and 0.0). 

321 yield from _floats_between(-smallest_nonzero_magnitude, min_value) 

322 yield from _floats_between(smallest_nonzero_magnitude, max_value) 

323 else: 

324 # case: both positive. 

325 min_point = max(min_value, smallest_nonzero_magnitude) 

326 yield from _floats_between(min_point, max_value) 

327 

328 

329@dataclass(slots=True, frozen=False) 

330class TreeNode: 

331 """ 

332 A node, or collection of directly descended nodes, in a DataTree. 

333 

334 We store the DataTree as a radix tree (https://en.wikipedia.org/wiki/Radix_tree), 

335 which means that nodes that are the only child of their parent are collapsed 

336 into their parent to save space. 

337 

338 Conceptually, you can unfold a single TreeNode storing n values in its lists 

339 into a sequence of n nodes, each a child of the last. In other words, 

340 (constraints[i], values[i], choice_types[i]) corresponds to the single node at index 

341 i. 

342 

343 Note that if a TreeNode represents a choice (i.e. the nodes cannot be compacted 

344 via the radix tree definition), then its lists will be empty and it will 

345 store a `Branch` representing that choce in its `transition`. 

346 

347 Examples 

348 -------- 

349 

350 Consider sequentially drawing a boolean, then an integer. 

351 

352 data.draw_boolean() 

353 data.draw_integer(1, 3) 

354 

355 If we draw True and then 2, the tree may conceptually look like this. 

356 

357 ┌──────┐ 

358 │ root │ 

359 └──┬───┘ 

360 ┌──┴───┐ 

361 │ True │ 

362 └──┬───┘ 

363 ┌──┴───┐ 

364 │ 2 │ 

365 └──────┘ 

366 

367 But since 2 is the only child of True, we will compact these nodes and store 

368 them as a single TreeNode. 

369 

370 ┌──────┐ 

371 │ root │ 

372 └──┬───┘ 

373 ┌────┴──────┐ 

374 │ [True, 2] │ 

375 └───────────┘ 

376 

377 If we then draw True and then 3, True will have multiple children and we 

378 can no longer store this compacted representation. We would call split_at(0) 

379 on the [True, 2] node to indicate that we need to add a choice at 0-index 

380 node (True). 

381 

382 ┌──────┐ 

383 │ root │ 

384 └──┬───┘ 

385 ┌──┴───┐ 

386 ┌─┤ True ├─┐ 

387 │ └──────┘ │ 

388 ┌─┴─┐ ┌─┴─┐ 

389 │ 2 │ │ 3 │ 

390 └───┘ └───┘ 

391 """ 

392 

393 # The constraints, value, and choice_types of the nodes stored here. These always 

394 # have the same length. The values at index i belong to node i. 

395 constraints: list[ChoiceConstraintsT] = field(default_factory=list) 

396 values: list[ChoiceT] = field(default_factory=list) 

397 choice_types: list[ChoiceTypeT] = field(default_factory=list) 

398 

399 # The indices of nodes which had forced values. 

400 # 

401 # Stored as None if no indices have been forced, purely for space saving 

402 # reasons (we force quite rarely). 

403 __forced: set[int] | None = field(default=None, init=False) 

404 

405 # What happens next after drawing these nodes. (conceptually, "what is the 

406 # child/children of the last node stored here"). 

407 # 

408 # One of: 

409 # - None (we don't know yet) 

410 # - Branch (we have seen multiple possible outcomes here) 

411 # - Conclusion (ConjectureData.conclude_test was called here) 

412 # - Killed (this branch is valid and may even have children, but should not 

413 # be explored when generating novel prefixes) 

414 transition: None | Branch | Conclusion | Killed = None 

415 

416 # A tree node is exhausted if every possible sequence of draws below it has 

417 # been explored. We only update this when performing operations that could 

418 # change the answer. 

419 # 

420 # See also TreeNode.check_exhausted. 

421 is_exhausted: bool = field(default=False, init=False) 

422 

423 @property 

424 def forced(self) -> Set[int]: 

425 if not self.__forced: 

426 return EMPTY 

427 return self.__forced 

428 

429 def mark_forced(self, i: int) -> None: 

430 """ 

431 Note that the draw at node i was forced. 

432 """ 

433 assert 0 <= i < len(self.values) 

434 if self.__forced is None: 

435 self.__forced = set() 

436 self.__forced.add(i) 

437 

438 def split_at(self, i: int, *, new_value: object = None) -> None: 

439 """ 

440 Splits the tree so that it can incorporate a decision at the draw call 

441 corresponding to the node at position i. 

442 

443 Raises FlakyStrategyDefinition if node i was forced. 

444 """ 

445 

446 if i in self.forced: 

447 raise FlakyStrategyDefinition.with_detail( 

448 f"The {self.choice_types[i]} value was forced to " 

449 f"{self.values[i]!r} in the first run, but the second run " 

450 f"drew {new_value!r}.\n" 

451 ) 

452 

453 assert not self.is_exhausted 

454 

455 key = self.values[i] 

456 

457 child = TreeNode( 

458 choice_types=self.choice_types[i + 1 :], 

459 constraints=self.constraints[i + 1 :], 

460 values=self.values[i + 1 :], 

461 transition=self.transition, 

462 ) 

463 self.transition = Branch( 

464 constraints=self.constraints[i], 

465 choice_type=self.choice_types[i], 

466 children={key: child}, 

467 ) 

468 if self.__forced is not None: 

469 child.__forced = {j - i - 1 for j in self.__forced if j > i} 

470 self.__forced = {j for j in self.__forced if j < i} 

471 child.check_exhausted() 

472 del self.choice_types[i:] 

473 del self.values[i:] 

474 del self.constraints[i:] 

475 assert len(self.values) == len(self.constraints) == len(self.choice_types) == i 

476 

477 def check_exhausted(self) -> bool: 

478 """ 

479 Recalculates is_exhausted if necessary, and then returns it. 

480 

481 A node is exhausted if: 

482 - Its transition is Conclusion or Killed 

483 - It has the maximum number of children (i.e. we have found all of its 

484 possible children), and all its children are exhausted 

485 

486 Therefore, we only need to compute this for a node when: 

487 - We first create it in split_at 

488 - We set its transition to either Conclusion or Killed 

489 (TreeRecordingObserver.conclude_test or TreeRecordingObserver.kill_branch) 

490 - We exhaust any of its children 

491 """ 

492 

493 if ( 

494 # a node cannot go from is_exhausted -> not is_exhausted. 

495 not self.is_exhausted 

496 # if we don't know what happens after this node, we don't have 

497 # enough information to tell if it's exhausted. 

498 and self.transition is not None 

499 # if there are still any nodes left which are the only child of their 

500 # parent (len(self.values) > 0), then this TreeNode must be not 

501 # exhausted, unless all of those nodes were forced. 

502 # 

503 # This is because we maintain an invariant of only adding nodes to 

504 # DataTree which have at least 2 possible values, so we know that if 

505 # they do not have any siblings that we still have more choices to 

506 # discover. 

507 # 

508 # (We actually *do* currently add single-valued nodes to the tree, 

509 # but immediately split them into a transition to avoid falsifying 

510 # this check. this is a bit of a hack.) 

511 and len(self.forced) == len(self.values) 

512 ): 

513 if isinstance(self.transition, (Conclusion, Killed)): 

514 self.is_exhausted = True 

515 elif len(self.transition.children) == self.transition.max_children: 

516 self.is_exhausted = all( 

517 v.is_exhausted for v in self.transition.children.values() 

518 ) 

519 return self.is_exhausted 

520 

521 def _repr_pretty_(self, p: "RepresentationPrinter", cycle: bool) -> None: 

522 assert cycle is False 

523 indent = 0 

524 for i, (choice_type, constraints, value) in enumerate( 

525 zip(self.choice_types, self.constraints, self.values, strict=True) 

526 ): 

527 with p.indent(indent): 

528 if i > 0: 

529 p.break_() 

530 p.text( 

531 _node_pretty( 

532 choice_type, value, constraints, forced=i in self.forced 

533 ) 

534 ) 

535 indent += 2 

536 

537 with p.indent(indent): 

538 if len(self.values) > 0: 

539 p.break_() 

540 if self.transition is not None: 

541 p.pretty(self.transition) 

542 else: 

543 p.text("unknown") 

544 

545 

546class DataTree: 

547 """ 

548 A DataTree tracks the structured history of draws in some test function, 

549 across multiple ConjectureData objects. 

550 

551 This information is used by ConjectureRunner to generate novel prefixes of 

552 this tree (see generate_novel_prefix). A novel prefix is a sequence of draws 

553 which the tree has not seen before, and therefore the ConjectureRunner has 

554 not generated as an input to the test function before. 

555 

556 DataTree tracks the following: 

557 

558 - Drawn choices in the choice sequence 

559 - ConjectureData.draw_integer() 

560 - ConjectureData.draw_float() 

561 - ConjectureData.draw_string() 

562 - ConjectureData.draw_boolean() 

563 - ConjectureData.draw_bytes() 

564 - Test conclusions (with some Status, e.g. Status.VALID) 

565 - ConjectureData.conclude_test() 

566 

567 A DataTree is — surprise — a *tree*. A node in this tree is either a choice draw 

568 with some value, a test conclusion with some Status, or a special `Killed` value, 

569 which denotes that further draws may exist beyond this node but should not be 

570 considered worth exploring when generating novel prefixes. A node is a leaf 

571 iff it is a conclusion or Killed. 

572 

573 A branch from node A to node B indicates that we have previously seen some 

574 sequence (a, b) of draws, where a and b are the values in nodes A and B. 

575 Similar intuition holds for conclusion and Killed nodes. 

576 

577 Examples 

578 -------- 

579 

580 To see how a DataTree gets built through successive sets of draws, consider 

581 the following code that calls through to some ConjecutreData object `data`. 

582 The first call can be either True or False, and the second call can be any 

583 integer in the range [1, 3]. 

584 

585 data.draw_boolean() 

586 data.draw_integer(1, 3) 

587 

588 To start, the corresponding DataTree object is completely empty. 

589 

590 ┌──────┐ 

591 │ root │ 

592 └──────┘ 

593 

594 We happen to draw True and then 2 in the above code. The tree tracks this. 

595 (2 also connects to a child Conclusion node with Status.VALID since it's the 

596 final draw in the code. I'll omit Conclusion nodes in diagrams for brevity.) 

597 

598 ┌──────┐ 

599 │ root │ 

600 └──┬───┘ 

601 ┌──┴───┐ 

602 │ True │ 

603 └──┬───┘ 

604 ┌──┴───┐ 

605 │ 2 │ 

606 └──────┘ 

607 

608 This is a very boring tree so far! But now we happen to draw False and 

609 then 1. This causes a split in the tree. Remember, DataTree tracks history 

610 over all invocations of a function, not just one. The end goal is to know 

611 what invocations haven't been tried yet, after all. 

612 

613 ┌──────┐ 

614 ┌───┤ root ├───┐ 

615 │ └──────┘ │ 

616 ┌──┴───┐ ┌─┴─────┐ 

617 │ True │ │ False │ 

618 └──┬───┘ └──┬────┘ 

619 ┌─┴─┐ ┌─┴─┐ 

620 │ 2 │ │ 1 │ 

621 └───┘ └───┘ 

622 

623 If we were to ask DataTree for a novel prefix at this point, it might 

624 generate any of (True, 1), (True, 3), (False, 2), or (False, 3). 

625 

626 Note that the novel prefix stops as soon as it generates a novel node. For 

627 instance, if we had generated a novel prefix back when the tree was only 

628 root -> True -> 2, we could have gotten any of (True, 1), (True, 3), or 

629 (False). But we could *not* have gotten (False, n), because both False and 

630 n were novel at that point, and we stop at the first novel node — False. 

631 

632 I won't belabor this example. Here's what the tree looks like when fully 

633 explored: 

634 

635 ┌──────┐ 

636 ┌──────┤ root ├──────┐ 

637 │ └──────┘ │ 

638 ┌──┴───┐ ┌─┴─────┐ 

639 ┌──┤ True ├──┐ ┌───┤ False ├──┐ 

640 │ └──┬───┘ │ │ └──┬────┘ │ 

641 ┌─┴─┐ ┌─┴─┐ ┌─┴─┐ ┌─┴─┐ ┌─┴─┐ ┌─┴─┐ 

642 │ 1 │ │ 2 │ │ 3 │ │ 1 │ │ 2 │ │ 3 │ 

643 └───┘ └───┘ └───┘ └───┘ └───┘ └───┘ 

644 

645 You could imagine much more complicated trees than this arising in practice, 

646 and indeed they do. In particular, the tree need not be balanced or 'nice' 

647 like the tree above. For instance, 

648 

649 b = data.draw_boolean() 

650 if b: 

651 data.draw_integer(1, 3) 

652 

653 results in a tree with the entire right part lopped off, and False leading 

654 straight to a conclusion node with Status.VALID. As another example, 

655 

656 n = data.draw_integers() 

657 assume(n >= 3) 

658 data.draw_string() 

659 

660 results in a tree with the 0, 1, and 2 nodes leading straight to a 

661 conclusion node with Status.INVALID, and the rest branching off into all 

662 the possibilities of draw_string. 

663 

664 Notes 

665 ----- 

666 

667 The above examples are slightly simplified and are intended to convey 

668 intuition. In practice, there are some implementation details to be aware 

669 of. 

670 

671 - In draw nodes, we store the constraints used in addition to the value drawn. 

672 E.g. the node corresponding to data.draw_float(min_value=1.0, max_value=1.5) 

673 would store {"min_value": 1.0, "max_value": 1.5, ...} (default values for 

674 other constraints omitted). 

675 

676 The constraints parameters have the potential to change both the range of 

677 possible outputs of a node, and the probability distribution within that 

678 range, so we need to use these when drawing in DataTree as well. We draw 

679 values using these constraints when (1) generating a novel value for a node 

680 and (2) choosing a random child when traversing the tree. 

681 

682 - For space efficiency, rather than tracking the full tree structure, we 

683 store DataTree as a radix tree. This is conceptually equivalent (radix 

684 trees can always be "unfolded" to the full tree) but it means the internal 

685 representation may differ in practice. 

686 

687 See TreeNode for more information. 

688 """ 

689 

690 def __init__(self) -> None: 

691 self.root: TreeNode = TreeNode() 

692 self._children_cache: dict[ChoiceT, ChildrenCacheValueT] = {} 

693 

694 @property 

695 def is_exhausted(self) -> bool: 

696 """ 

697 Returns True if every node is exhausted, and therefore the tree has 

698 been fully explored. 

699 """ 

700 return self.root.is_exhausted 

701 

702 def generate_novel_prefix(self, random: Random) -> tuple[ChoiceT, ...]: 

703 """Generate a short random string that (after rewriting) is not 

704 a prefix of any choice sequence previously added to the tree. 

705 

706 The resulting prefix is essentially arbitrary - it would be nice 

707 for it to be uniform at random, but previous attempts to do that 

708 have proven too expensive. 

709 """ 

710 assert not self.is_exhausted 

711 prefix = [] 

712 

713 def append_choice(choice_type: ChoiceTypeT, choice: ChoiceT) -> None: 

714 if choice_type == "float": 

715 assert isinstance(choice, int) 

716 choice = int_to_float(choice) 

717 prefix.append(choice) 

718 

719 current_node = self.root 

720 while True: 

721 assert not current_node.is_exhausted 

722 for i, (choice_type, constraints, value) in enumerate( 

723 zip( 

724 current_node.choice_types, 

725 current_node.constraints, 

726 current_node.values, 

727 strict=True, 

728 ) 

729 ): 

730 if i in current_node.forced: 

731 append_choice(choice_type, value) 

732 else: 

733 attempts = 0 

734 while True: 

735 if attempts <= 10: 

736 try: 

737 node_value = self._draw( 

738 choice_type, constraints, random=random 

739 ) 

740 except StopTest: # pragma: no cover 

741 # it is possible that drawing from a fresh data can 

742 # overrun BUFFER_SIZE, due to eg unlucky rejection sampling 

743 # of integer probes. Retry these cases. 

744 attempts += 1 

745 continue 

746 else: 

747 node_value = self._draw_from_cache( 

748 choice_type, 

749 constraints, 

750 key=id(current_node), 

751 random=random, 

752 ) 

753 

754 if node_value != value: 

755 append_choice(choice_type, node_value) 

756 break 

757 attempts += 1 

758 self._reject_child( 

759 choice_type, 

760 constraints, 

761 child=node_value, 

762 key=id(current_node), 

763 ) 

764 # We've now found a value that is allowed to 

765 # vary, so what follows is not fixed. 

766 return tuple(prefix) 

767 

768 assert not isinstance(current_node.transition, (Conclusion, Killed)) 

769 if current_node.transition is None: 

770 return tuple(prefix) 

771 branch = current_node.transition 

772 assert isinstance(branch, Branch) 

773 

774 attempts = 0 

775 while True: 

776 if attempts <= 10: 

777 try: 

778 node_value = self._draw( 

779 branch.choice_type, branch.constraints, random=random 

780 ) 

781 except StopTest: # pragma: no cover 

782 attempts += 1 

783 continue 

784 else: 

785 node_value = self._draw_from_cache( 

786 branch.choice_type, 

787 branch.constraints, 

788 key=id(branch), 

789 random=random, 

790 ) 

791 try: 

792 child = branch.children[node_value] 

793 except KeyError: 

794 append_choice(branch.choice_type, node_value) 

795 return tuple(prefix) 

796 if not child.is_exhausted: 

797 append_choice(branch.choice_type, node_value) 

798 current_node = child 

799 break 

800 attempts += 1 

801 self._reject_child( 

802 branch.choice_type, 

803 branch.constraints, 

804 child=node_value, 

805 key=id(branch), 

806 ) 

807 

808 # We don't expect this assertion to ever fire, but coverage 

809 # wants the loop inside to run if you have branch checking 

810 # on, hence the pragma. 

811 assert ( # pragma: no cover 

812 attempts != 1000 

813 or len(branch.children) < branch.max_children 

814 or any(not v.is_exhausted for v in branch.children.values()) 

815 ) 

816 

817 def rewrite(self, choices): 

818 """Use previously seen ConjectureData objects to return a tuple of 

819 the rewritten choice sequence and the status we would get from running 

820 that with the test function. If the status cannot be predicted 

821 from the existing values it will be None.""" 

822 data = ConjectureData.for_choices(choices) 

823 try: 

824 self.simulate_test_function(data) 

825 return (data.choices, data.status) 

826 except PreviouslyUnseenBehaviour: 

827 return (choices, None) 

828 

829 def simulate_test_function(self, data: ConjectureData) -> None: 

830 """Run a simulated version of the test function recorded by 

831 this tree. Note that this does not currently call ``stop_span`` 

832 or ``start_span`` as these are not currently recorded in the 

833 tree. This will likely change in future.""" 

834 node = self.root 

835 

836 def draw(choice_type, constraints, *, forced=None, convert_forced=True): 

837 if choice_type == "float" and forced is not None and convert_forced: 

838 forced = int_to_float(forced) 

839 

840 draw_func = getattr(data, f"draw_{choice_type}") 

841 value = draw_func(**constraints, forced=forced) 

842 

843 if choice_type == "float": 

844 value = float_to_int(value) 

845 return value 

846 

847 try: 

848 while True: 

849 for i, (choice_type, constraints, previous) in enumerate( 

850 zip(node.choice_types, node.constraints, node.values, strict=True) 

851 ): 

852 v = draw( 

853 choice_type, 

854 constraints, 

855 forced=previous if i in node.forced else None, 

856 ) 

857 if v != previous: 

858 raise PreviouslyUnseenBehaviour 

859 if isinstance(node.transition, Conclusion): 

860 t = node.transition 

861 data.conclude_test(t.status, t.interesting_origin) 

862 elif node.transition is None: 

863 raise PreviouslyUnseenBehaviour 

864 elif isinstance(node.transition, Branch): 

865 v = draw(node.transition.choice_type, node.transition.constraints) 

866 try: 

867 node = node.transition.children[v] 

868 except KeyError as err: 

869 raise PreviouslyUnseenBehaviour from err 

870 else: 

871 assert isinstance(node.transition, Killed) 

872 data.observer.kill_branch() 

873 node = node.transition.next_node 

874 except StopTest: 

875 pass 

876 

877 def new_observer(self): 

878 return TreeRecordingObserver(self) 

879 

880 def _draw( 

881 self, 

882 choice_type: ChoiceTypeT, 

883 constraints: ChoiceConstraintsT, 

884 *, 

885 random: Random, 

886 ) -> ChoiceT: 

887 from hypothesis.internal.conjecture.data import draw_choice 

888 

889 value = draw_choice(choice_type, constraints, random=random) 

890 # using floats as keys into branch.children breaks things, because 

891 # e.g. hash(0.0) == hash(-0.0) would collide as keys when they are 

892 # in fact distinct child branches. 

893 # To distinguish floats here we'll use their bits representation. This 

894 # entails some bookkeeping such that we're careful about when the 

895 # float key is in its bits form (as a key into branch.children) and 

896 # when it is in its float form (as a value we want to write to the 

897 # choice sequence), and converting between the two forms as appropriate. 

898 if choice_type == "float": 

899 assert isinstance(value, float) 

900 value = float_to_int(value) 

901 return value 

902 

903 def _get_children_cache( 

904 self, choice_type: ChoiceTypeT, constraints: ChoiceConstraintsT, *, key: ChoiceT 

905 ) -> ChildrenCacheValueT: 

906 # cache the state of the children generator per node/branch (passed as 

907 # `key` here), such that we track which children we've already tried 

908 # for this branch across draws. 

909 # We take advantage of python generators here as one-way iterables, 

910 # so each time we iterate we implicitly store our position in the 

911 # children generator and don't re-draw children. `children` is the 

912 # concrete list of children draw from the generator that we will work 

913 # with. Whenever we need to top up this list, we will draw a new value 

914 # from the generator. 

915 if key not in self._children_cache: 

916 generator = all_children(choice_type, constraints) 

917 children: list[ChoiceT] = [] 

918 rejected: set[ChoiceT] = set() 

919 self._children_cache[key] = (generator, children, rejected) 

920 

921 return self._children_cache[key] 

922 

923 def _draw_from_cache( 

924 self, 

925 choice_type: ChoiceTypeT, 

926 constraints: ChoiceConstraintsT, 

927 *, 

928 key: ChoiceT, 

929 random: Random, 

930 ) -> ChoiceT: 

931 generator, children, rejected = self._get_children_cache( 

932 choice_type, constraints, key=key 

933 ) 

934 # Keep a stock of 100 potentially-valid children at all times. 

935 # This number is chosen to balance memory/speed vs randomness. Ideally 

936 # we would sample uniformly from all not-yet-rejected children, but 

937 # computing and storing said children is not free. 

938 # no-branch because coverage of the fall-through case here is a bit 

939 # annoying. 

940 if len(children) < 100: # pragma: no branch 

941 for v in generator: 

942 if choice_type == "float": 

943 assert isinstance(v, float) 

944 v = float_to_int(v) 

945 if v in rejected: 

946 continue 

947 children.append(v) 

948 if len(children) >= 100: 

949 break 

950 

951 return random.choice(children) 

952 

953 def _reject_child( 

954 self, 

955 choice_type: ChoiceTypeT, 

956 constraints: ChoiceConstraintsT, 

957 *, 

958 child: ChoiceT, 

959 key: ChoiceT, 

960 ) -> None: 

961 _generator, children, rejected = self._get_children_cache( 

962 choice_type, constraints, key=key 

963 ) 

964 rejected.add(child) 

965 # we remove a child from the list of possible children *only* when it is 

966 # rejected, and not when it is initially drawn in _draw_from_cache. The 

967 # reason is that a child being drawn does not guarantee that child will 

968 # be used in a way such that it is written back to the tree, so it needs 

969 # to be available for future draws until we are certain it has been 

970 # used. 

971 # 

972 # For instance, if we generated novel prefixes in a loop (but never used 

973 # those prefixes to generate new values!) then we don't want to remove 

974 # the drawn children from the available pool until they are actually 

975 # used. 

976 # 

977 # This does result in a small inefficiency: we may draw a child, 

978 # immediately use it (so we know it cannot be drawn again), but still 

979 # wait to draw and reject it here, because DataTree cannot guarantee 

980 # the drawn child has been used. 

981 if child in children: 

982 children.remove(child) 

983 

984 def _repr_pretty_(self, p: "RepresentationPrinter", cycle: bool) -> None: 

985 assert cycle is False 

986 p.pretty(self.root) 

987 

988 

989class TreeRecordingObserver(DataObserver): 

990 def __init__(self, tree: DataTree): 

991 # this attr isn't read, but is very useful for local debugging flaky 

992 # errors, with 

993 # `from hypothesis.vendor import pretty; print(pretty.pretty(self._root))` 

994 self._root = tree.root 

995 self._current_node: TreeNode = tree.root 

996 self._index_in_current_node: int = 0 

997 self._trail: list[TreeNode] = [self._current_node] 

998 self.killed: bool = False 

999 

1000 def draw_integer( 

1001 self, value: int, *, was_forced: bool, constraints: IntegerConstraints 

1002 ) -> None: 

1003 self.draw_value( 

1004 "integer", value, was_forced=was_forced, constraints=constraints 

1005 ) 

1006 

1007 def draw_float( 

1008 self, value: float, *, was_forced: bool, constraints: FloatConstraints 

1009 ) -> None: 

1010 self.draw_value("float", value, was_forced=was_forced, constraints=constraints) 

1011 

1012 def draw_string( 

1013 self, value: str, *, was_forced: bool, constraints: StringConstraints 

1014 ) -> None: 

1015 self.draw_value("string", value, was_forced=was_forced, constraints=constraints) 

1016 

1017 def draw_bytes( 

1018 self, value: bytes, *, was_forced: bool, constraints: BytesConstraints 

1019 ) -> None: 

1020 self.draw_value("bytes", value, was_forced=was_forced, constraints=constraints) 

1021 

1022 def draw_boolean( 

1023 self, value: bool, *, was_forced: bool, constraints: BooleanConstraints 

1024 ) -> None: 

1025 self.draw_value( 

1026 "boolean", value, was_forced=was_forced, constraints=constraints 

1027 ) 

1028 

1029 def draw_value( 

1030 self, 

1031 choice_type: ChoiceTypeT, 

1032 value: ChoiceT, 

1033 *, 

1034 was_forced: bool, 

1035 constraints: ChoiceConstraintsT, 

1036 ) -> None: 

1037 i = self._index_in_current_node 

1038 self._index_in_current_node += 1 

1039 node = self._current_node 

1040 

1041 if isinstance(value, float): 

1042 value = float_to_int(value) 

1043 

1044 assert len(node.constraints) == len(node.values) == len(node.choice_types) 

1045 if i < len(node.values): 

1046 if ( 

1047 choice_type != node.choice_types[i] 

1048 or constraints != node.constraints[i] 

1049 ): 

1050 raise FlakyStrategyDefinition.from_mismatch( 

1051 node.choice_types[i], 

1052 node.constraints[i], 

1053 choice_type, 

1054 constraints, 

1055 ) 

1056 # Note that we don't check whether a previously 

1057 # forced value is now free. That will be caught 

1058 # if we ever split the node there, but otherwise 

1059 # may pass silently. This is acceptable because it 

1060 # means we skip a hash set lookup on every 

1061 # draw and that's a pretty niche failure mode. 

1062 if was_forced and i not in node.forced: 

1063 raise FlakyStrategyDefinition.with_detail( 

1064 f"The {choice_type} value was forced to a specific value " 

1065 f"but was not forced on the first run.\n" 

1066 ) 

1067 if value != node.values[i]: 

1068 node.split_at(i, new_value=value) 

1069 assert i == len(node.values) 

1070 new_node = TreeNode() 

1071 assert isinstance(node.transition, Branch) 

1072 node.transition.children[value] = new_node 

1073 self._current_node = new_node 

1074 self._index_in_current_node = 0 

1075 else: 

1076 trans = node.transition 

1077 if trans is None: 

1078 node.choice_types.append(choice_type) 

1079 node.constraints.append(constraints) 

1080 node.values.append(value) 

1081 if was_forced: 

1082 node.mark_forced(i) 

1083 # generate_novel_prefix assumes the following invariant: any one 

1084 # of the series of draws in a particular node can vary, i.e. the 

1085 # max number of children is at least 2. However, some draws are 

1086 # pseudo-choices and only have a single value, such as 

1087 # integers(0, 0). 

1088 # 

1089 # Currently, we address this by forcefully splitting such 

1090 # single-valued nodes into a transition when we see them. An 

1091 # exception to this is if it was forced: forced pseudo-choices 

1092 # do not cause the above issue because they inherently cannot 

1093 # vary, and moreover they trip other invariants about never 

1094 # splitting forced nodes. 

1095 # 

1096 # An alternative is not writing such choices to the tree at 

1097 # all, and thus guaranteeing that each node has at least 2 max 

1098 # children. 

1099 if ( 

1100 compute_max_children(choice_type, constraints) == 1 

1101 and not was_forced 

1102 ): 

1103 node.split_at(i, new_value=value) 

1104 assert isinstance(node.transition, Branch) 

1105 self._current_node = node.transition.children[value] 

1106 self._index_in_current_node = 0 

1107 elif isinstance(trans, Conclusion): 

1108 assert trans.status != Status.OVERRUN 

1109 # We tried to draw where history says we should have 

1110 # stopped 

1111 raise FlakyStrategyDefinition.with_detail( 

1112 "The second run drew more data than the first run.\n" 

1113 ) 

1114 else: 

1115 assert isinstance(trans, Branch), trans 

1116 if choice_type != trans.choice_type or constraints != trans.constraints: 

1117 raise FlakyStrategyDefinition.from_mismatch( 

1118 trans.choice_type, 

1119 trans.constraints, 

1120 choice_type, 

1121 constraints, 

1122 ) 

1123 try: 

1124 self._current_node = trans.children[value] 

1125 except KeyError: 

1126 self._current_node = trans.children.setdefault(value, TreeNode()) 

1127 self._index_in_current_node = 0 

1128 if self._trail[-1] is not self._current_node: 

1129 self._trail.append(self._current_node) 

1130 

1131 def kill_branch(self) -> None: 

1132 """Mark this part of the tree as not worth re-exploring.""" 

1133 if self.killed: 

1134 return 

1135 

1136 self.killed = True 

1137 

1138 if self._index_in_current_node < len(self._current_node.values) or ( 

1139 self._current_node.transition is not None 

1140 and not isinstance(self._current_node.transition, Killed) 

1141 ): 

1142 raise FlakyStrategyDefinition.with_detail( 

1143 "The second run stopped drawing earlier than the first run, " 

1144 "which continued to draw more data.\n" 

1145 ) 

1146 

1147 if self._current_node.transition is None: 

1148 self._current_node.transition = Killed(TreeNode()) 

1149 self.__update_exhausted() 

1150 

1151 self._current_node = self._current_node.transition.next_node 

1152 self._index_in_current_node = 0 

1153 self._trail.append(self._current_node) 

1154 

1155 def conclude_test( 

1156 self, status: Status, interesting_origin: InterestingOrigin | None 

1157 ) -> None: 

1158 """Says that ``status`` occurred at node ``node``. This updates the 

1159 node if necessary and checks for consistency.""" 

1160 if status == Status.OVERRUN: 

1161 return 

1162 i = self._index_in_current_node 

1163 node = self._current_node 

1164 

1165 if i < len(node.values) or isinstance(node.transition, Branch): 

1166 raise FlakyStrategyDefinition.with_detail( 

1167 "The second run stopped drawing earlier than the first run, " 

1168 "which continued to draw more data.\n" 

1169 ) 

1170 

1171 new_transition = Conclusion(status, interesting_origin) 

1172 

1173 if node.transition is not None and node.transition != new_transition: 

1174 # As an, I'm afraid, horrible bodge, we deliberately ignore flakiness 

1175 # where tests go from interesting to valid, because it's much easier 

1176 # to produce good error messages for these further up the stack. 

1177 if isinstance(node.transition, Conclusion) and ( 

1178 node.transition.status != Status.INTERESTING 

1179 or new_transition.status != Status.VALID 

1180 ): 

1181 old_origin = node.transition.interesting_origin 

1182 new_origin = new_transition.interesting_origin 

1183 raise FlakyReplay( 

1184 f"Inconsistent results from replaying a test case!\n" 

1185 f" last: {node.transition.status.name} from {old_origin}\n" 

1186 f" this: {new_transition.status.name} from {new_origin}", 

1187 (old_origin, new_origin), 

1188 ) 

1189 else: 

1190 node.transition = new_transition 

1191 

1192 assert node is self._trail[-1] 

1193 node.check_exhausted() 

1194 assert len(node.values) > 0 or node.check_exhausted() 

1195 

1196 if not self.killed: 

1197 self.__update_exhausted() 

1198 

1199 def __update_exhausted(self) -> None: 

1200 for t in reversed(self._trail): 

1201 # Any node we've traversed might have now become exhausted. 

1202 # We check from the right. As soon as we hit a node that 

1203 # isn't exhausted, this automatically implies that all of 

1204 # its parents are not exhausted, so we stop. 

1205 if not t.check_exhausted(): 

1206 break