1# This file is part of Hypothesis, which may be found at
2# https://github.com/HypothesisWorks/hypothesis/
3#
4# Copyright the Hypothesis Authors.
5# Individual contributors are listed in AUTHORS.rst and the git log.
6#
7# This Source Code Form is subject to the terms of the Mozilla Public License,
8# v. 2.0. If a copy of the MPL was not distributed with this file, You can
9# obtain one at https://mozilla.org/MPL/2.0/.
10
11import math
12from collections.abc import Generator
13from random import Random
14from typing import TYPE_CHECKING, AbstractSet, Final, Optional, Union, cast
15
16import attr
17
18from hypothesis.errors import (
19 FlakyReplay,
20 FlakyStrategyDefinition,
21 HypothesisException,
22 StopTest,
23)
24from hypothesis.internal import floats as flt
25from hypothesis.internal.conjecture.choice import (
26 BooleanConstraints,
27 BytesConstraints,
28 ChoiceConstraintsT,
29 ChoiceT,
30 ChoiceTypeT,
31 FloatConstraints,
32 IntegerConstraints,
33 StringConstraints,
34 choice_from_index,
35)
36from hypothesis.internal.conjecture.data import ConjectureData, DataObserver, Status
37from hypothesis.internal.escalation import InterestingOrigin
38from hypothesis.internal.floats import (
39 count_between_floats,
40 float_to_int,
41 int_to_float,
42 sign_aware_lte,
43)
44
45if TYPE_CHECKING:
46 from typing import TypeAlias
47
48 from hypothesis.vendor.pretty import RepresentationPrinter
49
# Cache entry for DataTree._children_cache. The triple is
# (generator over all possible children, list of children drawn from the
# generator so far, set of children rejected so far); see
# DataTree._get_children_cache for how the three parts interact.
ChildrenCacheValueT: "TypeAlias" = tuple[
    Generator[ChoiceT, None, None], list[ChoiceT], set[ChoiceT]
]
53
54
class PreviouslyUnseenBehaviour(HypothesisException):
    """Raised when replaying draws against the tree reaches a part of the
    tree that has not been recorded yet (see DataTree.simulate_test_function).
    """

    pass
57
58
# Message for FlakyStrategyDefinition errors raised when the recorded tree
# and a replayed run disagree (see e.g. TreeNode.split_at).
_FLAKY_STRAT_MSG = (
    "Inconsistent data generation! Data generation behaved differently "
    "between different runs. Is your data generation depending on external "
    "state?"
)


# Shared immutable empty set, returned by TreeNode.forced when no indices
# have been forced. Avoids allocating a fresh set on every access.
EMPTY: frozenset[int] = frozenset()
67
68
@attr.s(slots=True)
class Killed:
    """Represents a transition to part of the tree which has been marked as
    "killed", meaning we want to treat it as not worth exploring, so it will
    be treated as if it were completely explored for the purposes of
    exhaustion."""

    # The node this killed transition leads to.
    next_node: "TreeNode" = attr.ib()

    def _repr_pretty_(self, p: "RepresentationPrinter", cycle: bool) -> None:
        # Pretty-printing hook (hypothesis.vendor.pretty). A tree is acyclic,
        # so we never expect to be called mid-cycle.
        assert cycle is False
        p.text("Killed")
81
82
def _node_pretty(
    choice_type: ChoiceTypeT,
    value: ChoiceT,
    constraints: ChoiceConstraintsT,
    *,
    forced: bool,
) -> str:
    """Render a single drawn node as one line: the choice type, the repr of
    the drawn value (tagged when it was forced), and its constraints."""
    if forced:
        marker = " [forced]"
    else:
        marker = ""
    return f"{choice_type} {value!r}{marker} {constraints}"
92
93
@attr.s(slots=True)
class Branch:
    """Represents a transition where multiple choices can be made as to what
    to draw."""

    constraints: ChoiceConstraintsT = attr.ib()
    choice_type: ChoiceTypeT = attr.ib()
    # Maps each previously-seen value of this choice to the subtree reached
    # by drawing that value.
    children: dict[ChoiceT, "TreeNode"] = attr.ib(repr=False)

    @property
    def max_children(self) -> int:
        # Total number of distinct values this choice could produce under
        # its constraints; compared against len(children) to detect
        # exhaustion (see TreeNode.check_exhausted).
        max_children = compute_max_children(self.choice_type, self.constraints)
        assert max_children > 0
        return max_children

    def _repr_pretty_(self, p: "RepresentationPrinter", cycle: bool) -> None:
        # Pretty-print each seen child value followed by its (indented)
        # subtree.
        assert cycle is False
        for i, (value, child) in enumerate(self.children.items()):
            if i > 0:
                p.break_()
            p.text(
                _node_pretty(self.choice_type, value, self.constraints, forced=False)
            )
            with p.indent(2):
                p.break_()
                p.pretty(child)
120
121
@attr.s(slots=True, frozen=True)
class Conclusion:
    """Represents a transition to a finished state."""

    status: Status = attr.ib()
    interesting_origin: Optional[InterestingOrigin] = attr.ib()

    def _repr_pretty_(self, p: "RepresentationPrinter", cycle: bool) -> None:
        assert cycle is False
        # avoid str(interesting_origin), which can include multiple lines
        # of context
        if self.interesting_origin is None:
            origin = ""
        else:
            o = self.interesting_origin
            origin = f", {o.exc_type.__name__} at {o.filename}:{o.lineno}"
        p.text(f"Conclusion ({self.status!r}{origin})")
137
138
139# The number of max children where, beyond this, it is practically impossible
140# for hypothesis to saturate / explore all children nodes in a reasonable time
141# frame. We use this to bail out of expensive max children computations early,
142# where the numbers involved are so large that we know they will be larger than
143# this number.
144#
145# Note that it's ok for us to underestimate the number of max children of a node
146# by using this. We just may think the node is exhausted when in fact it has more
147# possible children to be explored. This has the potential to finish generation
148# early due to exhausting the entire tree, but that is quite unlikely: (1) the
149# number of examples would have to be quite high, and (2) the tree would have to
150# contain only one or two nodes, or generate_novel_prefix would simply switch to
151# exploring another non-exhausted node.
152#
153# Also note that we may sometimes compute max children above this value. In other
154# words, this is *not* a hard maximum on the computed max children. It's the point
155# where further computation is not beneficial - but sometimes doing that computation
156# unconditionally is cheaper than estimating against this value.
157#
158# The one case where this may be detrimental is fuzzing, where the throughput of
159# examples is so high that it really may saturate important nodes. We'll cross
160# that bridge when we come to it.
161MAX_CHILDREN_EFFECTIVELY_INFINITE: Final[int] = 10_000_000
162
163
164def _count_distinct_strings(*, alphabet_size: int, min_size: int, max_size: int) -> int:
165 # We want to estimate if we're going to have more children than
166 # MAX_CHILDREN_EFFECTIVELY_INFINITE, without computing a potentially
167 # extremely expensive pow. We'll check the two extreme cases - if the
168 # number of strings in the largest string size alone is enough to put us
169 # over this limit (at alphabet_size >= 2), and if the variation in sizes
170 # (at alphabet_size == 1) is enough. If neither result in an early return,
171 # the exact result should be reasonably cheap to compute.
172 if alphabet_size == 0:
173 # Special-case the empty string, avoid error in math.log(0).
174 return 1
175 elif alphabet_size == 1:
176 # Special-case the constant alphabet, invalid in the geom-series sum.
177 return max_size - min_size + 1
178 else:
179 # Estimate against log, which is cheaper than computing a pow.
180 #
181 # m = max_size
182 # a = alphabet_size
183 # N = MAX_CHILDREN_EFFECTIVELY_INFINITE
184 #
185 # a**m > N
186 # <=> m * log(a) > log(N)
187 log_max_sized_children = max_size * math.log(alphabet_size)
188 if log_max_sized_children > math.log(MAX_CHILDREN_EFFECTIVELY_INFINITE):
189 return MAX_CHILDREN_EFFECTIVELY_INFINITE
190
191 # The sum of a geometric series is given by (ref: wikipedia):
192 # ᵐ∑ₖ₌₀ aᵏ = (aᵐ⁺¹ - 1) / (a - 1)
193 # = S(m) / S(0)
194 # assuming a != 1 and using the definition
195 # S(m) := aᵐ⁺¹ - 1.
196 # The sum we want, starting from a number n [0 <= n <= m] rather than zero, is
197 # ᵐ∑ₖ₌ₙ aᵏ = ᵐ∑ₖ₌₀ aᵏ - ⁿ⁻¹∑ₖ₌₀ aᵏ = S(m) / S(0) - S(n - 1) / S(0)
198 def S(n):
199 return alphabet_size ** (n + 1) - 1
200
201 return (S(max_size) - S(min_size - 1)) // S(0)
202
203
def compute_max_children(
    choice_type: ChoiceTypeT, constraints: ChoiceConstraintsT
) -> int:
    """Return the number of distinct values a draw of ``choice_type`` under
    ``constraints`` can produce.

    Used (via Branch.max_children) to decide when a branch of the tree is
    exhausted. For string/bytes the result may be capped at
    MAX_CHILDREN_EFFECTIVELY_INFINITE; see _count_distinct_strings.

    Raises NotImplementedError for an unknown choice type.
    """
    if choice_type == "integer":
        constraints = cast(IntegerConstraints, constraints)
        min_value = constraints["min_value"]
        max_value = constraints["max_value"]

        if min_value is None and max_value is None:
            # full 128 bit range.
            return 2**128 - 1
        if min_value is not None and max_value is not None:
            # count between min/max value.
            return max_value - min_value + 1

        # hard case: only one bound was specified. Here we probe either upwards
        # or downwards with our full 128 bit generation, but only half of these
        # (plus one for the case of generating zero) result in a probe in the
        # direction we want. ((2**128 - 1) // 2) + 1 == 2 ** 127
        assert (min_value is None) ^ (max_value is None)
        return 2**127
    elif choice_type == "boolean":
        constraints = cast(BooleanConstraints, constraints)
        p = constraints["p"]
        # probabilities of 0 or 1 (or effectively 0 or 1) only have one choice.
        if p <= 2 ** (-64) or p >= (1 - 2 ** (-64)):
            return 1
        return 2
    elif choice_type == "bytes":
        constraints = cast(BytesConstraints, constraints)
        return _count_distinct_strings(
            alphabet_size=2**8,
            min_size=constraints["min_size"],
            max_size=constraints["max_size"],
        )
    elif choice_type == "string":
        constraints = cast(StringConstraints, constraints)
        min_size = constraints["min_size"]
        max_size = constraints["max_size"]
        intervals = constraints["intervals"]

        return _count_distinct_strings(
            alphabet_size=len(intervals), min_size=min_size, max_size=max_size
        )
    elif choice_type == "float":
        constraints = cast(FloatConstraints, constraints)
        min_value_f = constraints["min_value"]
        max_value_f = constraints["max_value"]
        smallest_nonzero_magnitude = constraints["smallest_nonzero_magnitude"]

        count = count_between_floats(min_value_f, max_value_f)

        # we have two intervals:
        # a. [min_value, max_value]
        # b. [-smallest_nonzero_magnitude, smallest_nonzero_magnitude]
        #
        # which could be subsets (in either order), overlapping, or disjoint. We
        # want the interval difference a - b.

        # next_down because endpoints are ok with smallest_nonzero_magnitude
        min_point = max(min_value_f, -flt.next_down(smallest_nonzero_magnitude))
        max_point = min(max_value_f, flt.next_down(smallest_nonzero_magnitude))

        if min_point > max_point:
            # case: disjoint intervals.
            return count
        # subtract the values excluded by smallest_nonzero_magnitude...
        count -= count_between_floats(min_point, max_point)
        # ...but zeroes are never excluded by it, so add them back if in range.
        if sign_aware_lte(min_value_f, -0.0) and sign_aware_lte(-0.0, max_value_f):
            # account for -0.0
            count += 1
        if sign_aware_lte(min_value_f, 0.0) and sign_aware_lte(0.0, max_value_f):
            # account for 0.0
            count += 1
        return count

    raise NotImplementedError(f"unhandled choice_type {choice_type}")
281
282
283# In theory, this is a strict superset of the functionality of compute_max_children;
284#
285# assert len(all_children(choice_type, constraints)) == compute_max_children(choice_type, constraints)
286#
287# In practice, we maintain two distinct implementations for efficiency and space
288# reasons. If you just need the number of children, it is cheaper to use
289# compute_max_children than to reify the list of children (only to immediately
290# throw it away).
def _floats_between(a: float, b: float) -> Generator[float, None, None]:
    """Yield every representable float from a to b inclusive, ordered by
    their integer bit representation (see float_to_int / int_to_float)."""
    lo, hi = float_to_int(a), float_to_int(b)
    yield from map(int_to_float, range(lo, hi + 1))
294
295
def all_children(
    choice_type: ChoiceTypeT, constraints: ChoiceConstraintsT
) -> Generator[ChoiceT, None, None]:
    """Yield every distinct choice value permitted by ``constraints``, with
    no duplicates.

    See the comment above _floats_between for how this relates to
    compute_max_children.
    """
    if choice_type != "float":
        # non-float types have an injective choice ordering, so we can simply
        # enumerate by index.
        for index in range(compute_max_children(choice_type, constraints)):
            yield choice_from_index(index, choice_type, constraints)
    else:
        constraints = cast(FloatConstraints, constraints)
        # the float ordering is not injective (because of resampling
        # out-of-bounds values), so using choice_from_index would result in
        # duplicates. This violates invariants in datatree about being able
        # to draw unique new children using all_children.
        #
        # We instead maintain a separate implementation for floats.
        # TODO_IR write a better (bijective) ordering for floats and remove this!
        min_value = constraints["min_value"]
        max_value = constraints["max_value"]
        smallest_nonzero_magnitude = constraints["smallest_nonzero_magnitude"]

        # handle zeroes separately so smallest_nonzero_magnitude can think of
        # itself as a complete interval (instead of a hole at ±0).
        if sign_aware_lte(min_value, -0.0) and sign_aware_lte(-0.0, max_value):
            yield -0.0
        if sign_aware_lte(min_value, 0.0) and sign_aware_lte(0.0, max_value):
            yield 0.0

        if flt.is_negative(min_value):
            if flt.is_negative(max_value):
                # case: both negative.
                max_point = min(max_value, -smallest_nonzero_magnitude)
                # float_to_int increases as negative magnitude increases, so
                # invert order.
                yield from _floats_between(max_point, min_value)
            else:
                # case: straddles midpoint (which is between -0.0 and 0.0).
                yield from _floats_between(-smallest_nonzero_magnitude, min_value)
                yield from _floats_between(smallest_nonzero_magnitude, max_value)
        else:
            # case: both positive.
            min_point = max(min_value, smallest_nonzero_magnitude)
            yield from _floats_between(min_point, max_value)
337
338
@attr.s(slots=True)
class TreeNode:
    """
    A node, or collection of directly descended nodes, in a DataTree.

    We store the DataTree as a radix tree (https://en.wikipedia.org/wiki/Radix_tree),
    which means that nodes that are the only child of their parent are collapsed
    into their parent to save space.

    Conceptually, you can unfold a single TreeNode storing n values in its lists
    into a sequence of n nodes, each a child of the last. In other words,
    (constraints[i], values[i], choice_types[i]) corresponds to the single node at index
    i.

    Note that if a TreeNode represents a choice (i.e. the nodes cannot be compacted
    via the radix tree definition), then its lists will be empty and it will
    store a `Branch` representing that choice in its `transition`.

    Examples
    --------

    Consider sequentially drawing a boolean, then an integer.

        data.draw_boolean()
        data.draw_integer(1, 3)

    If we draw True and then 2, the tree may conceptually look like this.

        ┌──────┐
        │ root │
        └──┬───┘
        ┌──┴───┐
        │ True │
        └──┬───┘
        ┌──┴───┐
        │  2   │
        └──────┘

    But since 2 is the only child of True, we will compact these nodes and store
    them as a single TreeNode.

        ┌──────┐
        │ root │
        └──┬───┘
      ┌────┴──────┐
      │ [True, 2] │
      └───────────┘

    If we then draw True and then 3, True will have multiple children and we
    can no longer store this compacted representation. We would call split_at(0)
    on the [True, 2] node to indicate that we need to add a choice at 0-index
    node (True).

        ┌──────┐
        │ root │
        └──┬───┘
        ┌──┴───┐
      ┌─┤ True ├─┐
      │ └──────┘ │
    ┌─┴─┐      ┌─┴─┐
    │ 2 │      │ 3 │
    └───┘      └───┘
    """

    # The constraints, value, and choice_types of the nodes stored here. These always
    # have the same length. The values at index i belong to node i.
    constraints: list[ChoiceConstraintsT] = attr.ib(factory=list)
    values: list[ChoiceT] = attr.ib(factory=list)
    choice_types: list[ChoiceTypeT] = attr.ib(factory=list)

    # The indices of nodes which had forced values.
    #
    # Stored as None if no indices have been forced, purely for space saving
    # reasons (we force quite rarely).
    __forced: Optional[set[int]] = attr.ib(default=None, init=False)

    # What happens next after drawing these nodes. (conceptually, "what is the
    # child/children of the last node stored here").
    #
    # One of:
    # - None (we don't know yet)
    # - Branch (we have seen multiple possible outcomes here)
    # - Conclusion (ConjectureData.conclude_test was called here)
    # - Killed (this branch is valid and may even have children, but should not
    #   be explored when generating novel prefixes)
    transition: Union[None, Branch, Conclusion, Killed] = attr.ib(default=None)

    # A tree node is exhausted if every possible sequence of draws below it has
    # been explored. We only update this when performing operations that could
    # change the answer.
    #
    # See also TreeNode.check_exhausted.
    is_exhausted: bool = attr.ib(default=False, init=False)

    @property
    def forced(self) -> AbstractSet[int]:
        """The set of indices whose draws were forced (possibly empty)."""
        if not self.__forced:
            return EMPTY
        return self.__forced

    def mark_forced(self, i: int) -> None:
        """
        Note that the draw at node i was forced.
        """
        assert 0 <= i < len(self.values)
        if self.__forced is None:
            # allocate lazily; most nodes never have forced draws.
            self.__forced = set()
        self.__forced.add(i)

    def split_at(self, i: int) -> None:
        """
        Splits the tree so that it can incorporate a decision at the draw call
        corresponding to the node at position i.

        Everything after position i is moved into a fresh child TreeNode, and
        this node's transition becomes a Branch at position i with that single
        child.

        Raises FlakyStrategyDefinition if node i was forced.
        """

        if i in self.forced:
            # a forced draw can only ever produce one value, so seeing a
            # different value here means generation is not deterministic.
            raise FlakyStrategyDefinition(_FLAKY_STRAT_MSG)

        assert not self.is_exhausted

        key = self.values[i]

        # everything after i becomes the (single, so far) child.
        child = TreeNode(
            choice_types=self.choice_types[i + 1 :],
            constraints=self.constraints[i + 1 :],
            values=self.values[i + 1 :],
            transition=self.transition,
        )
        self.transition = Branch(
            constraints=self.constraints[i],
            choice_type=self.choice_types[i],
            children={key: child},
        )
        if self.__forced is not None:
            # re-index forced markers: indices > i shift down into the child,
            # indices < i stay here, and index i itself cannot be forced.
            child.__forced = {j - i - 1 for j in self.__forced if j > i}
            self.__forced = {j for j in self.__forced if j < i}
        child.check_exhausted()
        del self.choice_types[i:]
        del self.values[i:]
        del self.constraints[i:]
        assert len(self.values) == len(self.constraints) == len(self.choice_types) == i

    def check_exhausted(self) -> bool:
        """
        Recalculates is_exhausted if necessary, and then returns it.

        A node is exhausted if:
        - Its transition is Conclusion or Killed
        - It has the maximum number of children (i.e. we have found all of its
          possible children), and all its children are exhausted

        Therefore, we only need to compute this for a node when:
        - We first create it in split_at
        - We set its transition to either Conclusion or Killed
          (TreeRecordingObserver.conclude_test or TreeRecordingObserver.kill_branch)
        - We exhaust any of its children
        """

        if (
            # a node cannot go from is_exhausted -> not is_exhausted.
            not self.is_exhausted
            # if we don't know what happens after this node, we don't have
            # enough information to tell if it's exhausted.
            and self.transition is not None
            # if there are still any nodes left which are the only child of their
            # parent (len(self.values) > 0), then this TreeNode must be not
            # exhausted, unless all of those nodes were forced.
            #
            # This is because we maintain an invariant of only adding nodes to
            # DataTree which have at least 2 possible values, so we know that if
            # they do not have any siblings that we still have more choices to
            # discover.
            #
            # (We actually *do* currently add single-valued nodes to the tree,
            # but immediately split them into a transition to avoid falsifying
            # this check. this is a bit of a hack.)
            and len(self.forced) == len(self.values)
        ):
            if isinstance(self.transition, (Conclusion, Killed)):
                self.is_exhausted = True
            elif len(self.transition.children) == self.transition.max_children:
                self.is_exhausted = all(
                    v.is_exhausted for v in self.transition.children.values()
                )
        return self.is_exhausted

    def _repr_pretty_(self, p: "RepresentationPrinter", cycle: bool) -> None:
        # Pretty-print each collapsed node on its own line, indenting two
        # spaces per conceptual tree level, then the transition (or
        # "unknown" if we haven't seen past the last node yet).
        assert cycle is False
        indent = 0
        for i, (choice_type, constraints, value) in enumerate(
            zip(self.choice_types, self.constraints, self.values)
        ):
            with p.indent(indent):
                if i > 0:
                    p.break_()
                p.text(
                    _node_pretty(
                        choice_type, value, constraints, forced=i in self.forced
                    )
                )
            indent += 2

        with p.indent(indent):
            if len(self.values) > 0:
                p.break_()
            if self.transition is not None:
                p.pretty(self.transition)
            else:
                p.text("unknown")
550
551
552class DataTree:
553 """
554 A DataTree tracks the structured history of draws in some test function,
555 across multiple ConjectureData objects.
556
557 This information is used by ConjectureRunner to generate novel prefixes of
558 this tree (see generate_novel_prefix). A novel prefix is a sequence of draws
559 which the tree has not seen before, and therefore the ConjectureRunner has
560 not generated as an input to the test function before.
561
562 DataTree tracks the following:
563
564 - Drawn choices in the choice sequence
565 - ConjectureData.draw_integer()
566 - ConjectureData.draw_float()
567 - ConjectureData.draw_string()
568 - ConjectureData.draw_boolean()
569 - ConjectureData.draw_bytes()
570 - Test conclusions (with some Status, e.g. Status.VALID)
571 - ConjectureData.conclude_test()
572
573 A DataTree is — surprise — a *tree*. A node in this tree is either a choice draw
574 with some value, a test conclusion with some Status, or a special `Killed` value,
575 which denotes that further draws may exist beyond this node but should not be
576 considered worth exploring when generating novel prefixes. A node is a leaf
577 iff it is a conclusion or Killed.
578
579 A branch from node A to node B indicates that we have previously seen some
580 sequence (a, b) of draws, where a and b are the values in nodes A and B.
581 Similar intuition holds for conclusion and Killed nodes.
582
583 Examples
584 --------
585
586 To see how a DataTree gets built through successive sets of draws, consider
    the following code that calls through to some ConjectureData object `data`.
588 The first call can be either True or False, and the second call can be any
589 integer in the range [1, 3].
590
591 data.draw_boolean()
592 data.draw_integer(1, 3)
593
594 To start, the corresponding DataTree object is completely empty.
595
596 ┌──────┐
597 │ root │
598 └──────┘
599
600 We happen to draw True and then 2 in the above code. The tree tracks this.
601 (2 also connects to a child Conclusion node with Status.VALID since it's the
602 final draw in the code. I'll omit Conclusion nodes in diagrams for brevity.)
603
604 ┌──────┐
605 │ root │
606 └──┬───┘
607 ┌──┴───┐
608 │ True │
609 └──┬───┘
610 ┌──┴───┐
611 │ 2 │
612 └──────┘
613
614 This is a very boring tree so far! But now we happen to draw False and
615 then 1. This causes a split in the tree. Remember, DataTree tracks history
616 over all invocations of a function, not just one. The end goal is to know
617 what invocations haven't been tried yet, after all.
618
619 ┌──────┐
620 ┌───┤ root ├───┐
621 │ └──────┘ │
622 ┌──┴───┐ ┌─┴─────┐
623 │ True │ │ False │
624 └──┬───┘ └──┬────┘
625 ┌─┴─┐ ┌─┴─┐
626 │ 2 │ │ 1 │
627 └───┘ └───┘
628
629 If we were to ask DataTree for a novel prefix at this point, it might
630 generate any of (True, 1), (True, 3), (False, 2), or (False, 3).
631
632 Note that the novel prefix stops as soon as it generates a novel node. For
633 instance, if we had generated a novel prefix back when the tree was only
634 root -> True -> 2, we could have gotten any of (True, 1), (True, 3), or
635 (False). But we could *not* have gotten (False, n), because both False and
636 n were novel at that point, and we stop at the first novel node — False.
637
638 I won't belabor this example. Here's what the tree looks like when fully
639 explored:
640
641 ┌──────┐
642 ┌──────┤ root ├──────┐
643 │ └──────┘ │
644 ┌──┴───┐ ┌─┴─────┐
645 ┌──┤ True ├──┐ ┌───┤ False ├──┐
646 │ └──┬───┘ │ │ └──┬────┘ │
647 ┌─┴─┐ ┌─┴─┐ ┌─┴─┐ ┌─┴─┐ ┌─┴─┐ ┌─┴─┐
648 │ 1 │ │ 2 │ │ 3 │ │ 1 │ │ 2 │ │ 3 │
649 └───┘ └───┘ └───┘ └───┘ └───┘ └───┘
650
651 You could imagine much more complicated trees than this arising in practice,
652 and indeed they do. In particular, the tree need not be balanced or 'nice'
653 like the tree above. For instance,
654
655 b = data.draw_boolean()
656 if b:
657 data.draw_integer(1, 3)
658
659 results in a tree with the entire right part lopped off, and False leading
660 straight to a conclusion node with Status.VALID. As another example,
661
        n = data.draw_integer()
663 assume(n >= 3)
664 data.draw_string()
665
666 results in a tree with the 0, 1, and 2 nodes leading straight to a
667 conclusion node with Status.INVALID, and the rest branching off into all
668 the possibilities of draw_string.
669
670 Notes
671 -----
672
673 The above examples are slightly simplified and are intended to convey
674 intuition. In practice, there are some implementation details to be aware
675 of.
676
677 - In draw nodes, we store the constraints used in addition to the value drawn.
678 E.g. the node corresponding to data.draw_float(min_value=1.0, max_value=1.5)
679 would store {"min_value": 1.0, "max_value": 1.5, ...} (default values for
680 other constraints omitted).
681
682 The constraints parameters have the potential to change both the range of
683 possible outputs of a node, and the probability distribution within that
684 range, so we need to use these when drawing in DataTree as well. We draw
685 values using these constraints when (1) generating a novel value for a node
686 and (2) choosing a random child when traversing the tree.
687
688 - For space efficiency, rather than tracking the full tree structure, we
689 store DataTree as a radix tree. This is conceptually equivalent (radix
690 trees can always be "unfolded" to the full tree) but it means the internal
691 representation may differ in practice.
692
693 See TreeNode for more information.
694 """
695
    def __init__(self) -> None:
        # Root of the radix tree; every recorded draw sequence starts here.
        self.root: TreeNode = TreeNode()
        # Per-node/branch children state, keyed by id() of the node or
        # branch; see _get_children_cache.
        self._children_cache: dict[ChoiceT, ChildrenCacheValueT] = {}
699
    @property
    def is_exhausted(self) -> bool:
        """
        Returns True if every node is exhausted, and therefore the tree has
        been fully explored.
        """
        # exhaustion propagates upwards, so the root summarizes the tree.
        return self.root.is_exhausted
707
    def generate_novel_prefix(self, random: Random) -> tuple[ChoiceT, ...]:
        """Generate a short random string that (after rewriting) is not
        a prefix of any choice sequence previously added to the tree.

        The resulting prefix is essentially arbitrary - it would be nice
        for it to be uniform at random, but previous attempts to do that
        have proven too expensive.
        """
        assert not self.is_exhausted
        prefix = []

        def append_choice(choice_type: ChoiceTypeT, choice: ChoiceT) -> None:
            # floats are tracked internally by their bit pattern (an int);
            # convert back to the actual float before emitting it.
            if choice_type == "float":
                assert isinstance(choice, int)
                choice = int_to_float(choice)
            prefix.append(choice)

        current_node = self.root
        while True:
            assert not current_node.is_exhausted
            # walk the collapsed nodes stored inside this TreeNode.
            for i, (choice_type, constraints, value) in enumerate(
                zip(
                    current_node.choice_types,
                    current_node.constraints,
                    current_node.values,
                )
            ):
                if i in current_node.forced:
                    # forced draws have exactly one possible value; replay it.
                    append_choice(choice_type, value)
                else:
                    # any value other than the recorded one is novel here.
                    # First try random draws; after enough collisions, fall
                    # back to systematically enumerating children.
                    attempts = 0
                    while True:
                        if attempts <= 10:
                            try:
                                node_value = self._draw(
                                    choice_type, constraints, random=random
                                )
                            except StopTest:  # pragma: no cover
                                # it is possible that drawing from a fresh data can
                                # overrun BUFFER_SIZE, due to eg unlucky rejection sampling
                                # of integer probes. Retry these cases.
                                attempts += 1
                                continue
                        else:
                            node_value = self._draw_from_cache(
                                choice_type,
                                constraints,
                                key=id(current_node),
                                random=random,
                            )

                        if node_value != value:
                            append_choice(choice_type, node_value)
                            break
                        attempts += 1
                        self._reject_child(
                            choice_type,
                            constraints,
                            child=node_value,
                            key=id(current_node),
                        )
                    # We've now found a value that is allowed to
                    # vary, so what follows is not fixed.
                    return tuple(prefix)
            else:
                # for-else: every collapsed node matched, so move through the
                # transition.
                assert not isinstance(current_node.transition, (Conclusion, Killed))
                if current_node.transition is None:
                    # we've never drawn past this point; the prefix is novel.
                    return tuple(prefix)
                branch = current_node.transition
                assert isinstance(branch, Branch)

                # draw until we hit either an unseen child (novel - done) or
                # a non-exhausted child (descend into it). Same random-then-
                # enumerate fallback strategy as above.
                attempts = 0
                while True:
                    if attempts <= 10:
                        try:
                            node_value = self._draw(
                                branch.choice_type, branch.constraints, random=random
                            )
                        except StopTest:  # pragma: no cover
                            attempts += 1
                            continue
                    else:
                        node_value = self._draw_from_cache(
                            branch.choice_type,
                            branch.constraints,
                            key=id(branch),
                            random=random,
                        )
                    try:
                        child = branch.children[node_value]
                    except KeyError:
                        append_choice(branch.choice_type, node_value)
                        return tuple(prefix)
                    if not child.is_exhausted:
                        append_choice(branch.choice_type, node_value)
                        current_node = child
                        break
                    attempts += 1
                    self._reject_child(
                        branch.choice_type,
                        branch.constraints,
                        child=node_value,
                        key=id(branch),
                    )

                # We don't expect this assertion to ever fire, but coverage
                # wants the loop inside to run if you have branch checking
                # on, hence the pragma.
                assert (  # pragma: no cover
                    attempts != 1000
                    or len(branch.children) < branch.max_children
                    or any(not v.is_exhausted for v in branch.children.values())
                )
821
    def rewrite(self, choices):
        """Use previously seen ConjectureData objects to return a tuple of
        the rewritten choice sequence and the status we would get from running
        that with the test function. If the status cannot be predicted
        from the existing values it will be None."""
        data = ConjectureData.for_choices(choices)
        try:
            self.simulate_test_function(data)
            return (data.choices, data.status)
        except PreviouslyUnseenBehaviour:
            # the tree has no record of what happens here, so we cannot
            # predict the status; return the choices unchanged.
            return (choices, None)
833
    def simulate_test_function(self, data: ConjectureData) -> None:
        """Run a simulated version of the test function recorded by
        this tree. Note that this does not currently call ``stop_span``
        or ``start_span`` as these are not currently recorded in the
        tree. This will likely change in future.

        Raises PreviouslyUnseenBehaviour if the simulation reaches any
        part of the tree that has not been recorded.
        """
        node = self.root

        def draw(choice_type, constraints, *, forced=None, convert_forced=True):
            # Replay a single draw against `data`. Forced float values are
            # stored in the tree as bit patterns (ints), so convert them
            # back to floats before forcing, unless the caller already did.
            if choice_type == "float" and forced is not None and convert_forced:
                forced = int_to_float(forced)

            draw_func = getattr(data, f"draw_{choice_type}")
            value = draw_func(**constraints, forced=forced)

            # floats are compared/keyed by bit pattern throughout the tree
            # (see the comment in _draw), so convert the drawn value.
            if choice_type == "float":
                value = float_to_int(value)
            return value

        try:
            while True:
                # replay the collapsed nodes stored inside this TreeNode; any
                # mismatch with the recorded value is unseen behaviour.
                for i, (choice_type, constraints, previous) in enumerate(
                    zip(node.choice_types, node.constraints, node.values)
                ):
                    v = draw(
                        choice_type,
                        constraints,
                        forced=previous if i in node.forced else None,
                    )
                    if v != previous:
                        raise PreviouslyUnseenBehaviour
                if isinstance(node.transition, Conclusion):
                    t = node.transition
                    data.conclude_test(t.status, t.interesting_origin)
                elif node.transition is None:
                    raise PreviouslyUnseenBehaviour
                elif isinstance(node.transition, Branch):
                    v = draw(node.transition.choice_type, node.transition.constraints)
                    try:
                        node = node.transition.children[v]
                    except KeyError as err:
                        raise PreviouslyUnseenBehaviour from err
                else:
                    assert isinstance(node.transition, Killed)
                    data.observer.kill_branch()
                    node = node.transition.next_node
        except StopTest:
            # conclude_test raises StopTest; the simulation is complete.
            pass
881
    def new_observer(self):
        """Return a new TreeRecordingObserver attached to this tree."""
        return TreeRecordingObserver(self)
884
    def _draw(
        self,
        choice_type: ChoiceTypeT,
        constraints: ChoiceConstraintsT,
        *,
        random: Random,
    ) -> ChoiceT:
        """Draw a random value for `choice_type` under `constraints`,
        returning floats as their integer bit pattern (see below)."""
        # local import to avoid a circular import with conjecture.data.
        from hypothesis.internal.conjecture.data import draw_choice

        value = draw_choice(choice_type, constraints, random=random)
        # using floats as keys into branch.children breaks things, because
        # e.g. hash(0.0) == hash(-0.0) would collide as keys when they are
        # in fact distinct child branches.
        # To distinguish floats here we'll use their bits representation. This
        # entails some bookkeeping such that we're careful about when the
        # float key is in its bits form (as a key into branch.children) and
        # when it is in its float form (as a value we want to write to the
        # choice sequence), and converting between the two forms as appropriate.
        if choice_type == "float":
            assert isinstance(value, float)
            value = float_to_int(value)
        return value
907
908 def _get_children_cache(
909 self, choice_type: ChoiceTypeT, constraints: ChoiceConstraintsT, *, key: ChoiceT
910 ) -> ChildrenCacheValueT:
911 # cache the state of the children generator per node/branch (passed as
912 # `key` here), such that we track which children we've already tried
913 # for this branch across draws.
914 # We take advantage of python generators here as one-way iterables,
915 # so each time we iterate we implicitly store our position in the
916 # children generator and don't re-draw children. `children` is the
917 # concrete list of children draw from the generator that we will work
918 # with. Whenever we need to top up this list, we will draw a new value
919 # from the generator.
920 if key not in self._children_cache:
921 generator = all_children(choice_type, constraints)
922 children: list[ChoiceT] = []
923 rejected: set[ChoiceT] = set()
924 self._children_cache[key] = (generator, children, rejected)
925
926 return self._children_cache[key]
927
928 def _draw_from_cache(
929 self,
930 choice_type: ChoiceTypeT,
931 constraints: ChoiceConstraintsT,
932 *,
933 key: ChoiceT,
934 random: Random,
935 ) -> ChoiceT:
936 (generator, children, rejected) = self._get_children_cache(
937 choice_type, constraints, key=key
938 )
939 # Keep a stock of 100 potentially-valid children at all times.
940 # This number is chosen to balance memory/speed vs randomness. Ideally
941 # we would sample uniformly from all not-yet-rejected children, but
942 # computing and storing said children is not free.
943 # no-branch because coverage of the fall-through case here is a bit
944 # annoying.
945 if len(children) < 100: # pragma: no branch
946 for v in generator:
947 if choice_type == "float":
948 assert isinstance(v, float)
949 v = float_to_int(v)
950 if v in rejected:
951 continue
952 children.append(v)
953 if len(children) >= 100:
954 break
955
956 return random.choice(children)
957
958 def _reject_child(
959 self,
960 choice_type: ChoiceTypeT,
961 constraints: ChoiceConstraintsT,
962 *,
963 child: ChoiceT,
964 key: ChoiceT,
965 ) -> None:
966 (_generator, children, rejected) = self._get_children_cache(
967 choice_type, constraints, key=key
968 )
969 rejected.add(child)
970 # we remove a child from the list of possible children *only* when it is
971 # rejected, and not when it is initially drawn in _draw_from_cache. The
972 # reason is that a child being drawn does not guarantee that child will
973 # be used in a way such that it is written back to the tree, so it needs
974 # to be available for future draws until we are certain it has been
975 # used.
976 #
977 # For instance, if we generated novel prefixes in a loop (but never used
978 # those prefixes to generate new values!) then we don't want to remove
979 # the drawn children from the available pool until they are actually
980 # used.
981 #
982 # This does result in a small inefficiency: we may draw a child,
983 # immediately use it (so we know it cannot be drawn again), but still
984 # wait to draw and reject it here, because DataTree cannot guarantee
985 # the drawn child has been used.
986 if child in children:
987 children.remove(child)
988
    def _repr_pretty_(self, p: "RepresentationPrinter", cycle: bool) -> None:
        # Pretty-printing hook (hypothesis.vendor.pretty protocol): delegate
        # the whole representation to the root node. ``cycle`` is set by the
        # printer when it re-encounters an object it is already printing;
        # that should never happen for a DataTree, hence the hard assert.
        assert cycle is False
        p.pretty(self.root)
992
993
class TreeRecordingObserver(DataObserver):
    """DataObserver which records every draw it sees into a DataTree."""

    def __init__(self, tree: DataTree):
        # _root isn't read anywhere, but is very useful for locally debugging
        # flaky errors, with
        # `from hypothesis.vendor import pretty; print(pretty.pretty(self._root))`
        self._root = tree.root
        self.killed: bool = False
        # Where we currently are in the tree, and how far into that node's
        # recorded draws we have replayed.
        self._current_node: TreeNode = tree.root
        self._index_in_current_node: int = 0
        # Every node visited so far during this test, root first.
        self._trail: list[TreeNode] = [self._current_node]
1004
1005 def draw_integer(
1006 self, value: int, *, was_forced: bool, constraints: IntegerConstraints
1007 ) -> None:
1008 self.draw_value(
1009 "integer", value, was_forced=was_forced, constraints=constraints
1010 )
1011
1012 def draw_float(
1013 self, value: float, *, was_forced: bool, constraints: FloatConstraints
1014 ) -> None:
1015 self.draw_value("float", value, was_forced=was_forced, constraints=constraints)
1016
1017 def draw_string(
1018 self, value: str, *, was_forced: bool, constraints: StringConstraints
1019 ) -> None:
1020 self.draw_value("string", value, was_forced=was_forced, constraints=constraints)
1021
1022 def draw_bytes(
1023 self, value: bytes, *, was_forced: bool, constraints: BytesConstraints
1024 ) -> None:
1025 self.draw_value("bytes", value, was_forced=was_forced, constraints=constraints)
1026
1027 def draw_boolean(
1028 self, value: bool, *, was_forced: bool, constraints: BooleanConstraints
1029 ) -> None:
1030 self.draw_value(
1031 "boolean", value, was_forced=was_forced, constraints=constraints
1032 )
1033
    def draw_value(
        self,
        choice_type: ChoiceTypeT,
        value: ChoiceT,
        *,
        was_forced: bool,
        constraints: ChoiceConstraintsT,
    ) -> None:
        """Record a single draw, advancing our position in the tree.

        Compares the draw against what the tree previously recorded at this
        position, then either follows the existing structure, extends the
        current node with the new value, splits the node into a Branch where
        the value diverges from history, or raises FlakyStrategyDefinition
        when the choice type / constraints / forced-ness are inconsistent
        with a previous run.
        """
        i = self._index_in_current_node
        self._index_in_current_node += 1
        node = self._current_node

        # Floats are keyed in the tree by their bits representation, so that
        # e.g. 0.0 and -0.0 stay distinct children despite equal hashes
        # (see DataTree._draw).
        if isinstance(value, float):
            value = float_to_int(value)

        assert len(node.constraints) == len(node.values) == len(node.choice_types)
        if i < len(node.values):
            # We are replaying draws already recorded on this node; they must
            # agree with history on both the choice type and its constraints.
            if (
                choice_type != node.choice_types[i]
                or constraints != node.constraints[i]
            ):
                raise FlakyStrategyDefinition(_FLAKY_STRAT_MSG)
            # Note that we don't check whether a previously
            # forced value is now free. That will be caught
            # if we ever split the node there, but otherwise
            # may pass silently. This is acceptable because it
            # means we skip a hash set lookup on every
            # draw and that's a pretty niche failure mode.
            if was_forced and i not in node.forced:
                raise FlakyStrategyDefinition(_FLAKY_STRAT_MSG)
            if value != node.values[i]:
                # The draw diverged from history: split the node at i so both
                # histories hang off a common Branch, then start a fresh child
                # node for the value we just saw.
                node.split_at(i)
                assert i == len(node.values)
                new_node = TreeNode()
                assert isinstance(node.transition, Branch)
                node.transition.children[value] = new_node
                self._current_node = new_node
                self._index_in_current_node = 0
        else:
            trans = node.transition
            if trans is None:
                # Novel territory: extend the current node with this draw.
                node.choice_types.append(choice_type)
                node.constraints.append(constraints)
                node.values.append(value)
                if was_forced:
                    node.mark_forced(i)
                # generate_novel_prefix assumes the following invariant: any one
                # of the series of draws in a particular node can vary, i.e. the
                # max number of children is at least 2. However, some draws are
                # pseudo-choices and only have a single value, such as
                # integers(0, 0).
                #
                # Currently, we address this by forcefully splitting such
                # single-valued nodes into a transition when we see them. An
                # exception to this is if it was forced: forced pseudo-choices
                # do not cause the above issue because they inherently cannot
                # vary, and moreover they trip other invariants about never
                # splitting forced nodes.
                #
                # An alternative is not writing such choices to the tree at
                # all, and thus guaranteeing that each node has at least 2 max
                # children.
                if (
                    compute_max_children(choice_type, constraints) == 1
                    and not was_forced
                ):
                    node.split_at(i)
                    assert isinstance(node.transition, Branch)
                    self._current_node = node.transition.children[value]
                    self._index_in_current_node = 0
            elif isinstance(trans, Conclusion):
                assert trans.status != Status.OVERRUN
                # We tried to draw where history says we should have
                # stopped
                raise FlakyStrategyDefinition(_FLAKY_STRAT_MSG)
            else:
                assert isinstance(trans, Branch), trans
                if choice_type != trans.choice_type or constraints != trans.constraints:
                    raise FlakyStrategyDefinition(_FLAKY_STRAT_MSG)
                try:
                    # Common case: we've seen this value at this branch before.
                    self._current_node = trans.children[value]
                except KeyError:
                    # First time seeing this value here: create a fresh child.
                    self._current_node = trans.children.setdefault(value, TreeNode())
                self._index_in_current_node = 0
        if self._trail[-1] is not self._current_node:
            self._trail.append(self._current_node)
1120
1121 def kill_branch(self) -> None:
1122 """Mark this part of the tree as not worth re-exploring."""
1123 if self.killed:
1124 return
1125
1126 self.killed = True
1127
1128 if self._index_in_current_node < len(self._current_node.values) or (
1129 self._current_node.transition is not None
1130 and not isinstance(self._current_node.transition, Killed)
1131 ):
1132 raise FlakyStrategyDefinition(_FLAKY_STRAT_MSG)
1133
1134 if self._current_node.transition is None:
1135 self._current_node.transition = Killed(TreeNode())
1136 self.__update_exhausted()
1137
1138 self._current_node = self._current_node.transition.next_node
1139 self._index_in_current_node = 0
1140 self._trail.append(self._current_node)
1141
    def conclude_test(
        self, status: Status, interesting_origin: Optional[InterestingOrigin]
    ) -> None:
        """Says that ``status`` occurred at node ``node``. This updates the
        node if necessary and checks for consistency.

        Overruns are ignored entirely: an overrun does not pin down where
        the test "really" ends, so it records nothing in the tree. Any other
        status must occur at the end of the current node's recorded draws,
        and must agree with any previously recorded conclusion there,
        otherwise the test is flaky.
        """
        if status == Status.OVERRUN:
            return
        i = self._index_in_current_node
        node = self._current_node

        # Concluding mid-node, or where history says more draws follow,
        # means this run behaved differently from a previous one.
        if i < len(node.values) or isinstance(node.transition, Branch):
            raise FlakyStrategyDefinition(_FLAKY_STRAT_MSG)

        new_transition = Conclusion(status, interesting_origin)

        if node.transition is not None and node.transition != new_transition:
            # As an, I'm afraid, horrible bodge, we deliberately ignore flakiness
            # where tests go from interesting to valid, because it's much easier
            # to produce good error messages for these further up the stack.
            if isinstance(node.transition, Conclusion) and (
                node.transition.status != Status.INTERESTING
                or new_transition.status != Status.VALID
            ):
                old_origin = node.transition.interesting_origin
                new_origin = new_transition.interesting_origin
                raise FlakyReplay(
                    f"Inconsistent results from replaying a test case!\n"
                    f" last: {node.transition.status.name} from {old_origin}\n"
                    f" this: {new_transition.status.name} from {new_origin}",
                    (old_origin, new_origin),
                )
        else:
            # node.transition was None (first conclusion here) or identical
            # to the new one; either way recording it is safe.
            node.transition = new_transition

        assert node is self._trail[-1]
        # NOTE(review): called outside the assert as well, presumably so the
        # call's effect isn't lost when assertions are stripped under -O —
        # confirm against TreeNode.check_exhausted.
        node.check_exhausted()
        assert len(node.values) > 0 or node.check_exhausted()

        if not self.killed:
            # kill_branch already updated exhaustion when it ran.
            self.__update_exhausted()
1182
1183 def __update_exhausted(self) -> None:
1184 for t in reversed(self._trail):
1185 # Any node we've traversed might have now become exhausted.
1186 # We check from the right. As soon as we hit a node that
1187 # isn't exhausted, this automatically implies that all of
1188 # its parents are not exhausted, so we stop.
1189 if not t.check_exhausted():
1190 break