Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.10/site-packages/hypothesis/internal/conjecture/datatree.py: 19%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

416 statements  

1# This file is part of Hypothesis, which may be found at 

2# https://github.com/HypothesisWorks/hypothesis/ 

3# 

4# Copyright the Hypothesis Authors. 

5# Individual contributors are listed in AUTHORS.rst and the git log. 

6# 

7# This Source Code Form is subject to the terms of the Mozilla Public License, 

8# v. 2.0. If a copy of the MPL was not distributed with this file, You can 

9# obtain one at https://mozilla.org/MPL/2.0/. 

10 

11import math 

12from collections.abc import Generator 

13from random import Random 

14from typing import TYPE_CHECKING, AbstractSet, Final, Optional, Union, cast 

15 

16import attr 

17 

18from hypothesis.errors import ( 

19 FlakyReplay, 

20 FlakyStrategyDefinition, 

21 HypothesisException, 

22 StopTest, 

23) 

24from hypothesis.internal import floats as flt 

25from hypothesis.internal.conjecture.choice import ( 

26 BooleanConstraints, 

27 BytesConstraints, 

28 ChoiceConstraintsT, 

29 ChoiceT, 

30 ChoiceTypeT, 

31 FloatConstraints, 

32 IntegerConstraints, 

33 StringConstraints, 

34 choice_from_index, 

35) 

36from hypothesis.internal.conjecture.data import ConjectureData, DataObserver, Status 

37from hypothesis.internal.escalation import InterestingOrigin 

38from hypothesis.internal.floats import ( 

39 count_between_floats, 

40 float_to_int, 

41 int_to_float, 

42 sign_aware_lte, 

43) 

44 

45if TYPE_CHECKING: 

46 from typing import TypeAlias 

47 

48 from hypothesis.vendor.pretty import RepresentationPrinter 

49 

# Value type for DataTree._children_cache: the generator producing a node's
# remaining children, plus the bookkeeping collections used while iterating
# it (see DataTree._get_children_cache for how each element is used).
ChildrenCacheValueT: "TypeAlias" = tuple[
    Generator[ChoiceT, None, None], list[ChoiceT], set[ChoiceT]
]

53 

54 

class PreviouslyUnseenBehaviour(HypothesisException):
    """Raised when replaying a choice sequence reaches behaviour that this
    DataTree has never recorded (see DataTree.simulate_test_function)."""

    pass

57 

58 

# Error message used whenever a replayed draw disagrees with what the tree
# previously recorded, which indicates nondeterministic data generation.
_FLAKY_STRAT_MSG = (
    "Inconsistent data generation! Data generation behaved differently "
    "between different runs. Is your data generation depending on external "
    "state?"
)


# Shared immutable empty set returned by TreeNode.forced when nothing was
# forced; avoids allocating a fresh set on every access.
EMPTY: frozenset[int] = frozenset()

67 

68 

@attr.s(slots=True)
class Killed:
    """Represents a transition to part of the tree which has been marked as
    "killed", meaning we want to treat it as not worth exploring, so it will
    be treated as if it were completely explored for the purposes of
    exhaustion."""

    # The node the killed transition leads to. It is still followed when
    # replaying (simulate_test_function), just never chosen when generating
    # novel prefixes.
    next_node: "TreeNode" = attr.ib()

    def _repr_pretty_(self, p: "RepresentationPrinter", cycle: bool) -> None:
        """Pretty-printer hook: render this transition as a simple leaf."""
        assert cycle is False
        p.text("Killed")

81 

82 

83def _node_pretty( 

84 choice_type: ChoiceTypeT, 

85 value: ChoiceT, 

86 constraints: ChoiceConstraintsT, 

87 *, 

88 forced: bool, 

89) -> str: 

90 forced_marker = " [forced]" if forced else "" 

91 return f"{choice_type} {value!r}{forced_marker} {constraints}" 

92 

93 

@attr.s(slots=True)
class Branch:
    """Represents a transition where multiple choices can be made as to what
    to draw."""

    # The constraints and choice type shared by every child of this branch
    # (all children arose from the same draw call).
    constraints: ChoiceConstraintsT = attr.ib()
    choice_type: ChoiceTypeT = attr.ib()
    # Maps each previously-drawn value to the subtree that followed it.
    # Excluded from repr because it can be very large. Floats are keyed by
    # their bit pattern (see DataTree._draw).
    children: dict[ChoiceT, "TreeNode"] = attr.ib(repr=False)

    @property
    def max_children(self) -> int:
        # Total number of distinct values this branch could ever contain;
        # used by TreeNode.check_exhausted to detect saturation.
        max_children = compute_max_children(self.choice_type, self.constraints)
        assert max_children > 0
        return max_children

    def _repr_pretty_(self, p: "RepresentationPrinter", cycle: bool) -> None:
        """Pretty-print each child value followed by its indented subtree."""
        assert cycle is False
        for i, (value, child) in enumerate(self.children.items()):
            if i > 0:
                p.break_()
            p.text(
                _node_pretty(self.choice_type, value, self.constraints, forced=False)
            )
            with p.indent(2):
                p.break_()
                p.pretty(child)

120 

121 

@attr.s(slots=True, frozen=True)
class Conclusion:
    """Represents a transition to a finished state."""

    # The status the test run concluded with (e.g. Status.VALID).
    status: Status = attr.ib()
    # Where the test became interesting, if it did (otherwise None).
    interesting_origin: Optional[InterestingOrigin] = attr.ib()

    def _repr_pretty_(self, p: "RepresentationPrinter", cycle: bool) -> None:
        """Render as e.g. ``Conclusion (<status>, <ExcType> at file:line)``."""
        assert cycle is False
        o = self.interesting_origin
        # avoid str(o), which can include multiple lines of context
        origin = (
            "" if o is None else f", {o.exc_type.__name__} at {o.filename}:{o.lineno}"
        )
        p.text(f"Conclusion ({self.status!r}{origin})")

138 

139# The number of max children where, beyond this, it is practically impossible 

140# for hypothesis to saturate / explore all children nodes in a reasonable time 

141# frame. We use this to bail out of expensive max children computations early, 

142# where the numbers involved are so large that we know they will be larger than 

143# this number. 

144# 

145# Note that it's ok for us to underestimate the number of max children of a node 

146# by using this. We just may think the node is exhausted when in fact it has more 

147# possible children to be explored. This has the potential to finish generation 

148# early due to exhausting the entire tree, but that is quite unlikely: (1) the 

149# number of examples would have to be quite high, and (2) the tree would have to 

150# contain only one or two nodes, or generate_novel_prefix would simply switch to 

151# exploring another non-exhausted node. 

152# 

153# Also note that we may sometimes compute max children above this value. In other 

154# words, this is *not* a hard maximum on the computed max children. It's the point 

155# where further computation is not beneficial - but sometimes doing that computation 

156# unconditionally is cheaper than estimating against this value. 

157# 

158# The one case where this may be detrimental is fuzzing, where the throughput of 

159# examples is so high that it really may saturate important nodes. We'll cross 

160# that bridge when we come to it. 

# (See the discussion above: this is a soft threshold for bailing out of
# expensive computations, not a hard cap on computed max children.)
MAX_CHILDREN_EFFECTIVELY_INFINITE: Final[int] = 10_000_000

162 

163 

164def _count_distinct_strings(*, alphabet_size: int, min_size: int, max_size: int) -> int: 

165 # We want to estimate if we're going to have more children than 

166 # MAX_CHILDREN_EFFECTIVELY_INFINITE, without computing a potentially 

167 # extremely expensive pow. We'll check the two extreme cases - if the 

168 # number of strings in the largest string size alone is enough to put us 

169 # over this limit (at alphabet_size >= 2), and if the variation in sizes 

170 # (at alphabet_size == 1) is enough. If neither result in an early return, 

171 # the exact result should be reasonably cheap to compute. 

172 if alphabet_size == 0: 

173 # Special-case the empty string, avoid error in math.log(0). 

174 return 1 

175 elif alphabet_size == 1: 

176 # Special-case the constant alphabet, invalid in the geom-series sum. 

177 return max_size - min_size + 1 

178 else: 

179 # Estimate against log, which is cheaper than computing a pow. 

180 # 

181 # m = max_size 

182 # a = alphabet_size 

183 # N = MAX_CHILDREN_EFFECTIVELY_INFINITE 

184 # 

185 # a**m > N 

186 # <=> m * log(a) > log(N) 

187 log_max_sized_children = max_size * math.log(alphabet_size) 

188 if log_max_sized_children > math.log(MAX_CHILDREN_EFFECTIVELY_INFINITE): 

189 return MAX_CHILDREN_EFFECTIVELY_INFINITE 

190 

191 # The sum of a geometric series is given by (ref: wikipedia): 

192 # ᵐ∑ₖ₌₀ aᵏ = (aᵐ⁺¹ - 1) / (a - 1) 

193 # = S(m) / S(0) 

194 # assuming a != 1 and using the definition 

195 # S(m) := aᵐ⁺¹ - 1. 

196 # The sum we want, starting from a number n [0 <= n <= m] rather than zero, is 

197 # ᵐ∑ₖ₌ₙ aᵏ = ᵐ∑ₖ₌₀ aᵏ - ⁿ⁻¹∑ₖ₌₀ aᵏ = S(m) / S(0) - S(n - 1) / S(0) 

198 def S(n): 

199 return alphabet_size ** (n + 1) - 1 

200 

201 return (S(max_size) - S(min_size - 1)) // S(0) 

202 

203 

def compute_max_children(
    choice_type: ChoiceTypeT, constraints: ChoiceConstraintsT
) -> int:
    """Return the number of distinct values a draw of ``choice_type`` under
    ``constraints`` can produce.

    May be an underestimate for very large counts (see the note on
    MAX_CHILDREN_EFFECTIVELY_INFINITE above); callers use this to decide
    when a Branch is exhausted, for which underestimates are acceptable.

    Raises NotImplementedError for an unknown choice type.
    """
    if choice_type == "integer":
        constraints = cast(IntegerConstraints, constraints)
        min_value = constraints["min_value"]
        max_value = constraints["max_value"]

        if min_value is None and max_value is None:
            # full 128 bit range.
            return 2**128 - 1
        if min_value is not None and max_value is not None:
            # count between min/max value.
            return max_value - min_value + 1

        # hard case: only one bound was specified. Here we probe either upwards
        # or downwards with our full 128 bit generation, but only half of these
        # (plus one for the case of generating zero) result in a probe in the
        # direction we want. ((2**128 - 1) // 2) + 1 == 2 ** 127
        assert (min_value is None) ^ (max_value is None)
        return 2**127
    elif choice_type == "boolean":
        constraints = cast(BooleanConstraints, constraints)
        p = constraints["p"]
        # probabilities of 0 or 1 (or effectively 0 or 1) only have one choice.
        if p <= 2 ** (-64) or p >= (1 - 2 ** (-64)):
            return 1
        return 2
    elif choice_type == "bytes":
        constraints = cast(BytesConstraints, constraints)
        # bytes are just strings over the 256-character byte alphabet.
        return _count_distinct_strings(
            alphabet_size=2**8,
            min_size=constraints["min_size"],
            max_size=constraints["max_size"],
        )
    elif choice_type == "string":
        constraints = cast(StringConstraints, constraints)
        min_size = constraints["min_size"]
        max_size = constraints["max_size"]
        intervals = constraints["intervals"]

        return _count_distinct_strings(
            alphabet_size=len(intervals), min_size=min_size, max_size=max_size
        )
    elif choice_type == "float":
        constraints = cast(FloatConstraints, constraints)
        min_value_f = constraints["min_value"]
        max_value_f = constraints["max_value"]
        smallest_nonzero_magnitude = constraints["smallest_nonzero_magnitude"]

        count = count_between_floats(min_value_f, max_value_f)

        # we have two intervals:
        # a. [min_value, max_value]
        # b. [-smallest_nonzero_magnitude, smallest_nonzero_magnitude]
        #
        # which could be subsets (in either order), overlapping, or disjoint. We
        # want the interval difference a - b.

        # next_down because endpoints are ok with smallest_nonzero_magnitude
        min_point = max(min_value_f, -flt.next_down(smallest_nonzero_magnitude))
        max_point = min(max_value_f, flt.next_down(smallest_nonzero_magnitude))

        if min_point > max_point:
            # case: disjoint intervals.
            return count

        # Subtract the excluded sub-normal hole, then add back the signed
        # zeroes, which are always permitted when they lie inside [min, max].
        count -= count_between_floats(min_point, max_point)
        if sign_aware_lte(min_value_f, -0.0) and sign_aware_lte(-0.0, max_value_f):
            # account for -0.0
            count += 1
        if sign_aware_lte(min_value_f, 0.0) and sign_aware_lte(0.0, max_value_f):
            # account for 0.0
            count += 1
        return count

    raise NotImplementedError(f"unhandled choice_type {choice_type}")

281 

282 

283# In theory, this is a strict superset of the functionality of compute_max_children; 

284# 

285# assert len(all_children(choice_type, constraints)) == compute_max_children(choice_type, constraints) 

286# 

287# In practice, we maintain two distinct implementations for efficiency and space 

288# reasons. If you just need the number of children, it is cheaper to use 

289# compute_max_children than to reify the list of children (only to immediately 

290# throw it away). 

def _floats_between(a: float, b: float) -> Generator[float, None, None]:
    """Yield every representable float from ``a`` through ``b`` inclusive,
    in order of increasing bit pattern."""
    lo = float_to_int(a)
    hi = float_to_int(b)
    yield from map(int_to_float, range(lo, hi + 1))

294 

295 

def all_children(
    choice_type: ChoiceTypeT, constraints: ChoiceConstraintsT
) -> Generator[ChoiceT, None, None]:
    """Yield every distinct possible value for a draw of ``choice_type``
    under ``constraints``, with no duplicates."""
    if choice_type != "float":
        # Non-float types have an injective index ordering, so we can simply
        # enumerate indices [0, max_children) and map each to its value.
        for index in range(compute_max_children(choice_type, constraints)):
            yield choice_from_index(index, choice_type, constraints)
    else:
        constraints = cast(FloatConstraints, constraints)
        # the float ordering is not injective (because of resampling
        # out-of-bounds values), so using choice_from_index would result in
        # duplicates. This violates invariants in datatree about being able
        # to draw unique new children using all_children.
        #
        # We instead maintain a separate implementation for floats.
        # TODO_IR write a better (bijective) ordering for floats and remove this!
        min_value = constraints["min_value"]
        max_value = constraints["max_value"]
        smallest_nonzero_magnitude = constraints["smallest_nonzero_magnitude"]

        # handle zeroes separately so smallest_nonzero_magnitude can think of
        # itself as a complete interval (instead of a hole at ±0).
        if sign_aware_lte(min_value, -0.0) and sign_aware_lte(-0.0, max_value):
            yield -0.0
        if sign_aware_lte(min_value, 0.0) and sign_aware_lte(0.0, max_value):
            yield 0.0

        if flt.is_negative(min_value):
            if flt.is_negative(max_value):
                # case: both negative.
                max_point = min(max_value, -smallest_nonzero_magnitude)
                # float_to_int increases as negative magnitude increases, so
                # invert order.
                yield from _floats_between(max_point, min_value)
            else:
                # case: straddles midpoint (which is between -0.0 and 0.0).
                yield from _floats_between(-smallest_nonzero_magnitude, min_value)
                yield from _floats_between(smallest_nonzero_magnitude, max_value)
        else:
            # case: both positive.
            min_point = max(min_value, smallest_nonzero_magnitude)
            yield from _floats_between(min_point, max_value)

338 

@attr.s(slots=True)
class TreeNode:
    """
    A node, or collection of directly descended nodes, in a DataTree.

    We store the DataTree as a radix tree (https://en.wikipedia.org/wiki/Radix_tree),
    which means that nodes that are the only child of their parent are collapsed
    into their parent to save space.

    Conceptually, you can unfold a single TreeNode storing n values in its lists
    into a sequence of n nodes, each a child of the last. In other words,
    (constraints[i], values[i], choice_types[i]) corresponds to the single node at index
    i.

    Note that if a TreeNode represents a choice (i.e. the nodes cannot be compacted
    via the radix tree definition), then its lists will be empty and it will
    store a `Branch` representing that choice in its `transition`.

    Examples
    --------

    Consider sequentially drawing a boolean, then an integer.

        data.draw_boolean()
        data.draw_integer(1, 3)

    If we draw True and then 2, the tree may conceptually look like this.

        ┌──────┐
        │ root │
        └──┬───┘
        ┌──┴───┐
        │ True │
        └──┬───┘
        ┌──┴───┐
        │  2   │
        └──────┘

    But since 2 is the only child of True, we will compact these nodes and store
    them as a single TreeNode.

          ┌──────┐
          │ root │
          └──┬───┘
       ┌────┴──────┐
       │ [True, 2] │
       └───────────┘

    If we then draw True and then 3, True will have multiple children and we
    can no longer store this compacted representation. We would call split_at(0)
    on the [True, 2] node to indicate that we need to add a choice at 0-index
    node (True).

         ┌──────┐
         │ root │
         └──┬───┘
         ┌──┴───┐
       ┌─┤ True ├─┐
       │ └──────┘ │
     ┌─┴─┐      ┌─┴─┐
     │ 2 │      │ 3 │
     └───┘      └───┘
    """

    # The constraints, value, and choice_types of the nodes stored here. These always
    # have the same length. The values at index i belong to node i.
    constraints: list[ChoiceConstraintsT] = attr.ib(factory=list)
    values: list[ChoiceT] = attr.ib(factory=list)
    choice_types: list[ChoiceTypeT] = attr.ib(factory=list)

    # The indices of nodes which had forced values.
    #
    # Stored as None if no indices have been forced, purely for space saving
    # reasons (we force quite rarely).
    __forced: Optional[set[int]] = attr.ib(default=None, init=False)

    # What happens next after drawing these nodes. (conceptually, "what is the
    # child/children of the last node stored here").
    #
    # One of:
    # - None (we don't know yet)
    # - Branch (we have seen multiple possible outcomes here)
    # - Conclusion (ConjectureData.conclude_test was called here)
    # - Killed (this branch is valid and may even have children, but should not
    #   be explored when generating novel prefixes)
    transition: Union[None, Branch, Conclusion, Killed] = attr.ib(default=None)

    # A tree node is exhausted if every possible sequence of draws below it has
    # been explored. We only update this when performing operations that could
    # change the answer.
    #
    # See also TreeNode.check_exhausted.
    is_exhausted: bool = attr.ib(default=False, init=False)

    @property
    def forced(self) -> AbstractSet[int]:
        # Read-only view of the forced indices; shares the EMPTY singleton
        # when nothing has been forced, so callers must not mutate it.
        if not self.__forced:
            return EMPTY
        return self.__forced

    def mark_forced(self, i: int) -> None:
        """
        Note that the draw at node i was forced.
        """
        assert 0 <= i < len(self.values)
        if self.__forced is None:
            # Allocate lazily — most nodes never have forced draws.
            self.__forced = set()
        self.__forced.add(i)

    def split_at(self, i: int) -> None:
        """
        Splits the tree so that it can incorporate a decision at the draw call
        corresponding to the node at position i.

        Raises FlakyStrategyDefinition if node i was forced.
        """

        if i in self.forced:
            # A forced draw cannot become a choice point; seeing a different
            # value here means the strategy behaved nondeterministically.
            raise FlakyStrategyDefinition(_FLAKY_STRAT_MSG)

        assert not self.is_exhausted

        key = self.values[i]

        # Everything after index i becomes the (single, so far) child under
        # the new Branch; this node keeps only the prefix [0, i).
        child = TreeNode(
            choice_types=self.choice_types[i + 1 :],
            constraints=self.constraints[i + 1 :],
            values=self.values[i + 1 :],
            transition=self.transition,
        )
        self.transition = Branch(
            constraints=self.constraints[i],
            choice_type=self.choice_types[i],
            children={key: child},
        )
        if self.__forced is not None:
            # Re-base forced indices: those past i shift into the child's
            # coordinate system; those before i stay with this node.
            child.__forced = {j - i - 1 for j in self.__forced if j > i}
            self.__forced = {j for j in self.__forced if j < i}
        child.check_exhausted()
        del self.choice_types[i:]
        del self.values[i:]
        del self.constraints[i:]
        assert len(self.values) == len(self.constraints) == len(self.choice_types) == i

    def check_exhausted(self) -> bool:
        """
        Recalculates is_exhausted if necessary, and then returns it.

        A node is exhausted if:
        - Its transition is Conclusion or Killed
        - It has the maximum number of children (i.e. we have found all of its
          possible children), and all its children are exhausted

        Therefore, we only need to compute this for a node when:
        - We first create it in split_at
        - We set its transition to either Conclusion or Killed
          (TreeRecordingObserver.conclude_test or TreeRecordingObserver.kill_branch)
        - We exhaust any of its children
        """

        if (
            # a node cannot go from is_exhausted -> not is_exhausted.
            not self.is_exhausted
            # if we don't know what happens after this node, we don't have
            # enough information to tell if it's exhausted.
            and self.transition is not None
            # if there are still any nodes left which are the only child of their
            # parent (len(self.values) > 0), then this TreeNode must be not
            # exhausted, unless all of those nodes were forced.
            #
            # This is because we maintain an invariant of only adding nodes to
            # DataTree which have at least 2 possible values, so we know that if
            # they do not have any siblings that we still have more choices to
            # discover.
            #
            # (We actually *do* currently add single-valued nodes to the tree,
            # but immediately split them into a transition to avoid falsifying
            # this check. this is a bit of a hack.)
            and len(self.forced) == len(self.values)
        ):
            if isinstance(self.transition, (Conclusion, Killed)):
                self.is_exhausted = True
            elif len(self.transition.children) == self.transition.max_children:
                self.is_exhausted = all(
                    v.is_exhausted for v in self.transition.children.values()
                )
        return self.is_exhausted

    def _repr_pretty_(self, p: "RepresentationPrinter", cycle: bool) -> None:
        """Pretty-print each compacted draw on its own (increasingly
        indented) line, followed by the transition or "unknown"."""
        assert cycle is False
        indent = 0
        for i, (choice_type, constraints, value) in enumerate(
            zip(self.choice_types, self.constraints, self.values)
        ):
            with p.indent(indent):
                if i > 0:
                    p.break_()
                p.text(
                    _node_pretty(
                        choice_type, value, constraints, forced=i in self.forced
                    )
                )
            indent += 2

        with p.indent(indent):
            if len(self.values) > 0:
                p.break_()
            if self.transition is not None:
                p.pretty(self.transition)
            else:
                p.text("unknown")

550 

551 

552class DataTree: 

553 """ 

554 A DataTree tracks the structured history of draws in some test function, 

555 across multiple ConjectureData objects. 

556 

557 This information is used by ConjectureRunner to generate novel prefixes of 

558 this tree (see generate_novel_prefix). A novel prefix is a sequence of draws 

559 which the tree has not seen before, and therefore the ConjectureRunner has 

560 not generated as an input to the test function before. 

561 

562 DataTree tracks the following: 

563 

564 - Drawn choices in the choice sequence 

565 - ConjectureData.draw_integer() 

566 - ConjectureData.draw_float() 

567 - ConjectureData.draw_string() 

568 - ConjectureData.draw_boolean() 

569 - ConjectureData.draw_bytes() 

570 - Test conclusions (with some Status, e.g. Status.VALID) 

571 - ConjectureData.conclude_test() 

572 

573 A DataTree is — surprise — a *tree*. A node in this tree is either a choice draw 

574 with some value, a test conclusion with some Status, or a special `Killed` value, 

575 which denotes that further draws may exist beyond this node but should not be 

576 considered worth exploring when generating novel prefixes. A node is a leaf 

577 iff it is a conclusion or Killed. 

578 

579 A branch from node A to node B indicates that we have previously seen some 

580 sequence (a, b) of draws, where a and b are the values in nodes A and B. 

581 Similar intuition holds for conclusion and Killed nodes. 

582 

583 Examples 

584 -------- 

585 

586 To see how a DataTree gets built through successive sets of draws, consider 

587 the following code that calls through to some ConjectureData object `data`. 

588 The first call can be either True or False, and the second call can be any 

589 integer in the range [1, 3]. 

590 

591 data.draw_boolean() 

592 data.draw_integer(1, 3) 

593 

594 To start, the corresponding DataTree object is completely empty. 

595 

596 ┌──────┐ 

597 │ root │ 

598 └──────┘ 

599 

600 We happen to draw True and then 2 in the above code. The tree tracks this. 

601 (2 also connects to a child Conclusion node with Status.VALID since it's the 

602 final draw in the code. I'll omit Conclusion nodes in diagrams for brevity.) 

603 

604 ┌──────┐ 

605 │ root │ 

606 └──┬───┘ 

607 ┌──┴───┐ 

608 │ True │ 

609 └──┬───┘ 

610 ┌──┴───┐ 

611 │ 2 │ 

612 └──────┘ 

613 

614 This is a very boring tree so far! But now we happen to draw False and 

615 then 1. This causes a split in the tree. Remember, DataTree tracks history 

616 over all invocations of a function, not just one. The end goal is to know 

617 what invocations haven't been tried yet, after all. 

618 

619 ┌──────┐ 

620 ┌───┤ root ├───┐ 

621 │ └──────┘ │ 

622 ┌──┴───┐ ┌─┴─────┐ 

623 │ True │ │ False │ 

624 └──┬───┘ └──┬────┘ 

625 ┌─┴─┐ ┌─┴─┐ 

626 │ 2 │ │ 1 │ 

627 └───┘ └───┘ 

628 

629 If we were to ask DataTree for a novel prefix at this point, it might 

630 generate any of (True, 1), (True, 3), (False, 2), or (False, 3). 

631 

632 Note that the novel prefix stops as soon as it generates a novel node. For 

633 instance, if we had generated a novel prefix back when the tree was only 

634 root -> True -> 2, we could have gotten any of (True, 1), (True, 3), or 

635 (False). But we could *not* have gotten (False, n), because both False and 

636 n were novel at that point, and we stop at the first novel node — False. 

637 

638 I won't belabor this example. Here's what the tree looks like when fully 

639 explored: 

640 

641 ┌──────┐ 

642 ┌──────┤ root ├──────┐ 

643 │ └──────┘ │ 

644 ┌──┴───┐ ┌─┴─────┐ 

645 ┌──┤ True ├──┐ ┌───┤ False ├──┐ 

646 │ └──┬───┘ │ │ └──┬────┘ │ 

647 ┌─┴─┐ ┌─┴─┐ ┌─┴─┐ ┌─┴─┐ ┌─┴─┐ ┌─┴─┐ 

648 │ 1 │ │ 2 │ │ 3 │ │ 1 │ │ 2 │ │ 3 │ 

649 └───┘ └───┘ └───┘ └───┘ └───┘ └───┘ 

650 

651 You could imagine much more complicated trees than this arising in practice, 

652 and indeed they do. In particular, the tree need not be balanced or 'nice' 

653 like the tree above. For instance, 

654 

655 b = data.draw_boolean() 

656 if b: 

657 data.draw_integer(1, 3) 

658 

659 results in a tree with the entire right part lopped off, and False leading 

660 straight to a conclusion node with Status.VALID. As another example, 

661 

662 n = data.draw_integers() 

663 assume(n >= 3) 

664 data.draw_string() 

665 

666 results in a tree with the 0, 1, and 2 nodes leading straight to a 

667 conclusion node with Status.INVALID, and the rest branching off into all 

668 the possibilities of draw_string. 

669 

670 Notes 

671 ----- 

672 

673 The above examples are slightly simplified and are intended to convey 

674 intuition. In practice, there are some implementation details to be aware 

675 of. 

676 

677 - In draw nodes, we store the constraints used in addition to the value drawn. 

678 E.g. the node corresponding to data.draw_float(min_value=1.0, max_value=1.5) 

679 would store {"min_value": 1.0, "max_value": 1.5, ...} (default values for 

680 other constraints omitted). 

681 

682 The constraints parameters have the potential to change both the range of 

683 possible outputs of a node, and the probability distribution within that 

684 range, so we need to use these when drawing in DataTree as well. We draw 

685 values using these constraints when (1) generating a novel value for a node 

686 and (2) choosing a random child when traversing the tree. 

687 

688 - For space efficiency, rather than tracking the full tree structure, we 

689 store DataTree as a radix tree. This is conceptually equivalent (radix 

690 trees can always be "unfolded" to the full tree) but it means the internal 

691 representation may differ in practice. 

692 

693 See TreeNode for more information. 

694 """ 

695 

    def __init__(self) -> None:
        # The root of the radix tree; starts out as a single empty node.
        self.root: TreeNode = TreeNode()
        # Per-node/branch child-generation state, keyed by object id (see
        # generate_novel_prefix and _get_children_cache), so repeated
        # novel-prefix attempts don't restart enumeration from scratch.
        self._children_cache: dict[ChoiceT, ChildrenCacheValueT] = {}

699 

    @property
    def is_exhausted(self) -> bool:
        """
        Returns True if every node is exhausted, and therefore the tree has
        been fully explored.
        """
        # Exhaustion propagates upward via TreeNode.check_exhausted, so the
        # root's flag summarises the whole tree.
        return self.root.is_exhausted

707 

    def generate_novel_prefix(self, random: Random) -> tuple[ChoiceT, ...]:
        """Generate a short random string that (after rewriting) is not
        a prefix of any buffer previously added to the tree.

        The resulting prefix is essentially arbitrary - it would be nice
        for it to be uniform at random, but previous attempts to do that
        have proven too expensive.
        """
        assert not self.is_exhausted
        prefix = []

        def append_choice(choice_type: ChoiceTypeT, choice: ChoiceT) -> None:
            # Floats are tracked internally as bit patterns (see _draw);
            # convert back to a real float before emitting them.
            if choice_type == "float":
                assert isinstance(choice, int)
                choice = int_to_float(choice)
            prefix.append(choice)

        current_node = self.root
        while True:
            assert not current_node.is_exhausted
            # Walk the compacted run of draws stored in this node, copying
            # forced values verbatim and trying to deviate at the first
            # unforced draw.
            for i, (choice_type, constraints, value) in enumerate(
                zip(
                    current_node.choice_types,
                    current_node.constraints,
                    current_node.values,
                )
            ):
                if i in current_node.forced:
                    append_choice(choice_type, value)
                else:
                    attempts = 0
                    while True:
                        # Try random draws a few times first; after that,
                        # fall back to systematically enumerating children
                        # via the cache so we always terminate.
                        if attempts <= 10:
                            try:
                                node_value = self._draw(
                                    choice_type, constraints, random=random
                                )
                            except StopTest:  # pragma: no cover
                                # it is possible that drawing from a fresh data can
                                # overrun BUFFER_SIZE, due to eg unlucky rejection sampling
                                # of integer probes. Retry these cases.
                                attempts += 1
                                continue
                        else:
                            node_value = self._draw_from_cache(
                                choice_type,
                                constraints,
                                key=id(current_node),
                                random=random,
                            )

                        if node_value != value:
                            append_choice(choice_type, node_value)
                            break
                        attempts += 1
                        self._reject_child(
                            choice_type,
                            constraints,
                            child=node_value,
                            key=id(current_node),
                        )
                    # We've now found a value that is allowed to
                    # vary, so what follows is not fixed.
                    return tuple(prefix)
            else:
                # (for/else: only reached when every stored draw was forced
                # and replayed without deviation.)
                assert not isinstance(current_node.transition, (Conclusion, Killed))
                if current_node.transition is None:
                    return tuple(prefix)
                branch = current_node.transition
                assert isinstance(branch, Branch)

                # Draw a value at this branch point: a value with no recorded
                # child is novel (return), a non-exhausted child is descended
                # into, and an exhausted child is rejected and retried.
                attempts = 0
                while True:
                    if attempts <= 10:
                        try:
                            node_value = self._draw(
                                branch.choice_type, branch.constraints, random=random
                            )
                        except StopTest:  # pragma: no cover
                            attempts += 1
                            continue
                    else:
                        node_value = self._draw_from_cache(
                            branch.choice_type,
                            branch.constraints,
                            key=id(branch),
                            random=random,
                        )
                    try:
                        child = branch.children[node_value]
                    except KeyError:
                        append_choice(branch.choice_type, node_value)
                        return tuple(prefix)
                    if not child.is_exhausted:
                        append_choice(branch.choice_type, node_value)
                        current_node = child
                        break
                    attempts += 1
                    self._reject_child(
                        branch.choice_type,
                        branch.constraints,
                        child=node_value,
                        key=id(branch),
                    )

                    # We don't expect this assertion to ever fire, but coverage
                    # wants the loop inside to run if you have branch checking
                    # on, hence the pragma.
                    assert (  # pragma: no cover
                        attempts != 1000
                        or len(branch.children) < branch.max_children
                        or any(not v.is_exhausted for v in branch.children.values())
                    )

821 

822 def rewrite(self, choices): 

823 """Use previously seen ConjectureData objects to return a tuple of 

824 the rewritten choice sequence and the status we would get from running 

825 that with the test function. If the status cannot be predicted 

826 from the existing values it will be None.""" 

827 data = ConjectureData.for_choices(choices) 

828 try: 

829 self.simulate_test_function(data) 

830 return (data.choices, data.status) 

831 except PreviouslyUnseenBehaviour: 

832 return (choices, None) 

833 

    def simulate_test_function(self, data: ConjectureData) -> None:
        """Run a simulated version of the test function recorded by
        this tree. Note that this does not currently call ``stop_span``
        or ``start_span`` as these are not currently recorded in the
        tree. This will likely change in future.

        Raises PreviouslyUnseenBehaviour if the simulation reaches a part
        of the tree that has not been recorded.
        """
        node = self.root

        def draw(choice_type, constraints, *, forced=None, convert_forced=True):
            # Forced float values are stored in the tree as bit patterns;
            # convert back to an actual float before performing the draw.
            if choice_type == "float" and forced is not None and convert_forced:
                forced = int_to_float(forced)

            draw_func = getattr(data, f"draw_{choice_type}")
            value = draw_func(**constraints, forced=forced)

            # Normalise drawn floats to their bit pattern so they compare
            # equal to the values stored in the tree (see DataTree._draw).
            if choice_type == "float":
                value = float_to_int(value)
            return value

        try:
            while True:
                # Replay the compacted run of draws stored in this node,
                # bailing out as soon as a draw disagrees with the record.
                for i, (choice_type, constraints, previous) in enumerate(
                    zip(node.choice_types, node.constraints, node.values)
                ):
                    v = draw(
                        choice_type,
                        constraints,
                        forced=previous if i in node.forced else None,
                    )
                    if v != previous:
                        raise PreviouslyUnseenBehaviour
                if isinstance(node.transition, Conclusion):
                    t = node.transition
                    data.conclude_test(t.status, t.interesting_origin)
                elif node.transition is None:
                    raise PreviouslyUnseenBehaviour
                elif isinstance(node.transition, Branch):
                    v = draw(node.transition.choice_type, node.transition.constraints)
                    try:
                        node = node.transition.children[v]
                    except KeyError as err:
                        raise PreviouslyUnseenBehaviour from err
                else:
                    assert isinstance(node.transition, Killed)
                    data.observer.kill_branch()
                    node = node.transition.next_node
        except StopTest:
            # StopTest signals that the (simulated) test run concluded;
            # swallow it so callers just see the updated `data`.
            pass

881 

882 def new_observer(self): 

883 return TreeRecordingObserver(self) 

884 

885 def _draw( 

886 self, 

887 choice_type: ChoiceTypeT, 

888 constraints: ChoiceConstraintsT, 

889 *, 

890 random: Random, 

891 ) -> ChoiceT: 

892 from hypothesis.internal.conjecture.data import draw_choice 

893 

894 value = draw_choice(choice_type, constraints, random=random) 

895 # using floats as keys into branch.children breaks things, because 

896 # e.g. hash(0.0) == hash(-0.0) would collide as keys when they are 

897 # in fact distinct child branches. 

898 # To distinguish floats here we'll use their bits representation. This 

899 # entails some bookkeeping such that we're careful about when the 

900 # float key is in its bits form (as a key into branch.children) and 

901 # when it is in its float form (as a value we want to write to the 

902 # buffer), and converting between the two forms as appropriate. 

903 if choice_type == "float": 

904 value = float_to_int(value) 

905 return value 

906 

907 def _get_children_cache( 

908 self, choice_type: ChoiceTypeT, constraints: ChoiceConstraintsT, *, key: ChoiceT 

909 ) -> ChildrenCacheValueT: 

910 # cache the state of the children generator per node/branch (passed as 

911 # `key` here), such that we track which children we've already tried 

912 # for this branch across draws. 

913 # We take advantage of python generators here as one-way iterables, 

914 # so each time we iterate we implicitly store our position in the 

915 # children generator and don't re-draw children. `children` is the 

916 # concrete list of children draw from the generator that we will work 

917 # with. Whenever we need to top up this list, we will draw a new value 

918 # from the generator. 

919 if key not in self._children_cache: 

920 generator = all_children(choice_type, constraints) 

921 children: list[ChoiceT] = [] 

922 rejected: set[ChoiceT] = set() 

923 self._children_cache[key] = (generator, children, rejected) 

924 

925 return self._children_cache[key] 

926 

927 def _draw_from_cache( 

928 self, 

929 choice_type: ChoiceTypeT, 

930 constraints: ChoiceConstraintsT, 

931 *, 

932 key: ChoiceT, 

933 random: Random, 

934 ) -> ChoiceT: 

935 (generator, children, rejected) = self._get_children_cache( 

936 choice_type, constraints, key=key 

937 ) 

938 # Keep a stock of 100 potentially-valid children at all times. 

939 # This number is chosen to balance memory/speed vs randomness. Ideally 

940 # we would sample uniformly from all not-yet-rejected children, but 

941 # computing and storing said children is not free. 

942 # no-branch because coverage of the fall-through case here is a bit 

943 # annoying. 

944 if len(children) < 100: # pragma: no branch 

945 for v in generator: 

946 if choice_type == "float": 

947 assert isinstance(v, float) 

948 v = float_to_int(v) 

949 if v in rejected: 

950 continue 

951 children.append(v) 

952 if len(children) >= 100: 

953 break 

954 

955 return random.choice(children) 

956 

957 def _reject_child( 

958 self, 

959 choice_type: ChoiceTypeT, 

960 constraints: ChoiceConstraintsT, 

961 *, 

962 child: ChoiceT, 

963 key: ChoiceT, 

964 ) -> None: 

965 (_generator, children, rejected) = self._get_children_cache( 

966 choice_type, constraints, key=key 

967 ) 

968 rejected.add(child) 

969 # we remove a child from the list of possible children *only* when it is 

970 # rejected, and not when it is initially drawn in _draw_from_cache. The 

971 # reason is that a child being drawn does not guarantee that child will 

972 # be used in a way such that it is written back to the tree, so it needs 

973 # to be available for future draws until we are certain it has been 

974 # used. 

975 # 

976 # For instance, if we generated novel prefixes in a loop (but never used 

977 # those prefixes to generate new values!) then we don't want to remove 

978 # the drawn children from the available pool until they are actually 

979 # used. 

980 # 

981 # This does result in a small inefficiency: we may draw a child, 

982 # immediately use it (so we know it cannot be drawn again), but still 

983 # wait to draw and reject it here, because DataTree cannot guarantee 

984 # the drawn child has been used. 

985 if child in children: 

986 children.remove(child) 

987 

988 def _repr_pretty_(self, p: "RepresentationPrinter", cycle: bool) -> None: 

989 assert cycle is False 

990 p.pretty(self.root) 

991 

992 

class TreeRecordingObserver(DataObserver):
    """DataObserver that records the draws of a test run into a DataTree.

    As values are drawn it walks the tree from the root, extending nodes
    with new draws, splitting a node into a Branch when a replayed value
    diverges from what was recorded, and raising FlakyStrategyDefinition /
    FlakyReplay when the observed behaviour contradicts the recording.
    """

    def __init__(self, tree: DataTree):
        # this attr isn't read, but is very useful for local debugging flaky
        # errors, with
        # `from hypothesis.vendor import pretty; print(pretty.pretty(self._root))`
        self._root = tree.root
        # The node we are currently writing/replaying draws into, and the
        # index of the next draw within that node.
        self._current_node: TreeNode = tree.root
        self._index_in_current_node: int = 0
        # Every distinct node visited so far, in order; used by
        # __update_exhausted to propagate exhaustion from leaf to root.
        self._trail: list[TreeNode] = [self._current_node]
        self.killed: bool = False

    def draw_integer(
        self, value: int, *, was_forced: bool, constraints: IntegerConstraints
    ) -> None:
        self.draw_value(
            "integer", value, was_forced=was_forced, constraints=constraints
        )

    def draw_float(
        self, value: float, *, was_forced: bool, constraints: FloatConstraints
    ) -> None:
        self.draw_value("float", value, was_forced=was_forced, constraints=constraints)

    def draw_string(
        self, value: str, *, was_forced: bool, constraints: StringConstraints
    ) -> None:
        self.draw_value("string", value, was_forced=was_forced, constraints=constraints)

    def draw_bytes(
        self, value: bytes, *, was_forced: bool, constraints: BytesConstraints
    ) -> None:
        self.draw_value("bytes", value, was_forced=was_forced, constraints=constraints)

    def draw_boolean(
        self, value: bool, *, was_forced: bool, constraints: BooleanConstraints
    ) -> None:
        self.draw_value(
            "boolean", value, was_forced=was_forced, constraints=constraints
        )

    def draw_value(
        self,
        choice_type: ChoiceTypeT,
        value: ChoiceT,
        *,
        was_forced: bool,
        constraints: ChoiceConstraintsT,
    ) -> None:
        """Record a single drawn value, advancing or extending the tree.

        Raises FlakyStrategyDefinition when the draw disagrees with what the
        tree previously recorded at this position (different choice type,
        different constraints, forcedness mismatch, or a draw past a
        recorded conclusion).
        """
        i = self._index_in_current_node
        self._index_in_current_node += 1
        node = self._current_node

        # Floats are stored by their bit pattern so that values such as 0.0
        # and -0.0 remain distinct keys into branch.children.
        if isinstance(value, float):
            value = float_to_int(value)

        assert len(node.constraints) == len(node.values) == len(node.choice_types)
        if i < len(node.values):
            # Replaying a draw that this node has already recorded.
            if (
                choice_type != node.choice_types[i]
                or constraints != node.constraints[i]
            ):
                raise FlakyStrategyDefinition(_FLAKY_STRAT_MSG)
            # Note that we don't check whether a previously
            # forced value is now free. That will be caught
            # if we ever split the node there, but otherwise
            # may pass silently. This is acceptable because it
            # means we skip a hash set lookup on every
            # draw and that's a pretty niche failure mode.
            if was_forced and i not in node.forced:
                raise FlakyStrategyDefinition(_FLAKY_STRAT_MSG)
            if value != node.values[i]:
                # The value diverges from the recording: split this node at
                # position i and continue into a fresh child for the new
                # value.
                node.split_at(i)
                assert i == len(node.values)
                new_node = TreeNode()
                assert isinstance(node.transition, Branch)
                node.transition.children[value] = new_node
                self._current_node = new_node
                self._index_in_current_node = 0
        else:
            trans = node.transition
            if trans is None:
                # Past the end of the recorded draws: extend the node.
                node.choice_types.append(choice_type)
                node.constraints.append(constraints)
                node.values.append(value)
                if was_forced:
                    node.mark_forced(i)
                # generate_novel_prefix assumes the following invariant: any one
                # of the series of draws in a particular node can vary, i.e. the
                # max number of children is at least 2. However, some draws are
                # pseudo-choices and only have a single value, such as
                # integers(0, 0).
                #
                # Currently, we address this by forcefully splitting such
                # single-valued nodes into a transition when we see them. An
                # exception to this is if it was forced: forced pseudo-choices
                # do not cause the above issue because they inherently cannot
                # vary, and moreover they trip other invariants about never
                # splitting forced nodes.
                #
                # An alternative is not writing such choices to the tree at
                # all, and thus guaranteeing that each node has at least 2 max
                # children.
                if (
                    compute_max_children(choice_type, constraints) == 1
                    and not was_forced
                ):
                    node.split_at(i)
                    assert isinstance(node.transition, Branch)
                    self._current_node = node.transition.children[value]
                    self._index_in_current_node = 0
            elif isinstance(trans, Conclusion):
                assert trans.status != Status.OVERRUN
                # We tried to draw where history says we should have
                # stopped
                raise FlakyStrategyDefinition(_FLAKY_STRAT_MSG)
            else:
                # Follow (or create) the child branch for this value.
                assert isinstance(trans, Branch), trans
                if choice_type != trans.choice_type or constraints != trans.constraints:
                    raise FlakyStrategyDefinition(_FLAKY_STRAT_MSG)
                try:
                    self._current_node = trans.children[value]
                except KeyError:
                    self._current_node = trans.children.setdefault(value, TreeNode())
                self._index_in_current_node = 0
        # Record newly entered nodes so exhaustion can later be propagated
        # back up the trail.
        if self._trail[-1] is not self._current_node:
            self._trail.append(self._current_node)

    def kill_branch(self) -> None:
        """Mark this part of the tree as not worth re-exploring."""
        # Killing is idempotent within a single observed run.
        if self.killed:
            return

        self.killed = True

        # A kill must happen exactly at the end of this node's recorded
        # draws, and only where no conflicting transition already exists.
        if self._index_in_current_node < len(self._current_node.values) or (
            self._current_node.transition is not None
            and not isinstance(self._current_node.transition, Killed)
        ):
            raise FlakyStrategyDefinition(_FLAKY_STRAT_MSG)

        if self._current_node.transition is None:
            self._current_node.transition = Killed(TreeNode())
            # A freshly killed node may render ancestors exhausted too.
            self.__update_exhausted()

        self._current_node = self._current_node.transition.next_node
        self._index_in_current_node = 0
        self._trail.append(self._current_node)

    def conclude_test(
        self, status: Status, interesting_origin: Optional[InterestingOrigin]
    ) -> None:
        """Says that ``status`` occurred at node ``node``. This updates the
        node if necessary and checks for consistency.

        Raises FlakyStrategyDefinition if the test concluded mid-node, and
        FlakyReplay if it concluded with a different result than a previous
        run of the same choices.
        """
        # Overruns are not recorded in the tree at all.
        if status == Status.OVERRUN:
            return
        i = self._index_in_current_node
        node = self._current_node

        # Concluding before exhausting this node's recorded draws, or where
        # a branch says more draws should follow, is flaky behaviour.
        if i < len(node.values) or isinstance(node.transition, Branch):
            raise FlakyStrategyDefinition(_FLAKY_STRAT_MSG)

        new_transition = Conclusion(status, interesting_origin)

        if node.transition is not None and node.transition != new_transition:
            # As an, I'm afraid, horrible bodge, we deliberately ignore flakiness
            # where tests go from interesting to valid, because it's much easier
            # to produce good error messages for these further up the stack.
            if isinstance(node.transition, Conclusion) and (
                node.transition.status != Status.INTERESTING
                or new_transition.status != Status.VALID
            ):
                old_origin = node.transition.interesting_origin
                new_origin = new_transition.interesting_origin
                raise FlakyReplay(
                    f"Inconsistent results from replaying a test case!\n"
                    f"  last: {node.transition.status.name} from {old_origin}\n"
                    f"  this: {new_transition.status.name} from {new_origin}",
                    (old_origin, new_origin),
                )
        else:
            node.transition = new_transition

        assert node is self._trail[-1]
        node.check_exhausted()
        assert len(node.values) > 0 or node.check_exhausted()

        if not self.killed:
            self.__update_exhausted()

    def __update_exhausted(self) -> None:
        # Walk the visited nodes leaf-to-root, refreshing exhaustion flags.
        for t in reversed(self._trail):
            # Any node we've traversed might have now become exhausted.
            # We check from the right. As soon as we hit a node that
            # isn't exhausted, this automatically implies that all of
            # its parents are not exhausted, so we stop.
            if not t.check_exhausted():
                break