1# This file is part of Hypothesis, which may be found at
2# https://github.com/HypothesisWorks/hypothesis/
3#
4# Copyright the Hypothesis Authors.
5# Individual contributors are listed in AUTHORS.rst and the git log.
6#
7# This Source Code Form is subject to the terms of the Mozilla Public License,
8# v. 2.0. If a copy of the MPL was not distributed with this file, You can
9# obtain one at https://mozilla.org/MPL/2.0/.
10
11from collections.abc import Iterable, Sequence
12from typing import TYPE_CHECKING, Union, cast, final
13
14if TYPE_CHECKING:
15 from typing import TypeAlias
16
17 from typing_extensions import Self
18
19IntervalsT: "TypeAlias" = tuple[tuple[int, int], ...]
20
21
22# @final makes mypy happy with the Self return annotations. We otherwise run
23# afoul of:
24# > You should not use Self as the return annotation if the method is not
25# > guaranteed to return an instance of a subclass when the class is subclassed
26# > https://docs.python.org/3/library/typing.html#typing.Self
27
28
29@final
30class IntervalSet:
31 """
32 A compact and efficient representation of a set of ``(a, b)`` intervals. Can
33 be treated like a set of integers, in that ``n in intervals`` will return
34 ``True`` if ``n`` is contained in any of the ``(a, b)`` intervals, and
35 ``False`` otherwise.
36 """
37
38 @classmethod
39 def from_string(cls, s: str) -> "Self":
40 """Return a tuple of intervals, covering the codepoints of characters in `s`.
41
42 >>> IntervalSet.from_string('abcdef0123456789')
43 ((48, 57), (97, 102))
44 """
45 x = cls([(ord(c), ord(c)) for c in sorted(s)])
46 return x.union(x)
47
48 def __init__(self, intervals: Iterable[Sequence[int]] = ()) -> None:
49 self.intervals: IntervalsT = cast(
50 IntervalsT, tuple(tuple(v) for v in intervals)
51 )
52 # cast above is validated by this length assertion. check here instead of
53 # before to not exhaust generators before we create intervals from it
54 assert all(len(v) == 2 for v in self.intervals)
55
56 self.offsets: list[int] = [0]
57 for u, v in self.intervals:
58 self.offsets.append(self.offsets[-1] + v - u + 1)
59 self.size = self.offsets.pop()
60 self._idx_of_zero = self.index_above(ord("0"))
61 self._idx_of_Z = min(self.index_above(ord("Z")), len(self) - 1)
62
63 def __len__(self) -> int:
64 return self.size
65
66 def __iter__(self) -> Iterable[int]:
67 for u, v in self.intervals:
68 yield from range(u, v + 1)
69
70 def __getitem__(self, i: int) -> int:
71 if i < 0:
72 i = self.size + i
73 if i < 0 or i >= self.size:
74 raise IndexError(f"Invalid index {i} for [0, {self.size})")
75 # Want j = maximal such that offsets[j] <= i
76
77 j = len(self.intervals) - 1
78 if self.offsets[j] > i:
79 hi = j
80 lo = 0
81 # Invariant: offsets[lo] <= i < offsets[hi]
82 while lo + 1 < hi:
83 mid = (lo + hi) // 2
84 if self.offsets[mid] <= i:
85 lo = mid
86 else:
87 hi = mid
88 j = lo
89 t = i - self.offsets[j]
90 u, v = self.intervals[j]
91 r = u + t
92 assert r <= v
93 return r
94
95 def __contains__(self, elem: Union[str, int]) -> bool:
96 if isinstance(elem, str):
97 elem = ord(elem)
98 assert 0 <= elem <= 0x10FFFF
99 return any(start <= elem <= end for start, end in self.intervals)
100
101 def __repr__(self) -> str:
102 return f"IntervalSet({self.intervals!r})"
103
104 def index(self, value: int) -> int:
105 for offset, (u, v) in zip(self.offsets, self.intervals):
106 if u == value:
107 return offset
108 elif u > value:
109 raise ValueError(f"{value} is not in list")
110 if value <= v:
111 return offset + (value - u)
112 raise ValueError(f"{value} is not in list")
113
114 def index_above(self, value: int) -> int:
115 for offset, (u, v) in zip(self.offsets, self.intervals):
116 if u >= value:
117 return offset
118 if value <= v:
119 return offset + (value - u)
120 return self.size
121
122 def __or__(self, other: "Self") -> "Self":
123 return self.union(other)
124
125 def __sub__(self, other: "Self") -> "Self":
126 return self.difference(other)
127
128 def __and__(self, other: "Self") -> "Self":
129 return self.intersection(other)
130
131 def __eq__(self, other: object) -> bool:
132 return isinstance(other, IntervalSet) and (other.intervals == self.intervals)
133
134 def __hash__(self) -> int:
135 return hash(self.intervals)
136
137 def union(self, other: "Self") -> "Self":
138 """Merge two sequences of intervals into a single tuple of intervals.
139
140 Any integer bounded by `x` or `y` is also bounded by the result.
141
142 >>> union([(3, 10)], [(1, 2), (5, 17)])
143 ((1, 17),)
144 """
145 assert isinstance(other, type(self))
146 x = self.intervals
147 y = other.intervals
148 if not x:
149 return IntervalSet(y)
150 if not y:
151 return IntervalSet(x)
152 intervals = sorted(x + y, reverse=True)
153 result = [intervals.pop()]
154 while intervals:
155 # 1. intervals is in descending order
156 # 2. pop() takes from the RHS.
157 # 3. (a, b) was popped 1st, then (u, v) was popped 2nd
158 # 4. Therefore: a <= u
159 # 5. We assume that u <= v and a <= b
160 # 6. So we need to handle 2 cases of overlap, and one disjoint case
161 # | u--v | u----v | u--v |
162 # | a----b | a--b | a--b |
163 u, v = intervals.pop()
164 a, b = result[-1]
165 if u <= b + 1:
166 # Overlap cases
167 result[-1] = (a, max(v, b))
168 else:
169 # Disjoint case
170 result.append((u, v))
171 return IntervalSet(result)
172
173 def difference(self, other: "Self") -> "Self":
174 """Set difference for lists of intervals. That is, returns a list of
175 intervals that bounds all values bounded by x that are not also bounded by
176 y. x and y are expected to be in sorted order.
177
178 For example difference([(1, 10)], [(2, 3), (9, 15)]) would
179 return [(1, 1), (4, 8)], removing the values 2, 3, 9 and 10 from the
180 interval.
181 """
182 assert isinstance(other, type(self))
183 x = self.intervals
184 y = other.intervals
185 if not y:
186 return IntervalSet(x)
187 x = list(map(list, x))
188 i = 0
189 j = 0
190 result: list[Iterable[int]] = []
191 while i < len(x) and j < len(y):
192 # Iterate in parallel over x and y. j stays pointing at the smallest
193 # interval in the left hand side that could still overlap with some
194 # element of x at index >= i.
195 # Similarly, i is not incremented until we know that it does not
196 # overlap with any element of y at index >= j.
197
198 xl, xr = x[i]
199 assert xl <= xr
200 yl, yr = y[j]
201 assert yl <= yr
202
203 if yr < xl:
204 # The interval at y[j] is strictly to the left of the interval at
205 # x[i], so will not overlap with it or any later interval of x.
206 j += 1
207 elif yl > xr:
208 # The interval at y[j] is strictly to the right of the interval at
209 # x[i], so all of x[i] goes into the result as no further intervals
210 # in y will intersect it.
211 result.append(x[i])
212 i += 1
213 elif yl <= xl:
214 if yr >= xr:
215 # x[i] is contained entirely in y[j], so we just skip over it
216 # without adding it to the result.
217 i += 1
218 else:
219 # The beginning of x[i] is contained in y[j], so we update the
220 # left endpoint of x[i] to remove this, and increment j as we
221 # now have moved past it. Note that this is not added to the
222 # result as is, as more intervals from y may intersect it so it
223 # may need updating further.
224 x[i][0] = yr + 1
225 j += 1
226 else:
227 # yl > xl, so the left hand part of x[i] is not contained in y[j],
228 # so there are some values we should add to the result.
229 result.append((xl, yl - 1))
230
231 if yr + 1 <= xr:
232 # If y[j] finishes before x[i] does, there may be some values
233 # in x[i] left that should go in the result (or they may be
234 # removed by a later interval in y), so we update x[i] to
235 # reflect that and increment j because it no longer overlaps
236 # with any remaining element of x.
237 x[i][0] = yr + 1
238 j += 1
239 else:
240 # Every element of x[i] other than the initial part we have
241 # already added is contained in y[j], so we move to the next
242 # interval.
243 i += 1
244 # Any remaining intervals in x do not overlap with any of y, as if they did
245 # we would not have incremented j to the end, so can be added to the result
246 # as they are.
247 result.extend(x[i:])
248 return IntervalSet(map(tuple, result))
249
250 def intersection(self, other: "Self") -> "Self":
251 """Set intersection for lists of intervals."""
252 assert isinstance(other, type(self)), other
253 intervals = []
254 i = j = 0
255 while i < len(self.intervals) and j < len(other.intervals):
256 u, v = self.intervals[i]
257 U, V = other.intervals[j]
258 if u > V:
259 j += 1
260 elif U > v:
261 i += 1
262 else:
263 intervals.append((max(u, U), min(v, V)))
264 if v < V:
265 i += 1
266 else:
267 j += 1
268 return IntervalSet(intervals)
269
270 def char_in_shrink_order(self, i: int) -> str:
271 # We would like it so that, where possible, shrinking replaces
272 # characters with simple ascii characters, so we rejig this
273 # bit so that the smallest values are 0, 1, 2, ..., Z.
274 #
275 # Imagine that numbers are laid out as abc0yyyZ...
276 # this rearranges them so that they are laid out as
277 # 0yyyZcba..., which gives a better shrinking order.
278 if i <= self._idx_of_Z:
279 # We want to rewrite the integers [0, n] inclusive
280 # to [zero_point, Z_point].
281 n = self._idx_of_Z - self._idx_of_zero
282 if i <= n:
283 i += self._idx_of_zero
284 else:
285 # We want to rewrite the integers [n + 1, Z_point] to
286 # [zero_point, 0] (reversing the order so that codepoints below
287 # zero_point shrink upwards).
288 i = self._idx_of_zero - (i - n)
289 assert i < self._idx_of_zero
290 assert 0 <= i <= self._idx_of_Z
291
292 return chr(self[i])
293
294 def index_from_char_in_shrink_order(self, c: str) -> int:
295 """
296 Inverse of char_in_shrink_order.
297 """
298 assert len(c) == 1
299 i = self.index(ord(c))
300
301 if i <= self._idx_of_Z:
302 n = self._idx_of_Z - self._idx_of_zero
303 # Rewrite [zero_point, Z_point] to [0, n].
304 if self._idx_of_zero <= i <= self._idx_of_Z:
305 i -= self._idx_of_zero
306 assert 0 <= i <= n
307 # Rewrite [zero_point, 0] to [n + 1, Z_point].
308 else:
309 i = self._idx_of_zero - i + n
310 assert n + 1 <= i <= self._idx_of_Z
311 assert 0 <= i <= self._idx_of_Z
312
313 return i