Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/hypothesis/internal/intervalsets.py: 57%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

166 statements  

1# This file is part of Hypothesis, which may be found at 

2# https://github.com/HypothesisWorks/hypothesis/ 

3# 

4# Copyright the Hypothesis Authors. 

5# Individual contributors are listed in AUTHORS.rst and the git log. 

6# 

7# This Source Code Form is subject to the terms of the Mozilla Public License, 

8# v. 2.0. If a copy of the MPL was not distributed with this file, You can 

9# obtain one at https://mozilla.org/MPL/2.0/. 

10 

11from collections.abc import Iterable, Sequence 

12from typing import TYPE_CHECKING, Union, cast, final 

13 

14if TYPE_CHECKING: 

15 from typing import TypeAlias 

16 

17 from typing_extensions import Self 

18 

19IntervalsT: "TypeAlias" = tuple[tuple[int, int], ...] 

20 

21 

22# @final makes mypy happy with the Self return annotations. We otherwise run 

23# afoul of: 

24# > You should not use Self as the return annotation if the method is not 

25# > guaranteed to return an instance of a subclass when the class is subclassed 

26# > https://docs.python.org/3/library/typing.html#typing.Self 

27 

28 

29@final 

30class IntervalSet: 

31 """ 

32 A compact and efficient representation of a set of ``(a, b)`` intervals. Can 

33 be treated like a set of integers, in that ``n in intervals`` will return 

34 ``True`` if ``n`` is contained in any of the ``(a, b)`` intervals, and 

35 ``False`` otherwise. 

36 """ 

37 

38 @classmethod 

39 def from_string(cls, s: str) -> "Self": 

40 """Return a tuple of intervals, covering the codepoints of characters in `s`. 

41 

42 >>> IntervalSet.from_string('abcdef0123456789') 

43 ((48, 57), (97, 102)) 

44 """ 

45 x = cls([(ord(c), ord(c)) for c in sorted(s)]) 

46 return x.union(x) 

47 

48 def __init__(self, intervals: Iterable[Sequence[int]] = ()) -> None: 

49 self.intervals: IntervalsT = cast( 

50 IntervalsT, tuple(tuple(v) for v in intervals) 

51 ) 

52 # cast above is validated by this length assertion. check here instead of 

53 # before to not exhaust generators before we create intervals from it 

54 assert all(len(v) == 2 for v in self.intervals) 

55 

56 self.offsets: list[int] = [0] 

57 for u, v in self.intervals: 

58 self.offsets.append(self.offsets[-1] + v - u + 1) 

59 self.size = self.offsets.pop() 

60 self._idx_of_zero = self.index_above(ord("0")) 

61 self._idx_of_Z = min(self.index_above(ord("Z")), len(self) - 1) 

62 

63 def __len__(self) -> int: 

64 return self.size 

65 

66 def __iter__(self) -> Iterable[int]: 

67 for u, v in self.intervals: 

68 yield from range(u, v + 1) 

69 

70 def __getitem__(self, i: int) -> int: 

71 if i < 0: 

72 i = self.size + i 

73 if i < 0 or i >= self.size: 

74 raise IndexError(f"Invalid index {i} for [0, {self.size})") 

75 # Want j = maximal such that offsets[j] <= i 

76 

77 j = len(self.intervals) - 1 

78 if self.offsets[j] > i: 

79 hi = j 

80 lo = 0 

81 # Invariant: offsets[lo] <= i < offsets[hi] 

82 while lo + 1 < hi: 

83 mid = (lo + hi) // 2 

84 if self.offsets[mid] <= i: 

85 lo = mid 

86 else: 

87 hi = mid 

88 j = lo 

89 t = i - self.offsets[j] 

90 u, v = self.intervals[j] 

91 r = u + t 

92 assert r <= v 

93 return r 

94 

95 def __contains__(self, elem: Union[str, int]) -> bool: 

96 if isinstance(elem, str): 

97 elem = ord(elem) 

98 assert 0 <= elem <= 0x10FFFF 

99 return any(start <= elem <= end for start, end in self.intervals) 

100 

101 def __repr__(self) -> str: 

102 return f"IntervalSet({self.intervals!r})" 

103 

104 def index(self, value: int) -> int: 

105 for offset, (u, v) in zip(self.offsets, self.intervals): 

106 if u == value: 

107 return offset 

108 elif u > value: 

109 raise ValueError(f"{value} is not in list") 

110 if value <= v: 

111 return offset + (value - u) 

112 raise ValueError(f"{value} is not in list") 

113 

114 def index_above(self, value: int) -> int: 

115 for offset, (u, v) in zip(self.offsets, self.intervals): 

116 if u >= value: 

117 return offset 

118 if value <= v: 

119 return offset + (value - u) 

120 return self.size 

121 

122 def __or__(self, other: "Self") -> "Self": 

123 return self.union(other) 

124 

125 def __sub__(self, other: "Self") -> "Self": 

126 return self.difference(other) 

127 

128 def __and__(self, other: "Self") -> "Self": 

129 return self.intersection(other) 

130 

131 def __eq__(self, other: object) -> bool: 

132 return isinstance(other, IntervalSet) and (other.intervals == self.intervals) 

133 

134 def __hash__(self) -> int: 

135 return hash(self.intervals) 

136 

137 def union(self, other: "Self") -> "Self": 

138 """Merge two sequences of intervals into a single tuple of intervals. 

139 

140 Any integer bounded by `x` or `y` is also bounded by the result. 

141 

142 >>> union([(3, 10)], [(1, 2), (5, 17)]) 

143 ((1, 17),) 

144 """ 

145 assert isinstance(other, type(self)) 

146 x = self.intervals 

147 y = other.intervals 

148 if not x: 

149 return IntervalSet(y) 

150 if not y: 

151 return IntervalSet(x) 

152 intervals = sorted(x + y, reverse=True) 

153 result = [intervals.pop()] 

154 while intervals: 

155 # 1. intervals is in descending order 

156 # 2. pop() takes from the RHS. 

157 # 3. (a, b) was popped 1st, then (u, v) was popped 2nd 

158 # 4. Therefore: a <= u 

159 # 5. We assume that u <= v and a <= b 

160 # 6. So we need to handle 2 cases of overlap, and one disjoint case 

161 # | u--v | u----v | u--v | 

162 # | a----b | a--b | a--b | 

163 u, v = intervals.pop() 

164 a, b = result[-1] 

165 if u <= b + 1: 

166 # Overlap cases 

167 result[-1] = (a, max(v, b)) 

168 else: 

169 # Disjoint case 

170 result.append((u, v)) 

171 return IntervalSet(result) 

172 

173 def difference(self, other: "Self") -> "Self": 

174 """Set difference for lists of intervals. That is, returns a list of 

175 intervals that bounds all values bounded by x that are not also bounded by 

176 y. x and y are expected to be in sorted order. 

177 

178 For example difference([(1, 10)], [(2, 3), (9, 15)]) would 

179 return [(1, 1), (4, 8)], removing the values 2, 3, 9 and 10 from the 

180 interval. 

181 """ 

182 assert isinstance(other, type(self)) 

183 x = self.intervals 

184 y = other.intervals 

185 if not y: 

186 return IntervalSet(x) 

187 x = list(map(list, x)) 

188 i = 0 

189 j = 0 

190 result: list[Iterable[int]] = [] 

191 while i < len(x) and j < len(y): 

192 # Iterate in parallel over x and y. j stays pointing at the smallest 

193 # interval in the left hand side that could still overlap with some 

194 # element of x at index >= i. 

195 # Similarly, i is not incremented until we know that it does not 

196 # overlap with any element of y at index >= j. 

197 

198 xl, xr = x[i] 

199 assert xl <= xr 

200 yl, yr = y[j] 

201 assert yl <= yr 

202 

203 if yr < xl: 

204 # The interval at y[j] is strictly to the left of the interval at 

205 # x[i], so will not overlap with it or any later interval of x. 

206 j += 1 

207 elif yl > xr: 

208 # The interval at y[j] is strictly to the right of the interval at 

209 # x[i], so all of x[i] goes into the result as no further intervals 

210 # in y will intersect it. 

211 result.append(x[i]) 

212 i += 1 

213 elif yl <= xl: 

214 if yr >= xr: 

215 # x[i] is contained entirely in y[j], so we just skip over it 

216 # without adding it to the result. 

217 i += 1 

218 else: 

219 # The beginning of x[i] is contained in y[j], so we update the 

220 # left endpoint of x[i] to remove this, and increment j as we 

221 # now have moved past it. Note that this is not added to the 

222 # result as is, as more intervals from y may intersect it so it 

223 # may need updating further. 

224 x[i][0] = yr + 1 

225 j += 1 

226 else: 

227 # yl > xl, so the left hand part of x[i] is not contained in y[j], 

228 # so there are some values we should add to the result. 

229 result.append((xl, yl - 1)) 

230 

231 if yr + 1 <= xr: 

232 # If y[j] finishes before x[i] does, there may be some values 

233 # in x[i] left that should go in the result (or they may be 

234 # removed by a later interval in y), so we update x[i] to 

235 # reflect that and increment j because it no longer overlaps 

236 # with any remaining element of x. 

237 x[i][0] = yr + 1 

238 j += 1 

239 else: 

240 # Every element of x[i] other than the initial part we have 

241 # already added is contained in y[j], so we move to the next 

242 # interval. 

243 i += 1 

244 # Any remaining intervals in x do not overlap with any of y, as if they did 

245 # we would not have incremented j to the end, so can be added to the result 

246 # as they are. 

247 result.extend(x[i:]) 

248 return IntervalSet(map(tuple, result)) 

249 

250 def intersection(self, other: "Self") -> "Self": 

251 """Set intersection for lists of intervals.""" 

252 assert isinstance(other, type(self)), other 

253 intervals = [] 

254 i = j = 0 

255 while i < len(self.intervals) and j < len(other.intervals): 

256 u, v = self.intervals[i] 

257 U, V = other.intervals[j] 

258 if u > V: 

259 j += 1 

260 elif U > v: 

261 i += 1 

262 else: 

263 intervals.append((max(u, U), min(v, V))) 

264 if v < V: 

265 i += 1 

266 else: 

267 j += 1 

268 return IntervalSet(intervals) 

269 

270 def char_in_shrink_order(self, i: int) -> str: 

271 # We would like it so that, where possible, shrinking replaces 

272 # characters with simple ascii characters, so we rejig this 

273 # bit so that the smallest values are 0, 1, 2, ..., Z. 

274 # 

275 # Imagine that numbers are laid out as abc0yyyZ... 

276 # this rearranges them so that they are laid out as 

277 # 0yyyZcba..., which gives a better shrinking order. 

278 if i <= self._idx_of_Z: 

279 # We want to rewrite the integers [0, n] inclusive 

280 # to [zero_point, Z_point]. 

281 n = self._idx_of_Z - self._idx_of_zero 

282 if i <= n: 

283 i += self._idx_of_zero 

284 else: 

285 # We want to rewrite the integers [n + 1, Z_point] to 

286 # [zero_point, 0] (reversing the order so that codepoints below 

287 # zero_point shrink upwards). 

288 i = self._idx_of_zero - (i - n) 

289 assert i < self._idx_of_zero 

290 assert 0 <= i <= self._idx_of_Z 

291 

292 return chr(self[i]) 

293 

294 def index_from_char_in_shrink_order(self, c: str) -> int: 

295 """ 

296 Inverse of char_in_shrink_order. 

297 """ 

298 assert len(c) == 1 

299 i = self.index(ord(c)) 

300 

301 if i <= self._idx_of_Z: 

302 n = self._idx_of_Z - self._idx_of_zero 

303 # Rewrite [zero_point, Z_point] to [0, n]. 

304 if self._idx_of_zero <= i <= self._idx_of_Z: 

305 i -= self._idx_of_zero 

306 assert 0 <= i <= n 

307 # Rewrite [zero_point, 0] to [n + 1, Z_point]. 

308 else: 

309 i = self._idx_of_zero - i + n 

310 assert n + 1 <= i <= self._idx_of_Z 

311 assert 0 <= i <= self._idx_of_Z 

312 

313 return i