Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/hypothesis/strategies/_internal/datetime.py: 23%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

211 statements  

1# This file is part of Hypothesis, which may be found at 

2# https://github.com/HypothesisWorks/hypothesis/ 

3# 

4# Copyright the Hypothesis Authors. 

5# Individual contributors are listed in AUTHORS.rst and the git log. 

6# 

7# This Source Code Form is subject to the terms of the Mozilla Public License, 

8# v. 2.0. If a copy of the MPL was not distributed with this file, You can 

9# obtain one at https://mozilla.org/MPL/2.0/. 

10 

11import datetime as dt 

12import operator as op 

13import warnings 

14import zoneinfo 

15from functools import cache, partial 

16from importlib import resources 

17from pathlib import Path 

18 

19from hypothesis.errors import InvalidArgument 

20from hypothesis.internal.validation import check_type, check_valid_interval 

21from hypothesis.strategies._internal.core import sampled_from 

22from hypothesis.strategies._internal.misc import just, none, nothing 

23from hypothesis.strategies._internal.strategies import SearchStrategy 

24from hypothesis.strategies._internal.utils import defines_strategy 

25 

# Names of the date fields, ordered from most to least significant. Used as
# the ``duration_names`` for drawing dates field by field.
DATENAMES = ("year", "month", "day")
# Names of the time-of-day fields, ordered from most to least significant.
TIMENAMES = ("hour", "minute", "second", "microsecond")

28 

29 

def is_pytz_timezone(tz):
    """Return True if ``tz`` is a tzinfo instance whose class lives in ``pytz``.

    The check is made on the name of the module that defines ``type(tz)``, so
    pytz itself never needs to be imported here.
    """
    if isinstance(tz, dt.tzinfo):
        defining_module = type(tz).__module__
        # Accept the top-level package and any submodule, but not unrelated
        # packages whose name merely begins with the letters "pytz".
        return defining_module == "pytz" or defining_module.startswith("pytz.")
    return False

35 

36 

def replace_tzinfo(value, timezone):
    """Return the datetime ``value`` with ``timezone`` attached as its tzinfo.

    Non-pytz timezones are attached with ``value.replace``; pytz timezones are
    attached with their own ``localize`` method instead.
    """
    if not is_pytz_timezone(timezone):
        return value.replace(tzinfo=timezone)

    # Pytz timezones are a little complicated, and using the .replace method
    # can cause some weird issues, so we go through their special "localize".
    #
    # The fold attribute doubles as a convenient boolean for is_dst, even
    # though the two are semantically distinct. For ambiguous or imaginary
    # hours, fold says whether to use the offset that applies before the gap
    # (fold=0) or after the gap (fold=1), whereas is_dst says whether to pick
    # the "DST" or the "STD" side (STD->STD or DST->DST transitions are
    # unclear, as you might expect).
    #
    # WARNING: this is INCORRECT for timezones with negative DST offsets such
    # as "Europe/Dublin", but it's unclear what we could do instead beyond
    # documenting the problem and recommending use of `dateutil` instead.
    return timezone.localize(value, is_dst=not value.fold)

54 

55 

def datetime_does_not_exist(value):
    """Report whether ``value`` fails to survive a round trip through UTC.

    This is an exact inverse of (and very similar to) the dateutil method
    https://dateutil.readthedocs.io/en/stable/tz.html#dateutil.tz.datetime_exists
    """
    # A naive datetime can never be imaginary. It still has to be special-cased
    # because a chain of .astimezone() calls would end in *the system local
    # timezone* rather than None.
    # See bug report in https://github.com/HypothesisWorks/hypothesis/issues/2662
    if value.tzinfo is None:
        return False

    try:
        # Convert to UTC and back again; a change in the naive portion (or an
        # overflow on the way) means the datetime does not exist.
        roundtrip = value.astimezone(dt.timezone.utc).astimezone(value.tzinfo)
    except OverflowError:
        # Only reachable right at the datetime.min / datetime.max boundaries.
        # Rejecting these is acceptable, because timezones are close to
        # meaningless before ~1900 and subject to a lot of change by 9999, so
        # this should be a very small fraction of possible values.
        return True

    same_tzinfo = value.tzinfo is roundtrip.tzinfo
    if not same_tzinfo and value.utcoffset() != roundtrip.utcoffset():
        # This only ever occurs during imaginary (i.e. nonexistent) datetimes,
        # and only for pytz timezones which do not follow PEP-495 semantics.
        # (may exclude a few other edge cases, but you should use zoneinfo anyway)
        return True

    assert same_tzinfo, "so only the naive portions are compared"
    return value != roundtrip

88 

89 

def _num_days_in_month(year, month):
    """Branchless equivalent of ``monthrange(year, month)[1]`` for valid inputs.

    Only arithmetic and (in)equality comparisons are used - no ``if``, no
    indexing - so the inputs are not concretized and no more path constraints
    are added than necessary.
    """
    # Gregorian rule: divisible by 4, except centuries not divisible by 400.
    divisible_by_four = year % 4 == 0
    skipped_century = (year % 100 == 0) * (year % 400 != 0)
    is_leap = divisible_by_four * (1 - skipped_century)
    february = month == 2
    # 1 when the month is April, June, September or November; otherwise 0.
    thirty_day_month = 1 - (month != 4) * (month != 6) * (month != 9) * (month != 11)
    # Start from 31 and subtract the shortfall for the shorter months.
    return 31 - thirty_day_month - february * (3 - is_leap)

100 

101 

def draw_capped_multipart(
    data, min_value, max_value, duration_names=DATENAMES + TIMENAMES
):
    """Draw one integer per field in ``duration_names`` and return them as a dict.

    Fields are drawn in the order given. For each field the lower (upper)
    limit is the corresponding attribute of ``min_value`` (``max_value``) for
    as long as every previously drawn field equalled its lower (upper) limit;
    after that the natural limit of the field is used. The natural upper
    limit for ``"day"`` is computed from the already-drawn ``"year"`` and
    ``"month"``, so those two must come before ``"day"`` in ``duration_names``.

    ``min_value`` and ``max_value`` must be of the same type (date, time or
    datetime) with ``min_value <= max_value``. If ``min_value`` has a ``fold``
    attribute, a ``"fold"`` entry of 0 or 1 is drawn as well.
    """
    assert isinstance(min_value, (dt.date, dt.time, dt.datetime))
    assert type(min_value) == type(max_value)
    assert min_value <= max_value

    # cap_{low, high} records whether every field drawn so far has equalled
    # ``min_value``'s / ``max_value``'s, i.e. whether that bound is still "active" and
    # constrains the next field.
    #
    # cap_{low, high} are conceptually booleans. We define them as integers and interpret
    # boolean operations on them as multiplication, so that we don't concretize or
    # branch under symbolic backends. See
    # https://github.com/HypothesisWorks/hypothesis/issues/4759.
    cap_low = 1
    cap_high = 1
    result = {}
    for name in duration_names:
        natural_low = getattr(dt.datetime.min, name)
        if name == "day":
            natural_high = _num_days_in_month(result["year"], result["month"])
        else:
            natural_high = getattr(dt.datetime.max, name)
        # equivalent to:
        #   low = min_value.<name> if cap_low else natural_low
        #   high = max_value.<name> if cap_high else natural_high
        low = natural_low + cap_low * (getattr(min_value, name) - natural_low)
        high = natural_high + cap_high * (getattr(max_value, name) - natural_high)
        if name == "year":
            # Years are the only field asked to shrink towards 2000.
            val = data.draw_integer(low, high, shrink_towards=2000)
        else:
            val = data.draw_integer(low, high)
        result[name] = val
        # A cap stays active only while the drawn value sits exactly on it.
        cap_low = cap_low * (val == low)
        cap_high = cap_high * (val == high)
    if hasattr(min_value, "fold"):
        # The `fold` attribute is ignored in comparison of naive datetimes.
        # In tz-aware datetimes it would require *very* invasive changes to
        # the logic above, and be very sensitive to the specific timezone
        # (at the cost of efficient shrinking and mutation), so at least for
        # now we stick with the status quo and generate it independently.
        result["fold"] = data.draw_integer(0, 1)
    return result

146 

147 

class DatetimeStrategy(SearchStrategy):
    """Strategy combining a bounded naive datetime with a drawn timezone."""

    def __init__(self, min_value, max_value, timezones_strat, allow_imaginary):
        super().__init__()
        # Internal sanity checks on the arguments.
        assert isinstance(min_value, dt.datetime)
        assert isinstance(max_value, dt.datetime)
        assert min_value.tzinfo is None
        assert max_value.tzinfo is None
        assert min_value <= max_value
        assert isinstance(timezones_strat, SearchStrategy)
        assert isinstance(allow_imaginary, bool)
        self.min_value, self.max_value = min_value, max_value
        self.tz_strat = timezones_strat
        self.allow_imaginary = allow_imaginary

    def do_draw(self, data):
        """Draw a timezone first, then a datetime carrying that timezone."""
        drawn_tz = data.draw(self.tz_strat)
        candidate = self.draw_naive_datetime_and_combine(data, drawn_tz)

        # TODO: with some probability, systematically search for one of
        #   - an imaginary time (if allowed),
        #   - a time within 24hrs of a leap second (if there are any within bounds),
        #   - other subtle, little-known, or nasty issues as described in
        #     https://github.com/HypothesisWorks/hypothesis/issues/69

        if self.allow_imaginary:
            return candidate
        # Imaginary times are disallowed here, so reject one if we drew it.
        if datetime_does_not_exist(candidate):
            data.mark_invalid(f"{candidate} does not exist (usually a DST transition)")
        return candidate

    def draw_naive_datetime_and_combine(self, data, tz):
        """Draw the naive datetime fields within bounds and attach ``tz``.

        If the datetime cannot be built or combined with ``tz`` (ValueError or
        OverflowError), the draw is marked invalid.
        """
        fields = draw_capped_multipart(data, self.min_value, self.max_value)
        try:
            naive = dt.datetime(**fields)
            return replace_tzinfo(naive, timezone=tz)
        except (ValueError, OverflowError):
            data.mark_invalid(
                f"Failed to draw a datetime between {self.min_value!r} and "
                f"{self.max_value!r} with timezone from {self.tz_strat!r}."
            )

188 

189 

@defines_strategy(force_reusable_values=True)
def datetimes(
    min_value: dt.datetime = dt.datetime.min,
    max_value: dt.datetime = dt.datetime.max,
    *,
    timezones: SearchStrategy[dt.tzinfo | None] = none(),
    allow_imaginary: bool = True,
) -> SearchStrategy[dt.datetime]:
    """datetimes(min_value=datetime.datetime.min, max_value=datetime.datetime.max, *, timezones=none(), allow_imaginary=True)

    A strategy for generating datetimes, which may be timezone-aware.

    This strategy works by drawing a naive datetime between ``min_value``
    and ``max_value``, which must both be naive (have no timezone).

    ``timezones`` must be a strategy that generates either ``None``, for naive
    datetimes, or :class:`~python:datetime.tzinfo` objects for 'aware' datetimes.
    You can construct your own, though we recommend using one of these built-in
    strategies:

    * with the standard library: :func:`hypothesis.strategies.timezones`;
    * with :pypi:`dateutil <python-dateutil>`:
      :func:`hypothesis.extra.dateutil.timezones`; or
    * with :pypi:`pytz`: :func:`hypothesis.extra.pytz.timezones`.

    You may pass ``allow_imaginary=False`` to filter out "imaginary" datetimes
    which did not (or will not) occur due to daylight savings, leap seconds,
    timezone and calendar adjustments, etc.  Imaginary datetimes are allowed
    by default, because malformed timestamps are a common source of bugs.

    Examples from this strategy shrink towards midnight on January 1st 2000,
    local time.
    """
    # Why insist on naive bounds? A strategy accepting aware bounds could be
    # written in principle, but its API and validation would be much harder.
    # To generate datetimes between two particular moments in time, consider
    # (a) simply filtering out-of-bounds values; (b) when the bounds are very
    # close, drawing a value and subtracting its UTC offset, handling
    # overflows and nonexistent times; or (c) something customised for
    # datetimes in e.g. a four-microsecond span which is not representable in
    # UTC. Supporting (d), all of the above, would mean a much more complex
    # API for all users and a useful feature for very few.
    check_type(bool, allow_imaginary, "allow_imaginary")
    check_type(dt.datetime, min_value, "min_value")
    check_type(dt.datetime, max_value, "max_value")
    if min_value.tzinfo is not None:
        raise InvalidArgument(f"{min_value=} must not have tzinfo")
    if max_value.tzinfo is not None:
        raise InvalidArgument(f"{max_value=} must not have tzinfo")
    check_valid_interval(min_value, max_value, "min_value", "max_value")
    if isinstance(timezones, SearchStrategy):
        return DatetimeStrategy(min_value, max_value, timezones, allow_imaginary)
    raise InvalidArgument(
        f"{timezones=} must be a SearchStrategy that can "
        "provide tzinfo for datetimes (either None or dt.tzinfo objects)"
    )

246 

247 

class TimeStrategy(SearchStrategy):
    """Strategy for times between two bounds, with a drawn tzinfo attached."""

    def __init__(self, min_value, max_value, timezones_strat):
        super().__init__()
        self.min_value, self.max_value = min_value, max_value
        self.tz_strat = timezones_strat

    def do_draw(self, data):
        """Draw the time fields first, then the timezone, and build the time."""
        fields = draw_capped_multipart(data, self.min_value, self.max_value, TIMENAMES)
        drawn_tz = data.draw(self.tz_strat)
        return dt.time(tzinfo=drawn_tz, **fields)

259 

260 

@defines_strategy(force_reusable_values=True)
def times(
    min_value: dt.time = dt.time.min,
    max_value: dt.time = dt.time.max,
    *,
    timezones: SearchStrategy[dt.tzinfo | None] = none(),
) -> SearchStrategy[dt.time]:
    """times(min_value=datetime.time.min, max_value=datetime.time.max, *, timezones=none())

    A strategy for times between ``min_value`` and ``max_value``.

    The ``timezones`` argument is handled as for :py:func:`datetimes`.

    Examples from this strategy shrink towards midnight, with the timezone
    component shrinking as for the strategy that provided it.
    """
    # Both bounds must be ``datetime.time`` objects...
    for label, bound in (("min_value", min_value), ("max_value", max_value)):
        check_type(dt.time, bound, label)
    # ...and both must be naive.
    if min_value.tzinfo is not None:
        raise InvalidArgument(f"{min_value=} must not have tzinfo")
    if max_value.tzinfo is not None:
        raise InvalidArgument(f"{max_value=} must not have tzinfo")
    check_valid_interval(min_value, max_value, "min_value", "max_value")
    return TimeStrategy(min_value, max_value, timezones)

285 

286 

class DateStrategy(SearchStrategy):
    """Strategy for dates between ``min_value`` and ``max_value`` (inclusive).

    The bounds must be distinct, with ``min_value < max_value``.
    """

    def __init__(self, min_value, max_value):
        super().__init__()
        assert isinstance(min_value, dt.date)
        assert isinstance(max_value, dt.date)
        assert min_value < max_value
        self.min_value = min_value
        self.max_value = max_value

    def do_draw(self, data):
        """Draw year, month and day within bounds and assemble a date."""
        return dt.date(
            **draw_capped_multipart(data, self.min_value, self.max_value, DATENAMES)
        )

    def filter(self, condition):
        """Filter this strategy, rewriting simple comparisons into tighter bounds.

        When ``condition`` is a ``functools.partial`` of one of ``operator.lt``,
        ``le``, ``eq``, ``ge`` or ``gt`` applied to a single positional date
        (and no keywords), the result is computed directly: ``nothing()`` if
        no date can satisfy it, ``self`` if every date already does, or a
        ``dates(lo, hi)`` strategy with narrowed bounds. Any other condition
        is passed to the parent class's ``filter``.
        """
        if (
            isinstance(condition, partial)
            and len(args := condition.args) == 1
            and not condition.keywords
            and isinstance(arg := args[0], dt.date)
            and condition.func in (op.lt, op.le, op.eq, op.ge, op.gt)
        ):
            try:
                # Turn the strict comparisons into inclusive ones by moving the
                # bound one day inwards.
                arg += dt.timedelta(days={op.lt: 1, op.gt: -1}.get(condition.func, 0))
            except OverflowError:  # gt date.max, or lt date.min
                return nothing()
            lo, hi = {
                # We're talking about op(arg, x) - the reverse of our usual intuition!
                op.lt: (arg, self.max_value),  # lambda x: arg < x
                op.le: (arg, self.max_value),  # lambda x: arg <= x
                op.eq: (arg, arg),  # lambda x: arg == x
                op.ge: (self.min_value, arg),  # lambda x: arg >= x
                op.gt: (self.min_value, arg),  # lambda x: arg > x
            }[condition.func]
            # Never widen beyond the bounds this strategy already has.
            lo = max(lo, self.min_value)
            hi = min(hi, self.max_value)
            if hi < lo:
                return nothing()
            if lo <= self.min_value and self.max_value <= hi:
                return self
            return dates(lo, hi)

        return super().filter(condition)

331 

332 

@defines_strategy(force_reusable_values=True)
def dates(
    min_value: dt.date = dt.date.min, max_value: dt.date = dt.date.max
) -> SearchStrategy[dt.date]:
    """dates(min_value=datetime.date.min, max_value=datetime.date.max)

    A strategy for dates between ``min_value`` and ``max_value``.

    Examples from this strategy shrink towards January 1st 2000.
    """
    for label, bound in (("min_value", min_value), ("max_value", max_value)):
        check_type(dt.date, bound, label)
    check_valid_interval(min_value, max_value, "min_value", "max_value")
    # A single-point interval needs no drawing at all.
    if min_value != max_value:
        return DateStrategy(min_value, max_value)
    return just(min_value)

349 

350 

class TimedeltaStrategy(SearchStrategy):
    """Strategy for timedeltas between two distinct bounds."""

    def __init__(self, min_value, max_value):
        super().__init__()
        assert isinstance(min_value, dt.timedelta)
        assert isinstance(max_value, dt.timedelta)
        assert min_value < max_value
        self.min_value, self.max_value = min_value, max_value

    def do_draw(self, data):
        """Draw days, seconds and microseconds in turn and build a timedelta.

        A bound keeps constraining the next component only while every
        component drawn so far landed exactly on that bound.
        """
        parts = {}
        pinned_low = True
        pinned_high = True
        for field in ("days", "seconds", "microseconds"):
            floor_source = self.min_value if pinned_low else dt.timedelta.min
            ceiling_source = self.max_value if pinned_high else dt.timedelta.max
            floor = getattr(floor_source, field)
            ceiling = getattr(ceiling_source, field)
            drawn = data.draw_integer(floor, ceiling)
            parts[field] = drawn
            pinned_low = pinned_low and drawn == floor
            pinned_high = pinned_high and drawn == ceiling
        return dt.timedelta(**parts)

372 

373 

@defines_strategy(force_reusable_values=True)
def timedeltas(
    min_value: dt.timedelta = dt.timedelta.min,
    max_value: dt.timedelta = dt.timedelta.max,
) -> SearchStrategy[dt.timedelta]:
    """timedeltas(min_value=datetime.timedelta.min, max_value=datetime.timedelta.max)

    A strategy for timedeltas between ``min_value`` and ``max_value``.

    Examples from this strategy shrink towards zero.
    """
    for label, bound in (("min_value", min_value), ("max_value", max_value)):
        check_type(dt.timedelta, bound, label)
    check_valid_interval(min_value, max_value, "min_value", "max_value")
    # A single-point interval needs no drawing at all.
    if min_value != max_value:
        return TimedeltaStrategy(min_value=min_value, max_value=max_value)
    return just(min_value)

391 

392 

@cache
def _valid_key_cacheable(tzpath, key):
    """Return True if timezone data for ``key`` can be located.

    Each directory in the tuple ``tzpath`` is checked for a ``key`` entry
    first. If none has it, the ``tzdata`` package is consulted; False is
    returned when the relevant ``tzdata`` (sub)package cannot be imported.
    Results are memoised per ``(tzpath, key)`` pair.
    """
    assert isinstance(tzpath, tuple)  # zoneinfo changed, better update this function!
    for root in tzpath:
        if Path(root).joinpath(key).exists():  # pragma: no branch
            # No branch because most systems only have one TZPATH component.
            return True
    else:  # pragma: no cover
        # This branch is only taken for names which are known to zoneinfo
        # but not present on the filesystem, i.e. on Windows with tzdata,
        # and so is never executed by our coverage tests.
        *package_loc, resource_name = key.split("/")
        # Join the components onto the base package. Plain concatenation would
        # give "tzdata.zoneinfo." (trailing dot) for top-level keys such as
        # "GMT", which can never be imported and so would always be rejected.
        package = ".".join(["tzdata.zoneinfo", *package_loc])
        try:
            return (resources.files(package) / resource_name).exists()
        except ModuleNotFoundError:
            return False

410 

411 

@defines_strategy(force_reusable_values=True)
def timezone_keys(
    *,
    # allow_alias: bool = True,
    # allow_deprecated: bool = True,
    allow_prefix: bool = True,
) -> SearchStrategy[str]:
    """A strategy for :wikipedia:`IANA timezone names <List_of_tz_database_time_zones>`.

    As well as timezone names like ``"UTC"``, ``"Australia/Sydney"``, or
    ``"America/New_York"``, this strategy can generate:

    - Aliases such as ``"Antarctica/McMurdo"``, which links to ``"Pacific/Auckland"``.
    - Deprecated names such as ``"Antarctica/South_Pole"``, which *also* links to
      ``"Pacific/Auckland"``.  Note that most but
      not all deprecated timezone names are also aliases.
    - Timezone names with the ``"posix/"`` or ``"right/"`` prefixes, unless
      ``allow_prefix=False``.

    These strings are provided separately from Tzinfo objects - such as ZoneInfo
    instances from the timezones() strategy - to facilitate testing of timezone
    logic without needing workarounds to access non-canonical names.

    .. note::

        `The tzdata package is required on Windows
        <https://docs.python.org/3/library/zoneinfo.html#data-sources>`__.
        ``pip install hypothesis[zoneinfo]`` installs it, if and only if needed.

    On Windows, you may need to access IANA timezone data via the :pypi:`tzdata`
    package.  For non-IANA timezones, such as Windows-native names or GNU TZ
    strings, we recommend using :func:`~hypothesis.strategies.sampled_from` with
    the :pypi:`dateutil <python-dateutil>` package, e.g.
    :meth:`dateutil:dateutil.tz.tzwin.list`.
    """
    # check_type(bool, allow_alias, "allow_alias")
    # check_type(bool, allow_deprecated, "allow_deprecated")
    check_type(bool, allow_prefix, "allow_prefix")

    # The warning filter below only applies inside this ``with`` block.
    with warnings.catch_warnings():
        try:
            warnings.simplefilter("ignore", EncodingWarning)
        except NameError:  # pragma: no cover
            pass
        # On Python 3.12 (and others?), `available_timezones()` opens files
        # without specifying an encoding - which our selftests make an error.
        # "UTC" is placed first, followed by the sorted names from zoneinfo.
        available_timezones = ("UTC", *sorted(zoneinfo.available_timezones()))

    # TODO: filter out alias and deprecated names if disallowed

    # When prefixes are allowed, we first choose a key and then flatmap to get our
    # choice with one of the available prefixes.  That in turn means that we need
    # some logic to determine which prefixes are available for a given key:

    def valid_key(key):
        # "UTC" is always accepted; any other key is checked against TZPATH.
        return key == "UTC" or _valid_key_cacheable(zoneinfo.TZPATH, key)

    # TODO: work out how to place a higher priority on "weird" timezones
    # For details see https://github.com/HypothesisWorks/hypothesis/issues/2414
    strategy = sampled_from([key for key in available_timezones if valid_key(key)])

    if not allow_prefix:
        return strategy

    def sample_with_prefixes(zone):
        # Offer the bare name plus whichever prefixed variants are valid keys.
        keys_with_prefixes = (zone, f"posix/{zone}", f"right/{zone}")
        return sampled_from([key for key in keys_with_prefixes if valid_key(key)])

    return strategy.flatmap(sample_with_prefixes)

481 

482 

@defines_strategy(force_reusable_values=True)
def timezones(*, no_cache: bool = False) -> SearchStrategy["zoneinfo.ZoneInfo"]:
    """A strategy for :class:`python:zoneinfo.ZoneInfo` objects.

    If ``no_cache=True``, the generated instances are constructed using
    :meth:`ZoneInfo.no_cache <python:zoneinfo.ZoneInfo.no_cache>` instead
    of the usual constructor.  This may change the semantics of your datetimes
    in surprising ways, so only use it if you know that you need to!

    .. note::

        `The tzdata package is required on Windows
        <https://docs.python.org/3/library/zoneinfo.html#data-sources>`__.
        ``pip install hypothesis[zoneinfo]`` installs it, if and only if needed.
    """
    check_type(bool, no_cache, "no_cache")
    # Choose the constructor once, then apply it to every generated key.
    if no_cache:
        build_zone = zoneinfo.ZoneInfo.no_cache
    else:
        build_zone = zoneinfo.ZoneInfo
    return timezone_keys().map(build_zone)