1# This file is part of Hypothesis, which may be found at
2# https://github.com/HypothesisWorks/hypothesis/
3#
4# Copyright the Hypothesis Authors.
5# Individual contributors are listed in AUTHORS.rst and the git log.
6#
7# This Source Code Form is subject to the terms of the Mozilla Public License,
8# v. 2.0. If a copy of the MPL was not distributed with this file, You can
9# obtain one at https://mozilla.org/MPL/2.0/.
10
11import datetime as dt
12import operator as op
13import zoneinfo
14from calendar import monthrange
15from functools import cache, partial
16from importlib import resources
17from pathlib import Path
18
19from hypothesis.errors import InvalidArgument
20from hypothesis.internal.validation import check_type, check_valid_interval
21from hypothesis.strategies._internal.core import sampled_from
22from hypothesis.strategies._internal.misc import just, none, nothing
23from hypothesis.strategies._internal.strategies import SearchStrategy
24from hypothesis.strategies._internal.utils import defines_strategy
25
# Field names of the date and time-of-day components, most significant first.
# draw_capped_multipart() draws one integer per name in this order, and the
# resulting dict is passed as keyword arguments to the dt.date / dt.time /
# dt.datetime constructors.
DATENAMES = ("year", "month", "day")
TIMENAMES = ("hour", "minute", "second", "microsecond")
28
29
def is_pytz_timezone(tz):
    """Return True if ``tz`` is a tzinfo instance whose class is defined in pytz.

    Detection is by the name of the module which defines ``type(tz)`` - either
    ``pytz`` itself or any ``pytz.*`` submodule.  Anything that is not a
    :class:`~python:datetime.tzinfo` instance (including ``None``) gives False.
    """
    if isinstance(tz, dt.tzinfo):
        defining_module = type(tz).__module__
        return defining_module == "pytz" or defining_module.startswith("pytz.")
    return False
35
36
def replace_tzinfo(value, timezone):
    """Return the datetime ``value`` with ``timezone`` attached as its tzinfo.

    Pytz timezones are attached with their ``localize`` method; everything
    else (including ``None``) goes through ``value.replace(tzinfo=...)``.
    """
    if not is_pytz_timezone(timezone):
        return value.replace(tzinfo=timezone)

    # Pytz timezones are a little complicated, and the .replace method can
    # cause some weird issues with them, so their special "localize" method
    # is used instead.
    #
    # The fold attribute serves as a convenient boolean for is_dst, even though
    # the two are semantically distinct.  For ambiguous or imaginary hours,
    # fold selects the offset that applies before the gap (fold=0) or the
    # offset that applies after the gap (fold=1), whereas is_dst selects the
    # side that is "DST" or "STD" (STD->STD or DST->DST transitions are
    # unclear, as you might expect).
    #
    # WARNING: this is INCORRECT for timezones with negative DST offsets such as
    # "Europe/Dublin", but it's unclear what we could do instead beyond
    # documenting the problem and recommending use of `dateutil` instead.
    return timezone.localize(value, is_dst=not value.fold)
54
55
def datetime_does_not_exist(value):
    """Report whether ``value`` fails to survive a round trip through UTC.

    This is the inverse of (and very similar to) the dateutil method
    https://dateutil.readthedocs.io/en/stable/tz.html#dateutil.tz.datetime_exists

    Returns False for naive datetimes, and True if the round trip overflows
    or yields a different datetime.
    """
    tz = value.tzinfo
    # A naive datetime can never be imaginary.  The special case is required
    # because a chain of .astimezone() calls would finish in *the system local
    # timezone* rather than None.
    # See bug report in https://github.com/HypothesisWorks/hypothesis/issues/2662
    if tz is None:
        return False

    try:
        # Convert to UTC and back again; a changed naive portion, or an
        # overflow while converting, means that the datetime does not exist.
        as_utc = value.astimezone(dt.timezone.utc)
        roundtrip = as_utc.astimezone(tz)
    except OverflowError:
        # Only happens at the datetime.min or datetime.max boundary.
        # Rejecting these is acceptable, because timezones are close to
        # meaningless before ~1900 and subject to a lot of change by
        # 9999, so it should be a very small fraction of possible values.
        return True

    same_tzinfo = tz is roundtrip.tzinfo
    if not same_tzinfo and value.utcoffset() != roundtrip.utcoffset():
        # This only ever occurs during imaginary (i.e. nonexistent) datetimes,
        # and only for pytz timezones which do not follow PEP-495 semantics.
        # (may exclude a few other edge cases, but you should use zoneinfo anyway)
        return True

    assert same_tzinfo, "so only the naive portions are compared"
    return value != roundtrip
88
89
def draw_capped_multipart(
    data, min_value, max_value, duration_names=DATENAMES + TIMENAMES
):
    """Draw one integer per field so that the result lies in [min_value, max_value].

    ``min_value`` and ``max_value`` must be of the same type (date, time or
    datetime).  Fields are drawn in the order given by ``duration_names``; a
    bound only constrains a field while every earlier field was drawn exactly
    on that bound.  Returns a dict of field name to drawn value, plus a
    ``"fold"`` entry when ``min_value`` has a ``fold`` attribute.
    """
    assert isinstance(min_value, (dt.date, dt.time, dt.datetime))
    assert type(min_value) == type(max_value)
    assert min_value <= max_value
    drawn = {}
    at_floor = at_ceiling = True
    for field in duration_names:
        # Once a bound has been left behind, the field's natural range
        # (taken from datetime.min / datetime.max) applies instead.
        floor_source = min_value if at_floor else dt.datetime.min
        ceiling_source = max_value if at_ceiling else dt.datetime.max
        lower = getattr(floor_source, field)
        upper = getattr(ceiling_source, field)
        if field == "day" and not at_ceiling:
            # The last valid day depends on the year and month drawn so far.
            upper = monthrange(**drawn)[1]
        if field == "year":
            chosen = data.draw_integer(lower, upper, shrink_towards=2000)
        else:
            chosen = data.draw_integer(lower, upper)
        drawn[field] = chosen
        at_floor = at_floor and chosen == lower
        at_ceiling = at_ceiling and chosen == upper
    if hasattr(min_value, "fold"):
        # The `fold` attribute is ignored in comparison of naive datetimes.
        # In tz-aware datetimes it would require *very* invasive changes to
        # the logic above, and be very sensitive to the specific timezone
        # (at the cost of efficient shrinking and mutation), so at least for
        # now we stick with the status quo and generate it independently.
        drawn["fold"] = data.draw_integer(0, 1)
    return drawn
118
119
class DatetimeStrategy(SearchStrategy):
    """Draws a timezone, then a naive datetime within bounds, and combines them."""

    def __init__(self, min_value, max_value, timezones_strat, allow_imaginary):
        super().__init__()
        bounds = (min_value, max_value)
        for bound in bounds:
            assert isinstance(bound, dt.datetime)
        for bound in bounds:
            assert bound.tzinfo is None
        assert min_value <= max_value
        assert isinstance(timezones_strat, SearchStrategy)
        assert isinstance(allow_imaginary, bool)
        self.min_value, self.max_value = bounds
        self.tz_strat = timezones_strat
        self.allow_imaginary = allow_imaginary

    def do_draw(self, data):
        # The timezone is drawn first, followed by an initial datetime.
        timezone = data.draw(self.tz_strat)
        candidate = self.draw_naive_datetime_and_combine(data, timezone)

        # TODO: with some probability, systematically search for one of
        #   - an imaginary time (if allowed),
        #   - a time within 24hrs of a leap second (if there any are within bounds),
        #   - other subtle, little-known, or nasty issues as described in
        #     https://github.com/HypothesisWorks/hypothesis/issues/69

        # Reject the candidate if it is imaginary and those are disallowed.
        if not (self.allow_imaginary or not datetime_does_not_exist(candidate)):
            data.mark_invalid(f"{candidate} does not exist (usually a DST transition)")
        return candidate

    def draw_naive_datetime_and_combine(self, data, tz):
        """Draw the datetime fields within bounds and attach ``tz`` to the result."""
        fields = draw_capped_multipart(data, self.min_value, self.max_value)
        try:
            naive = dt.datetime(**fields)
            return replace_tzinfo(naive, timezone=tz)
        except (ValueError, OverflowError):
            data.mark_invalid(
                f"Failed to draw a datetime between {self.min_value!r} and "
                f"{self.max_value!r} with timezone from {self.tz_strat!r}."
            )
160
161
@defines_strategy(force_reusable_values=True)
def datetimes(
    min_value: dt.datetime = dt.datetime.min,
    max_value: dt.datetime = dt.datetime.max,
    *,
    timezones: SearchStrategy[dt.tzinfo | None] = none(),
    allow_imaginary: bool = True,
) -> SearchStrategy[dt.datetime]:
    """datetimes(min_value=datetime.datetime.min, max_value=datetime.datetime.max, *, timezones=none(), allow_imaginary=True)

    A strategy for generating datetimes, which may be timezone-aware.

    A naive datetime is drawn between ``min_value`` and ``max_value``, both
    of which must themselves be naive (have no timezone).

    ``timezones`` must be a strategy that generates either ``None``, for naive
    datetimes, or :class:`~python:datetime.tzinfo` objects for 'aware' datetimes.
    You can construct your own, though we recommend using one of these built-in
    strategies:

    * with the standard library: :func:`hypothesis.strategies.timezones`;
    * with :pypi:`dateutil <python-dateutil>`:
      :func:`hypothesis.extra.dateutil.timezones`; or
    * with :pypi:`pytz`: :func:`hypothesis.extra.pytz.timezones`.

    Pass ``allow_imaginary=False`` to filter out "imaginary" datetimes, i.e.
    those which did not (or will not) occur due to daylight savings, leap
    seconds, timezone and calendar adjustments, etc.  Imaginary datetimes are
    allowed by default, because malformed timestamps are a common source of
    bugs.

    Examples from this strategy shrink towards midnight on January 1st 2000,
    local time.
    """
    # Why insist on naive bounds?  A strategy taking aware bounds could be
    # written in principle, but its API and validation would be much harder.
    # To generate datetimes between two particular moments in time, consider
    # (a) just filtering out-of-bounds values; (b) if the bounds are very
    # close, drawing a value and subtracting its UTC offset, handling
    # overflows and nonexistent times; or (c) something customised to handle
    # datetimes in e.g. a four-microsecond span which is not representable in
    # UTC.  Handling (d), all of the above, leads to a much more complex API
    # for all users and a useful feature for very few.
    check_type(bool, allow_imaginary, "allow_imaginary")
    check_type(dt.datetime, min_value, "min_value")
    check_type(dt.datetime, max_value, "max_value")
    min_tz = min_value.tzinfo
    if min_tz is not None:
        raise InvalidArgument(f"{min_value=} must not have tzinfo")
    max_tz = max_value.tzinfo
    if max_tz is not None:
        raise InvalidArgument(f"{max_value=} must not have tzinfo")
    check_valid_interval(min_value, max_value, "min_value", "max_value")
    if isinstance(timezones, SearchStrategy):
        return DatetimeStrategy(min_value, max_value, timezones, allow_imaginary)
    raise InvalidArgument(
        f"{timezones=} must be a SearchStrategy that can "
        "provide tzinfo for datetimes (either None or dt.tzinfo objects)"
    )
218
219
class TimeStrategy(SearchStrategy):
    """Draws the fields of a time within bounds, then a timezone to attach."""

    def __init__(self, min_value, max_value, timezones_strat):
        super().__init__()
        self.min_value, self.max_value = min_value, max_value
        self.tz_strat = timezones_strat

    def do_draw(self, data):
        # The time fields are drawn before the timezone.
        fields = draw_capped_multipart(data, self.min_value, self.max_value, TIMENAMES)
        fields["tzinfo"] = data.draw(self.tz_strat)
        return dt.time(**fields)
231
232
@defines_strategy(force_reusable_values=True)
def times(
    min_value: dt.time = dt.time.min,
    max_value: dt.time = dt.time.max,
    *,
    timezones: SearchStrategy[dt.tzinfo | None] = none(),
) -> SearchStrategy[dt.time]:
    """times(min_value=datetime.time.min, max_value=datetime.time.max, *, timezones=none())

    A strategy for times between ``min_value`` and ``max_value``, both of
    which must be naive (have no timezone).

    The ``timezones`` argument is handled as for :py:func:`datetimes`: it must
    be a strategy, and each generated value is used as the ``tzinfo`` of the
    resulting time.

    Examples from this strategy shrink towards midnight, with the timezone
    component shrinking as for the strategy that provided it.
    """
    check_type(dt.time, min_value, "min_value")
    check_type(dt.time, max_value, "max_value")
    if min_value.tzinfo is not None:
        raise InvalidArgument(f"{min_value=} must not have tzinfo")
    if max_value.tzinfo is not None:
        raise InvalidArgument(f"{max_value=} must not have tzinfo")
    check_valid_interval(min_value, max_value, "min_value", "max_value")
    # Validate eagerly, as datetimes() does, so that a non-strategy argument
    # is reported as an InvalidArgument rather than failing during a draw.
    if not isinstance(timezones, SearchStrategy):
        raise InvalidArgument(
            f"{timezones=} must be a SearchStrategy that can "
            "provide tzinfo for times (either None or dt.tzinfo objects)"
        )
    return TimeStrategy(min_value, max_value, timezones)
257
258
class DateStrategy(SearchStrategy):
    """Draws dates between ``min_value`` and ``max_value``, which must differ."""

    def __init__(self, min_value, max_value):
        super().__init__()
        assert isinstance(min_value, dt.date)
        assert isinstance(max_value, dt.date)
        assert min_value < max_value
        self.min_value = min_value
        self.max_value = max_value

    def do_draw(self, data):
        return dt.date(
            **draw_capped_multipart(data, self.min_value, self.max_value, DATENAMES)
        )

    def filter(self, condition):
        """Apply ``condition``, narrowing the bounds directly where possible.

        A condition of the form ``partial(op, some_date)``, where ``op`` is one
        of the ``operator`` comparison functions, is rewritten into an
        equivalent strategy with tighter bounds (or ``nothing()`` if no date
        can satisfy it).  Any other condition is passed on to the generic
        filter implementation.
        """
        if (
            isinstance(condition, partial)
            and len(args := condition.args) == 1
            and not condition.keywords
            and isinstance(arg := args[0], dt.date)
            and condition.func in (op.lt, op.le, op.eq, op.ge, op.gt)
        ):
            try:
                # Turn the strict comparisons into inclusive bounds.
                arg += dt.timedelta(days={op.lt: 1, op.gt: -1}.get(condition.func, 0))
            except OverflowError:  # gt date.max, or lt date.min
                return nothing()
            lo, hi = {
                # We're talking about op(arg, x) - the reverse of our usual intuition!
                op.lt: (arg, self.max_value),  # lambda x: arg < x
                op.le: (arg, self.max_value),  # lambda x: arg <= x
                op.eq: (arg, arg),  # lambda x: arg == x
                op.ge: (self.min_value, arg),  # lambda x: arg >= x
                op.gt: (self.min_value, arg),  # lambda x: arg > x
            }[condition.func]
            lo = max(lo, self.min_value)
            hi = min(hi, self.max_value)
            if hi < lo:
                return nothing()
            if lo <= self.min_value and self.max_value <= hi:
                # The condition does not exclude anything this strategy draws.
                return self
            return dates(lo, hi)

        return super().filter(condition)
303
304
@defines_strategy(force_reusable_values=True)
def dates(
    min_value: dt.date = dt.date.min, max_value: dt.date = dt.date.max
) -> SearchStrategy[dt.date]:
    """dates(min_value=datetime.date.min, max_value=datetime.date.max)

    A strategy for dates between ``min_value`` and ``max_value``.

    Examples from this strategy shrink towards January 1st 2000.
    """
    check_type(dt.date, min_value, "min_value")
    check_type(dt.date, max_value, "max_value")
    check_valid_interval(min_value, max_value, "min_value", "max_value")
    if min_value != max_value:
        return DateStrategy(min_value, max_value)
    # With identical bounds there is exactly one possible date.
    return just(min_value)
321
322
class TimedeltaStrategy(SearchStrategy):
    """Draws timedeltas between ``min_value`` and ``max_value``, which must differ."""

    def __init__(self, min_value, max_value):
        super().__init__()
        bounds = (min_value, max_value)
        for bound in bounds:
            assert isinstance(bound, dt.timedelta)
        assert min_value < max_value
        self.min_value, self.max_value = bounds

    def do_draw(self, data):
        parts = {}
        # A bound keeps constraining later units only while every unit drawn
        # so far sits exactly on that bound.
        on_min = on_max = True
        for unit in ("days", "seconds", "microseconds"):
            floor = getattr(self.min_value if on_min else dt.timedelta.min, unit)
            ceiling = getattr(self.max_value if on_max else dt.timedelta.max, unit)
            drawn = data.draw_integer(floor, ceiling)
            parts[unit] = drawn
            on_min = on_min and drawn == floor
            on_max = on_max and drawn == ceiling
        return dt.timedelta(**parts)
344
345
@defines_strategy(force_reusable_values=True)
def timedeltas(
    min_value: dt.timedelta = dt.timedelta.min,
    max_value: dt.timedelta = dt.timedelta.max,
) -> SearchStrategy[dt.timedelta]:
    """timedeltas(min_value=datetime.timedelta.min, max_value=datetime.timedelta.max)

    A strategy for timedeltas between ``min_value`` and ``max_value``.

    Examples from this strategy shrink towards zero.
    """
    check_type(dt.timedelta, min_value, "min_value")
    check_type(dt.timedelta, max_value, "max_value")
    check_valid_interval(min_value, max_value, "min_value", "max_value")
    if min_value != max_value:
        return TimedeltaStrategy(min_value=min_value, max_value=max_value)
    # With identical bounds there is exactly one possible timedelta.
    return just(min_value)
363
364
365@cache
366def _valid_key_cacheable(tzpath, key):
367 assert isinstance(tzpath, tuple) # zoneinfo changed, better update this function!
368 for root in tzpath:
369 if Path(root).joinpath(key).exists(): # pragma: no branch
370 # No branch because most systems only have one TZPATH component.
371 return True
372 else: # pragma: no cover
373 # This branch is only taken for names which are known to zoneinfo
374 # but not present on the filesystem, i.e. on Windows with tzdata,
375 # and so is never executed by our coverage tests.
376 *package_loc, resource_name = key.split("/")
377 package = "tzdata.zoneinfo." + ".".join(package_loc)
378 try:
379 return (resources.files(package) / resource_name).exists()
380 except ModuleNotFoundError:
381 return False
382
383
@defines_strategy(force_reusable_values=True)
def timezone_keys(
    *,
    # allow_alias: bool = True,
    # allow_deprecated: bool = True,
    allow_prefix: bool = True,
) -> SearchStrategy[str]:
    """A strategy for :wikipedia:`IANA timezone names <List_of_tz_database_time_zones>`.

    As well as timezone names like ``"UTC"``, ``"Australia/Sydney"``, or
    ``"America/New_York"``, this strategy can generate:

    - Aliases such as ``"Antarctica/McMurdo"``, which links to ``"Pacific/Auckland"``.
    - Deprecated names such as ``"Antarctica/South_Pole"``, which *also* links to
      ``"Pacific/Auckland"``.  Note that most but
      not all deprecated timezone names are also aliases.
    - Timezone names with the ``"posix/"`` or ``"right/"`` prefixes, unless
      ``allow_prefix=False``.

    These strings are provided separately from Tzinfo objects - such as ZoneInfo
    instances from the timezones() strategy - to facilitate testing of timezone
    logic without needing workarounds to access non-canonical names.

    .. note::

        `The tzdata package is required on Windows
        <https://docs.python.org/3/library/zoneinfo.html#data-sources>`__.
        ``pip install hypothesis[zoneinfo]`` installs it, if and only if needed.

    On Windows, you may need to access IANA timezone data via the :pypi:`tzdata`
    package.  For non-IANA timezones, such as Windows-native names or GNU TZ
    strings, we recommend using :func:`~hypothesis.strategies.sampled_from` with
    the :pypi:`dateutil <python-dateutil>` package, e.g.
    :meth:`dateutil:dateutil.tz.tzwin.list`.
    """
    # check_type(bool, allow_alias, "allow_alias")
    # check_type(bool, allow_deprecated, "allow_deprecated")
    check_type(bool, allow_prefix, "allow_prefix")

    # "UTC" is listed first, ahead of the sorted names known to zoneinfo.
    candidates = ["UTC"]
    candidates.extend(sorted(zoneinfo.available_timezones()))

    # TODO: filter out alias and deprecated names if disallowed

    # When prefixes are allowed, a key is chosen first and then flatmapped to
    # that key with one of its available prefixes.  That requires a way to
    # tell which (possibly prefixed) keys are actually available:

    def valid_key(key):
        if key == "UTC":
            return True
        return _valid_key_cacheable(zoneinfo.TZPATH, key)

    # TODO: work out how to place a higher priority on "weird" timezones
    # For details see https://github.com/HypothesisWorks/hypothesis/issues/2414
    strategy = sampled_from(list(filter(valid_key, candidates)))

    if allow_prefix:

        def sample_with_prefixes(zone):
            variants = [zone, f"posix/{zone}", f"right/{zone}"]
            return sampled_from(list(filter(valid_key, variants)))

        return strategy.flatmap(sample_with_prefixes)

    return strategy
446
447
@defines_strategy(force_reusable_values=True)
def timezones(*, no_cache: bool = False) -> SearchStrategy["zoneinfo.ZoneInfo"]:
    """A strategy for :class:`python:zoneinfo.ZoneInfo` objects.

    With ``no_cache=True``, instances are built by
    :meth:`ZoneInfo.no_cache <python:zoneinfo.ZoneInfo.no_cache>` rather than
    the usual constructor.  This may change the semantics of your datetimes
    in surprising ways, so only use it if you know that you need to!

    .. note::

        `The tzdata package is required on Windows
        <https://docs.python.org/3/library/zoneinfo.html#data-sources>`__.
        ``pip install hypothesis[zoneinfo]`` installs it, if and only if needed.
    """
    check_type(bool, no_cache, "no_cache")
    if no_cache:
        constructor = zoneinfo.ZoneInfo.no_cache
    else:
        constructor = zoneinfo.ZoneInfo
    return timezone_keys().map(constructor)