1# This file is part of Hypothesis, which may be found at
2# https://github.com/HypothesisWorks/hypothesis/
3#
4# Copyright the Hypothesis Authors.
5# Individual contributors are listed in AUTHORS.rst and the git log.
6#
7# This Source Code Form is subject to the terms of the Mozilla Public License,
8# v. 2.0. If a copy of the MPL was not distributed with this file, You can
9# obtain one at https://mozilla.org/MPL/2.0/.
10
11import datetime as dt
12import operator as op
13import zoneinfo
14from calendar import monthrange
15from functools import cache, partial
16from importlib import resources
17from pathlib import Path
18from typing import Optional
19
20from hypothesis.errors import InvalidArgument
21from hypothesis.internal.validation import check_type, check_valid_interval
22from hypothesis.strategies._internal.core import sampled_from
23from hypothesis.strategies._internal.misc import just, none, nothing
24from hypothesis.strategies._internal.strategies import SearchStrategy
25from hypothesis.strategies._internal.utils import defines_strategy
26
# Names of the fields that make up a date, listed from the most significant
# (year) to the least significant (day); draw_capped_multipart draws them in
# this order.
DATENAMES = ("year", "month", "day")
# Names of the fields that make up a time of day, in the same
# most-significant-first order.
TIMENAMES = ("hour", "minute", "second", "microsecond")
29
30
def is_pytz_timezone(tz):
    """Return True if ``tz`` is a tzinfo instance whose class is defined in pytz.

    The check inspects the name of the module defining ``type(tz)``, so pytz
    itself is never imported here. Anything that is not a
    :class:`datetime.tzinfo` instance is reported as False.
    """
    if isinstance(tz, dt.tzinfo):
        defining_module = type(tz).__module__
        # Match the top-level package itself or any of its submodules, but
        # not unrelated packages whose name merely starts with "pytz".
        return defining_module == "pytz" or defining_module.startswith("pytz.")
    return False
36
37
def replace_tzinfo(value, timezone):
    """Return the datetime ``value`` with ``timezone`` attached as its tzinfo.

    Non-pytz timezones (including ``None``) are attached with
    ``value.replace(tzinfo=...)``; pytz timezones go through their own
    ``localize`` method instead.
    """
    if not is_pytz_timezone(timezone):
        return value.replace(tzinfo=timezone)

    # Pytz timezones are a little complicated: attaching them with .replace()
    # can cause some weird issues, so their dedicated "localize" is used.
    #
    # The fold attribute doubles as a convenient boolean for is_dst, even
    # though the two are semantically distinct. For ambiguous or imaginary
    # hours, fold selects the offset in force before the gap (fold=0) or after
    # the gap (fold=1), whereas is_dst selects the "DST" or the "STD" side
    # (STD->STD or DST->DST transitions are unclear, as you might expect).
    #
    # WARNING: this is INCORRECT for timezones with negative DST offsets such
    # as "Europe/Dublin", but it's unclear what we could do instead beyond
    # documenting the problem and recommending use of `dateutil` instead.
    return timezone.localize(value, is_dst=not value.fold)
55
56
def datetime_does_not_exist(value):
    """Report whether ``value`` fails to round-trip to and from UTC.

    This is an exact inverse of (and very similar to) the dateutil method
    https://dateutil.readthedocs.io/en/stable/tz.html#dateutil.tz.datetime_exists

    Returns False for naive datetimes, and True when the round-trip
    overflows or yields a different datetime.
    """
    tz = value.tzinfo
    # Naive datetimes cannot be imaginary. The special case is required
    # because chaining .astimezone() would end with *the system local
    # timezone* rather than None.
    # See bug report in https://github.com/HypothesisWorks/hypothesis/issues/2662
    if tz is None:
        return False

    utc = dt.timezone.utc
    try:
        # If the naive portion changes when round-tripped through UTC, or the
        # conversion overflows, the datetime is said not to exist.
        roundtrip = value.astimezone(utc).astimezone(tz)
    except OverflowError:
        # Overflows happen at the datetime.min / datetime.max boundaries.
        # Rejecting these is acceptable, because timezones are close to
        # meaningless before ~1900 and subject to a lot of change by 9999,
        # so it should be a very small fraction of possible values.
        return True

    tzinfo_changed = tz is not roundtrip.tzinfo
    if tzinfo_changed and value.utcoffset() != roundtrip.utcoffset():
        # This only ever occurs during imaginary (i.e. nonexistent) datetimes,
        # and only for pytz timezones which do not follow PEP-495 semantics.
        # (may exclude a few other edge cases, but you should use zoneinfo anyway)
        return True

    assert value.tzinfo is roundtrip.tzinfo, "so only the naive portions are compared"
    return value != roundtrip
89
90
def draw_capped_multipart(
    data, min_value, max_value, duration_names=DATENAMES + TIMENAMES
):
    """Draw the fields named in ``duration_names`` so they lie within bounds.

    Fields are drawn one at a time, in the given order. A field is limited by
    the matching field of ``min_value`` (resp. ``max_value``) only while every
    field drawn so far equals that bound; once a draw moves off a bound, later
    fields use the range taken from ``dt.datetime.min`` / ``dt.datetime.max``
    (or the real length of the drawn month, for ``day``).

    Returns a dict mapping field name to the drawn integer, plus a ``fold``
    entry when ``min_value`` has a ``fold`` attribute.
    """
    assert isinstance(min_value, (dt.date, dt.time, dt.datetime))
    assert type(min_value) == type(max_value)
    assert min_value <= max_value

    drawn = {}
    # Whether all fields drawn so far sit exactly on the lower / upper bound.
    at_floor = at_ceiling = True
    for field in duration_names:
        low = getattr(min_value if at_floor else dt.datetime.min, field)
        high = getattr(max_value if at_ceiling else dt.datetime.max, field)
        if field == "day" and not at_ceiling:
            # Year and month are already drawn, so use that month's true length.
            high = monthrange(**drawn)[1]
        # Only the year has an explicit shrink target.
        extra = {"shrink_towards": 2000} if field == "year" else {}
        chosen = data.draw_integer(low, high, **extra)
        drawn[field] = chosen
        at_floor = at_floor and chosen == low
        at_ceiling = at_ceiling and chosen == high

    if hasattr(min_value, "fold"):
        # The `fold` attribute is ignored in comparison of naive datetimes.
        # In tz-aware datetimes it would require *very* invasive changes to
        # the logic above, and be very sensitive to the specific timezone
        # (at the cost of efficient shrinking and mutation), so at least for
        # now we stick with the status quo and generate it independently.
        drawn["fold"] = data.draw_integer(0, 1)
    return drawn
119
120
class DatetimeStrategy(SearchStrategy):
    """Strategy drawing datetimes between two naive bounds, with a drawn tzinfo.

    ``timezones_strat`` supplies the tzinfo attached to each value; when
    ``allow_imaginary`` is False, values for which
    ``datetime_does_not_exist`` is true are marked invalid.
    """

    def __init__(self, min_value, max_value, timezones_strat, allow_imaginary):
        super().__init__()
        # Both bounds must be naive datetimes, in non-decreasing order.
        for bound in (min_value, max_value):
            assert isinstance(bound, dt.datetime)
        for bound in (min_value, max_value):
            assert bound.tzinfo is None
        assert min_value <= max_value
        assert isinstance(timezones_strat, SearchStrategy)
        assert isinstance(allow_imaginary, bool)
        self.min_value, self.max_value = min_value, max_value
        self.tz_strat = timezones_strat
        self.allow_imaginary = allow_imaginary

    def do_draw(self, data):
        """Draw a timezone first, then a datetime carrying that timezone."""
        tz = data.draw(self.tz_strat)
        result = self.draw_naive_datetime_and_combine(data, tz)

        # TODO: with some probability, systematically search for one of
        #   - an imaginary time (if allowed),
        #   - a time within 24hrs of a leap second (if there any are within bounds),
        #   - other subtle, little-known, or nasty issues as described in
        #     https://github.com/HypothesisWorks/hypothesis/issues/69

        # A disallowed imaginary time is rejected rather than returned.
        imaginary_forbidden = not self.allow_imaginary
        if imaginary_forbidden and datetime_does_not_exist(result):
            data.mark_invalid(f"{result} does not exist (usually a DST transition)")
        return result

    def draw_naive_datetime_and_combine(self, data, tz):
        """Draw naive datetime fields within bounds and attach ``tz`` to them.

        If building the datetime or attaching the timezone raises ValueError
        or OverflowError, the draw is marked invalid.
        """
        fields = draw_capped_multipart(data, self.min_value, self.max_value)
        try:
            naive = dt.datetime(**fields)
            return replace_tzinfo(naive, timezone=tz)
        except (ValueError, OverflowError):
            data.mark_invalid(
                f"Failed to draw a datetime between {self.min_value!r} and "
                f"{self.max_value!r} with timezone from {self.tz_strat!r}."
            )
161
162
@defines_strategy(force_reusable_values=True)
def datetimes(
    min_value: dt.datetime = dt.datetime.min,
    max_value: dt.datetime = dt.datetime.max,
    *,
    timezones: SearchStrategy[Optional[dt.tzinfo]] = none(),
    allow_imaginary: bool = True,
) -> SearchStrategy[dt.datetime]:
    """datetimes(min_value=datetime.datetime.min, max_value=datetime.datetime.max, *, timezones=none(), allow_imaginary=True)

    A strategy for generating datetimes, which may be timezone-aware.

    This strategy works by drawing a naive datetime between ``min_value``
    and ``max_value``, which must both be naive (have no timezone).

    ``timezones`` must be a strategy that generates either ``None``, for naive
    datetimes, or :class:`~python:datetime.tzinfo` objects for 'aware' datetimes.
    You can construct your own, though we recommend using one of these built-in
    strategies:

    * with the standard library: :func:`hypothesis.strategies.timezones`;
    * with :pypi:`dateutil <python-dateutil>`:
      :func:`hypothesis.extra.dateutil.timezones`; or
    * with :pypi:`pytz`: :func:`hypothesis.extra.pytz.timezones`.

    You may pass ``allow_imaginary=False`` to filter out "imaginary" datetimes
    which did not (or will not) occur due to daylight savings, leap seconds,
    timezone and calendar adjustments, etc.  Imaginary datetimes are allowed
    by default, because malformed timestamps are a common source of bugs.

    Examples from this strategy shrink towards midnight on January 1st 2000,
    local time.
    """
    # Why must bounds be naive?  In principle, we could also write a strategy
    # that took aware bounds, but the API and validation is much harder.
    # If you want to generate datetimes between two particular moments in
    # time I suggest (a) just filtering out-of-bounds values; (b) if bounds
    # are very close, draw a value and subtract its UTC offset, handling
    # overflows and nonexistent times; or (c) do something customised to
    # handle datetimes in e.g. a four-microsecond span which is not
    # representable in UTC.  Handling (d), all of the above, leads to a much
    # more complex API for all users and a useful feature for very few.
    check_type(bool, allow_imaginary, "allow_imaginary")
    for label, bound in (("min_value", min_value), ("max_value", max_value)):
        check_type(dt.datetime, bound, label)
    if min_value.tzinfo is not None:
        raise InvalidArgument(f"{min_value=} must not have tzinfo")
    if max_value.tzinfo is not None:
        raise InvalidArgument(f"{max_value=} must not have tzinfo")
    check_valid_interval(min_value, max_value, "min_value", "max_value")
    if isinstance(timezones, SearchStrategy):
        return DatetimeStrategy(min_value, max_value, timezones, allow_imaginary)
    raise InvalidArgument(
        f"{timezones=} must be a SearchStrategy that can "
        "provide tzinfo for datetimes (either None or dt.tzinfo objects)"
    )
219
220
class TimeStrategy(SearchStrategy):
    """Strategy drawing times of day between two bounds, with a drawn tzinfo."""

    def __init__(self, min_value, max_value, timezones_strat):
        super().__init__()
        self.min_value, self.max_value = min_value, max_value
        self.tz_strat = timezones_strat

    def do_draw(self, data):
        """Draw the time fields within bounds, then the timezone to attach."""
        fields = draw_capped_multipart(data, self.min_value, self.max_value, TIMENAMES)
        # The timezone is drawn after the time fields, never before.
        fields["tzinfo"] = data.draw(self.tz_strat)
        return dt.time(**fields)
232
233
@defines_strategy(force_reusable_values=True)
def times(
    min_value: dt.time = dt.time.min,
    max_value: dt.time = dt.time.max,
    *,
    timezones: SearchStrategy[Optional[dt.tzinfo]] = none(),
) -> SearchStrategy[dt.time]:
    """times(min_value=datetime.time.min, max_value=datetime.time.max, *, timezones=none())

    A strategy for times between ``min_value`` and ``max_value``.

    The ``timezones`` argument is handled as for :py:func:`datetimes`.

    Examples from this strategy shrink towards midnight, with the timezone
    component shrinking as for the strategy that provided it.
    """
    check_type(dt.time, min_value, "min_value")
    check_type(dt.time, max_value, "max_value")
    # Bounds must be naive; the timezone comes only from ``timezones``.
    if min_value.tzinfo is not None:
        raise InvalidArgument(f"{min_value=} must not have tzinfo")
    if max_value.tzinfo is not None:
        raise InvalidArgument(f"{max_value=} must not have tzinfo")
    check_valid_interval(min_value, max_value, "min_value", "max_value")
    # Validate ``timezones`` up front, exactly as datetimes() does, so that a
    # non-strategy argument gives a clear InvalidArgument instead of failing
    # obscurely when the first value is drawn.
    if not isinstance(timezones, SearchStrategy):
        raise InvalidArgument(
            f"{timezones=} must be a SearchStrategy that can "
            "provide tzinfo for times (either None or dt.tzinfo objects)"
        )
    return TimeStrategy(min_value, max_value, timezones)
258
259
class DateStrategy(SearchStrategy):
    """Strategy drawing dates between ``min_value`` and ``max_value`` inclusive.

    The bounds must be distinct; ``dates()`` handles the equal-bounds case
    without constructing this class.
    """

    def __init__(self, min_value, max_value):
        super().__init__()
        assert isinstance(min_value, dt.date)
        assert isinstance(max_value, dt.date)
        assert min_value < max_value
        self.min_value = min_value
        self.max_value = max_value

    def do_draw(self, data):
        """Draw year, month and day within bounds and build the date."""
        return dt.date(
            **draw_capped_multipart(data, self.min_value, self.max_value, DATENAMES)
        )

    def filter(self, condition):
        """Apply ``condition``, rewriting simple comparisons into tighter bounds.

        A ``functools.partial`` of one of ``operator.lt/le/eq/ge/gt`` bound to
        a single positional date is turned into a narrower ``dates()``
        strategy (or ``nothing()`` if no date can satisfy it, or ``self`` if
        it excludes nothing). Every other condition is delegated to the
        generic ``SearchStrategy.filter``.
        """
        if (
            isinstance(condition, partial)
            and len(args := condition.args) == 1
            and not condition.keywords
            and isinstance(arg := args[0], dt.date)
            # datetime subclasses date but cannot be ordered against plain
            # dates, so max()/min() below would raise TypeError; leave such
            # predicates to the generic filter, which evaluates them as-is.
            and not isinstance(arg, dt.datetime)
            and condition.func in (op.lt, op.le, op.eq, op.ge, op.gt)
        ):
            try:
                # Turn the strict comparisons into inclusive bounds.
                arg += dt.timedelta(days={op.lt: 1, op.gt: -1}.get(condition.func, 0))
            except OverflowError:  # gt date.max, or lt date.min
                return nothing()
            lo, hi = {
                # We're talking about op(arg, x) - the reverse of our usual intuition!
                op.lt: (arg, self.max_value),  # lambda x: arg < x
                op.le: (arg, self.max_value),  # lambda x: arg <= x
                op.eq: (arg, arg),  # lambda x: arg == x
                op.ge: (self.min_value, arg),  # lambda x: arg >= x
                op.gt: (self.min_value, arg),  # lambda x: arg > x
            }[condition.func]
            # Intersect the requested range with this strategy's own bounds.
            lo = max(lo, self.min_value)
            hi = min(hi, self.max_value)
            if hi < lo:
                return nothing()
            if lo <= self.min_value and self.max_value <= hi:
                return self
            return dates(lo, hi)

        return super().filter(condition)
304
305
@defines_strategy(force_reusable_values=True)
def dates(
    min_value: dt.date = dt.date.min, max_value: dt.date = dt.date.max
) -> SearchStrategy[dt.date]:
    """dates(min_value=datetime.date.min, max_value=datetime.date.max)

    A strategy for dates between ``min_value`` and ``max_value``.

    Examples from this strategy shrink towards January 1st 2000.
    """
    for label, bound in (("min_value", min_value), ("max_value", max_value)):
        check_type(dt.date, bound, label)
    check_valid_interval(min_value, max_value, "min_value", "max_value")
    if min_value != max_value:
        return DateStrategy(min_value, max_value)
    # Only one date is possible, so no drawing is needed.
    return just(min_value)
322
323
class TimedeltaStrategy(SearchStrategy):
    """Strategy drawing timedeltas between two distinct bounds, inclusive."""

    def __init__(self, min_value, max_value):
        super().__init__()
        for bound in (min_value, max_value):
            assert isinstance(bound, dt.timedelta)
        assert min_value < max_value
        self.min_value, self.max_value = min_value, max_value

    def do_draw(self, data):
        """Draw days, seconds and microseconds in turn and build the timedelta.

        A component is limited by the matching component of a bound only
        while every earlier component equals that bound; otherwise the
        component of ``dt.timedelta.min`` / ``dt.timedelta.max`` is used.
        """
        parts = {}
        pinned_low = pinned_high = True
        for unit in ("days", "seconds", "microseconds"):
            floor_source = self.min_value if pinned_low else dt.timedelta.min
            ceiling_source = self.max_value if pinned_high else dt.timedelta.max
            low, high = getattr(floor_source, unit), getattr(ceiling_source, unit)
            picked = data.draw_integer(low, high)
            parts[unit] = picked
            pinned_low = pinned_low and picked == low
            pinned_high = pinned_high and picked == high
        return dt.timedelta(**parts)
345
346
@defines_strategy(force_reusable_values=True)
def timedeltas(
    min_value: dt.timedelta = dt.timedelta.min,
    max_value: dt.timedelta = dt.timedelta.max,
) -> SearchStrategy[dt.timedelta]:
    """timedeltas(min_value=datetime.timedelta.min, max_value=datetime.timedelta.max)

    A strategy for timedeltas between ``min_value`` and ``max_value``.

    Examples from this strategy shrink towards zero.
    """
    for label, bound in (("min_value", min_value), ("max_value", max_value)):
        check_type(dt.timedelta, bound, label)
    check_valid_interval(min_value, max_value, "min_value", "max_value")
    if min_value != max_value:
        return TimedeltaStrategy(min_value=min_value, max_value=max_value)
    # Only one timedelta is possible, so no drawing is needed.
    return just(min_value)
364
365
366@cache
367def _valid_key_cacheable(tzpath, key):
368 assert isinstance(tzpath, tuple) # zoneinfo changed, better update this function!
369 for root in tzpath:
370 if Path(root).joinpath(key).exists(): # pragma: no branch
371 # No branch because most systems only have one TZPATH component.
372 return True
373 else: # pragma: no cover
374 # This branch is only taken for names which are known to zoneinfo
375 # but not present on the filesystem, i.e. on Windows with tzdata,
376 # and so is never executed by our coverage tests.
377 *package_loc, resource_name = key.split("/")
378 package = "tzdata.zoneinfo." + ".".join(package_loc)
379 try:
380 return (resources.files(package) / resource_name).exists()
381 except ModuleNotFoundError:
382 return False
383
384
@defines_strategy(force_reusable_values=True)
def timezone_keys(
    *,
    # allow_alias: bool = True,
    # allow_deprecated: bool = True,
    allow_prefix: bool = True,
) -> SearchStrategy[str]:
    """A strategy for :wikipedia:`IANA timezone names <List_of_tz_database_time_zones>`.

    As well as timezone names like ``"UTC"``, ``"Australia/Sydney"``, or
    ``"America/New_York"``, this strategy can generate:

    - Aliases such as ``"Antarctica/McMurdo"``, which links to ``"Pacific/Auckland"``.
    - Deprecated names such as ``"Antarctica/South_Pole"``, which *also* links to
      ``"Pacific/Auckland"``.  Note that most but
      not all deprecated timezone names are also aliases.
    - Timezone names with the ``"posix/"`` or ``"right/"`` prefixes, unless
      ``allow_prefix=False``.

    These strings are provided separately from Tzinfo objects - such as ZoneInfo
    instances from the timezones() strategy - to facilitate testing of timezone
    logic without needing workarounds to access non-canonical names.

    .. note::

        `The tzdata package is required on Windows
        <https://docs.python.org/3/library/zoneinfo.html#data-sources>`__.
        ``pip install hypothesis[zoneinfo]`` installs it, if and only if needed.

    On Windows, you may need to access IANA timezone data via the :pypi:`tzdata`
    package.  For non-IANA timezones, such as Windows-native names or GNU TZ
    strings, we recommend using :func:`~hypothesis.strategies.sampled_from` with
    the :pypi:`dateutil <python-dateutil>` package, e.g.
    :meth:`dateutil:dateutil.tz.tzwin.list`.
    """
    # check_type(bool, allow_alias, "allow_alias")
    # check_type(bool, allow_deprecated, "allow_deprecated")
    check_type(bool, allow_prefix, "allow_prefix")

    # When prefixes are allowed, a key is chosen first and then flatmapped to
    # one of its available prefixed forms, so we need a way to tell which
    # forms are actually available for a given key:

    def valid_key(key):
        return key == "UTC" or _valid_key_cacheable(zoneinfo.TZPATH, key)

    # "UTC" is listed first, ahead of the sorted names reported by zoneinfo.
    candidates = ("UTC", *sorted(zoneinfo.available_timezones()))

    # TODO: filter out alias and deprecated names if disallowed

    # TODO: work out how to place a higher priority on "weird" timezones
    # For details see https://github.com/HypothesisWorks/hypothesis/issues/2414
    base_keys = sampled_from(list(filter(valid_key, candidates)))

    def sample_with_prefixes(zone):
        variants = (zone, f"posix/{zone}", f"right/{zone}")
        return sampled_from([name for name in variants if valid_key(name)])

    if allow_prefix:
        return base_keys.flatmap(sample_with_prefixes)
    return base_keys
447
448
@defines_strategy(force_reusable_values=True)
def timezones(*, no_cache: bool = False) -> SearchStrategy["zoneinfo.ZoneInfo"]:
    """A strategy for :class:`python:zoneinfo.ZoneInfo` objects.

    If ``no_cache=True``, the generated instances are constructed using
    :meth:`ZoneInfo.no_cache <python:zoneinfo.ZoneInfo.no_cache>` instead
    of the usual constructor.  This may change the semantics of your datetimes
    in surprising ways, so only use it if you know that you need to!

    .. note::

        `The tzdata package is required on Windows
        <https://docs.python.org/3/library/zoneinfo.html#data-sources>`__.
        ``pip install hypothesis[zoneinfo]`` installs it, if and only if needed.
    """
    check_type(bool, no_cache, "no_cache")
    # Choose the constructor once; every generated key is passed through it.
    build_zone = zoneinfo.ZoneInfo.no_cache if no_cache else zoneinfo.ZoneInfo
    return timezone_keys().map(build_zone)