1# This file is part of Hypothesis, which may be found at
2# https://github.com/HypothesisWorks/hypothesis/
3#
4# Copyright the Hypothesis Authors.
5# Individual contributors are listed in AUTHORS.rst and the git log.
6#
7# This Source Code Form is subject to the terms of the Mozilla Public License,
8# v. 2.0. If a copy of the MPL was not distributed with this file, You can
9# obtain one at https://mozilla.org/MPL/2.0/.
10
11import datetime as dt
12import operator as op
13import zoneinfo
14from calendar import monthrange
15from functools import cache, partial
16from importlib import resources
17from pathlib import Path
18from typing import Optional
19
20from hypothesis.errors import InvalidArgument
21from hypothesis.internal.validation import check_type, check_valid_interval
22from hypothesis.strategies._internal.core import sampled_from
23from hypothesis.strategies._internal.misc import just, none, nothing
24from hypothesis.strategies._internal.strategies import SearchStrategy
25from hypothesis.strategies._internal.utils import defines_strategy
26
# Names of the calendar fields, ordered from most to least significant.
# draw_capped_multipart() draws one integer per name, in this order.
DATENAMES = ("year", "month", "day")
# Names of the time-of-day fields, also ordered from most to least significant.
TIMENAMES = ("hour", "minute", "second", "microsecond")
29
30
def is_pytz_timezone(tz):
    """Return True if ``tz`` is a tzinfo instance whose class lives in pytz.

    The decision is made from the name of the module which defines the
    class of ``tz``, so pytz itself is never imported here.
    """
    if isinstance(tz, dt.tzinfo):
        defined_in = type(tz).__module__
        # Match the top-level package or any of its submodules, but not
        # unrelated modules whose name merely starts with "pytz".
        return defined_in == "pytz" or defined_in.startswith("pytz.")
    return False
36
37
def replace_tzinfo(value, timezone):
    """Return the datetime ``value`` with ``timezone`` attached as its tzinfo.

    pytz timezones are attached with their ``localize`` method; anything
    else (including ``None``) goes through ``datetime.replace``.
    """
    if not is_pytz_timezone(timezone):
        return value.replace(tzinfo=timezone)

    # Pytz timezones are a little complicated, and .replace() can cause some
    # weird issues with them, so we call their special "localize" instead.
    #
    # The fold attribute doubles as a convenient boolean for is_dst, although
    # the two are semantically distinct. For ambiguous or imaginary hours,
    # fold selects the offset that applies before the gap (fold=0) or after
    # the gap (fold=1), whereas is_dst selects the "DST" or the "STD" side
    # (STD->STD or DST->DST transitions are unclear, as you might expect).
    #
    # WARNING: this is INCORRECT for timezones with negative DST offsets such
    # as "Europe/Dublin", but it's unclear what we could do instead beyond
    # documenting the problem and recommending use of `dateutil` instead.
    prefer_dst = not value.fold
    return timezone.localize(value, is_dst=prefer_dst)
55
56
def datetime_does_not_exist(value):
    """Report whether ``value`` fails to survive a round-trip through UTC.

    This is an exact inverse of (and very similar to) the dateutil method
    https://dateutil.readthedocs.io/en/stable/tz.html#dateutil.tz.datetime_exists
    """
    tz = value.tzinfo
    # A naive datetime can never be imaginary. It still has to be handled
    # separately, because chained .astimezone() calls would finish in *the
    # system local timezone* rather than None.
    # See bug report in https://github.com/HypothesisWorks/hypothesis/issues/2662
    if tz is None:
        return False

    try:
        # If the naive portion changes when converted to UTC and back, or the
        # conversion overflows, we say that the datetime does not exist.
        as_utc = value.astimezone(dt.timezone.utc)
        roundtrip = as_utc.astimezone(tz)
    except OverflowError:
        # Only happens right at the datetime.min / datetime.max boundaries.
        # Rejecting these is acceptable, because timezones are close to
        # meaningless before ~1900 and subject to a lot of change by 9999,
        # so this should be a very small fraction of possible values.
        return True

    same_tzinfo = tz is roundtrip.tzinfo
    if not same_tzinfo and value.utcoffset() != roundtrip.utcoffset():
        # This only ever occurs during imaginary (i.e. nonexistent) datetimes,
        # and only for pytz timezones which do not follow PEP-495 semantics.
        # (may exclude a few other edge cases, but you should use zoneinfo anyway)
        return True

    assert same_tzinfo, "so only the naive portions are compared"
    return value != roundtrip
89
90
def draw_capped_multipart(
    data, min_value, max_value, duration_names=DATENAMES + TIMENAMES
):
    """Draw one integer per field in ``duration_names`` and return them as a dict.

    Fields are drawn from most to least significant. A field is limited by
    the matching field of ``min_value`` (or ``max_value``) only while every
    field drawn so far sits exactly on that bound; after that the limits of
    ``datetime.min`` / ``datetime.max`` apply instead. When the bounds have a
    ``fold`` attribute, a ``"fold"`` entry is drawn as well.
    """
    assert isinstance(min_value, (dt.date, dt.time, dt.datetime))
    assert type(min_value) == type(max_value)
    assert min_value <= max_value

    drawn = {}
    on_lower_bound = on_upper_bound = True
    for field in duration_names:
        lower_source = min_value if on_lower_bound else dt.datetime.min
        upper_source = max_value if on_upper_bound else dt.datetime.max
        lower = getattr(lower_source, field)
        upper = getattr(upper_source, field)
        if field == "day" and not on_upper_bound:
            # Free of the upper bound: the last day is whatever the already
            # drawn year and month allow.
            upper = monthrange(**drawn)[1]

        extra = {"shrink_towards": 2000} if field == "year" else {}
        value = data.draw_integer(lower, upper, **extra)
        drawn[field] = value

        if value != lower:
            on_lower_bound = False
        if value != upper:
            on_upper_bound = False

    if hasattr(min_value, "fold"):
        # The `fold` attribute is ignored in comparison of naive datetimes.
        # In tz-aware datetimes it would require *very* invasive changes to
        # the logic above, and be very sensitive to the specific timezone
        # (at the cost of efficient shrinking and mutation), so at least for
        # now we stick with the status quo and generate it independently.
        drawn["fold"] = data.draw_integer(0, 1)
    return drawn
119
120
class DatetimeStrategy(SearchStrategy):
    """Strategy drawing datetimes between two naive bounds.

    The tzinfo of each result is drawn from ``timezones_strat``. With
    ``allow_imaginary=False``, results which do not survive a round-trip
    through UTC are marked invalid.
    """

    def __init__(self, min_value, max_value, timezones_strat, allow_imaginary):
        # Arguments are validated by datetimes(); these are sanity checks.
        assert isinstance(min_value, dt.datetime)
        assert isinstance(max_value, dt.datetime)
        assert min_value.tzinfo is None
        assert max_value.tzinfo is None
        assert min_value <= max_value
        assert isinstance(timezones_strat, SearchStrategy)
        assert isinstance(allow_imaginary, bool)
        self.min_value, self.max_value = min_value, max_value
        self.tz_strat = timezones_strat
        self.allow_imaginary = allow_imaginary

    def do_draw(self, data):
        # The timezone is drawn first, followed by an initial datetime.
        timezone = data.draw(self.tz_strat)
        candidate = self.draw_naive_datetime_and_combine(data, timezone)

        # TODO: with some probability, systematically search for one of
        #   - an imaginary time (if allowed),
        #   - a time within 24hrs of a leap second (if there any are within bounds),
        #   - other subtle, little-known, or nasty issues as described in
        #     https://github.com/HypothesisWorks/hypothesis/issues/69

        # Reject the candidate if it is an imaginary time that we may not return.
        must_reject = not self.allow_imaginary and datetime_does_not_exist(candidate)
        if must_reject:
            data.mark_invalid(f"{candidate} does not exist (usually a DST transition)")
        return candidate

    def draw_naive_datetime_and_combine(self, data, tz):
        """Draw a naive datetime within the bounds and attach ``tz`` to it."""
        fields = draw_capped_multipart(data, self.min_value, self.max_value)
        try:
            naive = dt.datetime(**fields)
            return replace_tzinfo(naive, timezone=tz)
        except (ValueError, OverflowError):
            data.mark_invalid(
                f"Failed to draw a datetime between {self.min_value!r} and "
                f"{self.max_value!r} with timezone from {self.tz_strat!r}."
            )
160
161
@defines_strategy(force_reusable_values=True)
def datetimes(
    min_value: dt.datetime = dt.datetime.min,
    max_value: dt.datetime = dt.datetime.max,
    *,
    timezones: SearchStrategy[Optional[dt.tzinfo]] = none(),
    allow_imaginary: bool = True,
) -> SearchStrategy[dt.datetime]:
    """datetimes(min_value=datetime.datetime.min, max_value=datetime.datetime.max, *, timezones=none(), allow_imaginary=True)

    A strategy for generating datetimes, which may be timezone-aware.

    A naive datetime is drawn between ``min_value`` and ``max_value`` - both
    of which must be naive (have no timezone) - and is then combined with a
    value drawn from ``timezones``.

    ``timezones`` must be a strategy that generates either ``None``, for naive
    datetimes, or :class:`~python:datetime.tzinfo` objects for 'aware' datetimes.
    You can construct your own, though we recommend using one of these built-in
    strategies:

    * with the standard library: :func:`hypothesis.strategies.timezones`;
    * with :pypi:`dateutil <python-dateutil>`:
      :func:`hypothesis.extra.dateutil.timezones`; or
    * with :pypi:`pytz`: :func:`hypothesis.extra.pytz.timezones`.

    Pass ``allow_imaginary=False`` to filter out "imaginary" datetimes, i.e.
    those which did not (or will not) occur due to daylight savings, leap
    seconds, timezone and calendar adjustments, etc.  Imaginary datetimes are
    allowed by default, because malformed timestamps are a common source of bugs.

    Examples from this strategy shrink towards midnight on January 1st 2000,
    local time.
    """
    # Why must bounds be naive?  In principle a strategy could also accept
    # aware bounds, but the API and validation would be much harder.  To
    # generate datetimes between two particular moments in time, consider
    # (a) just filtering out-of-bounds values; (b) if bounds are very close,
    # drawing a value and subtracting its UTC offset, handling overflows and
    # nonexistent times; or (c) doing something customised to handle
    # datetimes in e.g. a four-microsecond span which is not representable
    # in UTC.  Handling (d), all of the above, leads to a much more complex
    # API for all users and a useful feature for very few.
    check_type(bool, allow_imaginary, "allow_imaginary")
    check_type(dt.datetime, min_value, "min_value")
    check_type(dt.datetime, max_value, "max_value")
    if min_value.tzinfo is not None:
        raise InvalidArgument(f"{min_value=} must not have tzinfo")
    if max_value.tzinfo is not None:
        raise InvalidArgument(f"{max_value=} must not have tzinfo")
    check_valid_interval(min_value, max_value, "min_value", "max_value")
    if isinstance(timezones, SearchStrategy):
        return DatetimeStrategy(min_value, max_value, timezones, allow_imaginary)
    raise InvalidArgument(
        f"{timezones=} must be a SearchStrategy that can "
        "provide tzinfo for datetimes (either None or dt.tzinfo objects)"
    )
218
219
class TimeStrategy(SearchStrategy):
    """Strategy drawing times between two bounds, with tzinfo from a strategy."""

    def __init__(self, min_value, max_value, timezones_strat):
        self.min_value, self.max_value = min_value, max_value
        self.tz_strat = timezones_strat

    def do_draw(self, data):
        # The time-of-day fields are drawn before the timezone.
        fields = draw_capped_multipart(data, self.min_value, self.max_value, TIMENAMES)
        fields["tzinfo"] = data.draw(self.tz_strat)
        return dt.time(**fields)
230
231
@defines_strategy(force_reusable_values=True)
def times(
    min_value: dt.time = dt.time.min,
    max_value: dt.time = dt.time.max,
    *,
    timezones: SearchStrategy[Optional[dt.tzinfo]] = none(),
) -> SearchStrategy[dt.time]:
    """times(min_value=datetime.time.min, max_value=datetime.time.max, *, timezones=none())

    A strategy for times between ``min_value`` and ``max_value``.

    Both bounds must be naive (have no timezone), and ``min_value`` must not
    be later than ``max_value``.

    The ``timezones`` argument is handled as for :py:func:`datetimes`.

    Examples from this strategy shrink towards midnight, with the timezone
    component shrinking as for the strategy that provided it.
    """
    check_type(dt.time, min_value, "min_value")
    check_type(dt.time, max_value, "max_value")
    if min_value.tzinfo is not None:
        raise InvalidArgument(f"{min_value=} must not have tzinfo")
    if max_value.tzinfo is not None:
        raise InvalidArgument(f"{max_value=} must not have tzinfo")
    check_valid_interval(min_value, max_value, "min_value", "max_value")
    # Validate ``timezones`` up front, exactly as datetimes() does, so that a
    # bad argument is reported as InvalidArgument rather than failing later
    # when TimeStrategy tries to draw from it.
    if not isinstance(timezones, SearchStrategy):
        raise InvalidArgument(
            f"{timezones=} must be a SearchStrategy that can "
            "provide tzinfo for times (either None or dt.tzinfo objects)"
        )
    return TimeStrategy(min_value, max_value, timezones)
256
257
class DateStrategy(SearchStrategy):
    """Strategy drawing dates between ``min_value`` and ``max_value``.

    The bounds must be distinct; dates() handles the case of equal bounds
    without constructing this class.
    """

    def __init__(self, min_value, max_value):
        assert isinstance(min_value, dt.date)
        assert isinstance(max_value, dt.date)
        assert min_value < max_value
        self.min_value = min_value
        self.max_value = max_value

    def do_draw(self, data):
        return dt.date(
            **draw_capped_multipart(data, self.min_value, self.max_value, DATENAMES)
        )

    def filter(self, condition):
        """Apply ``condition``, narrowing the bounds instead where possible.

        If ``condition`` is ``functools.partial(op, arg)`` with exactly one
        positional argument and no keywords, where ``arg`` is a plain date
        and ``op`` is one of ``operator.lt/le/eq/ge/gt``, the result is:

        * ``nothing()`` if no date within the bounds can satisfy it;
        * ``self`` if every date within the bounds satisfies it;
        * ``dates(lo, hi)`` with the narrowed bounds otherwise.

        Any other condition is passed on to ``SearchStrategy.filter``.
        """
        if (
            isinstance(condition, partial)
            and len(args := condition.args) == 1
            and not condition.keywords
            and isinstance(arg := args[0], dt.date)
            # datetime is a subclass of date, but ordering a datetime against
            # a date raises TypeError, so leave those to the generic filter.
            and not isinstance(arg, dt.datetime)
            and condition.func in (op.lt, op.le, op.eq, op.ge, op.gt)
        ):
            try:
                # Turn the strict comparisons into inclusive bounds by
                # shifting the argument by one day.
                arg += dt.timedelta(days={op.lt: 1, op.gt: -1}.get(condition.func, 0))
            except OverflowError:  # gt date.max, or lt date.min
                return nothing()
            lo, hi = {
                # We're talking about op(arg, x) - the reverse of our usual intuition!
                op.lt: (arg, self.max_value),  # lambda x: arg < x
                op.le: (arg, self.max_value),  # lambda x: arg <= x
                op.eq: (arg, arg),  # lambda x: arg == x
                op.ge: (self.min_value, arg),  # lambda x: arg >= x
                op.gt: (self.min_value, arg),  # lambda x: arg > x
            }[condition.func]
            # Never widen beyond the bounds this strategy already has.
            lo = max(lo, self.min_value)
            hi = min(hi, self.max_value)
            if hi < lo:
                return nothing()
            if lo <= self.min_value and self.max_value <= hi:
                return self
            return dates(lo, hi)

        return super().filter(condition)
301
302
@defines_strategy(force_reusable_values=True)
def dates(
    min_value: dt.date = dt.date.min, max_value: dt.date = dt.date.max
) -> SearchStrategy[dt.date]:
    """dates(min_value=datetime.date.min, max_value=datetime.date.max)

    A strategy for dates between ``min_value`` and ``max_value``.

    Examples from this strategy shrink towards January 1st 2000.
    """
    for label, bound in (("min_value", min_value), ("max_value", max_value)):
        check_type(dt.date, bound, label)
    check_valid_interval(min_value, max_value, "min_value", "max_value")
    # Equal bounds leave exactly one possible date; DateStrategy itself
    # requires min_value < max_value.
    if min_value != max_value:
        return DateStrategy(min_value, max_value)
    return just(min_value)
319
320
class TimedeltaStrategy(SearchStrategy):
    """Strategy drawing timedeltas between two distinct bounds."""

    def __init__(self, min_value, max_value):
        assert isinstance(min_value, dt.timedelta)
        assert isinstance(max_value, dt.timedelta)
        assert min_value < max_value
        self.min_value, self.max_value = min_value, max_value

    def do_draw(self, data):
        # Components are drawn from most to least significant. A component is
        # limited by the matching component of a bound only while all of the
        # components drawn so far sit exactly on that bound.
        parts = {}
        on_lower_bound = on_upper_bound = True
        for unit in ("days", "seconds", "microseconds"):
            lower_source = self.min_value if on_lower_bound else dt.timedelta.min
            upper_source = self.max_value if on_upper_bound else dt.timedelta.max
            lower = getattr(lower_source, unit)
            upper = getattr(upper_source, unit)
            drawn = data.draw_integer(lower, upper)
            parts[unit] = drawn
            if drawn != lower:
                on_lower_bound = False
            if drawn != upper:
                on_upper_bound = False
        return dt.timedelta(**parts)
341
342
@defines_strategy(force_reusable_values=True)
def timedeltas(
    min_value: dt.timedelta = dt.timedelta.min,
    max_value: dt.timedelta = dt.timedelta.max,
) -> SearchStrategy[dt.timedelta]:
    """timedeltas(min_value=datetime.timedelta.min, max_value=datetime.timedelta.max)

    A strategy for timedeltas between ``min_value`` and ``max_value``.

    Examples from this strategy shrink towards zero.
    """
    for label, bound in (("min_value", min_value), ("max_value", max_value)):
        check_type(dt.timedelta, bound, label)
    check_valid_interval(min_value, max_value, "min_value", "max_value")
    # Equal bounds leave exactly one possible value; TimedeltaStrategy itself
    # requires min_value < max_value.
    if min_value != max_value:
        return TimedeltaStrategy(min_value=min_value, max_value=max_value)
    return just(min_value)
360
361
@cache
def _valid_key_cacheable(tzpath, key):
    """Return whether timezone data for ``key`` can be found.

    ``key`` is looked up as a path below each directory in ``tzpath``. If
    none of them contains it, the tzdata package is consulted instead:
    ``"Area/Location"`` maps to the resource ``Location`` in the package
    ``tzdata.zoneinfo.Area``, and a key without any ``/`` maps to a resource
    directly in ``tzdata.zoneinfo``. Returns False if that package cannot be
    imported.

    Results are memoised per ``(tzpath, key)`` pair, which is why ``tzpath``
    has to be a (hashable) tuple.
    """
    assert isinstance(tzpath, tuple)  # zoneinfo changed, better update this function!
    for root in tzpath:
        if Path(root).joinpath(key).exists():  # pragma: no branch
            # No branch because most systems only have one TZPATH component.
            return True
    else:  # pragma: no cover
        # This branch is only taken for names which are known to zoneinfo
        # but not present on the filesystem, i.e. on Windows with tzdata,
        # and so is never executed by our coverage tests.
        *package_loc, resource_name = key.split("/")
        # Join the components rather than appending to "tzdata.zoneinfo.":
        # a top-level key such as "Cuba" has no package components, and the
        # resulting trailing dot would make the import below always fail.
        package = ".".join(["tzdata.zoneinfo", *package_loc])
        try:
            return (resources.files(package) / resource_name).exists()
        except ModuleNotFoundError:
            return False
379
380
@defines_strategy(force_reusable_values=True)
def timezone_keys(
    *,
    # allow_alias: bool = True,
    # allow_deprecated: bool = True,
    allow_prefix: bool = True,
) -> SearchStrategy[str]:
    """A strategy for :wikipedia:`IANA timezone names <List_of_tz_database_time_zones>`.

    As well as timezone names like ``"UTC"``, ``"Australia/Sydney"``, or
    ``"America/New_York"``, this strategy can generate:

    - Aliases such as ``"Antarctica/McMurdo"``, which links to ``"Pacific/Auckland"``.
    - Deprecated names such as ``"Antarctica/South_Pole"``, which *also* links to
      ``"Pacific/Auckland"``.  Note that most but
      not all deprecated timezone names are also aliases.
    - Timezone names with the ``"posix/"`` or ``"right/"`` prefixes, unless
      ``allow_prefix=False``.

    These strings are provided separately from Tzinfo objects - such as ZoneInfo
    instances from the timezones() strategy - to facilitate testing of timezone
    logic without needing workarounds to access non-canonical names.

    .. note::

        `The tzdata package is required on Windows
        <https://docs.python.org/3/library/zoneinfo.html#data-sources>`__.
        ``pip install hypothesis[zoneinfo]`` installs it, if and only if needed.

    On Windows, you may need to access IANA timezone data via the :pypi:`tzdata`
    package.  For non-IANA timezones, such as Windows-native names or GNU TZ
    strings, we recommend using :func:`~hypothesis.strategies.sampled_from` with
    the :pypi:`dateutil <python-dateutil>` package, e.g.
    :meth:`dateutil:dateutil.tz.tzwin.list`.
    """
    # check_type(bool, allow_alias, "allow_alias")
    # check_type(bool, allow_deprecated, "allow_deprecated")
    check_type(bool, allow_prefix, "allow_prefix")

    # With prefixes allowed, a key is chosen first and then flatmapped to one
    # of the prefixed variants available for it.  Both steps therefore need to
    # know whether a given (possibly prefixed) key is available:
    def valid_key(key):
        return key == "UTC" or _valid_key_cacheable(zoneinfo.TZPATH, key)

    def sample_with_prefixes(zone):
        variants = (zone, f"posix/{zone}", f"right/{zone}")
        return sampled_from(list(filter(valid_key, variants)))

    candidates = ("UTC", *sorted(zoneinfo.available_timezones()))

    # TODO: filter out alias and deprecated names if disallowed

    # TODO: work out how to place a higher priority on "weird" timezones
    # For details see https://github.com/HypothesisWorks/hypothesis/issues/2414
    unprefixed = sampled_from(list(filter(valid_key, candidates)))

    if allow_prefix:
        return unprefixed.flatmap(sample_with_prefixes)
    return unprefixed
443
444
@defines_strategy(force_reusable_values=True)
def timezones(*, no_cache: bool = False) -> SearchStrategy["zoneinfo.ZoneInfo"]:
    """A strategy for :class:`python:zoneinfo.ZoneInfo` objects.

    If ``no_cache=True``, the generated instances are constructed using
    :meth:`ZoneInfo.no_cache <python:zoneinfo.ZoneInfo.no_cache>` instead
    of the usual constructor.  This may change the semantics of your datetimes
    in surprising ways, so only use it if you know that you need to!

    .. note::

        `The tzdata package is required on Windows
        <https://docs.python.org/3/library/zoneinfo.html#data-sources>`__.
        ``pip install hypothesis[zoneinfo]`` installs it, if and only if needed.
    """
    check_type(bool, no_cache, "no_cache")
    # Every key drawn from timezone_keys() is passed to the chosen constructor.
    if no_cache:
        build_zone = zoneinfo.ZoneInfo.no_cache
    else:
        build_zone = zoneinfo.ZoneInfo
    return timezone_keys().map(build_zone)