1# This file is part of Hypothesis, which may be found at
2# https://github.com/HypothesisWorks/hypothesis/
3#
4# Copyright the Hypothesis Authors.
5# Individual contributors are listed in AUTHORS.rst and the git log.
6#
7# This Source Code Form is subject to the terms of the Mozilla Public License,
8# v. 2.0. If a copy of the MPL was not distributed with this file, You can
9# obtain one at https://mozilla.org/MPL/2.0/.
10
11import datetime as dt
12from calendar import monthrange
13from functools import lru_cache
14from importlib import resources
15from pathlib import Path
16from typing import Optional
17
18from hypothesis.errors import InvalidArgument
19from hypothesis.internal.validation import check_type, check_valid_interval
20from hypothesis.strategies._internal.core import sampled_from
21from hypothesis.strategies._internal.misc import just, none
22from hypothesis.strategies._internal.strategies import SearchStrategy
23from hypothesis.strategies._internal.utils import defines_strategy
24
25# The zoneinfo module, required for the timezones() and timezone_keys()
26# strategies, is new in Python 3.9 and the backport might be missing.
27try:
28 import zoneinfo
29except ImportError:
30 try:
31 from backports import zoneinfo # type: ignore
32 except ImportError:
33 # We raise an error recommending `pip install hypothesis[zoneinfo]`
34 # when timezones() or timezone_keys() strategies are actually used.
35 zoneinfo = None # type: ignore
36
# Attribute names of the date components, ordered from most to least
# significant.  draw_capped_multipart() reads these attributes off its bounds
# in this order, and the drawn values are passed as keyword arguments to
# dt.date(...) / dt.datetime(...).
DATENAMES = ("year", "month", "day")
# Attribute names of the time-of-day components, ordered from most to least
# significant.  Used the same way as DATENAMES, for dt.time(...) and for the
# time portion of dt.datetime(...).
TIMENAMES = ("hour", "minute", "second", "microsecond")
39
40
def is_pytz_timezone(tz):
    """Return True if ``tz`` is a tzinfo instance whose class is defined in pytz.

    The check is made on the defining module of ``type(tz)``: either the
    top-level ``pytz`` module or any ``pytz.*`` submodule.  Anything which is
    not a :class:`datetime.tzinfo` instance is never a pytz timezone.
    """
    if isinstance(tz, dt.tzinfo):
        # The first dotted component of the module path names the package.
        top_level_package = type(tz).__module__.partition(".")[0]
        return top_level_package == "pytz"
    return False
46
47
def replace_tzinfo(value, timezone):
    """Return ``value`` with ``timezone`` attached as its tzinfo.

    Non-pytz timezones (including ``None``) are attached with ``.replace()``;
    pytz timezones go through their own ``localize()`` method instead.
    """
    if not is_pytz_timezone(timezone):
        return value.replace(tzinfo=timezone)

    # Pytz timezones are a little complicated, and plain .replace() can cause
    # some weird issues, so we call their special "localize" method instead.
    #
    # The fold attribute is used as a convenient boolean for is_dst, even though
    # the two are semantically distinct.  For ambiguous or imaginary hours, fold
    # says whether to use the offset that applies before the gap (fold=0) or the
    # offset that applies after the gap (fold=1), whereas is_dst says whether to
    # choose the side that is "DST" or "STD" (STD->STD or DST->DST transitions
    # are unclear, as you might expect).
    #
    # WARNING: this is INCORRECT for timezones with negative DST offsets such as
    # "Europe/Dublin", but it's unclear what we could do instead beyond
    # documenting the problem and recommending use of `dateutil` instead.
    prefer_dst = not value.fold
    return timezone.localize(value, is_dst=prefer_dst)
65
66
def datetime_does_not_exist(value):
    """Report whether ``value`` fails to survive a round-trip to UTC and back.

    This is the exact inverse of (and very similar to) the dateutil method
    https://dateutil.readthedocs.io/en/stable/tz.html#dateutil.tz.datetime_exists

    Naive datetimes always exist, so ``False`` is returned for them.  A
    round-trip which overflows is treated as a nonexistent datetime.
    """
    tz = value.tzinfo
    # Naive datetimes cannot be imaginary, and need special-casing because a
    # chain of .astimezone() calls would end in *the system local timezone*
    # rather than None.
    # See bug report in https://github.com/HypothesisWorks/hypothesis/issues/2662
    if tz is None:
        return False

    try:
        # If the naive portion of the datetime changes when round-tripped via
        # UTC, or the conversion overflows, we say that it does not exist.
        as_utc = value.astimezone(dt.timezone.utc)
        roundtrip = as_utc.astimezone(tz)
    except OverflowError:
        # Overflows happen at the datetime.min or datetime.max boundaries.
        # Rejecting these is acceptable, because timezones are close to
        # meaningless before ~1900 and subject to a lot of change by
        # 9999, so it should be a very small fraction of possible values.
        return True

    if tz is not roundtrip.tzinfo:
        if value.utcoffset() != roundtrip.utcoffset():
            # This only ever occurs during imaginary (i.e. nonexistent)
            # datetimes, and only for pytz timezones which do not follow
            # PEP-495 semantics.  (It may exclude a few other edge cases, but
            # you should use zoneinfo anyway.)
            return True

    assert tz is roundtrip.tzinfo, "so only the naive portions are compared"
    return value != roundtrip
99
100
def draw_capped_multipart(
    data, min_value, max_value, duration_names=DATENAMES + TIMENAMES
):
    """Draw one integer per name in ``duration_names``, respecting both bounds.

    Components are drawn in the order given.  A component is limited by
    ``min_value`` (or ``max_value``) only while every earlier component was
    drawn exactly at that bound; after that the limits come from
    ``dt.datetime.min`` / ``dt.datetime.max``, except that an uncapped "day"
    is limited by the length of the month already drawn.

    Returns a dict mapping each name to its drawn value, plus a "fold" entry
    when ``min_value`` has a ``fold`` attribute.
    """
    assert isinstance(min_value, (dt.date, dt.time, dt.datetime))
    assert type(min_value) == type(max_value)
    assert min_value <= max_value
    drawn = {}
    at_floor = at_ceiling = True
    for field in duration_names:
        floor_source = min_value if at_floor else dt.datetime.min
        ceiling_source = max_value if at_ceiling else dt.datetime.max
        lo = getattr(floor_source, field)
        hi = getattr(ceiling_source, field)
        if field == "day" and not at_ceiling:
            # The year and month are already drawn, so use the real month length.
            hi = monthrange(**drawn)[1]
        # Only the year gets an explicit shrink target.
        extra = {"shrink_towards": 2000} if field == "year" else {}
        picked = data.draw_integer(lo, hi, **extra)
        drawn[field] = picked
        at_floor = at_floor and picked == lo
        at_ceiling = at_ceiling and picked == hi
    if hasattr(min_value, "fold"):
        # The `fold` attribute is ignored in comparison of naive datetimes.
        # In tz-aware datetimes it would require *very* invasive changes to
        # the logic above, and be very sensitive to the specific timezone
        # (at the cost of efficient shrinking and mutation), so at least for
        # now we stick with the status quo and generate it independently.
        drawn["fold"] = data.draw_integer(0, 1)
    return drawn
129
130
class DatetimeStrategy(SearchStrategy):
    """Draws a naive datetime between two bounds and attaches a drawn timezone."""

    def __init__(self, min_value, max_value, timezones_strat, allow_imaginary):
        # Both bounds must be naive datetimes, given in order.
        for bound in (min_value, max_value):
            assert isinstance(bound, dt.datetime)
            assert bound.tzinfo is None
        assert min_value <= max_value
        assert isinstance(timezones_strat, SearchStrategy)
        assert isinstance(allow_imaginary, bool)
        self.min_value, self.max_value = min_value, max_value
        self.tz_strat = timezones_strat
        self.allow_imaginary = allow_imaginary

    def do_draw(self, data):
        # The timezone is drawn first, followed by an initial datetime.
        tz = data.draw(self.tz_strat)
        result = self.draw_naive_datetime_and_combine(data, tz)

        # TODO: with some probability, systematically search for one of
        #   - an imaginary time (if allowed),
        #   - a time within 24hrs of a leap second (if there any are within bounds),
        #   - other subtle, little-known, or nasty issues as described in
        #     https://github.com/HypothesisWorks/hypothesis/issues/69

        if self.allow_imaginary:
            return result
        # Imaginary times are disallowed, so reject this one if it is imaginary.
        if datetime_does_not_exist(result):
            data.mark_invalid(f"{result} does not exist (usually a DST transition)")
        return result

    def draw_naive_datetime_and_combine(self, data, tz):
        parts = draw_capped_multipart(data, self.min_value, self.max_value)
        try:
            naive = dt.datetime(**parts)
            return replace_tzinfo(naive, timezone=tz)
        except (ValueError, OverflowError):
            # Either building the datetime or attaching the timezone failed.
            message = (
                f"Failed to draw a datetime between {self.min_value!r} and "
                f"{self.max_value!r} with timezone from {self.tz_strat!r}."
            )
            data.mark_invalid(message)
170
171
@defines_strategy(force_reusable_values=True)
def datetimes(
    min_value: dt.datetime = dt.datetime.min,
    max_value: dt.datetime = dt.datetime.max,
    *,
    timezones: SearchStrategy[Optional[dt.tzinfo]] = none(),
    allow_imaginary: bool = True,
) -> SearchStrategy[dt.datetime]:
    """datetimes(min_value=datetime.datetime.min, max_value=datetime.datetime.max, *, timezones=none(), allow_imaginary=True)

    A strategy for generating datetimes, which may be timezone-aware.

    This strategy works by drawing a naive datetime between ``min_value``
    and ``max_value``, which must both be naive (have no timezone).

    ``timezones`` must be a strategy that generates either ``None``, for naive
    datetimes, or :class:`~python:datetime.tzinfo` objects for 'aware' datetimes.
    You can construct your own, though we recommend using one of these built-in
    strategies:

    * with Python 3.9 or newer or :pypi:`backports.zoneinfo`:
      :func:`hypothesis.strategies.timezones`;
    * with :pypi:`dateutil <python-dateutil>`:
      :func:`hypothesis.extra.dateutil.timezones`; or
    * with :pypi:`pytz`: :func:`hypothesis.extra.pytz.timezones`.

    You may pass ``allow_imaginary=False`` to filter out "imaginary" datetimes
    which did not (or will not) occur due to daylight savings, leap seconds,
    timezone and calendar adjustments, etc.  Imaginary datetimes are allowed
    by default, because malformed timestamps are a common source of bugs.

    Examples from this strategy shrink towards midnight on January 1st 2000,
    local time.
    """
    # Why must the bounds be naive?  A strategy taking aware bounds could be
    # written in principle, but the API and validation would be much harder.
    # To generate datetimes between two particular moments in time, consider
    # (a) just filtering out-of-bounds values; (b) if the bounds are very
    # close, drawing a value and subtracting its UTC offset, handling
    # overflows and nonexistent times; or (c) something customised to handle
    # datetimes in e.g. a four-microsecond span which is not representable in
    # UTC.  Handling (d), all of the above, leads to a much more complex API
    # for all users and a useful feature for very few.
    check_type(bool, allow_imaginary, "allow_imaginary")
    for bound, label in ((min_value, "min_value"), (max_value, "max_value")):
        check_type(dt.datetime, bound, label)
    if min_value.tzinfo is not None:
        raise InvalidArgument(f"{min_value=} must not have tzinfo")
    if max_value.tzinfo is not None:
        raise InvalidArgument(f"{max_value=} must not have tzinfo")
    check_valid_interval(min_value, max_value, "min_value", "max_value")
    if isinstance(timezones, SearchStrategy):
        return DatetimeStrategy(min_value, max_value, timezones, allow_imaginary)
    raise InvalidArgument(
        f"{timezones=} must be a SearchStrategy that can "
        "provide tzinfo for datetimes (either None or dt.tzinfo objects)"
    )
229
230
class TimeStrategy(SearchStrategy):
    """Draws a time of day between two bounds, with tzinfo from another strategy."""

    def __init__(self, min_value, max_value, timezones_strat):
        self.min_value, self.max_value = min_value, max_value
        self.tz_strat = timezones_strat

    def do_draw(self, data):
        # The time components are drawn first, and the timezone afterwards.
        parts = draw_capped_multipart(data, self.min_value, self.max_value, TIMENAMES)
        zone = data.draw(self.tz_strat)
        return dt.time(tzinfo=zone, **parts)
241
242
@defines_strategy(force_reusable_values=True)
def times(
    min_value: dt.time = dt.time.min,
    max_value: dt.time = dt.time.max,
    *,
    timezones: SearchStrategy[Optional[dt.tzinfo]] = none(),
) -> SearchStrategy[dt.time]:
    """times(min_value=datetime.time.min, max_value=datetime.time.max, *, timezones=none())

    A strategy for times between ``min_value`` and ``max_value``, which must
    both be naive (have no timezone).

    The ``timezones`` argument is handled as for :py:func:`datetimes`: it must
    be a strategy that generates either ``None`` or
    :class:`~python:datetime.tzinfo` objects.

    Examples from this strategy shrink towards midnight, with the timezone
    component shrinking as for the strategy that provided it.
    """
    check_type(dt.time, min_value, "min_value")
    check_type(dt.time, max_value, "max_value")
    if min_value.tzinfo is not None:
        raise InvalidArgument(f"{min_value=} must not have tzinfo")
    if max_value.tzinfo is not None:
        raise InvalidArgument(f"{max_value=} must not have tzinfo")
    check_valid_interval(min_value, max_value, "min_value", "max_value")
    # Validate `timezones` up front, as datetimes() does, so that a bad argument
    # is reported as InvalidArgument rather than failing later at draw time.
    if not isinstance(timezones, SearchStrategy):
        raise InvalidArgument(
            f"{timezones=} must be a SearchStrategy that can "
            "provide tzinfo for times (either None or dt.tzinfo objects)"
        )
    return TimeStrategy(min_value, max_value, timezones)
267
268
class DateStrategy(SearchStrategy):
    """Draws a date between two distinct bounds, one component at a time."""

    def __init__(self, min_value, max_value):
        for bound in (min_value, max_value):
            assert isinstance(bound, dt.date)
        # The bounds must differ: dates() returns just(...) for equal bounds.
        assert min_value < max_value
        self.min_value, self.max_value = min_value, max_value

    def do_draw(self, data):
        parts = draw_capped_multipart(data, self.min_value, self.max_value, DATENAMES)
        return dt.date(**parts)
281
282
@defines_strategy(force_reusable_values=True)
def dates(
    min_value: dt.date = dt.date.min, max_value: dt.date = dt.date.max
) -> SearchStrategy[dt.date]:
    """dates(min_value=datetime.date.min, max_value=datetime.date.max)

    A strategy for dates between ``min_value`` and ``max_value``.

    Examples from this strategy shrink towards January 1st 2000.
    """
    for bound, label in ((min_value, "min_value"), (max_value, "max_value")):
        check_type(dt.date, bound, label)
    check_valid_interval(min_value, max_value, "min_value", "max_value")
    # With equal bounds there is only one possible value.
    return (
        just(min_value)
        if min_value == max_value
        else DateStrategy(min_value, max_value)
    )
299
300
class TimedeltaStrategy(SearchStrategy):
    """Draws a timedelta between two distinct bounds, one component at a time."""

    def __init__(self, min_value, max_value):
        for bound in (min_value, max_value):
            assert isinstance(bound, dt.timedelta)
        # The bounds must differ: timedeltas() returns just(...) for equal bounds.
        assert min_value < max_value
        self.min_value, self.max_value = min_value, max_value

    def do_draw(self, data):
        parts = {}
        # A component is limited by a bound only while every earlier component
        # was drawn exactly at that bound.
        pinned_low = pinned_high = True
        for unit in ("days", "seconds", "microseconds"):
            floor_source = self.min_value if pinned_low else dt.timedelta.min
            ceiling_source = self.max_value if pinned_high else dt.timedelta.max
            floor = getattr(floor_source, unit)
            ceiling = getattr(ceiling_source, unit)
            picked = data.draw_integer(floor, ceiling)
            parts[unit] = picked
            pinned_low = pinned_low and picked == floor
            pinned_high = pinned_high and picked == ceiling
        return dt.timedelta(**parts)
321
322
@defines_strategy(force_reusable_values=True)
def timedeltas(
    min_value: dt.timedelta = dt.timedelta.min,
    max_value: dt.timedelta = dt.timedelta.max,
) -> SearchStrategy[dt.timedelta]:
    """timedeltas(min_value=datetime.timedelta.min, max_value=datetime.timedelta.max)

    A strategy for timedeltas between ``min_value`` and ``max_value``.

    Examples from this strategy shrink towards zero.
    """
    for bound, label in ((min_value, "min_value"), (max_value, "max_value")):
        check_type(dt.timedelta, bound, label)
    check_valid_interval(min_value, max_value, "min_value", "max_value")
    # With equal bounds there is only one possible value.
    return (
        just(min_value)
        if min_value == max_value
        else TimedeltaStrategy(min_value=min_value, max_value=max_value)
    )
340
341
@lru_cache(maxsize=None)
def _valid_key_cacheable(tzpath, key):
    """Return True if timezone ``key`` is found under ``tzpath`` or in tzdata.

    ``tzpath`` must be a tuple of directories; each one is checked for an entry
    at the relative path ``key``.  If none has one, the matching resource is
    looked up in the ``tzdata.zoneinfo`` package instead, and False is returned
    when the relevant package cannot be imported.

    Results are cached per ``(tzpath, key)`` pair by ``lru_cache``.
    """
    assert isinstance(tzpath, tuple)  # zoneinfo changed, better update this function!
    for root in tzpath:
        if Path(root).joinpath(key).exists():  # pragma: no branch
            # No branch because most systems only have one TZPATH component.
            return True
    else:  # pragma: no cover
        # This branch is only taken for names which are known to zoneinfo
        # but not present on the filesystem, i.e. on Windows with tzdata,
        # and so is never executed by our coverage tests.
        *package_loc, resource_name = key.split("/")
        # Join the components rather than concatenating onto "tzdata.zoneinfo.",
        # so that a top-level key such as "UTC" or "GMT" maps to the package
        # "tzdata.zoneinfo" and not to the unimportable "tzdata.zoneinfo."
        # (with a trailing dot), which would always be reported as missing.
        package = ".".join(["tzdata.zoneinfo", *package_loc])
        try:
            try:
                traversable = resources.files(package) / resource_name
                return traversable.exists()
            except (AttributeError, ValueError):
                # .files() was added in Python 3.9
                return resources.is_resource(package, resource_name)
        except ModuleNotFoundError:
            return False
364
365
@defines_strategy(force_reusable_values=True)
def timezone_keys(
    *,
    # allow_alias: bool = True,
    # allow_deprecated: bool = True,
    allow_prefix: bool = True,
) -> SearchStrategy[str]:
    """A strategy for :wikipedia:`IANA timezone names <List_of_tz_database_time_zones>`.

    As well as timezone names like ``"UTC"``, ``"Australia/Sydney"``, or
    ``"America/New_York"``, this strategy can generate:

    - Aliases such as ``"Antarctica/McMurdo"``, which links to ``"Pacific/Auckland"``.
    - Deprecated names such as ``"Antarctica/South_Pole"``, which *also* links to
      ``"Pacific/Auckland"``.  Note that most but
      not all deprecated timezone names are also aliases.
    - Timezone names with the ``"posix/"`` or ``"right/"`` prefixes, unless
      ``allow_prefix=False``.

    These strings are provided separately from Tzinfo objects - such as ZoneInfo
    instances from the timezones() strategy - to facilitate testing of timezone
    logic without needing workarounds to access non-canonical names.

    .. note::

        The :mod:`python:zoneinfo` module is new in Python 3.9, so you will need
        to install the :pypi:`backports.zoneinfo` module on earlier versions.

        `On Windows, you will also need to install the tzdata package
        <https://docs.python.org/3/library/zoneinfo.html#data-sources>`__.

        ``pip install hypothesis[zoneinfo]`` will install these conditional
        dependencies if and only if they are needed.

    On Windows, you may need to access IANA timezone data via the :pypi:`tzdata`
    package.  For non-IANA timezones, such as Windows-native names or GNU TZ
    strings, we recommend using :func:`~hypothesis.strategies.sampled_from` with
    the :pypi:`dateutil <python-dateutil>` package, e.g.
    :meth:`dateutil:dateutil.tz.tzwin.list`.
    """
    # check_type(bool, allow_alias, "allow_alias")
    # check_type(bool, allow_deprecated, "allow_deprecated")
    check_type(bool, allow_prefix, "allow_prefix")
    if zoneinfo is None:  # pragma: no cover
        raise ModuleNotFoundError(
            "The zoneinfo module is required, but could not be imported.  "
            "Run `pip install hypothesis[zoneinfo]` and try again."
        )

    # When prefixes are allowed, a key is chosen first and then flatmapped to
    # that key with one of its available prefixes.  That needs a way to tell
    # which prefixed forms of a given key are actually available:
    def is_usable(key):
        if key == "UTC":
            return True
        return _valid_key_cacheable(zoneinfo.TZPATH, key)

    # "UTC" is listed first, ahead of the sorted names known to zoneinfo.
    candidates = ["UTC"]
    candidates.extend(sorted(zoneinfo.available_timezones()))

    # TODO: filter out alias and deprecated names if disallowed

    # TODO: work out how to place a higher priority on "weird" timezones
    # For details see https://github.com/HypothesisWorks/hypothesis/issues/2414
    strategy = sampled_from(list(filter(is_usable, candidates)))

    def sample_with_prefixes(zone):
        keys_with_prefixes = (zone, f"posix/{zone}", f"right/{zone}")
        return sampled_from(list(filter(is_usable, keys_with_prefixes)))

    if allow_prefix:
        return strategy.flatmap(sample_with_prefixes)
    return strategy
438
439
@defines_strategy(force_reusable_values=True)
def timezones(*, no_cache: bool = False) -> SearchStrategy["zoneinfo.ZoneInfo"]:
    """A strategy for :class:`python:zoneinfo.ZoneInfo` objects.

    If ``no_cache=True``, the generated instances are constructed using
    :meth:`ZoneInfo.no_cache <python:zoneinfo.ZoneInfo.no_cache>` instead
    of the usual constructor.  This may change the semantics of your datetimes
    in surprising ways, so only use it if you know that you need to!

    .. note::

        The :mod:`python:zoneinfo` module is new in Python 3.9, so you will need
        to install the :pypi:`backports.zoneinfo` module on earlier versions.

        `On Windows, you will also need to install the tzdata package
        <https://docs.python.org/3/library/zoneinfo.html#data-sources>`__.

        ``pip install hypothesis[zoneinfo]`` will install these conditional
        dependencies if and only if they are needed.
    """
    check_type(bool, no_cache, "no_cache")
    if zoneinfo is None:  # pragma: no cover
        raise ModuleNotFoundError(
            "The zoneinfo module is required, but could not be imported.  "
            "Run `pip install hypothesis[zoneinfo]` and try again."
        )
    # Each generated key is turned into a ZoneInfo by the chosen constructor.
    if no_cache:
        make_zone = zoneinfo.ZoneInfo.no_cache
    else:
        make_zone = zoneinfo.ZoneInfo
    return timezone_keys().map(make_zone)