1# This file is part of Hypothesis, which may be found at
2# https://github.com/HypothesisWorks/hypothesis/
3#
4# Copyright the Hypothesis Authors.
5# Individual contributors are listed in AUTHORS.rst and the git log.
6#
7# This Source Code Form is subject to the terms of the Mozilla Public License,
8# v. 2.0. If a copy of the MPL was not distributed with this file, You can
9# obtain one at https://mozilla.org/MPL/2.0/.
10
11import datetime as dt
12import operator as op
13import warnings
14import zoneinfo
15from functools import cache, partial
16from importlib import resources
17from pathlib import Path
18
19from hypothesis.errors import InvalidArgument
20from hypothesis.internal.validation import check_type, check_valid_interval
21from hypothesis.strategies._internal.core import sampled_from
22from hypothesis.strategies._internal.misc import just, none, nothing
23from hypothesis.strategies._internal.strategies import SearchStrategy
24from hypothesis.strategies._internal.utils import defines_strategy
25
# Attribute names of the calendar fields, ordered from most to least
# significant. draw_capped_multipart() reads these off its bounds via getattr.
DATENAMES = ("year", "month", "day")
# Attribute names of the time-of-day fields, ordered from most to least
# significant.
TIMENAMES = ("hour", "minute", "second", "microsecond")
28
29
def is_pytz_timezone(tz):
    """Return True if ``tz`` is a tzinfo instance whose class lives in pytz.

    The check looks only at the name of the module that defines ``type(tz)``,
    so pytz itself is never imported here. Anything that is not a
    ``datetime.tzinfo`` instance is reported as False.
    """
    if isinstance(tz, dt.tzinfo):
        defining_module = type(tz).__module__
        return defining_module == "pytz" or defining_module.startswith("pytz.")
    return False
35
36
def replace_tzinfo(value, timezone):
    """Return ``value`` with ``timezone`` attached as its tzinfo.

    Non-pytz timezones are attached with ``value.replace(tzinfo=...)``; pytz
    timezones go through ``timezone.localize`` instead.
    """
    if not is_pytz_timezone(timezone):
        return value.replace(tzinfo=timezone)

    # Pytz timezones are a little complicated, and .replace() can cause some
    # weird issues with them, so we call their special "localize" method.
    #
    # The fold attribute serves as a convenient boolean for is_dst, even though
    # the two are semantically distinct. For ambiguous or imaginary hours, fold
    # selects the offset that applies before the gap (fold=0) or after the gap
    # (fold=1), whereas is_dst selects the "DST" or the "STD" side (STD->STD or
    # DST->DST transitions are unclear, as you might expect).
    #
    # WARNING: this is INCORRECT for timezones with negative DST offsets such as
    # "Europe/Dublin", but it's unclear what we could do instead beyond
    # documenting the problem and recommending use of `dateutil` instead.
    prefer_dst = not value.fold
    return timezone.localize(value, is_dst=prefer_dst)
54
55
def datetime_does_not_exist(value):
    """Report whether ``value`` fails to survive a round trip through UTC.

    This is an exact inverse of (and very similar to) the dateutil method
    https://dateutil.readthedocs.io/en/stable/tz.html#dateutil.tz.datetime_exists
    """
    tz = value.tzinfo
    # A naive datetime can never be imaginary. The special case is required
    # because chained .astimezone() calls would end in *the system local
    # timezone* rather than None.
    # See bug report in https://github.com/HypothesisWorks/hypothesis/issues/2662
    if tz is None:
        return False

    # Convert to UTC and back again; if the naive portion changes, or the
    # conversion overflows, the datetime is treated as nonexistent.
    try:
        roundtrip = value.astimezone(dt.timezone.utc).astimezone(tz)
    except OverflowError:
        # Happens right at the datetime.min / datetime.max boundaries.
        # Rejecting these is acceptable: timezones are close to meaningless
        # before ~1900 and subject to a lot of change by 9999, so only a very
        # small fraction of possible values is affected.
        return True

    if tz is not roundtrip.tzinfo:
        if value.utcoffset() != roundtrip.utcoffset():
            # Only seen for imaginary (i.e. nonexistent) datetimes, and only
            # with pytz timezones, which do not follow PEP-495 semantics.
            # (may exclude a few other edge cases, but you should use zoneinfo
            # anyway)
            return True

    assert tz is roundtrip.tzinfo, "so only the naive portions are compared"
    return value != roundtrip
88
89
def _num_days_in_month(year, month):
    """Branchless equivalent of ``monthrange(year, month)[1]`` for valid inputs.

    Only arithmetic and (in)equality comparisons are used - no ``if``, no
    indexing - so that the inputs are not concretized and no more path
    constraints than necessary are added.
    """
    # 1 only for century years that are not divisible by 400 (e.g. 1900, 2100).
    skipped_century = (year % 100 == 0) * (year % 400 != 0)
    leap = (year % 4 == 0) * (1 - skipped_century)
    # The product is 1 unless the month is April, June, September or November.
    not_30_day = (month != 4) * (month != 6) * (month != 9) * (month != 11)
    thirty_day = 1 - not_30_day
    february = month == 2
    # Start from 31 and subtract: 1 for the 30-day months, 3 (or 2 in a leap
    # year) for February.
    return 31 - thirty_day - february * (3 - leap)
100
101
def draw_capped_multipart(
    data, min_value, max_value, duration_names=DATENAMES + TIMENAMES
):
    """Draw one integer per field name so the fields lie within the bounds.

    ``min_value`` and ``max_value`` must be of the same type (date, time or
    datetime) with ``min_value <= max_value``. Fields are drawn in the order
    given by ``duration_names``. Returns a dict mapping each name to the drawn
    integer, plus a ``"fold"`` entry when ``min_value`` has a ``fold``
    attribute.
    """
    assert isinstance(min_value, (dt.date, dt.time, dt.datetime))
    assert type(min_value) == type(max_value)
    assert min_value <= max_value

    smallest, largest = dt.datetime.min, dt.datetime.max

    # on_min / on_max record whether every field drawn so far was equal to the
    # corresponding field of ``min_value`` / ``max_value``, i.e. whether that
    # bound is still "active" and constrains the next field.
    #
    # They are conceptually booleans, but are kept as integers and combined by
    # multiplication, so that we neither concretize nor branch under symbolic
    # backends. See https://github.com/HypothesisWorks/hypothesis/issues/4759.
    on_min = 1
    on_max = 1
    result = {}
    for name in duration_names:
        natural_low = getattr(smallest, name)
        # The last valid day depends on the year and month already drawn.
        natural_high = (
            _num_days_in_month(result["year"], result["month"])
            if name == "day"
            else getattr(largest, name)
        )
        # Arithmetic spelling of:
        #   low = min_value.<name> if on_min else natural_low
        #   high = max_value.<name> if on_max else natural_high
        low = natural_low + on_min * (getattr(min_value, name) - natural_low)
        high = natural_high + on_max * (getattr(max_value, name) - natural_high)
        if name != "year":
            val = data.draw_integer(low, high)
        else:
            val = data.draw_integer(low, high, shrink_towards=2000)
        result[name] = val
        on_min = on_min * (val == low)
        on_max = on_max * (val == high)

    if hasattr(min_value, "fold"):
        # The `fold` attribute is ignored in comparison of naive datetimes.
        # In tz-aware datetimes it would require *very* invasive changes to
        # the logic above, and be very sensitive to the specific timezone
        # (at the cost of efficient shrinking and mutation), so at least for
        # now we stick with the status quo and generate it independently.
        result["fold"] = data.draw_integer(0, 1)
    return result
146
147
class DatetimeStrategy(SearchStrategy):
    """Strategy producing datetimes between two naive bounds.

    Each draw takes a timezone from ``timezones_strat``, draws a naive datetime
    between ``min_value`` and ``max_value``, and attaches the timezone to it.
    """

    def __init__(self, min_value, max_value, timezones_strat, allow_imaginary):
        super().__init__()
        assert isinstance(min_value, dt.datetime)
        assert isinstance(max_value, dt.datetime)
        assert min_value.tzinfo is None
        assert max_value.tzinfo is None
        assert min_value <= max_value
        assert isinstance(timezones_strat, SearchStrategy)
        assert isinstance(allow_imaginary, bool)
        self.min_value = min_value
        self.max_value = max_value
        self.tz_strat = timezones_strat
        self.allow_imaginary = allow_imaginary

    def do_draw(self, data):
        """Draw a timezone and then a datetime carrying that timezone."""
        # The timezone is drawn first, followed by the initial datetime.
        tz = data.draw(self.tz_strat)
        result = self.draw_naive_datetime_and_combine(data, tz)

        # TODO: with some probability, systematically search for one of
        #   - an imaginary time (if allowed),
        #   - a time within 24hrs of a leap second (if there are any within bounds),
        #   - other subtle, little-known, or nasty issues as described in
        #     https://github.com/HypothesisWorks/hypothesis/issues/69

        # Reject the value if it turned out to be a disallowed imaginary time.
        must_reject = (not self.allow_imaginary) and datetime_does_not_exist(result)
        if must_reject:
            data.mark_invalid(f"{result} does not exist (usually a DST transition)")
        return result

    def draw_naive_datetime_and_combine(self, data, tz):
        """Draw the naive datetime fields and attach ``tz`` to the result.

        If building the datetime or attaching the timezone raises ValueError
        or OverflowError, the draw is marked invalid.
        """
        fields = draw_capped_multipart(data, self.min_value, self.max_value)
        try:
            naive = dt.datetime(**fields)
            return replace_tzinfo(naive, timezone=tz)
        except (ValueError, OverflowError):
            data.mark_invalid(
                f"Failed to draw a datetime between {self.min_value!r} and "
                f"{self.max_value!r} with timezone from {self.tz_strat!r}."
            )
188
189
@defines_strategy(force_reusable_values=True)
def datetimes(
    min_value: dt.datetime = dt.datetime.min,
    max_value: dt.datetime = dt.datetime.max,
    *,
    timezones: SearchStrategy[dt.tzinfo | None] = none(),
    allow_imaginary: bool = True,
) -> SearchStrategy[dt.datetime]:
    """datetimes(min_value=datetime.datetime.min, max_value=datetime.datetime.max, *, timezones=none(), allow_imaginary=True)

    A strategy for generating datetimes, which may be timezone-aware.

    This strategy works by drawing a naive datetime between ``min_value``
    and ``max_value``, which must both be naive (have no timezone).

    ``timezones`` must be a strategy that generates either ``None``, for naive
    datetimes, or :class:`~python:datetime.tzinfo` objects for 'aware' datetimes.
    You can construct your own, though we recommend using one of these built-in
    strategies:

    * with the standard library: :func:`hypothesis.strategies.timezones`;
    * with :pypi:`dateutil <python-dateutil>`:
      :func:`hypothesis.extra.dateutil.timezones`; or
    * with :pypi:`pytz`: :func:`hypothesis.extra.pytz.timezones`.

    You may pass ``allow_imaginary=False`` to filter out "imaginary" datetimes
    which did not (or will not) occur due to daylight savings, leap seconds,
    timezone and calendar adjustments, etc.  Imaginary datetimes are allowed
    by default, because malformed timestamps are a common source of bugs.

    Examples from this strategy shrink towards midnight on January 1st 2000,
    local time.
    """
    # Why insist on naive bounds?  A strategy taking aware bounds is possible
    # in principle, but its API and validation would be much harder.  To
    # generate datetimes between two particular moments in time, I suggest
    # (a) just filtering out-of-bounds values; (b) if the bounds are very
    # close, drawing a value and subtracting its UTC offset, handling overflows
    # and nonexistent times; or (c) doing something customised to handle
    # datetimes in e.g. a four-microsecond span which is not representable in
    # UTC.  Handling (d), all of the above, leads to a much more complex API
    # for all users and a useful feature for very few.
    check_type(bool, allow_imaginary, "allow_imaginary")
    for bound, label in ((min_value, "min_value"), (max_value, "max_value")):
        check_type(dt.datetime, bound, label)
    if min_value.tzinfo is not None:
        raise InvalidArgument(f"{min_value=} must not have tzinfo")
    if max_value.tzinfo is not None:
        raise InvalidArgument(f"{max_value=} must not have tzinfo")
    check_valid_interval(min_value, max_value, "min_value", "max_value")
    if isinstance(timezones, SearchStrategy):
        return DatetimeStrategy(min_value, max_value, timezones, allow_imaginary)
    raise InvalidArgument(
        f"{timezones=} must be a SearchStrategy that can "
        "provide tzinfo for datetimes (either None or dt.tzinfo objects)"
    )
246
247
class TimeStrategy(SearchStrategy):
    """Strategy producing ``datetime.time`` values between two bounds.

    The tzinfo of each value is drawn from ``timezones_strat``.
    """

    def __init__(self, min_value, max_value, timezones_strat):
        super().__init__()
        self.min_value = min_value
        self.max_value = max_value
        self.tz_strat = timezones_strat

    def do_draw(self, data):
        """Draw the time-of-day fields, then the timezone, and combine them."""
        fields = draw_capped_multipart(data, self.min_value, self.max_value, TIMENAMES)
        # The timezone is drawn after the fields; the draw order is kept as is.
        zone = data.draw(self.tz_strat)
        return dt.time(**fields, tzinfo=zone)
259
260
@defines_strategy(force_reusable_values=True)
def times(
    min_value: dt.time = dt.time.min,
    max_value: dt.time = dt.time.max,
    *,
    timezones: SearchStrategy[dt.tzinfo | None] = none(),
) -> SearchStrategy[dt.time]:
    """times(min_value=datetime.time.min, max_value=datetime.time.max, *, timezones=none())

    A strategy for times between ``min_value`` and ``max_value``.

    The ``timezones`` argument is handled as for :py:func:`datetimes`.

    Examples from this strategy shrink towards midnight, with the timezone
    component shrinking as for the strategy that provided it.
    """
    check_type(dt.time, min_value, "min_value")
    check_type(dt.time, max_value, "max_value")
    # Both bounds must be naive; the tzinfo comes only from ``timezones``.
    if min_value.tzinfo is not None:
        raise InvalidArgument(f"{min_value=} must not have tzinfo")
    if max_value.tzinfo is not None:
        raise InvalidArgument(f"{max_value=} must not have tzinfo")
    check_valid_interval(min_value, max_value, "min_value", "max_value")
    # Validate ``timezones`` here, exactly as datetimes() does, so that a bad
    # argument is reported as InvalidArgument rather than failing later when
    # the strategy tries to draw from something that is not a strategy.
    if not isinstance(timezones, SearchStrategy):
        raise InvalidArgument(
            f"{timezones=} must be a SearchStrategy that can "
            "provide tzinfo for times (either None or dt.tzinfo objects)"
        )
    return TimeStrategy(min_value, max_value, timezones)
285
286
class DateStrategy(SearchStrategy):
    """Strategy producing ``datetime.date`` values in ``[min_value, max_value]``.

    The bounds must be distinct; dates() handles the single-value case itself.
    """

    def __init__(self, min_value, max_value):
        super().__init__()
        assert isinstance(min_value, dt.date)
        assert isinstance(max_value, dt.date)
        assert min_value < max_value
        self.min_value = min_value
        self.max_value = max_value

    def do_draw(self, data):
        """Draw year, month and day within the bounds and build a date."""
        return dt.date(
            **draw_capped_multipart(data, self.min_value, self.max_value, DATENAMES)
        )

    def filter(self, condition):
        """Apply ``condition``, narrowing the bounds instead where possible.

        A ``functools.partial`` of one of the ``operator`` comparisons (lt, le,
        eq, ge, gt) with a single positional date argument is turned into a
        strategy with tighter bounds: ``nothing()`` if no date can satisfy it,
        ``self`` if every date already does, and ``dates(lo, hi)`` otherwise.
        Any other condition is passed to the generic filter.
        """
        if (
            isinstance(condition, partial)
            and len(args := condition.args) == 1
            and not condition.keywords
            and isinstance(arg := args[0], dt.date)
            # datetime is a subclass of date, but a datetime cannot be ordered
            # against the plain-date bounds below (max/min would raise
            # TypeError), so leave those conditions to the generic filter.
            and not isinstance(arg, dt.datetime)
            and condition.func in (op.lt, op.le, op.eq, op.ge, op.gt)
        ):
            try:
                # Turn the strict comparisons into inclusive bounds by moving
                # the argument one day inwards.
                arg += dt.timedelta(days={op.lt: 1, op.gt: -1}.get(condition.func, 0))
            except OverflowError:  # gt date.max, or lt date.min
                return nothing()
            lo, hi = {
                # We're talking about op(arg, x) - the reverse of our usual intuition!
                op.lt: (arg, self.max_value),  # lambda x: arg < x
                op.le: (arg, self.max_value),  # lambda x: arg <= x
                op.eq: (arg, arg),  # lambda x: arg == x
                op.ge: (self.min_value, arg),  # lambda x: arg >= x
                op.gt: (self.min_value, arg),  # lambda x: arg > x
            }[condition.func]
            # Intersect the requested range with the existing bounds.
            lo = max(lo, self.min_value)
            hi = min(hi, self.max_value)
            if hi < lo:
                return nothing()
            if lo <= self.min_value and self.max_value <= hi:
                return self
            return dates(lo, hi)

        return super().filter(condition)
331
332
@defines_strategy(force_reusable_values=True)
def dates(
    min_value: dt.date = dt.date.min, max_value: dt.date = dt.date.max
) -> SearchStrategy[dt.date]:
    """dates(min_value=datetime.date.min, max_value=datetime.date.max)

    A strategy for dates between ``min_value`` and ``max_value``.

    Examples from this strategy shrink towards January 1st 2000.
    """
    for bound, label in ((min_value, "min_value"), (max_value, "max_value")):
        check_type(dt.date, bound, label)
    check_valid_interval(min_value, max_value, "min_value", "max_value")
    # DateStrategy requires distinct bounds, so a one-day range is a constant.
    single_day = min_value == max_value
    return just(min_value) if single_day else DateStrategy(min_value, max_value)
349
350
class TimedeltaStrategy(SearchStrategy):
    """Strategy producing timedeltas in ``[min_value, max_value]``.

    The bounds must be distinct timedeltas with ``min_value < max_value``.
    """

    def __init__(self, min_value, max_value):
        super().__init__()
        assert isinstance(min_value, dt.timedelta)
        assert isinstance(max_value, dt.timedelta)
        assert min_value < max_value
        self.min_value = min_value
        self.max_value = max_value

    def do_draw(self, data):
        """Draw days, seconds and microseconds in turn and build a timedelta."""
        fields = {}
        # A bound keeps constraining later components only while every
        # component drawn so far has been equal to that bound's component.
        at_min = True
        at_max = True
        for unit in ("days", "seconds", "microseconds"):
            lower_source = self.min_value if at_min else dt.timedelta.min
            upper_source = self.max_value if at_max else dt.timedelta.max
            low = getattr(lower_source, unit)
            high = getattr(upper_source, unit)
            drawn = data.draw_integer(low, high)
            fields[unit] = drawn
            at_min = at_min and drawn == low
            at_max = at_max and drawn == high
        return dt.timedelta(**fields)
372
373
@defines_strategy(force_reusable_values=True)
def timedeltas(
    min_value: dt.timedelta = dt.timedelta.min,
    max_value: dt.timedelta = dt.timedelta.max,
) -> SearchStrategy[dt.timedelta]:
    """timedeltas(min_value=datetime.timedelta.min, max_value=datetime.timedelta.max)

    A strategy for timedeltas between ``min_value`` and ``max_value``.

    Examples from this strategy shrink towards zero.
    """
    for bound, label in ((min_value, "min_value"), (max_value, "max_value")):
        check_type(dt.timedelta, bound, label)
    check_valid_interval(min_value, max_value, "min_value", "max_value")
    # TimedeltaStrategy requires distinct bounds; equal bounds give a constant.
    if min_value != max_value:
        return TimedeltaStrategy(min_value=min_value, max_value=max_value)
    return just(min_value)
391
392
@cache
def _valid_key_cacheable(tzpath, key):
    """Return whether timezone data for ``key`` can be found.

    Each directory in the tuple ``tzpath`` is checked for a file at ``key``.
    If none has one, the ``tzdata`` package is consulted instead; False is
    returned when the relevant ``tzdata`` (sub)package cannot be imported.
    Results are cached per ``(tzpath, key)`` pair.
    """
    assert isinstance(tzpath, tuple)  # zoneinfo changed, better update this function!
    for root in tzpath:
        if Path(root).joinpath(key).exists():  # pragma: no branch
            # No branch because most systems only have one TZPATH component.
            return True
    else:  # pragma: no cover
        # This branch is only taken for names which are known to zoneinfo
        # but not present on the filesystem, i.e. on Windows with tzdata,
        # and so is never executed by our coverage tests.
        *package_loc, resource_name = key.split("/")
        # Join the components instead of appending to "tzdata.zoneinfo.": a
        # top-level key such as "GMT" has no sub-packages, and the trailing
        # dot would name a module that can never be imported, so the key
        # would wrongly be reported as invalid.
        package = ".".join(["tzdata.zoneinfo", *package_loc])
        try:
            return (resources.files(package) / resource_name).exists()
        except ModuleNotFoundError:
            return False
410
411
@defines_strategy(force_reusable_values=True)
def timezone_keys(
    *,
    # allow_alias: bool = True,
    # allow_deprecated: bool = True,
    allow_prefix: bool = True,
) -> SearchStrategy[str]:
    """A strategy for :wikipedia:`IANA timezone names <List_of_tz_database_time_zones>`.

    As well as timezone names like ``"UTC"``, ``"Australia/Sydney"``, or
    ``"America/New_York"``, this strategy can generate:

    - Aliases such as ``"Antarctica/McMurdo"``, which links to ``"Pacific/Auckland"``.
    - Deprecated names such as ``"Antarctica/South_Pole"``, which *also* links to
      ``"Pacific/Auckland"``.  Note that most but
      not all deprecated timezone names are also aliases.
    - Timezone names with the ``"posix/"`` or ``"right/"`` prefixes, unless
      ``allow_prefix=False``.

    These strings are provided separately from Tzinfo objects - such as ZoneInfo
    instances from the timezones() strategy - to facilitate testing of timezone
    logic without needing workarounds to access non-canonical names.

    .. note::

        `The tzdata package is required on Windows
        <https://docs.python.org/3/library/zoneinfo.html#data-sources>`__.
        ``pip install hypothesis[zoneinfo]`` installs it, if and only if needed.

    On Windows, you may need to access IANA timezone data via the :pypi:`tzdata`
    package.  For non-IANA timezones, such as Windows-native names or GNU TZ
    strings, we recommend using :func:`~hypothesis.strategies.sampled_from` with
    the :pypi:`dateutil <python-dateutil>` package, e.g.
    :meth:`dateutil:dateutil.tz.tzwin.list`.
    """
    # check_type(bool, allow_alias, "allow_alias")
    # check_type(bool, allow_deprecated, "allow_deprecated")
    check_type(bool, allow_prefix, "allow_prefix")

    with warnings.catch_warnings():
        try:
            warnings.simplefilter("ignore", EncodingWarning)
        except NameError:  # pragma: no cover
            pass
        # On Python 3.12 (and others?), `available_timezones()` opens files
        # without specifying an encoding - which our selftests make an error.
        known_keys = ("UTC", *sorted(zoneinfo.available_timezones()))

    # TODO: filter out alias and deprecated names if disallowed

    # With prefixes allowed, a key is chosen first and then flatmapped to that
    # key under one of its available prefixes.  That requires a way of telling
    # which prefixed forms are available for a given key:

    def valid_key(key):
        return key == "UTC" or _valid_key_cacheable(zoneinfo.TZPATH, key)

    # TODO: work out how to place a higher priority on "weird" timezones
    # For details see https://github.com/HypothesisWorks/hypothesis/issues/2414
    strategy = sampled_from(list(filter(valid_key, known_keys)))

    def sample_with_prefixes(zone):
        candidates = (zone, f"posix/{zone}", f"right/{zone}")
        return sampled_from([name for name in candidates if valid_key(name)])

    if allow_prefix:
        return strategy.flatmap(sample_with_prefixes)
    return strategy
481
482
@defines_strategy(force_reusable_values=True)
def timezones(*, no_cache: bool = False) -> SearchStrategy["zoneinfo.ZoneInfo"]:
    """A strategy for :class:`python:zoneinfo.ZoneInfo` objects.

    If ``no_cache=True``, the generated instances are constructed using
    :meth:`ZoneInfo.no_cache <python:zoneinfo.ZoneInfo.no_cache>` instead
    of the usual constructor.  This may change the semantics of your datetimes
    in surprising ways, so only use it if you know that you need to!

    .. note::

        `The tzdata package is required on Windows
        <https://docs.python.org/3/library/zoneinfo.html#data-sources>`__.
        ``pip install hypothesis[zoneinfo]`` installs it, if and only if needed.
    """
    check_type(bool, no_cache, "no_cache")
    # Each generated key is turned into a ZoneInfo by the chosen constructor.
    if no_cache:
        build_zone = zoneinfo.ZoneInfo.no_cache
    else:
        build_zone = zoneinfo.ZoneInfo
    return timezone_keys().map(build_zone)