Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/arrow/parser.py: 87%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
"""Provides the :class:`DateTimeParser <arrow.parser.DateTimeParser>` class, a better way to parse datetime strings."""
3import re
4from datetime import datetime, timedelta
5from datetime import tzinfo as dt_tzinfo
6from functools import lru_cache
7from typing import (
8 Any,
9 ClassVar,
10 Dict,
11 Iterable,
12 List,
13 Literal,
14 Match,
15 Optional,
16 Pattern,
17 SupportsFloat,
18 SupportsInt,
19 Tuple,
20 TypedDict,
21 Union,
22 cast,
23 overload,
24)
26from dateutil import tz
28from arrow import locales
29from arrow.constants import DEFAULT_LOCALE
30from arrow.util import next_weekday, normalize_timestamp
class ParserError(ValueError):
    """Signal that a datetime string could not be parsed.

    Derives from the built-in :class:`ValueError`, so callers that already
    handle ``ValueError`` also handle parsing failures.
    """
45# Allows for ParserErrors to be propagated from _build_datetime()
46# when day_of_year errors occur.
47# Before this, the ParserErrors were caught by the try/except in
48# _parse_multiformat() and the appropriate error message was not
49# transmitted to the user.
class ParserMatchError(ParserError):
    """Signal that an input string did not match a format string.

    Being a distinct :class:`ParserError` subclass lets callers that try
    several formats swallow a plain mismatch while still letting every other
    :class:`ParserError` propagate.
    """
# One component (year, week or day) of an ISO week date. The components are
# converted with ``int()`` when the datetime is built, so the alias lists the
# argument types that ``int()`` accepts.
_WEEKDATE_ELEMENT = Union[str, bytes, SupportsInt, bytearray]

# Every format token that can be looked up in the parser's token -> regular
# expression map.
_FORMAT_TYPE = Literal[
    "YYYY",
    "YY",
    "MM",
    "M",
    "DDDD",
    "DDD",
    "DD",
    "D",
    "HH",
    "H",
    "hh",
    "h",
    "mm",
    "m",
    "ss",
    "s",
    "X",
    "x",
    "ZZZ",
    "ZZ",
    "Z",
    "S",
    "W",
    "MMMM",
    "MMM",
    "Do",
    "dddd",
    "ddd",
    "d",
    "a",
    "A",
]
class _Parts(TypedDict, total=False):
    """
    A dictionary that represents different parts of a datetime.

    :class:`_Parts` is a TypedDict that represents various components of a date or time,
    such as year, month, day_of_year, day, hour, minute, second, microsecond, timestamp,
    expanded_timestamp, tzinfo, am_pm, day_of_week, and weekdate. It is declared with
    ``total=False``, so every key is optional.

    :ivar year: The year, if present, as an integer.
    :ivar month: The month, if present, as an integer.
    :ivar day_of_year: The day of the year, if present, as an integer.
    :ivar day: The day, if present, as an integer.
    :ivar hour: The hour, if present, as an integer.
    :ivar minute: The minute, if present, as an integer.
    :ivar second: The second, if present, as an integer.
    :ivar microsecond: The microsecond, if present, as an integer.
    :ivar timestamp: The timestamp, if present, as a float.
    :ivar expanded_timestamp: The expanded timestamp, if present, as an integer.
    :ivar tzinfo: The timezone info, if present, as a :class:`dt_tzinfo` object.
    :ivar am_pm: The AM/PM indicator, if present, as a string literal "am" or "pm".
    :ivar day_of_week: The day of the week, if present, as an integer.
    :ivar weekdate: The week date, if present, as a ``(year, week, day)`` tuple whose
        ``day`` element may be None.
    """

    year: int
    month: int
    day_of_year: int
    day: int
    hour: int
    minute: int
    second: int
    microsecond: int
    timestamp: float
    expanded_timestamp: int
    tzinfo: dt_tzinfo
    am_pm: Literal["am", "pm"]
    day_of_week: int
    weekdate: Tuple[_WEEKDATE_ELEMENT, _WEEKDATE_ELEMENT, Optional[_WEEKDATE_ELEMENT]]
class DateTimeParser:
    """A :class:`DateTimeParser <arrow.parser.DateTimeParser>` object

    Contains the regular expressions and functions to parse and split the input strings into tokens and eventually
    produce a datetime that is used by :class:`Arrow <arrow.arrow.Arrow>` internally.

    :param locale: the locale string
    :param cache_size: the size of the LRU cache used for regular expressions. Defaults to 0.

    """

    # Splits a format string into its tokens (e.g. "YYYY", "MM", "Do").
    _FORMAT_RE: ClassVar[Pattern[str]] = re.compile(
        r"(YYY?Y?|MM?M?M?|Do|DD?D?D?|d?d?d?d|HH?|hh?|mm?|ss?|S+|ZZ?Z?|a|A|x|X|W)"
    )
    # Matches one bracketed (non-nested) escape such as "[at]" in a format string.
    _ESCAPE_RE: ClassVar[Pattern[str]] = re.compile(r"\[[^\[\]]*\]")

    _ONE_OR_TWO_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{1,2}")
    _ONE_OR_TWO_OR_THREE_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{1,3}")
    _ONE_OR_MORE_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d+")
    _TWO_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{2}")
    _THREE_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{3}")
    _FOUR_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{4}")
    _TZ_Z_RE: ClassVar[Pattern[str]] = re.compile(r"([\+\-])(\d{2})(?:(\d{2}))?|Z")
    _TZ_ZZ_RE: ClassVar[Pattern[str]] = re.compile(r"([\+\-])(\d{2})(?:\:(\d{2}))?|Z")
    _TZ_NAME_RE: ClassVar[Pattern[str]] = re.compile(r"\w[\w+\-/]+")
    # NOTE: timestamps cannot be parsed from natural language strings (by removing the ^...$) because it will
    # break cases like "15 Jul 2000" and a format list (see issue #447)
    # NOTE: this pattern needs at least two digits, so a single-digit timestamp such as "5" does not match.
    _TIMESTAMP_RE: ClassVar[Pattern[str]] = re.compile(r"^\-?\d+\.?\d+$")
    _TIMESTAMP_EXPANDED_RE: ClassVar[Pattern[str]] = re.compile(r"^\-?\d+$")
    _TIME_RE: ClassVar[Pattern[str]] = re.compile(
        r"^(\d{2})(?:\:?(\d{2}))?(?:\:?(\d{2}))?(?:([\.\,])(\d+))?$"
    )
    _WEEK_DATE_RE: ClassVar[Pattern[str]] = re.compile(
        r"(?P<year>\d{4})[\-]?W(?P<week>\d{2})[\-]?(?P<day>\d)?"
    )

    # Locale-independent token patterns; __init__ copies this map and adds the
    # locale-dependent tokens (month/day names, ordinals, meridians).
    _BASE_INPUT_RE_MAP: ClassVar[Dict[_FORMAT_TYPE, Pattern[str]]] = {
        "YYYY": _FOUR_DIGIT_RE,
        "YY": _TWO_DIGIT_RE,
        "MM": _TWO_DIGIT_RE,
        "M": _ONE_OR_TWO_DIGIT_RE,
        "DDDD": _THREE_DIGIT_RE,
        "DDD": _ONE_OR_TWO_OR_THREE_DIGIT_RE,
        "DD": _TWO_DIGIT_RE,
        "D": _ONE_OR_TWO_DIGIT_RE,
        "HH": _TWO_DIGIT_RE,
        "H": _ONE_OR_TWO_DIGIT_RE,
        "hh": _TWO_DIGIT_RE,
        "h": _ONE_OR_TWO_DIGIT_RE,
        "mm": _TWO_DIGIT_RE,
        "m": _ONE_OR_TWO_DIGIT_RE,
        "ss": _TWO_DIGIT_RE,
        "s": _ONE_OR_TWO_DIGIT_RE,
        "X": _TIMESTAMP_RE,
        "x": _TIMESTAMP_EXPANDED_RE,
        "ZZZ": _TZ_NAME_RE,
        "ZZ": _TZ_ZZ_RE,
        "Z": _TZ_Z_RE,
        "S": _ONE_OR_MORE_DIGIT_RE,
        "W": _WEEK_DATE_RE,
    }

    SEPARATORS: ClassVar[List[str]] = ["-", "/", "."]

    locale: locales.Locale
    _input_re_map: Dict[_FORMAT_TYPE, Pattern[str]]

    def __init__(self, locale: str = DEFAULT_LOCALE, cache_size: int = 0) -> None:
        """
        Build the token -> regular expression map for the requested locale.

        :param locale: the locale string
        :type locale: str
        :param cache_size: the size of the LRU cache used for regular expressions. Defaults to 0.
            When greater than 0, ``_generate_pattern_re`` is wrapped in an
            ``lru_cache`` of that size on this instance.
        :type cache_size: int
        """
        self.locale = locales.get_locale(locale)
        self._input_re_map = self._BASE_INPUT_RE_MAP.copy()
        self._input_re_map.update(
            {
                "MMMM": self._generate_choice_re(
                    self.locale.month_names[1:], re.IGNORECASE
                ),
                "MMM": self._generate_choice_re(
                    self.locale.month_abbreviations[1:], re.IGNORECASE
                ),
                "Do": re.compile(self.locale.ordinal_day_re),
                "dddd": self._generate_choice_re(
                    self.locale.day_names[1:], re.IGNORECASE
                ),
                "ddd": self._generate_choice_re(
                    self.locale.day_abbreviations[1:], re.IGNORECASE
                ),
                "d": re.compile(r"[1-7]"),
                "a": self._generate_choice_re(
                    (self.locale.meridians["am"], self.locale.meridians["pm"])
                ),
                # note: 'A' token accepts both 'am/pm' and 'AM/PM' formats to
                # ensure backwards compatibility of this token
                "A": self._generate_choice_re(self.locale.meridians.values()),
            }
        )
        if cache_size > 0:
            self._generate_pattern_re = lru_cache(maxsize=cache_size)(  # type: ignore
                self._generate_pattern_re
            )

    # TODO: since we support more than ISO 8601, we should rename this function
    # IDEA: break into multiple functions
    def parse_iso(
        self, datetime_string: str, normalize_whitespace: bool = False
    ) -> datetime:
        """
        Parses a datetime string using a ISO 8601-like format.

        The date part is tried against a fixed list of date formats. When a
        time part is present (separated by a single space or a ``T``), a
        matching time format and, if found, a timezone token are appended to
        every candidate format before parsing.

        :param datetime_string: The datetime string to parse.
        :param normalize_whitespace: Whether to strip the string and collapse runs of
            whitespace into a single space before parsing (default is False).
        :type datetime_string: str
        :type normalize_whitespace: bool
        :returns: The parsed datetime object.
        :rtype: datetime
        :raises ParserError: If the datetime string is not in a valid ISO 8601-like format.

        Usage::
            >>> import arrow.parser
            >>> arrow.parser.DateTimeParser().parse_iso('2021-10-12T14:30:00')
            datetime.datetime(2021, 10, 12, 14, 30)

        """
        if normalize_whitespace:
            datetime_string = re.sub(r"\s+", " ", datetime_string.strip())

        has_space_divider = " " in datetime_string
        has_t_divider = "T" in datetime_string

        num_spaces = datetime_string.count(" ")
        # At most one divider is allowed: exactly one space, or a "T" with no spaces.
        if (has_space_divider and num_spaces != 1) or (
            has_t_divider and num_spaces > 0
        ):
            raise ParserError(
                f"Expected an ISO 8601-like string, but was given {datetime_string!r}. "
                "Try passing in a format string to resolve this."
            )

        has_time = has_space_divider or has_t_divider
        has_tz = False

        # date formats (ISO 8601 and others) to test against
        # NOTE: YYYYMM is omitted to avoid confusion with YYMMDD (no longer part of ISO 8601, but is still often used)
        formats = [
            "YYYY-MM-DD",
            "YYYY-M-DD",
            "YYYY-M-D",
            "YYYY/MM/DD",
            "YYYY/M/DD",
            "YYYY/M/D",
            "YYYY.MM.DD",
            "YYYY.M.DD",
            "YYYY.M.D",
            "YYYYMMDD",
            "YYYY-DDDD",
            "YYYYDDDD",
            "YYYY-MM",
            "YYYY/MM",
            "YYYY.MM",
            "YYYY",
            "W",
        ]

        if has_time:
            if has_space_divider:
                date_string, time_string = datetime_string.split(" ", 1)
            else:
                date_string, time_string = datetime_string.split("T", 1)

            # Separate the clock time from a trailing timezone ("+hh:mm", "-hhmm", "Z").
            time_parts = re.split(
                r"[\+\-Z]", time_string, maxsplit=1, flags=re.IGNORECASE
            )

            time_components: Optional[Match[str]] = self._TIME_RE.match(time_parts[0])

            if time_components is None:
                raise ParserError(
                    "Invalid time component provided. "
                    "Please specify a format or provide a valid time component in the basic or extended ISO 8601 time format."
                )

            (
                hours,
                minutes,
                seconds,
                subseconds_sep,
                subseconds,
            ) = time_components.groups()

            has_tz = len(time_parts) == 2
            has_minutes = minutes is not None
            has_seconds = seconds is not None
            has_subseconds = subseconds is not None

            is_basic_time_format = ":" not in time_parts[0]
            tz_format = "Z"

            # use 'ZZ' token instead since tz offset is present in non-basic format
            if has_tz and ":" in time_parts[1]:
                tz_format = "ZZ"

            time_sep = "" if is_basic_time_format else ":"

            # From here on, time_string holds the *format* of the time part.
            if has_subseconds:
                time_string = f"HH{time_sep}mm{time_sep}ss{subseconds_sep}S"
            elif has_seconds:
                time_string = f"HH{time_sep}mm{time_sep}ss"
            elif has_minutes:
                time_string = f"HH{time_sep}mm"
            else:
                time_string = "HH"

            if has_space_divider:
                formats = [f"{f} {time_string}" for f in formats]
            else:
                formats = [f"{f}T{time_string}" for f in formats]

        if has_time and has_tz:
            # Add "Z" or "ZZ" to the format strings to indicate to
            # _parse_token() that a timezone needs to be parsed
            formats = [f"{f}{tz_format}" for f in formats]

        return self._parse_multiformat(datetime_string, formats)

    def parse(
        self,
        datetime_string: str,
        fmt: Union[List[str], str],
        normalize_whitespace: bool = False,
    ) -> datetime:
        """
        Parses a datetime string using a specified format.

        :param datetime_string: The datetime string to parse.
        :param fmt: The format string or list of format strings to use for parsing.
            A list is tried in order and the first matching format wins.
        :param normalize_whitespace: Whether to collapse runs of whitespace in the
            datetime string into a single space (default is False).
        :type datetime_string: str
        :type fmt: Union[List[str], str]
        :type normalize_whitespace: bool
        :returns: The parsed datetime object.
        :rtype: datetime
        :raises ParserMatchError: If the datetime string does not match the specified format,
            or the format cannot be compiled into a regular expression.
        :raises ParserError: If ``fmt`` is a list and none of its formats match.

        Usage::

            >>> import arrow.parser
            >>> arrow.parser.DateTimeParser().parse('2021-10-12 14:30:00', 'YYYY-MM-DD HH:mm:ss')
            datetime.datetime(2021, 10, 12, 14, 30)


        """
        if normalize_whitespace:
            datetime_string = re.sub(r"\s+", " ", datetime_string)

        if isinstance(fmt, list):
            return self._parse_multiformat(datetime_string, fmt)

        try:
            fmt_tokens: List[_FORMAT_TYPE]
            fmt_pattern_re: Pattern[str]
            fmt_tokens, fmt_pattern_re = self._generate_pattern_re(fmt)
        except re.error as e:
            raise ParserMatchError(
                f"Failed to generate regular expression pattern: {e}."
            ) from e

        match = fmt_pattern_re.search(datetime_string)

        if match is None:
            raise ParserMatchError(
                f"Failed to match {fmt!r} when parsing {datetime_string!r}."
            )

        parts: _Parts = {}
        for token in fmt_tokens:
            value: Union[Tuple[str, str, str], str]
            if token == "Do":
                # the locale's ordinal pattern captures the number in a group named "value"
                value = match.group("value")
            elif token == "W":
                value = (match.group("year"), match.group("week"), match.group("day"))
            else:
                value = match.group(token)

            if value is None:
                raise ParserMatchError(
                    f"Unable to find a match group for the specified token {token!r}."
                )

            self._parse_token(token, value, parts)  # type: ignore[arg-type]

        return self._build_datetime(parts)

    def _generate_pattern_re(self, fmt: str) -> Tuple[List[_FORMAT_TYPE], Pattern[str]]:
        """
        Generates a regular expression pattern from a format string.

        :param fmt: The format string to convert into a regular expression pattern.
        :type fmt: str
        :returns: A tuple containing a list of format tokens and the corresponding regular expression pattern.
        :rtype: Tuple[List[_FORMAT_TYPE], Pattern[str]]
        :raises ParserError: If an unrecognized token is encountered in the format string.
        :raises re.error: If the generated pattern cannot be compiled (for example
            when a token appears twice and its group name is redefined).
        """
        # fmt is a string of tokens like 'YYYY-MM-DD'
        # we construct a new string by replacing each
        # token by its pattern:
        # 'YYYY-MM-DD' -> '(?P<YYYY>\d{4})-(?P<MM>\d{2})-(?P<DD>\d{2})'
        tokens: List[_FORMAT_TYPE] = []
        pattern_pieces: List[str] = []

        # Split the format into the literal text around the bracketed escapes
        # and the bracketed escapes themselves. Working on the segments (rather
        # than swapping each escape for a "#" placeholder) keeps a literal "#"
        # in the format from being mistaken for a placeholder and dropped.
        # _ESCAPE_RE has no capture groups, so split() yields exactly one more
        # segment than findall() yields escapes.
        literal_segments = self._ESCAPE_RE.split(fmt)
        escaped_data = self._ESCAPE_RE.findall(fmt)

        for index, segment in enumerate(literal_segments):
            # Escape all special RegEx chars
            escaped_segment = re.escape(segment)

            # Any number of S is the same as one.
            # TODO: allow users to specify the number of digits to parse
            escaped_segment = re.sub(r"S+", "S", escaped_segment)

            cursor = 0
            for m in self._FORMAT_RE.finditer(escaped_segment):
                token: _FORMAT_TYPE = cast(_FORMAT_TYPE, m.group(0))
                try:
                    input_re = self._input_re_map[token]
                except KeyError as e:
                    raise ParserError(f"Unrecognized token {token!r}.") from e
                tokens.append(token)
                # literal text before the token, then the token's named group
                pattern_pieces.append(escaped_segment[cursor : m.start()])
                pattern_pieces.append(f"(?P<{token}>{input_re.pattern})")
                cursor = m.end()
            pattern_pieces.append(escaped_segment[cursor:])

            # Re-insert the bracketed expression that followed this segment,
            # without its brackets and without escaping it.
            if index < len(escaped_data):
                pattern_pieces.append(escaped_data[index][1:-1])

        final_fmt_pattern = "".join(pattern_pieces)

        # Wrap final_fmt_pattern in a custom word boundary to strictly
        # match the formatting pattern and filter out date and time formats
        # that include junk such as: blah1998-09-12 blah, blah 1998-09-12blah,
        # blah1998-09-12blah. The custom word boundary matches every character
        # that is not a whitespace character to allow for searching for a date
        # and time string in a natural language sentence. Therefore, searching
        # for a string of the form YYYY-MM-DD in "blah 1998-09-12 blah" will
        # work properly.
        # Certain punctuation before or after the target pattern such as
        # "1998-09-12," is permitted. For the full list of valid punctuation,
        # see the documentation.

        starting_word_boundary = (
            r"(?<!\S\S)"  # Don't have two consecutive non-whitespace characters. This ensures that we allow cases
            # like .11.25.2019 but not 1.11.25.2019 (for pattern MM.DD.YYYY)
            r"(?<![^\,\.\;\:\?\!\"\'\`\[\]\{\}\(\)<>\s])"  # This is the list of punctuation that is ok before the
            # pattern (i.e. "It can't not be these characters before the pattern")
            r"(\b|^)"
            # The \b is to block cases like 1201912 but allow 201912 for pattern YYYYMM. The ^ was necessary to allow a
            # negative number through i.e. before epoch numbers
        )
        ending_word_boundary = (
            r"(?=[\,\.\;\:\?\!\"\'\`\[\]\{\}\(\)\<\>]?"  # Positive lookahead stating that these punctuation marks
            # can appear after the pattern at most 1 time
            r"(?!\S))"  # Don't allow any non-whitespace character after the punctuation
        )
        bounded_fmt_pattern = (
            f"{starting_word_boundary}{final_fmt_pattern}{ending_word_boundary}"
        )

        return tokens, re.compile(bounded_fmt_pattern, flags=re.IGNORECASE)

    @overload
    def _parse_token(
        self,
        token: Literal[
            "YYYY",
            "YY",
            "MM",
            "M",
            "DDDD",
            "DDD",
            "DD",
            "D",
            "Do",
            "HH",
            "hh",
            "h",
            "H",
            "mm",
            "m",
            "ss",
            "s",
            "x",
        ],
        value: Union[str, bytes, SupportsInt, bytearray],
        parts: _Parts,
    ) -> None:
        ...  # pragma: no cover

    @overload
    def _parse_token(
        self,
        token: Literal["X"],
        value: Union[str, bytes, SupportsFloat, bytearray],
        parts: _Parts,
    ) -> None:
        ...  # pragma: no cover

    @overload
    def _parse_token(
        self,
        token: Literal["MMMM", "MMM", "dddd", "ddd", "S"],
        value: Union[str, bytes, bytearray],
        parts: _Parts,
    ) -> None:
        ...  # pragma: no cover

    @overload
    def _parse_token(
        self,
        token: Literal["a", "A", "ZZZ", "ZZ", "Z"],
        value: Union[str, bytes],
        parts: _Parts,
    ) -> None:
        ...  # pragma: no cover

    @overload
    def _parse_token(
        self,
        token: Literal["W"],
        value: Tuple[_WEEKDATE_ELEMENT, _WEEKDATE_ELEMENT, Optional[_WEEKDATE_ELEMENT]],
        parts: _Parts,
    ) -> None:
        ...  # pragma: no cover

    def _parse_token(
        self,
        token: Any,
        value: Any,
        parts: _Parts,
    ) -> None:
        """
        Parse a token and its value, and update the `_Parts` dictionary with the parsed values.

        Tokens that are not handled by any branch leave ``parts`` untouched.

        :param token: The token to parse.
        :type token: Any
        :param value: The value of the token.
        :type value: Any
        :param parts: A dictionary to update with the parsed values.
        :type parts: _Parts
        :raises ParserMatchError: If the value is the locale's AM meridian and an hour that
            was already parsed into ``parts`` is not between 0 and 12 inclusive.

        """
        if token == "YYYY":
            parts["year"] = int(value)

        elif token == "YY":
            value = int(value)
            # two-digit years: 69-99 map to 1969-1999, 00-68 map to 2000-2068
            parts["year"] = 1900 + value if value > 68 else 2000 + value

        elif token in ["MMMM", "MMM"]:
            # FIXME: month_number() is nullable
            parts["month"] = self.locale.month_number(value.lower())  # type: ignore[typeddict-item]

        elif token in ["MM", "M"]:
            parts["month"] = int(value)

        elif token in ["DDDD", "DDD"]:
            parts["day_of_year"] = int(value)

        elif token in ["DD", "D"]:
            parts["day"] = int(value)

        elif token == "Do":
            parts["day"] = int(value)

        elif token == "dddd":
            # locale day names are 1-indexed
            day_of_week = [x.lower() for x in self.locale.day_names].index(
                value.lower()
            )
            parts["day_of_week"] = day_of_week - 1

        elif token == "ddd":
            # locale day abbreviations are 1-indexed
            day_of_week = [x.lower() for x in self.locale.day_abbreviations].index(
                value.lower()
            )
            parts["day_of_week"] = day_of_week - 1

        elif token.upper() in ["HH", "H"]:
            parts["hour"] = int(value)

        elif token in ["mm", "m"]:
            parts["minute"] = int(value)

        elif token in ["ss", "s"]:
            parts["second"] = int(value)

        elif token == "S":
            # We have the *most significant* digits of an arbitrary-precision integer.
            # We want the six most significant digits as an integer, rounded.
            # IDEA: add nanosecond support somehow? Need datetime support for it first.
            value = value.ljust(7, "0")

            # floating-point (IEEE-754) defaults to half-to-even rounding
            seventh_digit = int(value[6])
            if seventh_digit == 5:
                rounding = int(value[5]) % 2
            elif seventh_digit > 5:
                rounding = 1
            else:
                rounding = 0

            # may yield 1000000; _build_datetime() carries that into the seconds
            parts["microsecond"] = int(value[:6]) + rounding

        elif token == "X":
            parts["timestamp"] = float(value)

        elif token == "x":
            parts["expanded_timestamp"] = int(value)

        elif token in ["ZZZ", "ZZ", "Z"]:
            parts["tzinfo"] = TzinfoParser.parse(value)

        elif token in ["a", "A"]:
            if value in (self.locale.meridians["am"], self.locale.meridians["AM"]):
                parts["am_pm"] = "am"
                if "hour" in parts and not 0 <= parts["hour"] <= 12:
                    raise ParserMatchError(
                        f"Hour token value must be between 0 and 12 inclusive for token {token!r}."
                    )
            elif value in (self.locale.meridians["pm"], self.locale.meridians["PM"]):
                parts["am_pm"] = "pm"
        elif token == "W":
            parts["weekdate"] = value

    @staticmethod
    def _build_datetime(parts: _Parts) -> datetime:
        """
        Build a datetime object from a dictionary of date parts.

        The parts are resolved in this order:

        1. ``weekdate`` is converted to ``year``/``month``/``day``.
        2. ``timestamp``, then ``expanded_timestamp``, short-circuit everything
           else and return a UTC datetime.
        3. ``day_of_year`` (requires ``year``, forbids ``month``) is converted to
           ``year``/``month``/``day``.
        4. ``day_of_week`` is used only when no ``day`` is present.
        5. ``am_pm`` adjusts the hour; hour 24 rolls over to the next day; a
           microsecond value of 1000000 rolls over to the next second.

        :param parts: A dictionary containing the date parts extracted from a date string.
            It is updated in place with any derived ``year``/``month``/``day``.
        :type parts: dict
        :return: A datetime object representing the date and time.
        :rtype: datetime.datetime
        :raises ParserError: If the week date or day of year is invalid, if a day of year is
            given without a year or together with a month, or if hour 24 is combined with
            non-zero minutes, seconds or microseconds.
        """
        weekdate = parts.get("weekdate")

        if weekdate is not None:
            year, week = int(weekdate[0]), int(weekdate[1])

            if weekdate[2] is not None:
                _day = int(weekdate[2])
            else:
                # day not given, default to 1
                _day = 1

            date_string = f"{year}-{week}-{_day}"

            # tokens for ISO 8601 weekdates
            try:
                dt = datetime.strptime(date_string, "%G-%V-%u")
            except ValueError as e:
                # Mirror the day-of-year handling below so callers that catch
                # ParserError also see invalid week dates (e.g. week 54, day 8).
                raise ParserError(
                    f"The provided week date {date_string!r} is invalid."
                ) from e

            parts["year"] = dt.year
            parts["month"] = dt.month
            parts["day"] = dt.day

        timestamp = parts.get("timestamp")

        if timestamp is not None:
            return datetime.fromtimestamp(timestamp, tz=tz.tzutc())

        expanded_timestamp = parts.get("expanded_timestamp")

        if expanded_timestamp is not None:
            return datetime.fromtimestamp(
                normalize_timestamp(expanded_timestamp),
                tz=tz.tzutc(),
            )

        day_of_year = parts.get("day_of_year")

        if day_of_year is not None:
            _year = parts.get("year")
            month = parts.get("month")
            if _year is None:
                raise ParserError(
                    "Year component is required with the DDD and DDDD tokens."
                )

            if month is not None:
                raise ParserError(
                    "Month component is not allowed with the DDD and DDDD tokens."
                )

            date_string = f"{_year}-{day_of_year}"
            try:
                dt = datetime.strptime(date_string, "%Y-%j")
            except ValueError as e:
                raise ParserError(
                    f"The provided day of year {day_of_year!r} is invalid."
                ) from e

            parts["year"] = dt.year
            parts["month"] = dt.month
            parts["day"] = dt.day

        day_of_week: Optional[int] = parts.get("day_of_week")
        day = parts.get("day")

        # If day is passed, ignore day of week
        if day_of_week is not None and day is None:
            year = parts.get("year", 1970)
            month = parts.get("month", 1)
            day = 1

            # dddd => first day of week after epoch
            # dddd YYYY => first day of week in specified year
            # dddd MM YYYY => first day of week in specified year and month
            # dddd MM => first day after epoch in specified month
            next_weekday_dt = next_weekday(datetime(year, month, day), day_of_week)
            parts["year"] = next_weekday_dt.year
            parts["month"] = next_weekday_dt.month
            parts["day"] = next_weekday_dt.day

        am_pm = parts.get("am_pm")
        hour = parts.get("hour", 0)

        if am_pm == "pm" and hour < 12:
            hour += 12
        elif am_pm == "am" and hour == 12:
            hour = 0

        # Support for midnight at the end of day
        if hour == 24:
            if parts.get("minute", 0) != 0:
                raise ParserError("Midnight at the end of day must not contain minutes")
            if parts.get("second", 0) != 0:
                raise ParserError("Midnight at the end of day must not contain seconds")
            if parts.get("microsecond", 0) != 0:
                raise ParserError(
                    "Midnight at the end of day must not contain microseconds"
                )
            hour = 0
            day_increment = 1
        else:
            day_increment = 0

        # account for rounding up to 1000000
        microsecond = parts.get("microsecond", 0)
        if microsecond == 1000000:
            microsecond = 0
            second_increment = 1
        else:
            second_increment = 0

        increment = timedelta(days=day_increment, seconds=second_increment)

        return (
            datetime(
                year=parts.get("year", 1),
                month=parts.get("month", 1),
                day=parts.get("day", 1),
                hour=hour,
                minute=parts.get("minute", 0),
                second=parts.get("second", 0),
                microsecond=microsecond,
                tzinfo=parts.get("tzinfo"),
            )
            + increment
        )

    def _parse_multiformat(self, string: str, formats: Iterable[str]) -> datetime:
        """
        Parse a date and time string using multiple formats.

        Tries to parse the provided string with each format in the given `formats`
        iterable, returning the resulting `datetime` object of the first format
        that matches. Only :class:`ParserMatchError` is treated as "try the next
        format"; any other error propagates immediately.

        :param string: The date and time string to parse.
        :type string: str
        :param formats: An iterable of date and time format strings to try, in order.
        :type formats: Iterable[str]
        :returns: The parsed date and time.
        :rtype: datetime.datetime
        :raises ParserError: If no format matches the input string.
        """
        # Materialise once: the formats are iterated here and again for the
        # error message, which would be empty for a one-shot iterator.
        candidate_formats = list(formats)

        for fmt in candidate_formats:
            try:
                return self.parse(string, fmt)
            except ParserMatchError:
                continue

        supported_formats = ", ".join(candidate_formats)
        raise ParserError(
            f"Could not match input {string!r} to any of the following formats: {supported_formats}."
        )

    # generates a capture group of choices separated by an OR operator
    @staticmethod
    def _generate_choice_re(
        choices: Iterable[str], flags: Union[int, re.RegexFlag] = 0
    ) -> Pattern[str]:
        """
        Generate a regular expression pattern that matches a choice from an iterable.

        Takes an iterable of strings (`choices`) and returns a compiled regular expression
        pattern with a single capture group that matches any of the choices. Each choice
        is escaped with :func:`re.escape`, so it is matched literally even when it
        contains regular expression metacharacters such as ``.`` or ``(``.

        :param choices: An iterable of strings to match.
        :type choices: Iterable[str]
        :param flags: Optional regular expression flags. Default is 0.
        :type flags: Union[int, re.RegexFlag], optional
        :returns: A compiled regular expression pattern that matches any of the choices.
        :rtype: re.Pattern[str]
        """
        alternatives = "|".join(re.escape(choice) for choice in choices)
        return re.compile(f"({alternatives})", flags=flags)
class TzinfoParser:
    """
    Turns timezone expressions into ``tzinfo`` objects.
    """

    # Optional "(UTC" prefix, optional sign, two-digit hours and optional
    # two-digit minutes (with or without a colon), anchored at the start only.
    _TZINFO_RE: ClassVar[Pattern[str]] = re.compile(
        r"^(?:\(UTC)*([\+\-])?(\d{2})(?:\:?(\d{2}))?"
    )

    @classmethod
    def parse(cls, tzinfo_string: str) -> dt_tzinfo:
        """
        Resolve a timezone expression to a ``tzinfo`` object.

        The expression is checked, in order, against the literal ``"local"``,
        the UTC spellings ``"utc"``/``"UTC"``/``"Z"``, a numeric offset, and
        finally a timezone name looked up with ``tz.gettz``.

        :param tzinfo_string: The timezone string to parse.
        :type tzinfo_string: str
        :returns: The parsed timezone object.
        :rtype: datetime.tzinfo
        :raises ParserError: If the timezone string cannot be parsed.
        """
        if tzinfo_string == "local":
            return tz.tzlocal()

        if tzinfo_string in ("utc", "UTC", "Z"):
            return tz.tzutc()

        offset_match = cls._TZINFO_RE.match(tzinfo_string)
        if offset_match is not None:
            sign, hours, minutes = offset_match.groups()
            # minutes is None when the offset only carries hours
            offset_seconds = int(hours) * 3600 + int(minutes or 0) * 60
            if sign == "-":
                offset_seconds = -offset_seconds
            return tz.tzoffset(None, offset_seconds)

        named_zone = tz.gettz(tzinfo_string)
        if named_zone is None:
            raise ParserError(f"Could not parse timezone expression {tzinfo_string!r}.")

        return named_zone