Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/arrow/parser.py: 87%
315 statements
coverage.py v7.2.7, created at 2023-06-07 06:17 +0000
1"""Provides the :class:`Arrow <arrow.parser.DateTimeParser>` class, a better way to parse datetime strings."""
3import re
4import sys
5from datetime import datetime, timedelta
6from datetime import tzinfo as dt_tzinfo
7from functools import lru_cache
8from typing import (
9 Any,
10 ClassVar,
11 Dict,
12 Iterable,
13 List,
14 Match,
15 Optional,
16 Pattern,
17 SupportsFloat,
18 SupportsInt,
19 Tuple,
20 Union,
21 cast,
22 overload,
23)
25from dateutil import tz
27from arrow import locales
28from arrow.constants import DEFAULT_LOCALE
29from arrow.util import next_weekday, normalize_timestamp
31if sys.version_info < (3, 8): # pragma: no cover
32 from typing_extensions import Literal, TypedDict
33else:
34 from typing import Literal, TypedDict # pragma: no cover
37class ParserError(ValueError):
38 pass
41# Allows for ParserErrors to be propagated from _build_datetime()
42# when day_of_year errors occur.
43# Before this, the ParserErrors were caught by the try/except in
44# _parse_multiformat() and the appropriate error message was not
45# transmitted to the user.
46class ParserMatchError(ParserError):
47 pass


_WEEKDATE_ELEMENT = Union[str, bytes, SupportsInt, bytearray]

_FORMAT_TYPE = Literal[
    "YYYY",
    "YY",
    "MM",
    "M",
    "DDDD",
    "DDD",
    "DD",
    "D",
    "HH",
    "H",
    "hh",
    "h",
    "mm",
    "m",
    "ss",
    "s",
    "X",
    "x",
    "ZZZ",
    "ZZ",
    "Z",
    "S",
    "W",
    "MMMM",
    "MMM",
    "Do",
    "dddd",
    "ddd",
    "d",
    "a",
    "A",
]


class _Parts(TypedDict, total=False):
    year: int
    month: int
    day_of_year: int
    day: int
    hour: int
    minute: int
    second: int
    microsecond: int
    timestamp: float
    expanded_timestamp: int
    tzinfo: dt_tzinfo
    am_pm: Literal["am", "pm"]
    day_of_week: int
    weekdate: Tuple[_WEEKDATE_ELEMENT, _WEEKDATE_ELEMENT, Optional[_WEEKDATE_ELEMENT]]


class DateTimeParser:
    _FORMAT_RE: ClassVar[Pattern[str]] = re.compile(
        r"(YYY?Y?|MM?M?M?|Do|DD?D?D?|d?d?d?d|HH?|hh?|mm?|ss?|S+|ZZ?Z?|a|A|x|X|W)"
    )
    _ESCAPE_RE: ClassVar[Pattern[str]] = re.compile(r"\[[^\[\]]*\]")

    _ONE_OR_TWO_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{1,2}")
    _ONE_OR_TWO_OR_THREE_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{1,3}")
    _ONE_OR_MORE_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d+")
    _TWO_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{2}")
    _THREE_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{3}")
    _FOUR_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{4}")
    _TZ_Z_RE: ClassVar[Pattern[str]] = re.compile(r"([\+\-])(\d{2})(?:(\d{2}))?|Z")
    _TZ_ZZ_RE: ClassVar[Pattern[str]] = re.compile(r"([\+\-])(\d{2})(?:\:(\d{2}))?|Z")
    _TZ_NAME_RE: ClassVar[Pattern[str]] = re.compile(r"\w[\w+\-/]+")
    # NOTE: timestamps cannot be parsed from natural language strings (by removing the ^...$) because it will
    # break cases like "15 Jul 2000" and a format list (see issue #447)
    _TIMESTAMP_RE: ClassVar[Pattern[str]] = re.compile(r"^\-?\d+\.?\d+$")
    _TIMESTAMP_EXPANDED_RE: ClassVar[Pattern[str]] = re.compile(r"^\-?\d+$")
    _TIME_RE: ClassVar[Pattern[str]] = re.compile(
        r"^(\d{2})(?:\:?(\d{2}))?(?:\:?(\d{2}))?(?:([\.\,])(\d+))?$"
    )
    _WEEK_DATE_RE: ClassVar[Pattern[str]] = re.compile(
        r"(?P<year>\d{4})[\-]?W(?P<week>\d{2})[\-]?(?P<day>\d)?"
    )

    _BASE_INPUT_RE_MAP: ClassVar[Dict[_FORMAT_TYPE, Pattern[str]]] = {
        "YYYY": _FOUR_DIGIT_RE,
        "YY": _TWO_DIGIT_RE,
        "MM": _TWO_DIGIT_RE,
        "M": _ONE_OR_TWO_DIGIT_RE,
        "DDDD": _THREE_DIGIT_RE,
        "DDD": _ONE_OR_TWO_OR_THREE_DIGIT_RE,
        "DD": _TWO_DIGIT_RE,
        "D": _ONE_OR_TWO_DIGIT_RE,
        "HH": _TWO_DIGIT_RE,
        "H": _ONE_OR_TWO_DIGIT_RE,
        "hh": _TWO_DIGIT_RE,
        "h": _ONE_OR_TWO_DIGIT_RE,
        "mm": _TWO_DIGIT_RE,
        "m": _ONE_OR_TWO_DIGIT_RE,
        "ss": _TWO_DIGIT_RE,
        "s": _ONE_OR_TWO_DIGIT_RE,
        "X": _TIMESTAMP_RE,
        "x": _TIMESTAMP_EXPANDED_RE,
        "ZZZ": _TZ_NAME_RE,
        "ZZ": _TZ_ZZ_RE,
        "Z": _TZ_Z_RE,
        "S": _ONE_OR_MORE_DIGIT_RE,
        "W": _WEEK_DATE_RE,
    }

    SEPARATORS: ClassVar[List[str]] = ["-", "/", "."]

    locale: locales.Locale
    _input_re_map: Dict[_FORMAT_TYPE, Pattern[str]]

    def __init__(self, locale: str = DEFAULT_LOCALE, cache_size: int = 0) -> None:
        self.locale = locales.get_locale(locale)
        self._input_re_map = self._BASE_INPUT_RE_MAP.copy()
        self._input_re_map.update(
            {
                "MMMM": self._generate_choice_re(
                    self.locale.month_names[1:], re.IGNORECASE
                ),
                "MMM": self._generate_choice_re(
                    self.locale.month_abbreviations[1:], re.IGNORECASE
                ),
                "Do": re.compile(self.locale.ordinal_day_re),
                "dddd": self._generate_choice_re(
                    self.locale.day_names[1:], re.IGNORECASE
                ),
                "ddd": self._generate_choice_re(
                    self.locale.day_abbreviations[1:], re.IGNORECASE
                ),
                "d": re.compile(r"[1-7]"),
                "a": self._generate_choice_re(
                    (self.locale.meridians["am"], self.locale.meridians["pm"])
                ),
                # note: 'A' token accepts both 'am/pm' and 'AM/PM' formats to
                # ensure backwards compatibility of this token
                "A": self._generate_choice_re(self.locale.meridians.values()),
            }
        )

        if cache_size > 0:
            self._generate_pattern_re = lru_cache(maxsize=cache_size)(  # type: ignore[assignment]
                self._generate_pattern_re
            )

    # TODO: since we support more than ISO 8601, we should rename this function
    # IDEA: break into multiple functions
    def parse_iso(
        self, datetime_string: str, normalize_whitespace: bool = False
    ) -> datetime:
        if normalize_whitespace:
            datetime_string = re.sub(r"\s+", " ", datetime_string.strip())

        has_space_divider = " " in datetime_string
        has_t_divider = "T" in datetime_string

        num_spaces = datetime_string.count(" ")
        if has_space_divider and num_spaces != 1 or has_t_divider and num_spaces > 0:
            raise ParserError(
                f"Expected an ISO 8601-like string, but was given {datetime_string!r}. "
                "Try passing in a format string to resolve this."
            )

        has_time = has_space_divider or has_t_divider
        has_tz = False

        # date formats (ISO 8601 and others) to test against
        # NOTE: YYYYMM is omitted to avoid confusion with YYMMDD (no longer part of ISO 8601, but is still often used)
        formats = [
            "YYYY-MM-DD",
            "YYYY-M-DD",
            "YYYY-M-D",
            "YYYY/MM/DD",
            "YYYY/M/DD",
            "YYYY/M/D",
            "YYYY.MM.DD",
            "YYYY.M.DD",
            "YYYY.M.D",
            "YYYYMMDD",
            "YYYY-DDDD",
            "YYYYDDDD",
            "YYYY-MM",
            "YYYY/MM",
            "YYYY.MM",
            "YYYY",
            "W",
        ]

        if has_time:
            if has_space_divider:
                date_string, time_string = datetime_string.split(" ", 1)
            else:
                date_string, time_string = datetime_string.split("T", 1)

            time_parts = re.split(r"[\+\-Z]", time_string, 1, re.IGNORECASE)

            time_components: Optional[Match[str]] = self._TIME_RE.match(time_parts[0])

            if time_components is None:
                raise ParserError(
                    "Invalid time component provided. "
                    "Please specify a format or provide a valid time component in the basic or extended ISO 8601 time format."
                )

            (
                hours,
                minutes,
                seconds,
                subseconds_sep,
                subseconds,
            ) = time_components.groups()

            has_tz = len(time_parts) == 2
            has_minutes = minutes is not None
            has_seconds = seconds is not None
            has_subseconds = subseconds is not None

            is_basic_time_format = ":" not in time_parts[0]
            tz_format = "Z"

            # use 'ZZ' token instead since tz offset is present in non-basic format
            if has_tz and ":" in time_parts[1]:
                tz_format = "ZZ"

            time_sep = "" if is_basic_time_format else ":"

            if has_subseconds:
                time_string = "HH{time_sep}mm{time_sep}ss{subseconds_sep}S".format(
                    time_sep=time_sep, subseconds_sep=subseconds_sep
                )
            elif has_seconds:
                time_string = "HH{time_sep}mm{time_sep}ss".format(time_sep=time_sep)
            elif has_minutes:
                time_string = f"HH{time_sep}mm"
            else:
                time_string = "HH"

            if has_space_divider:
                formats = [f"{f} {time_string}" for f in formats]
            else:
                formats = [f"{f}T{time_string}" for f in formats]

        if has_time and has_tz:
            # Add "Z" or "ZZ" to the format strings to indicate to
            # _parse_token() that a timezone needs to be parsed
            formats = [f"{f}{tz_format}" for f in formats]

        return self._parse_multiformat(datetime_string, formats)
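
    # Illustrative usage of parse_iso() (not part of the original module; results
    # assume the default "en-us" locale and the behaviour of the code above):
    #
    #   parser = DateTimeParser()
    #   parser.parse_iso("2013-05-05T12:30:45")        # -> datetime(2013, 5, 5, 12, 30, 45)
    #   parser.parse_iso("2013-05-05 12:30:45+02:00")  # -> same wall time, tzinfo=tzoffset(None, 7200)
    #   parser.parse_iso("2013-W18-7")                 # ISO week date -> datetime(2013, 5, 5, 0, 0)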

    def parse(
        self,
        datetime_string: str,
        fmt: Union[List[str], str],
        normalize_whitespace: bool = False,
    ) -> datetime:
        if normalize_whitespace:
            datetime_string = re.sub(r"\s+", " ", datetime_string)

        if isinstance(fmt, list):
            return self._parse_multiformat(datetime_string, fmt)

        try:
            fmt_tokens: List[_FORMAT_TYPE]
            fmt_pattern_re: Pattern[str]
            fmt_tokens, fmt_pattern_re = self._generate_pattern_re(fmt)
        except re.error as e:
            raise ParserMatchError(
                f"Failed to generate regular expression pattern: {e}."
            )

        match = fmt_pattern_re.search(datetime_string)

        if match is None:
            raise ParserMatchError(
                f"Failed to match {fmt!r} when parsing {datetime_string!r}."
            )

        parts: _Parts = {}
        for token in fmt_tokens:
            value: Union[Tuple[str, str, str], str]
            if token == "Do":
                value = match.group("value")
            elif token == "W":
                value = (match.group("year"), match.group("week"), match.group("day"))
            else:
                value = match.group(token)

            if value is None:
                raise ParserMatchError(
                    f"Unable to find a match group for the specified token {token!r}."
                )

            self._parse_token(token, value, parts)  # type: ignore[arg-type]

        return self._build_datetime(parts)
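
    # Illustrative usage of parse() (not part of the original module; assumes the
    # default "en-us" locale):
    #
    #   parser = DateTimeParser()
    #   parser.parse("June 7, 2023", "MMMM D, YYYY")      # -> datetime(2023, 6, 7, 0, 0)
    #   parser.parse("07/06/23 18:30", "DD/MM/YY HH:mm")  # -> datetime(2023, 6, 7, 18, 30)
    #   parser.parse("bad input", "YYYY-MM-DD")           # raises ParserMatchError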

    def _generate_pattern_re(self, fmt: str) -> Tuple[List[_FORMAT_TYPE], Pattern[str]]:
        # fmt is a string of tokens like 'YYYY-MM-DD'
        # we construct a new string by replacing each
        # token by its pattern:
        # 'YYYY-MM-DD' -> '(?P<YYYY>\d{4})-(?P<MM>\d{2})-(?P<DD>\d{2})'
        tokens: List[_FORMAT_TYPE] = []
        offset = 0

        # Escape all special RegEx chars
        escaped_fmt = re.escape(fmt)

        # Extract the bracketed expressions to be reinserted later.
        escaped_fmt = re.sub(self._ESCAPE_RE, "#", escaped_fmt)

        # Any number of S is the same as one.
        # TODO: allow users to specify the number of digits to parse
        escaped_fmt = re.sub(r"S+", "S", escaped_fmt)

        escaped_data = re.findall(self._ESCAPE_RE, fmt)

        fmt_pattern = escaped_fmt

        for m in self._FORMAT_RE.finditer(escaped_fmt):
            token: _FORMAT_TYPE = cast(_FORMAT_TYPE, m.group(0))
            try:
                input_re = self._input_re_map[token]
            except KeyError:
                raise ParserError(f"Unrecognized token {token!r}.")
            input_pattern = f"(?P<{token}>{input_re.pattern})"
            tokens.append(token)
            # a pattern doesn't have the same length as the token
            # it replaces! We keep the difference in the offset variable.
            # This works because the string is scanned left-to-right and matches
            # are returned in the order found by finditer.
            fmt_pattern = (
                fmt_pattern[: m.start() + offset]
                + input_pattern
                + fmt_pattern[m.end() + offset :]
            )
            offset += len(input_pattern) - (m.end() - m.start())

        final_fmt_pattern = ""
        split_fmt = fmt_pattern.split(r"\#")

        # Due to the way Python splits, 'split_fmt' will always be longer
        for i in range(len(split_fmt)):
            final_fmt_pattern += split_fmt[i]
            if i < len(escaped_data):
                final_fmt_pattern += escaped_data[i][1:-1]

        # Wrap final_fmt_pattern in a custom word boundary to strictly
        # match the formatting pattern and filter out date and time formats
        # that include junk such as: blah1998-09-12 blah, blah 1998-09-12blah,
        # blah1998-09-12blah. The custom word boundary matches every character
        # that is not a whitespace character to allow for searching for a date
        # and time string in a natural language sentence. Therefore, searching
        # for a string of the form YYYY-MM-DD in "blah 1998-09-12 blah" will
        # work properly.
        # Certain punctuation before or after the target pattern such as
        # "1998-09-12," is permitted. For the full list of valid punctuation,
        # see the documentation.

        starting_word_boundary = (
            r"(?<!\S\S)"  # Don't have two consecutive non-whitespace characters. This ensures that we allow cases
            # like .11.25.2019 but not 1.11.25.2019 (for pattern MM.DD.YYYY)
            r"(?<![^\,\.\;\:\?\!\"\'\`\[\]\{\}\(\)<>\s])"  # This is the list of punctuation that is ok before the
            # pattern (i.e. the character immediately before the pattern, if any, must be one of these)
            r"(\b|^)"
            # The \b is to block cases like 1201912 but allow 201912 for pattern YYYYMM. The ^ was necessary to allow a
            # negative number through i.e. before epoch numbers
        )
        ending_word_boundary = (
            r"(?=[\,\.\;\:\?\!\"\'\`\[\]\{\}\(\)\<\>]?"  # Positive lookahead stating that these punctuation marks
            # can appear after the pattern at most 1 time
            r"(?!\S))"  # Don't allow any non-whitespace character after the punctuation
        )
        bounded_fmt_pattern = r"{}{}{}".format(
            starting_word_boundary, final_fmt_pattern, ending_word_boundary
        )

        return tokens, re.compile(bounded_fmt_pattern, flags=re.IGNORECASE)
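
    # Illustrative example of the generated pattern (not part of the original
    # module; the exact pattern text depends on the token map above):
    #
    #   tokens, pattern = DateTimeParser()._generate_pattern_re("YYYY-MM-DD")
    #   tokens           # ['YYYY', 'MM', 'DD']
    #   pattern.pattern  # word-boundary prefix + r"(?P<YYYY>\d{4})\-(?P<MM>\d{2})\-(?P<DD>\d{2})" + suffix
    #   pattern.search("on 1998-09-12 at noon").group("YYYY")  # '1998'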

    @overload
    def _parse_token(
        self,
        token: Literal[
            "YYYY",
            "YY",
            "MM",
            "M",
            "DDDD",
            "DDD",
            "DD",
            "D",
            "Do",
            "HH",
            "hh",
            "h",
            "H",
            "mm",
            "m",
            "ss",
            "s",
            "x",
        ],
        value: Union[str, bytes, SupportsInt, bytearray],
        parts: _Parts,
    ) -> None:
        ...  # pragma: no cover

    @overload
    def _parse_token(
        self,
        token: Literal["X"],
        value: Union[str, bytes, SupportsFloat, bytearray],
        parts: _Parts,
    ) -> None:
        ...  # pragma: no cover

    @overload
    def _parse_token(
        self,
        token: Literal["MMMM", "MMM", "dddd", "ddd", "S"],
        value: Union[str, bytes, bytearray],
        parts: _Parts,
    ) -> None:
        ...  # pragma: no cover

    @overload
    def _parse_token(
        self,
        token: Literal["a", "A", "ZZZ", "ZZ", "Z"],
        value: Union[str, bytes],
        parts: _Parts,
    ) -> None:
        ...  # pragma: no cover

    @overload
    def _parse_token(
        self,
        token: Literal["W"],
        value: Tuple[_WEEKDATE_ELEMENT, _WEEKDATE_ELEMENT, Optional[_WEEKDATE_ELEMENT]],
        parts: _Parts,
    ) -> None:
        ...  # pragma: no cover

    def _parse_token(
        self,
        token: Any,
        value: Any,
        parts: _Parts,
    ) -> None:
        if token == "YYYY":
            parts["year"] = int(value)

        elif token == "YY":
            value = int(value)
            parts["year"] = 1900 + value if value > 68 else 2000 + value

        elif token in ["MMMM", "MMM"]:
            # FIXME: month_number() is nullable
            parts["month"] = self.locale.month_number(value.lower())  # type: ignore[typeddict-item]

        elif token in ["MM", "M"]:
            parts["month"] = int(value)

        elif token in ["DDDD", "DDD"]:
            parts["day_of_year"] = int(value)

        elif token in ["DD", "D"]:
            parts["day"] = int(value)

        elif token == "Do":
            parts["day"] = int(value)

        elif token == "dddd":
            # locale day names are 1-indexed
            day_of_week = [x.lower() for x in self.locale.day_names].index(
                value.lower()
            )
            parts["day_of_week"] = day_of_week - 1

        elif token == "ddd":
            # locale day abbreviations are 1-indexed
            day_of_week = [x.lower() for x in self.locale.day_abbreviations].index(
                value.lower()
            )
            parts["day_of_week"] = day_of_week - 1

        elif token.upper() in ["HH", "H"]:
            parts["hour"] = int(value)

        elif token in ["mm", "m"]:
            parts["minute"] = int(value)

        elif token in ["ss", "s"]:
            parts["second"] = int(value)

        elif token == "S":
            # We have the *most significant* digits of an arbitrary-precision integer.
            # We want the six most significant digits as an integer, rounded.
            # IDEA: add nanosecond support somehow? Need datetime support for it first.
            value = value.ljust(7, "0")

            # floating-point (IEEE-754) defaults to half-to-even rounding
            seventh_digit = int(value[6])
            if seventh_digit == 5:
                rounding = int(value[5]) % 2
            elif seventh_digit > 5:
                rounding = 1
            else:
                rounding = 0

            parts["microsecond"] = int(value[:6]) + rounding

        elif token == "X":
            parts["timestamp"] = float(value)

        elif token == "x":
            parts["expanded_timestamp"] = int(value)

        elif token in ["ZZZ", "ZZ", "Z"]:
            parts["tzinfo"] = TzinfoParser.parse(value)

        elif token in ["a", "A"]:
            if value in (self.locale.meridians["am"], self.locale.meridians["AM"]):
                parts["am_pm"] = "am"
                if "hour" in parts and not 0 <= parts["hour"] <= 12:
                    raise ParserMatchError(
                        f"Hour token value must be between 0 and 12 inclusive for token {token!r}."
                    )
            elif value in (self.locale.meridians["pm"], self.locale.meridians["PM"]):
                parts["am_pm"] = "pm"

        elif token == "W":
            parts["weekdate"] = value
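
    # Worked example of the 'S' (subsecond) rounding above (not part of the
    # original module):
    #
    #   value = "123456789".ljust(7, "0")  # "123456789", seventh digit is 7 (> 5)
    #   => rounding = 1, microsecond = 123456 + 1 = 123457
    #
    #   value = "4".ljust(7, "0")          # "4000000", seventh digit is 0
    #   => rounding = 0, microsecond = 400000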

    @staticmethod
    def _build_datetime(parts: _Parts) -> datetime:
        weekdate = parts.get("weekdate")

        if weekdate is not None:
            year, week = int(weekdate[0]), int(weekdate[1])

            if weekdate[2] is not None:
                _day = int(weekdate[2])
            else:
                # day not given, default to 1
                _day = 1

            date_string = f"{year}-{week}-{_day}"

            # tokens for ISO 8601 weekdates
            dt = datetime.strptime(date_string, "%G-%V-%u")

            parts["year"] = dt.year
            parts["month"] = dt.month
            parts["day"] = dt.day

        timestamp = parts.get("timestamp")

        if timestamp is not None:
            return datetime.fromtimestamp(timestamp, tz=tz.tzutc())

        expanded_timestamp = parts.get("expanded_timestamp")

        if expanded_timestamp is not None:
            return datetime.fromtimestamp(
                normalize_timestamp(expanded_timestamp),
                tz=tz.tzutc(),
            )

        day_of_year = parts.get("day_of_year")

        if day_of_year is not None:
            _year = parts.get("year")
            month = parts.get("month")
            if _year is None:
                raise ParserError(
                    "Year component is required with the DDD and DDDD tokens."
                )

            if month is not None:
                raise ParserError(
                    "Month component is not allowed with the DDD and DDDD tokens."
                )

            date_string = f"{_year}-{day_of_year}"
            try:
                dt = datetime.strptime(date_string, "%Y-%j")
            except ValueError:
                raise ParserError(
                    f"The provided day of year {day_of_year!r} is invalid."
                )

            parts["year"] = dt.year
            parts["month"] = dt.month
            parts["day"] = dt.day

        day_of_week: Optional[int] = parts.get("day_of_week")
        day = parts.get("day")

        # If day is passed, ignore day of week
        if day_of_week is not None and day is None:
            year = parts.get("year", 1970)
            month = parts.get("month", 1)
            day = 1

            # dddd => first day of week after epoch
            # dddd YYYY => first day of week in specified year
            # dddd MM YYYY => first day of week in specified year and month
            # dddd MM => first day after epoch in specified month
            next_weekday_dt = next_weekday(datetime(year, month, day), day_of_week)
            parts["year"] = next_weekday_dt.year
            parts["month"] = next_weekday_dt.month
            parts["day"] = next_weekday_dt.day

        am_pm = parts.get("am_pm")
        hour = parts.get("hour", 0)

        if am_pm == "pm" and hour < 12:
            hour += 12
        elif am_pm == "am" and hour == 12:
            hour = 0

        # Support for midnight at the end of day
        if hour == 24:
            if parts.get("minute", 0) != 0:
                raise ParserError("Midnight at the end of day must not contain minutes")
            if parts.get("second", 0) != 0:
                raise ParserError("Midnight at the end of day must not contain seconds")
            if parts.get("microsecond", 0) != 0:
                raise ParserError(
                    "Midnight at the end of day must not contain microseconds"
                )
            hour = 0
            day_increment = 1
        else:
            day_increment = 0

        # account for rounding up to 1000000
        microsecond = parts.get("microsecond", 0)
        if microsecond == 1000000:
            microsecond = 0
            second_increment = 1
        else:
            second_increment = 0

        increment = timedelta(days=day_increment, seconds=second_increment)

        return (
            datetime(
                year=parts.get("year", 1),
                month=parts.get("month", 1),
                day=parts.get("day", 1),
                hour=hour,
                minute=parts.get("minute", 0),
                second=parts.get("second", 0),
                microsecond=microsecond,
                tzinfo=parts.get("tzinfo"),
            )
            + increment
        )
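
    # Illustrative behaviour of _build_datetime() (not part of the original module):
    #
    #   DateTimeParser._build_datetime({"year": 2023, "month": 6, "day": 7, "hour": 24})
    #   # -> datetime(2023, 6, 8, 0, 0)   (end-of-day midnight rolls over to the next day)
    #
    #   DateTimeParser._build_datetime({"year": 2023, "day_of_year": 158})
    #   # -> datetime(2023, 6, 7, 0, 0)   (day 158 of 2023 is June 7)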

    def _parse_multiformat(self, string: str, formats: Iterable[str]) -> datetime:
        _datetime: Optional[datetime] = None

        for fmt in formats:
            try:
                _datetime = self.parse(string, fmt)
                break
            except ParserMatchError:
                pass

        if _datetime is None:
            supported_formats = ", ".join(formats)
            raise ParserError(
                f"Could not match input {string!r} to any of the following formats: {supported_formats}."
            )

        return _datetime
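
    # Illustrative usage (not part of the original module): the first format that
    # matches wins.
    #
    #   DateTimeParser()._parse_multiformat("June 2023", ["MMMM D, YYYY", "MMMM YYYY"])
    #   # -> datetime(2023, 6, 1, 0, 0)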

    # generates a capture group of choices separated by an OR operator
    @staticmethod
    def _generate_choice_re(
        choices: Iterable[str], flags: Union[int, re.RegexFlag] = 0
    ) -> Pattern[str]:
        return re.compile(r"({})".format("|".join(choices)), flags=flags)
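
    # For example (illustrative, not part of the original module):
    #   DateTimeParser._generate_choice_re(["am", "pm"]).pattern  # '(am|pm)'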


class TzinfoParser:
    _TZINFO_RE: ClassVar[Pattern[str]] = re.compile(
        r"^(?:\(UTC)*([\+\-])?(\d{2})(?:\:?(\d{2}))?"
    )

    @classmethod
    def parse(cls, tzinfo_string: str) -> dt_tzinfo:
        tzinfo: Optional[dt_tzinfo] = None

        if tzinfo_string == "local":
            tzinfo = tz.tzlocal()

        elif tzinfo_string in ["utc", "UTC", "Z"]:
            tzinfo = tz.tzutc()

        else:
            iso_match = cls._TZINFO_RE.match(tzinfo_string)

            if iso_match:
                sign: Optional[str]
                hours: str
                minutes: Union[str, int, None]
                sign, hours, minutes = iso_match.groups()
                seconds = int(hours) * 3600 + int(minutes or 0) * 60

                if sign == "-":
                    seconds *= -1

                tzinfo = tz.tzoffset(None, seconds)

            else:
                tzinfo = tz.gettz(tzinfo_string)

        if tzinfo is None:
            raise ParserError(f"Could not parse timezone expression {tzinfo_string!r}.")

        return tzinfo
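

# ---------------------------------------------------------------------------
# Minimal usage sketch (not part of the original module). It only exercises the
# public entry points defined above and assumes the default "en-us" locale; the
# printed values depend on the installed arrow and dateutil versions.
if __name__ == "__main__":  # pragma: no cover
    parser = DateTimeParser()

    # ISO 8601-like input; the trailing offset is parsed via the 'ZZ' token.
    print(parser.parse_iso("2023-06-07T06:17:00+00:00"))

    # Explicit format string with locale-aware month names.
    print(parser.parse("June 7, 2023", "MMMM D, YYYY"))

    # A list of candidate formats falls back to _parse_multiformat().
    print(parser.parse("07.06.2023", ["YYYY-MM-DD", "DD.MM.YYYY"]))

    # Bare timezone expressions handled by TzinfoParser.
    print(TzinfoParser.parse("+02:00"), TzinfoParser.parse("US/Pacific"))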