Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/arrow/parser.py: 24%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
"""Provides the :class:`DateTimeParser <arrow.parser.DateTimeParser>` class, a better way to parse datetime strings."""
3import re
4import sys
5from datetime import datetime, timedelta
6from datetime import tzinfo as dt_tzinfo
7from functools import lru_cache
8from typing import (
9 Any,
10 ClassVar,
11 Dict,
12 Iterable,
13 List,
14 Match,
15 Optional,
16 Pattern,
17 SupportsFloat,
18 SupportsInt,
19 Tuple,
20 Union,
21 cast,
22 overload,
23)
25from dateutil import tz
27from arrow import locales
28from arrow.constants import DEFAULT_LOCALE
29from arrow.util import next_weekday, normalize_timestamp
31if sys.version_info < (3, 8): # pragma: no cover
32 from typing_extensions import Literal, TypedDict
33else:
34 from typing import Literal, TypedDict # pragma: no cover
class ParserError(ValueError):
    """Base error raised by the parsers in this module.

    Subclasses :class:`ValueError`, so callers that catch ``ValueError``
    also catch parser failures.
    """
class ParserMatchError(ParserError):
    """Parser failure that ``_parse_multiformat()`` treats as "try the next format".

    ``_parse_multiformat()`` catches only this subclass. A plain
    :class:`ParserError` raised by ``_build_datetime()`` (for example when a
    day-of-year value is invalid) is therefore no longer swallowed by that
    try/except and reaches the user with its own error message.
    """
# One component (year, week or day) of a parsed week date; each element is
# eventually handed to ``int()``.
_WEEKDATE_ELEMENT = Union[str, bytes, SupportsInt, bytearray]

# Every format token the parser knows how to handle.
_FORMAT_TYPE = Literal[
    # year / month / day-of-year / day-of-month numbers
    "YYYY", "YY", "MM", "M", "DDDD", "DDD", "DD", "D",
    # hours (upper and lower case), minutes, seconds
    "HH", "H", "hh", "h", "mm", "m", "ss", "s",
    # timestamps
    "X", "x",
    # timezone expressions
    "ZZZ", "ZZ", "Z",
    # sub-seconds and week dates
    "S", "W",
    # locale-dependent month names, ordinal day and weekday tokens
    "MMMM", "MMM", "Do", "dddd", "ddd", "d",
    # meridians
    "a", "A",
]
class _Parts(TypedDict, total=False):
    """Intermediate values collected while parsing, keyed by component.

    Declared with ``total=False``: every key is optional and is only present
    when the corresponding token was parsed.
    """

    # calendar components
    year: int
    month: int
    day_of_year: int
    day: int

    # clock components
    hour: int
    minute: int
    second: int
    microsecond: int

    # values of the "X" and "x" tokens respectively
    timestamp: float
    expanded_timestamp: int

    tzinfo: dt_tzinfo
    am_pm: Literal["am", "pm"]
    day_of_week: int

    # (year, week, day) captured by the "W" token; day may be None
    weekdate: Tuple[_WEEKDATE_ELEMENT, _WEEKDATE_ELEMENT, Optional[_WEEKDATE_ELEMENT]]
class DateTimeParser:
    """Parse datetime strings described by format tokens such as ``YYYY-MM-DD``.

    A format string is compiled into a regular expression with one named
    group per token (:meth:`_generate_pattern_re`). The captured values are
    collected into a ``_Parts`` mapping (:meth:`_parse_token`) and then
    assembled into a :class:`datetime.datetime` (:meth:`_build_datetime`).

    :param locale: locale name handed to ``locales.get_locale``; the locale
        supplies month/day names, the ordinal-day pattern and the meridians.
    :param cache_size: when greater than zero, ``_generate_pattern_re`` is
        wrapped in an ``lru_cache`` of that size on this instance.
    """

    # Finds the format tokens inside a format string.
    _FORMAT_RE: ClassVar[Pattern[str]] = re.compile(
        r"(YYY?Y?|MM?M?M?|Do|DD?D?D?|d?d?d?d|HH?|hh?|mm?|ss?|S+|ZZ?Z?|a|A|x|X|W)"
    )
    # Finds bracketed ("escaped") sections of a format string, e.g. "[at]".
    _ESCAPE_RE: ClassVar[Pattern[str]] = re.compile(r"\[[^\[\]]*\]")

    _ONE_OR_TWO_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{1,2}")
    _ONE_OR_TWO_OR_THREE_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{1,3}")
    _ONE_OR_MORE_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d+")
    _TWO_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{2}")
    _THREE_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{3}")
    _FOUR_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{4}")
    _TZ_Z_RE: ClassVar[Pattern[str]] = re.compile(r"([\+\-])(\d{2})(?:(\d{2}))?|Z")
    _TZ_ZZ_RE: ClassVar[Pattern[str]] = re.compile(r"([\+\-])(\d{2})(?:\:(\d{2}))?|Z")
    _TZ_NAME_RE: ClassVar[Pattern[str]] = re.compile(r"\w[\w+\-/]+")
    # NOTE: timestamps cannot be parsed from natural language strings (by removing the ^...$) because it will
    # break cases like "15 Jul 2000" and a format list (see issue #447)
    _TIMESTAMP_RE: ClassVar[Pattern[str]] = re.compile(r"^\-?\d+\.?\d+$")
    _TIMESTAMP_EXPANDED_RE: ClassVar[Pattern[str]] = re.compile(r"^\-?\d+$")
    # Time part used by parse_iso(): hours, optional minutes, optional
    # seconds, optional sub-seconds introduced by "." or ",".
    _TIME_RE: ClassVar[Pattern[str]] = re.compile(
        r"^(\d{2})(?:\:?(\d{2}))?(?:\:?(\d{2}))?(?:([\.\,])(\d+))?$"
    )
    # Week date: four-digit year, "W", two-digit week, optional one-digit day.
    _WEEK_DATE_RE: ClassVar[Pattern[str]] = re.compile(
        r"(?P<year>\d{4})[\-]?W(?P<week>\d{2})[\-]?(?P<day>\d)?"
    )

    # Locale-independent token -> pattern table; __init__ copies it and adds
    # the locale-dependent tokens.
    _BASE_INPUT_RE_MAP: ClassVar[Dict[_FORMAT_TYPE, Pattern[str]]] = {
        "YYYY": _FOUR_DIGIT_RE,
        "YY": _TWO_DIGIT_RE,
        "MM": _TWO_DIGIT_RE,
        "M": _ONE_OR_TWO_DIGIT_RE,
        "DDDD": _THREE_DIGIT_RE,
        "DDD": _ONE_OR_TWO_OR_THREE_DIGIT_RE,
        "DD": _TWO_DIGIT_RE,
        "D": _ONE_OR_TWO_DIGIT_RE,
        "HH": _TWO_DIGIT_RE,
        "H": _ONE_OR_TWO_DIGIT_RE,
        "hh": _TWO_DIGIT_RE,
        "h": _ONE_OR_TWO_DIGIT_RE,
        "mm": _TWO_DIGIT_RE,
        "m": _ONE_OR_TWO_DIGIT_RE,
        "ss": _TWO_DIGIT_RE,
        "s": _ONE_OR_TWO_DIGIT_RE,
        "X": _TIMESTAMP_RE,
        "x": _TIMESTAMP_EXPANDED_RE,
        "ZZZ": _TZ_NAME_RE,
        "ZZ": _TZ_ZZ_RE,
        "Z": _TZ_Z_RE,
        "S": _ONE_OR_MORE_DIGIT_RE,
        "W": _WEEK_DATE_RE,
    }

    SEPARATORS: ClassVar[List[str]] = ["-", "/", "."]

    locale: locales.Locale
    _input_re_map: Dict[_FORMAT_TYPE, Pattern[str]]

    def __init__(self, locale: str = DEFAULT_LOCALE, cache_size: int = 0) -> None:
        """Build the per-instance token table for ``locale``.

        :param locale: locale name resolved through ``locales.get_locale``.
        :param cache_size: size of the optional ``lru_cache`` placed around
            ``_generate_pattern_re``; no cache is installed when it is ``0``
            or negative.
        """
        self.locale = locales.get_locale(locale)
        self._input_re_map = self._BASE_INPUT_RE_MAP.copy()
        self._input_re_map.update(
            {
                # index 0 of the locale name lists is skipped ([1:])
                "MMMM": self._generate_choice_re(
                    self.locale.month_names[1:], re.IGNORECASE
                ),
                "MMM": self._generate_choice_re(
                    self.locale.month_abbreviations[1:], re.IGNORECASE
                ),
                "Do": re.compile(self.locale.ordinal_day_re),
                "dddd": self._generate_choice_re(
                    self.locale.day_names[1:], re.IGNORECASE
                ),
                "ddd": self._generate_choice_re(
                    self.locale.day_abbreviations[1:], re.IGNORECASE
                ),
                "d": re.compile(r"[1-7]"),
                "a": self._generate_choice_re(
                    (self.locale.meridians["am"], self.locale.meridians["pm"])
                ),
                # note: 'A' token accepts both 'am/pm' and 'AM/PM' formats to
                # ensure backwards compatibility of this token
                "A": self._generate_choice_re(self.locale.meridians.values()),
            }
        )
        if cache_size > 0:
            # Shadow the method on this instance with a cached wrapper.
            self._generate_pattern_re = lru_cache(maxsize=cache_size)(  # type: ignore
                self._generate_pattern_re
            )

    # TODO: since we support more than ISO 8601, we should rename this function
    # IDEA: break into multiple functions
    def parse_iso(
        self, datetime_string: str, normalize_whitespace: bool = False
    ) -> datetime:
        """Parse an ISO 8601-like string without an explicit format.

        The date part is tried against a fixed list of date formats. When a
        time part is present (separated by a single space or a ``T``), a
        matching time format and, if an offset follows, a timezone token are
        appended to every candidate before parsing.

        :param datetime_string: the string to parse.
        :param normalize_whitespace: strip the string and collapse runs of
            whitespace into single spaces first.
        :raises ParserError: if the string has an unexpected number of
            spaces, the time component is invalid, or no format matches.
        """
        if normalize_whitespace:
            datetime_string = re.sub(r"\s+", " ", datetime_string.strip())

        has_space_divider = " " in datetime_string
        has_t_divider = "T" in datetime_string

        num_spaces = datetime_string.count(" ")
        if has_space_divider and num_spaces != 1 or has_t_divider and num_spaces > 0:
            raise ParserError(
                f"Expected an ISO 8601-like string, but was given {datetime_string!r}. "
                "Try passing in a format string to resolve this."
            )

        has_time = has_space_divider or has_t_divider
        has_tz = False

        # date formats (ISO 8601 and others) to test against
        # NOTE: YYYYMM is omitted to avoid confusion with YYMMDD (no longer part of ISO 8601, but is still often used)
        formats = [
            "YYYY-MM-DD",
            "YYYY-M-DD",
            "YYYY-M-D",
            "YYYY/MM/DD",
            "YYYY/M/DD",
            "YYYY/M/D",
            "YYYY.MM.DD",
            "YYYY.M.DD",
            "YYYY.M.D",
            "YYYYMMDD",
            "YYYY-DDDD",
            "YYYYDDDD",
            "YYYY-MM",
            "YYYY/MM",
            "YYYY.MM",
            "YYYY",
            "W",
        ]

        if has_time:
            if has_space_divider:
                date_string, time_string = datetime_string.split(" ", 1)
            else:
                date_string, time_string = datetime_string.split("T", 1)

            # Split the clock time from a trailing offset ("+", "-" or "Z").
            time_parts = re.split(
                r"[\+\-Z]", time_string, maxsplit=1, flags=re.IGNORECASE
            )

            time_components: Optional[Match[str]] = self._TIME_RE.match(time_parts[0])

            if time_components is None:
                raise ParserError(
                    "Invalid time component provided. "
                    "Please specify a format or provide a valid time component in the basic or extended ISO 8601 time format."
                )

            (
                hours,
                minutes,
                seconds,
                subseconds_sep,
                subseconds,
            ) = time_components.groups()

            has_tz = len(time_parts) == 2
            has_minutes = minutes is not None
            has_seconds = seconds is not None
            has_subseconds = subseconds is not None

            is_basic_time_format = ":" not in time_parts[0]
            tz_format = "Z"

            # use 'ZZ' token instead since tz offset is present in non-basic format
            if has_tz and ":" in time_parts[1]:
                tz_format = "ZZ"

            time_sep = "" if is_basic_time_format else ":"

            # Build the time format from the components actually present.
            if has_subseconds:
                time_string = f"HH{time_sep}mm{time_sep}ss{subseconds_sep}S"
            elif has_seconds:
                time_string = f"HH{time_sep}mm{time_sep}ss"
            elif has_minutes:
                time_string = f"HH{time_sep}mm"
            else:
                time_string = "HH"

            if has_space_divider:
                formats = [f"{f} {time_string}" for f in formats]
            else:
                formats = [f"{f}T{time_string}" for f in formats]

        if has_time and has_tz:
            # Add "Z" or "ZZ" to the format strings to indicate to
            # _parse_token() that a timezone needs to be parsed
            formats = [f"{f}{tz_format}" for f in formats]

        return self._parse_multiformat(datetime_string, formats)

    def parse(
        self,
        datetime_string: str,
        fmt: Union[List[str], str],
        normalize_whitespace: bool = False,
    ) -> datetime:
        """Parse ``datetime_string`` according to ``fmt``.

        :param datetime_string: the string to parse.
        :param fmt: a format string, or a list of format strings that are
            tried in order.
        :param normalize_whitespace: collapse runs of whitespace into single
            spaces before matching.
        :raises ParserMatchError: if the pattern cannot be generated, the
            string does not match, or a token has no captured value.
        :raises ParserError: for the other failures raised while generating
            the pattern or building the datetime.
        """
        if normalize_whitespace:
            datetime_string = re.sub(r"\s+", " ", datetime_string)

        if isinstance(fmt, list):
            return self._parse_multiformat(datetime_string, fmt)

        try:
            fmt_tokens: List[_FORMAT_TYPE]
            fmt_pattern_re: Pattern[str]
            fmt_tokens, fmt_pattern_re = self._generate_pattern_re(fmt)
        except re.error as e:
            # Chain explicitly so the underlying regex error stays attached.
            raise ParserMatchError(
                f"Failed to generate regular expression pattern: {e}."
            ) from e

        match = fmt_pattern_re.search(datetime_string)

        if match is None:
            raise ParserMatchError(
                f"Failed to match {fmt!r} when parsing {datetime_string!r}."
            )

        parts: _Parts = {}
        for token in fmt_tokens:
            value: Union[Tuple[str, str, str], str]
            if token == "Do":
                # the ordinal-day pattern captures its number in group "value"
                value = match.group("value")
            elif token == "W":
                value = (match.group("year"), match.group("week"), match.group("day"))
            else:
                value = match.group(token)

            if value is None:
                raise ParserMatchError(
                    f"Unable to find a match group for the specified token {token!r}."
                )

            self._parse_token(token, value, parts)  # type: ignore[arg-type]

        return self._build_datetime(parts)

    def _generate_pattern_re(self, fmt: str) -> Tuple[List[_FORMAT_TYPE], Pattern[str]]:
        """Compile ``fmt`` into a regular expression.

        :param fmt: a format string such as ``'YYYY-MM-DD'``.
        :returns: the tokens found in ``fmt`` (in order of appearance) and a
            case-insensitive compiled pattern with one named group per token.
        :raises ParserError: if a token has no entry in the token table.
        """
        # fmt is a string of tokens like 'YYYY-MM-DD'
        # we construct a new string by replacing each
        # token by its pattern:
        # 'YYYY-MM-DD' -> '(?P<YYYY>\d{4})-(?P<MM>\d{2})-(?P<DD>\d{2})'
        tokens: List[_FORMAT_TYPE] = []
        offset = 0

        # Escape all special RegEx chars
        escaped_fmt = re.escape(fmt)

        # Extract the bracketed expressions to be reinserted later.
        escaped_fmt = re.sub(self._ESCAPE_RE, "#", escaped_fmt)

        # Any number of S is the same as one.
        # TODO: allow users to specify the number of digits to parse
        escaped_fmt = re.sub(r"S+", "S", escaped_fmt)

        # The bracketed sections are taken from the unescaped format string.
        escaped_data = re.findall(self._ESCAPE_RE, fmt)

        fmt_pattern = escaped_fmt

        for m in self._FORMAT_RE.finditer(escaped_fmt):
            token: _FORMAT_TYPE = cast(_FORMAT_TYPE, m.group(0))
            try:
                input_re = self._input_re_map[token]
            except KeyError:
                # The KeyError adds nothing beyond the message below.
                raise ParserError(f"Unrecognized token {token!r}.") from None
            input_pattern = f"(?P<{token}>{input_re.pattern})"
            tokens.append(token)
            # a pattern doesn't have the same length as the token
            # it replaces! We keep the difference in the offset variable.
            # This works because the string is scanned left-to-right and matches
            # are returned in the order found by finditer.
            fmt_pattern = (
                fmt_pattern[: m.start() + offset]
                + input_pattern
                + fmt_pattern[m.end() + offset :]
            )
            offset += len(input_pattern) - (m.end() - m.start())

        # Re-insert the bracketed sections (without their brackets) at the
        # placeholders left behind above.
        split_fmt = fmt_pattern.split(r"\#")

        # Due to the way Python splits, 'split_fmt' will always be longer
        pieces: List[str] = []
        for i, chunk in enumerate(split_fmt):
            pieces.append(chunk)
            if i < len(escaped_data):
                pieces.append(escaped_data[i][1:-1])
        final_fmt_pattern = "".join(pieces)

        # Wrap final_fmt_pattern in a custom word boundary to strictly
        # match the formatting pattern and filter out date and time formats
        # that include junk such as: blah1998-09-12 blah, blah 1998-09-12blah,
        # blah1998-09-12blah. The custom word boundary matches every character
        # that is not a whitespace character to allow for searching for a date
        # and time string in a natural language sentence. Therefore, searching
        # for a string of the form YYYY-MM-DD in "blah 1998-09-12 blah" will
        # work properly.
        # Certain punctuation before or after the target pattern such as
        # "1998-09-12," is permitted. For the full list of valid punctuation,
        # see the documentation.

        starting_word_boundary = (
            r"(?<!\S\S)"  # Don't have two consecutive non-whitespace characters. This ensures that we allow cases
            # like .11.25.2019 but not 1.11.25.2019 (for pattern MM.DD.YYYY)
            r"(?<![^\,\.\;\:\?\!\"\'\`\[\]\{\}\(\)<>\s])"  # This is the list of punctuation that is ok before the
            # pattern (i.e. "It can't not be these characters before the pattern")
            r"(\b|^)"
            # The \b is to block cases like 1201912 but allow 201912 for pattern YYYYMM. The ^ was necessary to allow a
            # negative number through i.e. before epoch numbers
        )
        ending_word_boundary = (
            r"(?=[\,\.\;\:\?\!\"\'\`\[\]\{\}\(\)\<\>]?"  # Positive lookahead stating that these punctuation marks
            # can appear after the pattern at most 1 time
            r"(?!\S))"  # Don't allow any non-whitespace character after the punctuation
        )
        bounded_fmt_pattern = (
            f"{starting_word_boundary}{final_fmt_pattern}{ending_word_boundary}"
        )

        return tokens, re.compile(bounded_fmt_pattern, flags=re.IGNORECASE)

    @overload
    def _parse_token(
        self,
        token: Literal[
            "YYYY",
            "YY",
            "MM",
            "M",
            "DDDD",
            "DDD",
            "DD",
            "D",
            "Do",
            "HH",
            "hh",
            "h",
            "H",
            "mm",
            "m",
            "ss",
            "s",
            "x",
        ],
        value: Union[str, bytes, SupportsInt, bytearray],
        parts: _Parts,
    ) -> None:
        ...  # pragma: no cover

    @overload
    def _parse_token(
        self,
        token: Literal["X"],
        value: Union[str, bytes, SupportsFloat, bytearray],
        parts: _Parts,
    ) -> None:
        ...  # pragma: no cover

    @overload
    def _parse_token(
        self,
        token: Literal["MMMM", "MMM", "dddd", "ddd", "S"],
        value: Union[str, bytes, bytearray],
        parts: _Parts,
    ) -> None:
        ...  # pragma: no cover

    @overload
    def _parse_token(
        self,
        token: Literal["a", "A", "ZZZ", "ZZ", "Z"],
        value: Union[str, bytes],
        parts: _Parts,
    ) -> None:
        ...  # pragma: no cover

    @overload
    def _parse_token(
        self,
        token: Literal["W"],
        value: Tuple[_WEEKDATE_ELEMENT, _WEEKDATE_ELEMENT, Optional[_WEEKDATE_ELEMENT]],
        parts: _Parts,
    ) -> None:
        ...  # pragma: no cover

    def _parse_token(
        self,
        token: Any,
        value: Any,
        parts: _Parts,
    ) -> None:
        """Convert the text captured for ``token`` and store it in ``parts``.

        :param token: the format token the value was captured for.
        :param value: the captured text (a 3-tuple for the ``W`` token).
        :param parts: mapping that is updated in place.
        :raises ParserMatchError: if an "am" meridian follows an hour outside
            0..12.
        """
        if token == "YYYY":
            parts["year"] = int(value)

        elif token == "YY":
            # two-digit years above 68 belong to the 1900s, the rest to the 2000s
            value = int(value)
            parts["year"] = 1900 + value if value > 68 else 2000 + value

        elif token in ("MMMM", "MMM"):
            # FIXME: month_number() is nullable
            parts["month"] = self.locale.month_number(value.lower())  # type: ignore[typeddict-item]

        elif token in ("MM", "M"):
            parts["month"] = int(value)

        elif token in ("DDDD", "DDD"):
            parts["day_of_year"] = int(value)

        elif token in ("DD", "D"):
            parts["day"] = int(value)

        elif token == "Do":
            parts["day"] = int(value)

        elif token == "dddd":
            # locale day names are 1-indexed
            day_of_week = [x.lower() for x in self.locale.day_names].index(
                value.lower()
            )
            parts["day_of_week"] = day_of_week - 1

        elif token == "ddd":
            # locale day abbreviations are 1-indexed
            day_of_week = [x.lower() for x in self.locale.day_abbreviations].index(
                value.lower()
            )
            parts["day_of_week"] = day_of_week - 1

        elif token.upper() in ("HH", "H"):
            # covers "HH", "H", "hh" and "h"
            parts["hour"] = int(value)

        elif token in ("mm", "m"):
            parts["minute"] = int(value)

        elif token in ("ss", "s"):
            parts["second"] = int(value)

        elif token == "S":
            # We have the *most significant* digits of an arbitrary-precision integer.
            # We want the six most significant digits as an integer, rounded.
            # IDEA: add nanosecond support somehow? Need datetime support for it first.
            value = value.ljust(7, "0")

            # floating-point (IEEE-754) defaults to half-to-even rounding
            seventh_digit = int(value[6])
            if seventh_digit == 5:
                rounding = int(value[5]) % 2
            elif seventh_digit > 5:
                rounding = 1
            else:
                rounding = 0

            # May yield 1000000; _build_datetime() carries that into seconds.
            parts["microsecond"] = int(value[:6]) + rounding

        elif token == "X":
            parts["timestamp"] = float(value)

        elif token == "x":
            parts["expanded_timestamp"] = int(value)

        elif token in ("ZZZ", "ZZ", "Z"):
            parts["tzinfo"] = TzinfoParser.parse(value)

        elif token in ("a", "A"):
            # NOTE(review): the pattern is compiled with re.IGNORECASE but the
            # comparisons below are exact, so a mixed-case meridian appears to
            # match the pattern yet leave "am_pm" unset - confirm intent.
            if value in (self.locale.meridians["am"], self.locale.meridians["AM"]):
                parts["am_pm"] = "am"
                if "hour" in parts and not 0 <= parts["hour"] <= 12:
                    raise ParserMatchError(
                        f"Hour token value must be between 0 and 12 inclusive for token {token!r}."
                    )
            elif value in (self.locale.meridians["pm"], self.locale.meridians["PM"]):
                parts["am_pm"] = "pm"

        elif token == "W":
            parts["weekdate"] = value

    @staticmethod
    def _build_datetime(parts: _Parts) -> datetime:
        """Assemble a :class:`datetime.datetime` from parsed ``parts``.

        A week date is first resolved to year/month/day. A ``timestamp`` or
        ``expanded_timestamp`` short-circuits everything else and yields a
        UTC datetime. Otherwise day-of-year, day-of-week, meridian, the
        24:00 "midnight at end of day" form and microsecond overflow are
        resolved before the datetime is constructed.

        :param parts: values collected by ``_parse_token``; may be updated
            in place with the resolved year, month and day.
        :raises ParserError: for an invalid week date or day of year, a
            day-of-year without a year or combined with a month, or a
            non-zero minute/second/microsecond at hour 24.
        """
        weekdate = parts.get("weekdate")

        if weekdate is not None:
            year, week = int(weekdate[0]), int(weekdate[1])

            if weekdate[2] is not None:
                _day = int(weekdate[2])
            else:
                # day not given, default to 1
                _day = 1

            date_string = f"{year}-{week}-{_day}"

            # tokens for ISO 8601 weekdates
            try:
                dt = datetime.strptime(date_string, "%G-%V-%u")
            except ValueError as e:
                # Report invalid week dates the same way as invalid days of
                # year below; ParserError is still a ValueError.
                raise ParserError(
                    f"The provided ISO week date {date_string!r} is invalid."
                ) from e

            parts["year"] = dt.year
            parts["month"] = dt.month
            parts["day"] = dt.day

        timestamp = parts.get("timestamp")

        if timestamp is not None:
            return datetime.fromtimestamp(timestamp, tz=tz.tzutc())

        expanded_timestamp = parts.get("expanded_timestamp")

        if expanded_timestamp is not None:
            return datetime.fromtimestamp(
                normalize_timestamp(expanded_timestamp),
                tz=tz.tzutc(),
            )

        day_of_year = parts.get("day_of_year")

        if day_of_year is not None:
            _year = parts.get("year")
            month = parts.get("month")
            if _year is None:
                raise ParserError(
                    "Year component is required with the DDD and DDDD tokens."
                )

            if month is not None:
                raise ParserError(
                    "Month component is not allowed with the DDD and DDDD tokens."
                )

            date_string = f"{_year}-{day_of_year}"
            try:
                dt = datetime.strptime(date_string, "%Y-%j")
            except ValueError as e:
                raise ParserError(
                    f"The provided day of year {day_of_year!r} is invalid."
                ) from e

            parts["year"] = dt.year
            parts["month"] = dt.month
            parts["day"] = dt.day

        day_of_week: Optional[int] = parts.get("day_of_week")
        day = parts.get("day")

        # If day is passed, ignore day of week
        if day_of_week is not None and day is None:
            year = parts.get("year", 1970)
            month = parts.get("month", 1)
            day = 1

            # dddd => first day of week after epoch
            # dddd YYYY => first day of week in specified year
            # dddd MM YYYY => first day of week in specified year and month
            # dddd MM => first day after epoch in specified month
            next_weekday_dt = next_weekday(datetime(year, month, day), day_of_week)
            parts["year"] = next_weekday_dt.year
            parts["month"] = next_weekday_dt.month
            parts["day"] = next_weekday_dt.day

        am_pm = parts.get("am_pm")
        hour = parts.get("hour", 0)

        # Convert a 12-hour clock value to 24-hour.
        if am_pm == "pm" and hour < 12:
            hour += 12
        elif am_pm == "am" and hour == 12:
            hour = 0

        # Support for midnight at the end of day
        if hour == 24:
            if parts.get("minute", 0) != 0:
                raise ParserError("Midnight at the end of day must not contain minutes")
            if parts.get("second", 0) != 0:
                raise ParserError("Midnight at the end of day must not contain seconds")
            if parts.get("microsecond", 0) != 0:
                raise ParserError(
                    "Midnight at the end of day must not contain microseconds"
                )
            hour = 0
            day_increment = 1
        else:
            day_increment = 0

        # account for rounding up to 1000000
        microsecond = parts.get("microsecond", 0)
        if microsecond == 1000000:
            microsecond = 0
            second_increment = 1
        else:
            second_increment = 0

        increment = timedelta(days=day_increment, seconds=second_increment)

        return (
            datetime(
                year=parts.get("year", 1),
                month=parts.get("month", 1),
                day=parts.get("day", 1),
                hour=hour,
                minute=parts.get("minute", 0),
                second=parts.get("second", 0),
                microsecond=microsecond,
                tzinfo=parts.get("tzinfo"),
            )
            + increment
        )

    def _parse_multiformat(self, string: str, formats: Iterable[str]) -> datetime:
        """Return the result of the first format in ``formats`` that matches.

        Only :class:`ParserMatchError` moves on to the next format; any other
        error raised by :meth:`parse` propagates immediately.

        :param string: the string to parse.
        :param formats: candidate format strings, tried in order.
        :raises ParserError: if no format matches; the message lists the
            formats that were tried.
        """
        # Remember what was tried so the error message is still complete when
        # `formats` is a one-shot iterable such as a generator.
        attempted: List[str] = []

        for fmt in formats:
            attempted.append(fmt)
            try:
                return self.parse(string, fmt)
            except ParserMatchError:
                continue

        supported_formats = ", ".join(attempted)
        raise ParserError(
            f"Could not match input {string!r} to any of the following formats: {supported_formats}."
        )

    # generates a capture group of choices separated by an OR operator
    @staticmethod
    def _generate_choice_re(
        choices: Iterable[str], flags: Union[int, re.RegexFlag] = 0
    ) -> Pattern[str]:
        """Compile ``choices`` into a single capturing alternation group.

        :param choices: the alternatives, joined with ``|`` as given.
        :param flags: flags passed to :func:`re.compile`.
        """
        # NOTE(review): choices are inserted without re.escape(), so regex
        # metacharacters in a choice (e.g. ".") keep their special meaning -
        # confirm that no locale data relies on this before escaping.
        alternation = "|".join(choices)
        return re.compile(f"({alternation})", flags=flags)
class TzinfoParser:
    """Turn a timezone expression into a ``tzinfo`` object."""

    # Optional "(UTC" prefix, optional sign, two-digit hours and optional
    # two-digit minutes (with or without a colon), anchored at the start.
    _TZINFO_RE: ClassVar[Pattern[str]] = re.compile(
        r"^(?:\(UTC)*([\+\-])?(\d{2})(?:\:?(\d{2}))?"
    )

    @classmethod
    def parse(cls, tzinfo_string: str) -> dt_tzinfo:
        """Resolve ``tzinfo_string`` to a timezone.

        Checked in order: the literal ``"local"``, the UTC spellings
        ``"utc"``/``"UTC"``/``"Z"``, a numeric offset matching
        ``_TZINFO_RE``, and finally a lookup through ``tz.gettz``.

        :param tzinfo_string: the timezone expression.
        :raises ParserError: if the ``tz.gettz`` lookup returns ``None``.
        """
        if tzinfo_string == "local":
            return tz.tzlocal()

        if tzinfo_string in ("utc", "UTC", "Z"):
            return tz.tzutc()

        offset_match = cls._TZINFO_RE.match(tzinfo_string)
        if offset_match is not None:
            sign, hours, minutes = offset_match.groups()
            # minutes is None when the expression carries hours only
            offset_seconds = int(hours) * 3600 + int(minutes or 0) * 60
            if sign == "-":
                offset_seconds = -offset_seconds
            return tz.tzoffset(None, offset_seconds)

        named_zone = tz.gettz(tzinfo_string)
        if named_zone is None:
            raise ParserError(f"Could not parse timezone expression {tzinfo_string!r}.")
        return named_zone