1"""Provides the :class:`Arrow <arrow.parser.DateTimeParser>` class, a better way to parse datetime strings."""

import re
from datetime import datetime, timedelta
from datetime import tzinfo as dt_tzinfo
from functools import lru_cache
from typing import (
    Any,
    ClassVar,
    Dict,
    Iterable,
    List,
    Literal,
    Match,
    Optional,
    Pattern,
    SupportsFloat,
    SupportsInt,
    Tuple,
    TypedDict,
    Union,
    cast,
    overload,
)

from dateutil import tz

from arrow import locales
from arrow.constants import DEFAULT_LOCALE
from arrow.util import next_weekday, normalize_timestamp


class ParserError(ValueError):
    pass


# Allows for ParserErrors to be propagated from _build_datetime()
# when day_of_year errors occur.
# Before this, the ParserErrors were caught by the try/except in
# _parse_multiformat() and the appropriate error message was not
# transmitted to the user.
class ParserMatchError(ParserError):
    pass


_WEEKDATE_ELEMENT = Union[str, bytes, SupportsInt, bytearray]

_FORMAT_TYPE = Literal[
    "YYYY",
    "YY",
    "MM",
    "M",
    "DDDD",
    "DDD",
    "DD",
    "D",
    "HH",
    "H",
    "hh",
    "h",
    "mm",
    "m",
    "ss",
    "s",
    "X",
    "x",
    "ZZZ",
    "ZZ",
    "Z",
    "S",
    "W",
    "MMMM",
    "MMM",
    "Do",
    "dddd",
    "ddd",
    "d",
    "a",
    "A",
]


class _Parts(TypedDict, total=False):
    year: int
    month: int
    day_of_year: int
    day: int
    hour: int
    minute: int
    second: int
    microsecond: int
    timestamp: float
    expanded_timestamp: int
    tzinfo: dt_tzinfo
    am_pm: Literal["am", "pm"]
    day_of_week: int
    weekdate: Tuple[_WEEKDATE_ELEMENT, _WEEKDATE_ELEMENT, Optional[_WEEKDATE_ELEMENT]]


class DateTimeParser:
    _FORMAT_RE: ClassVar[Pattern[str]] = re.compile(
        r"(YYY?Y?|MM?M?M?|Do|DD?D?D?|d?d?d?d|HH?|hh?|mm?|ss?|S+|ZZ?Z?|a|A|x|X|W)"
    )
    _ESCAPE_RE: ClassVar[Pattern[str]] = re.compile(r"\[[^\[\]]*\]")

    _ONE_OR_TWO_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{1,2}")
    _ONE_OR_TWO_OR_THREE_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{1,3}")
    _ONE_OR_MORE_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d+")
    _TWO_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{2}")
    _THREE_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{3}")
    _FOUR_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{4}")
    _TZ_Z_RE: ClassVar[Pattern[str]] = re.compile(r"([\+\-])(\d{2})(?:(\d{2}))?|Z")
    _TZ_ZZ_RE: ClassVar[Pattern[str]] = re.compile(r"([\+\-])(\d{2})(?:\:(\d{2}))?|Z")
    _TZ_NAME_RE: ClassVar[Pattern[str]] = re.compile(r"\w[\w+\-/]+")
    # NOTE: timestamps cannot be parsed from natural language strings (by removing the
    # ^...$) because it will break cases like "15 Jul 2000" and a format list (see issue #447)
    _TIMESTAMP_RE: ClassVar[Pattern[str]] = re.compile(r"^\-?\d+\.?\d+$")
    _TIMESTAMP_EXPANDED_RE: ClassVar[Pattern[str]] = re.compile(r"^\-?\d+$")
    _TIME_RE: ClassVar[Pattern[str]] = re.compile(
        r"^(\d{2})(?:\:?(\d{2}))?(?:\:?(\d{2}))?(?:([\.\,])(\d+))?$"
    )
    _WEEK_DATE_RE: ClassVar[Pattern[str]] = re.compile(
        r"(?P<year>\d{4})[\-]?W(?P<week>\d{2})[\-]?(?P<day>\d)?"
    )
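    # e.g. _WEEK_DATE_RE matches ISO 8601 week dates such as "2017-W14-3" or "2017W14"
    # (the day-of-week digit is optional).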

    _BASE_INPUT_RE_MAP: ClassVar[Dict[_FORMAT_TYPE, Pattern[str]]] = {
        "YYYY": _FOUR_DIGIT_RE,
        "YY": _TWO_DIGIT_RE,
        "MM": _TWO_DIGIT_RE,
        "M": _ONE_OR_TWO_DIGIT_RE,
        "DDDD": _THREE_DIGIT_RE,
        "DDD": _ONE_OR_TWO_OR_THREE_DIGIT_RE,
        "DD": _TWO_DIGIT_RE,
        "D": _ONE_OR_TWO_DIGIT_RE,
        "HH": _TWO_DIGIT_RE,
        "H": _ONE_OR_TWO_DIGIT_RE,
        "hh": _TWO_DIGIT_RE,
        "h": _ONE_OR_TWO_DIGIT_RE,
        "mm": _TWO_DIGIT_RE,
        "m": _ONE_OR_TWO_DIGIT_RE,
        "ss": _TWO_DIGIT_RE,
        "s": _ONE_OR_TWO_DIGIT_RE,
        "X": _TIMESTAMP_RE,
        "x": _TIMESTAMP_EXPANDED_RE,
        "ZZZ": _TZ_NAME_RE,
        "ZZ": _TZ_ZZ_RE,
        "Z": _TZ_Z_RE,
        "S": _ONE_OR_MORE_DIGIT_RE,
        "W": _WEEK_DATE_RE,
    }

    SEPARATORS: ClassVar[List[str]] = ["-", "/", "."]

    locale: locales.Locale
    _input_re_map: Dict[_FORMAT_TYPE, Pattern[str]]

    def __init__(self, locale: str = DEFAULT_LOCALE, cache_size: int = 0) -> None:
        self.locale = locales.get_locale(locale)
        self._input_re_map = self._BASE_INPUT_RE_MAP.copy()
        self._input_re_map.update(
            {
                "MMMM": self._generate_choice_re(
                    self.locale.month_names[1:], re.IGNORECASE
                ),
                "MMM": self._generate_choice_re(
                    self.locale.month_abbreviations[1:], re.IGNORECASE
                ),
                "Do": re.compile(self.locale.ordinal_day_re),
                "dddd": self._generate_choice_re(
                    self.locale.day_names[1:], re.IGNORECASE
                ),
                "ddd": self._generate_choice_re(
                    self.locale.day_abbreviations[1:], re.IGNORECASE
                ),
                "d": re.compile(r"[1-7]"),
                "a": self._generate_choice_re(
                    (self.locale.meridians["am"], self.locale.meridians["pm"])
                ),
                # note: 'A' token accepts both 'am/pm' and 'AM/PM' formats to
                # ensure backwards compatibility of this token
                "A": self._generate_choice_re(self.locale.meridians.values()),
            }
        )
        if cache_size > 0:
            self._generate_pattern_re = lru_cache(maxsize=cache_size)(  # type: ignore
                self._generate_pattern_re
            )
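        # e.g. DateTimeParser(cache_size=100) will memoize up to 100 compiled
        # format patterns through the lru_cache wrapper above.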

    # TODO: since we support more than ISO 8601, we should rename this function
    # IDEA: break into multiple functions
    def parse_iso(
        self, datetime_string: str, normalize_whitespace: bool = False
    ) -> datetime:
        if normalize_whitespace:
            datetime_string = re.sub(r"\s+", " ", datetime_string.strip())

        has_space_divider = " " in datetime_string
        has_t_divider = "T" in datetime_string

        num_spaces = datetime_string.count(" ")
        if has_space_divider and num_spaces != 1 or has_t_divider and num_spaces > 0:
            raise ParserError(
                f"Expected an ISO 8601-like string, but was given {datetime_string!r}. "
                "Try passing in a format string to resolve this."
            )

        has_time = has_space_divider or has_t_divider
        has_tz = False

        # date formats (ISO 8601 and others) to test against
        # NOTE: YYYYMM is omitted to avoid confusion with YYMMDD (no longer part of ISO 8601, but is still often used)
        formats = [
            "YYYY-MM-DD",
            "YYYY-M-DD",
            "YYYY-M-D",
            "YYYY/MM/DD",
            "YYYY/M/DD",
            "YYYY/M/D",
            "YYYY.MM.DD",
            "YYYY.M.DD",
            "YYYY.M.D",
            "YYYYMMDD",
            "YYYY-DDDD",
            "YYYYDDDD",
            "YYYY-MM",
            "YYYY/MM",
            "YYYY.MM",
            "YYYY",
            "W",
        ]
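        # e.g. "2013-02-03" will match the "YYYY-MM-DD" format above, while the
        # ordinal date "2013-036" will match "YYYY-DDDD".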

        if has_time:
            if has_space_divider:
                date_string, time_string = datetime_string.split(" ", 1)
            else:
                date_string, time_string = datetime_string.split("T", 1)

            time_parts = re.split(
                r"[\+\-Z]", time_string, maxsplit=1, flags=re.IGNORECASE
            )

            time_components: Optional[Match[str]] = self._TIME_RE.match(time_parts[0])

            if time_components is None:
                raise ParserError(
                    "Invalid time component provided. "
                    "Please specify a format or provide a valid time component in the basic or extended ISO 8601 time format."
                )

            (
                hours,
                minutes,
                seconds,
                subseconds_sep,
                subseconds,
            ) = time_components.groups()

            has_tz = len(time_parts) == 2
            has_minutes = minutes is not None
            has_seconds = seconds is not None
            has_subseconds = subseconds is not None

            is_basic_time_format = ":" not in time_parts[0]
            tz_format = "Z"

            # use 'ZZ' token instead since tz offset is present in non-basic format
            if has_tz and ":" in time_parts[1]:
                tz_format = "ZZ"

            time_sep = "" if is_basic_time_format else ":"

            if has_subseconds:
                time_string = "HH{time_sep}mm{time_sep}ss{subseconds_sep}S".format(
                    time_sep=time_sep, subseconds_sep=subseconds_sep
                )
            elif has_seconds:
                time_string = "HH{time_sep}mm{time_sep}ss".format(time_sep=time_sep)
            elif has_minutes:
                time_string = f"HH{time_sep}mm"
            else:
                time_string = "HH"

            if has_space_divider:
                formats = [f"{f} {time_string}" for f in formats]
            else:
                formats = [f"{f}T{time_string}" for f in formats]

        if has_time and has_tz:
            # Add "Z" or "ZZ" to the format strings to indicate to
            # _parse_token() that a timezone needs to be parsed
            formats = [f"{f}{tz_format}" for f in formats]

        return self._parse_multiformat(datetime_string, formats)

    def parse(
        self,
        datetime_string: str,
        fmt: Union[List[str], str],
        normalize_whitespace: bool = False,
    ) -> datetime:
        if normalize_whitespace:
            datetime_string = re.sub(r"\s+", " ", datetime_string)

        if isinstance(fmt, list):
            return self._parse_multiformat(datetime_string, fmt)

        try:
            fmt_tokens: List[_FORMAT_TYPE]
            fmt_pattern_re: Pattern[str]
            fmt_tokens, fmt_pattern_re = self._generate_pattern_re(fmt)
        except re.error as e:
            raise ParserMatchError(
                f"Failed to generate regular expression pattern: {e}."
            )

        match = fmt_pattern_re.search(datetime_string)

        if match is None:
            raise ParserMatchError(
                f"Failed to match {fmt!r} when parsing {datetime_string!r}."
            )

        parts: _Parts = {}
        for token in fmt_tokens:
            value: Union[Tuple[str, str, str], str]
            if token == "Do":
                value = match.group("value")
            elif token == "W":
                value = (match.group("year"), match.group("week"), match.group("day"))
            else:
                value = match.group(token)

            if value is None:
                raise ParserMatchError(
                    f"Unable to find a match group for the specified token {token!r}."
                )

            self._parse_token(token, value, parts)  # type: ignore[arg-type]

        return self._build_datetime(parts)

    def _generate_pattern_re(self, fmt: str) -> Tuple[List[_FORMAT_TYPE], Pattern[str]]:
        # fmt is a string of tokens like 'YYYY-MM-DD'
        # we construct a new string by replacing each
        # token by its pattern:
        # 'YYYY-MM-DD' -> '(?P<YYYY>\d{4})-(?P<MM>\d{2})-(?P<DD>\d{2})'
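        # e.g. for fmt "DD-MM-YYYY" the returned token list is ["DD", "MM", "YYYY"]
        # and the compiled pattern matches strings such as "25-12-2021".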
        tokens: List[_FORMAT_TYPE] = []
        offset = 0

        # Escape all special RegEx chars
        escaped_fmt = re.escape(fmt)

        # Extract the bracketed expressions to be reinserted later.
        escaped_fmt = re.sub(self._ESCAPE_RE, "#", escaped_fmt)

        # Any number of S is the same as one.
        # TODO: allow users to specify the number of digits to parse
        escaped_fmt = re.sub(r"S+", "S", escaped_fmt)

        escaped_data = re.findall(self._ESCAPE_RE, fmt)

        fmt_pattern = escaped_fmt

        for m in self._FORMAT_RE.finditer(escaped_fmt):
            token: _FORMAT_TYPE = cast(_FORMAT_TYPE, m.group(0))
            try:
                input_re = self._input_re_map[token]
            except KeyError:
                raise ParserError(f"Unrecognized token {token!r}.")
            input_pattern = f"(?P<{token}>{input_re.pattern})"
            tokens.append(token)
            # A pattern doesn't have the same length as the token
            # it replaces! We keep the difference in the offset variable.
            # This works because the string is scanned left-to-right and matches
            # are returned in the order found by finditer.
            fmt_pattern = (
                fmt_pattern[: m.start() + offset]
                + input_pattern
                + fmt_pattern[m.end() + offset :]
            )
            offset += len(input_pattern) - (m.end() - m.start())

        final_fmt_pattern = ""
        split_fmt = fmt_pattern.split(r"\#")

        # Due to the way Python splits, 'split_fmt' will always contain one more
        # element than 'escaped_data', so reinsert the escaped data by index.
        for i in range(len(split_fmt)):
            final_fmt_pattern += split_fmt[i]
            if i < len(escaped_data):
                final_fmt_pattern += escaped_data[i][1:-1]

        # Wrap final_fmt_pattern in a custom word boundary to strictly
        # match the formatting pattern and filter out date and time formats
        # that include junk such as: blah1998-09-12 blah, blah 1998-09-12blah,
        # blah1998-09-12blah. The custom word boundary matches every character
        # that is not a whitespace character to allow for searching for a date
        # and time string in a natural language sentence. Therefore, searching
        # for a string of the form YYYY-MM-DD in "blah 1998-09-12 blah" will
        # work properly.
        # Certain punctuation before or after the target pattern such as
        # "1998-09-12," is permitted. For the full list of valid punctuation,
        # see the documentation.

        starting_word_boundary = (
            # Disallow two consecutive non-whitespace characters before the match.
            # This allows cases like .11.25.2019 but not 1.11.25.2019
            # (for the pattern MM.DD.YYYY).
            r"(?<!\S\S)"
            # Only whitespace or one of these punctuation characters may appear
            # immediately before the pattern.
            r"(?<![^\,\.\;\:\?\!\"\'\`\[\]\{\}\(\)<>\s])"
            # The \b blocks cases like 1201912 but allows 201912 for the pattern YYYYMM.
            # The ^ is necessary to let a negative number through, i.e. before-epoch
            # timestamps.
            r"(\b|^)"
        )
        ending_word_boundary = (
            # Positive lookahead: these punctuation marks may appear after the
            # pattern at most once...
            r"(?=[\,\.\;\:\?\!\"\'\`\[\]\{\}\(\)\<\>]?"
            # ...and no non-whitespace character may follow that punctuation.
            r"(?!\S))"
        )
        bounded_fmt_pattern = r"{}{}{}".format(
            starting_word_boundary, final_fmt_pattern, ending_word_boundary
        )

        return tokens, re.compile(bounded_fmt_pattern, flags=re.IGNORECASE)

    @overload
    def _parse_token(
        self,
        token: Literal[
            "YYYY",
            "YY",
            "MM",
            "M",
            "DDDD",
            "DDD",
            "DD",
            "D",
            "Do",
            "HH",
            "hh",
            "h",
            "H",
            "mm",
            "m",
            "ss",
            "s",
            "x",
        ],
        value: Union[str, bytes, SupportsInt, bytearray],
        parts: _Parts,
    ) -> None:
        ...  # pragma: no cover

    @overload
    def _parse_token(
        self,
        token: Literal["X"],
        value: Union[str, bytes, SupportsFloat, bytearray],
        parts: _Parts,
    ) -> None:
        ...  # pragma: no cover

    @overload
    def _parse_token(
        self,
        token: Literal["MMMM", "MMM", "dddd", "ddd", "S"],
        value: Union[str, bytes, bytearray],
        parts: _Parts,
    ) -> None:
        ...  # pragma: no cover

    @overload
    def _parse_token(
        self,
        token: Literal["a", "A", "ZZZ", "ZZ", "Z"],
        value: Union[str, bytes],
        parts: _Parts,
    ) -> None:
        ...  # pragma: no cover

    @overload
    def _parse_token(
        self,
        token: Literal["W"],
        value: Tuple[_WEEKDATE_ELEMENT, _WEEKDATE_ELEMENT, Optional[_WEEKDATE_ELEMENT]],
        parts: _Parts,
    ) -> None:
        ...  # pragma: no cover

    def _parse_token(
        self,
        token: Any,
        value: Any,
        parts: _Parts,
    ) -> None:
        if token == "YYYY":
            parts["year"] = int(value)

        elif token == "YY":
            value = int(value)
            parts["year"] = 1900 + value if value > 68 else 2000 + value

        elif token in ["MMMM", "MMM"]:
            # FIXME: month_number() is nullable
            parts["month"] = self.locale.month_number(value.lower())  # type: ignore[typeddict-item]

        elif token in ["MM", "M"]:
            parts["month"] = int(value)

        elif token in ["DDDD", "DDD"]:
            parts["day_of_year"] = int(value)

        elif token in ["DD", "D"]:
            parts["day"] = int(value)

        elif token == "Do":
            parts["day"] = int(value)

        elif token == "dddd":
            # locale day names are 1-indexed
            day_of_week = [x.lower() for x in self.locale.day_names].index(
                value.lower()
            )
            parts["day_of_week"] = day_of_week - 1

        elif token == "ddd":
            # locale day abbreviations are 1-indexed
            day_of_week = [x.lower() for x in self.locale.day_abbreviations].index(
                value.lower()
            )
            parts["day_of_week"] = day_of_week - 1

        elif token.upper() in ["HH", "H"]:
            parts["hour"] = int(value)

        elif token in ["mm", "m"]:
            parts["minute"] = int(value)

        elif token in ["ss", "s"]:
            parts["second"] = int(value)

        elif token == "S":
            # We have the *most significant* digits of an arbitrary-precision integer.
            # We want the six most significant digits as an integer, rounded.
            # IDEA: add nanosecond support somehow? Need datetime support for it first.
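            # e.g. a value of "1234565" rounds to 123456 (ties go to the even digit),
            # while "1234575" rounds to 123458.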
            value = value.ljust(7, "0")

            # floating-point (IEEE-754) defaults to half-to-even rounding
            seventh_digit = int(value[6])
            if seventh_digit == 5:
                rounding = int(value[5]) % 2
            elif seventh_digit > 5:
                rounding = 1
            else:
                rounding = 0

            parts["microsecond"] = int(value[:6]) + rounding

        elif token == "X":
            parts["timestamp"] = float(value)

        elif token == "x":
            parts["expanded_timestamp"] = int(value)

        elif token in ["ZZZ", "ZZ", "Z"]:
            parts["tzinfo"] = TzinfoParser.parse(value)

        elif token in ["a", "A"]:
            if value in (self.locale.meridians["am"], self.locale.meridians["AM"]):
                parts["am_pm"] = "am"
                if "hour" in parts and not 0 <= parts["hour"] <= 12:
                    raise ParserMatchError(
                        f"Hour token value must be between 0 and 12 inclusive for token {token!r}."
                    )
            elif value in (self.locale.meridians["pm"], self.locale.meridians["PM"]):
                parts["am_pm"] = "pm"

        elif token == "W":
            parts["weekdate"] = value

    @staticmethod
    def _build_datetime(parts: _Parts) -> datetime:
        weekdate = parts.get("weekdate")

        if weekdate is not None:
            year, week = int(weekdate[0]), int(weekdate[1])

            if weekdate[2] is not None:
                _day = int(weekdate[2])
            else:
                # day not given, default to 1
                _day = 1

            date_string = f"{year}-{week}-{_day}"

            # tokens for ISO 8601 weekdates
            dt = datetime.strptime(date_string, "%G-%V-%u")

            parts["year"] = dt.year
            parts["month"] = dt.month
            parts["day"] = dt.day

        timestamp = parts.get("timestamp")

        if timestamp is not None:
            return datetime.fromtimestamp(timestamp, tz=tz.tzutc())

        expanded_timestamp = parts.get("expanded_timestamp")

        if expanded_timestamp is not None:
            return datetime.fromtimestamp(
                normalize_timestamp(expanded_timestamp),
                tz=tz.tzutc(),
            )

        day_of_year = parts.get("day_of_year")

        if day_of_year is not None:
            _year = parts.get("year")
            month = parts.get("month")
            if _year is None:
                raise ParserError(
                    "Year component is required with the DDD and DDDD tokens."
                )

            if month is not None:
                raise ParserError(
                    "Month component is not allowed with the DDD and DDDD tokens."
                )

            date_string = f"{_year}-{day_of_year}"
            try:
                dt = datetime.strptime(date_string, "%Y-%j")
            except ValueError:
                raise ParserError(
                    f"The provided day of year {day_of_year!r} is invalid."
                )

            parts["year"] = dt.year
            parts["month"] = dt.month
            parts["day"] = dt.day

        day_of_week: Optional[int] = parts.get("day_of_week")
        day = parts.get("day")

        # If day is passed, ignore day of week
        if day_of_week is not None and day is None:
            year = parts.get("year", 1970)
            month = parts.get("month", 1)
            day = 1

            # dddd => first day of week after epoch
            # dddd YYYY => first day of week in specified year
            # dddd MM YYYY => first day of week in specified year and month
            # dddd MM => first day after epoch in specified month
            next_weekday_dt = next_weekday(datetime(year, month, day), day_of_week)
            parts["year"] = next_weekday_dt.year
            parts["month"] = next_weekday_dt.month
            parts["day"] = next_weekday_dt.day
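            # e.g. parsing just "Saturday" falls back to the 1970-01-01 default above
            # and resolves to 1970-01-03, the first Saturday after the epoch.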

        am_pm = parts.get("am_pm")
        hour = parts.get("hour", 0)

        if am_pm == "pm" and hour < 12:
            hour += 12
        elif am_pm == "am" and hour == 12:
            hour = 0

        # Support for midnight at the end of day
        if hour == 24:
            if parts.get("minute", 0) != 0:
                raise ParserError("Midnight at the end of day must not contain minutes")
            if parts.get("second", 0) != 0:
                raise ParserError("Midnight at the end of day must not contain seconds")
            if parts.get("microsecond", 0) != 0:
                raise ParserError(
                    "Midnight at the end of day must not contain microseconds"
                )
            hour = 0
            day_increment = 1
        else:
            day_increment = 0
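        # e.g. "1999-12-31T24:00:00" is normalized to midnight of the following day,
        # i.e. 2000-01-01 00:00:00.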

        # account for rounding up to 1000000
        microsecond = parts.get("microsecond", 0)
        if microsecond == 1000000:
            microsecond = 0
            second_increment = 1
        else:
            second_increment = 0
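        # e.g. a subsecond value of "9999999" rounds up to 1000000 microseconds,
        # which carries over into one extra second here.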

        increment = timedelta(days=day_increment, seconds=second_increment)

        return (
            datetime(
                year=parts.get("year", 1),
                month=parts.get("month", 1),
                day=parts.get("day", 1),
                hour=hour,
                minute=parts.get("minute", 0),
                second=parts.get("second", 0),
                microsecond=microsecond,
                tzinfo=parts.get("tzinfo"),
            )
            + increment
        )

    def _parse_multiformat(self, string: str, formats: Iterable[str]) -> datetime:
        _datetime: Optional[datetime] = None

        for fmt in formats:
            try:
                _datetime = self.parse(string, fmt)
                break
            except ParserMatchError:
                pass

        if _datetime is None:
            supported_formats = ", ".join(formats)
            raise ParserError(
                f"Could not match input {string!r} to any of the following formats: {supported_formats}."
            )

        return _datetime
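    # e.g. _parse_multiformat("2013-05-05", ["YYYY/MM/DD", "YYYY-MM-DD"]) tries each
    # format in order and returns the first successful parse.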

    # generates a capture group of choices separated by an OR operator
    @staticmethod
    def _generate_choice_re(
        choices: Iterable[str], flags: Union[int, re.RegexFlag] = 0
    ) -> Pattern[str]:
        return re.compile(r"({})".format("|".join(choices)), flags=flags)
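    # e.g. _generate_choice_re(["am", "pm"], re.IGNORECASE) compiles to the
    # pattern r"(am|pm)" with the IGNORECASE flag set.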


class TzinfoParser:
    _TZINFO_RE: ClassVar[Pattern[str]] = re.compile(
        r"^(?:\(UTC)*([\+\-])?(\d{2})(?:\:?(\d{2}))?"
    )
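    # e.g. _TZINFO_RE matches offset strings such as "+05:30" or "-0700",
    # optionally preceded by a literal "(UTC" prefix.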

    @classmethod
    def parse(cls, tzinfo_string: str) -> dt_tzinfo:
        tzinfo: Optional[dt_tzinfo] = None

        if tzinfo_string == "local":
            tzinfo = tz.tzlocal()

        elif tzinfo_string in ["utc", "UTC", "Z"]:
            tzinfo = tz.tzutc()

        else:
            iso_match = cls._TZINFO_RE.match(tzinfo_string)

            if iso_match:
                sign: Optional[str]
                hours: str
                minutes: Union[str, int, None]
                sign, hours, minutes = iso_match.groups()
                seconds = int(hours) * 3600 + int(minutes or 0) * 60

                if sign == "-":
                    seconds *= -1

                tzinfo = tz.tzoffset(None, seconds)

            else:
                tzinfo = tz.gettz(tzinfo_string)

        if tzinfo is None:
            raise ParserError(f"Could not parse timezone expression {tzinfo_string!r}.")

        return tzinfo