Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/arrow/parser.py: 87%

1"""Provides the :class:`Arrow <arrow.parser.DateTimeParser>` class, a better way to parse datetime strings."""

3import re

4import sys

5from datetime import datetime, timedelta

6from datetime import tzinfo as dt_tzinfo

7from functools import lru_cache

8from typing import (

9 Any,

10 ClassVar,

11 Dict,

12 Iterable,

13 List,

14 Match,

15 Optional,

16 Pattern,

17 SupportsFloat,

18 SupportsInt,

19 Tuple,

20 Union,

21 cast,

22 overload,

23)

25from dateutil import tz

27from arrow import locales

28from arrow.constants import DEFAULT_LOCALE

29from arrow.util import next_weekday, normalize_timestamp

31if sys.version_info < (3, 8): # pragma: no cover

32 from typing_extensions import Literal, TypedDict

33else:

34 from typing import Literal, TypedDict # pragma: no cover

class ParserError(ValueError):
    """Base error raised when a datetime or timezone string cannot be parsed.

    Subclasses :class:`ValueError`, so callers that catch ``ValueError``
    also catch parser failures.
    """

class ParserMatchError(ParserError):
    """Raised when an input string does not match one specific format.

    ``_parse_multiformat()`` catches only this subclass while it tries
    candidate formats one after another. Plain :class:`ParserError`
    instances, such as the day-of-year errors raised by
    ``_build_datetime()``, are therefore not swallowed by that loop and
    reach the user with their own message.
    """

# Type of a single element (year, week or day) of a parsed ISO week date.
_WEEKDATE_ELEMENT = Union[str, bytes, SupportsInt, bytearray]

# Every format token the parser recognises, as a closed set of literals.
_FORMAT_TYPE = Literal[
    # year
    "YYYY", "YY",
    # numeric month
    "MM", "M",
    # day of year / day of month
    "DDDD", "DDD", "DD", "D",
    # hour (24-hour and 12-hour)
    "HH", "H", "hh", "h",
    # minute / second
    "mm", "m", "ss", "s",
    # timestamps
    "X", "x",
    # timezone
    "ZZZ", "ZZ", "Z",
    # sub-second digits
    "S",
    # ISO week date
    "W",
    # locale-dependent month names, ordinal day, weekday names / number
    "MMMM", "MMM", "Do", "dddd", "ddd", "d",
    # meridian
    "a", "A",
]

class _Parts(TypedDict, total=False):
    """Intermediate values collected from format tokens before a datetime is built.

    Every key is optional (``total=False``); a key is present only when the
    corresponding token appeared in the format string.
    """

    # Calendar date components.
    year: int
    month: int
    day_of_year: int
    day: int

    # Time-of-day components.
    hour: int
    minute: int
    second: int
    microsecond: int

    # Timestamps ("X" and "x" tokens respectively).
    timestamp: float
    expanded_timestamp: int

    # Timezone and 12-hour clock marker.
    tzinfo: dt_tzinfo
    am_pm: Literal["am", "pm"]

    # Weekday ("dddd"/"ddd") and ISO week date ("W": year, week, optional day).
    day_of_week: int
    weekdate: Tuple[_WEEKDATE_ELEMENT, _WEEKDATE_ELEMENT, Optional[_WEEKDATE_ELEMENT]]

102

103

class DateTimeParser:
    """Parse date/time strings into :class:`datetime.datetime` objects.

    Two entry points are offered:

    * :meth:`parse` matches a string against an explicit token format
      (for example ``"YYYY-MM-DD HH:mm"``) or a list of such formats.
    * :meth:`parse_iso` derives candidate formats for ISO 8601-like input
      and tries them in order.

    Locale-dependent tokens (month names, weekday names, ordinal days and
    meridians) are resolved through the locale given to the constructor.
    """

    # Recognises every supported token inside a format string.
    _FORMAT_RE: ClassVar[Pattern[str]] = re.compile(
        r"(YYY?Y?|MM?M?M?|Do|DD?D?D?|d?d?d?d|HH?|hh?|mm?|ss?|S+|ZZ?Z?|a|A|x|X|W)"
    )
    # Matches a bracketed literal section such as "[at]" in a format string.
    _ESCAPE_RE: ClassVar[Pattern[str]] = re.compile(r"\[[^\[\]]*\]")

    _ONE_OR_TWO_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{1,2}")
    _ONE_OR_TWO_OR_THREE_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{1,3}")
    _ONE_OR_MORE_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d+")
    _TWO_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{2}")
    _THREE_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{3}")
    _FOUR_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{4}")
    _TZ_Z_RE: ClassVar[Pattern[str]] = re.compile(r"([\+\-])(\d{2})(?:(\d{2}))?|Z")
    _TZ_ZZ_RE: ClassVar[Pattern[str]] = re.compile(r"([\+\-])(\d{2})(?:\:(\d{2}))?|Z")
    _TZ_NAME_RE: ClassVar[Pattern[str]] = re.compile(r"\w[\w+\-/]+")
    # NOTE: timestamps cannot be parsed from natural language strings (by removing the ^...$) because it will
    # break cases like "15 Jul 2000" and a format list (see issue #447)
    _TIMESTAMP_RE: ClassVar[Pattern[str]] = re.compile(r"^\-?\d+\.?\d+$")
    _TIMESTAMP_EXPANDED_RE: ClassVar[Pattern[str]] = re.compile(r"^\-?\d+$")
    _TIME_RE: ClassVar[Pattern[str]] = re.compile(
        r"^(\d{2})(?:\:?(\d{2}))?(?:\:?(\d{2}))?(?:([\.\,])(\d+))?$"
    )
    _WEEK_DATE_RE: ClassVar[Pattern[str]] = re.compile(
        r"(?P<year>\d{4})[\-]?W(?P<week>\d{2})[\-]?(?P<day>\d)?"
    )

    # Locale-independent token -> input pattern map. Locale-dependent tokens
    # are added per instance in __init__.
    _BASE_INPUT_RE_MAP: ClassVar[Dict[_FORMAT_TYPE, Pattern[str]]] = {
        "YYYY": _FOUR_DIGIT_RE,
        "YY": _TWO_DIGIT_RE,
        "MM": _TWO_DIGIT_RE,
        "M": _ONE_OR_TWO_DIGIT_RE,
        "DDDD": _THREE_DIGIT_RE,
        "DDD": _ONE_OR_TWO_OR_THREE_DIGIT_RE,
        "DD": _TWO_DIGIT_RE,
        "D": _ONE_OR_TWO_DIGIT_RE,
        "HH": _TWO_DIGIT_RE,
        "H": _ONE_OR_TWO_DIGIT_RE,
        "hh": _TWO_DIGIT_RE,
        "h": _ONE_OR_TWO_DIGIT_RE,
        "mm": _TWO_DIGIT_RE,
        "m": _ONE_OR_TWO_DIGIT_RE,
        "ss": _TWO_DIGIT_RE,
        "s": _ONE_OR_TWO_DIGIT_RE,
        "X": _TIMESTAMP_RE,
        "x": _TIMESTAMP_EXPANDED_RE,
        "ZZZ": _TZ_NAME_RE,
        "ZZ": _TZ_ZZ_RE,
        "Z": _TZ_Z_RE,
        "S": _ONE_OR_MORE_DIGIT_RE,
        "W": _WEEK_DATE_RE,
    }

    SEPARATORS: ClassVar[List[str]] = ["-", "/", "."]

    locale: locales.Locale
    _input_re_map: Dict[_FORMAT_TYPE, Pattern[str]]

    def __init__(self, locale: str = DEFAULT_LOCALE, cache_size: int = 0) -> None:
        """Create a parser bound to a locale.

        :param locale: name of the locale, resolved with ``locales.get_locale``.
        :param cache_size: when greater than zero, compiled format patterns
            are memoised per instance in an LRU cache of this size.
        """
        self.locale = locales.get_locale(locale)
        self._input_re_map = self._BASE_INPUT_RE_MAP.copy()
        self._input_re_map.update(
            {
                "MMMM": self._generate_choice_re(
                    self.locale.month_names[1:], re.IGNORECASE
                ),
                "MMM": self._generate_choice_re(
                    self.locale.month_abbreviations[1:], re.IGNORECASE
                ),
                "Do": re.compile(self.locale.ordinal_day_re),
                "dddd": self._generate_choice_re(
                    self.locale.day_names[1:], re.IGNORECASE
                ),
                "ddd": self._generate_choice_re(
                    self.locale.day_abbreviations[1:], re.IGNORECASE
                ),
                "d": re.compile(r"[1-7]"),
                "a": self._generate_choice_re(
                    (self.locale.meridians["am"], self.locale.meridians["pm"])
                ),
                # note: 'A' token accepts both 'am/pm' and 'AM/PM' formats to
                # ensure backwards compatibility of this token
                "A": self._generate_choice_re(self.locale.meridians.values()),
            }
        )
        if cache_size > 0:
            # Shadow the method on this instance with a cached wrapper.
            self._generate_pattern_re = lru_cache(maxsize=cache_size)(  # type: ignore[assignment]
                self._generate_pattern_re
            )

    # TODO: since we support more than ISO 8601, we should rename this function
    # IDEA: break into multiple functions
    def parse_iso(
        self, datetime_string: str, normalize_whitespace: bool = False
    ) -> datetime:
        """Parse an ISO 8601-like string by trying a list of derived formats.

        The date part is tried against a fixed list of date formats. When a
        time part is present (separated by a single space or a ``T``), a
        matching time format and, if an offset or ``Z`` follows, a timezone
        token are appended to every candidate.

        :param datetime_string: the string to parse.
        :param normalize_whitespace: strip the string and collapse each run
            of whitespace to a single space before parsing.
        :raises ParserError: if the string has more than one space, mixes a
            ``T`` with spaces, has an invalid time component, or matches
            none of the candidate formats.
        """
        if normalize_whitespace:
            datetime_string = re.sub(r"\s+", " ", datetime_string.strip())

        has_space_divider = " " in datetime_string
        has_t_divider = "T" in datetime_string

        num_spaces = datetime_string.count(" ")
        if has_space_divider and num_spaces != 1 or has_t_divider and num_spaces > 0:
            raise ParserError(
                f"Expected an ISO 8601-like string, but was given {datetime_string!r}. "
                "Try passing in a format string to resolve this."
            )

        has_time = has_space_divider or has_t_divider
        has_tz = False

        # date formats (ISO 8601 and others) to test against
        # NOTE: YYYYMM is omitted to avoid confusion with YYMMDD (no longer part of ISO 8601, but is still often used)
        formats = [
            "YYYY-MM-DD",
            "YYYY-M-DD",
            "YYYY-M-D",
            "YYYY/MM/DD",
            "YYYY/M/DD",
            "YYYY/M/D",
            "YYYY.MM.DD",
            "YYYY.M.DD",
            "YYYY.M.D",
            "YYYYMMDD",
            "YYYY-DDDD",
            "YYYYDDDD",
            "YYYY-MM",
            "YYYY/MM",
            "YYYY.MM",
            "YYYY",
            "W",
        ]

        if has_time:
            if has_space_divider:
                date_string, time_string = datetime_string.split(" ", 1)
            else:
                date_string, time_string = datetime_string.split("T", 1)

            # Separate the clock time from a trailing offset / "Z".
            # maxsplit and flags are passed by keyword: passing them
            # positionally is deprecated since Python 3.13.
            time_parts = re.split(
                r"[\+\-Z]", time_string, maxsplit=1, flags=re.IGNORECASE
            )

            time_components: Optional[Match[str]] = self._TIME_RE.match(time_parts[0])

            if time_components is None:
                raise ParserError(
                    "Invalid time component provided. "
                    "Please specify a format or provide a valid time component in the basic or extended ISO 8601 time format."
                )

            (
                hours,
                minutes,
                seconds,
                subseconds_sep,
                subseconds,
            ) = time_components.groups()

            has_tz = len(time_parts) == 2
            has_minutes = minutes is not None
            has_seconds = seconds is not None
            has_subseconds = subseconds is not None

            is_basic_time_format = ":" not in time_parts[0]
            tz_format = "Z"

            # use 'ZZ' token instead since tz offset is present in non-basic format
            if has_tz and ":" in time_parts[1]:
                tz_format = "ZZ"

            time_sep = "" if is_basic_time_format else ":"

            if has_subseconds:
                time_string = f"HH{time_sep}mm{time_sep}ss{subseconds_sep}S"
            elif has_seconds:
                time_string = f"HH{time_sep}mm{time_sep}ss"
            elif has_minutes:
                time_string = f"HH{time_sep}mm"
            else:
                time_string = "HH"

            if has_space_divider:
                formats = [f"{f} {time_string}" for f in formats]
            else:
                formats = [f"{f}T{time_string}" for f in formats]

        if has_time and has_tz:
            # Add "Z" or "ZZ" to the format strings to indicate to
            # _parse_token() that a timezone needs to be parsed
            formats = [f"{f}{tz_format}" for f in formats]

        return self._parse_multiformat(datetime_string, formats)

    def parse(
        self,
        datetime_string: str,
        fmt: Union[List[str], str],
        normalize_whitespace: bool = False,
    ) -> datetime:
        """Parse ``datetime_string`` according to ``fmt``.

        :param datetime_string: the string to parse.
        :param fmt: a token format string, or a list of format strings that
            are tried in order.
        :param normalize_whitespace: collapse each run of whitespace in the
            input to a single space before matching.
        :raises ParserMatchError: if the format cannot be compiled to a
            regular expression, the input does not match it, or a token has
            no captured value.
        :raises ParserError: for failures raised while building the datetime,
            or when none of a list of formats matches.
        """
        if normalize_whitespace:
            datetime_string = re.sub(r"\s+", " ", datetime_string)

        if isinstance(fmt, list):
            return self._parse_multiformat(datetime_string, fmt)

        try:
            fmt_tokens: List[_FORMAT_TYPE]
            fmt_pattern_re: Pattern[str]
            fmt_tokens, fmt_pattern_re = self._generate_pattern_re(fmt)
        except re.error as e:
            raise ParserMatchError(
                f"Failed to generate regular expression pattern: {e}."
            ) from e

        match = fmt_pattern_re.search(datetime_string)

        if match is None:
            raise ParserMatchError(
                f"Failed to match {fmt!r} when parsing {datetime_string!r}."
            )

        parts: _Parts = {}
        for token in fmt_tokens:
            value: Union[Tuple[str, str, str], str]
            if token == "Do":
                # The locale's ordinal pattern exposes the number as "value".
                value = match.group("value")
            elif token == "W":
                # Week dates are captured through the named groups of _WEEK_DATE_RE.
                value = (match.group("year"), match.group("week"), match.group("day"))
            else:
                value = match.group(token)

            if value is None:
                raise ParserMatchError(
                    f"Unable to find a match group for the specified token {token!r}."
                )

            self._parse_token(token, value, parts)  # type: ignore[arg-type]

        return self._build_datetime(parts)

    def _generate_pattern_re(self, fmt: str) -> Tuple[List[_FORMAT_TYPE], Pattern[str]]:
        """Compile a token format string into a regular expression.

        :param fmt: a format string such as ``"YYYY-MM-DD"``.
        :returns: the tokens found in ``fmt`` (in order) and a compiled,
            case-insensitive pattern with one named group per token.
        :raises ParserError: if a token has no registered input pattern.
        """
        # fmt is a string of tokens like 'YYYY-MM-DD'
        # we construct a new string by replacing each
        # token by its pattern:
        # 'YYYY-MM-DD' -> '(?P<YYYY>\d{4})-(?P<MM>\d{2})-(?P<DD>\d{2})'
        tokens: List[_FORMAT_TYPE] = []
        offset = 0

        # Escape all special RegEx chars
        escaped_fmt = re.escape(fmt)

        # Extract the bracketed expressions to be reinserted later.
        # After escaping, "[text]" reads "\[text\]"; the match starts at the
        # "[" so the leading backslash survives and the placeholder ends up
        # as "\#", which is the marker split on further below.
        escaped_fmt = re.sub(self._ESCAPE_RE, "#", escaped_fmt)

        # Any number of S is the same as one.
        # TODO: allow users to specify the number of digits to parse
        escaped_fmt = re.sub(r"S+", "S", escaped_fmt)

        escaped_data = re.findall(self._ESCAPE_RE, fmt)

        fmt_pattern = escaped_fmt

        for m in self._FORMAT_RE.finditer(escaped_fmt):
            token: _FORMAT_TYPE = cast(_FORMAT_TYPE, m.group(0))
            try:
                input_re = self._input_re_map[token]
            except KeyError:
                raise ParserError(f"Unrecognized token {token!r}.")
            input_pattern = f"(?P<{token}>{input_re.pattern})"
            tokens.append(token)
            # a pattern doesn't have the same length as the token
            # it replaces! We keep the difference in the offset variable.
            # This works because the string is scanned left-to-right and matches
            # are returned in the order found by finditer.
            fmt_pattern = (
                fmt_pattern[: m.start() + offset]
                + input_pattern
                + fmt_pattern[m.end() + offset :]
            )
            offset += len(input_pattern) - (m.end() - m.start())

        final_fmt_pattern = ""
        split_fmt = fmt_pattern.split(r"\#")

        # Due to the way Python splits, 'split_fmt' will always be longer
        for i, chunk in enumerate(split_fmt):
            final_fmt_pattern += chunk
            if i < len(escaped_data):
                # Re-insert the bracketed literal without its brackets.
                final_fmt_pattern += escaped_data[i][1:-1]

        # Wrap final_fmt_pattern in a custom word boundary to strictly
        # match the formatting pattern and filter out date and time formats
        # that include junk such as: blah1998-09-12 blah, blah 1998-09-12blah,
        # blah1998-09-12blah. The custom word boundary matches every character
        # that is not a whitespace character to allow for searching for a date
        # and time string in a natural language sentence. Therefore, searching
        # for a string of the form YYYY-MM-DD in "blah 1998-09-12 blah" will
        # work properly.
        # Certain punctuation before or after the target pattern such as
        # "1998-09-12," is permitted. For the full list of valid punctuation,
        # see the documentation.

        starting_word_boundary = (
            r"(?<!\S\S)"  # Don't have two consecutive non-whitespace characters. This ensures that we allow cases
            # like .11.25.2019 but not 1.11.25.2019 (for pattern MM.DD.YYYY)
            r"(?<![^\,\.\;\:\?\!\"\'\`\[\]\{\}<>\s])"  # This is the list of punctuation that is ok before the
            # pattern (i.e. "It can't not be these characters before the pattern")
            r"(\b|^)"
            # The \b is to block cases like 1201912 but allow 201912 for pattern YYYYMM. The ^ was necessary to allow a
            # negative number through i.e. before epoch numbers
        )
        ending_word_boundary = (
            r"(?=[\,\.\;\:\?\!\"\'\`\[\]\{\}\<\>]?"  # Positive lookahead stating that these punctuation marks
            # can appear after the pattern at most 1 time
            r"(?!\S))"  # Don't allow any non-whitespace character after the punctuation
        )
        bounded_fmt_pattern = r"{}{}{}".format(
            starting_word_boundary, final_fmt_pattern, ending_word_boundary
        )

        return tokens, re.compile(bounded_fmt_pattern, flags=re.IGNORECASE)

    @overload
    def _parse_token(
        self,
        token: Literal[
            "YYYY",
            "YY",
            "MM",
            "M",
            "DDDD",
            "DDD",
            "DD",
            "D",
            "Do",
            "HH",
            "hh",
            "h",
            "H",
            "mm",
            "m",
            "ss",
            "s",
            "x",
        ],
        value: Union[str, bytes, SupportsInt, bytearray],
        parts: _Parts,
    ) -> None:
        ...  # pragma: no cover

    @overload
    def _parse_token(
        self,
        token: Literal["X"],
        value: Union[str, bytes, SupportsFloat, bytearray],
        parts: _Parts,
    ) -> None:
        ...  # pragma: no cover

    @overload
    def _parse_token(
        self,
        token: Literal["MMMM", "MMM", "dddd", "ddd", "S"],
        value: Union[str, bytes, bytearray],
        parts: _Parts,
    ) -> None:
        ...  # pragma: no cover

    @overload
    def _parse_token(
        self,
        token: Literal["a", "A", "ZZZ", "ZZ", "Z"],
        value: Union[str, bytes],
        parts: _Parts,
    ) -> None:
        ...  # pragma: no cover

    @overload
    def _parse_token(
        self,
        token: Literal["W"],
        value: Tuple[_WEEKDATE_ELEMENT, _WEEKDATE_ELEMENT, Optional[_WEEKDATE_ELEMENT]],
        parts: _Parts,
    ) -> None:
        ...  # pragma: no cover

    def _parse_token(
        self,
        token: Any,
        value: Any,
        parts: _Parts,
    ) -> None:
        """Convert the text captured for ``token`` and store it in ``parts``.

        :param token: the format token the value was captured for.
        :param value: the captured text (a 3-tuple for the ``W`` token).
        :param parts: accumulator that is updated in place.
        :raises ParserMatchError: if an "am" meridian is combined with an
            already parsed hour outside 0..12.
        """
        if token == "YYYY":
            parts["year"] = int(value)

        elif token == "YY":
            # Two-digit years above 68 belong to the 1900s, the rest to the 2000s.
            value = int(value)
            parts["year"] = 1900 + value if value > 68 else 2000 + value

        elif token in ["MMMM", "MMM"]:
            # FIXME: month_number() is nullable
            parts["month"] = self.locale.month_number(value.lower())  # type: ignore[typeddict-item]

        elif token in ["MM", "M"]:
            parts["month"] = int(value)

        elif token in ["DDDD", "DDD"]:
            parts["day_of_year"] = int(value)

        elif token in ["DD", "D"]:
            parts["day"] = int(value)

        elif token == "Do":
            parts["day"] = int(value)

        elif token == "dddd":
            # locale day names are 1-indexed
            day_of_week = [x.lower() for x in self.locale.day_names].index(
                value.lower()
            )
            parts["day_of_week"] = day_of_week - 1

        elif token == "ddd":
            # locale day abbreviations are 1-indexed
            day_of_week = [x.lower() for x in self.locale.day_abbreviations].index(
                value.lower()
            )
            parts["day_of_week"] = day_of_week - 1

        elif token.upper() in ["HH", "H"]:
            # Covers "HH", "H", "hh" and "h".
            parts["hour"] = int(value)

        elif token in ["mm", "m"]:
            parts["minute"] = int(value)

        elif token in ["ss", "s"]:
            parts["second"] = int(value)

        elif token == "S":
            # We have the *most significant* digits of an arbitrary-precision integer.
            # We want the six most significant digits as an integer, rounded.
            # IDEA: add nanosecond support somehow? Need datetime support for it first.
            value = value.ljust(7, "0")

            # floating-point (IEEE-754) defaults to half-to-even rounding
            seventh_digit = int(value[6])
            if seventh_digit == 5:
                rounding = int(value[5]) % 2
            elif seventh_digit > 5:
                rounding = 1
            else:
                rounding = 0

            # May reach 1000000; _build_datetime() carries that into seconds.
            parts["microsecond"] = int(value[:6]) + rounding

        elif token == "X":
            parts["timestamp"] = float(value)

        elif token == "x":
            parts["expanded_timestamp"] = int(value)

        elif token in ["ZZZ", "ZZ", "Z"]:
            parts["tzinfo"] = TzinfoParser.parse(value)

        elif token in ["a", "A"]:
            if value in (self.locale.meridians["am"], self.locale.meridians["AM"]):
                parts["am_pm"] = "am"
                if "hour" in parts and not 0 <= parts["hour"] <= 12:
                    raise ParserMatchError(
                        f"Hour token value must be between 0 and 12 inclusive for token {token!r}."
                    )
            elif value in (self.locale.meridians["pm"], self.locale.meridians["PM"]):
                parts["am_pm"] = "pm"
        elif token == "W":
            parts["weekdate"] = value

    @staticmethod
    def _build_datetime(parts: _Parts) -> datetime:
        """Assemble a datetime from the collected ``parts``.

        A timestamp, when present, takes precedence and yields a UTC
        datetime. Otherwise week dates, day-of-year values and weekday names
        are resolved to year/month/day first; missing components fall back
        to defaults.

        :param parts: values collected by ``_parse_token``; ``year``,
            ``month`` and ``day`` may be filled in as a side effect.
        :raises ParserError: for an invalid week date, an invalid or
            incomplete day-of-year specification, or a "24" hour with
            non-zero minutes, seconds or microseconds.
        """
        weekdate = parts.get("weekdate")

        if weekdate is not None:
            year, week = int(weekdate[0]), int(weekdate[1])

            if weekdate[2] is not None:
                _day = int(weekdate[2])
            else:
                # day not given, default to 1
                _day = 1

            date_string = f"{year}-{week}-{_day}"

            # tokens for ISO 8601 weekdates
            try:
                dt = datetime.strptime(date_string, "%G-%V-%u")
            except ValueError as e:
                # Report out-of-range weeks/days the same way as invalid
                # day-of-year values instead of leaking a bare ValueError.
                raise ParserError(
                    f"The provided week date {date_string!r} is invalid."
                ) from e

            parts["year"] = dt.year
            parts["month"] = dt.month
            parts["day"] = dt.day

        timestamp = parts.get("timestamp")

        if timestamp is not None:
            return datetime.fromtimestamp(timestamp, tz=tz.tzutc())

        expanded_timestamp = parts.get("expanded_timestamp")

        if expanded_timestamp is not None:
            return datetime.fromtimestamp(
                normalize_timestamp(expanded_timestamp),
                tz=tz.tzutc(),
            )

        day_of_year = parts.get("day_of_year")

        if day_of_year is not None:
            _year = parts.get("year")
            month = parts.get("month")
            if _year is None:
                raise ParserError(
                    "Year component is required with the DDD and DDDD tokens."
                )

            if month is not None:
                raise ParserError(
                    "Month component is not allowed with the DDD and DDDD tokens."
                )

            date_string = f"{_year}-{day_of_year}"
            try:
                dt = datetime.strptime(date_string, "%Y-%j")
            except ValueError as e:
                raise ParserError(
                    f"The provided day of year {day_of_year!r} is invalid."
                ) from e

            parts["year"] = dt.year
            parts["month"] = dt.month
            parts["day"] = dt.day

        day_of_week: Optional[int] = parts.get("day_of_week")
        day = parts.get("day")

        # If day is passed, ignore day of week
        if day_of_week is not None and day is None:
            year = parts.get("year", 1970)
            month = parts.get("month", 1)
            day = 1

            # dddd => first day of week after epoch
            # dddd YYYY => first day of week in specified year
            # dddd MM YYYY => first day of week in specified year and month
            # dddd MM => first day after epoch in specified month
            next_weekday_dt = next_weekday(datetime(year, month, day), day_of_week)
            parts["year"] = next_weekday_dt.year
            parts["month"] = next_weekday_dt.month
            parts["day"] = next_weekday_dt.day

        am_pm = parts.get("am_pm")
        hour = parts.get("hour", 0)

        if am_pm == "pm" and hour < 12:
            hour += 12
        elif am_pm == "am" and hour == 12:
            hour = 0

        # Support for midnight at the end of day
        if hour == 24:
            if parts.get("minute", 0) != 0:
                raise ParserError("Midnight at the end of day must not contain minutes")
            if parts.get("second", 0) != 0:
                raise ParserError("Midnight at the end of day must not contain seconds")
            if parts.get("microsecond", 0) != 0:
                raise ParserError(
                    "Midnight at the end of day must not contain microseconds"
                )
            hour = 0
            day_increment = 1
        else:
            day_increment = 0

        # account for rounding up to 1000000
        microsecond = parts.get("microsecond", 0)
        if microsecond == 1000000:
            microsecond = 0
            second_increment = 1
        else:
            second_increment = 0

        increment = timedelta(days=day_increment, seconds=second_increment)

        return (
            datetime(
                year=parts.get("year", 1),
                month=parts.get("month", 1),
                day=parts.get("day", 1),
                hour=hour,
                minute=parts.get("minute", 0),
                second=parts.get("second", 0),
                microsecond=microsecond,
                tzinfo=parts.get("tzinfo"),
            )
            + increment
        )

    def _parse_multiformat(self, string: str, formats: Iterable[str]) -> datetime:
        """Return the result of the first format in ``formats`` that matches.

        Only :class:`ParserMatchError` moves on to the next format; any
        other exception propagates immediately.

        :raises ParserError: if no format matches ``string``.
        """
        # Remember what was tried so the error message is complete even when
        # ``formats`` is a one-shot iterable that cannot be traversed twice.
        tried: List[str] = []

        for fmt in formats:
            tried.append(fmt)
            try:
                return self.parse(string, fmt)
            except ParserMatchError:
                continue

        supported_formats = ", ".join(tried)
        raise ParserError(
            f"Could not match input {string!r} to any of the following formats: {supported_formats}."
        )

    # generates a capture group of choices separated by an OR operator
    @staticmethod
    def _generate_choice_re(
        choices: Iterable[str], flags: Union[int, re.RegexFlag] = 0
    ) -> Pattern[str]:
        """Compile ``choices`` into a single capturing alternation group.

        The choices are inserted verbatim (they are not regex-escaped).
        """
        return re.compile(r"({})".format("|".join(choices)), flags=flags)

739

740

class TzinfoParser:
    """Turn a timezone expression into a ``tzinfo`` object."""

    # Optional "(UTC" prefix, optional sign, two-digit hours and optional
    # two-digit minutes (with or without a colon), anchored at the start.
    _TZINFO_RE: ClassVar[Pattern[str]] = re.compile(
        r"^(?:\(UTC)*([\+\-])?(\d{2})(?:\:?(\d{2}))?"
    )

    @classmethod
    def parse(cls, tzinfo_string: str) -> dt_tzinfo:
        """Resolve ``tzinfo_string`` to a timezone.

        Checks are made in this order: the literal ``"local"``; the UTC
        spellings ``"utc"``, ``"UTC"`` and ``"Z"``; a numeric offset matched
        by ``_TZINFO_RE``; and finally a lookup through ``tz.gettz``.

        :raises ParserError: if the ``tz.gettz`` lookup yields ``None``.
        """
        if tzinfo_string == "local":
            return tz.tzlocal()

        if tzinfo_string in ("utc", "UTC", "Z"):
            return tz.tzutc()

        offset_match = cls._TZINFO_RE.match(tzinfo_string)
        if offset_match:
            sign, hours, minutes = offset_match.groups()
            # Minutes are optional in the pattern; treat a missing group as 0.
            offset_seconds = int(hours) * 3600 + int(minutes or 0) * 60
            if sign == "-":
                offset_seconds = -offset_seconds
            return tz.tzoffset(None, offset_seconds)

        named_zone = tz.gettz(tzinfo_string)
        if named_zone is None:
            raise ParserError(f"Could not parse timezone expression {tzinfo_string!r}.")
        return named_zone