Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/arrow/parser.py: 24%

1"""Provides the :class:`DateTimeParser <arrow.parser.DateTimeParser>` class, a better way to parse datetime strings."""

3import re

4import sys

5from datetime import datetime, timedelta

6from datetime import tzinfo as dt_tzinfo

7from functools import lru_cache

8from typing import (

9 Any,

10 ClassVar,

11 Dict,

12 Iterable,

13 List,

14 Match,

15 Optional,

16 Pattern,

17 SupportsFloat,

18 SupportsInt,

19 Tuple,

20 Union,

21 cast,

22 overload,

23)

25from dateutil import tz

27from arrow import locales

28from arrow.constants import DEFAULT_LOCALE

29from arrow.util import next_weekday, normalize_timestamp

31if sys.version_info < (3, 8): # pragma: no cover

32 from typing_extensions import Literal, TypedDict

33else:

34 from typing import Literal, TypedDict # pragma: no cover

class ParserError(ValueError):
    """Base error raised by this module when a datetime string cannot be parsed.

    It derives from :class:`ValueError`, so callers that catch ``ValueError``
    also catch parser failures.
    """

class ParserMatchError(ParserError):
    """Raised when an input string does not match one particular format.

    ``_parse_multiformat()`` catches only this subclass while it tries each
    candidate format in turn.  Keeping it distinct from :class:`ParserError`
    lets plain ``ParserError`` instances (such as the day-of-year errors raised
    by ``_build_datetime()``) propagate to the user with their original
    message instead of being swallowed by that try/except.
    """

50_WEEKDATE_ELEMENT = Union[str, bytes, SupportsInt, bytearray]

52_FORMAT_TYPE = Literal[

53 "YYYY",

54 "YY",

55 "MM",

56 "M",

57 "DDDD",

58 "DDD",

59 "DD",

60 "D",

61 "HH",

62 "H",

63 "hh",

64 "h",

65 "mm",

66 "m",

67 "ss",

68 "s",

69 "X",

70 "x",

71 "ZZZ",

72 "ZZ",

73 "Z",

74 "S",

75 "W",

76 "MMMM",

77 "MMM",

78 "Do",

79 "dddd",

80 "ddd",

81 "d",

82 "a",

83 "A",

84]

class _Parts(TypedDict, total=False):
    """Components collected by ``_parse_token()`` and read by ``_build_datetime()``.

    Declared with ``total=False``: every key is optional and is only present
    when the corresponding token appeared in the format.
    """

    # calendar date
    year: int
    month: int
    day_of_year: int
    day: int

    # time of day
    hour: int
    minute: int
    second: int
    microsecond: int

    # timestamps ("X" is stored as a float, "x" as an int)
    timestamp: float
    expanded_timestamp: int

    # timezone and meridian
    tzinfo: dt_tzinfo
    am_pm: Literal["am", "pm"]

    # weekday index and ISO week date as (year, week, optional day)
    day_of_week: int
    weekdate: Tuple[_WEEKDATE_ELEMENT, _WEEKDATE_ELEMENT, Optional[_WEEKDATE_ELEMENT]]

102

103

class DateTimeParser:
    # Splits a format string into its tokens.
    _FORMAT_RE: ClassVar[Pattern[str]] = re.compile(
        r"(YYY?Y?|MM?M?M?|Do|DD?D?D?|d?d?d?d|HH?|hh?|mm?|ss?|S+|ZZ?Z?|a|A|x|X|W)"
    )
    # Finds bracketed ("escaped") sections of a format string.
    _ESCAPE_RE: ClassVar[Pattern[str]] = re.compile(r"\[[^\[\]]*\]")

    # Digit-run patterns shared by the numeric tokens.
    _ONE_OR_TWO_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{1,2}")
    _ONE_OR_TWO_OR_THREE_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{1,3}")
    _ONE_OR_MORE_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d+")
    _TWO_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{2}")
    _THREE_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{3}")
    _FOUR_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{4}")

    # Timezone patterns: "Z" token (no colon), "ZZ" token (colon) and "ZZZ" (name).
    _TZ_Z_RE: ClassVar[Pattern[str]] = re.compile(r"([\+\-])(\d{2})(?:(\d{2}))?|Z")
    _TZ_ZZ_RE: ClassVar[Pattern[str]] = re.compile(r"([\+\-])(\d{2})(?:\:(\d{2}))?|Z")
    _TZ_NAME_RE: ClassVar[Pattern[str]] = re.compile(r"\w[\w+\-/]+")

    # NOTE: timestamps cannot be parsed from natural language strings (by removing the ^...$) because it will
    # break cases like "15 Jul 2000" and a format list (see issue #447)
    # NOTE(review): as written this pattern needs at least two digits, so a
    # single-digit timestamp such as "5" does not match -- confirm intended.
    _TIMESTAMP_RE: ClassVar[Pattern[str]] = re.compile(r"^\-?\d+\.?\d+$")
    _TIMESTAMP_EXPANDED_RE: ClassVar[Pattern[str]] = re.compile(r"^\-?\d+$")

    # Time component as used by parse_iso(): groups are hours, minutes,
    # seconds, sub-second separator and sub-second digits.
    _TIME_RE: ClassVar[Pattern[str]] = re.compile(
        r"^(\d{2})(?:\:?(\d{2}))?(?:\:?(\d{2}))?(?:([\.\,])(\d+))?$"
    )
    # Week date with named groups "year", "week" and optional "day".
    _WEEK_DATE_RE: ClassVar[Pattern[str]] = re.compile(
        r"(?P<year>\d{4})[\-]?W(?P<week>\d{2})[\-]?(?P<day>\d)?"
    )

    # Locale-independent token -> pattern table; __init__ copies it and adds
    # the locale-dependent tokens.
    _BASE_INPUT_RE_MAP: ClassVar[Dict[_FORMAT_TYPE, Pattern[str]]] = {
        "YYYY": _FOUR_DIGIT_RE,
        "YY": _TWO_DIGIT_RE,
        "MM": _TWO_DIGIT_RE,
        "M": _ONE_OR_TWO_DIGIT_RE,
        "DDDD": _THREE_DIGIT_RE,
        "DDD": _ONE_OR_TWO_OR_THREE_DIGIT_RE,
        "DD": _TWO_DIGIT_RE,
        "D": _ONE_OR_TWO_DIGIT_RE,
        "HH": _TWO_DIGIT_RE,
        "H": _ONE_OR_TWO_DIGIT_RE,
        "hh": _TWO_DIGIT_RE,
        "h": _ONE_OR_TWO_DIGIT_RE,
        "mm": _TWO_DIGIT_RE,
        "m": _ONE_OR_TWO_DIGIT_RE,
        "ss": _TWO_DIGIT_RE,
        "s": _ONE_OR_TWO_DIGIT_RE,
        "X": _TIMESTAMP_RE,
        "x": _TIMESTAMP_EXPANDED_RE,
        "ZZZ": _TZ_NAME_RE,
        "ZZ": _TZ_ZZ_RE,
        "Z": _TZ_Z_RE,
        "S": _ONE_OR_MORE_DIGIT_RE,
        "W": _WEEK_DATE_RE,
    }

    SEPARATORS: ClassVar[List[str]] = ["-", "/", "."]

    # Instance attributes, assigned in __init__.
    locale: locales.Locale
    _input_re_map: Dict[_FORMAT_TYPE, Pattern[str]]

160

161 def __init__(self, locale: str = DEFAULT_LOCALE, cache_size: int = 0) -> None:

162 self.locale = locales.get_locale(locale)

163 self._input_re_map = self._BASE_INPUT_RE_MAP.copy()

164 self._input_re_map.update(

165 {

166 "MMMM": self._generate_choice_re(

167 self.locale.month_names[1:], re.IGNORECASE

168 ),

169 "MMM": self._generate_choice_re(

170 self.locale.month_abbreviations[1:], re.IGNORECASE

171 ),

172 "Do": re.compile(self.locale.ordinal_day_re),

173 "dddd": self._generate_choice_re(

174 self.locale.day_names[1:], re.IGNORECASE

175 ),

176 "ddd": self._generate_choice_re(

177 self.locale.day_abbreviations[1:], re.IGNORECASE

178 ),

179 "d": re.compile(r"[1-7]"),

180 "a": self._generate_choice_re(

181 (self.locale.meridians["am"], self.locale.meridians["pm"])

182 ),

183 # note: 'A' token accepts both 'am/pm' and 'AM/PM' formats to

184 # ensure backwards compatibility of this token

185 "A": self._generate_choice_re(self.locale.meridians.values()),

186 }

187 )

188 if cache_size > 0:

189 self._generate_pattern_re = lru_cache(maxsize=cache_size)( # type: ignore

190 self._generate_pattern_re

191 )

192

193 # TODO: since we support more than ISO 8601, we should rename this function

194 # IDEA: break into multiple functions

195 def parse_iso(

196 self, datetime_string: str, normalize_whitespace: bool = False

197 ) -> datetime:

198 if normalize_whitespace:

199 datetime_string = re.sub(r"\s+", " ", datetime_string.strip())

200

201 has_space_divider = " " in datetime_string

202 has_t_divider = "T" in datetime_string

203

204 num_spaces = datetime_string.count(" ")

205 if has_space_divider and num_spaces != 1 or has_t_divider and num_spaces > 0:

206 raise ParserError(

207 f"Expected an ISO 8601-like string, but was given {datetime_string!r}. "

208 "Try passing in a format string to resolve this."

209 )

210

211 has_time = has_space_divider or has_t_divider

212 has_tz = False

213

214 # date formats (ISO 8601 and others) to test against

215 # NOTE: YYYYMM is omitted to avoid confusion with YYMMDD (no longer part of ISO 8601, but is still often used)

216 formats = [

217 "YYYY-MM-DD",

218 "YYYY-M-DD",

219 "YYYY-M-D",

220 "YYYY/MM/DD",

221 "YYYY/M/DD",

222 "YYYY/M/D",

223 "YYYY.MM.DD",

224 "YYYY.M.DD",

225 "YYYY.M.D",

226 "YYYYMMDD",

227 "YYYY-DDDD",

228 "YYYYDDDD",

229 "YYYY-MM",

230 "YYYY/MM",

231 "YYYY.MM",

232 "YYYY",

233 "W",

234 ]

235

236 if has_time:

237 if has_space_divider:

238 date_string, time_string = datetime_string.split(" ", 1)

239 else:

240 date_string, time_string = datetime_string.split("T", 1)

241

242 time_parts = re.split(

243 r"[\+\-Z]", time_string, maxsplit=1, flags=re.IGNORECASE

244 )

245

246 time_components: Optional[Match[str]] = self._TIME_RE.match(time_parts[0])

247

248 if time_components is None:

249 raise ParserError(

250 "Invalid time component provided. "

251 "Please specify a format or provide a valid time component in the basic or extended ISO 8601 time format."

252 )

253

254 (

255 hours,

256 minutes,

257 seconds,

258 subseconds_sep,

259 subseconds,

260 ) = time_components.groups()

261

262 has_tz = len(time_parts) == 2

263 has_minutes = minutes is not None

264 has_seconds = seconds is not None

265 has_subseconds = subseconds is not None

266

267 is_basic_time_format = ":" not in time_parts[0]

268 tz_format = "Z"

269

270 # use 'ZZ' token instead since tz offset is present in non-basic format

271 if has_tz and ":" in time_parts[1]:

272 tz_format = "ZZ"

273

274 time_sep = "" if is_basic_time_format else ":"

275

276 if has_subseconds:

277 time_string = "HH{time_sep}mm{time_sep}ss{subseconds_sep}S".format(

278 time_sep=time_sep, subseconds_sep=subseconds_sep

279 )

280 elif has_seconds:

281 time_string = "HH{time_sep}mm{time_sep}ss".format(time_sep=time_sep)

282 elif has_minutes:

283 time_string = f"HH{time_sep}mm"

284 else:

285 time_string = "HH"

286

287 if has_space_divider:

288 formats = [f"{f} {time_string}" for f in formats]

289 else:

290 formats = [f"{f}T{time_string}" for f in formats]

291

292 if has_time and has_tz:

293 # Add "Z" or "ZZ" to the format strings to indicate to

294 # _parse_token() that a timezone needs to be parsed

295 formats = [f"{f}{tz_format}" for f in formats]

296

297 return self._parse_multiformat(datetime_string, formats)

298

299 def parse(

300 self,

301 datetime_string: str,

302 fmt: Union[List[str], str],

303 normalize_whitespace: bool = False,

304 ) -> datetime:

305 if normalize_whitespace:

306 datetime_string = re.sub(r"\s+", " ", datetime_string)

307

308 if isinstance(fmt, list):

309 return self._parse_multiformat(datetime_string, fmt)

310

311 try:

312 fmt_tokens: List[_FORMAT_TYPE]

313 fmt_pattern_re: Pattern[str]

314 fmt_tokens, fmt_pattern_re = self._generate_pattern_re(fmt)

315 except re.error as e:

316 raise ParserMatchError(

317 f"Failed to generate regular expression pattern: {e}."

318 )

319

320 match = fmt_pattern_re.search(datetime_string)

321

322 if match is None:

323 raise ParserMatchError(

324 f"Failed to match {fmt!r} when parsing {datetime_string!r}."

325 )

326

327 parts: _Parts = {}

328 for token in fmt_tokens:

329 value: Union[Tuple[str, str, str], str]

330 if token == "Do":

331 value = match.group("value")

332 elif token == "W":

333 value = (match.group("year"), match.group("week"), match.group("day"))

334 else:

335 value = match.group(token)

336

337 if value is None:

338 raise ParserMatchError(

339 f"Unable to find a match group for the specified token {token!r}."

340 )

341

342 self._parse_token(token, value, parts) # type: ignore[arg-type]

343

344 return self._build_datetime(parts)

345

346 def _generate_pattern_re(self, fmt: str) -> Tuple[List[_FORMAT_TYPE], Pattern[str]]:

347 # fmt is a string of tokens like 'YYYY-MM-DD'

348 # we construct a new string by replacing each

349 # token by its pattern:

350 # 'YYYY-MM-DD' -> '(?P<YYYY>\d{4})-(?P<MM>\d{2})-(?P<DD>\d{2})'

351 tokens: List[_FORMAT_TYPE] = []

352 offset = 0

353

354 # Escape all special RegEx chars

355 escaped_fmt = re.escape(fmt)

356

357 # Extract the bracketed expressions to be reinserted later.

358 escaped_fmt = re.sub(self._ESCAPE_RE, "#", escaped_fmt)

359

360 # Any number of S is the same as one.

361 # TODO: allow users to specify the number of digits to parse

362 escaped_fmt = re.sub(r"S+", "S", escaped_fmt)

363

364 escaped_data = re.findall(self._ESCAPE_RE, fmt)

365

366 fmt_pattern = escaped_fmt

367

368 for m in self._FORMAT_RE.finditer(escaped_fmt):

369 token: _FORMAT_TYPE = cast(_FORMAT_TYPE, m.group(0))

370 try:

371 input_re = self._input_re_map[token]

372 except KeyError:

373 raise ParserError(f"Unrecognized token {token!r}.")

374 input_pattern = f"(?P<{token}>{input_re.pattern})"

375 tokens.append(token)

376 # a pattern doesn't have the same length as the token

377 # it replaces! We keep the difference in the offset variable.

378 # This works because the string is scanned left-to-right and matches

379 # are returned in the order found by finditer.

380 fmt_pattern = (

381 fmt_pattern[: m.start() + offset]

382 + input_pattern

383 + fmt_pattern[m.end() + offset :]

384 )

385 offset += len(input_pattern) - (m.end() - m.start())

386

387 final_fmt_pattern = ""

388 split_fmt = fmt_pattern.split(r"\#")

389

390 # Due to the way Python splits, 'split_fmt' will always be longer

391 for i in range(len(split_fmt)):

392 final_fmt_pattern += split_fmt[i]

393 if i < len(escaped_data):

394 final_fmt_pattern += escaped_data[i][1:-1]

395

396 # Wrap final_fmt_pattern in a custom word boundary to strictly

397 # match the formatting pattern and filter out date and time formats

398 # that include junk such as: blah1998-09-12 blah, blah 1998-09-12blah,

399 # blah1998-09-12blah. The custom word boundary matches every character

400 # that is not a whitespace character to allow for searching for a date

401 # and time string in a natural language sentence. Therefore, searching

402 # for a string of the form YYYY-MM-DD in "blah 1998-09-12 blah" will

403 # work properly.

404 # Certain punctuation before or after the target pattern such as

405 # "1998-09-12," is permitted. For the full list of valid punctuation,

406 # see the documentation.

407

408 starting_word_boundary = (

409 r"(?<!\S\S)" # Don't have two consecutive non-whitespace characters. This ensures that we allow cases

410 # like .11.25.2019 but not 1.11.25.2019 (for pattern MM.DD.YYYY)

411 r"(?<![^\,\.\;\:\?\!\"\'\`\[\]\{\}<>\s])" # This is the list of punctuation that is ok before the

412 # pattern (i.e. "It can't not be these characters before the pattern")

413 r"(\b|^)"

414 # The \b is to block cases like 1201912 but allow 201912 for pattern YYYYMM. The ^ was necessary to allow a

415 # negative number through i.e. before epoch numbers

416 )

417 ending_word_boundary = (

418 r"(?=[\,\.\;\:\?\!\"\'\`\[\]\{\}\<\>]?" # Positive lookahead stating that these punctuation marks

419 # can appear after the pattern at most 1 time

420 r"(?!\S))" # Don't allow any non-whitespace character after the punctuation

421 )

422 bounded_fmt_pattern = r"{}{}{}".format(

423 starting_word_boundary, final_fmt_pattern, ending_word_boundary

424 )

425

426 return tokens, re.compile(bounded_fmt_pattern, flags=re.IGNORECASE)

427

428 @overload

429 def _parse_token(

430 self,

431 token: Literal[

432 "YYYY",

433 "YY",

434 "MM",

435 "M",

436 "DDDD",

437 "DDD",

438 "DD",

439 "D",

440 "Do",

441 "HH",

442 "hh",

443 "h",

444 "H",

445 "mm",

446 "m",

447 "ss",

448 "s",

449 "x",

450 ],

451 value: Union[str, bytes, SupportsInt, bytearray],

452 parts: _Parts,

453 ) -> None:

454 ... # pragma: no cover

455

456 @overload

457 def _parse_token(

458 self,

459 token: Literal["X"],

460 value: Union[str, bytes, SupportsFloat, bytearray],

461 parts: _Parts,

462 ) -> None:

463 ... # pragma: no cover

464

465 @overload

466 def _parse_token(

467 self,

468 token: Literal["MMMM", "MMM", "dddd", "ddd", "S"],

469 value: Union[str, bytes, bytearray],

470 parts: _Parts,

471 ) -> None:

472 ... # pragma: no cover

473

474 @overload

475 def _parse_token(

476 self,

477 token: Literal["a", "A", "ZZZ", "ZZ", "Z"],

478 value: Union[str, bytes],

479 parts: _Parts,

480 ) -> None:

481 ... # pragma: no cover

482

483 @overload

484 def _parse_token(

485 self,

486 token: Literal["W"],

487 value: Tuple[_WEEKDATE_ELEMENT, _WEEKDATE_ELEMENT, Optional[_WEEKDATE_ELEMENT]],

488 parts: _Parts,

489 ) -> None:

490 ... # pragma: no cover

491

492 def _parse_token(

493 self,

494 token: Any,

495 value: Any,

496 parts: _Parts,

497 ) -> None:

498 if token == "YYYY":

499 parts["year"] = int(value)

500

501 elif token == "YY":

502 value = int(value)

503 parts["year"] = 1900 + value if value > 68 else 2000 + value

504

505 elif token in ["MMMM", "MMM"]:

506 # FIXME: month_number() is nullable

507 parts["month"] = self.locale.month_number(value.lower()) # type: ignore[typeddict-item]

508

509 elif token in ["MM", "M"]:

510 parts["month"] = int(value)

511

512 elif token in ["DDDD", "DDD"]:

513 parts["day_of_year"] = int(value)

514

515 elif token in ["DD", "D"]:

516 parts["day"] = int(value)

517

518 elif token == "Do":

519 parts["day"] = int(value)

520

521 elif token == "dddd":

522 # locale day names are 1-indexed

523 day_of_week = [x.lower() for x in self.locale.day_names].index(

524 value.lower()

525 )

526 parts["day_of_week"] = day_of_week - 1

527

528 elif token == "ddd":

529 # locale day abbreviations are 1-indexed

530 day_of_week = [x.lower() for x in self.locale.day_abbreviations].index(

531 value.lower()

532 )

533 parts["day_of_week"] = day_of_week - 1

534

535 elif token.upper() in ["HH", "H"]:

536 parts["hour"] = int(value)

537

538 elif token in ["mm", "m"]:

539 parts["minute"] = int(value)

540

541 elif token in ["ss", "s"]:

542 parts["second"] = int(value)

543

544 elif token == "S":

545 # We have the *most significant* digits of an arbitrary-precision integer.

546 # We want the six most significant digits as an integer, rounded.

547 # IDEA: add nanosecond support somehow? Need datetime support for it first.

548 value = value.ljust(7, "0")

549

550 # floating-point (IEEE-754) defaults to half-to-even rounding

551 seventh_digit = int(value[6])

552 if seventh_digit == 5:

553 rounding = int(value[5]) % 2

554 elif seventh_digit > 5:

555 rounding = 1

556 else:

557 rounding = 0

558

559 parts["microsecond"] = int(value[:6]) + rounding

560

561 elif token == "X":

562 parts["timestamp"] = float(value)

563

564 elif token == "x":

565 parts["expanded_timestamp"] = int(value)

566

567 elif token in ["ZZZ", "ZZ", "Z"]:

568 parts["tzinfo"] = TzinfoParser.parse(value)

569

570 elif token in ["a", "A"]:

571 if value in (self.locale.meridians["am"], self.locale.meridians["AM"]):

572 parts["am_pm"] = "am"

573 if "hour" in parts and not 0 <= parts["hour"] <= 12:

574 raise ParserMatchError(

575 f"Hour token value must be between 0 and 12 inclusive for token {token!r}."

576 )

577 elif value in (self.locale.meridians["pm"], self.locale.meridians["PM"]):

578 parts["am_pm"] = "pm"

579 elif token == "W":

580 parts["weekdate"] = value

581

582 @staticmethod

583 def _build_datetime(parts: _Parts) -> datetime:

584 weekdate = parts.get("weekdate")

585

586 if weekdate is not None:

587 year, week = int(weekdate[0]), int(weekdate[1])

588

589 if weekdate[2] is not None:

590 _day = int(weekdate[2])

591 else:

592 # day not given, default to 1

593 _day = 1

594

595 date_string = f"{year}-{week}-{_day}"

596

597 # tokens for ISO 8601 weekdates

598 dt = datetime.strptime(date_string, "%G-%V-%u")

599

600 parts["year"] = dt.year

601 parts["month"] = dt.month

602 parts["day"] = dt.day

603

604 timestamp = parts.get("timestamp")

605

606 if timestamp is not None:

607 return datetime.fromtimestamp(timestamp, tz=tz.tzutc())

608

609 expanded_timestamp = parts.get("expanded_timestamp")

610

611 if expanded_timestamp is not None:

612 return datetime.fromtimestamp(

613 normalize_timestamp(expanded_timestamp),

614 tz=tz.tzutc(),

615 )

616

617 day_of_year = parts.get("day_of_year")

618

619 if day_of_year is not None:

620 _year = parts.get("year")

621 month = parts.get("month")

622 if _year is None:

623 raise ParserError(

624 "Year component is required with the DDD and DDDD tokens."

625 )

626

627 if month is not None:

628 raise ParserError(

629 "Month component is not allowed with the DDD and DDDD tokens."

630 )

631

632 date_string = f"{_year}-{day_of_year}"

633 try:

634 dt = datetime.strptime(date_string, "%Y-%j")

635 except ValueError:

636 raise ParserError(

637 f"The provided day of year {day_of_year!r} is invalid."

638 )

639

640 parts["year"] = dt.year

641 parts["month"] = dt.month

642 parts["day"] = dt.day

643

644 day_of_week: Optional[int] = parts.get("day_of_week")

645 day = parts.get("day")

646

647 # If day is passed, ignore day of week

648 if day_of_week is not None and day is None:

649 year = parts.get("year", 1970)

650 month = parts.get("month", 1)

651 day = 1

652

653 # dddd => first day of week after epoch

654 # dddd YYYY => first day of week in specified year

655 # dddd MM YYYY => first day of week in specified year and month

656 # dddd MM => first day after epoch in specified month

657 next_weekday_dt = next_weekday(datetime(year, month, day), day_of_week)

658 parts["year"] = next_weekday_dt.year

659 parts["month"] = next_weekday_dt.month

660 parts["day"] = next_weekday_dt.day

661

662 am_pm = parts.get("am_pm")

663 hour = parts.get("hour", 0)

664

665 if am_pm == "pm" and hour < 12:

666 hour += 12

667 elif am_pm == "am" and hour == 12:

668 hour = 0

669

670 # Support for midnight at the end of day

671 if hour == 24:

672 if parts.get("minute", 0) != 0:

673 raise ParserError("Midnight at the end of day must not contain minutes")

674 if parts.get("second", 0) != 0:

675 raise ParserError("Midnight at the end of day must not contain seconds")

676 if parts.get("microsecond", 0) != 0:

677 raise ParserError(

678 "Midnight at the end of day must not contain microseconds"

679 )

680 hour = 0

681 day_increment = 1

682 else:

683 day_increment = 0

684

685 # account for rounding up to 1000000

686 microsecond = parts.get("microsecond", 0)

687 if microsecond == 1000000:

688 microsecond = 0

689 second_increment = 1

690 else:

691 second_increment = 0

692

693 increment = timedelta(days=day_increment, seconds=second_increment)

694

695 return (

696 datetime(

697 year=parts.get("year", 1),

698 month=parts.get("month", 1),

699 day=parts.get("day", 1),

700 hour=hour,

701 minute=parts.get("minute", 0),

702 second=parts.get("second", 0),

703 microsecond=microsecond,

704 tzinfo=parts.get("tzinfo"),

705 )

706 + increment

707 )

708

709 def _parse_multiformat(self, string: str, formats: Iterable[str]) -> datetime:

710 _datetime: Optional[datetime] = None

711

712 for fmt in formats:

713 try:

714 _datetime = self.parse(string, fmt)

715 break

716 except ParserMatchError:

717 pass

718

719 if _datetime is None:

720 supported_formats = ", ".join(formats)

721 raise ParserError(

722 f"Could not match input {string!r} to any of the following formats: {supported_formats}."

723 )

724

725 return _datetime

726

727 # generates a capture group of choices separated by an OR operator

728 @staticmethod

729 def _generate_choice_re(

730 choices: Iterable[str], flags: Union[int, re.RegexFlag] = 0

731 ) -> Pattern[str]:

732 return re.compile(r"({})".format("|".join(choices)), flags=flags)

733

734

class TzinfoParser:
    """Turns a timezone expression into a ``tzinfo`` object."""

    # Optional "(UTC" prefix, optional sign, two-digit hours and optional
    # two-digit minutes (with or without a colon).
    _TZINFO_RE: ClassVar[Pattern[str]] = re.compile(
        r"^(?:\(UTC)*([\+\-])?(\d{2})(?:\:?(\d{2}))?"
    )

    @classmethod
    def parse(cls, tzinfo_string: str) -> dt_tzinfo:
        """Resolve ``tzinfo_string`` to a timezone.

        Tried in order: the literal ``"local"``; ``"utc"``, ``"UTC"`` or
        ``"Z"``; a numeric offset matched by ``_TZINFO_RE``; finally a lookup
        through ``tz.gettz``.

        :raises ParserError: if ``tz.gettz`` returns ``None`` for the string.
        """
        if tzinfo_string == "local":
            return tz.tzlocal()

        if tzinfo_string in ("utc", "UTC", "Z"):
            return tz.tzutc()

        iso_match = cls._TZINFO_RE.match(tzinfo_string)
        if iso_match:
            sign: Optional[str]
            hours: str
            minutes: Union[str, int, None]
            sign, hours, minutes = iso_match.groups()

            # offset in seconds; missing minutes count as zero
            offset_seconds = int(hours) * 3600 + int(minutes or 0) * 60
            if sign == "-":
                offset_seconds = -offset_seconds

            return tz.tzoffset(None, offset_seconds)

        named_zone: Optional[dt_tzinfo] = tz.gettz(tzinfo_string)
        if named_zone is None:
            raise ParserError(f"Could not parse timezone expression {tzinfo_string!r}.")

        return named_zone