Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/arrow/parser.py: 87%

1"""Provides the :class:`Arrow <arrow.parser.DateTimeParser>` class, a better way to parse datetime strings."""

3import re

4from datetime import datetime, timedelta

5from datetime import tzinfo as dt_tzinfo

6from functools import lru_cache

7from typing import (

8 Any,

9 ClassVar,

10 Dict,

11 Iterable,

12 List,

13 Literal,

14 Match,

15 Optional,

16 Pattern,

17 SupportsFloat,

18 SupportsInt,

19 Tuple,

20 TypedDict,

21 Union,

22 cast,

23 overload,

24)

26from dateutil import tz

28from arrow import locales

29from arrow.constants import DEFAULT_LOCALE

30from arrow.util import next_weekday, normalize_timestamp

class ParserError(ValueError):
    """Base error raised by the parser for input it cannot handle.

    Subclasses ``ValueError`` so callers that catch ``ValueError`` also
    catch parser failures.
    """

# Allows for ParserErrors to be propagated from _build_datetime()
# when day_of_year errors occur.
# Before this, the ParserErrors were caught by the try/except in
# _parse_multiformat() and the appropriate error message was not
# transmitted to the user.
class ParserMatchError(ParserError):
    """Raised when an input string does not match a format pattern.

    ``DateTimeParser._parse_multiformat`` catches only this subclass while
    trying candidate formats; a plain ``ParserError`` is not caught there
    and reaches the caller.
    """

# Accepted type for each element of a week-date triple; every member of the
# union can be handed to ``int()``.
_WEEKDATE_ELEMENT = Union[str, bytes, SupportsInt, bytearray]

# Closed set of format tokens the parser knows how to handle.
_FORMAT_TYPE = Literal[
    "YYYY",
    "YY",
    "MM",
    "M",
    "DDDD",
    "DDD",
    "DD",
    "D",
    "HH",
    "H",
    "hh",
    "h",
    "mm",
    "m",
    "ss",
    "s",
    "X",
    "x",
    "ZZZ",
    "ZZ",
    "Z",
    "S",
    "W",
    "MMMM",
    "MMM",
    "Do",
    "dddd",
    "ddd",
    "d",
    "a",
    "A",
]

class _Parts(TypedDict, total=False):
    """Intermediate values collected from a matched datetime string.

    Every key is optional (``total=False``); a key is present only when the
    corresponding token was parsed.
    """

    year: int
    month: int
    day_of_year: int
    day: int
    hour: int
    minute: int
    second: int
    microsecond: int
    # value of the "X" token
    timestamp: float
    # value of the "x" token
    expanded_timestamp: int
    tzinfo: dt_tzinfo
    am_pm: Literal["am", "pm"]
    day_of_week: int
    # (year, week, day) triple from the "W" token; day may be None
    weekdate: Tuple[_WEEKDATE_ELEMENT, _WEEKDATE_ELEMENT, Optional[_WEEKDATE_ELEMENT]]

class DateTimeParser:
    """Parse datetime strings using token-based format strings.

    A format string such as ``YYYY-MM-DD`` is compiled into a regular
    expression containing one named group per token. The matched groups are
    converted into a ``_Parts`` mapping, from which a ``datetime`` is built.
    """

    # Finds format tokens inside a format string.
    _FORMAT_RE: ClassVar[Pattern[str]] = re.compile(
        r"(YYY?Y?|MM?M?M?|Do|DD?D?D?|d?d?d?d|HH?|hh?|mm?|ss?|S+|ZZ?Z?|a|A|x|X|W)"
    )
    # Finds bracketed literal sections, e.g. "[at]".
    _ESCAPE_RE: ClassVar[Pattern[str]] = re.compile(r"\[[^\[\]]*\]")

    _ONE_OR_TWO_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{1,2}")
    _ONE_OR_TWO_OR_THREE_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{1,3}")
    _ONE_OR_MORE_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d+")
    _TWO_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{2}")
    _THREE_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{3}")
    _FOUR_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{4}")
    _TZ_Z_RE: ClassVar[Pattern[str]] = re.compile(r"([\+\-])(\d{2})(?:(\d{2}))?|Z")
    _TZ_ZZ_RE: ClassVar[Pattern[str]] = re.compile(r"([\+\-])(\d{2})(?:\:(\d{2}))?|Z")
    _TZ_NAME_RE: ClassVar[Pattern[str]] = re.compile(r"\w[\w+\-/]+")
    # NOTE: timestamps cannot be parsed from natural language strings (by removing the ^...$) because it will
    # break cases like "15 Jul 2000" and a format list (see issue #447)
    _TIMESTAMP_RE: ClassVar[Pattern[str]] = re.compile(r"^\-?\d+\.?\d+$")
    _TIMESTAMP_EXPANDED_RE: ClassVar[Pattern[str]] = re.compile(r"^\-?\d+$")
    # Time portion used by parse_iso(): HH[[:]mm[[:]ss]][(.|,)fraction]
    _TIME_RE: ClassVar[Pattern[str]] = re.compile(
        r"^(\d{2})(?:\:?(\d{2}))?(?:\:?(\d{2}))?(?:([\.\,])(\d+))?$"
    )
    _WEEK_DATE_RE: ClassVar[Pattern[str]] = re.compile(
        r"(?P<year>\d{4})[\-]?W(?P<week>\d{2})[\-]?(?P<day>\d)?"
    )

    # Locale-independent token -> input pattern map. __init__ copies it and
    # adds the locale-dependent tokens.
    _BASE_INPUT_RE_MAP: ClassVar[Dict[_FORMAT_TYPE, Pattern[str]]] = {
        "YYYY": _FOUR_DIGIT_RE,
        "YY": _TWO_DIGIT_RE,
        "MM": _TWO_DIGIT_RE,
        "M": _ONE_OR_TWO_DIGIT_RE,
        "DDDD": _THREE_DIGIT_RE,
        "DDD": _ONE_OR_TWO_OR_THREE_DIGIT_RE,
        "DD": _TWO_DIGIT_RE,
        "D": _ONE_OR_TWO_DIGIT_RE,
        "HH": _TWO_DIGIT_RE,
        "H": _ONE_OR_TWO_DIGIT_RE,
        "hh": _TWO_DIGIT_RE,
        "h": _ONE_OR_TWO_DIGIT_RE,
        "mm": _TWO_DIGIT_RE,
        "m": _ONE_OR_TWO_DIGIT_RE,
        "ss": _TWO_DIGIT_RE,
        "s": _ONE_OR_TWO_DIGIT_RE,
        "X": _TIMESTAMP_RE,
        "x": _TIMESTAMP_EXPANDED_RE,
        "ZZZ": _TZ_NAME_RE,
        "ZZ": _TZ_ZZ_RE,
        "Z": _TZ_Z_RE,
        "S": _ONE_OR_MORE_DIGIT_RE,
        "W": _WEEK_DATE_RE,
    }

    SEPARATORS: ClassVar[List[str]] = ["-", "/", "."]

    locale: locales.Locale
    _input_re_map: Dict[_FORMAT_TYPE, Pattern[str]]

    def __init__(self, locale: str = DEFAULT_LOCALE, cache_size: int = 0) -> None:
        """Create a parser bound to a locale.

        :param locale: locale name handed to ``locales.get_locale``.
        :param cache_size: when greater than zero, ``_generate_pattern_re``
            is wrapped on this instance in an ``lru_cache`` of that size.
        """
        self.locale = locales.get_locale(locale)
        self._input_re_map = self._BASE_INPUT_RE_MAP.copy()
        self._input_re_map.update(
            {
                # index 0 of the locale name lists is skipped
                "MMMM": self._generate_choice_re(
                    self.locale.month_names[1:], re.IGNORECASE
                ),
                "MMM": self._generate_choice_re(
                    self.locale.month_abbreviations[1:], re.IGNORECASE
                ),
                "Do": re.compile(self.locale.ordinal_day_re),
                "dddd": self._generate_choice_re(
                    self.locale.day_names[1:], re.IGNORECASE
                ),
                "ddd": self._generate_choice_re(
                    self.locale.day_abbreviations[1:], re.IGNORECASE
                ),
                "d": re.compile(r"[1-7]"),
                "a": self._generate_choice_re(
                    (self.locale.meridians["am"], self.locale.meridians["pm"])
                ),
                # note: 'A' token accepts both 'am/pm' and 'AM/PM' formats to
                # ensure backwards compatibility of this token
                "A": self._generate_choice_re(self.locale.meridians.values()),
            }
        )
        if cache_size > 0:
            # Shadow the method with a cached wrapper on this instance only.
            self._generate_pattern_re = lru_cache(maxsize=cache_size)(  # type: ignore
                self._generate_pattern_re
            )

    # TODO: since we support more than ISO 8601, we should rename this function
    # IDEA: break into multiple functions
    def parse_iso(
        self, datetime_string: str, normalize_whitespace: bool = False
    ) -> datetime:
        """Parse an ISO 8601-like string without an explicit format.

        The shape of the time portion (separator, seconds, sub-seconds,
        timezone offset) is detected first; the matching time format is then
        appended to each candidate date format and the candidates are tried
        in order.

        :param datetime_string: the text to parse.
        :param normalize_whitespace: strip the string and collapse runs of
            whitespace into single spaces before parsing.
        :returns: the parsed ``datetime``.
        :raises ParserError: if the string has more than one space, mixes a
            ``T`` divider with spaces, has an invalid time component, or
            matches none of the candidate formats.
        """
        if normalize_whitespace:
            datetime_string = re.sub(r"\s+", " ", datetime_string.strip())

        has_space_divider = " " in datetime_string
        has_t_divider = "T" in datetime_string

        num_spaces = datetime_string.count(" ")
        if (has_space_divider and num_spaces != 1) or (
            has_t_divider and num_spaces > 0
        ):
            raise ParserError(
                f"Expected an ISO 8601-like string, but was given {datetime_string!r}. "
                "Try passing in a format string to resolve this."
            )

        has_time = has_space_divider or has_t_divider

        # date formats (ISO 8601 and others) to test against
        # NOTE: YYYYMM is omitted to avoid confusion with YYMMDD (no longer part of ISO 8601, but is still often used)
        formats = [
            "YYYY-MM-DD",
            "YYYY-M-DD",
            "YYYY-M-D",
            "YYYY/MM/DD",
            "YYYY/M/DD",
            "YYYY/M/D",
            "YYYY.MM.DD",
            "YYYY.M.DD",
            "YYYY.M.D",
            "YYYYMMDD",
            "YYYY-DDDD",
            "YYYYDDDD",
            "YYYY-MM",
            "YYYY/MM",
            "YYYY.MM",
            "YYYY",
            "W",
        ]

        if has_time:
            # A space divider takes precedence over "T" when choosing where
            # to split date from time.
            divider = " " if has_space_divider else "T"
            _, time_string = datetime_string.split(divider, 1)

            # Separate the clock time from an optional timezone suffix.
            time_parts = re.split(
                r"[\+\-Z]", time_string, maxsplit=1, flags=re.IGNORECASE
            )

            time_components: Optional[Match[str]] = self._TIME_RE.match(time_parts[0])

            if time_components is None:
                raise ParserError(
                    "Invalid time component provided. "
                    "Please specify a format or provide a valid time component in the basic or extended ISO 8601 time format."
                )

            (
                _hours,
                minutes,
                seconds,
                subseconds_sep,
                subseconds,
            ) = time_components.groups()

            has_tz = len(time_parts) == 2

            # use 'ZZ' token instead since tz offset is present in non-basic format
            tz_format = "ZZ" if has_tz and ":" in time_parts[1] else "Z"

            # The basic time format has no ":" between fields.
            time_sep = ":" if ":" in time_parts[0] else ""

            if subseconds is not None:
                time_format = f"HH{time_sep}mm{time_sep}ss{subseconds_sep}S"
            elif seconds is not None:
                time_format = f"HH{time_sep}mm{time_sep}ss"
            elif minutes is not None:
                time_format = f"HH{time_sep}mm"
            else:
                time_format = "HH"

            formats = [f"{f}{divider}{time_format}" for f in formats]

            if has_tz:
                # Add "Z" or "ZZ" to the format strings to indicate to
                # _parse_token() that a timezone needs to be parsed
                formats = [f"{f}{tz_format}" for f in formats]

        return self._parse_multiformat(datetime_string, formats)

    def parse(
        self,
        datetime_string: str,
        fmt: Union[List[str], str],
        normalize_whitespace: bool = False,
    ) -> datetime:
        """Parse *datetime_string* according to *fmt*.

        :param datetime_string: the text to parse.
        :param fmt: a format string, or a list of format strings that are
            tried in order.
        :param normalize_whitespace: collapse runs of whitespace into single
            spaces before parsing (the string is not stripped here).
        :returns: the parsed ``datetime``.
        :raises ParserMatchError: if the pattern for *fmt* cannot be
            compiled, the string does not match it, or a token has no
            matched value.
        :raises ParserError: for failures while assembling the datetime, or
            when none of a list of formats matches.
        """
        if normalize_whitespace:
            datetime_string = re.sub(r"\s+", " ", datetime_string)

        if isinstance(fmt, list):
            return self._parse_multiformat(datetime_string, fmt)

        try:
            fmt_tokens: List[_FORMAT_TYPE]
            fmt_pattern_re: Pattern[str]
            fmt_tokens, fmt_pattern_re = self._generate_pattern_re(fmt)
        except re.error as e:
            raise ParserMatchError(
                f"Failed to generate regular expression pattern: {e}."
            ) from e

        match = fmt_pattern_re.search(datetime_string)

        if match is None:
            raise ParserMatchError(
                f"Failed to match {fmt!r} when parsing {datetime_string!r}."
            )

        parts: _Parts = {}
        for token in fmt_tokens:
            value: Union[Tuple[str, str, str], str]
            if token == "Do":
                # the ordinal-day value is read from a group named "value"
                value = match.group("value")
            elif token == "W":
                # the week-date pattern carries its own year/week/day groups
                value = (match.group("year"), match.group("week"), match.group("day"))
            else:
                value = match.group(token)

            if value is None:
                raise ParserMatchError(
                    f"Unable to find a match group for the specified token {token!r}."
                )

            self._parse_token(token, value, parts)  # type: ignore[arg-type]

        return self._build_datetime(parts)

    def _generate_pattern_re(self, fmt: str) -> Tuple[List[_FORMAT_TYPE], Pattern[str]]:
        """Compile *fmt* into a list of tokens and a bounded search pattern.

        :param fmt: a format string such as ``YYYY-MM-DD``.
        :returns: the tokens in order of appearance and the compiled,
            case-insensitive pattern with one named group per token.
        :raises ParserError: if a matched token has no input pattern.
        :raises re.error: if the assembled pattern does not compile.
        """
        # fmt is a string of tokens like 'YYYY-MM-DD'
        # we construct a new string by replacing each
        # token by its pattern:
        # 'YYYY-MM-DD' -> '(?P<YYYY>\d{4})-(?P<MM>\d{2})-(?P<DD>\d{2})'
        tokens: List[_FORMAT_TYPE] = []
        offset = 0

        # Escape all special RegEx chars
        escaped_fmt = re.escape(fmt)

        # Extract the bracketed expressions to be reinserted later.
        escaped_fmt = re.sub(self._ESCAPE_RE, "#", escaped_fmt)

        # Any number of S is the same as one.
        # TODO: allow users to specify the number of digits to parse
        escaped_fmt = re.sub(r"S+", "S", escaped_fmt)

        # The bracketed sections are taken from the raw (unescaped) fmt.
        escaped_data = re.findall(self._ESCAPE_RE, fmt)

        fmt_pattern = escaped_fmt

        for m in self._FORMAT_RE.finditer(escaped_fmt):
            token: _FORMAT_TYPE = cast(_FORMAT_TYPE, m.group(0))
            try:
                input_re = self._input_re_map[token]
            except KeyError:
                raise ParserError(f"Unrecognized token {token!r}.")
            input_pattern = f"(?P<{token}>{input_re.pattern})"
            tokens.append(token)
            # a pattern doesn't have the same length as the token
            # it replaces! We keep the difference in the offset variable.
            # This works because the string is scanned left-to-right and matches
            # are returned in the order found by finditer.
            fmt_pattern = (
                fmt_pattern[: m.start() + offset]
                + input_pattern
                + fmt_pattern[m.end() + offset :]
            )
            offset += len(input_pattern) - (m.end() - m.start())

        # Re-insert each bracketed section (without its brackets) where its
        # placeholder sits.
        split_fmt = fmt_pattern.split(r"\#")
        pieces: List[str] = []

        # Due to the way Python splits, 'split_fmt' will always be longer
        for i, chunk in enumerate(split_fmt):
            pieces.append(chunk)
            if i < len(escaped_data):
                pieces.append(escaped_data[i][1:-1])

        final_fmt_pattern = "".join(pieces)

        # Wrap final_fmt_pattern in a custom word boundary to strictly
        # match the formatting pattern and filter out date and time formats
        # that include junk such as: blah1998-09-12 blah, blah 1998-09-12blah,
        # blah1998-09-12blah. The custom word boundary matches every character
        # that is not a whitespace character to allow for searching for a date
        # and time string in a natural language sentence. Therefore, searching
        # for a string of the form YYYY-MM-DD in "blah 1998-09-12 blah" will
        # work properly.
        # Certain punctuation before or after the target pattern such as
        # "1998-09-12," is permitted. For the full list of valid punctuation,
        # see the documentation.

        starting_word_boundary = (
            r"(?<!\S\S)"  # Don't have two consecutive non-whitespace characters. This ensures that we allow cases
            # like .11.25.2019 but not 1.11.25.2019 (for pattern MM.DD.YYYY)
            r"(?<![^\,\.\;\:\?\!\"\'\`\[\]\{\}<>\s])"  # This is the list of punctuation that is ok before the
            # pattern (i.e. "It can't not be these characters before the pattern")
            r"(\b|^)"
            # The \b is to block cases like 1201912 but allow 201912 for pattern YYYYMM. The ^ was necessary to allow a
            # negative number through i.e. before epoch numbers
        )
        ending_word_boundary = (
            r"(?=[\,\.\;\:\?\!\"\'\`\[\]\{\}\<\>]?"  # Positive lookahead stating that these punctuation marks
            # can appear after the pattern at most 1 time
            r"(?!\S))"  # Don't allow any non-whitespace character after the punctuation
        )
        bounded_fmt_pattern = (
            starting_word_boundary + final_fmt_pattern + ending_word_boundary
        )

        return tokens, re.compile(bounded_fmt_pattern, flags=re.IGNORECASE)

    @overload
    def _parse_token(
        self,
        token: Literal[
            "YYYY",
            "YY",
            "MM",
            "M",
            "DDDD",
            "DDD",
            "DD",
            "D",
            "Do",
            "HH",
            "hh",
            "h",
            "H",
            "mm",
            "m",
            "ss",
            "s",
            "x",
        ],
        value: Union[str, bytes, SupportsInt, bytearray],
        parts: _Parts,
    ) -> None:
        ...  # pragma: no cover

    @overload
    def _parse_token(
        self,
        token: Literal["X"],
        value: Union[str, bytes, SupportsFloat, bytearray],
        parts: _Parts,
    ) -> None:
        ...  # pragma: no cover

    @overload
    def _parse_token(
        self,
        token: Literal["MMMM", "MMM", "dddd", "ddd", "S"],
        value: Union[str, bytes, bytearray],
        parts: _Parts,
    ) -> None:
        ...  # pragma: no cover

    @overload
    def _parse_token(
        self,
        token: Literal["a", "A", "ZZZ", "ZZ", "Z"],
        value: Union[str, bytes],
        parts: _Parts,
    ) -> None:
        ...  # pragma: no cover

    @overload
    def _parse_token(
        self,
        token: Literal["W"],
        value: Tuple[_WEEKDATE_ELEMENT, _WEEKDATE_ELEMENT, Optional[_WEEKDATE_ELEMENT]],
        parts: _Parts,
    ) -> None:
        ...  # pragma: no cover

    def _parse_token(
        self,
        token: Any,
        value: Any,
        parts: _Parts,
    ) -> None:
        """Convert one matched token value and store it in *parts*.

        :param token: the format token that produced *value*.
        :param value: the matched text (a 3-tuple for the ``W`` token).
        :param parts: mapping updated in place.
        :raises ParserMatchError: if an "am" meridian is combined with an
            already-parsed hour outside 0-12.
        """
        if token == "YYYY":
            parts["year"] = int(value)

        elif token == "YY":
            value = int(value)
            # two-digit years above 68 belong to the 1900s, the rest to the 2000s
            parts["year"] = 1900 + value if value > 68 else 2000 + value

        elif token in ["MMMM", "MMM"]:
            # FIXME: month_number() is nullable
            parts["month"] = self.locale.month_number(value.lower())  # type: ignore[typeddict-item]

        elif token in ["MM", "M"]:
            parts["month"] = int(value)

        elif token in ["DDDD", "DDD"]:
            parts["day_of_year"] = int(value)

        elif token in ["DD", "D"]:
            parts["day"] = int(value)

        elif token == "Do":
            parts["day"] = int(value)

        elif token == "dddd":
            # locale day names are 1-indexed
            day_of_week = [x.lower() for x in self.locale.day_names].index(
                value.lower()
            )
            parts["day_of_week"] = day_of_week - 1

        elif token == "ddd":
            # locale day abbreviations are 1-indexed
            day_of_week = [x.lower() for x in self.locale.day_abbreviations].index(
                value.lower()
            )
            parts["day_of_week"] = day_of_week - 1

        elif token.upper() in ["HH", "H"]:
            parts["hour"] = int(value)

        elif token in ["mm", "m"]:
            parts["minute"] = int(value)

        elif token in ["ss", "s"]:
            parts["second"] = int(value)

        elif token == "S":
            # We have the *most significant* digits of an arbitrary-precision integer.
            # We want the six most significant digits as an integer, rounded.
            # IDEA: add nanosecond support somehow? Need datetime support for it first.
            value = value.ljust(7, "0")

            # floating-point (IEEE-754) defaults to half-to-even rounding
            seventh_digit = int(value[6])
            if seventh_digit == 5:
                rounding = int(value[5]) % 2
            elif seventh_digit > 5:
                rounding = 1
            else:
                rounding = 0

            # may reach 1000000; _build_datetime() carries that into seconds
            parts["microsecond"] = int(value[:6]) + rounding

        elif token == "X":
            parts["timestamp"] = float(value)

        elif token == "x":
            parts["expanded_timestamp"] = int(value)

        elif token in ["ZZZ", "ZZ", "Z"]:
            parts["tzinfo"] = TzinfoParser.parse(value)

        elif token in ["a", "A"]:
            if value in (self.locale.meridians["am"], self.locale.meridians["AM"]):
                parts["am_pm"] = "am"
                # only checked when the hour token was parsed before this one
                if "hour" in parts and not 0 <= parts["hour"] <= 12:
                    raise ParserMatchError(
                        f"Hour token value must be between 0 and 12 inclusive for token {token!r}."
                    )
            elif value in (self.locale.meridians["pm"], self.locale.meridians["PM"]):
                parts["am_pm"] = "pm"
        elif token == "W":
            parts["weekdate"] = value

    @staticmethod
    def _build_datetime(parts: _Parts) -> datetime:
        """Assemble a ``datetime`` from the collected *parts*.

        Week dates and day-of-year values are resolved to year/month/day
        first. A timestamp, when present, short-circuits everything else and
        yields a UTC datetime. Missing fields default to year 1, month 1,
        day 1 and zero for the time fields.

        :param parts: values gathered by ``_parse_token``; may be updated
            in place with derived year/month/day entries.
        :returns: the assembled ``datetime``.
        :raises ParserError: for an invalid week date or day of year, a day
            of year without a year or combined with a month, or an hour of
            24 with non-zero minutes, seconds or microseconds.
        """
        weekdate = parts.get("weekdate")

        if weekdate is not None:
            year, week = int(weekdate[0]), int(weekdate[1])

            # day not given, default to 1
            _day = int(weekdate[2]) if weekdate[2] is not None else 1

            date_string = f"{year}-{week}-{_day}"

            # tokens for ISO 8601 weekdates
            try:
                dt = datetime.strptime(date_string, "%G-%V-%u")
            except ValueError as e:
                # Report impossible week dates the same way as an invalid
                # day of year instead of leaking a bare ValueError.
                raise ParserError(
                    f"The provided week date {date_string!r} is invalid."
                ) from e

            parts["year"] = dt.year
            parts["month"] = dt.month
            parts["day"] = dt.day

        timestamp = parts.get("timestamp")

        if timestamp is not None:
            return datetime.fromtimestamp(timestamp, tz=tz.tzutc())

        expanded_timestamp = parts.get("expanded_timestamp")

        if expanded_timestamp is not None:
            return datetime.fromtimestamp(
                normalize_timestamp(expanded_timestamp),
                tz=tz.tzutc(),
            )

        day_of_year = parts.get("day_of_year")

        if day_of_year is not None:
            _year = parts.get("year")
            month = parts.get("month")
            if _year is None:
                raise ParserError(
                    "Year component is required with the DDD and DDDD tokens."
                )

            if month is not None:
                raise ParserError(
                    "Month component is not allowed with the DDD and DDDD tokens."
                )

            date_string = f"{_year}-{day_of_year}"
            try:
                dt = datetime.strptime(date_string, "%Y-%j")
            except ValueError as e:
                raise ParserError(
                    f"The provided day of year {day_of_year!r} is invalid."
                ) from e

            parts["year"] = dt.year
            parts["month"] = dt.month
            parts["day"] = dt.day

        day_of_week: Optional[int] = parts.get("day_of_week")
        day = parts.get("day")

        # If day is passed, ignore day of week
        if day_of_week is not None and day is None:
            year = parts.get("year", 1970)
            month = parts.get("month", 1)
            day = 1

            # dddd => first day of week after epoch
            # dddd YYYY => first day of week in specified year
            # dddd MM YYYY => first day of week in specified year and month
            # dddd MM => first day after epoch in specified month
            next_weekday_dt = next_weekday(datetime(year, month, day), day_of_week)
            parts["year"] = next_weekday_dt.year
            parts["month"] = next_weekday_dt.month
            parts["day"] = next_weekday_dt.day

        am_pm = parts.get("am_pm")
        hour = parts.get("hour", 0)

        if am_pm == "pm" and hour < 12:
            hour += 12
        elif am_pm == "am" and hour == 12:
            hour = 0

        # Support for midnight at the end of day
        if hour == 24:
            if parts.get("minute", 0) != 0:
                raise ParserError("Midnight at the end of day must not contain minutes")
            if parts.get("second", 0) != 0:
                raise ParserError("Midnight at the end of day must not contain seconds")
            if parts.get("microsecond", 0) != 0:
                raise ParserError(
                    "Midnight at the end of day must not contain microseconds"
                )
            hour = 0
            day_increment = 1
        else:
            day_increment = 0

        # account for rounding up to 1000000
        microsecond = parts.get("microsecond", 0)
        if microsecond == 1000000:
            microsecond = 0
            second_increment = 1
        else:
            second_increment = 0

        increment = timedelta(days=day_increment, seconds=second_increment)

        return (
            datetime(
                year=parts.get("year", 1),
                month=parts.get("month", 1),
                day=parts.get("day", 1),
                hour=hour,
                minute=parts.get("minute", 0),
                second=parts.get("second", 0),
                microsecond=microsecond,
                tzinfo=parts.get("tzinfo"),
            )
            + increment
        )

    def _parse_multiformat(self, string: str, formats: Iterable[str]) -> datetime:
        """Return the result of the first format in *formats* that matches.

        :param string: the text to parse.
        :param formats: candidate format strings, tried in order.
        :returns: the ``datetime`` produced by the first matching format.
        :raises ParserError: if no format matches; any ``ParserError`` that
            is not a ``ParserMatchError`` raised while parsing propagates
            immediately.
        """
        _datetime: Optional[datetime] = None
        # Record the formats as they are consumed so the error message is
        # complete even when *formats* is a one-shot iterator.
        tried: List[str] = []

        for fmt in formats:
            tried.append(fmt)
            try:
                _datetime = self.parse(string, fmt)
                break
            except ParserMatchError:
                pass

        if _datetime is None:
            supported_formats = ", ".join(tried)
            raise ParserError(
                f"Could not match input {string!r} to any of the following formats: {supported_formats}."
            )

        return _datetime

    # generates a capture group of choices separated by an OR operator
    @staticmethod
    def _generate_choice_re(
        choices: Iterable[str], flags: Union[int, re.RegexFlag] = 0
    ) -> Pattern[str]:
        """Compile a single capture group that alternates over *choices*.

        The choices are joined with ``|`` as-is; they are not regex-escaped.

        :param choices: alternatives to match.
        :param flags: flags passed to ``re.compile``.
        """
        return re.compile(r"({})".format("|".join(choices)), flags=flags)

729

730

class TzinfoParser:
    """Turn timezone expressions into ``tzinfo`` objects."""

    # Optional "(UTC" prefix, optional sign, two-digit hours and optional
    # two-digit minutes with or without a colon. Anchored at the start only.
    _TZINFO_RE: ClassVar[Pattern[str]] = re.compile(
        r"^(?:\(UTC)*([\+\-])?(\d{2})(?:\:?(\d{2}))?"
    )

    @classmethod
    def parse(cls, tzinfo_string: str) -> dt_tzinfo:
        """Resolve *tzinfo_string* to a ``tzinfo``.

        ``"local"`` gives the local timezone; ``"utc"``, ``"UTC"`` and
        ``"Z"`` give UTC. A string matching the offset pattern becomes a
        fixed offset; anything else is looked up with ``tz.gettz``.

        :param tzinfo_string: the timezone expression.
        :returns: the resolved ``tzinfo``.
        :raises ParserError: if the expression cannot be resolved.
        """
        tzinfo: Optional[dt_tzinfo] = None

        if tzinfo_string == "local":
            tzinfo = tz.tzlocal()

        elif tzinfo_string in ["utc", "UTC", "Z"]:
            tzinfo = tz.tzutc()

        else:
            iso_match = cls._TZINFO_RE.match(tzinfo_string)

            if iso_match:
                sign: Optional[str]
                hours: str
                minutes: Union[str, int, None]
                sign, hours, minutes = iso_match.groups()
                # missing minutes count as zero
                seconds = int(hours) * 3600 + int(minutes or 0) * 60

                # an absent sign is treated as positive
                if sign == "-":
                    seconds *= -1

                tzinfo = tz.tzoffset(None, seconds)

            else:
                tzinfo = tz.gettz(tzinfo_string)

        if tzinfo is None:
            raise ParserError(f"Could not parse timezone expression {tzinfo_string!r}.")

        return tzinfo