Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/arrow/parser.py: 82%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

321 statements  

"""Provides the :class:`DateTimeParser <arrow.parser.DateTimeParser>` class, a better way to parse datetime strings."""

2 

3import re 

4from datetime import datetime, timedelta, timezone 

5from datetime import tzinfo as dt_tzinfo 

6from functools import lru_cache 

7from typing import ( 

8 Any, 

9 ClassVar, 

10 Dict, 

11 Iterable, 

12 List, 

13 Literal, 

14 Match, 

15 Optional, 

16 Pattern, 

17 SupportsFloat, 

18 SupportsInt, 

19 Tuple, 

20 TypedDict, 

21 Union, 

22 cast, 

23 overload, 

24) 

25 

26try: 

27 from zoneinfo import ZoneInfo, ZoneInfoNotFoundError 

28except ImportError: 

29 from backports.zoneinfo import ZoneInfo, ZoneInfoNotFoundError # type: ignore[no-redef] 

30 

31from arrow import locales 

32from arrow.constants import DEFAULT_LOCALE 

33from arrow.util import next_weekday, normalize_timestamp 

34 

35 

class ParserError(ValueError):
    """Base exception raised when a datetime string cannot be parsed.

    Notes:
        Derives from the built-in :class:`ValueError`, so callers that already
        catch ``ValueError`` also catch every parser failure.
    """

46 

47 

48# Allows for ParserErrors to be propagated from _build_datetime() 

49# when day_of_year errors occur. 

50# Before this, the ParserErrors were caught by the try/except in 

51# _parse_multiformat() and the appropriate error message was not 

52# transmitted to the user. 

class ParserMatchError(ParserError):
    """Raised when an input string does not match a given format.

    Notes:
        This is the only error swallowed by ``_parse_multiformat()`` while it
        tries each candidate format in turn; a plain :class:`ParserError`
        propagates straight to the caller.
    """

63 

64 

# One element of a parsed ISO week date: either the raw matched text or
# anything ``int()`` can convert.
_WEEKDATE_ELEMENT = Union[str, bytes, SupportsInt, bytearray]

# Every format token understood by the parser.
_FORMAT_TYPE = Literal[
    # year and month number
    "YYYY", "YY", "MM", "M",
    # day of year and day of month
    "DDDD", "DDD", "DD", "D",
    # hour (24h and 12h), minute, second
    "HH", "H", "hh", "h", "mm", "m", "ss", "s",
    # timestamps (seconds and expanded)
    "X", "x",
    # timezone name and offsets
    "ZZZ", "ZZ", "Z",
    # sub-second digits and ISO week date
    "S", "W",
    # locale-dependent month name, ordinal day and weekday name
    "MMMM", "MMM", "Do", "dddd", "ddd",
    # ISO weekday number and meridians
    "d", "a", "A",
]

100 

101 

class _Parts(TypedDict, total=False):
    """Datetime components collected while parsing a string.

    All keys are optional (``total=False``).

    :ivar year: The year as an integer.
    :ivar month: The month as an integer.
    :ivar day_of_year: The day of the year as an integer.
    :ivar day: The day of the month as an integer.
    :ivar hour: The hour as an integer.
    :ivar minute: The minute as an integer.
    :ivar second: The second as an integer.
    :ivar microsecond: The microsecond as an integer.
    :ivar timestamp: A timestamp as a float.
    :ivar expanded_timestamp: An expanded timestamp as an integer.
    :ivar tzinfo: The timezone as a :class:`datetime.tzinfo` object.
    :ivar am_pm: The meridian, either ``"am"`` or ``"pm"``.
    :ivar day_of_week: The day of the week as an integer.
    :ivar weekdate: An ISO week date as a ``(year, week, day)`` tuple whose
        elements are convertible with ``int()``; ``day`` may be ``None``.
    """

    # calendar date
    year: int
    month: int
    day_of_year: int
    day: int
    # time of day
    hour: int
    minute: int
    second: int
    microsecond: int
    # absolute timestamps
    timestamp: float
    expanded_timestamp: int
    # timezone and meridian
    tzinfo: dt_tzinfo
    am_pm: Literal["am", "pm"]
    # weekday and ISO week date
    day_of_week: int
    weekdate: Tuple[_WEEKDATE_ELEMENT, _WEEKDATE_ELEMENT, Optional[_WEEKDATE_ELEMENT]]

140 

141 

class DateTimeParser:
    """A :class:`DateTimeParser <arrow.parser.DateTimeParser>` object

    Contains the regular expressions and functions to parse and split the input strings into tokens and eventually
    produce a datetime that is used by :class:`Arrow <arrow.arrow.Arrow>` internally.

    :param locale: the locale string
    :param cache_size: the size of the LRU cache used for regular expressions. Defaults to 0.

    """

    # Splits a format string into its tokens.
    _FORMAT_RE: ClassVar[Pattern[str]] = re.compile(
        r"(YYY?Y?|MM?M?M?|Do|DD?D?D?|d?d?d?d|HH?|hh?|mm?|ss?|S+|ZZ?Z?|a|A|x|X|W)"
    )
    # Matches a bracketed (escaped) section of a format string, e.g. "[at]".
    _ESCAPE_RE: ClassVar[Pattern[str]] = re.compile(r"\[[^\[\]]*\]")

    _ONE_OR_TWO_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{1,2}")
    _ONE_OR_TWO_OR_THREE_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{1,3}")
    _ONE_OR_MORE_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d+")
    _TWO_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{2}")
    _THREE_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{3}")
    _FOUR_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{4}")
    _TZ_Z_RE: ClassVar[Pattern[str]] = re.compile(r"([\+\-])(\d{2})(?:(\d{2}))?|Z")
    _TZ_ZZ_RE: ClassVar[Pattern[str]] = re.compile(r"([\+\-])(\d{2})(?:\:(\d{2}))?|Z")
    _TZ_NAME_RE: ClassVar[Pattern[str]] = re.compile(r"\w[\w+\-/]+")
    # NOTE: timestamps cannot be parsed from natural language strings (by removing the ^...$) because it will
    # break cases like "15 Jul 2000" and a format list (see issue #447)
    # The fractional part is an optional group so that single-digit
    # timestamps such as "5" match (r"\d+\.?\d+" required at least two digits).
    _TIMESTAMP_RE: ClassVar[Pattern[str]] = re.compile(r"^\-?\d+(?:\.\d+)?$")
    _TIMESTAMP_EXPANDED_RE: ClassVar[Pattern[str]] = re.compile(r"^\-?\d+$")
    _TIME_RE: ClassVar[Pattern[str]] = re.compile(
        r"^(\d{2})(?:\:?(\d{2}))?(?:\:?(\d{2}))?(?:([\.\,])(\d+))?$"
    )
    _WEEK_DATE_RE: ClassVar[Pattern[str]] = re.compile(
        r"(?P<year>\d{4})[\-]?W(?P<week>\d{2})[\-]?(?P<day>\d)?"
    )

    # Locale-independent token -> input pattern map; locale-dependent tokens
    # are added per instance in __init__().
    _BASE_INPUT_RE_MAP: ClassVar[Dict[_FORMAT_TYPE, Pattern[str]]] = {
        "YYYY": _FOUR_DIGIT_RE,
        "YY": _TWO_DIGIT_RE,
        "MM": _TWO_DIGIT_RE,
        "M": _ONE_OR_TWO_DIGIT_RE,
        "DDDD": _THREE_DIGIT_RE,
        "DDD": _ONE_OR_TWO_OR_THREE_DIGIT_RE,
        "DD": _TWO_DIGIT_RE,
        "D": _ONE_OR_TWO_DIGIT_RE,
        "HH": _TWO_DIGIT_RE,
        "H": _ONE_OR_TWO_DIGIT_RE,
        "hh": _TWO_DIGIT_RE,
        "h": _ONE_OR_TWO_DIGIT_RE,
        "mm": _TWO_DIGIT_RE,
        "m": _ONE_OR_TWO_DIGIT_RE,
        "ss": _TWO_DIGIT_RE,
        "s": _ONE_OR_TWO_DIGIT_RE,
        "X": _TIMESTAMP_RE,
        "x": _TIMESTAMP_EXPANDED_RE,
        "ZZZ": _TZ_NAME_RE,
        "ZZ": _TZ_ZZ_RE,
        "Z": _TZ_Z_RE,
        "S": _ONE_OR_MORE_DIGIT_RE,
        "W": _WEEK_DATE_RE,
    }

    SEPARATORS: ClassVar[List[str]] = ["-", "/", "."]

    locale: locales.Locale
    _input_re_map: Dict[_FORMAT_TYPE, Pattern[str]]

    def __init__(self, locale: str = DEFAULT_LOCALE, cache_size: int = 0) -> None:
        """
        Build the per-instance token map for the requested locale.

        :param locale: the locale string
        :type locale: str
        :param cache_size: the size of the LRU cache used for regular expressions. Defaults to 0.
        :type cache_size: int
        """
        self.locale = locales.get_locale(locale)
        self._input_re_map = self._BASE_INPUT_RE_MAP.copy()
        self._input_re_map.update(
            {
                # index 0 of the locale name lists is skipped ([1:])
                "MMMM": self._generate_choice_re(
                    self.locale.month_names[1:], re.IGNORECASE
                ),
                "MMM": self._generate_choice_re(
                    self.locale.month_abbreviations[1:], re.IGNORECASE
                ),
                "Do": re.compile(self.locale.ordinal_day_re),
                "dddd": self._generate_choice_re(
                    self.locale.day_names[1:], re.IGNORECASE
                ),
                "ddd": self._generate_choice_re(
                    self.locale.day_abbreviations[1:], re.IGNORECASE
                ),
                "d": re.compile(r"[1-7]"),
                "a": self._generate_choice_re(
                    (self.locale.meridians["am"], self.locale.meridians["pm"])
                ),
                # note: 'A' token accepts both 'am/pm' and 'AM/PM' formats to
                # ensure backwards compatibility of this token
                "A": self._generate_choice_re(self.locale.meridians.values()),
            }
        )
        if cache_size > 0:
            # Shadow the method on this instance with a memoised version.
            self._generate_pattern_re = lru_cache(maxsize=cache_size)(  # type: ignore
                self._generate_pattern_re
            )

    # TODO: since we support more than ISO 8601, we should rename this function
    # IDEA: break into multiple functions
    def parse_iso(
        self, datetime_string: str, normalize_whitespace: bool = False
    ) -> datetime:
        """
        Parses a datetime string using a ISO 8601-like format.

        :param datetime_string: The datetime string to parse.
        :param normalize_whitespace: Whether to normalize whitespace in the datetime string (default is False).
        :type datetime_string: str
        :type normalize_whitespace: bool
        :returns: The parsed datetime object.
        :rtype: datetime
        :raises ParserError: If the datetime string is not in a valid ISO 8601-like format.

        Usage::
            >>> import arrow.parser
            >>> arrow.parser.DateTimeParser().parse_iso('2021-10-12T14:30:00')
            datetime.datetime(2021, 10, 12, 14, 30)

        """
        if normalize_whitespace:
            datetime_string = re.sub(r"\s+", " ", datetime_string.strip())

        has_space_divider = " " in datetime_string
        has_t_divider = "T" in datetime_string

        # Either exactly one space divides date and time, or a "T" does and
        # then no space may appear at all.
        num_spaces = datetime_string.count(" ")
        if (has_space_divider and num_spaces != 1) or (
            has_t_divider and num_spaces > 0
        ):
            raise ParserError(
                f"Expected an ISO 8601-like string, but was given {datetime_string!r}. "
                "Try passing in a format string to resolve this."
            )

        has_time = has_space_divider or has_t_divider
        has_tz = False

        # date formats (ISO 8601 and others) to test against
        # NOTE: YYYYMM is omitted to avoid confusion with YYMMDD (no longer part of ISO 8601, but is still often used)
        formats = [
            "YYYY-MM-DD",
            "YYYY-M-DD",
            "YYYY-M-D",
            "YYYY/MM/DD",
            "YYYY/M/DD",
            "YYYY/M/D",
            "YYYY.MM.DD",
            "YYYY.M.DD",
            "YYYY.M.D",
            "YYYYMMDD",
            "YYYY-DDDD",
            "YYYYDDDD",
            "YYYY-MM",
            "YYYY/MM",
            "YYYY.MM",
            "YYYY",
            "W",
        ]

        if has_time:
            if has_space_divider:
                date_string, time_string = datetime_string.split(" ", 1)
            else:
                date_string, time_string = datetime_string.split("T", 1)

            # Split the clock time from a trailing UTC offset / "Z" marker.
            time_parts = re.split(
                r"[\+\-Z]", time_string, maxsplit=1, flags=re.IGNORECASE
            )

            time_components: Optional[Match[str]] = self._TIME_RE.match(time_parts[0])

            if time_components is None:
                raise ParserError(
                    "Invalid time component provided. "
                    "Please specify a format or provide a valid time component in the basic or extended ISO 8601 time format."
                )

            (
                hours,
                minutes,
                seconds,
                subseconds_sep,
                subseconds,
            ) = time_components.groups()

            has_tz = len(time_parts) == 2
            has_minutes = minutes is not None
            has_seconds = seconds is not None
            has_subseconds = subseconds is not None

            is_basic_time_format = ":" not in time_parts[0]
            tz_format = "Z"

            # use 'ZZ' token instead since tz offset is present in non-basic format
            if has_tz and ":" in time_parts[1]:
                tz_format = "ZZ"

            time_sep = "" if is_basic_time_format else ":"

            # Build the time format from the components actually present.
            if has_subseconds:
                time_string = f"HH{time_sep}mm{time_sep}ss{subseconds_sep}S"
            elif has_seconds:
                time_string = f"HH{time_sep}mm{time_sep}ss"
            elif has_minutes:
                time_string = f"HH{time_sep}mm"
            else:
                time_string = "HH"

            if has_space_divider:
                formats = [f"{f} {time_string}" for f in formats]
            else:
                formats = [f"{f}T{time_string}" for f in formats]

        if has_time and has_tz:
            # Add "Z" or "ZZ" to the format strings to indicate to
            # _parse_token() that a timezone needs to be parsed
            formats = [f"{f}{tz_format}" for f in formats]

        return self._parse_multiformat(datetime_string, formats)

    def parse(
        self,
        datetime_string: str,
        fmt: Union[List[str], str],
        normalize_whitespace: bool = False,
    ) -> datetime:
        """
        Parses a datetime string using a specified format.

        :param datetime_string: The datetime string to parse.
        :param fmt: The format string or list of format strings to use for parsing.
        :param normalize_whitespace: Whether to normalize whitespace in the datetime string (default is False).
        :type datetime_string: str
        :type fmt: Union[List[str], str]
        :type normalize_whitespace: bool
        :returns: The parsed datetime object.
        :rtype: datetime
        :raises ParserMatchError: If the datetime string does not match the specified format.

        Usage::

            >>> import arrow.parser
            >>> arrow.parser.DateTimeParser().parse('2021-10-12 14:30:00', 'YYYY-MM-DD HH:mm:ss')
            datetime.datetime(2021, 10, 12, 14, 30)


        """
        if normalize_whitespace:
            datetime_string = re.sub(r"\s+", " ", datetime_string)

        if isinstance(fmt, list):
            return self._parse_multiformat(datetime_string, fmt)

        try:
            fmt_tokens: List[_FORMAT_TYPE]
            fmt_pattern_re: Pattern[str]
            fmt_tokens, fmt_pattern_re = self._generate_pattern_re(fmt)
        except re.error as e:
            raise ParserMatchError(
                f"Failed to generate regular expression pattern: {e}."
            ) from e

        match = fmt_pattern_re.search(datetime_string)

        if match is None:
            raise ParserMatchError(
                f"Failed to match {fmt!r} when parsing {datetime_string!r}."
            )

        parts: _Parts = {}
        for token in fmt_tokens:
            value: Union[Tuple[str, str, str], str]
            if token == "Do":
                # the locale's ordinal-day pattern exposes the number as "value"
                value = match.group("value")
            elif token == "W":
                value = (match.group("year"), match.group("week"), match.group("day"))
            else:
                value = match.group(token)

            if value is None:
                raise ParserMatchError(
                    f"Unable to find a match group for the specified token {token!r}."
                )

            self._parse_token(token, value, parts)  # type: ignore[arg-type]

        return self._build_datetime(parts)

    def _generate_pattern_re(self, fmt: str) -> Tuple[List[_FORMAT_TYPE], Pattern[str]]:
        """
        Generates a regular expression pattern from a format string.

        :param fmt: The format string to convert into a regular expression pattern.
        :type fmt: str
        :returns: A tuple containing a list of format tokens and the corresponding regular expression pattern.
        :rtype: Tuple[List[_FORMAT_TYPE], Pattern[str]]
        :raises ParserError: If an unrecognized token is encountered in the format string.
        """
        # fmt is a string of tokens like 'YYYY-MM-DD'
        # we construct a new string by replacing each
        # token by its pattern:
        # 'YYYY-MM-DD' -> '(?P<YYYY>\d{4})-(?P<MM>\d{2})-(?P<DD>\d{2})'
        tokens: List[_FORMAT_TYPE] = []
        offset = 0

        # Escape all special RegEx chars
        escaped_fmt = re.escape(fmt)

        # Extract the bracketed expressions to be reinserted later.
        escaped_fmt = re.sub(self._ESCAPE_RE, "#", escaped_fmt)

        # Any number of S is the same as one.
        # TODO: allow users to specify the number of digits to parse
        escaped_fmt = re.sub(r"S+", "S", escaped_fmt)

        escaped_data = re.findall(self._ESCAPE_RE, fmt)

        fmt_pattern = escaped_fmt

        for m in self._FORMAT_RE.finditer(escaped_fmt):
            token: _FORMAT_TYPE = cast(_FORMAT_TYPE, m.group(0))
            input_re = self._input_re_map.get(token)
            if input_re is None:
                # _FORMAT_RE also matches runs such as "YYY" or "dd" that
                # have no entry in the token map.
                raise ParserError(f"Unrecognized token {token!r}.")
            input_pattern = f"(?P<{token}>{input_re.pattern})"
            tokens.append(token)
            # a pattern doesn't have the same length as the token
            # it replaces! We keep the difference in the offset variable.
            # This works because the string is scanned left-to-right and matches
            # are returned in the order found by finditer.
            fmt_pattern = (
                fmt_pattern[: m.start() + offset]
                + input_pattern
                + fmt_pattern[m.end() + offset :]
            )
            offset += len(input_pattern) - (m.end() - m.start())

        # Re-insert the bracketed sections (without their brackets) at the
        # placeholders. Due to the way Python splits, the split list is
        # always one longer than the list of bracketed sections.
        pieces: List[str] = []
        for i, chunk in enumerate(fmt_pattern.split(r"\#")):
            pieces.append(chunk)
            if i < len(escaped_data):
                pieces.append(escaped_data[i][1:-1])
        final_fmt_pattern = "".join(pieces)

        # Wrap final_fmt_pattern in a custom word boundary to strictly
        # match the formatting pattern and filter out date and time formats
        # that include junk such as: blah1998-09-12 blah, blah 1998-09-12blah,
        # blah1998-09-12blah. The custom word boundary matches every character
        # that is not a whitespace character to allow for searching for a date
        # and time string in a natural language sentence. Therefore, searching
        # for a string of the form YYYY-MM-DD in "blah 1998-09-12 blah" will
        # work properly.
        # Certain punctuation before or after the target pattern such as
        # "1998-09-12," is permitted. For the full list of valid punctuation,
        # see the documentation.

        starting_word_boundary = (
            r"(?<!\S\S)"  # Don't have two consecutive non-whitespace characters. This ensures that we allow cases
            # like .11.25.2019 but not 1.11.25.2019 (for pattern MM.DD.YYYY)
            r"(?<![^\,\.\;\:\?\!\"\'\`\[\]\{\}\(\)<>\s])"  # This is the list of punctuation that is ok before the
            # pattern (i.e. "It can't not be these characters before the pattern")
            r"(\b|^)"
            # The \b is to block cases like 1201912 but allow 201912 for pattern YYYYMM. The ^ was necessary to allow a
            # negative number through i.e. before epoch numbers
        )
        ending_word_boundary = (
            r"(?=[\,\.\;\:\?\!\"\'\`\[\]\{\}\(\)\<\>]?"  # Positive lookahead stating that these punctuation marks
            # can appear after the pattern at most 1 time
            r"(?!\S))"  # Don't allow any non-whitespace character after the punctuation
        )
        bounded_fmt_pattern = (
            f"{starting_word_boundary}{final_fmt_pattern}{ending_word_boundary}"
        )

        return tokens, re.compile(bounded_fmt_pattern, flags=re.IGNORECASE)

    @overload
    def _parse_token(
        self,
        token: Literal[
            "YYYY",
            "YY",
            "MM",
            "M",
            "DDDD",
            "DDD",
            "DD",
            "D",
            "Do",
            "HH",
            "hh",
            "h",
            "H",
            "mm",
            "m",
            "ss",
            "s",
            "x",
        ],
        value: Union[str, bytes, SupportsInt, bytearray],
        parts: _Parts,
    ) -> None:
        ...  # pragma: no cover

    @overload
    def _parse_token(
        self,
        token: Literal["X"],
        value: Union[str, bytes, SupportsFloat, bytearray],
        parts: _Parts,
    ) -> None:
        ...  # pragma: no cover

    @overload
    def _parse_token(
        self,
        token: Literal["MMMM", "MMM", "dddd", "ddd", "S"],
        value: Union[str, bytes, bytearray],
        parts: _Parts,
    ) -> None:
        ...  # pragma: no cover

    @overload
    def _parse_token(
        self,
        token: Literal["a", "A", "ZZZ", "ZZ", "Z"],
        value: Union[str, bytes],
        parts: _Parts,
    ) -> None:
        ...  # pragma: no cover

    @overload
    def _parse_token(
        self,
        token: Literal["W"],
        value: Tuple[_WEEKDATE_ELEMENT, _WEEKDATE_ELEMENT, Optional[_WEEKDATE_ELEMENT]],
        parts: _Parts,
    ) -> None:
        ...  # pragma: no cover

    def _parse_token(
        self,
        token: Any,
        value: Any,
        parts: _Parts,
    ) -> None:
        """
        Parse a token and its value, and update the `_Parts` dictionary with the parsed values.

        Unknown tokens are ignored. Meridian values ("a"/"A") are compared
        case-insensitively, because the format pattern itself is compiled
        with ``re.IGNORECASE``.

        :param token: The token to parse.
        :type token: Any
        :param value: The value of the token.
        :type value: Any
        :param parts: A dictionary to update with the parsed values.
        :type parts: _Parts
        :raises ParserMatchError: If an "am" meridian is parsed after an hour that is not between 0 and 12 inclusive.

        """
        if token == "YYYY":
            parts["year"] = int(value)

        elif token == "YY":
            value = int(value)
            # two-digit years: 69-99 -> 1969-1999, 00-68 -> 2000-2068
            parts["year"] = 1900 + value if value > 68 else 2000 + value

        elif token in ["MMMM", "MMM"]:
            # FIXME: month_number() is nullable
            parts["month"] = self.locale.month_number(value.lower())  # type: ignore[typeddict-item]

        elif token in ["MM", "M"]:
            parts["month"] = int(value)

        elif token in ["DDDD", "DDD"]:
            parts["day_of_year"] = int(value)

        elif token in ["DD", "D"]:
            parts["day"] = int(value)

        elif token == "Do":
            parts["day"] = int(value)

        elif token == "dddd":
            # locale day names are 1-indexed
            day_of_week = [x.lower() for x in self.locale.day_names].index(
                value.lower()
            )
            parts["day_of_week"] = day_of_week - 1

        elif token == "ddd":
            # locale day abbreviations are 1-indexed
            day_of_week = [x.lower() for x in self.locale.day_abbreviations].index(
                value.lower()
            )
            parts["day_of_week"] = day_of_week - 1

        elif token.upper() in ["HH", "H"]:
            parts["hour"] = int(value)

        elif token in ["mm", "m"]:
            parts["minute"] = int(value)

        elif token in ["ss", "s"]:
            parts["second"] = int(value)

        elif token == "S":
            # We have the *most significant* digits of an arbitrary-precision integer.
            # We want the six most significant digits as an integer, rounded.
            # IDEA: add nanosecond support somehow? Need datetime support for it first.
            value = value.ljust(7, "0")

            # floating-point (IEEE-754) defaults to half-to-even rounding
            seventh_digit = int(value[6])
            if seventh_digit == 5:
                rounding = int(value[5]) % 2
            elif seventh_digit > 5:
                rounding = 1
            else:
                rounding = 0

            parts["microsecond"] = int(value[:6]) + rounding

        elif token == "X":
            parts["timestamp"] = float(value)

        elif token == "x":
            parts["expanded_timestamp"] = int(value)

        elif token in ["ZZZ", "ZZ", "Z"]:
            parts["tzinfo"] = TzinfoParser.parse(value)

        elif token in ["a", "A"]:
            # The bounded pattern is compiled with IGNORECASE, so mixed-case
            # input such as "Pm" reaches this point; compare case-insensitively
            # so the meridian is not silently dropped.
            meridians = self.locale.meridians
            folded = value.lower()
            if folded in (meridians["am"].lower(), meridians["AM"].lower()):
                parts["am_pm"] = "am"
                if "hour" in parts and not 0 <= parts["hour"] <= 12:
                    raise ParserMatchError(
                        f"Hour token value must be between 0 and 12 inclusive for token {token!r}."
                    )
            elif folded in (meridians["pm"].lower(), meridians["PM"].lower()):
                parts["am_pm"] = "pm"
        elif token == "W":
            parts["weekdate"] = value

    @staticmethod
    def _build_datetime(parts: _Parts) -> datetime:
        """
        Build a datetime object from a dictionary of date parts.

        A ``timestamp`` or ``expanded_timestamp`` part takes precedence and is
        returned as a UTC datetime. Otherwise a week date, day of year or day
        of week is first resolved to year/month/day, and missing components
        default to year 1, month 1, day 1 and zero time.

        :param parts: A dictionary containing the date parts extracted from a date string.
        :type parts: dict
        :return: A datetime object representing the date and time.
        :rtype: datetime.datetime
        :raises ParserError: If the week date or day of year is invalid, if
            the day-of-year token is combined with a month or used without a
            year, or if hour 24 comes with a non-zero minute, second or
            microsecond.
        """
        weekdate = parts.get("weekdate")

        if weekdate is not None:
            year, week = int(weekdate[0]), int(weekdate[1])

            if weekdate[2] is not None:
                _day = int(weekdate[2])
            else:
                # day not given, default to 1
                _day = 1

            date_string = f"{year}-{week}-{_day}"

            # tokens for ISO 8601 weekdates
            try:
                dt = datetime.strptime(date_string, "%G-%V-%u")
            except ValueError as e:
                # Mirror the day-of-year handling below rather than leaking
                # strptime's ValueError to the caller.
                raise ParserError(
                    f"The provided ISO week date {date_string!r} is invalid."
                ) from e

            parts["year"] = dt.year
            parts["month"] = dt.month
            parts["day"] = dt.day

        timestamp = parts.get("timestamp")

        if timestamp is not None:
            return datetime.fromtimestamp(timestamp, tz=timezone.utc)

        expanded_timestamp = parts.get("expanded_timestamp")

        if expanded_timestamp is not None:
            return datetime.fromtimestamp(
                normalize_timestamp(expanded_timestamp),
                tz=timezone.utc,
            )

        day_of_year = parts.get("day_of_year")

        if day_of_year is not None:
            _year = parts.get("year")
            month = parts.get("month")
            if _year is None:
                raise ParserError(
                    "Year component is required with the DDD and DDDD tokens."
                )

            if month is not None:
                raise ParserError(
                    "Month component is not allowed with the DDD and DDDD tokens."
                )

            date_string = f"{_year}-{day_of_year}"
            try:
                dt = datetime.strptime(date_string, "%Y-%j")
            except ValueError as e:
                raise ParserError(
                    f"The provided day of year {day_of_year!r} is invalid."
                ) from e

            parts["year"] = dt.year
            parts["month"] = dt.month
            parts["day"] = dt.day

        day_of_week: Optional[int] = parts.get("day_of_week")
        day = parts.get("day")

        # If day is passed, ignore day of week
        if day_of_week is not None and day is None:
            year = parts.get("year", 1970)
            month = parts.get("month", 1)
            day = 1

            # dddd => first day of week after epoch
            # dddd YYYY => first day of week in specified year
            # dddd MM YYYY => first day of week in specified year and month
            # dddd MM => first day after epoch in specified month
            next_weekday_dt = next_weekday(datetime(year, month, day), day_of_week)
            parts["year"] = next_weekday_dt.year
            parts["month"] = next_weekday_dt.month
            parts["day"] = next_weekday_dt.day

        am_pm = parts.get("am_pm")
        hour = parts.get("hour", 0)

        # Convert a 12-hour clock reading to 24-hour.
        if am_pm == "pm" and hour < 12:
            hour += 12
        elif am_pm == "am" and hour == 12:
            hour = 0

        # Support for midnight at the end of day
        if hour == 24:
            if parts.get("minute", 0) != 0:
                raise ParserError("Midnight at the end of day must not contain minutes")
            if parts.get("second", 0) != 0:
                raise ParserError("Midnight at the end of day must not contain seconds")
            if parts.get("microsecond", 0) != 0:
                raise ParserError(
                    "Midnight at the end of day must not contain microseconds"
                )
            hour = 0
            day_increment = 1
        else:
            day_increment = 0

        # account for rounding up to 1000000
        microsecond = parts.get("microsecond", 0)
        if microsecond == 1000000:
            microsecond = 0
            second_increment = 1
        else:
            second_increment = 0

        increment = timedelta(days=day_increment, seconds=second_increment)

        return (
            datetime(
                year=parts.get("year", 1),
                month=parts.get("month", 1),
                day=parts.get("day", 1),
                hour=hour,
                minute=parts.get("minute", 0),
                second=parts.get("second", 0),
                microsecond=microsecond,
                tzinfo=parts.get("tzinfo"),
            )
            + increment
        )

    def _parse_multiformat(self, string: str, formats: Iterable[str]) -> datetime:
        """
        Parse a date and time string using multiple formats.

        Tries to parse the provided string with each format in the given `formats`
        iterable, returning the resulting `datetime` object for the first format that
        matches. If no format matches the string, a `ParserError` is raised.

        :param string: The date and time string to parse.
        :type string: str
        :param formats: An iterable of date and time format strings to try, in order.
        :type formats: Iterable[str]
        :returns: The parsed date and time.
        :rtype: datetime.datetime
        :raises ParserError: If no format matches the input string.
        """
        # Materialise once: the formats are iterated here and again for the
        # error message, which would come out empty for a one-shot iterable.
        candidates = list(formats)

        for fmt in candidates:
            try:
                return self.parse(string, fmt)
            except ParserMatchError:
                # only a failed match moves on to the next format; any other
                # ParserError propagates
                continue

        supported_formats = ", ".join(candidates)
        raise ParserError(
            f"Could not match input {string!r} to any of the following formats: {supported_formats}."
        )

    # generates a capture group of choices separated by an OR operator
    @staticmethod
    def _generate_choice_re(
        choices: Iterable[str], flags: Union[int, re.RegexFlag] = 0
    ) -> Pattern[str]:
        """
        Generate a regular expression pattern that matches a choice from an iterable.

        Each choice is matched literally: regex metacharacters in a choice
        (for example the "." in an abbreviation) are escaped before the
        choices are joined with the '|' (OR) operator inside a capture group.

        :param choices: An iterable of strings to match.
        :type choices: Iterable[str]
        :param flags: Optional regular expression flags. Default is 0.
        :type flags: Union[int, re.RegexFlag], optional
        :returns: A compiled regular expression pattern that matches any of the choices.
        :rtype: re.Pattern[str]
        """
        alternatives = "|".join(re.escape(choice) for choice in choices)
        return re.compile(f"({alternatives})", flags=flags)

888 

889 

class TzinfoParser:
    """
    Parser for timezone information.

    Accepts ``"local"``, UTC markers (``"utc"``, ``"UTC"``, ``"Z"``, ``"z"``),
    numeric offsets such as ``"+01:00"``, ``"-0530"`` or ``"05"``, and
    otherwise treats the string as a :class:`ZoneInfo` key.
    """

    # Optional "(UTC" prefix, optional sign, two-digit hours and optional
    # two-digit minutes with an optional colon. Not anchored at the end.
    _TZINFO_RE: ClassVar[Pattern[str]] = re.compile(
        r"^(?:\(UTC)*([\+\-])?(\d{2})(?:\:?(\d{2}))?"
    )

    @classmethod
    def parse(cls, tzinfo_string: str) -> dt_tzinfo:
        """
        Parse a timezone string and return a datetime timezone object.

        :param tzinfo_string: The timezone string to parse.
        :type tzinfo_string: str
        :returns: The parsed datetime timezone object.
        :rtype: datetime.timezone
        :raises ParserError: If the timezone string cannot be parsed.
        """
        tzinfo: Optional[dt_tzinfo] = None

        if tzinfo_string == "local":
            tzinfo = datetime.now().astimezone().tzinfo

        # "z" is included because the token patterns are compiled with
        # IGNORECASE and therefore hand a lowercase "z" to this method.
        elif tzinfo_string in ("utc", "UTC", "Z", "z"):
            tzinfo = timezone.utc

        else:
            iso_match = cls._TZINFO_RE.match(tzinfo_string)

            if iso_match:
                sign: Optional[str]
                hours: str
                minutes: Optional[str]
                sign, hours, minutes = iso_match.groups()
                seconds = int(hours) * 3600 + int(minutes or 0) * 60

                if sign == "-":
                    seconds *= -1

                tzinfo = timezone(timedelta(seconds=seconds))

            else:
                try:
                    tzinfo = ZoneInfo(tzinfo_string)
                except (ZoneInfoNotFoundError, ValueError, OSError):
                    # ZoneInfo raises ValueError for absolute or
                    # non-normalised keys and may raise OSError for names
                    # that resolve to a directory; report all of them as an
                    # unparseable timezone below.
                    tzinfo = None

        if tzinfo is None:
            raise ParserError(f"Could not parse timezone expression {tzinfo_string!r}.")

        return tzinfo