Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/arrow/parser.py: 87%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

315 statements  

1"""Provides the :class:`Arrow <arrow.parser.DateTimeParser>` class, a better way to parse datetime strings.""" 

2 

3import re 

4from datetime import datetime, timedelta 

5from datetime import tzinfo as dt_tzinfo 

6from functools import lru_cache 

7from typing import ( 

8 Any, 

9 ClassVar, 

10 Dict, 

11 Iterable, 

12 List, 

13 Literal, 

14 Match, 

15 Optional, 

16 Pattern, 

17 SupportsFloat, 

18 SupportsInt, 

19 Tuple, 

20 TypedDict, 

21 Union, 

22 cast, 

23 overload, 

24) 

25 

26from dateutil import tz 

27 

28from arrow import locales 

29from arrow.constants import DEFAULT_LOCALE 

30from arrow.util import next_weekday, normalize_timestamp 

31 

32 

class ParserError(ValueError):
    """Base error raised when a datetime string cannot be parsed.

    Notes:
        Derives from the built-in ``ValueError``, so callers that already
        catch ``ValueError`` also catch every parsing failure.
    """

43 

44 

# A dedicated subclass for "this format did not match" failures.
# _parse_multiformat() swallows only this subclass while it tries the next
# candidate format, so plain ParserErrors raised by _build_datetime()
# (for example day_of_year errors) still reach the user with their message.
class ParserMatchError(ParserError):
    """Raised when an input string does not match the format being tried.

    Notes:
        Being a :class:`ParserError` subclass, it is also caught by handlers
        written for the more general parsing error.
    """

60 

61 

# One component (year, week or day) of a week date, before conversion by int().
_WEEKDATE_ELEMENT = Union[str, bytes, SupportsInt, bytearray]

# Every token that may appear in a format string handed to the parser.
_FORMAT_TYPE = Literal[
    # year and numeric month
    "YYYY", "YY", "MM", "M",
    # day of year and day of month
    "DDDD", "DDD", "DD", "D",
    # hour (24h and 12h), minute, second
    "HH", "H", "hh", "h", "mm", "m", "ss", "s",
    # timestamps
    "X", "x",
    # timezone
    "ZZZ", "ZZ", "Z",
    # sub-second and week date
    "S", "W",
    # locale-dependent month names and ordinal day
    "MMMM", "MMM", "Do",
    # day of week
    "dddd", "ddd", "d",
    # meridian
    "a", "A",
]

97 

98 

class _Parts(TypedDict, total=False):
    """Scratch dictionary filled in while the tokens of one input are parsed.

    Declared with ``total=False``: every key is optional and is only present
    once the corresponding token has been parsed from the input string.
    """

    # Calendar fields.
    year: int
    month: int
    day_of_year: int
    day: int
    # Time-of-day fields.
    hour: int
    minute: int
    second: int
    microsecond: int
    # Timestamp given by the "X" token (stored as a float).
    timestamp: float
    # Timestamp given by the "x" token (stored as an int).
    expanded_timestamp: int
    # Timezone parsed from a "ZZZ", "ZZ" or "Z" token.
    tzinfo: dt_tzinfo
    # Meridian indicator, normalized to lowercase.
    am_pm: Literal["am", "pm"]
    # Day of week derived from a "dddd" / "ddd" token.
    day_of_week: int
    # (year, week, day) from the "W" token; the day element may be None.
    weekdate: Tuple[_WEEKDATE_ELEMENT, _WEEKDATE_ELEMENT, Optional[_WEEKDATE_ELEMENT]]

137 

138 

class DateTimeParser:
    """A :class:`DateTimeParser <arrow.parser.DateTimeParser>` object.

    Contains the regular expressions and functions to parse and split the input strings into tokens and eventually
    produce a datetime that is used by :class:`Arrow <arrow.arrow.Arrow>` internally.

    :param locale: the locale string
    :param cache_size: the size of the LRU cache used for regular expressions. Defaults to 0.

    """

    # Recognizes the format tokens inside a format string.
    _FORMAT_RE: ClassVar[Pattern[str]] = re.compile(
        r"(YYY?Y?|MM?M?M?|Do|DD?D?D?|d?d?d?d|HH?|hh?|mm?|ss?|S+|ZZ?Z?|a|A|x|X|W)"
    )
    # Recognizes a bracketed (escaped) section of a format string.
    _ESCAPE_RE: ClassVar[Pattern[str]] = re.compile(r"\[[^\[\]]*\]")

    _ONE_OR_TWO_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{1,2}")
    _ONE_OR_TWO_OR_THREE_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{1,3}")
    _ONE_OR_MORE_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d+")
    _TWO_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{2}")
    _THREE_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{3}")
    _FOUR_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{4}")
    _TZ_Z_RE: ClassVar[Pattern[str]] = re.compile(r"([\+\-])(\d{2})(?:(\d{2}))?|Z")
    _TZ_ZZ_RE: ClassVar[Pattern[str]] = re.compile(r"([\+\-])(\d{2})(?:\:(\d{2}))?|Z")
    _TZ_NAME_RE: ClassVar[Pattern[str]] = re.compile(r"\w[\w+\-/]+")
    # NOTE: timestamps cannot be parsed from natural language strings (by removing the ^...$) because it will
    # break cases like "15 Jul 2000" and a format list (see issue #447)
    _TIMESTAMP_RE: ClassVar[Pattern[str]] = re.compile(r"^\-?\d+\.?\d+$")
    _TIMESTAMP_EXPANDED_RE: ClassVar[Pattern[str]] = re.compile(r"^\-?\d+$")
    _TIME_RE: ClassVar[Pattern[str]] = re.compile(
        r"^(\d{2})(?:\:?(\d{2}))?(?:\:?(\d{2}))?(?:([\.\,])(\d+))?$"
    )
    _WEEK_DATE_RE: ClassVar[Pattern[str]] = re.compile(
        r"(?P<year>\d{4})[\-]?W(?P<week>\d{2})[\-]?(?P<day>\d)?"
    )

    # Locale-independent token -> input pattern map; __init__ copies it and
    # adds the locale-dependent tokens.
    _BASE_INPUT_RE_MAP: ClassVar[Dict[_FORMAT_TYPE, Pattern[str]]] = {
        "YYYY": _FOUR_DIGIT_RE,
        "YY": _TWO_DIGIT_RE,
        "MM": _TWO_DIGIT_RE,
        "M": _ONE_OR_TWO_DIGIT_RE,
        "DDDD": _THREE_DIGIT_RE,
        "DDD": _ONE_OR_TWO_OR_THREE_DIGIT_RE,
        "DD": _TWO_DIGIT_RE,
        "D": _ONE_OR_TWO_DIGIT_RE,
        "HH": _TWO_DIGIT_RE,
        "H": _ONE_OR_TWO_DIGIT_RE,
        "hh": _TWO_DIGIT_RE,
        "h": _ONE_OR_TWO_DIGIT_RE,
        "mm": _TWO_DIGIT_RE,
        "m": _ONE_OR_TWO_DIGIT_RE,
        "ss": _TWO_DIGIT_RE,
        "s": _ONE_OR_TWO_DIGIT_RE,
        "X": _TIMESTAMP_RE,
        "x": _TIMESTAMP_EXPANDED_RE,
        "ZZZ": _TZ_NAME_RE,
        "ZZ": _TZ_ZZ_RE,
        "Z": _TZ_Z_RE,
        "S": _ONE_OR_MORE_DIGIT_RE,
        "W": _WEEK_DATE_RE,
    }

    SEPARATORS: ClassVar[List[str]] = ["-", "/", "."]

    locale: locales.Locale
    _input_re_map: Dict[_FORMAT_TYPE, Pattern[str]]

    def __init__(self, locale: str = DEFAULT_LOCALE, cache_size: int = 0) -> None:
        """
        Build the per-instance token map for the given locale.

        :param locale: the locale string
        :type locale: str
        :param cache_size: the size of the LRU cache used for regular expressions. Defaults to 0.
        :type cache_size: int
        """
        self.locale = locales.get_locale(locale)
        self._input_re_map = self._BASE_INPUT_RE_MAP.copy()
        self._input_re_map.update(
            {
                "MMMM": self._generate_choice_re(
                    self.locale.month_names[1:], re.IGNORECASE
                ),
                "MMM": self._generate_choice_re(
                    self.locale.month_abbreviations[1:], re.IGNORECASE
                ),
                "Do": re.compile(self.locale.ordinal_day_re),
                "dddd": self._generate_choice_re(
                    self.locale.day_names[1:], re.IGNORECASE
                ),
                "ddd": self._generate_choice_re(
                    self.locale.day_abbreviations[1:], re.IGNORECASE
                ),
                "d": re.compile(r"[1-7]"),
                "a": self._generate_choice_re(
                    (self.locale.meridians["am"], self.locale.meridians["pm"])
                ),
                # note: 'A' token accepts both 'am/pm' and 'AM/PM' formats to
                # ensure backwards compatibility of this token
                "A": self._generate_choice_re(self.locale.meridians.values()),
            }
        )
        if cache_size > 0:
            # Shadow the method on this instance with a cached wrapper.
            self._generate_pattern_re = lru_cache(maxsize=cache_size)(  # type: ignore
                self._generate_pattern_re
            )

    # TODO: since we support more than ISO 8601, we should rename this function
    # IDEA: break into multiple functions
    def parse_iso(
        self, datetime_string: str, normalize_whitespace: bool = False
    ) -> datetime:
        """
        Parses a datetime string using a ISO 8601-like format.

        :param datetime_string: The datetime string to parse.
        :param normalize_whitespace: Whether to normalize whitespace in the datetime string (default is False).
        :type datetime_string: str
        :type normalize_whitespace: bool
        :returns: The parsed datetime object.
        :rtype: datetime
        :raises ParserError: If the datetime string is not in a valid ISO 8601-like format.

        Usage::
            >>> import arrow.parser
            >>> arrow.parser.DateTimeParser().parse_iso('2021-10-12T14:30:00')
            datetime.datetime(2021, 10, 12, 14, 30)

        """
        if normalize_whitespace:
            datetime_string = re.sub(r"\s+", " ", datetime_string.strip())

        has_space_divider = " " in datetime_string
        has_t_divider = "T" in datetime_string

        num_spaces = datetime_string.count(" ")
        if has_space_divider and num_spaces != 1 or has_t_divider and num_spaces > 0:
            raise ParserError(
                f"Expected an ISO 8601-like string, but was given {datetime_string!r}. "
                "Try passing in a format string to resolve this."
            )

        has_time = has_space_divider or has_t_divider
        has_tz = False

        # date formats (ISO 8601 and others) to test against
        # NOTE: YYYYMM is omitted to avoid confusion with YYMMDD (no longer part of ISO 8601, but is still often used)
        formats = [
            "YYYY-MM-DD",
            "YYYY-M-DD",
            "YYYY-M-D",
            "YYYY/MM/DD",
            "YYYY/M/DD",
            "YYYY/M/D",
            "YYYY.MM.DD",
            "YYYY.M.DD",
            "YYYY.M.D",
            "YYYYMMDD",
            "YYYY-DDDD",
            "YYYYDDDD",
            "YYYY-MM",
            "YYYY/MM",
            "YYYY.MM",
            "YYYY",
            "W",
        ]

        if has_time:
            if has_space_divider:
                date_string, time_string = datetime_string.split(" ", 1)
            else:
                date_string, time_string = datetime_string.split("T", 1)

            # Split the time from a trailing timezone designator, if any.
            time_parts = re.split(
                r"[\+\-Z]", time_string, maxsplit=1, flags=re.IGNORECASE
            )

            time_components: Optional[Match[str]] = self._TIME_RE.match(time_parts[0])

            if time_components is None:
                raise ParserError(
                    "Invalid time component provided. "
                    "Please specify a format or provide a valid time component in the basic or extended ISO 8601 time format."
                )

            (
                hours,
                minutes,
                seconds,
                subseconds_sep,
                subseconds,
            ) = time_components.groups()

            has_tz = len(time_parts) == 2
            has_minutes = minutes is not None
            has_seconds = seconds is not None
            has_subseconds = subseconds is not None

            is_basic_time_format = ":" not in time_parts[0]
            tz_format = "Z"

            # use 'ZZ' token instead since tz offset is present in non-basic format
            if has_tz and ":" in time_parts[1]:
                tz_format = "ZZ"

            time_sep = "" if is_basic_time_format else ":"

            if has_subseconds:
                time_string = f"HH{time_sep}mm{time_sep}ss{subseconds_sep}S"
            elif has_seconds:
                time_string = f"HH{time_sep}mm{time_sep}ss"
            elif has_minutes:
                time_string = f"HH{time_sep}mm"
            else:
                time_string = "HH"

            if has_space_divider:
                formats = [f"{f} {time_string}" for f in formats]
            else:
                formats = [f"{f}T{time_string}" for f in formats]

        if has_time and has_tz:
            # Add "Z" or "ZZ" to the format strings to indicate to
            # _parse_token() that a timezone needs to be parsed
            formats = [f"{f}{tz_format}" for f in formats]

        return self._parse_multiformat(datetime_string, formats)

    def parse(
        self,
        datetime_string: str,
        fmt: Union[List[str], str],
        normalize_whitespace: bool = False,
    ) -> datetime:
        """
        Parses a datetime string using a specified format.

        :param datetime_string: The datetime string to parse.
        :param fmt: The format string or list of format strings to use for parsing.
        :param normalize_whitespace: Whether to normalize whitespace in the datetime string (default is False).
        :type datetime_string: str
        :type fmt: Union[List[str], str]
        :type normalize_whitespace: bool
        :returns: The parsed datetime object.
        :rtype: datetime
        :raises ParserMatchError: If the datetime string does not match the specified format.

        Usage::

            >>> import arrow.parser
            >>> arrow.parser.DateTimeParser().parse('2021-10-12 14:30:00', 'YYYY-MM-DD HH:mm:ss')
            datetime.datetime(2021, 10, 12, 14, 30)


        """
        if normalize_whitespace:
            datetime_string = re.sub(r"\s+", " ", datetime_string)

        if isinstance(fmt, list):
            return self._parse_multiformat(datetime_string, fmt)

        try:
            fmt_tokens: List[_FORMAT_TYPE]
            fmt_pattern_re: Pattern[str]
            fmt_tokens, fmt_pattern_re = self._generate_pattern_re(fmt)
        except re.error as e:
            # Keep the regex error attached as the cause for debugging.
            raise ParserMatchError(
                f"Failed to generate regular expression pattern: {e}."
            ) from e

        match = fmt_pattern_re.search(datetime_string)

        if match is None:
            raise ParserMatchError(
                f"Failed to match {fmt!r} when parsing {datetime_string!r}."
            )

        parts: _Parts = {}
        for token in fmt_tokens:
            value: Union[Tuple[str, str, str], str]
            if token == "Do":
                value = match.group("value")
            elif token == "W":
                value = (match.group("year"), match.group("week"), match.group("day"))
            else:
                value = match.group(token)

            if value is None:
                raise ParserMatchError(
                    f"Unable to find a match group for the specified token {token!r}."
                )

            self._parse_token(token, value, parts)  # type: ignore[arg-type]

        return self._build_datetime(parts)

    def _generate_pattern_re(self, fmt: str) -> Tuple[List[_FORMAT_TYPE], Pattern[str]]:
        """
        Generates a regular expression pattern from a format string.

        :param fmt: The format string to convert into a regular expression pattern.
        :type fmt: str
        :returns: A tuple containing a list of format tokens and the corresponding regular expression pattern.
        :rtype: Tuple[List[_FORMAT_TYPE], Pattern[str]]
        :raises ParserError: If an unrecognized token is encountered in the format string.
        """
        # fmt is a string of tokens like 'YYYY-MM-DD'
        # we construct a new string by replacing each
        # token by its pattern:
        # 'YYYY-MM-DD' -> '(?P<YYYY>\d{4})-(?P<MM>\d{2})-(?P<DD>\d{2})'
        tokens: List[_FORMAT_TYPE] = []
        offset = 0

        # Escape all special RegEx chars
        escaped_fmt = re.escape(fmt)

        # Extract the bracketed expressions to be reinserted later.
        escaped_fmt = re.sub(self._ESCAPE_RE, "#", escaped_fmt)

        # Any number of S is the same as one.
        # TODO: allow users to specify the number of digits to parse
        escaped_fmt = re.sub(r"S+", "S", escaped_fmt)

        escaped_data = re.findall(self._ESCAPE_RE, fmt)

        fmt_pattern = escaped_fmt

        for m in self._FORMAT_RE.finditer(escaped_fmt):
            token: _FORMAT_TYPE = cast(_FORMAT_TYPE, m.group(0))
            try:
                input_re = self._input_re_map[token]
            except KeyError:
                # The KeyError adds nothing beyond the message below.
                raise ParserError(f"Unrecognized token {token!r}.") from None
            input_pattern = f"(?P<{token}>{input_re.pattern})"
            tokens.append(token)
            # a pattern doesn't have the same length as the token
            # it replaces! We keep the difference in the offset variable.
            # This works because the string is scanned left-to-right and matches
            # are returned in the order found by finditer.
            fmt_pattern = (
                fmt_pattern[: m.start() + offset]
                + input_pattern
                + fmt_pattern[m.end() + offset :]
            )
            offset += len(input_pattern) - (m.end() - m.start())

        # Re-insert the bracketed sections (without their brackets) at the
        # placeholder positions, in the order they appeared in fmt.
        split_fmt = fmt_pattern.split(r"\#")
        pieces: List[str] = []
        for i, chunk in enumerate(split_fmt):
            pieces.append(chunk)
            if i < len(escaped_data):
                pieces.append(escaped_data[i][1:-1])
        final_fmt_pattern = "".join(pieces)

        # Wrap final_fmt_pattern in a custom word boundary to strictly
        # match the formatting pattern and filter out date and time formats
        # that include junk such as: blah1998-09-12 blah, blah 1998-09-12blah,
        # blah1998-09-12blah. The custom word boundary matches every character
        # that is not a whitespace character to allow for searching for a date
        # and time string in a natural language sentence. Therefore, searching
        # for a string of the form YYYY-MM-DD in "blah 1998-09-12 blah" will
        # work properly.
        # Certain punctuation before or after the target pattern such as
        # "1998-09-12," is permitted. For the full list of valid punctuation,
        # see the documentation.

        starting_word_boundary = (
            r"(?<!\S\S)"  # Don't have two consecutive non-whitespace characters. This ensures that we allow cases
            # like .11.25.2019 but not 1.11.25.2019 (for pattern MM.DD.YYYY)
            r"(?<![^\,\.\;\:\?\!\"\'\`\[\]\{\}\(\)<>\s])"  # This is the list of punctuation that is ok before the
            # pattern (i.e. "It can't not be these characters before the pattern")
            r"(\b|^)"
            # The \b is to block cases like 1201912 but allow 201912 for pattern YYYYMM. The ^ was necessary to allow a
            # negative number through i.e. before epoch numbers
        )
        ending_word_boundary = (
            r"(?=[\,\.\;\:\?\!\"\'\`\[\]\{\}\(\)\<\>]?"  # Positive lookahead stating that these punctuation marks
            # can appear after the pattern at most 1 time
            r"(?!\S))"  # Don't allow any non-whitespace character after the punctuation
        )
        bounded_fmt_pattern = (
            f"{starting_word_boundary}{final_fmt_pattern}{ending_word_boundary}"
        )

        return tokens, re.compile(bounded_fmt_pattern, flags=re.IGNORECASE)

    @overload
    def _parse_token(
        self,
        token: Literal[
            "YYYY",
            "YY",
            "MM",
            "M",
            "DDDD",
            "DDD",
            "DD",
            "D",
            "Do",
            "HH",
            "hh",
            "h",
            "H",
            "mm",
            "m",
            "ss",
            "s",
            "x",
        ],
        value: Union[str, bytes, SupportsInt, bytearray],
        parts: _Parts,
    ) -> None:
        ...  # pragma: no cover

    @overload
    def _parse_token(
        self,
        token: Literal["X"],
        value: Union[str, bytes, SupportsFloat, bytearray],
        parts: _Parts,
    ) -> None:
        ...  # pragma: no cover

    @overload
    def _parse_token(
        self,
        token: Literal["MMMM", "MMM", "dddd", "ddd", "S"],
        value: Union[str, bytes, bytearray],
        parts: _Parts,
    ) -> None:
        ...  # pragma: no cover

    @overload
    def _parse_token(
        self,
        token: Literal["a", "A", "ZZZ", "ZZ", "Z"],
        value: Union[str, bytes],
        parts: _Parts,
    ) -> None:
        ...  # pragma: no cover

    @overload
    def _parse_token(
        self,
        token: Literal["W"],
        value: Tuple[_WEEKDATE_ELEMENT, _WEEKDATE_ELEMENT, Optional[_WEEKDATE_ELEMENT]],
        parts: _Parts,
    ) -> None:
        ...  # pragma: no cover

    def _parse_token(
        self,
        token: Any,
        value: Any,
        parts: _Parts,
    ) -> None:
        """
        Parse a token and its value, and update the `_Parts` dictionary with the parsed values.

        Tokens that are not handled by any branch below leave `parts` untouched.

        :param token: The token to parse.
        :type token: Any
        :param value: The value of the token.
        :type value: Any
        :param parts: A dictionary to update with the parsed values.
        :type parts: _Parts
        :raises ParserMatchError: If a month name is not known to the locale, or if an "am"
            meridian is parsed after an hour that is not between 0 and 12 inclusive.

        """
        if token == "YYYY":
            parts["year"] = int(value)

        elif token == "YY":
            value = int(value)
            # Two-digit years above 68 are read as 19xx, the rest as 20xx.
            parts["year"] = 1900 + value if value > 68 else 2000 + value

        elif token in ["MMMM", "MMM"]:
            # month_number() is nullable; fail as a non-match instead of
            # storing None and crashing later in datetime().
            month_number = self.locale.month_number(value.lower())
            if month_number is None:
                raise ParserMatchError(f"Unrecognized month name {value!r}.")
            parts["month"] = month_number

        elif token in ["MM", "M"]:
            parts["month"] = int(value)

        elif token in ["DDDD", "DDD"]:
            parts["day_of_year"] = int(value)

        elif token in ["DD", "D"]:
            parts["day"] = int(value)

        elif token == "Do":
            parts["day"] = int(value)

        elif token == "dddd":
            # locale day names are 1-indexed
            day_of_week = [x.lower() for x in self.locale.day_names].index(
                value.lower()
            )
            parts["day_of_week"] = day_of_week - 1

        elif token == "ddd":
            # locale day abbreviations are 1-indexed
            day_of_week = [x.lower() for x in self.locale.day_abbreviations].index(
                value.lower()
            )
            parts["day_of_week"] = day_of_week - 1

        elif token.upper() in ["HH", "H"]:
            parts["hour"] = int(value)

        elif token in ["mm", "m"]:
            parts["minute"] = int(value)

        elif token in ["ss", "s"]:
            parts["second"] = int(value)

        elif token == "S":
            # We have the *most significant* digits of an arbitrary-precision integer.
            # We want the six most significant digits as an integer, rounded.
            # IDEA: add nanosecond support somehow? Need datetime support for it first.
            value = value.ljust(7, "0")

            # floating-point (IEEE-754) defaults to half-to-even rounding
            seventh_digit = int(value[6])
            if seventh_digit == 5:
                rounding = int(value[5]) % 2
            elif seventh_digit > 5:
                rounding = 1
            else:
                rounding = 0

            parts["microsecond"] = int(value[:6]) + rounding

        elif token == "X":
            parts["timestamp"] = float(value)

        elif token == "x":
            parts["expanded_timestamp"] = int(value)

        elif token in ["ZZZ", "ZZ", "Z"]:
            parts["tzinfo"] = TzinfoParser.parse(value)

        elif token in ["a", "A"]:
            if value in (self.locale.meridians["am"], self.locale.meridians["AM"]):
                parts["am_pm"] = "am"
                # Only checked when the hour token was parsed before this one.
                if "hour" in parts and not 0 <= parts["hour"] <= 12:
                    raise ParserMatchError(
                        f"Hour token value must be between 0 and 12 inclusive for token {token!r}."
                    )
            elif value in (self.locale.meridians["pm"], self.locale.meridians["PM"]):
                parts["am_pm"] = "pm"
        elif token == "W":
            parts["weekdate"] = value

    @staticmethod
    def _build_datetime(parts: _Parts) -> datetime:
        """
        Build a datetime object from a dictionary of date parts.

        :param parts: A dictionary containing the date parts extracted from a date string.
        :type parts: dict
        :return: A datetime object representing the date and time.
        :rtype: datetime.datetime
        :raises ParserError: If the week date or day of year is invalid, if a day of year is
            given without a year or together with a month, or if hour 24 is combined with
            non-zero minutes, seconds or microseconds.
        """
        weekdate = parts.get("weekdate")

        if weekdate is not None:
            year, week = int(weekdate[0]), int(weekdate[1])

            if weekdate[2] is not None:
                _day = int(weekdate[2])
            else:
                # day not given, default to 1
                _day = 1

            date_string = f"{year}-{week}-{_day}"

            # tokens for ISO 8601 weekdates
            try:
                dt = datetime.strptime(date_string, "%G-%V-%u")
            except ValueError as err:
                # Mirror the day-of-year branch: report invalid input as a
                # ParserError rather than leaking strptime's ValueError.
                raise ParserError(
                    f"The provided week date {date_string!r} is invalid."
                ) from err

            parts["year"] = dt.year
            parts["month"] = dt.month
            parts["day"] = dt.day

        timestamp = parts.get("timestamp")

        if timestamp is not None:
            return datetime.fromtimestamp(timestamp, tz=tz.tzutc())

        expanded_timestamp = parts.get("expanded_timestamp")

        if expanded_timestamp is not None:
            return datetime.fromtimestamp(
                normalize_timestamp(expanded_timestamp),
                tz=tz.tzutc(),
            )

        day_of_year = parts.get("day_of_year")

        if day_of_year is not None:
            _year = parts.get("year")
            month = parts.get("month")
            if _year is None:
                raise ParserError(
                    "Year component is required with the DDD and DDDD tokens."
                )

            if month is not None:
                raise ParserError(
                    "Month component is not allowed with the DDD and DDDD tokens."
                )

            date_string = f"{_year}-{day_of_year}"
            try:
                dt = datetime.strptime(date_string, "%Y-%j")
            except ValueError as err:
                raise ParserError(
                    f"The provided day of year {day_of_year!r} is invalid."
                ) from err

            parts["year"] = dt.year
            parts["month"] = dt.month
            parts["day"] = dt.day

        day_of_week: Optional[int] = parts.get("day_of_week")
        day = parts.get("day")

        # If day is passed, ignore day of week
        if day_of_week is not None and day is None:
            year = parts.get("year", 1970)
            month = parts.get("month", 1)
            day = 1

            # dddd => first day of week after epoch
            # dddd YYYY => first day of week in specified year
            # dddd MM YYYY => first day of week in specified year and month
            # dddd MM => first day after epoch in specified month
            next_weekday_dt = next_weekday(datetime(year, month, day), day_of_week)
            parts["year"] = next_weekday_dt.year
            parts["month"] = next_weekday_dt.month
            parts["day"] = next_weekday_dt.day

        am_pm = parts.get("am_pm")
        hour = parts.get("hour", 0)

        if am_pm == "pm" and hour < 12:
            hour += 12
        elif am_pm == "am" and hour == 12:
            hour = 0

        # Support for midnight at the end of day
        if hour == 24:
            if parts.get("minute", 0) != 0:
                raise ParserError("Midnight at the end of day must not contain minutes")
            if parts.get("second", 0) != 0:
                raise ParserError("Midnight at the end of day must not contain seconds")
            if parts.get("microsecond", 0) != 0:
                raise ParserError(
                    "Midnight at the end of day must not contain microseconds"
                )
            hour = 0
            day_increment = 1
        else:
            day_increment = 0

        # account for rounding up to 1000000
        microsecond = parts.get("microsecond", 0)
        if microsecond == 1000000:
            microsecond = 0
            second_increment = 1
        else:
            second_increment = 0

        increment = timedelta(days=day_increment, seconds=second_increment)

        return (
            datetime(
                year=parts.get("year", 1),
                month=parts.get("month", 1),
                day=parts.get("day", 1),
                hour=hour,
                minute=parts.get("minute", 0),
                second=parts.get("second", 0),
                microsecond=microsecond,
                tzinfo=parts.get("tzinfo"),
            )
            + increment
        )

    def _parse_multiformat(self, string: str, formats: Iterable[str]) -> datetime:
        """
        Parse a date and time string using multiple formats.

        Tries each format in order and returns the first successful result. Only
        `ParserMatchError` is treated as "try the next format"; any other error
        raised while parsing propagates to the caller.

        :param string: The date and time string to parse.
        :type string: str
        :param formats: An iterable of date and time format strings to try, in order.
        :type formats: Iterable[str]
        :returns: The parsed date and time.
        :rtype: datetime.datetime
        :raises ParserError: If no format matches the input string.
        """
        # Materialize once: a one-shot iterable would otherwise be exhausted
        # by the loop and leave the error message without any formats.
        candidates = list(formats)

        for fmt in candidates:
            try:
                return self.parse(string, fmt)
            except ParserMatchError:
                continue

        supported_formats = ", ".join(candidates)
        raise ParserError(
            f"Could not match input {string!r} to any of the following formats: {supported_formats}."
        )

    # generates a capture group of choices separated by an OR operator
    @staticmethod
    def _generate_choice_re(
        choices: Iterable[str], flags: Union[int, re.RegexFlag] = 0
    ) -> Pattern[str]:
        """
        Generate a regular expression pattern that matches a choice from an iterable.

        The choices are joined with the '|' (OR) operator inside a single capture
        group. They are inserted verbatim, i.e. they are not regex-escaped.

        :param choices: An iterable of strings to match.
        :type choices: Iterable[str]
        :param flags: Optional regular expression flags. Default is 0.
        :type flags: Union[int, re.RegexFlag], optional
        :returns: A compiled regular expression pattern that matches any of the choices.
        :rtype: re.Pattern[str]
        """
        alternation = "|".join(choices)
        return re.compile(f"({alternation})", flags=flags)

885 

886 

class TzinfoParser:
    """
    Turns a timezone expression into a tzinfo object.
    """

    # Optional "(UTC" prefix(es), optional sign, two-digit hours and optional
    # two-digit minutes (with or without a colon), anchored at the start only.
    _TZINFO_RE: ClassVar[Pattern[str]] = re.compile(
        r"^(?:\(UTC)*([\+\-])?(\d{2})(?:\:?(\d{2}))?"
    )

    @classmethod
    def parse(cls, tzinfo_string: str) -> dt_tzinfo:
        """
        Parse a timezone string and return a datetime timezone object.

        The literal "local" and the UTC aliases are checked first, then a numeric
        offset; anything else is looked up by name with ``tz.gettz``.

        :param tzinfo_string: The timezone string to parse.
        :type tzinfo_string: str
        :returns: The parsed datetime timezone object.
        :rtype: datetime.timezone
        :raises ParserError: If the timezone string cannot be parsed.
        """
        if tzinfo_string == "local":
            return tz.tzlocal()

        if tzinfo_string in ("utc", "UTC", "Z"):
            return tz.tzutc()

        offset_match = cls._TZINFO_RE.match(tzinfo_string)

        if offset_match is None:
            # Not a numeric offset: fall back to a lookup by name.
            named_zone: Optional[dt_tzinfo] = tz.gettz(tzinfo_string)
            if named_zone is None:
                raise ParserError(
                    f"Could not parse timezone expression {tzinfo_string!r}."
                )
            return named_zone

        sign, hours, minutes = offset_match.groups()
        # A missing minutes group counts as zero minutes.
        offset_seconds = int(hours) * 3600 + int(minutes or 0) * 60

        return tz.tzoffset(None, -offset_seconds if sign == "-" else offset_seconds)