Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/arrow/parser.py: 87%

315 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-06-07 06:17 +0000

"""Provides the :class:`DateTimeParser <arrow.parser.DateTimeParser>` class, a better way to parse datetime strings."""

2 

3import re 

4import sys 

5from datetime import datetime, timedelta 

6from datetime import tzinfo as dt_tzinfo 

7from functools import lru_cache 

8from typing import ( 

9 Any, 

10 ClassVar, 

11 Dict, 

12 Iterable, 

13 List, 

14 Match, 

15 Optional, 

16 Pattern, 

17 SupportsFloat, 

18 SupportsInt, 

19 Tuple, 

20 Union, 

21 cast, 

22 overload, 

23) 

24 

25from dateutil import tz 

26 

27from arrow import locales 

28from arrow.constants import DEFAULT_LOCALE 

29from arrow.util import next_weekday, normalize_timestamp 

30 

31if sys.version_info < (3, 8): # pragma: no cover 

32 from typing_extensions import Literal, TypedDict 

33else: 

34 from typing import Literal, TypedDict # pragma: no cover 

35 

36 

class ParserError(ValueError):
    """Base error raised when a date/time string cannot be parsed.

    Derives from :class:`ValueError`, so ``except ValueError`` also catches it.
    """

39 

40 

# A dedicated subclass for "this format did not match" failures.
# _parse_multiformat() swallows only ParserMatchError while trying formats,
# so a plain ParserError raised by _build_datetime() (for example on an
# invalid day of year) propagates to the user with its own message instead
# of being hidden by the try/except in _parse_multiformat().
class ParserMatchError(ParserError):
    """Raised when an input string does not match the requested format."""

48 

49 

# Type of one component (year, week or weekday) of an ISO week date.
_WEEKDATE_ELEMENT = Union[str, bytes, SupportsInt, bytearray]

# Every token that may appear in a format string.  The members and their
# order are unchanged; they are only laid out by category.
_FORMAT_TYPE = Literal[
    # year and numeric month
    "YYYY", "YY", "MM", "M",
    # day of year and day of month
    "DDDD", "DDD", "DD", "D",
    # hour (24-hour and 12-hour spellings)
    "HH", "H", "hh", "h",
    # minute and second
    "mm", "m", "ss", "s",
    # timestamps
    "X", "x",
    # time zone
    "ZZZ", "ZZ", "Z",
    # sub-second digits and ISO week date
    "S", "W",
    # month names and ordinal day
    "MMMM", "MMM", "Do",
    # day of week
    "dddd", "ddd", "d",
    # meridian
    "a", "A",
]

85 

86 

class _Parts(TypedDict, total=False):
    # Intermediate result of parsing: one optional entry per kind of value
    # extracted from the input string.  ``total=False`` makes every key
    # optional; DateTimeParser._parse_token() fills the entries in and
    # DateTimeParser._build_datetime() turns them into a datetime.
    year: int
    month: int
    day_of_year: int
    day: int
    hour: int
    minute: int
    second: int
    microsecond: int
    timestamp: float
    expanded_timestamp: int
    tzinfo: dt_tzinfo
    am_pm: Literal["am", "pm"]
    day_of_week: int
    # (year, week, day) taken from the "W" token; day is None when not given.
    weekdate: Tuple[_WEEKDATE_ELEMENT, _WEEKDATE_ELEMENT, Optional[_WEEKDATE_ELEMENT]]

102 

103 

class DateTimeParser:
    """Turn date/time strings into :class:`datetime.datetime` objects.

    A string is matched either against an explicit format built from tokens
    (``YYYY``, ``MM``, ``DD`` ...; see ``_FORMAT_RE``) through :meth:`parse`,
    or against a built-in list of ISO 8601-like layouts through
    :meth:`parse_iso`.  Month names, day names, ordinals and meridians are
    taken from the locale selected at construction time.
    """

    # Finds every format token inside a format string.
    _FORMAT_RE: ClassVar[Pattern[str]] = re.compile(
        r"(YYY?Y?|MM?M?M?|Do|DD?D?D?|d?d?d?d|HH?|hh?|mm?|ss?|S+|ZZ?Z?|a|A|x|X|W)"
    )
    # Finds a bracketed (escaped) section of a format string.
    _ESCAPE_RE: ClassVar[Pattern[str]] = re.compile(r"\[[^\[\]]*\]")

    _ONE_OR_TWO_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{1,2}")
    _ONE_OR_TWO_OR_THREE_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{1,3}")
    _ONE_OR_MORE_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d+")
    _TWO_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{2}")
    _THREE_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{3}")
    _FOUR_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{4}")
    # "Z" token: sign, hours and optional minutes without a colon, or a literal Z.
    _TZ_Z_RE: ClassVar[Pattern[str]] = re.compile(r"([\+\-])(\d{2})(?:(\d{2}))?|Z")
    # "ZZ" token: same as above but the minutes are introduced by a colon.
    _TZ_ZZ_RE: ClassVar[Pattern[str]] = re.compile(r"([\+\-])(\d{2})(?:\:(\d{2}))?|Z")
    _TZ_NAME_RE: ClassVar[Pattern[str]] = re.compile(r"\w[\w+\-/]+")
    # NOTE: timestamps cannot be parsed from natural language strings (by removing the ^...$) because it will
    # break cases like "15 Jul 2000" and a format list (see issue #447)
    _TIMESTAMP_RE: ClassVar[Pattern[str]] = re.compile(r"^\-?\d+\.?\d+$")
    _TIMESTAMP_EXPANDED_RE: ClassVar[Pattern[str]] = re.compile(r"^\-?\d+$")
    # Time part of an ISO-like string: hours, optional minutes, optional
    # seconds, optional sub-seconds introduced by "." or ",".
    _TIME_RE: ClassVar[Pattern[str]] = re.compile(
        r"^(\d{2})(?:\:?(\d{2}))?(?:\:?(\d{2}))?(?:([\.\,])(\d+))?$"
    )
    _WEEK_DATE_RE: ClassVar[Pattern[str]] = re.compile(
        r"(?P<year>\d{4})[\-]?W(?P<week>\d{2})[\-]?(?P<day>\d)?"
    )

    # Locale-independent token -> pattern table; __init__ copies it and adds
    # the locale-dependent tokens.
    _BASE_INPUT_RE_MAP: ClassVar[Dict[_FORMAT_TYPE, Pattern[str]]] = {
        "YYYY": _FOUR_DIGIT_RE,
        "YY": _TWO_DIGIT_RE,
        "MM": _TWO_DIGIT_RE,
        "M": _ONE_OR_TWO_DIGIT_RE,
        "DDDD": _THREE_DIGIT_RE,
        "DDD": _ONE_OR_TWO_OR_THREE_DIGIT_RE,
        "DD": _TWO_DIGIT_RE,
        "D": _ONE_OR_TWO_DIGIT_RE,
        "HH": _TWO_DIGIT_RE,
        "H": _ONE_OR_TWO_DIGIT_RE,
        "hh": _TWO_DIGIT_RE,
        "h": _ONE_OR_TWO_DIGIT_RE,
        "mm": _TWO_DIGIT_RE,
        "m": _ONE_OR_TWO_DIGIT_RE,
        "ss": _TWO_DIGIT_RE,
        "s": _ONE_OR_TWO_DIGIT_RE,
        "X": _TIMESTAMP_RE,
        "x": _TIMESTAMP_EXPANDED_RE,
        "ZZZ": _TZ_NAME_RE,
        "ZZ": _TZ_ZZ_RE,
        "Z": _TZ_Z_RE,
        "S": _ONE_OR_MORE_DIGIT_RE,
        "W": _WEEK_DATE_RE,
    }

    SEPARATORS: ClassVar[List[str]] = ["-", "/", "."]

    locale: locales.Locale
    _input_re_map: Dict[_FORMAT_TYPE, Pattern[str]]

    def __init__(self, locale: str = DEFAULT_LOCALE, cache_size: int = 0) -> None:
        """Create a parser bound to a locale.

        :param locale: locale name handed to ``locales.get_locale``.
        :param cache_size: when greater than zero, results of
            ``_generate_pattern_re`` are memoised per instance with an LRU
            cache of this size.
        """
        self.locale = locales.get_locale(locale)

        # Start from the shared table and add the locale-dependent tokens.
        self._input_re_map = self._BASE_INPUT_RE_MAP.copy()
        self._input_re_map.update(
            {
                "MMMM": self._generate_choice_re(
                    self.locale.month_names[1:], re.IGNORECASE
                ),
                "MMM": self._generate_choice_re(
                    self.locale.month_abbreviations[1:], re.IGNORECASE
                ),
                "Do": re.compile(self.locale.ordinal_day_re),
                "dddd": self._generate_choice_re(
                    self.locale.day_names[1:], re.IGNORECASE
                ),
                "ddd": self._generate_choice_re(
                    self.locale.day_abbreviations[1:], re.IGNORECASE
                ),
                "d": re.compile(r"[1-7]"),
                "a": self._generate_choice_re(
                    (self.locale.meridians["am"], self.locale.meridians["pm"])
                ),
                # note: 'A' token accepts both 'am/pm' and 'AM/PM' formats to
                # ensure backwards compatibility of this token
                "A": self._generate_choice_re(self.locale.meridians.values()),
            }
        )
        if cache_size > 0:
            # Shadow the method on this instance with a cached wrapper.
            self._generate_pattern_re = lru_cache(maxsize=cache_size)(  # type: ignore[assignment]
                self._generate_pattern_re
            )

    # TODO: since we support more than ISO 8601, we should rename this function
    # IDEA: break into multiple functions
    def parse_iso(
        self, datetime_string: str, normalize_whitespace: bool = False
    ) -> datetime:
        """Parse an ISO 8601-like string without an explicit format.

        The date part is tried against a fixed list of layouts; the layout of
        the time part (and of the UTC offset, if any) is derived from the
        input itself and appended to every candidate date layout.

        :param datetime_string: the string to parse.
        :param normalize_whitespace: strip the string and collapse runs of
            whitespace into a single space before parsing.
        :raises ParserError: if the string has more than one space, mixes a
            ``T`` divider with spaces, has an invalid time component, or
            matches none of the candidate formats.
        """
        if normalize_whitespace:
            datetime_string = re.sub(r"\s+", " ", datetime_string.strip())

        has_space_divider = " " in datetime_string
        has_t_divider = "T" in datetime_string

        num_spaces = datetime_string.count(" ")
        if (has_space_divider and num_spaces != 1) or (
            has_t_divider and num_spaces > 0
        ):
            raise ParserError(
                f"Expected an ISO 8601-like string, but was given {datetime_string!r}. "
                "Try passing in a format string to resolve this."
            )

        has_time = has_space_divider or has_t_divider
        has_tz = False

        # date formats (ISO 8601 and others) to test against
        # NOTE: YYYYMM is omitted to avoid confusion with YYMMDD (no longer part of ISO 8601, but is still often used)
        formats = [
            "YYYY-MM-DD",
            "YYYY-M-DD",
            "YYYY-M-D",
            "YYYY/MM/DD",
            "YYYY/M/DD",
            "YYYY/M/D",
            "YYYY.MM.DD",
            "YYYY.M.DD",
            "YYYY.M.D",
            "YYYYMMDD",
            "YYYY-DDDD",
            "YYYYDDDD",
            "YYYY-MM",
            "YYYY/MM",
            "YYYY.MM",
            "YYYY",
            "W",
        ]

        if has_time:
            divider = " " if has_space_divider else "T"
            date_string, time_string = datetime_string.split(divider, 1)

            # Separate the clock time from a trailing offset / Z designator.
            # maxsplit and flags are passed by keyword: passing them
            # positionally is deprecated since Python 3.13.
            time_parts = re.split(
                r"[\+\-Z]", time_string, maxsplit=1, flags=re.IGNORECASE
            )

            time_components: Optional[Match[str]] = self._TIME_RE.match(time_parts[0])

            if time_components is None:
                raise ParserError(
                    "Invalid time component provided. "
                    "Please specify a format or provide a valid time component in the basic or extended ISO 8601 time format."
                )

            (
                _hours,
                minutes,
                seconds,
                subseconds_sep,
                subseconds,
            ) = time_components.groups()

            has_tz = len(time_parts) == 2
            has_minutes = minutes is not None
            has_seconds = seconds is not None
            has_subseconds = subseconds is not None

            is_basic_time_format = ":" not in time_parts[0]
            tz_format = "Z"

            # use 'ZZ' token instead since tz offset is present in non-basic format
            if has_tz and ":" in time_parts[1]:
                tz_format = "ZZ"

            time_sep = "" if is_basic_time_format else ":"

            if has_subseconds:
                time_string = f"HH{time_sep}mm{time_sep}ss{subseconds_sep}S"
            elif has_seconds:
                time_string = f"HH{time_sep}mm{time_sep}ss"
            elif has_minutes:
                time_string = f"HH{time_sep}mm"
            else:
                time_string = "HH"

            formats = [f"{f}{divider}{time_string}" for f in formats]

        if has_time and has_tz:
            # Add "Z" or "ZZ" to the format strings to indicate to
            # _parse_token() that a timezone needs to be parsed
            formats = [f"{f}{tz_format}" for f in formats]

        return self._parse_multiformat(datetime_string, formats)

    def parse(
        self,
        datetime_string: str,
        fmt: Union[List[str], str],
        normalize_whitespace: bool = False,
    ) -> datetime:
        """Parse ``datetime_string`` according to ``fmt``.

        :param datetime_string: the string to parse.
        :param fmt: a format string, or a list of format strings that are
            tried in order.
        :param normalize_whitespace: collapse runs of whitespace into a
            single space before parsing.
        :raises ParserMatchError: if the pattern for ``fmt`` cannot be
            compiled, the string does not match it, or a token has no value.
        :raises ParserError: for other parsing problems (see
            ``_build_datetime`` and ``_parse_multiformat``).
        """
        if normalize_whitespace:
            datetime_string = re.sub(r"\s+", " ", datetime_string)

        if isinstance(fmt, list):
            return self._parse_multiformat(datetime_string, fmt)

        try:
            fmt_tokens: List[_FORMAT_TYPE]
            fmt_pattern_re: Pattern[str]
            fmt_tokens, fmt_pattern_re = self._generate_pattern_re(fmt)
        except re.error as e:
            raise ParserMatchError(
                f"Failed to generate regular expression pattern: {e}."
            ) from e

        match = fmt_pattern_re.search(datetime_string)

        if match is None:
            raise ParserMatchError(
                f"Failed to match {fmt!r} when parsing {datetime_string!r}."
            )

        parts: _Parts = {}
        for token in fmt_tokens:
            value: Union[Tuple[str, str, str], str]
            if token == "Do":
                # the locale's ordinal pattern exposes the number as "value"
                value = match.group("value")
            elif token == "W":
                value = (match.group("year"), match.group("week"), match.group("day"))
            else:
                value = match.group(token)

            if value is None:
                raise ParserMatchError(
                    f"Unable to find a match group for the specified token {token!r}."
                )

            self._parse_token(token, value, parts)  # type: ignore[arg-type]

        return self._build_datetime(parts)

    def _generate_pattern_re(self, fmt: str) -> Tuple[List[_FORMAT_TYPE], Pattern[str]]:
        """Compile a format string into a regular expression.

        :param fmt: a string of tokens such as ``'YYYY-MM-DD'``.
        :returns: the tokens found in ``fmt`` (in order) and the compiled,
            case-insensitive pattern in which every token is a named group.
        :raises ParserError: if a token has no entry in the input pattern map.
        """
        # fmt is a string of tokens like 'YYYY-MM-DD'
        # we construct a new string by replacing each
        # token by its pattern:
        # 'YYYY-MM-DD' -> '(?P<YYYY>\d{4})-(?P<MM>\d{2})-(?P<DD>\d{2})'
        tokens: List[_FORMAT_TYPE] = []
        offset = 0

        # Escape all special RegEx chars
        escaped_fmt = re.escape(fmt)

        # Extract the bracketed expressions to be reinserted later.
        escaped_fmt = re.sub(self._ESCAPE_RE, "#", escaped_fmt)

        # Any number of S is the same as one.
        # TODO: allow users to specify the number of digits to parse
        escaped_fmt = re.sub(r"S+", "S", escaped_fmt)

        escaped_data = re.findall(self._ESCAPE_RE, fmt)

        fmt_pattern = escaped_fmt

        for m in self._FORMAT_RE.finditer(escaped_fmt):
            token: _FORMAT_TYPE = cast(_FORMAT_TYPE, m.group(0))
            try:
                input_re = self._input_re_map[token]
            except KeyError:
                raise ParserError(f"Unrecognized token {token!r}.")
            input_pattern = f"(?P<{token}>{input_re.pattern})"
            tokens.append(token)
            # a pattern doesn't have the same length as the token
            # it replaces! We keep the difference in the offset variable.
            # This works because the string is scanned left-to-right and matches
            # are returned in the order found by finditer.
            fmt_pattern = (
                fmt_pattern[: m.start() + offset]
                + input_pattern
                + fmt_pattern[m.end() + offset :]
            )
            offset += len(input_pattern) - (m.end() - m.start())

        # Put the bracketed sections back (without their brackets) in place
        # of the placeholders, one per gap between the split pieces.
        split_fmt = fmt_pattern.split(r"\#")
        pieces: List[str] = []
        for i, chunk in enumerate(split_fmt):
            pieces.append(chunk)
            if i < len(escaped_data):
                pieces.append(escaped_data[i][1:-1])
        final_fmt_pattern = "".join(pieces)

        # Wrap final_fmt_pattern in a custom word boundary to strictly
        # match the formatting pattern and filter out date and time formats
        # that include junk such as: blah1998-09-12 blah, blah 1998-09-12blah,
        # blah1998-09-12blah. The custom word boundary matches every character
        # that is not a whitespace character to allow for searching for a date
        # and time string in a natural language sentence. Therefore, searching
        # for a string of the form YYYY-MM-DD in "blah 1998-09-12 blah" will
        # work properly.
        # Certain punctuation before or after the target pattern such as
        # "1998-09-12," is permitted. For the full list of valid punctuation,
        # see the documentation.

        starting_word_boundary = (
            r"(?<!\S\S)"  # Don't have two consecutive non-whitespace characters. This ensures that we allow cases
            # like .11.25.2019 but not 1.11.25.2019 (for pattern MM.DD.YYYY)
            r"(?<![^\,\.\;\:\?\!\"\'\`\[\]\{\}\(\)<>\s])"  # This is the list of punctuation that is ok before the
            # pattern (i.e. "It can't not be these characters before the pattern")
            r"(\b|^)"
            # The \b is to block cases like 1201912 but allow 201912 for pattern YYYYMM. The ^ was necessary to allow a
            # negative number through i.e. before epoch numbers
        )
        ending_word_boundary = (
            r"(?=[\,\.\;\:\?\!\"\'\`\[\]\{\}\(\)\<\>]?"  # Positive lookahead stating that these punctuation marks
            # can appear after the pattern at most 1 time
            r"(?!\S))"  # Don't allow any non-whitespace character after the punctuation
        )
        bounded_fmt_pattern = (
            f"{starting_word_boundary}{final_fmt_pattern}{ending_word_boundary}"
        )

        return tokens, re.compile(bounded_fmt_pattern, flags=re.IGNORECASE)

    @overload
    def _parse_token(
        self,
        token: Literal[
            "YYYY",
            "YY",
            "MM",
            "M",
            "DDDD",
            "DDD",
            "DD",
            "D",
            "Do",
            "HH",
            "hh",
            "h",
            "H",
            "mm",
            "m",
            "ss",
            "s",
            "x",
        ],
        value: Union[str, bytes, SupportsInt, bytearray],
        parts: _Parts,
    ) -> None:
        ...  # pragma: no cover

    @overload
    def _parse_token(
        self,
        token: Literal["X"],
        value: Union[str, bytes, SupportsFloat, bytearray],
        parts: _Parts,
    ) -> None:
        ...  # pragma: no cover

    @overload
    def _parse_token(
        self,
        token: Literal["MMMM", "MMM", "dddd", "ddd", "S"],
        value: Union[str, bytes, bytearray],
        parts: _Parts,
    ) -> None:
        ...  # pragma: no cover

    @overload
    def _parse_token(
        self,
        token: Literal["a", "A", "ZZZ", "ZZ", "Z"],
        value: Union[str, bytes],
        parts: _Parts,
    ) -> None:
        ...  # pragma: no cover

    @overload
    def _parse_token(
        self,
        token: Literal["W"],
        value: Tuple[_WEEKDATE_ELEMENT, _WEEKDATE_ELEMENT, Optional[_WEEKDATE_ELEMENT]],
        parts: _Parts,
    ) -> None:
        ...  # pragma: no cover

    def _parse_token(
        self,
        token: Any,
        value: Any,
        parts: _Parts,
    ) -> None:
        """Convert the text matched for ``token`` and store it in ``parts``.

        :param token: the format token the value was matched for.
        :param value: the matched text (a 3-tuple for the ``W`` token).
        :param parts: dictionary updated in place.
        :raises ParserMatchError: for an "am" meridian when an hour already
            stored in ``parts`` is outside 0..12.
        """
        if token == "YYYY":
            parts["year"] = int(value)

        elif token == "YY":
            # two-digit years above 68 belong to the 1900s, the rest to the 2000s
            value = int(value)
            parts["year"] = 1900 + value if value > 68 else 2000 + value

        elif token in ["MMMM", "MMM"]:
            # FIXME: month_number() is nullable
            parts["month"] = self.locale.month_number(value.lower())  # type: ignore[typeddict-item]

        elif token in ["MM", "M"]:
            parts["month"] = int(value)

        elif token in ["DDDD", "DDD"]:
            parts["day_of_year"] = int(value)

        elif token in ["DD", "D"]:
            parts["day"] = int(value)

        elif token == "Do":
            parts["day"] = int(value)

        elif token == "dddd":
            # locale day names are 1-indexed
            day_of_week = [x.lower() for x in self.locale.day_names].index(
                value.lower()
            )
            parts["day_of_week"] = day_of_week - 1

        elif token == "ddd":
            # locale day abbreviations are 1-indexed
            day_of_week = [x.lower() for x in self.locale.day_abbreviations].index(
                value.lower()
            )
            parts["day_of_week"] = day_of_week - 1

        elif token.upper() in ["HH", "H"]:
            parts["hour"] = int(value)

        elif token in ["mm", "m"]:
            parts["minute"] = int(value)

        elif token in ["ss", "s"]:
            parts["second"] = int(value)

        elif token == "S":
            # We have the *most significant* digits of an arbitrary-precision integer.
            # We want the six most significant digits as an integer, rounded.
            # IDEA: add nanosecond support somehow? Need datetime support for it first.
            value = value.ljust(7, "0")

            # floating-point (IEEE-754) defaults to half-to-even rounding
            seventh_digit = int(value[6])
            if seventh_digit == 5:
                rounding = int(value[5]) % 2
            elif seventh_digit > 5:
                rounding = 1
            else:
                rounding = 0

            # may reach 1000000; _build_datetime() carries that into the seconds
            parts["microsecond"] = int(value[:6]) + rounding

        elif token == "X":
            parts["timestamp"] = float(value)

        elif token == "x":
            parts["expanded_timestamp"] = int(value)

        elif token in ["ZZZ", "ZZ", "Z"]:
            parts["tzinfo"] = TzinfoParser.parse(value)

        elif token in ["a", "A"]:
            if value in (self.locale.meridians["am"], self.locale.meridians["AM"]):
                parts["am_pm"] = "am"
                # only checked when the hour was stored before the meridian
                if "hour" in parts and not 0 <= parts["hour"] <= 12:
                    raise ParserMatchError(
                        f"Hour token value must be between 0 and 12 inclusive for token {token!r}."
                    )
            elif value in (self.locale.meridians["pm"], self.locale.meridians["PM"]):
                parts["am_pm"] = "pm"
        elif token == "W":
            parts["weekdate"] = value

    @staticmethod
    def _build_datetime(parts: _Parts) -> datetime:
        """Assemble a :class:`datetime.datetime` from parsed ``parts``.

        A timestamp or expanded timestamp, when present, takes precedence
        over every other part and yields a UTC datetime.  Missing date fields
        default to 1 and missing time fields to 0.

        :param parts: values collected by ``_parse_token``; may be updated
            in place with the resolved year, month and day.
        :raises ParserError: for an invalid ISO week date, an invalid or
            incomplete day-of-year specification, or an hour of 24 combined
            with non-zero minutes, seconds or microseconds.
        """
        weekdate = parts.get("weekdate")

        if weekdate is not None:
            iso_year, iso_week = int(weekdate[0]), int(weekdate[1])

            # day not given, default to 1
            iso_day = int(weekdate[2]) if weekdate[2] is not None else 1

            date_string = f"{iso_year}-{iso_week}-{iso_day}"

            # tokens for ISO 8601 weekdates
            try:
                dt = datetime.strptime(date_string, "%G-%V-%u")
            except ValueError as e:
                # Report an impossible week or weekday as a parser error
                # (still a ValueError subclass), as the day-of-year branch does.
                raise ParserError(
                    f"The provided week date {date_string!r} is invalid."
                ) from e

            parts["year"] = dt.year
            parts["month"] = dt.month
            parts["day"] = dt.day

        timestamp = parts.get("timestamp")

        if timestamp is not None:
            return datetime.fromtimestamp(timestamp, tz=tz.tzutc())

        expanded_timestamp = parts.get("expanded_timestamp")

        if expanded_timestamp is not None:
            return datetime.fromtimestamp(
                normalize_timestamp(expanded_timestamp),
                tz=tz.tzutc(),
            )

        day_of_year = parts.get("day_of_year")

        if day_of_year is not None:
            _year = parts.get("year")
            month = parts.get("month")
            if _year is None:
                raise ParserError(
                    "Year component is required with the DDD and DDDD tokens."
                )

            if month is not None:
                raise ParserError(
                    "Month component is not allowed with the DDD and DDDD tokens."
                )

            date_string = f"{_year}-{day_of_year}"
            try:
                dt = datetime.strptime(date_string, "%Y-%j")
            except ValueError:
                raise ParserError(
                    f"The provided day of year {day_of_year!r} is invalid."
                )

            parts["year"] = dt.year
            parts["month"] = dt.month
            parts["day"] = dt.day

        day_of_week: Optional[int] = parts.get("day_of_week")
        day = parts.get("day")

        # If day is passed, ignore day of week
        if day_of_week is not None and day is None:
            year = parts.get("year", 1970)
            month = parts.get("month", 1)
            day = 1

            # dddd => first day of week after epoch
            # dddd YYYY => first day of week in specified year
            # dddd MM YYYY => first day of week in specified year and month
            # dddd MM => first day after epoch in specified month
            next_weekday_dt = next_weekday(datetime(year, month, day), day_of_week)
            parts["year"] = next_weekday_dt.year
            parts["month"] = next_weekday_dt.month
            parts["day"] = next_weekday_dt.day

        am_pm = parts.get("am_pm")
        hour = parts.get("hour", 0)

        if am_pm == "pm" and hour < 12:
            hour += 12
        elif am_pm == "am" and hour == 12:
            hour = 0

        # Support for midnight at the end of day
        if hour == 24:
            if parts.get("minute", 0) != 0:
                raise ParserError("Midnight at the end of day must not contain minutes")
            if parts.get("second", 0) != 0:
                raise ParserError("Midnight at the end of day must not contain seconds")
            if parts.get("microsecond", 0) != 0:
                raise ParserError(
                    "Midnight at the end of day must not contain microseconds"
                )
            hour = 0
            day_increment = 1
        else:
            day_increment = 0

        # account for rounding up to 1000000
        microsecond = parts.get("microsecond", 0)
        if microsecond == 1000000:
            microsecond = 0
            second_increment = 1
        else:
            second_increment = 0

        increment = timedelta(days=day_increment, seconds=second_increment)

        return (
            datetime(
                year=parts.get("year", 1),
                month=parts.get("month", 1),
                day=parts.get("day", 1),
                hour=hour,
                minute=parts.get("minute", 0),
                second=parts.get("second", 0),
                microsecond=microsecond,
                tzinfo=parts.get("tzinfo"),
            )
            + increment
        )

    def _parse_multiformat(self, string: str, formats: Iterable[str]) -> datetime:
        """Return the result of the first format in ``formats`` that matches.

        Formats that fail with :class:`ParserMatchError` are skipped; any
        other exception propagates immediately.

        :raises ParserError: if no format matches ``string``.
        """
        # Materialise once so a one-shot iterable can still be listed in the
        # error message after it has been iterated.
        formats = list(formats)

        for fmt in formats:
            try:
                return self.parse(string, fmt)
            except ParserMatchError:
                continue

        supported_formats = ", ".join(formats)
        raise ParserError(
            f"Could not match input {string!r} to any of the following formats: {supported_formats}."
        )

    # generates a capture group of choices separated by an OR operator
    @staticmethod
    def _generate_choice_re(
        choices: Iterable[str], flags: Union[int, re.RegexFlag] = 0
    ) -> Pattern[str]:
        """Compile ``(a|b|...)`` from ``choices`` with the given ``flags``.

        The choices are inserted as-is, i.e. they are not regex-escaped.
        """
        alternation = "|".join(choices)
        return re.compile(f"({alternation})", flags=flags)

739 

740 

class TzinfoParser:
    """Convert a timezone expression into a ``tzinfo`` object."""

    # Optional "(UTC" prefix, optional sign, two-digit hours and optional
    # two-digit minutes with or without a colon.  Anchored at the start only.
    _TZINFO_RE: ClassVar[Pattern[str]] = re.compile(
        r"^(?:\(UTC)*([\+\-])?(\d{2})(?:\:?(\d{2}))?"
    )

    @classmethod
    def parse(cls, tzinfo_string: str) -> dt_tzinfo:
        """Return the ``tzinfo`` described by ``tzinfo_string``.

        Accepted expressions are ``"local"``, ``"utc"``/``"UTC"``,
        ``"Z"``/``"z"``, a numeric UTC offset such as ``+01:00`` or
        ``-0530``, and any name that ``dateutil.tz.gettz`` resolves.

        :raises ParserError: if the expression cannot be resolved.
        """
        tzinfo: Optional[dt_tzinfo] = None

        if tzinfo_string == "local":
            tzinfo = tz.tzlocal()

        # The date/time patterns are compiled case-insensitively, so a
        # lowercase "z" designator can reach this point; treat it as UTC too.
        elif tzinfo_string in ["utc", "UTC", "Z", "z"]:
            tzinfo = tz.tzutc()

        else:
            iso_match = cls._TZINFO_RE.match(tzinfo_string)

            if iso_match:
                sign: Optional[str]
                hours: str
                minutes: Union[str, int, None]
                sign, hours, minutes = iso_match.groups()
                seconds = int(hours) * 3600 + int(minutes or 0) * 60

                if sign == "-":
                    seconds *= -1

                tzinfo = tz.tzoffset(None, seconds)

            else:
                tzinfo = tz.gettz(tzinfo_string)

        if tzinfo is None:
            raise ParserError(f"Could not parse timezone expression {tzinfo_string!r}.")

        return tzinfo