Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/arrow/parser.py: 87%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

314 statements  

"""Provides the :class:`DateTimeParser <arrow.parser.DateTimeParser>` class, a better way to parse datetime strings."""

2 

3import re 

4from datetime import datetime, timedelta 

5from datetime import tzinfo as dt_tzinfo 

6from functools import lru_cache 

7from typing import ( 

8 Any, 

9 ClassVar, 

10 Dict, 

11 Iterable, 

12 List, 

13 Literal, 

14 Match, 

15 Optional, 

16 Pattern, 

17 SupportsFloat, 

18 SupportsInt, 

19 Tuple, 

20 TypedDict, 

21 Union, 

22 cast, 

23 overload, 

24) 

25 

26from dateutil import tz 

27 

28from arrow import locales 

29from arrow.constants import DEFAULT_LOCALE 

30from arrow.util import next_weekday, normalize_timestamp 

31 

32 

class ParserError(ValueError):
    """Base error raised by this module when a datetime string cannot be parsed."""

35 

36 

class ParserMatchError(ParserError):
    """Signals that one particular format failed to match the input.

    ``_parse_multiformat()`` catches only this subclass while it walks its
    candidate formats. Plain :class:`ParserError` instances, such as the
    day_of_year errors raised from ``_build_datetime()``, are therefore no
    longer swallowed by that try/except and reach the user with the
    appropriate error message.
    """

44 

45 

# Type of a single component of a parsed week date (year, week or day).
# Each component is eventually passed to ``int()`` in ``_build_datetime()``.
_WEEKDATE_ELEMENT = Union[str, bytes, SupportsInt, bytearray]

# Closed set of format tokens understood by ``DateTimeParser``; used as the
# key type of the token -> regular expression maps.
_FORMAT_TYPE = Literal[
    "YYYY",
    "YY",
    "MM",
    "M",
    "DDDD",
    "DDD",
    "DD",
    "D",
    "HH",
    "H",
    "hh",
    "h",
    "mm",
    "m",
    "ss",
    "s",
    "X",
    "x",
    "ZZZ",
    "ZZ",
    "Z",
    "S",
    "W",
    "MMMM",
    "MMM",
    "Do",
    "dddd",
    "ddd",
    "d",
    "a",
    "A",
]

81 

82 

class _Parts(TypedDict, total=False):
    """Intermediate values collected by ``_parse_token()`` for one parse.

    Every key is optional (``total=False``); ``_build_datetime()`` supplies
    defaults for the keys that are missing.
    """

    # Calendar fields set from the YYYY/YY, MM/M/MMMM/MMM and DD/D/Do tokens.
    year: int
    month: int
    # Set from the DDDD/DDD tokens; resolved to month/day in _build_datetime().
    day_of_year: int
    day: int
    # Clock fields set from the HH/H/hh/h, mm/m, ss/s and S tokens.
    hour: int
    minute: int
    second: int
    microsecond: int
    # Set from the X token (float) and the x token (int) respectively.
    timestamp: float
    expanded_timestamp: int
    # Set from the ZZZ/ZZ/Z tokens via TzinfoParser.parse().
    tzinfo: dt_tzinfo
    # Set from the a/A tokens.
    am_pm: Literal["am", "pm"]
    # Set from the dddd/ddd tokens as (index in the locale's day list) - 1.
    day_of_week: int
    # (year, week, day) captured by the W token; day may be None.
    weekdate: Tuple[_WEEKDATE_ELEMENT, _WEEKDATE_ELEMENT, Optional[_WEEKDATE_ELEMENT]]

98 

99 

class DateTimeParser:
    """Parse datetime strings into :class:`datetime.datetime` objects.

    ``parse_iso()`` tries a fixed list of ISO 8601-like layouts, while
    ``parse()`` matches the input against an explicit token format string
    (or a list of them).
    """

    # Recognises format tokens inside a format string.
    _FORMAT_RE: ClassVar[Pattern[str]] = re.compile(
        r"(YYY?Y?|MM?M?M?|Do|DD?D?D?|d?d?d?d|HH?|hh?|mm?|ss?|S+|ZZ?Z?|a|A|x|X|W)"
    )
    # Matches a bracketed ``[...]`` span (no nested brackets) in a format string.
    _ESCAPE_RE: ClassVar[Pattern[str]] = re.compile(r"\[[^\[\]]*\]")

    # Building-block patterns for numeric tokens.
    _ONE_OR_TWO_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{1,2}")
    _ONE_OR_TWO_OR_THREE_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{1,3}")
    _ONE_OR_MORE_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d+")
    _TWO_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{2}")
    _THREE_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{3}")
    _FOUR_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{4}")
    # UTC offsets: 'Z' form has no colon (+HHMM), 'ZZ' form requires one (+HH:MM);
    # both also accept a literal "Z".
    _TZ_Z_RE: ClassVar[Pattern[str]] = re.compile(r"([\+\-])(\d{2})(?:(\d{2}))?|Z")
    _TZ_ZZ_RE: ClassVar[Pattern[str]] = re.compile(r"([\+\-])(\d{2})(?:\:(\d{2}))?|Z")
    _TZ_NAME_RE: ClassVar[Pattern[str]] = re.compile(r"\w[\w+\-/]+")
    # NOTE: timestamps cannot be parsed from natural language strings (by removing the ^...$) because it will
    # break cases like "15 Jul 2000" and a format list (see issue #447)
    # NOTE(review): as written, ``\d+\.?\d+`` needs at least two digits, so a
    # single-digit timestamp does not match.
    _TIMESTAMP_RE: ClassVar[Pattern[str]] = re.compile(r"^\-?\d+\.?\d+$")
    _TIMESTAMP_EXPANDED_RE: ClassVar[Pattern[str]] = re.compile(r"^\-?\d+$")
    # Time component used by parse_iso(): groups are hours, minutes, seconds,
    # sub-second separator ('.' or ',') and sub-second digits.
    _TIME_RE: ClassVar[Pattern[str]] = re.compile(
        r"^(\d{2})(?:\:?(\d{2}))?(?:\:?(\d{2}))?(?:([\.\,])(\d+))?$"
    )
    # Week date: four-digit year, 'W', two-digit week and an optional day
    # digit, with optional '-' separators.
    _WEEK_DATE_RE: ClassVar[Pattern[str]] = re.compile(
        r"(?P<year>\d{4})[\-]?W(?P<week>\d{2})[\-]?(?P<day>\d)?"
    )

    # Locale-independent token -> pattern map; __init__() copies it and adds
    # the locale-dependent tokens.
    _BASE_INPUT_RE_MAP: ClassVar[Dict[_FORMAT_TYPE, Pattern[str]]] = {
        "YYYY": _FOUR_DIGIT_RE,
        "YY": _TWO_DIGIT_RE,
        "MM": _TWO_DIGIT_RE,
        "M": _ONE_OR_TWO_DIGIT_RE,
        "DDDD": _THREE_DIGIT_RE,
        "DDD": _ONE_OR_TWO_OR_THREE_DIGIT_RE,
        "DD": _TWO_DIGIT_RE,
        "D": _ONE_OR_TWO_DIGIT_RE,
        "HH": _TWO_DIGIT_RE,
        "H": _ONE_OR_TWO_DIGIT_RE,
        "hh": _TWO_DIGIT_RE,
        "h": _ONE_OR_TWO_DIGIT_RE,
        "mm": _TWO_DIGIT_RE,
        "m": _ONE_OR_TWO_DIGIT_RE,
        "ss": _TWO_DIGIT_RE,
        "s": _ONE_OR_TWO_DIGIT_RE,
        "X": _TIMESTAMP_RE,
        "x": _TIMESTAMP_EXPANDED_RE,
        "ZZZ": _TZ_NAME_RE,
        "ZZ": _TZ_ZZ_RE,
        "Z": _TZ_Z_RE,
        "S": _ONE_OR_MORE_DIGIT_RE,
        "W": _WEEK_DATE_RE,
    }

    # NOTE(review): not referenced by any method visible in this class —
    # presumably consumed elsewhere; confirm before removing.
    SEPARATORS: ClassVar[List[str]] = ["-", "/", "."]

    # Per-instance state, assigned in __init__().
    locale: locales.Locale
    _input_re_map: Dict[_FORMAT_TYPE, Pattern[str]]

156 

157 def __init__(self, locale: str = DEFAULT_LOCALE, cache_size: int = 0) -> None: 

158 self.locale = locales.get_locale(locale) 

159 self._input_re_map = self._BASE_INPUT_RE_MAP.copy() 

160 self._input_re_map.update( 

161 { 

162 "MMMM": self._generate_choice_re( 

163 self.locale.month_names[1:], re.IGNORECASE 

164 ), 

165 "MMM": self._generate_choice_re( 

166 self.locale.month_abbreviations[1:], re.IGNORECASE 

167 ), 

168 "Do": re.compile(self.locale.ordinal_day_re), 

169 "dddd": self._generate_choice_re( 

170 self.locale.day_names[1:], re.IGNORECASE 

171 ), 

172 "ddd": self._generate_choice_re( 

173 self.locale.day_abbreviations[1:], re.IGNORECASE 

174 ), 

175 "d": re.compile(r"[1-7]"), 

176 "a": self._generate_choice_re( 

177 (self.locale.meridians["am"], self.locale.meridians["pm"]) 

178 ), 

179 # note: 'A' token accepts both 'am/pm' and 'AM/PM' formats to 

180 # ensure backwards compatibility of this token 

181 "A": self._generate_choice_re(self.locale.meridians.values()), 

182 } 

183 ) 

184 if cache_size > 0: 

185 self._generate_pattern_re = lru_cache(maxsize=cache_size)( # type: ignore 

186 self._generate_pattern_re 

187 ) 

188 

189 # TODO: since we support more than ISO 8601, we should rename this function 

190 # IDEA: break into multiple functions 

191 def parse_iso( 

192 self, datetime_string: str, normalize_whitespace: bool = False 

193 ) -> datetime: 

194 if normalize_whitespace: 

195 datetime_string = re.sub(r"\s+", " ", datetime_string.strip()) 

196 

197 has_space_divider = " " in datetime_string 

198 has_t_divider = "T" in datetime_string 

199 

200 num_spaces = datetime_string.count(" ") 

201 if has_space_divider and num_spaces != 1 or has_t_divider and num_spaces > 0: 

202 raise ParserError( 

203 f"Expected an ISO 8601-like string, but was given {datetime_string!r}. " 

204 "Try passing in a format string to resolve this." 

205 ) 

206 

207 has_time = has_space_divider or has_t_divider 

208 has_tz = False 

209 

210 # date formats (ISO 8601 and others) to test against 

211 # NOTE: YYYYMM is omitted to avoid confusion with YYMMDD (no longer part of ISO 8601, but is still often used) 

212 formats = [ 

213 "YYYY-MM-DD", 

214 "YYYY-M-DD", 

215 "YYYY-M-D", 

216 "YYYY/MM/DD", 

217 "YYYY/M/DD", 

218 "YYYY/M/D", 

219 "YYYY.MM.DD", 

220 "YYYY.M.DD", 

221 "YYYY.M.D", 

222 "YYYYMMDD", 

223 "YYYY-DDDD", 

224 "YYYYDDDD", 

225 "YYYY-MM", 

226 "YYYY/MM", 

227 "YYYY.MM", 

228 "YYYY", 

229 "W", 

230 ] 

231 

232 if has_time: 

233 if has_space_divider: 

234 date_string, time_string = datetime_string.split(" ", 1) 

235 else: 

236 date_string, time_string = datetime_string.split("T", 1) 

237 

238 time_parts = re.split( 

239 r"[\+\-Z]", time_string, maxsplit=1, flags=re.IGNORECASE 

240 ) 

241 

242 time_components: Optional[Match[str]] = self._TIME_RE.match(time_parts[0]) 

243 

244 if time_components is None: 

245 raise ParserError( 

246 "Invalid time component provided. " 

247 "Please specify a format or provide a valid time component in the basic or extended ISO 8601 time format." 

248 ) 

249 

250 ( 

251 hours, 

252 minutes, 

253 seconds, 

254 subseconds_sep, 

255 subseconds, 

256 ) = time_components.groups() 

257 

258 has_tz = len(time_parts) == 2 

259 has_minutes = minutes is not None 

260 has_seconds = seconds is not None 

261 has_subseconds = subseconds is not None 

262 

263 is_basic_time_format = ":" not in time_parts[0] 

264 tz_format = "Z" 

265 

266 # use 'ZZ' token instead since tz offset is present in non-basic format 

267 if has_tz and ":" in time_parts[1]: 

268 tz_format = "ZZ" 

269 

270 time_sep = "" if is_basic_time_format else ":" 

271 

272 if has_subseconds: 

273 time_string = "HH{time_sep}mm{time_sep}ss{subseconds_sep}S".format( 

274 time_sep=time_sep, subseconds_sep=subseconds_sep 

275 ) 

276 elif has_seconds: 

277 time_string = "HH{time_sep}mm{time_sep}ss".format(time_sep=time_sep) 

278 elif has_minutes: 

279 time_string = f"HH{time_sep}mm" 

280 else: 

281 time_string = "HH" 

282 

283 if has_space_divider: 

284 formats = [f"{f} {time_string}" for f in formats] 

285 else: 

286 formats = [f"{f}T{time_string}" for f in formats] 

287 

288 if has_time and has_tz: 

289 # Add "Z" or "ZZ" to the format strings to indicate to 

290 # _parse_token() that a timezone needs to be parsed 

291 formats = [f"{f}{tz_format}" for f in formats] 

292 

293 return self._parse_multiformat(datetime_string, formats) 

294 

295 def parse( 

296 self, 

297 datetime_string: str, 

298 fmt: Union[List[str], str], 

299 normalize_whitespace: bool = False, 

300 ) -> datetime: 

301 if normalize_whitespace: 

302 datetime_string = re.sub(r"\s+", " ", datetime_string) 

303 

304 if isinstance(fmt, list): 

305 return self._parse_multiformat(datetime_string, fmt) 

306 

307 try: 

308 fmt_tokens: List[_FORMAT_TYPE] 

309 fmt_pattern_re: Pattern[str] 

310 fmt_tokens, fmt_pattern_re = self._generate_pattern_re(fmt) 

311 except re.error as e: 

312 raise ParserMatchError( 

313 f"Failed to generate regular expression pattern: {e}." 

314 ) 

315 

316 match = fmt_pattern_re.search(datetime_string) 

317 

318 if match is None: 

319 raise ParserMatchError( 

320 f"Failed to match {fmt!r} when parsing {datetime_string!r}." 

321 ) 

322 

323 parts: _Parts = {} 

324 for token in fmt_tokens: 

325 value: Union[Tuple[str, str, str], str] 

326 if token == "Do": 

327 value = match.group("value") 

328 elif token == "W": 

329 value = (match.group("year"), match.group("week"), match.group("day")) 

330 else: 

331 value = match.group(token) 

332 

333 if value is None: 

334 raise ParserMatchError( 

335 f"Unable to find a match group for the specified token {token!r}." 

336 ) 

337 

338 self._parse_token(token, value, parts) # type: ignore[arg-type] 

339 

340 return self._build_datetime(parts) 

341 

342 def _generate_pattern_re(self, fmt: str) -> Tuple[List[_FORMAT_TYPE], Pattern[str]]: 

343 # fmt is a string of tokens like 'YYYY-MM-DD' 

344 # we construct a new string by replacing each 

345 # token by its pattern: 

346 # 'YYYY-MM-DD' -> '(?P<YYYY>\d{4})-(?P<MM>\d{2})-(?P<DD>\d{2})' 

347 tokens: List[_FORMAT_TYPE] = [] 

348 offset = 0 

349 

350 # Escape all special RegEx chars 

351 escaped_fmt = re.escape(fmt) 

352 

353 # Extract the bracketed expressions to be reinserted later. 

354 escaped_fmt = re.sub(self._ESCAPE_RE, "#", escaped_fmt) 

355 

356 # Any number of S is the same as one. 

357 # TODO: allow users to specify the number of digits to parse 

358 escaped_fmt = re.sub(r"S+", "S", escaped_fmt) 

359 

360 escaped_data = re.findall(self._ESCAPE_RE, fmt) 

361 

362 fmt_pattern = escaped_fmt 

363 

364 for m in self._FORMAT_RE.finditer(escaped_fmt): 

365 token: _FORMAT_TYPE = cast(_FORMAT_TYPE, m.group(0)) 

366 try: 

367 input_re = self._input_re_map[token] 

368 except KeyError: 

369 raise ParserError(f"Unrecognized token {token!r}.") 

370 input_pattern = f"(?P<{token}>{input_re.pattern})" 

371 tokens.append(token) 

372 # a pattern doesn't have the same length as the token 

373 # it replaces! We keep the difference in the offset variable. 

374 # This works because the string is scanned left-to-right and matches 

375 # are returned in the order found by finditer. 

376 fmt_pattern = ( 

377 fmt_pattern[: m.start() + offset] 

378 + input_pattern 

379 + fmt_pattern[m.end() + offset :] 

380 ) 

381 offset += len(input_pattern) - (m.end() - m.start()) 

382 

383 final_fmt_pattern = "" 

384 split_fmt = fmt_pattern.split(r"\#") 

385 

386 # Due to the way Python splits, 'split_fmt' will always be longer 

387 for i in range(len(split_fmt)): 

388 final_fmt_pattern += split_fmt[i] 

389 if i < len(escaped_data): 

390 final_fmt_pattern += escaped_data[i][1:-1] 

391 

392 # Wrap final_fmt_pattern in a custom word boundary to strictly 

393 # match the formatting pattern and filter out date and time formats 

394 # that include junk such as: blah1998-09-12 blah, blah 1998-09-12blah, 

395 # blah1998-09-12blah. The custom word boundary matches every character 

396 # that is not a whitespace character to allow for searching for a date 

397 # and time string in a natural language sentence. Therefore, searching 

398 # for a string of the form YYYY-MM-DD in "blah 1998-09-12 blah" will 

399 # work properly. 

400 # Certain punctuation before or after the target pattern such as 

401 # "1998-09-12," is permitted. For the full list of valid punctuation, 

402 # see the documentation. 

403 

404 starting_word_boundary = ( 

405 r"(?<!\S\S)" # Don't have two consecutive non-whitespace characters. This ensures that we allow cases 

406 # like .11.25.2019 but not 1.11.25.2019 (for pattern MM.DD.YYYY) 

407 r"(?<![^\,\.\;\:\?\!\"\'\`\[\]\{\}\(\)<>\s])" # This is the list of punctuation that is ok before the 

408 # pattern (i.e. "It can't not be these characters before the pattern") 

409 r"(\b|^)" 

410 # The \b is to block cases like 1201912 but allow 201912 for pattern YYYYMM. The ^ was necessary to allow a 

411 # negative number through i.e. before epoch numbers 

412 ) 

413 ending_word_boundary = ( 

414 r"(?=[\,\.\;\:\?\!\"\'\`\[\]\{\}\(\)\<\>]?" # Positive lookahead stating that these punctuation marks 

415 # can appear after the pattern at most 1 time 

416 r"(?!\S))" # Don't allow any non-whitespace character after the punctuation 

417 ) 

418 bounded_fmt_pattern = r"{}{}{}".format( 

419 starting_word_boundary, final_fmt_pattern, ending_word_boundary 

420 ) 

421 

422 return tokens, re.compile(bounded_fmt_pattern, flags=re.IGNORECASE) 

423 

424 @overload 

425 def _parse_token( 

426 self, 

427 token: Literal[ 

428 "YYYY", 

429 "YY", 

430 "MM", 

431 "M", 

432 "DDDD", 

433 "DDD", 

434 "DD", 

435 "D", 

436 "Do", 

437 "HH", 

438 "hh", 

439 "h", 

440 "H", 

441 "mm", 

442 "m", 

443 "ss", 

444 "s", 

445 "x", 

446 ], 

447 value: Union[str, bytes, SupportsInt, bytearray], 

448 parts: _Parts, 

449 ) -> None: 

450 ... # pragma: no cover 

451 

452 @overload 

453 def _parse_token( 

454 self, 

455 token: Literal["X"], 

456 value: Union[str, bytes, SupportsFloat, bytearray], 

457 parts: _Parts, 

458 ) -> None: 

459 ... # pragma: no cover 

460 

461 @overload 

462 def _parse_token( 

463 self, 

464 token: Literal["MMMM", "MMM", "dddd", "ddd", "S"], 

465 value: Union[str, bytes, bytearray], 

466 parts: _Parts, 

467 ) -> None: 

468 ... # pragma: no cover 

469 

470 @overload 

471 def _parse_token( 

472 self, 

473 token: Literal["a", "A", "ZZZ", "ZZ", "Z"], 

474 value: Union[str, bytes], 

475 parts: _Parts, 

476 ) -> None: 

477 ... # pragma: no cover 

478 

479 @overload 

480 def _parse_token( 

481 self, 

482 token: Literal["W"], 

483 value: Tuple[_WEEKDATE_ELEMENT, _WEEKDATE_ELEMENT, Optional[_WEEKDATE_ELEMENT]], 

484 parts: _Parts, 

485 ) -> None: 

486 ... # pragma: no cover 

487 

488 def _parse_token( 

489 self, 

490 token: Any, 

491 value: Any, 

492 parts: _Parts, 

493 ) -> None: 

494 if token == "YYYY": 

495 parts["year"] = int(value) 

496 

497 elif token == "YY": 

498 value = int(value) 

499 parts["year"] = 1900 + value if value > 68 else 2000 + value 

500 

501 elif token in ["MMMM", "MMM"]: 

502 # FIXME: month_number() is nullable 

503 parts["month"] = self.locale.month_number(value.lower()) # type: ignore[typeddict-item] 

504 

505 elif token in ["MM", "M"]: 

506 parts["month"] = int(value) 

507 

508 elif token in ["DDDD", "DDD"]: 

509 parts["day_of_year"] = int(value) 

510 

511 elif token in ["DD", "D"]: 

512 parts["day"] = int(value) 

513 

514 elif token == "Do": 

515 parts["day"] = int(value) 

516 

517 elif token == "dddd": 

518 # locale day names are 1-indexed 

519 day_of_week = [x.lower() for x in self.locale.day_names].index( 

520 value.lower() 

521 ) 

522 parts["day_of_week"] = day_of_week - 1 

523 

524 elif token == "ddd": 

525 # locale day abbreviations are 1-indexed 

526 day_of_week = [x.lower() for x in self.locale.day_abbreviations].index( 

527 value.lower() 

528 ) 

529 parts["day_of_week"] = day_of_week - 1 

530 

531 elif token.upper() in ["HH", "H"]: 

532 parts["hour"] = int(value) 

533 

534 elif token in ["mm", "m"]: 

535 parts["minute"] = int(value) 

536 

537 elif token in ["ss", "s"]: 

538 parts["second"] = int(value) 

539 

540 elif token == "S": 

541 # We have the *most significant* digits of an arbitrary-precision integer. 

542 # We want the six most significant digits as an integer, rounded. 

543 # IDEA: add nanosecond support somehow? Need datetime support for it first. 

544 value = value.ljust(7, "0") 

545 

546 # floating-point (IEEE-754) defaults to half-to-even rounding 

547 seventh_digit = int(value[6]) 

548 if seventh_digit == 5: 

549 rounding = int(value[5]) % 2 

550 elif seventh_digit > 5: 

551 rounding = 1 

552 else: 

553 rounding = 0 

554 

555 parts["microsecond"] = int(value[:6]) + rounding 

556 

557 elif token == "X": 

558 parts["timestamp"] = float(value) 

559 

560 elif token == "x": 

561 parts["expanded_timestamp"] = int(value) 

562 

563 elif token in ["ZZZ", "ZZ", "Z"]: 

564 parts["tzinfo"] = TzinfoParser.parse(value) 

565 

566 elif token in ["a", "A"]: 

567 if value in (self.locale.meridians["am"], self.locale.meridians["AM"]): 

568 parts["am_pm"] = "am" 

569 if "hour" in parts and not 0 <= parts["hour"] <= 12: 

570 raise ParserMatchError( 

571 f"Hour token value must be between 0 and 12 inclusive for token {token!r}." 

572 ) 

573 elif value in (self.locale.meridians["pm"], self.locale.meridians["PM"]): 

574 parts["am_pm"] = "pm" 

575 elif token == "W": 

576 parts["weekdate"] = value 

577 

578 @staticmethod 

579 def _build_datetime(parts: _Parts) -> datetime: 

580 weekdate = parts.get("weekdate") 

581 

582 if weekdate is not None: 

583 year, week = int(weekdate[0]), int(weekdate[1]) 

584 

585 if weekdate[2] is not None: 

586 _day = int(weekdate[2]) 

587 else: 

588 # day not given, default to 1 

589 _day = 1 

590 

591 date_string = f"{year}-{week}-{_day}" 

592 

593 # tokens for ISO 8601 weekdates 

594 dt = datetime.strptime(date_string, "%G-%V-%u") 

595 

596 parts["year"] = dt.year 

597 parts["month"] = dt.month 

598 parts["day"] = dt.day 

599 

600 timestamp = parts.get("timestamp") 

601 

602 if timestamp is not None: 

603 return datetime.fromtimestamp(timestamp, tz=tz.tzutc()) 

604 

605 expanded_timestamp = parts.get("expanded_timestamp") 

606 

607 if expanded_timestamp is not None: 

608 return datetime.fromtimestamp( 

609 normalize_timestamp(expanded_timestamp), 

610 tz=tz.tzutc(), 

611 ) 

612 

613 day_of_year = parts.get("day_of_year") 

614 

615 if day_of_year is not None: 

616 _year = parts.get("year") 

617 month = parts.get("month") 

618 if _year is None: 

619 raise ParserError( 

620 "Year component is required with the DDD and DDDD tokens." 

621 ) 

622 

623 if month is not None: 

624 raise ParserError( 

625 "Month component is not allowed with the DDD and DDDD tokens." 

626 ) 

627 

628 date_string = f"{_year}-{day_of_year}" 

629 try: 

630 dt = datetime.strptime(date_string, "%Y-%j") 

631 except ValueError: 

632 raise ParserError( 

633 f"The provided day of year {day_of_year!r} is invalid." 

634 ) 

635 

636 parts["year"] = dt.year 

637 parts["month"] = dt.month 

638 parts["day"] = dt.day 

639 

640 day_of_week: Optional[int] = parts.get("day_of_week") 

641 day = parts.get("day") 

642 

643 # If day is passed, ignore day of week 

644 if day_of_week is not None and day is None: 

645 year = parts.get("year", 1970) 

646 month = parts.get("month", 1) 

647 day = 1 

648 

649 # dddd => first day of week after epoch 

650 # dddd YYYY => first day of week in specified year 

651 # dddd MM YYYY => first day of week in specified year and month 

652 # dddd MM => first day after epoch in specified month 

653 next_weekday_dt = next_weekday(datetime(year, month, day), day_of_week) 

654 parts["year"] = next_weekday_dt.year 

655 parts["month"] = next_weekday_dt.month 

656 parts["day"] = next_weekday_dt.day 

657 

658 am_pm = parts.get("am_pm") 

659 hour = parts.get("hour", 0) 

660 

661 if am_pm == "pm" and hour < 12: 

662 hour += 12 

663 elif am_pm == "am" and hour == 12: 

664 hour = 0 

665 

666 # Support for midnight at the end of day 

667 if hour == 24: 

668 if parts.get("minute", 0) != 0: 

669 raise ParserError("Midnight at the end of day must not contain minutes") 

670 if parts.get("second", 0) != 0: 

671 raise ParserError("Midnight at the end of day must not contain seconds") 

672 if parts.get("microsecond", 0) != 0: 

673 raise ParserError( 

674 "Midnight at the end of day must not contain microseconds" 

675 ) 

676 hour = 0 

677 day_increment = 1 

678 else: 

679 day_increment = 0 

680 

681 # account for rounding up to 1000000 

682 microsecond = parts.get("microsecond", 0) 

683 if microsecond == 1000000: 

684 microsecond = 0 

685 second_increment = 1 

686 else: 

687 second_increment = 0 

688 

689 increment = timedelta(days=day_increment, seconds=second_increment) 

690 

691 return ( 

692 datetime( 

693 year=parts.get("year", 1), 

694 month=parts.get("month", 1), 

695 day=parts.get("day", 1), 

696 hour=hour, 

697 minute=parts.get("minute", 0), 

698 second=parts.get("second", 0), 

699 microsecond=microsecond, 

700 tzinfo=parts.get("tzinfo"), 

701 ) 

702 + increment 

703 ) 

704 

705 def _parse_multiformat(self, string: str, formats: Iterable[str]) -> datetime: 

706 _datetime: Optional[datetime] = None 

707 

708 for fmt in formats: 

709 try: 

710 _datetime = self.parse(string, fmt) 

711 break 

712 except ParserMatchError: 

713 pass 

714 

715 if _datetime is None: 

716 supported_formats = ", ".join(formats) 

717 raise ParserError( 

718 f"Could not match input {string!r} to any of the following formats: {supported_formats}." 

719 ) 

720 

721 return _datetime 

722 

723 # generates a capture group of choices separated by an OR operator 

724 @staticmethod 

725 def _generate_choice_re( 

726 choices: Iterable[str], flags: Union[int, re.RegexFlag] = 0 

727 ) -> Pattern[str]: 

728 return re.compile(r"({})".format("|".join(choices)), flags=flags) 

729 

730 

class TzinfoParser:
    """Turn a timezone expression into a ``tzinfo`` object."""

    # Optional "(UTC" prefix, optional sign, two-digit hours and optional
    # two-digit minutes (with or without a colon), anchored at the start only.
    _TZINFO_RE: ClassVar[Pattern[str]] = re.compile(
        r"^(?:\(UTC)*([\+\-])?(\d{2})(?:\:?(\d{2}))?"
    )

    @classmethod
    def parse(cls, tzinfo_string: str) -> dt_tzinfo:
        """Return the ``tzinfo`` described by ``tzinfo_string``.

        Accepts ``"local"``, ``"utc"``/``"UTC"``/``"Z"``, a numeric UTC
        offset, or any name that ``dateutil.tz.gettz`` can resolve.

        :raises ParserError: if the expression cannot be resolved.
        """
        if tzinfo_string == "local":
            return tz.tzlocal()

        if tzinfo_string in ["utc", "UTC", "Z"]:
            return tz.tzutc()

        offset_match = cls._TZINFO_RE.match(tzinfo_string)
        if offset_match:
            sign: Optional[str]
            hours: str
            minutes: Union[str, int, None]
            sign, hours, minutes = offset_match.groups()

            # Minutes are optional and count as zero when absent.
            offset_seconds = int(hours) * 3600 + int(minutes or 0) * 60
            if sign == "-":
                offset_seconds = -offset_seconds

            return tz.tzoffset(None, offset_seconds)

        # Not an offset: treat the expression as a timezone name.
        named_zone: Optional[dt_tzinfo] = tz.gettz(tzinfo_string)
        if named_zone is None:
            raise ParserError(f"Could not parse timezone expression {tzinfo_string!r}.")

        return named_zone