Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/arrow/parser.py: 24%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

315 statements  

1"""Provides the :class:`DateTimeParser <arrow.parser.DateTimeParser>` class, a better way to parse datetime strings."""

2 

3import re 

4import sys 

5from datetime import datetime, timedelta 

6from datetime import tzinfo as dt_tzinfo 

7from functools import lru_cache 

8from typing import ( 

9 Any, 

10 ClassVar, 

11 Dict, 

12 Iterable, 

13 List, 

14 Match, 

15 Optional, 

16 Pattern, 

17 SupportsFloat, 

18 SupportsInt, 

19 Tuple, 

20 Union, 

21 cast, 

22 overload, 

23) 

24 

25from dateutil import tz 

26 

27from arrow import locales 

28from arrow.constants import DEFAULT_LOCALE 

29from arrow.util import next_weekday, normalize_timestamp 

30 

31if sys.version_info < (3, 8): # pragma: no cover 

32 from typing_extensions import Literal, TypedDict 

33else: 

34 from typing import Literal, TypedDict # pragma: no cover 

35 

36 

class ParserError(ValueError):
    """Error raised by this module when a datetime string cannot be parsed.

    Subclasses :class:`ValueError`, so callers that catch ``ValueError``
    also catch parser failures.
    """

39 

40 

# _parse_multiformat() swallows only ParserMatchError while it tries each
# candidate format. Keeping "this format did not match" separate from the
# plain ParserError lets errors raised by _build_datetime() (for example
# day_of_year problems) reach the user with their own message instead of
# being hidden by that try/except.
class ParserMatchError(ParserError):
    """Raised when a format string does not match the input being parsed."""

48 

49 

# One component (year, week or day) of a week date, before it is passed to int().
_WEEKDATE_ELEMENT = Union[str, bytes, SupportsInt, bytearray]

# Every format token known to the parser. The member order is unchanged;
# only the layout groups related tokens on one line.
_FORMAT_TYPE = Literal[
    # year / month / day
    "YYYY", "YY", "MM", "M", "DDDD", "DDD", "DD", "D",
    # hour / minute / second
    "HH", "H", "hh", "h", "mm", "m", "ss", "s",
    # timestamps
    "X", "x",
    # timezone
    "ZZZ", "ZZ", "Z",
    # sub-second and week date
    "S", "W",
    # locale-dependent names, ordinal day, weekday number, meridians
    "MMMM", "MMM", "Do", "dddd", "ddd", "d", "a", "A",
]

85 

86 

class _Parts(TypedDict, total=False):
    """Values extracted from a datetime string, keyed by component.

    ``total=False`` makes every key optional: ``_parse_token()`` only sets
    the keys for tokens that were present in the format, and
    ``_build_datetime()`` reads them with ``.get()``.
    """

    # calendar date
    year: int
    month: int
    day_of_year: int
    day: int

    # time of day
    hour: int
    minute: int
    second: int
    microsecond: int

    # timestamps (from the "X" and "x" tokens respectively)
    timestamp: float
    expanded_timestamp: int

    tzinfo: dt_tzinfo
    am_pm: Literal["am", "pm"]
    day_of_week: int

    # (year, week, day) from the "W" token; day is None when it was omitted
    weekdate: Tuple[_WEEKDATE_ELEMENT, _WEEKDATE_ELEMENT, Optional[_WEEKDATE_ELEMENT]]

102 

103 

class DateTimeParser:
    """Parses datetime strings against token-based format strings.

    The class-level patterns below describe the text each locale-independent
    format token accepts.
    """

    # Finds format tokens (``YYYY``, ``MM``, ``Do``, ...) inside a format string.
    _FORMAT_RE: ClassVar[Pattern[str]] = re.compile(
        r"(YYY?Y?|MM?M?M?|Do|DD?D?D?|d?d?d?d|HH?|hh?|mm?|ss?|S+|ZZ?Z?|a|A|x|X|W)"
    )
    # Finds bracketed sections of a format string, e.g. ``[at]``.
    _ESCAPE_RE: ClassVar[Pattern[str]] = re.compile(r"\[[^\[\]]*\]")

    # Digit-run building blocks shared by several tokens.
    _ONE_OR_TWO_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{1,2}")
    _ONE_OR_TWO_OR_THREE_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{1,3}")
    _ONE_OR_MORE_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d+")
    _TWO_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{2}")
    _THREE_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{3}")
    _FOUR_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{4}")

    # Timezone offsets: "Z" has no colon between hours and minutes, "ZZ" has one.
    _TZ_Z_RE: ClassVar[Pattern[str]] = re.compile(r"([\+\-])(\d{2})(?:(\d{2}))?|Z")
    _TZ_ZZ_RE: ClassVar[Pattern[str]] = re.compile(r"([\+\-])(\d{2})(?:\:(\d{2}))?|Z")
    _TZ_NAME_RE: ClassVar[Pattern[str]] = re.compile(r"\w[\w+\-/]+")

    # NOTE: timestamps cannot be parsed from natural language strings (by
    # removing the ^...$) because it will break cases like "15 Jul 2000" and
    # a format list (see issue #447)
    _TIMESTAMP_RE: ClassVar[Pattern[str]] = re.compile(r"^\-?\d+\.?\d+$")
    _TIMESTAMP_EXPANDED_RE: ClassVar[Pattern[str]] = re.compile(r"^\-?\d+$")

    # Time portion used by parse_iso(): hours, optional minutes and seconds
    # (with or without ":"), and an optional "." or "," fraction.
    _TIME_RE: ClassVar[Pattern[str]] = re.compile(
        r"^(\d{2})(?:\:?(\d{2}))?(?:\:?(\d{2}))?(?:([\.\,])(\d+))?$"
    )
    # Week date: four-digit year, "W", two-digit week, optional single-digit day.
    _WEEK_DATE_RE: ClassVar[Pattern[str]] = re.compile(
        r"(?P<year>\d{4})[\-]?W(?P<week>\d{2})[\-]?(?P<day>\d)?"
    )

    # Token -> pattern for every token that does not depend on the locale.
    _BASE_INPUT_RE_MAP: ClassVar[Dict[_FORMAT_TYPE, Pattern[str]]] = {
        "YYYY": _FOUR_DIGIT_RE,
        "YY": _TWO_DIGIT_RE,
        "MM": _TWO_DIGIT_RE,
        "M": _ONE_OR_TWO_DIGIT_RE,
        "DDDD": _THREE_DIGIT_RE,
        "DDD": _ONE_OR_TWO_OR_THREE_DIGIT_RE,
        "DD": _TWO_DIGIT_RE,
        "D": _ONE_OR_TWO_DIGIT_RE,
        "HH": _TWO_DIGIT_RE,
        "H": _ONE_OR_TWO_DIGIT_RE,
        "hh": _TWO_DIGIT_RE,
        "h": _ONE_OR_TWO_DIGIT_RE,
        "mm": _TWO_DIGIT_RE,
        "m": _ONE_OR_TWO_DIGIT_RE,
        "ss": _TWO_DIGIT_RE,
        "s": _ONE_OR_TWO_DIGIT_RE,
        "X": _TIMESTAMP_RE,
        "x": _TIMESTAMP_EXPANDED_RE,
        "ZZZ": _TZ_NAME_RE,
        "ZZ": _TZ_ZZ_RE,
        "Z": _TZ_Z_RE,
        "S": _ONE_OR_MORE_DIGIT_RE,
        "W": _WEEK_DATE_RE,
    }

    SEPARATORS: ClassVar[List[str]] = ["-", "/", "."]

    # Instance attributes.
    locale: locales.Locale
    _input_re_map: Dict[_FORMAT_TYPE, Pattern[str]]

160 

161 def __init__(self, locale: str = DEFAULT_LOCALE, cache_size: int = 0) -> None: 

162 self.locale = locales.get_locale(locale) 

163 self._input_re_map = self._BASE_INPUT_RE_MAP.copy() 

164 self._input_re_map.update( 

165 { 

166 "MMMM": self._generate_choice_re( 

167 self.locale.month_names[1:], re.IGNORECASE 

168 ), 

169 "MMM": self._generate_choice_re( 

170 self.locale.month_abbreviations[1:], re.IGNORECASE 

171 ), 

172 "Do": re.compile(self.locale.ordinal_day_re), 

173 "dddd": self._generate_choice_re( 

174 self.locale.day_names[1:], re.IGNORECASE 

175 ), 

176 "ddd": self._generate_choice_re( 

177 self.locale.day_abbreviations[1:], re.IGNORECASE 

178 ), 

179 "d": re.compile(r"[1-7]"), 

180 "a": self._generate_choice_re( 

181 (self.locale.meridians["am"], self.locale.meridians["pm"]) 

182 ), 

183 # note: 'A' token accepts both 'am/pm' and 'AM/PM' formats to 

184 # ensure backwards compatibility of this token 

185 "A": self._generate_choice_re(self.locale.meridians.values()), 

186 } 

187 ) 

188 if cache_size > 0: 

189 self._generate_pattern_re = lru_cache(maxsize=cache_size)( # type: ignore 

190 self._generate_pattern_re 

191 ) 

192 

193 # TODO: since we support more than ISO 8601, we should rename this function 

194 # IDEA: break into multiple functions 

195 def parse_iso( 

196 self, datetime_string: str, normalize_whitespace: bool = False 

197 ) -> datetime: 

198 if normalize_whitespace: 

199 datetime_string = re.sub(r"\s+", " ", datetime_string.strip()) 

200 

201 has_space_divider = " " in datetime_string 

202 has_t_divider = "T" in datetime_string 

203 

204 num_spaces = datetime_string.count(" ") 

205 if has_space_divider and num_spaces != 1 or has_t_divider and num_spaces > 0: 

206 raise ParserError( 

207 f"Expected an ISO 8601-like string, but was given {datetime_string!r}. " 

208 "Try passing in a format string to resolve this." 

209 ) 

210 

211 has_time = has_space_divider or has_t_divider 

212 has_tz = False 

213 

214 # date formats (ISO 8601 and others) to test against 

215 # NOTE: YYYYMM is omitted to avoid confusion with YYMMDD (no longer part of ISO 8601, but is still often used) 

216 formats = [ 

217 "YYYY-MM-DD", 

218 "YYYY-M-DD", 

219 "YYYY-M-D", 

220 "YYYY/MM/DD", 

221 "YYYY/M/DD", 

222 "YYYY/M/D", 

223 "YYYY.MM.DD", 

224 "YYYY.M.DD", 

225 "YYYY.M.D", 

226 "YYYYMMDD", 

227 "YYYY-DDDD", 

228 "YYYYDDDD", 

229 "YYYY-MM", 

230 "YYYY/MM", 

231 "YYYY.MM", 

232 "YYYY", 

233 "W", 

234 ] 

235 

236 if has_time: 

237 if has_space_divider: 

238 date_string, time_string = datetime_string.split(" ", 1) 

239 else: 

240 date_string, time_string = datetime_string.split("T", 1) 

241 

242 time_parts = re.split( 

243 r"[\+\-Z]", time_string, maxsplit=1, flags=re.IGNORECASE 

244 ) 

245 

246 time_components: Optional[Match[str]] = self._TIME_RE.match(time_parts[0]) 

247 

248 if time_components is None: 

249 raise ParserError( 

250 "Invalid time component provided. " 

251 "Please specify a format or provide a valid time component in the basic or extended ISO 8601 time format." 

252 ) 

253 

254 ( 

255 hours, 

256 minutes, 

257 seconds, 

258 subseconds_sep, 

259 subseconds, 

260 ) = time_components.groups() 

261 

262 has_tz = len(time_parts) == 2 

263 has_minutes = minutes is not None 

264 has_seconds = seconds is not None 

265 has_subseconds = subseconds is not None 

266 

267 is_basic_time_format = ":" not in time_parts[0] 

268 tz_format = "Z" 

269 

270 # use 'ZZ' token instead since tz offset is present in non-basic format 

271 if has_tz and ":" in time_parts[1]: 

272 tz_format = "ZZ" 

273 

274 time_sep = "" if is_basic_time_format else ":" 

275 

276 if has_subseconds: 

277 time_string = "HH{time_sep}mm{time_sep}ss{subseconds_sep}S".format( 

278 time_sep=time_sep, subseconds_sep=subseconds_sep 

279 ) 

280 elif has_seconds: 

281 time_string = "HH{time_sep}mm{time_sep}ss".format(time_sep=time_sep) 

282 elif has_minutes: 

283 time_string = f"HH{time_sep}mm" 

284 else: 

285 time_string = "HH" 

286 

287 if has_space_divider: 

288 formats = [f"{f} {time_string}" for f in formats] 

289 else: 

290 formats = [f"{f}T{time_string}" for f in formats] 

291 

292 if has_time and has_tz: 

293 # Add "Z" or "ZZ" to the format strings to indicate to 

294 # _parse_token() that a timezone needs to be parsed 

295 formats = [f"{f}{tz_format}" for f in formats] 

296 

297 return self._parse_multiformat(datetime_string, formats) 

298 

299 def parse( 

300 self, 

301 datetime_string: str, 

302 fmt: Union[List[str], str], 

303 normalize_whitespace: bool = False, 

304 ) -> datetime: 

305 if normalize_whitespace: 

306 datetime_string = re.sub(r"\s+", " ", datetime_string) 

307 

308 if isinstance(fmt, list): 

309 return self._parse_multiformat(datetime_string, fmt) 

310 

311 try: 

312 fmt_tokens: List[_FORMAT_TYPE] 

313 fmt_pattern_re: Pattern[str] 

314 fmt_tokens, fmt_pattern_re = self._generate_pattern_re(fmt) 

315 except re.error as e: 

316 raise ParserMatchError( 

317 f"Failed to generate regular expression pattern: {e}." 

318 ) 

319 

320 match = fmt_pattern_re.search(datetime_string) 

321 

322 if match is None: 

323 raise ParserMatchError( 

324 f"Failed to match {fmt!r} when parsing {datetime_string!r}." 

325 ) 

326 

327 parts: _Parts = {} 

328 for token in fmt_tokens: 

329 value: Union[Tuple[str, str, str], str] 

330 if token == "Do": 

331 value = match.group("value") 

332 elif token == "W": 

333 value = (match.group("year"), match.group("week"), match.group("day")) 

334 else: 

335 value = match.group(token) 

336 

337 if value is None: 

338 raise ParserMatchError( 

339 f"Unable to find a match group for the specified token {token!r}." 

340 ) 

341 

342 self._parse_token(token, value, parts) # type: ignore[arg-type] 

343 

344 return self._build_datetime(parts) 

345 

346 def _generate_pattern_re(self, fmt: str) -> Tuple[List[_FORMAT_TYPE], Pattern[str]]: 

347 # fmt is a string of tokens like 'YYYY-MM-DD' 

348 # we construct a new string by replacing each 

349 # token by its pattern: 

350 # 'YYYY-MM-DD' -> '(?P<YYYY>\d{4})-(?P<MM>\d{2})-(?P<DD>\d{2})' 

351 tokens: List[_FORMAT_TYPE] = [] 

352 offset = 0 

353 

354 # Escape all special RegEx chars 

355 escaped_fmt = re.escape(fmt) 

356 

357 # Extract the bracketed expressions to be reinserted later. 

358 escaped_fmt = re.sub(self._ESCAPE_RE, "#", escaped_fmt) 

359 

360 # Any number of S is the same as one. 

361 # TODO: allow users to specify the number of digits to parse 

362 escaped_fmt = re.sub(r"S+", "S", escaped_fmt) 

363 

364 escaped_data = re.findall(self._ESCAPE_RE, fmt) 

365 

366 fmt_pattern = escaped_fmt 

367 

368 for m in self._FORMAT_RE.finditer(escaped_fmt): 

369 token: _FORMAT_TYPE = cast(_FORMAT_TYPE, m.group(0)) 

370 try: 

371 input_re = self._input_re_map[token] 

372 except KeyError: 

373 raise ParserError(f"Unrecognized token {token!r}.") 

374 input_pattern = f"(?P<{token}>{input_re.pattern})" 

375 tokens.append(token) 

376 # a pattern doesn't have the same length as the token 

377 # it replaces! We keep the difference in the offset variable. 

378 # This works because the string is scanned left-to-right and matches 

379 # are returned in the order found by finditer. 

380 fmt_pattern = ( 

381 fmt_pattern[: m.start() + offset] 

382 + input_pattern 

383 + fmt_pattern[m.end() + offset :] 

384 ) 

385 offset += len(input_pattern) - (m.end() - m.start()) 

386 

387 final_fmt_pattern = "" 

388 split_fmt = fmt_pattern.split(r"\#") 

389 

390 # Due to the way Python splits, 'split_fmt' will always be longer 

391 for i in range(len(split_fmt)): 

392 final_fmt_pattern += split_fmt[i] 

393 if i < len(escaped_data): 

394 final_fmt_pattern += escaped_data[i][1:-1] 

395 

396 # Wrap final_fmt_pattern in a custom word boundary to strictly 

397 # match the formatting pattern and filter out date and time formats 

398 # that include junk such as: blah1998-09-12 blah, blah 1998-09-12blah, 

399 # blah1998-09-12blah. The custom word boundary matches every character 

400 # that is not a whitespace character to allow for searching for a date 

401 # and time string in a natural language sentence. Therefore, searching 

402 # for a string of the form YYYY-MM-DD in "blah 1998-09-12 blah" will 

403 # work properly. 

404 # Certain punctuation before or after the target pattern such as 

405 # "1998-09-12," is permitted. For the full list of valid punctuation, 

406 # see the documentation. 

407 

408 starting_word_boundary = ( 

409 r"(?<!\S\S)" # Don't have two consecutive non-whitespace characters. This ensures that we allow cases 

410 # like .11.25.2019 but not 1.11.25.2019 (for pattern MM.DD.YYYY) 

411 r"(?<![^\,\.\;\:\?\!\"\'\`\[\]\{\}\(\)<>\s])" # This is the list of punctuation that is ok before the 

412 # pattern (i.e. "It can't not be these characters before the pattern") 

413 r"(\b|^)" 

414 # The \b is to block cases like 1201912 but allow 201912 for pattern YYYYMM. The ^ was necessary to allow a 

415 # negative number through i.e. before epoch numbers 

416 ) 

417 ending_word_boundary = ( 

418 r"(?=[\,\.\;\:\?\!\"\'\`\[\]\{\}\(\)\<\>]?" # Positive lookahead stating that these punctuation marks 

419 # can appear after the pattern at most 1 time 

420 r"(?!\S))" # Don't allow any non-whitespace character after the punctuation 

421 ) 

422 bounded_fmt_pattern = r"{}{}{}".format( 

423 starting_word_boundary, final_fmt_pattern, ending_word_boundary 

424 ) 

425 

426 return tokens, re.compile(bounded_fmt_pattern, flags=re.IGNORECASE) 

427 

428 @overload 

429 def _parse_token( 

430 self, 

431 token: Literal[ 

432 "YYYY", 

433 "YY", 

434 "MM", 

435 "M", 

436 "DDDD", 

437 "DDD", 

438 "DD", 

439 "D", 

440 "Do", 

441 "HH", 

442 "hh", 

443 "h", 

444 "H", 

445 "mm", 

446 "m", 

447 "ss", 

448 "s", 

449 "x", 

450 ], 

451 value: Union[str, bytes, SupportsInt, bytearray], 

452 parts: _Parts, 

453 ) -> None: 

454 ... # pragma: no cover 

455 

456 @overload 

457 def _parse_token( 

458 self, 

459 token: Literal["X"], 

460 value: Union[str, bytes, SupportsFloat, bytearray], 

461 parts: _Parts, 

462 ) -> None: 

463 ... # pragma: no cover 

464 

465 @overload 

466 def _parse_token( 

467 self, 

468 token: Literal["MMMM", "MMM", "dddd", "ddd", "S"], 

469 value: Union[str, bytes, bytearray], 

470 parts: _Parts, 

471 ) -> None: 

472 ... # pragma: no cover 

473 

474 @overload 

475 def _parse_token( 

476 self, 

477 token: Literal["a", "A", "ZZZ", "ZZ", "Z"], 

478 value: Union[str, bytes], 

479 parts: _Parts, 

480 ) -> None: 

481 ... # pragma: no cover 

482 

483 @overload 

484 def _parse_token( 

485 self, 

486 token: Literal["W"], 

487 value: Tuple[_WEEKDATE_ELEMENT, _WEEKDATE_ELEMENT, Optional[_WEEKDATE_ELEMENT]], 

488 parts: _Parts, 

489 ) -> None: 

490 ... # pragma: no cover 

491 

492 def _parse_token( 

493 self, 

494 token: Any, 

495 value: Any, 

496 parts: _Parts, 

497 ) -> None: 

498 if token == "YYYY": 

499 parts["year"] = int(value) 

500 

501 elif token == "YY": 

502 value = int(value) 

503 parts["year"] = 1900 + value if value > 68 else 2000 + value 

504 

505 elif token in ["MMMM", "MMM"]: 

506 # FIXME: month_number() is nullable 

507 parts["month"] = self.locale.month_number(value.lower()) # type: ignore[typeddict-item] 

508 

509 elif token in ["MM", "M"]: 

510 parts["month"] = int(value) 

511 

512 elif token in ["DDDD", "DDD"]: 

513 parts["day_of_year"] = int(value) 

514 

515 elif token in ["DD", "D"]: 

516 parts["day"] = int(value) 

517 

518 elif token == "Do": 

519 parts["day"] = int(value) 

520 

521 elif token == "dddd": 

522 # locale day names are 1-indexed 

523 day_of_week = [x.lower() for x in self.locale.day_names].index( 

524 value.lower() 

525 ) 

526 parts["day_of_week"] = day_of_week - 1 

527 

528 elif token == "ddd": 

529 # locale day abbreviations are 1-indexed 

530 day_of_week = [x.lower() for x in self.locale.day_abbreviations].index( 

531 value.lower() 

532 ) 

533 parts["day_of_week"] = day_of_week - 1 

534 

535 elif token.upper() in ["HH", "H"]: 

536 parts["hour"] = int(value) 

537 

538 elif token in ["mm", "m"]: 

539 parts["minute"] = int(value) 

540 

541 elif token in ["ss", "s"]: 

542 parts["second"] = int(value) 

543 

544 elif token == "S": 

545 # We have the *most significant* digits of an arbitrary-precision integer. 

546 # We want the six most significant digits as an integer, rounded. 

547 # IDEA: add nanosecond support somehow? Need datetime support for it first. 

548 value = value.ljust(7, "0") 

549 

550 # floating-point (IEEE-754) defaults to half-to-even rounding 

551 seventh_digit = int(value[6]) 

552 if seventh_digit == 5: 

553 rounding = int(value[5]) % 2 

554 elif seventh_digit > 5: 

555 rounding = 1 

556 else: 

557 rounding = 0 

558 

559 parts["microsecond"] = int(value[:6]) + rounding 

560 

561 elif token == "X": 

562 parts["timestamp"] = float(value) 

563 

564 elif token == "x": 

565 parts["expanded_timestamp"] = int(value) 

566 

567 elif token in ["ZZZ", "ZZ", "Z"]: 

568 parts["tzinfo"] = TzinfoParser.parse(value) 

569 

570 elif token in ["a", "A"]: 

571 if value in (self.locale.meridians["am"], self.locale.meridians["AM"]): 

572 parts["am_pm"] = "am" 

573 if "hour" in parts and not 0 <= parts["hour"] <= 12: 

574 raise ParserMatchError( 

575 f"Hour token value must be between 0 and 12 inclusive for token {token!r}." 

576 ) 

577 elif value in (self.locale.meridians["pm"], self.locale.meridians["PM"]): 

578 parts["am_pm"] = "pm" 

579 elif token == "W": 

580 parts["weekdate"] = value 

581 

582 @staticmethod 

583 def _build_datetime(parts: _Parts) -> datetime: 

584 weekdate = parts.get("weekdate") 

585 

586 if weekdate is not None: 

587 year, week = int(weekdate[0]), int(weekdate[1]) 

588 

589 if weekdate[2] is not None: 

590 _day = int(weekdate[2]) 

591 else: 

592 # day not given, default to 1 

593 _day = 1 

594 

595 date_string = f"{year}-{week}-{_day}" 

596 

597 # tokens for ISO 8601 weekdates 

598 dt = datetime.strptime(date_string, "%G-%V-%u") 

599 

600 parts["year"] = dt.year 

601 parts["month"] = dt.month 

602 parts["day"] = dt.day 

603 

604 timestamp = parts.get("timestamp") 

605 

606 if timestamp is not None: 

607 return datetime.fromtimestamp(timestamp, tz=tz.tzutc()) 

608 

609 expanded_timestamp = parts.get("expanded_timestamp") 

610 

611 if expanded_timestamp is not None: 

612 return datetime.fromtimestamp( 

613 normalize_timestamp(expanded_timestamp), 

614 tz=tz.tzutc(), 

615 ) 

616 

617 day_of_year = parts.get("day_of_year") 

618 

619 if day_of_year is not None: 

620 _year = parts.get("year") 

621 month = parts.get("month") 

622 if _year is None: 

623 raise ParserError( 

624 "Year component is required with the DDD and DDDD tokens." 

625 ) 

626 

627 if month is not None: 

628 raise ParserError( 

629 "Month component is not allowed with the DDD and DDDD tokens." 

630 ) 

631 

632 date_string = f"{_year}-{day_of_year}" 

633 try: 

634 dt = datetime.strptime(date_string, "%Y-%j") 

635 except ValueError: 

636 raise ParserError( 

637 f"The provided day of year {day_of_year!r} is invalid." 

638 ) 

639 

640 parts["year"] = dt.year 

641 parts["month"] = dt.month 

642 parts["day"] = dt.day 

643 

644 day_of_week: Optional[int] = parts.get("day_of_week") 

645 day = parts.get("day") 

646 

647 # If day is passed, ignore day of week 

648 if day_of_week is not None and day is None: 

649 year = parts.get("year", 1970) 

650 month = parts.get("month", 1) 

651 day = 1 

652 

653 # dddd => first day of week after epoch 

654 # dddd YYYY => first day of week in specified year 

655 # dddd MM YYYY => first day of week in specified year and month 

656 # dddd MM => first day after epoch in specified month 

657 next_weekday_dt = next_weekday(datetime(year, month, day), day_of_week) 

658 parts["year"] = next_weekday_dt.year 

659 parts["month"] = next_weekday_dt.month 

660 parts["day"] = next_weekday_dt.day 

661 

662 am_pm = parts.get("am_pm") 

663 hour = parts.get("hour", 0) 

664 

665 if am_pm == "pm" and hour < 12: 

666 hour += 12 

667 elif am_pm == "am" and hour == 12: 

668 hour = 0 

669 

670 # Support for midnight at the end of day 

671 if hour == 24: 

672 if parts.get("minute", 0) != 0: 

673 raise ParserError("Midnight at the end of day must not contain minutes") 

674 if parts.get("second", 0) != 0: 

675 raise ParserError("Midnight at the end of day must not contain seconds") 

676 if parts.get("microsecond", 0) != 0: 

677 raise ParserError( 

678 "Midnight at the end of day must not contain microseconds" 

679 ) 

680 hour = 0 

681 day_increment = 1 

682 else: 

683 day_increment = 0 

684 

685 # account for rounding up to 1000000 

686 microsecond = parts.get("microsecond", 0) 

687 if microsecond == 1000000: 

688 microsecond = 0 

689 second_increment = 1 

690 else: 

691 second_increment = 0 

692 

693 increment = timedelta(days=day_increment, seconds=second_increment) 

694 

695 return ( 

696 datetime( 

697 year=parts.get("year", 1), 

698 month=parts.get("month", 1), 

699 day=parts.get("day", 1), 

700 hour=hour, 

701 minute=parts.get("minute", 0), 

702 second=parts.get("second", 0), 

703 microsecond=microsecond, 

704 tzinfo=parts.get("tzinfo"), 

705 ) 

706 + increment 

707 ) 

708 

709 def _parse_multiformat(self, string: str, formats: Iterable[str]) -> datetime: 

710 _datetime: Optional[datetime] = None 

711 

712 for fmt in formats: 

713 try: 

714 _datetime = self.parse(string, fmt) 

715 break 

716 except ParserMatchError: 

717 pass 

718 

719 if _datetime is None: 

720 supported_formats = ", ".join(formats) 

721 raise ParserError( 

722 f"Could not match input {string!r} to any of the following formats: {supported_formats}." 

723 ) 

724 

725 return _datetime 

726 

727 # generates a capture group of choices separated by an OR operator 

728 @staticmethod 

729 def _generate_choice_re( 

730 choices: Iterable[str], flags: Union[int, re.RegexFlag] = 0 

731 ) -> Pattern[str]: 

732 return re.compile(r"({})".format("|".join(choices)), flags=flags) 

733 

734 

class TzinfoParser:
    """Turns a timezone expression into a ``tzinfo`` object."""

    # Optional "(UTC" prefix, optional sign, two-digit hours and optional
    # two-digit minutes (with or without ":"). Anchored at the start only.
    _TZINFO_RE: ClassVar[Pattern[str]] = re.compile(
        r"^(?:\(UTC)*([\+\-])?(\d{2})(?:\:?(\d{2}))?"
    )

    @classmethod
    def parse(cls, tzinfo_string: str) -> dt_tzinfo:
        """Return the ``tzinfo`` described by ``tzinfo_string``.

        Tried in order: the literal ``"local"``; the literals ``"utc"``,
        ``"UTC"`` and ``"Z"``; a numeric offset matched by ``_TZINFO_RE``;
        finally a lookup through ``tz.gettz``.

        :param tzinfo_string: the timezone expression to interpret.
        :returns: the matching ``tzinfo``.
        :raises ParserError: if none of the above yields a timezone.
        """
        if tzinfo_string == "local":
            return tz.tzlocal()

        if tzinfo_string in ("utc", "UTC", "Z"):
            return tz.tzutc()

        iso_match = cls._TZINFO_RE.match(tzinfo_string)
        if iso_match:
            sign, hours, minutes = iso_match.groups()
            # Minutes are optional in the pattern; treat a missing group as 0.
            offset_seconds = int(hours) * 3600 + int(minutes or 0) * 60
            if sign == "-":
                offset_seconds = -offset_seconds
            return tz.tzoffset(None, offset_seconds)

        named_zone = tz.gettz(tzinfo_string)
        if named_zone is None:
            raise ParserError(f"Could not parse timezone expression {tzinfo_string!r}.")

        return named_zone