Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/babel/dates.py: 12%

701 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-06-29 06:25 +0000

1""" 

2 babel.dates 

3 ~~~~~~~~~~~ 

4 

5 Locale dependent formatting and parsing of dates and times. 

6 

7 The default locale for the functions in this module is determined by the 

8 following environment variables, in that order: 

9 

10 * ``LC_TIME``, 

11 * ``LC_ALL``, and 

12 * ``LANG`` 

13 

14 :copyright: (c) 2013-2023 by the Babel Team. 

15 :license: BSD, see LICENSE for more details. 

16""" 

17 

18from __future__ import annotations 

19 

20import re 

21import warnings 

22from functools import lru_cache 

23from typing import TYPE_CHECKING, SupportsInt 

24 

25try: 

26 import pytz 

27except ModuleNotFoundError: 

28 pytz = None 

29 import zoneinfo 

30 

31import datetime 

32from collections.abc import Iterable 

33 

34from babel import localtime 

35from babel.core import Locale, default_locale, get_global 

36from babel.localedata import LocaleDataDict 

37 

if TYPE_CHECKING:
    # Typing-only names: this branch is skipped at runtime, so the aliases
    # below exist purely for static analysis of the annotations in this module.
    from typing_extensions import Literal, TypeAlias

    # Values accepted wherever a point in time is expected.
    _Instant: TypeAlias = datetime.date | datetime.time | float | None
    # Names of the predefined CLDR format lengths.
    _PredefinedTimeFormat: TypeAlias = Literal['full', 'long', 'medium', 'short']
    # Display contexts for calendar names.
    _Context: TypeAlias = Literal['format', 'stand-alone']
    # Values accepted wherever a datetime *or* a timezone may be passed.
    _DtOrTzinfo: TypeAlias = datetime.datetime | datetime.tzinfo | str | int | datetime.time | None

# "If a given short metazone form is known NOT to be understood in a given
#  locale and the parent locale has this value such that it would normally
#  be inherited, the inheritance of this value can be explicitly disabled by
#  use of the 'no inheritance marker' as the value, which is 3 simultaneous [sic]
#  empty set characters ( U+2205 )."
# - https://www.unicode.org/reports/tr35/tr35-dates.html#Metazone_Names
NO_INHERITANCE_MARKER = '\u2205' * 3

# Timezone objects shared by the helpers in this module.
UTC = datetime.timezone.utc
LOCALTZ = localtime.LOCALTZ

# Default locale for every function in this module that takes a `locale`.
LC_TIME = default_locale('LC_TIME')

58 

59 

60def _localize(tz: datetime.tzinfo, dt: datetime.datetime) -> datetime.datetime: 

61 # Support localizing with both pytz and zoneinfo tzinfos 

62 # nothing to do 

63 if dt.tzinfo is tz: 

64 return dt 

65 

66 if hasattr(tz, 'localize'): # pytz 

67 return tz.localize(dt) 

68 

69 if dt.tzinfo is None: 

70 # convert naive to localized 

71 return dt.replace(tzinfo=tz) 

72 

73 # convert timezones 

74 return dt.astimezone(tz) 

75 

76 

77def _get_dt_and_tzinfo(dt_or_tzinfo: _DtOrTzinfo) -> tuple[datetime.datetime | None, datetime.tzinfo]: 

78 """ 

79 Parse a `dt_or_tzinfo` value into a datetime and a tzinfo. 

80 

81 See the docs for this function's callers for semantics. 

82 

83 :rtype: tuple[datetime, tzinfo] 

84 """ 

85 if dt_or_tzinfo is None: 

86 dt = datetime.datetime.now() 

87 tzinfo = LOCALTZ 

88 elif isinstance(dt_or_tzinfo, str): 

89 dt = None 

90 tzinfo = get_timezone(dt_or_tzinfo) 

91 elif isinstance(dt_or_tzinfo, int): 

92 dt = None 

93 tzinfo = UTC 

94 elif isinstance(dt_or_tzinfo, (datetime.datetime, datetime.time)): 

95 dt = _get_datetime(dt_or_tzinfo) 

96 tzinfo = dt.tzinfo if dt.tzinfo is not None else UTC 

97 else: 

98 dt = None 

99 tzinfo = dt_or_tzinfo 

100 return dt, tzinfo 

101 

102 

def _get_tz_name(dt_or_tzinfo: _DtOrTzinfo) -> str:
    """
    Return the name of the timezone behind a time, datetime, or tzinfo value.

    pytz objects are identified by their ``zone`` attribute and ZoneInfo
    objects by a non-``None`` ``key``; any other tzinfo is asked for its
    ``tzname()``, using the supplied datetime or, failing that, the current
    UTC time.

    :rtype: str
    """
    moment, zone = _get_dt_and_tzinfo(dt_or_tzinfo)

    if hasattr(zone, 'zone'):  # pytz object
        return zone.zone

    if hasattr(zone, 'key') and zone.key is not None:  # ZoneInfo object
        return zone.key

    reference = moment or datetime.datetime.now(UTC)
    return zone.tzname(reference)

116 

117 

118def _get_datetime(instant: _Instant) -> datetime.datetime: 

119 """ 

120 Get a datetime out of an "instant" (date, time, datetime, number). 

121 

122 .. warning:: The return values of this function may depend on the system clock. 

123 

124 If the instant is None, the current moment is used. 

125 If the instant is a time, it's augmented with today's date. 

126 

127 Dates are converted to naive datetimes with midnight as the time component. 

128 

129 >>> from datetime import date, datetime 

130 >>> _get_datetime(date(2015, 1, 1)) 

131 datetime.datetime(2015, 1, 1, 0, 0) 

132 

133 UNIX timestamps are converted to datetimes. 

134 

135 >>> _get_datetime(1400000000) 

136 datetime.datetime(2014, 5, 13, 16, 53, 20) 

137 

138 Other values are passed through as-is. 

139 

140 >>> x = datetime(2015, 1, 1) 

141 >>> _get_datetime(x) is x 

142 True 

143 

144 :param instant: date, time, datetime, integer, float or None 

145 :type instant: date|time|datetime|int|float|None 

146 :return: a datetime 

147 :rtype: datetime 

148 """ 

149 if instant is None: 

150 return datetime.datetime.now(UTC).replace(tzinfo=None) 

151 elif isinstance(instant, (int, float)): 

152 return datetime.datetime.fromtimestamp(instant, UTC).replace(tzinfo=None) 

153 elif isinstance(instant, datetime.time): 

154 return datetime.datetime.combine(datetime.date.today(), instant) 

155 elif isinstance(instant, datetime.date) and not isinstance(instant, datetime.datetime): 

156 return datetime.datetime.combine(instant, datetime.time()) 

157 # TODO (3.x): Add an assertion/type check for this fallthrough branch: 

158 return instant 

159 

160 

161def _ensure_datetime_tzinfo(dt: datetime.datetime, tzinfo: datetime.tzinfo | None = None) -> datetime.datetime: 

162 """ 

163 Ensure the datetime passed has an attached tzinfo. 

164 

165 If the datetime is tz-naive to begin with, UTC is attached. 

166 

167 If a tzinfo is passed in, the datetime is normalized to that timezone. 

168 

169 >>> from datetime import datetime 

170 >>> _get_tz_name(_ensure_datetime_tzinfo(datetime(2015, 1, 1))) 

171 'UTC' 

172 

173 >>> tz = get_timezone("Europe/Stockholm") 

174 >>> _ensure_datetime_tzinfo(datetime(2015, 1, 1, 13, 15, tzinfo=UTC), tzinfo=tz).hour 

175 14 

176 

177 :param datetime: Datetime to augment. 

178 :param tzinfo: optional tzinfo 

179 :return: datetime with tzinfo 

180 :rtype: datetime 

181 """ 

182 if dt.tzinfo is None: 

183 dt = dt.replace(tzinfo=UTC) 

184 if tzinfo is not None: 

185 dt = dt.astimezone(get_timezone(tzinfo)) 

186 if hasattr(tzinfo, 'normalize'): # pytz 

187 dt = tzinfo.normalize(dt) 

188 return dt 

189 

190 

191def _get_time( 

192 time: datetime.time | datetime.datetime | None, 

193 tzinfo: datetime.tzinfo | None = None, 

194) -> datetime.time: 

195 """ 

196 Get a timezoned time from a given instant. 

197 

198 .. warning:: The return values of this function may depend on the system clock. 

199 

200 :param time: time, datetime or None 

201 :rtype: time 

202 """ 

203 if time is None: 

204 time = datetime.datetime.now(UTC) 

205 elif isinstance(time, (int, float)): 

206 time = datetime.datetime.fromtimestamp(time, UTC) 

207 

208 if time.tzinfo is None: 

209 time = time.replace(tzinfo=UTC) 

210 

211 if isinstance(time, datetime.datetime): 

212 if tzinfo is not None: 

213 time = time.astimezone(tzinfo) 

214 if hasattr(tzinfo, 'normalize'): # pytz 

215 time = tzinfo.normalize(time) 

216 time = time.timetz() 

217 elif tzinfo is not None: 

218 time = time.replace(tzinfo=tzinfo) 

219 return time 

220 

221 

def get_timezone(zone: str | datetime.tzinfo | None = None) -> datetime.tzinfo:
    """Look up a timezone by name.

    The object returned comes from ``pytz`` when it is installed and from
    ``zoneinfo`` otherwise.  Either way it implements the `tzinfo` interface
    and can be used with all of the functions of Babel that operate with
    dates.

    If a timezone is not known a :exc:`LookupError` is raised.  If `zone`
    is ``None`` a local zone object is returned.

    :param zone: the name of the timezone to look up.  If a timezone object
                 itself is passed in, it's returned unchanged.
    """
    if zone is None:
        return LOCALTZ
    if not isinstance(zone, str):
        return zone

    # Pick the backend once; both share the same "call with a name" shape.
    if pytz:
        lookup, not_found = pytz.timezone, pytz.UnknownTimeZoneError
    else:
        assert zoneinfo
        lookup, not_found = zoneinfo.ZoneInfo, zoneinfo.ZoneInfoNotFoundError

    try:
        return lookup(zone)
    except not_found as err:
        # Keep a reference: the ``as`` name is unbound when the handler ends.
        cause = err

    raise LookupError(f"Unknown timezone {zone}") from cause

252 

253 

def get_period_names(
    width: Literal['abbreviated', 'narrow', 'wide'] = 'wide',
    context: _Context = 'stand-alone',
    locale: Locale | str | None = LC_TIME,
) -> LocaleDataDict:
    """Return the locale's names for day periods (AM/PM).

    >>> get_period_names(locale='en_US')['am']
    u'AM'

    :param width: the width to use, one of "abbreviated", "narrow", or "wide"
    :param context: the context, either "format" or "stand-alone"
    :param locale: the `Locale` object, or a locale string
    """
    periods = Locale.parse(locale).day_periods
    by_width = periods[context]
    return by_width[width]

266 

267 

def get_day_names(
    width: Literal['abbreviated', 'narrow', 'short', 'wide'] = 'wide',
    context: _Context = 'format',
    locale: Locale | str | None = LC_TIME,
) -> LocaleDataDict:
    """Return the locale's weekday names for the requested width and context.

    >>> get_day_names('wide', locale='en_US')[1]
    u'Tuesday'
    >>> get_day_names('short', locale='en_US')[1]
    u'Tu'
    >>> get_day_names('abbreviated', locale='es')[1]
    u'mar'
    >>> get_day_names('narrow', context='stand-alone', locale='de_DE')[1]
    u'D'

    :param width: the width to use, one of "wide", "abbreviated", "short" or "narrow"
    :param context: the context, either "format" or "stand-alone"
    :param locale: the `Locale` object, or a locale string
    """
    days = Locale.parse(locale).days
    by_width = days[context]
    return by_width[width]

286 

287 

def get_month_names(
    width: Literal['abbreviated', 'narrow', 'wide'] = 'wide',
    context: _Context = 'format',
    locale: Locale | str | None = LC_TIME,
) -> LocaleDataDict:
    """Return the locale's month names for the requested width and context.

    >>> get_month_names('wide', locale='en_US')[1]
    u'January'
    >>> get_month_names('abbreviated', locale='es')[1]
    u'ene'
    >>> get_month_names('narrow', context='stand-alone', locale='de_DE')[1]
    u'J'

    :param width: the width to use, one of "wide", "abbreviated", or "narrow"
    :param context: the context, either "format" or "stand-alone"
    :param locale: the `Locale` object, or a locale string
    """
    months = Locale.parse(locale).months
    by_width = months[context]
    return by_width[width]

304 

305 

def get_quarter_names(
    width: Literal['abbreviated', 'narrow', 'wide'] = 'wide',
    context: _Context = 'format',
    locale: Locale | str | None = LC_TIME,
) -> LocaleDataDict:
    """Return the locale's quarter names for the requested width and context.

    >>> get_quarter_names('wide', locale='en_US')[1]
    u'1st quarter'
    >>> get_quarter_names('abbreviated', locale='de_DE')[1]
    u'Q1'
    >>> get_quarter_names('narrow', locale='de_DE')[1]
    u'1'

    :param width: the width to use, one of "wide", "abbreviated", or "narrow"
    :param context: the context, either "format" or "stand-alone"
    :param locale: the `Locale` object, or a locale string
    """
    quarters = Locale.parse(locale).quarters
    by_width = quarters[context]
    return by_width[width]

322 

323 

def get_era_names(
    width: Literal['abbreviated', 'narrow', 'wide'] = 'wide',
    locale: Locale | str | None = LC_TIME,
) -> LocaleDataDict:
    """Return the locale's era names for the requested width.

    >>> get_era_names('wide', locale='en_US')[1]
    u'Anno Domini'
    >>> get_era_names('abbreviated', locale='de_DE')[1]
    u'n. Chr.'

    :param width: the width to use, either "wide", "abbreviated", or "narrow"
    :param locale: the `Locale` object, or a locale string
    """
    eras = Locale.parse(locale).eras
    return eras[width]

337 

338 

def get_date_format(
    format: _PredefinedTimeFormat = 'medium',
    locale: Locale | str | None = LC_TIME,
) -> DateTimePattern:
    """Return the locale's date pattern for one of the predefined formats.

    >>> get_date_format(locale='en_US')
    <DateTimePattern u'MMM d, y'>
    >>> get_date_format('full', locale='de_DE')
    <DateTimePattern u'EEEE, d. MMMM y'>

    :param format: the format to use, one of "full", "long", "medium", or
                   "short"
    :param locale: the `Locale` object, or a locale string
    """
    date_formats = Locale.parse(locale).date_formats
    return date_formats[format]

353 

354 

def get_datetime_format(
    format: _PredefinedTimeFormat = 'medium',
    locale: Locale | str | None = LC_TIME,
) -> DateTimePattern:
    """Return the locale's pattern for combining a date and a time.

    >>> get_datetime_format(locale='en_US')
    u'{1}, {0}'

    A `format` that the locale has no entry for is looked up under the
    ``None`` key instead.

    :param format: the format to use, one of "full", "long", "medium", or
                   "short"
    :param locale: the `Locale` object, or a locale string
    """
    patterns = Locale.parse(locale).datetime_formats
    key = format if format in patterns else None
    return patterns[key]

370 

371 

def get_time_format(
    format: _PredefinedTimeFormat = 'medium',
    locale: Locale | str | None = LC_TIME,
) -> DateTimePattern:
    """Return the locale's time pattern for one of the predefined formats.

    >>> get_time_format(locale='en_US')
    <DateTimePattern u'h:mm:ss\u202fa'>
    >>> get_time_format('full', locale='de_DE')
    <DateTimePattern u'HH:mm:ss zzzz'>

    :param format: the format to use, one of "full", "long", "medium", or
                   "short"
    :param locale: the `Locale` object, or a locale string
    """
    time_formats = Locale.parse(locale).time_formats
    return time_formats[format]

386 

387 

def get_timezone_gmt(
    datetime: _Instant = None,
    width: Literal['long', 'short', 'iso8601', 'iso8601_short'] = 'long',
    locale: Locale | str | None = LC_TIME,
    return_z: bool = False,
) -> str:
    """Return the timezone associated with the given `datetime` object formatted
    as string indicating the offset from GMT.

    >>> from datetime import datetime
    >>> dt = datetime(2007, 4, 1, 15, 30)
    >>> get_timezone_gmt(dt, locale='en')
    u'GMT+00:00'
    >>> get_timezone_gmt(dt, locale='en', return_z=True)
    'Z'
    >>> get_timezone_gmt(dt, locale='en', width='iso8601_short')
    u'+00'
    >>> tz = get_timezone('America/Los_Angeles')
    >>> dt = _localize(tz, datetime(2007, 4, 1, 15, 30))
    >>> get_timezone_gmt(dt, locale='en')
    u'GMT-07:00'
    >>> get_timezone_gmt(dt, 'short', locale='en')
    u'-0700'
    >>> get_timezone_gmt(dt, locale='en', width='iso8601_short')
    u'-07'

    The long format depends on the locale, for example in France the acronym
    UTC string is used instead of GMT:

    >>> get_timezone_gmt(dt, 'long', locale='fr_FR')
    u'UTC-07:00'

    Offsets that are negative and not a whole number of hours keep their
    hour and minute magnitude:

    >>> from datetime import timedelta, timezone
    >>> dt = datetime(2007, 4, 1, 15, 30, tzinfo=timezone(timedelta(hours=-3, minutes=-30)))
    >>> get_timezone_gmt(dt, 'iso8601', locale='en')
    '-03:30'

    .. versionadded:: 0.9

    :param datetime: the ``datetime`` object; if `None`, the current date and
                     time in UTC is used
    :param width: either "long" or "short" or "iso8601" or "iso8601_short"
    :param locale: the `Locale` object, or a locale string
    :param return_z: True or False; Function returns indicator "Z"
                     when local time offset is 0
    """
    datetime = _ensure_datetime_tzinfo(_get_datetime(datetime))
    locale = Locale.parse(locale)

    offset = datetime.tzinfo.utcoffset(datetime)
    total_seconds = offset.days * 24 * 60 * 60 + offset.seconds

    # Split sign and magnitude *before* dividing: ``divmod`` floors towards
    # negative infinity, so dividing a negative total directly would turn
    # -03:30 into hours=-4, minutes=30.
    sign = '-' if total_seconds < 0 else '+'
    hours, remainder = divmod(abs(total_seconds), 3600)
    minutes = remainder // 60

    if return_z and hours == 0 and remainder == 0:
        return 'Z'
    if remainder == 0 and width == 'iso8601_short':
        # Whole-hour offsets drop the minutes entirely in this width.
        return f'{sign}{hours:02d}'
    if width == 'short' or width == 'iso8601_short':
        return f'{sign}{hours:02d}{minutes:02d}'

    offset_text = f'{sign}{hours:02d}:{minutes:02d}'
    if width == 'iso8601':
        return offset_text
    # Long form: the locale supplies the wrapper, e.g. "GMT%s".
    return locale.zone_formats['gmt'] % offset_text

446 

447 

def get_timezone_location(
    dt_or_tzinfo: _DtOrTzinfo = None,
    locale: Locale | str | None = LC_TIME,
    return_city: bool = False,
) -> str:
    """Return a representation of the given timezone using "location format".

    The result depends on both the local display name of the country and the
    city associated with the time zone:

    >>> tz = get_timezone('America/St_Johns')
    >>> print(get_timezone_location(tz, locale='de_DE'))
    Kanada (St. John’s) (Ortszeit)
    >>> print(get_timezone_location(tz, locale='en'))
    Canada (St. John’s) Time
    >>> print(get_timezone_location(tz, locale='en', return_city=True))
    St. John’s
    >>> tz = get_timezone('America/Mexico_City')
    >>> get_timezone_location(tz, locale='de_DE')
    u'Mexiko (Mexiko-Stadt) (Ortszeit)'

    If the timezone is associated with a country that uses only a single
    timezone, just the localized country name is returned.  (For comparison,
    this is what `get_timezone_name` gives for such a zone:)

    >>> tz = get_timezone('Europe/Berlin')
    >>> get_timezone_name(tz, locale='de_DE')
    u'Mitteleurop\\xe4ische Zeit'

    .. versionadded:: 0.9

    :param dt_or_tzinfo: the ``datetime`` or ``tzinfo`` object that determines
                         the timezone; if `None`, the local timezone is used
    :param locale: the `Locale` object, or a locale string
    :param return_city: True or False, if True then return exemplar city (location)
                        for the time zone
    :return: the localized timezone name using location format

    """
    locale = Locale.parse(locale)

    # Resolve the zone name and map aliases to the canonical time-zone code.
    raw_zone = _get_tz_name(dt_or_tzinfo)
    zone = get_global('zone_aliases').get(raw_zone, raw_zone)

    info = locale.time_zones.get(zone, {})

    region_format = locale.zone_formats['region']
    territory = get_global('zone_territories').get(zone)
    if territory not in locale.territories:
        territory = 'ZZ'  # invalid/unknown
    territory_name = locale.territories[territory]

    # A territory with exactly one timezone is described by its name alone.
    if not return_city and territory:
        zones_in_territory = get_global('territory_zones').get(territory, [])
        if len(zones_in_territory) == 1:
            return region_format % territory_name

    # Otherwise, include the city in the output
    fallback_format = locale.zone_formats['fallback']
    if 'city' not in info:
        metazone = get_global('meta_zones').get(zone)
        metazone_info = locale.meta_zones.get(metazone, {})
        if 'city' in metazone_info:
            city_name = metazone_info['city']
        else:
            # Last resort: derive a city from the zone ID ("Area/Some_City").
            tail = zone.split('/', 1)[1] if '/' in zone else zone
            city_name = tail.replace('_', ' ')
    else:
        city_name = info['city']

    if return_city:
        return city_name

    city_and_territory = fallback_format % {
        '0': city_name,
        '1': territory_name,
    }
    return region_format % city_and_territory

526 

527 

def get_timezone_name(
    dt_or_tzinfo: _DtOrTzinfo = None,
    width: Literal['long', 'short'] = 'long',
    uncommon: bool = False,
    locale: Locale | str | None = LC_TIME,
    zone_variant: Literal['generic', 'daylight', 'standard'] | None = None,
    return_zone: bool = False,
) -> str:
    r"""Return the localized display name for the given timezone. The timezone
    may be specified using a ``datetime`` or `tzinfo` object.

    >>> from datetime import time
    >>> dt = time(15, 30, tzinfo=get_timezone('America/Los_Angeles'))
    >>> get_timezone_name(dt, locale='en_US')  # doctest: +SKIP
    u'Pacific Standard Time'
    >>> get_timezone_name(dt, locale='en_US', return_zone=True)
    'America/Los_Angeles'
    >>> get_timezone_name(dt, width='short', locale='en_US')  # doctest: +SKIP
    u'PST'

    When only a `tzinfo` object is passed, without a concrete `datetime`,
    the display name does not depend on daylight savings time. That is
    useful for timezone pickers, or for events that recur across DST changes:

    >>> tz = get_timezone('America/Los_Angeles')
    >>> get_timezone_name(tz, locale='en_US')
    u'Pacific Time'
    >>> get_timezone_name(tz, 'short', locale='en_US')
    u'PT'

    If no localized display name for the timezone is available, and the timezone
    is associated with a country that uses only a single timezone, the name of
    that country is returned, formatted according to the locale:

    >>> tz = get_timezone('Europe/Berlin')
    >>> get_timezone_name(tz, locale='de_DE')
    u'Mitteleurop\xe4ische Zeit'
    >>> get_timezone_name(tz, locale='pt_BR')
    u'Hor\xe1rio da Europa Central'

    On the other hand, if the country uses multiple timezones, the city is also
    included in the representation:

    >>> tz = get_timezone('America/St_Johns')
    >>> get_timezone_name(tz, locale='de_DE')
    u'Neufundland-Zeit'

    Note that short format is currently not supported for all timezones and
    all locales.  This is partially because not every timezone has a short
    code in every locale.  In that case it currently falls back to the long
    format.

    For more information see `LDML Appendix J: Time Zone Display Names
    <https://www.unicode.org/reports/tr35/#Time_Zone_Fallback>`_

    .. versionadded:: 0.9

    .. versionchanged:: 1.0
       Added `zone_variant` support.

    :param dt_or_tzinfo: the ``datetime`` or ``tzinfo`` object that determines
                         the timezone; if a ``tzinfo`` object is used, the
                         resulting display name will be generic, i.e.
                         independent of daylight savings time; if `None`, the
                         current date in UTC is assumed
    :param width: either "long" or "short"
    :param uncommon: deprecated and ignored
    :param zone_variant: defines the zone variation to return. By default the
                           variation is defined from the datetime object
                           passed in. If no datetime object is passed in, the
                           ``'generic'`` variation is assumed. The following
                           values are valid: ``'generic'``, ``'daylight'`` and
                           ``'standard'``.
    :param locale: the `Locale` object, or a locale string
    :param return_zone: True or False. If true then function
                        returns long time zone ID
    :raises ValueError: if `zone_variant` is given but is not one of the
                        valid values
    """
    dt, tzinfo = _get_dt_and_tzinfo(dt_or_tzinfo)
    locale = Locale.parse(locale)

    zone = _get_tz_name(dt_or_tzinfo)

    if zone_variant is not None:
        if zone_variant not in ('generic', 'standard', 'daylight'):
            raise ValueError('Invalid zone variation')
    elif dt is None:
        # No concrete moment: DST cannot be determined.
        zone_variant = 'generic'
    elif tzinfo.dst(dt):
        zone_variant = "daylight"
    else:
        zone_variant = "standard"

    # Get the canonical time-zone code
    zone = get_global('zone_aliases').get(zone, zone)
    if return_zone:
        return zone

    # Try explicitly translated zone names first
    zone_info = locale.time_zones.get(zone, {})
    if width in zone_info and zone_variant in zone_info[width]:
        return zone_info[width][zone_variant]

    # Next, try the names of the metazone the zone belongs to.
    metazone = get_global('meta_zones').get(zone)
    if metazone:
        metazone_info = locale.meta_zones.get(metazone, {})
        if width in metazone_info:
            display_name = metazone_info[width].get(zone_variant)
            if width == 'short' and display_name == NO_INHERITANCE_MARKER:
                # If the short form is marked no-inheritance,
                # try to fall back to the long name instead.
                display_name = metazone_info.get('long', {}).get(zone_variant)
            if display_name:
                return display_name

    # If we have a concrete datetime, we assume that the result can't be
    # independent of daylight savings time, so we return the GMT offset
    if dt is not None:
        return get_timezone_gmt(dt, width=width, locale=locale)

    return get_timezone_location(dt_or_tzinfo, locale=locale)

648 

649 

def format_date(
    date: datetime.date | None = None,
    format: _PredefinedTimeFormat | str = 'medium',
    locale: Locale | str | None = LC_TIME,
) -> str:
    """Return a date formatted according to the given pattern.

    >>> from datetime import date
    >>> d = date(2007, 4, 1)
    >>> format_date(d, locale='en_US')
    u'Apr 1, 2007'
    >>> format_date(d, format='full', locale='de_DE')
    u'Sonntag, 1. April 2007'

    Instead of one of the locale's predefined formats, a custom date
    pattern may be given:

    >>> format_date(d, "EEE, MMM d, ''yy", locale='en')
    u"Sun, Apr 1, '07"

    :param date: the ``date`` or ``datetime`` object; if `None`, the current
                 date is used
    :param format: one of "full", "long", "medium", or "short", or a custom
                   date/time pattern
    :param locale: a `Locale` object or a locale identifier
    """
    day = date
    if day is None:
        day = datetime.date.today()
    elif isinstance(day, datetime.datetime):
        # Only the calendar part of a datetime is formatted.
        day = day.date()

    locale = Locale.parse(locale)
    pattern_source = format
    if pattern_source in ('full', 'long', 'medium', 'short'):
        pattern_source = get_date_format(pattern_source, locale=locale)
    return parse_pattern(pattern_source).apply(day, locale)

686 

687 

def format_datetime(
    datetime: _Instant = None,
    format: _PredefinedTimeFormat | str = 'medium',
    tzinfo: datetime.tzinfo | None = None,
    locale: Locale | str | None = LC_TIME,
) -> str:
    r"""Return a date formatted according to the given pattern.

    >>> from datetime import datetime
    >>> dt = datetime(2007, 4, 1, 15, 30)
    >>> format_datetime(dt, locale='en_US')
    u'Apr 1, 2007, 3:30:00\u202fPM'

    For any pattern requiring the display of the timezone:

    >>> format_datetime(dt, 'full', tzinfo=get_timezone('Europe/Paris'),
    ...                 locale='fr_FR')
    'dimanche 1 avril 2007, 17:30:00 heure d’été d’Europe centrale'
    >>> format_datetime(dt, "yyyy.MM.dd G 'at' HH:mm:ss zzz",
    ...                 tzinfo=get_timezone('US/Eastern'), locale='en')
    u'2007.04.01 AD at 11:30:00 EDT'

    :param datetime: the `datetime` object; if `None`, the current date and
                     time is used
    :param format: one of "full", "long", "medium", or "short", or a custom
                   date/time pattern
    :param tzinfo: the timezone to apply to the time for display
    :param locale: a `Locale` object or a locale identifier
    """
    moment = _ensure_datetime_tzinfo(_get_datetime(datetime), tzinfo)

    locale = Locale.parse(locale)
    if format not in ('full', 'long', 'medium', 'short'):
        # Custom pattern: apply it to the whole datetime in one go.
        return parse_pattern(format).apply(moment, locale)

    # Predefined format: the locale provides a template whose ``{0}`` slot
    # takes the time and whose ``{1}`` slot takes the date.
    template = get_datetime_format(format, locale=locale).replace("'", "")
    time_text = format_time(moment, format, tzinfo=None, locale=locale)
    template = template.replace('{0}', time_text)
    date_text = format_date(moment, format, locale=locale)
    return template.replace('{1}', date_text)

728 

729 

def format_time(
    time: datetime.time | datetime.datetime | float | None = None,
    format: _PredefinedTimeFormat | str = 'medium',
    tzinfo: datetime.tzinfo | None = None,
    locale: Locale | str | None = LC_TIME,
) -> str:
    r"""Return a time formatted according to the given pattern.

    >>> from datetime import datetime, time
    >>> t = time(15, 30)
    >>> format_time(t, locale='en_US')
    u'3:30:00\u202fPM'
    >>> format_time(t, format='short', locale='de_DE')
    u'15:30'

    Instead of one of the locale's predefined formats, a custom time
    pattern may be given:

    >>> format_time(t, "hh 'o''clock' a", locale='en')
    u"03 o'clock PM"

    For any pattern requiring the display of the time-zone a
    timezone has to be specified explicitly:

    >>> t = datetime(2007, 4, 1, 15, 30)
    >>> tzinfo = get_timezone('Europe/Paris')
    >>> t = _localize(tzinfo, t)
    >>> format_time(t, format='full', tzinfo=tzinfo, locale='fr_FR')
    '15:30:00 heure d’été d’Europe centrale'
    >>> format_time(t, "hh 'o''clock' a, zzzz", tzinfo=get_timezone('US/Eastern'),
    ...             locale='en')
    u"09 o'clock AM, Eastern Daylight Time"

    As that example shows, when this function gets passed a
    ``datetime.datetime`` value, the actual time in the formatted string is
    adjusted to the timezone specified by the `tzinfo` parameter. If the
    ``datetime`` is "naive" (i.e. it has no associated timezone information),
    it is assumed to be in UTC.

    These timezone calculations are **not** performed if the value is of type
    ``datetime.time``, as without date information there's no way to determine
    what a given time would translate to in a different timezone without
    information about whether daylight savings time is in effect or not. This
    means that time values are left as-is, and the value of the `tzinfo`
    parameter is only used to display the timezone name if needed:

    >>> t = time(15, 30)
    >>> format_time(t, format='full', tzinfo=get_timezone('Europe/Paris'),
    ...             locale='fr_FR')  # doctest: +SKIP
    u'15:30:00 heure normale d\u2019Europe centrale'
    >>> format_time(t, format='full', tzinfo=get_timezone('US/Eastern'),
    ...             locale='en_US')  # doctest: +SKIP
    u'3:30:00\u202fPM Eastern Standard Time'

    :param time: the ``time`` or ``datetime`` object; if `None`, the current
                 time in UTC is used
    :param format: one of "full", "long", "medium", or "short", or a custom
                   date/time pattern
    :param tzinfo: the time-zone to apply to the time for display
    :param locale: a `Locale` object or a locale identifier
    """
    # Remember the calendar date of a datetime input (taken before any
    # timezone conversion); the pattern uses it to pick the right timezone
    # variant.
    if isinstance(time, datetime.datetime):
        ref_date = time.date()
    else:
        ref_date = None

    clock = _get_time(time, tzinfo)

    locale = Locale.parse(locale)
    pattern_source = format
    if pattern_source in ('full', 'long', 'medium', 'short'):
        pattern_source = get_time_format(pattern_source, locale=locale)
    pattern = parse_pattern(pattern_source)
    return pattern.apply(clock, locale, reference_date=ref_date)

801 

802 

def format_skeleton(
    skeleton: str,
    datetime: _Instant = None,
    tzinfo: datetime.tzinfo | None = None,
    fuzzy: bool = True,
    locale: Locale | str | None = LC_TIME,
) -> str:
    r"""Return a time and/or date formatted according to the given pattern.

    The skeletons are defined in the CLDR data and provide more flexibility
    than the simple short/long/medium formats, but are a bit harder to use.
    They are defined using the date/time symbols without order or punctuation
    and map to a suitable format for the given locale.

    >>> from datetime import datetime
    >>> t = datetime(2007, 4, 1, 15, 30)
    >>> format_skeleton('MMMEd', t, locale='fr')
    u'dim. 1 avr.'
    >>> format_skeleton('MMMEd', t, locale='en')
    u'Sun, Apr 1'
    >>> format_skeleton('yMMd', t, locale='fi')  # yMMd is not in the Finnish locale; yMd gets used
    u'1.4.2007'
    >>> format_skeleton('yMMd', t, fuzzy=False, locale='fi')  # yMMd is not in the Finnish locale, an error is thrown
    Traceback (most recent call last):
        ...
    KeyError: yMMd

    After the skeleton is resolved to a pattern `format_datetime` is called so
    all timezone processing etc is the same as for that.

    :param skeleton: A date time skeleton as defined in the cldr data.
    :param datetime: the ``time`` or ``datetime`` object; if `None`, the current
                 time in UTC is used
    :param tzinfo: the time-zone to apply to the time for display
    :param fuzzy: If the skeleton is not found, allow choosing a skeleton that's
                  close enough to it.
    :param locale: a `Locale` object or a locale identifier
    """
    locale = Locale.parse(locale)
    known_skeletons = locale.datetime_skeletons

    resolved = skeleton
    if fuzzy and resolved not in known_skeletons:
        # Substitute the closest skeleton the locale actually defines.
        resolved = match_skeleton(resolved, known_skeletons)

    pattern = known_skeletons[resolved]
    return format_datetime(datetime, pattern, tzinfo, locale)

846 

847 

#: ``(unit name, seconds per unit)`` pairs, ordered from the largest unit to
#: the smallest.  A year is counted as 365 days and a month as 30 days.
TIMEDELTA_UNITS: tuple[tuple[str, int], ...] = (
    ('year', 365 * 24 * 60 * 60),
    ('month', 30 * 24 * 60 * 60),
    ('week', 7 * 24 * 60 * 60),
    ('day', 24 * 60 * 60),
    ('hour', 60 * 60),
    ('minute', 60),
    ('second', 1),
)

857 

858 

def format_timedelta(
    delta: datetime.timedelta | int,
    granularity: Literal['year', 'month', 'week', 'day', 'hour', 'minute', 'second'] = 'second',
    threshold: float = .85,
    add_direction: bool = False,
    format: Literal['narrow', 'short', 'medium', 'long'] = 'long',
    locale: Locale | str | None = LC_TIME,
) -> str:
    """Return a time delta according to the rules of the given locale.

    >>> from datetime import timedelta
    >>> format_timedelta(timedelta(weeks=12), locale='en_US')
    u'3 months'
    >>> format_timedelta(timedelta(seconds=1), locale='es')
    u'1 segundo'

    The granularity parameter can be provided to alter the lowest unit
    presented, which defaults to a second.

    >>> format_timedelta(timedelta(hours=3), granularity='day', locale='en_US')
    u'1 day'

    The threshold parameter can be used to determine at which value the
    presentation switches to the next higher unit. A higher threshold factor
    means the presentation will switch later. For example:

    >>> format_timedelta(timedelta(hours=23), threshold=0.9, locale='en_US')
    u'1 day'
    >>> format_timedelta(timedelta(hours=23), threshold=1.1, locale='en_US')
    u'23 hours'

    In addition directional information can be provided that informs
    the user if the date is in the past or in the future:

    >>> format_timedelta(timedelta(hours=1), add_direction=True, locale='en')
    u'in 1 hour'
    >>> format_timedelta(timedelta(hours=-1), add_direction=True, locale='en')
    u'1 hour ago'

    The format parameter controls how compact or wide the presentation is:

    >>> format_timedelta(timedelta(hours=3), format='short', locale='en')
    u'3 hr'
    >>> format_timedelta(timedelta(hours=3), format='narrow', locale='en')
    u'3h'

    :param delta: a ``timedelta`` object representing the time difference to
                  format, or the delta in seconds as an `int` value
    :param granularity: determines the smallest unit that should be displayed,
                        the value can be one of "year", "month", "week", "day",
                        "hour", "minute" or "second"
    :param threshold: factor that determines at which point the presentation
                      switches to the next higher unit
    :param add_direction: if this flag is set to `True` the return value will
                          include directional information. For instance a
                          positive timedelta will include the information about
                          it being in the future, a negative will be information
                          about the value being in the past.
    :param format: the format, can be "narrow", "short" or "long". (
                   "medium" is deprecated, currently converted to "long" to
                   maintain compatibility)
    :param locale: a `Locale` object or a locale identifier
    :return: the formatted delta, or an empty string if the locale data
             provides no usable pattern
    :raise TypeError: if `format` is not one of the accepted values
    """
    if format not in ('narrow', 'short', 'medium', 'long'):
        raise TypeError('Format must be one of "narrow", "short" or "long"')
    if format == 'medium':
        warnings.warn(
            '"medium" value for format param of format_timedelta'
            ' is deprecated. Use "long" instead',
            category=DeprecationWarning,
            stacklevel=2,
        )
        format = 'long'
    if isinstance(delta, datetime.timedelta):
        # Sub-second precision (microseconds) is deliberately dropped here.
        seconds = int((delta.days * 86400) + delta.seconds)
    else:
        seconds = delta
    locale = Locale.parse(locale)

    def _iter_patterns(a_unit):
        """Yield candidate plural-pattern mappings, most specific first."""
        if add_direction:
            unit_rel_patterns = locale._data['date_fields'][a_unit]
            if seconds >= 0:
                yield unit_rel_patterns['future']
            else:
                yield unit_rel_patterns['past']
        a_unit = f"duration-{a_unit}"
        yield locale._data['unit_patterns'].get(a_unit, {}).get(format)

    for unit, secs_per_unit in TIMEDELTA_UNITS:
        value = abs(seconds) / secs_per_unit
        if value >= threshold or unit == granularity:
            if unit == granularity and value > 0:
                # Never report "0 <unit>" for a non-zero delta at the
                # smallest requested unit.
                value = max(1, value)
            value = int(round(value))
            plural_form = locale.plural_form(value)
            pattern = None
            for patterns in _iter_patterns(unit):
                if patterns is not None:
                    pattern = patterns.get(plural_form) or patterns.get('other')
                    # Only stop once a usable pattern was actually found;
                    # otherwise keep going so the next candidate mapping
                    # (e.g. the plain duration pattern) can act as fallback.
                    if pattern:
                        break
            # This really should not happen
            if not pattern:
                return ''
            return pattern.replace('{0}', str(value))

    return ''

966 

967 

def _format_fallback_interval(
    start: _Instant,
    end: _Instant,
    skeleton: str | None,
    tzinfo: datetime.tzinfo | None,
    locale: Locale | str | None = LC_TIME,
) -> str:
    """Format an interval by formatting both ends and joining them.

    Each end is formatted on its own: with `skeleton` if the locale has a
    datetime skeleton of that name, otherwise as a date, a time or a datetime
    depending on the types of `start` and `end`.  The two strings are then
    inserted into the locale's fallback interval pattern (stored under the
    ``None`` key of ``locale.interval_formats``; ``"{0}-{1}"`` if absent).

    :param start: first instant
    :param end: second instant
    :param skeleton: skeleton to format each end with, or `None`
    :param tzinfo: the time-zone passed on to the time-aware formatters
    :param locale: a `Locale` object or a locale identifier
    :return: the formatted interval; just the single formatted instant when
             both ends format to the same string
    """
    # Accept identifiers as well as Locale objects, as the signature promises.
    locale = Locale.parse(locale)
    ends = (start, end)

    if skeleton in locale.datetime_skeletons:  # Use the given skeleton
        def render(dt):
            return format_skeleton(skeleton, dt, tzinfo, locale=locale)
    elif all(
        isinstance(d, datetime.date) and not isinstance(d, datetime.datetime)
        for d in ends
    ):  # Both are just dates
        def render(dt):
            return format_date(dt, locale=locale)
    elif all(
        isinstance(d, datetime.time) and not isinstance(d, datetime.date)
        for d in ends
    ):  # Both are times
        def render(dt):
            return format_time(dt, tzinfo=tzinfo, locale=locale)
    else:
        def render(dt):
            return format_datetime(dt, tzinfo=tzinfo, locale=locale)

    formatted_start = render(start)
    formatted_end = render(end)

    if formatted_start == formatted_end:
        # Reuse the string already computed instead of formatting again.
        return formatted_start

    return (
        locale.interval_formats.get(None, "{0}-{1}").
        replace("{0}", formatted_start).
        replace("{1}", formatted_end)
    )

995 

996 

def format_interval(
    start: _Instant,
    end: _Instant,
    skeleton: str | None = None,
    tzinfo: datetime.tzinfo | None = None,
    fuzzy: bool = True,
    locale: Locale | str | None = LC_TIME,
) -> str:
    """
    Format an interval between two instants according to the locale's rules.

    >>> from datetime import date, time
    >>> format_interval(date(2016, 1, 15), date(2016, 1, 17), "yMd", locale="fi")
    u'15.\u201317.1.2016'

    >>> format_interval(time(12, 12), time(16, 16), "Hm", locale="en_GB")
    '12:12\u201316:16'

    >>> format_interval(time(5, 12), time(16, 16), "hm", locale="en_US")
    '5:12\u202fAM\u2009–\u20094:16\u202fPM'

    >>> format_interval(time(16, 18), time(16, 24), "Hm", locale="it")
    '16:18\u201316:24'

    If the start instant equals the end instant, the interval is formatted like the instant.

    >>> format_interval(time(16, 18), time(16, 18), "Hm", locale="it")
    '16:18'

    Unknown skeletons fall back to "default" formatting.

    >>> format_interval(date(2015, 1, 1), date(2017, 1, 1), "wzq", locale="ja")
    '2015/01/01\uff5e2017/01/01'

    >>> format_interval(time(16, 18), time(16, 24), "xxx", locale="ja")
    '16:18:00\uff5e16:24:00'

    >>> format_interval(date(2016, 1, 15), date(2016, 1, 17), "xxx", locale="de")
    '15.01.2016\u2009–\u200917.01.2016'

    :param start: First instant (datetime/date/time)
    :param end: Second instant (datetime/date/time)
    :param skeleton: The "skeleton format" to use for formatting.
    :param tzinfo: tzinfo to use (if none is already attached)
    :param fuzzy: If the skeleton is not found, allow choosing a skeleton that's
                  close enough to it.
    :param locale: A locale object or identifier.
    :return: Formatted interval
    """
    locale = Locale.parse(locale)

    # The quoted (">") comments below are taken from the algorithm described
    # at https://www.unicode.org/reports/tr35/tr35-dates.html#intervalFormats

    # > Look for the intervalFormatItem element that matches the "skeleton",
    # > starting in the current locale and then following the locale fallback
    # > chain up to, but not including root.
    interval_formats = locale.interval_formats

    if not skeleton or skeleton not in interval_formats:
        # > If no match was found from the previous step, check what the closest
        # > match is in the fallback locale chain, as in availableFormats. That
        # > is, this allows for adjusting the string value field's width,
        # > including adjusting between "MMM" and "MMMM", and using different
        # > variants of the same field, such as 'v' and 'z'.
        skeleton = (
            match_skeleton(skeleton, interval_formats)
            if (skeleton and fuzzy)
            else None
        )
        if not skeleton:  # Nothing usable was found
            # > Otherwise, format the start and end datetime using the fallback pattern.
            return _format_fallback_interval(start, end, skeleton, tzinfo, locale)

    skel_formats = interval_formats[skeleton]

    if start == end:
        # A zero-length interval is rendered as a single instant.
        return format_skeleton(skeleton, start, tzinfo, fuzzy=fuzzy, locale=locale)

    start_dt = _ensure_datetime_tzinfo(_get_datetime(start), tzinfo=tzinfo)
    end_dt = _ensure_datetime_tzinfo(_get_datetime(end), tzinfo=tzinfo)

    start_fields = DateTimeFormat(start_dt, locale=locale)
    end_fields = DateTimeFormat(end_dt, locale=locale)

    # > If a match is found from previous steps, compute the calendar field
    # > with the greatest difference between start and end datetime. If there
    # > is no difference among any of the fields in the pattern, format as a
    # > single date using availableFormats, and return.
    for field in PATTERN_CHAR_ORDER:  # Largest field first
        if field not in skel_formats:
            continue
        if start_fields.extract(field) == end_fields.extract(field):
            continue
        # > If there is a match, use the pieces of the corresponding pattern to
        # > format the start and end datetime, as above.
        pieces = [
            parse_pattern(piece_pattern).apply(instant, locale)
            for piece_pattern, instant in zip(skel_formats[field], (start_dt, end_dt))
        ]
        return "".join(pieces)

    # > Otherwise, format the start and end datetime using the fallback pattern.
    return _format_fallback_interval(start_dt, end_dt, skeleton, tzinfo, locale)

1100 

1101 

def get_period_id(
    time: _Instant,
    tzinfo: datetime.tzinfo | None = None,
    type: Literal['selection'] | None = None,
    locale: Locale | str | None = LC_TIME,
) -> str:
    """
    Look up the day period ID that a given time falls into.

    The returned ID is suitable as a key into the period name dictionary.

    >>> from datetime import time
    >>> get_period_names(locale="de")[get_period_id(time(7, 42), locale="de")]
    u'Morgen'

    >>> get_period_id(time(0), locale="en_US")
    u'midnight'

    >>> get_period_id(time(0), type="selection", locale="en_US")
    u'night1'

    :param time: The time to inspect.
    :param tzinfo: The timezone for the time. See ``format_time``.
    :param type: The period type to use. Either "selection" or None.
                 The selection type is used for selecting among phrases such as
                 “Your email arrived yesterday evening” or “Your email arrived last night”.
    :param locale: the `Locale` object, or a locale string
    :return: period ID. Something is always returned -- even if it's just "am" or "pm".
    :raise NotImplementedError: if a rule that is reached uses the deprecated
                                "after" key
    """
    moment = _get_time(time, tzinfo)
    now = int(moment.hour * 60 * 60 + moment.minute * 60 + moment.second)
    locale = Locale.parse(locale)

    # LDML says rules may not overlap, so the iteration order should not
    # matter -- except that exact ("at") periods take precedence over ranges.
    rulesets = locale.day_period_rules.get(type, {}).items()

    # Pass 1: exact points in time.
    for rule_id, rules in rulesets:
        if any("at" in rule and rule["at"] == now for rule in rules):
            return rule_id

    # Pass 2: ranges.
    for rule_id, rules in rulesets:
        for rule in rules:
            has_from = "from" in rule
            has_before = "before" in rule

            if has_from and has_before:
                lower, upper = rule["from"], rule["before"]
                if lower < upper:
                    inside = lower <= now < upper
                else:
                    # Range wraps over midnight, e.g. from="21:00" before="06:00"
                    inside = lower <= now < 86400 or 0 <= now < upper
                if inside:
                    return rule_id

            start_ok = has_from and now >= rule["from"]
            # "to" rules do not exist in the present CLDR data (hence no
            # test coverage for them); "before" is the exclusive variant.
            end_ok = ("to" in rule and now <= rule["to"]) or \
                (has_before and now < rule["before"])

            if "after" in rule:
                raise NotImplementedError("'after' is deprecated as of CLDR 29.")

            if start_ok and end_ok:
                return rule_id

    # No rule matched: fall back to the plain half-day split at noon.
    return "am" if now < 43200 else "pm"

1176 

1177 

class ParseError(ValueError):
    """Raised by the parsing functions when the input cannot be parsed."""

1180 

1181 

def parse_date(
    string: str,
    locale: Locale | str | None = LC_TIME,
    format: _PredefinedTimeFormat = 'medium',
) -> datetime.date:
    """Parse a date from a string.

    This function first tries to interpret the string as ISO-8601
    date format, then uses the date format for the locale as a hint to
    determine the order in which the date fields appear in the string.

    >>> parse_date('4/1/04', locale='en_US')
    datetime.date(2004, 4, 1)
    >>> parse_date('01.04.2004', locale='de_DE')
    datetime.date(2004, 4, 1)
    >>> parse_date('2004-04-01', locale='en_US')
    datetime.date(2004, 4, 1)
    >>> parse_date('2004-04-01', locale='de_DE')
    datetime.date(2004, 4, 1)

    :param string: the string containing the date
    :param locale: a `Locale` object or a locale identifier
    :param format: the format to use (see ``get_date_format``)
    :return: the parsed date
    :raise ParseError: if the string contains no digits at all
    :raise ValueError: if the locale pattern lacks a year, month or day
                       field, or the numbers do not form a valid date
    :raise IndexError: if the string contains fewer numbers than needed
    """
    numbers = re.findall(r'(\d+)', string)
    if not numbers:
        raise ParseError("No numbers were found in input")

    # we try ISO-8601 format first, meaning similar to formats
    # extended YYYY-MM-DD or basic YYYYMMDD
    iso_alike = re.match(r'^(\d{4})-?([01]\d)-?([0-3]\d)$',
                         string, flags=re.ASCII)  # allow only ASCII digits
    if iso_alike:
        try:
            return datetime.date(*map(int, iso_alike.groups()))
        except ValueError:
            pass  # a locale format might fit better, so let's continue

    format_str = get_date_format(format=format, locale=locale).pattern.lower()
    year_idx = format_str.index('y')
    # str.find() (unlike str.index()) returns -1 when the character is
    # missing, which is what makes the stand-alone month ('L') fallback
    # below reachable.
    month_idx = format_str.find('m')
    if month_idx < 0:
        month_idx = format_str.index('l')
    day_idx = format_str.index('d')

    # Rank the three fields by where they occur in the pattern; the rank is
    # then used as the index into the numbers found in the input.
    indexes = sorted([(year_idx, 'Y'), (month_idx, 'M'), (day_idx, 'D')])
    indexes = {item[1]: idx for idx, item in enumerate(indexes)}

    # FIXME: this currently only supports numbers, but should also support month
    #        names, both in the requested locale, and english

    year = numbers[indexes['Y']]
    # Two-digit years are taken to be in the 2000s.
    year = 2000 + int(year) if len(year) == 2 else int(year)
    month = int(numbers[indexes['M']])
    day = int(numbers[indexes['D']])
    if month > 12:
        # Cannot be a month: assume day and month were given the other way round.
        month, day = day, month
    return datetime.date(year, month, day)

1240 

1241 

def parse_time(
    string: str,
    locale: Locale | str | None = LC_TIME,
    format: _PredefinedTimeFormat = 'medium',
) -> datetime.time:
    """Parse a time from a string.

    This function uses the time format for the locale as a hint to determine
    the order in which the time fields appear in the string.

    >>> parse_time('15:30:00', locale='en_US')
    datetime.time(15, 30)

    :param string: the string containing the time
    :param locale: a `Locale` object or a locale identifier
    :param format: the format to use (see ``get_time_format``)
    :return: the parsed time
    :rtype: `time`
    :raise ParseError: if the string contains no digits at all
    :raise ValueError: if the locale pattern lacks an hour or minute field,
                       or the numbers do not form a valid time
    """
    numbers = re.findall(r'(\d+)', string)
    if not numbers:
        raise ParseError("No numbers were found in input")

    # TODO: try ISO format first?
    format_str = get_time_format(format=format, locale=locale).pattern.lower()
    # str.find() (unlike str.index()) returns -1 when the character is
    # missing, which is what makes the 'k' fallback below reachable.
    hour_idx = format_str.find('h')
    if hour_idx < 0:
        hour_idx = format_str.index('k')
    min_idx = format_str.index('m')
    # The pattern might not contain seconds at all (e.g. "short" formats);
    # in that case rank the seconds field after everything else.
    sec_idx = format_str.find('s')
    if sec_idx < 0:
        sec_idx = len(format_str)

    # Rank the three fields by where they occur in the pattern; the rank is
    # then used as the index into the numbers found in the input.
    indexes = sorted([(hour_idx, 'H'), (min_idx, 'M'), (sec_idx, 'S')])
    indexes = {item[1]: idx for idx, item in enumerate(indexes)}

    # TODO: support time zones

    hour = int(numbers[indexes['H']])

    # Check if the format specifies a period to be used; if it does, look
    # for 'am'/'pm' to translate the 12-hour value to a 24-hour one.
    if 'a' in format_str:
        lowered = string.lower()
        if 'pm' in lowered:
            # 12 PM is noon and must stay 12; 1 PM..11 PM become 13..23.
            if hour < 12:
                hour += 12
        elif 'am' in lowered and hour == 12:
            # 12 AM is midnight.
            hour = 0

    # Parse up to three numbers from the string.
    minute = second = 0
    if len(numbers) > 1:
        minute = int(numbers[indexes['M']])
    if len(numbers) > 2:
        second = int(numbers[indexes['S']])
    return datetime.time(hour, minute, second)

1292 

1293 

class DateTimePattern:
    """A date/time pattern paired with the format string derived from it.

    ``pattern`` is the pattern as written; ``format`` is what gets
    interpolated (via ``%``) with a `DateTimeFormat` to produce the output.
    """

    def __init__(self, pattern: str, format: DateTimeFormat):
        self.pattern = pattern
        self.format = format

    def __repr__(self) -> str:
        return "<{} {!r}>".format(type(self).__name__, self.pattern)

    def __str__(self) -> str:
        return self.pattern

    def __mod__(self, other: DateTimeFormat) -> str:
        # Only a DateTimeFormat can fill in the fields of the format.
        if isinstance(other, DateTimeFormat):
            return self.format % other
        return NotImplemented

    def apply(
        self,
        datetime: datetime.date | datetime.time,
        locale: Locale | str | None,
        reference_date: datetime.date | None = None,
    ) -> str:
        """Format `datetime` with this pattern for the given `locale`."""
        field_source = DateTimeFormat(datetime, locale, reference_date)
        return self % field_source

1319 

1320 

class DateTimeFormat:
    """Field formatter for a single date/time value in a given locale.

    Instances are subscriptable by pattern field (e.g. ``fmt['yyyy']``), so
    they can be used as the right-hand operand of ``%`` with a format string
    made of ``%(field)s`` placeholders.
    """

    def __init__(
        self,
        value: datetime.date | datetime.time,
        locale: Locale | str,
        reference_date: datetime.date | None = None
    ) -> None:
        """
        :param value: the date, time or datetime to format; naive times and
                      datetimes are treated as UTC
        :param locale: a `Locale` object or a locale identifier
        :param reference_date: date used together with a bare time when
                               formatting timezone fields
        """
        assert isinstance(value, (datetime.date, datetime.datetime, datetime.time))
        if isinstance(value, (datetime.datetime, datetime.time)) and value.tzinfo is None:
            value = value.replace(tzinfo=UTC)
        self.value = value
        self.locale = Locale.parse(locale)
        self.reference_date = reference_date

    def __getitem__(self, name: str) -> str:
        """Format the pattern field `name` (a run of one pattern character).

        :raise KeyError: if the pattern character is not supported
        """
        char = name[0]
        num = len(name)
        if char == 'G':
            return self.format_era(char, num)
        elif char in ('y', 'Y', 'u'):
            return self.format_year(char, num)
        elif char in ('Q', 'q'):
            return self.format_quarter(char, num)
        elif char in ('M', 'L'):
            return self.format_month(char, num)
        elif char in ('w', 'W'):
            return self.format_week(char, num)
        elif char == 'd':
            return self.format(self.value.day, num)
        elif char == 'D':
            return self.format_day_of_year(num)
        elif char == 'F':
            return self.format_day_of_week_in_month()
        elif char in ('E', 'e', 'c'):
            return self.format_weekday(char, num)
        elif char in ('a', 'b', 'B'):
            return self.format_period(char, num)
        elif char == 'h':
            # 12-hour clock counting 1-12: hour 0 and hour 12 both show as 12.
            if self.value.hour % 12 == 0:
                return self.format(12, num)
            else:
                return self.format(self.value.hour % 12, num)
        elif char == 'H':
            return self.format(self.value.hour, num)
        elif char == 'K':
            # 12-hour clock counting 0-11.
            return self.format(self.value.hour % 12, num)
        elif char == 'k':
            # 24-hour clock counting 1-24: midnight shows as 24.
            if self.value.hour == 0:
                return self.format(24, num)
            else:
                return self.format(self.value.hour, num)
        elif char == 'm':
            return self.format(self.value.minute, num)
        elif char == 's':
            return self.format(self.value.second, num)
        elif char == 'S':
            return self.format_frac_seconds(num)
        elif char == 'A':
            return self.format_milliseconds_in_day(num)
        elif char in ('z', 'Z', 'v', 'V', 'x', 'X', 'O'):
            return self.format_timezone(char, num)
        else:
            raise KeyError(f"Unsupported date/time field {char!r}")

    def extract(self, char: str) -> int:
        """Return the numeric value of the field identified by `char`.

        :raise NotImplementedError: for fields other than
                                    'y', 'M', 'd', 'H', 'h', 'm' and 'a'
        """
        char = str(char)[0]
        if char == 'y':
            return self.value.year
        elif char == 'M':
            return self.value.month
        elif char == 'd':
            return self.value.day
        elif char == 'H':
            return self.value.hour
        elif char == 'h':
            return self.value.hour % 12 or 12
        elif char == 'm':
            return self.value.minute
        elif char == 'a':
            return int(self.value.hour >= 12)  # 0 for am, 1 for pm
        else:
            raise NotImplementedError(f"Not implemented: extracting {char!r} from {self.value!r}")

    def format_era(self, char: str, num: int) -> str:
        """Return the era name; widths below 3 are treated as abbreviated."""
        width = {3: 'abbreviated', 4: 'wide', 5: 'narrow'}[max(3, num)]
        era = int(self.value.year >= 0)
        return get_era_names(width, self.locale)[era]

    def format_year(self, char: str, num: int) -> str:
        """Return the year; an upper-case `char` selects the ISO calendar year."""
        value = self.value.year
        if char.isupper():
            value = self.value.isocalendar()[0]
        year = self.format(value, num)
        if num == 2:
            # Two pattern letters mean "last two digits only".
            year = year[-2:]
        return year

    def format_quarter(self, char: str, num: int) -> str:
        """Return the quarter as a number (`num` <= 2) or as a name."""
        quarter = (self.value.month - 1) // 3 + 1
        if num <= 2:
            return '%0*d' % (num, quarter)
        width = {3: 'abbreviated', 4: 'wide', 5: 'narrow'}[num]
        context = {'Q': 'format', 'q': 'stand-alone'}[char]
        return get_quarter_names(width, context, self.locale)[quarter]

    def format_month(self, char: str, num: int) -> str:
        """Return the month as a number (`num` <= 2) or as a name."""
        if num <= 2:
            return '%0*d' % (num, self.value.month)
        width = {3: 'abbreviated', 4: 'wide', 5: 'narrow'}[num]
        context = {'M': 'format', 'L': 'stand-alone'}[char]
        return get_month_names(width, context, self.locale)[self.value.month]

    def format_week(self, char: str, num: int) -> str:
        """Return the week of year (lower-case `char`) or week of month."""
        if char.islower():  # week of year
            day_of_year = self.get_day_of_year()
            week = self.get_week_number(day_of_year)
            if week == 0:
                # The day belongs to the last week of the previous year.
                date = self.value - datetime.timedelta(days=day_of_year)
                week = self.get_week_number(self.get_day_of_year(date),
                                            date.weekday())
            return self.format(week, num)
        else:  # week of month
            week = self.get_week_number(self.value.day)
            if week == 0:
                # The day belongs to the last week of the previous month.
                date = self.value - datetime.timedelta(days=self.value.day)
                week = self.get_week_number(date.day, date.weekday())
            return str(week)

    def format_weekday(self, char: str = 'E', num: int = 4) -> str:
        """
        Return weekday from parsed datetime according to format pattern.

        >>> from datetime import date
        >>> format = DateTimeFormat(date(2016, 2, 28), Locale.parse('en_US'))
        >>> format.format_weekday()
        u'Sunday'

        'E': Day of week - Use one through three letters for the abbreviated day name, four for the full (wide) name,
             five for the narrow name, or six for the short name.
        >>> format.format_weekday('E',2)
        u'Sun'

        'e': Local day of week. Same as E except adds a numeric value that will depend on the local starting day of the
             week, using one or two letters. For this example, Monday is the first day of the week.
        >>> format.format_weekday('e',2)
        '01'

        'c': Stand-Alone local day of week - Use one letter for the local numeric value (same as 'e'), three for the
             abbreviated day name, four for the full (wide) name, five for the narrow name, or six for the short name.
        >>> format.format_weekday('c',1)
        '1'

        :param char: pattern format character ('e','E','c')
        :param num: count of format character

        """
        if num < 3:
            if char.islower():
                value = 7 - self.locale.first_week_day + self.value.weekday()
                return self.format(value % 7 + 1, num)
            num = 3
        weekday = self.value.weekday()
        width = {3: 'abbreviated', 4: 'wide', 5: 'narrow', 6: 'short'}[num]
        context = "stand-alone" if char == "c" else "format"
        return get_day_names(width, context, self.locale)[weekday]

    def format_day_of_year(self, num: int) -> str:
        """Return the day of the year, zero-padded to `num` digits."""
        return self.format(self.get_day_of_year(), num)

    def format_day_of_week_in_month(self) -> str:
        """Return which occurrence of its weekday within the month the day is."""
        return str((self.value.day - 1) // 7 + 1)

    def format_period(self, char: str, num: int) -> str:
        """
        Return period from parsed datetime according to format pattern.

        >>> from datetime import datetime, time
        >>> format = DateTimeFormat(time(13, 42), 'fi_FI')
        >>> format.format_period('a', 1)
        u'ip.'
        >>> format.format_period('b', 1)
        u'iltap.'
        >>> format.format_period('b', 4)
        u'iltapäivä'
        >>> format.format_period('B', 4)
        u'iltapäivällä'
        >>> format.format_period('B', 5)
        u'ip.'

        >>> format = DateTimeFormat(datetime(2022, 4, 28, 6, 27), 'zh_Hant')
        >>> format.format_period('a', 1)
        u'上午'
        >>> format.format_period('b', 1)
        u'清晨'
        >>> format.format_period('B', 1)
        u'清晨'

        :param char: pattern format character ('a', 'b', 'B')
        :param num: count of format character
        :raise ValueError: if no name for the period is found in any width

        """
        # Try the requested width first, then every other width as fallback.
        widths = [{3: 'abbreviated', 4: 'wide', 5: 'narrow'}[max(3, num)],
                  'wide', 'narrow', 'abbreviated']
        if char == 'a':
            period = 'pm' if self.value.hour >= 12 else 'am'
            context = 'format'
        else:
            period = get_period_id(self.value, locale=self.locale)
            context = 'format' if char == 'B' else 'stand-alone'
        for width in widths:
            period_names = get_period_names(context=context, width=width, locale=self.locale)
            if period in period_names:
                return period_names[period]
        raise ValueError(f"Could not format period {period} in {self.locale}")

    def format_frac_seconds(self, num: int) -> str:
        """ Return fractional seconds.

        Rounds the time's microseconds to the precision given by the number \
        of digits passed in.
        """
        value = self.value.microsecond / 1000000
        # The scaled product is a float that can land just below the intended
        # integer (0.29 * 100 == 28.999999999999996); '%d' would truncate it,
        # so round to the nearest integer before formatting.
        return self.format(round(round(value, num) * 10**num), num)

    def format_milliseconds_in_day(self, num: int) -> str:
        """Return the milliseconds elapsed since midnight, zero-padded to `num` digits."""
        msecs = self.value.microsecond // 1000 + self.value.second * 1000 + \
            self.value.minute * 60000 + self.value.hour * 3600000
        return self.format(msecs, num)

    def format_timezone(self, char: str, num: int) -> str:
        """Format the timezone field `char` repeated `num` times.

        NOTE: combinations without an explicit branch below (e.g. 'O' with
        ``num != 4``) fall off the end of the function and yield `None`.
        """
        width = {3: 'short', 4: 'long', 5: 'iso8601'}[max(3, num)]

        # It could be that we only receive a time to format, but also have a
        # reference date which is important to distinguish between timezone
        # variants (summer/standard time)
        value = self.value
        if self.reference_date:
            value = datetime.datetime.combine(self.reference_date, self.value)

        if char == 'z':
            return get_timezone_name(value, width, locale=self.locale)
        elif char == 'Z':
            if num == 5:
                return get_timezone_gmt(value, width, locale=self.locale, return_z=True)
            return get_timezone_gmt(value, width, locale=self.locale)
        elif char == 'O':
            if num == 4:
                return get_timezone_gmt(value, width, locale=self.locale)
        # TODO: To add support for O:1
        elif char == 'v':
            return get_timezone_name(value.tzinfo, width,
                                     locale=self.locale)
        elif char == 'V':
            if num == 1:
                return get_timezone_name(value.tzinfo, width,
                                         uncommon=True, locale=self.locale)
            elif num == 2:
                return get_timezone_name(value.tzinfo, locale=self.locale, return_zone=True)
            elif num == 3:
                return get_timezone_location(value.tzinfo, locale=self.locale, return_city=True)
            return get_timezone_location(value.tzinfo, locale=self.locale)
        # Included additional elif condition to add support for 'Xx' in timezone format
        elif char == 'X':
            if num == 1:
                return get_timezone_gmt(value, width='iso8601_short', locale=self.locale,
                                        return_z=True)
            elif num in (2, 4):
                return get_timezone_gmt(value, width='short', locale=self.locale,
                                        return_z=True)
            elif num in (3, 5):
                return get_timezone_gmt(value, width='iso8601', locale=self.locale,
                                        return_z=True)
        elif char == 'x':
            if num == 1:
                return get_timezone_gmt(value, width='iso8601_short', locale=self.locale)
            elif num in (2, 4):
                return get_timezone_gmt(value, width='short', locale=self.locale)
            elif num in (3, 5):
                return get_timezone_gmt(value, width='iso8601', locale=self.locale)

    def format(self, value: SupportsInt, length: int) -> str:
        """Return `value` as a decimal integer zero-padded to `length` digits."""
        return '%0*d' % (length, value)

    def get_day_of_year(self, date: datetime.date | None = None) -> int:
        """Return the 1-based day of the year of `date` (default: own value)."""
        if date is None:
            date = self.value
        return (date - date.replace(month=1, day=1)).days + 1

    def get_week_number(self, day_of_period: int, day_of_week: int | None = None) -> int:
        """Return the number of the week of a day within a period. This may be
        the week number in a year or the week number in a month.

        Usually this will return a value equal to or greater than 1, but if the
        first week of the period is so short that it actually counts as the last
        week of the previous period, this function will return 0.

        >>> date = datetime.date(2006, 1, 8)
        >>> DateTimeFormat(date, 'de_DE').get_week_number(6)
        1
        >>> DateTimeFormat(date, 'en_US').get_week_number(6)
        2

        :param day_of_period: the number of the day in the period (usually
                              either the day of month or the day of year)
        :param day_of_week: the week day; if omitted, the week day of the
                            current date is assumed
        """
        if day_of_week is None:
            day_of_week = self.value.weekday()
        # Python's % with a positive modulus is never negative, so the result
        # is already in the range 0..6.
        first_day = (day_of_week - self.locale.first_week_day -
                     day_of_period + 1) % 7
        week_number = (day_of_period + first_day - 1) // 7

        if 7 - first_day >= self.locale.min_week_days:
            week_number += 1

        if self.locale.first_week_day == 0:
            # Correct the weeknumber in case of iso-calendar usage (first_week_day=0).
            # If the weeknumber exceeds the maximum number of weeks for the given year
            # we must count from zero. For example the above calculation gives week 53
            # for 2018-12-31. By iso-calendar definition 2018 has a max of 52
            # weeks, thus the weeknumber must be 53-52=1.
            max_weeks = datetime.date(year=self.value.year, day=28, month=12).isocalendar()[1]
            if week_number > max_weeks:
                week_number -= max_weeks

        return week_number

1651 

1652 

# Maps each supported pattern character to the list of field lengths accepted
# for it; ``None`` means that no length restriction is applied.
PATTERN_CHARS: dict[str, list[int] | None] = {
    # era
    'G': [1, 2, 3, 4, 5],
    # year
    'y': None,
    'Y': None,
    'u': None,
    # quarter
    'Q': [1, 2, 3, 4, 5],
    'q': [1, 2, 3, 4, 5],
    # month
    'M': [1, 2, 3, 4, 5],
    'L': [1, 2, 3, 4, 5],
    # week
    'w': [1, 2],
    'W': [1],
    # day
    'd': [1, 2],
    'D': [1, 2, 3],
    'F': [1],
    'g': None,
    # week day
    'E': [1, 2, 3, 4, 5, 6],
    'e': [1, 2, 3, 4, 5, 6],
    'c': [1, 3, 4, 5, 6],
    # period
    'a': [1, 2, 3, 4, 5],
    'b': [1, 2, 3, 4, 5],
    'B': [1, 2, 3, 4, 5],
    # hour
    'h': [1, 2],
    'H': [1, 2],
    'K': [1, 2],
    'k': [1, 2],
    # minute
    'm': [1, 2],
    # second
    's': [1, 2],
    'S': None,
    'A': None,
    # zone
    'z': [1, 2, 3, 4],
    'Z': [1, 2, 3, 4, 5],
    'O': [1, 4],
    'v': [1, 4],
    'V': [1, 2, 3, 4],
    'x': [1, 2, 3, 4, 5],
    'X': [1, 2, 3, 4, 5],
}

1668 

#: The pattern characters declared in the Date Field Symbol Table
#: (https://www.unicode.org/reports/tr35/tr35-dates.html#Date_Field_Symbol_Table)
#: in order of decreasing magnitude.
PATTERN_CHAR_ORDER = (
    "G"  # era
    "yYuU"  # year
    "Qq"  # quarter
    "MLl"  # month
    "wW"  # week
    "dDFg"  # day
    "Eec"  # week day
    "abB"  # period
    "ChHKkjJ"  # hour
    "m"  # minute
    "sSA"  # second
    "zZOvVXx"  # zone
)

1673 

1674 

def parse_pattern(pattern: str | DateTimePattern) -> DateTimePattern:
    """Parse date, time, and datetime format patterns.

    >>> parse_pattern("MMMMd").format
    '%(MMMM)s%(d)s'
    >>> parse_pattern("MMM d, yyyy").format
    '%(MMM)s %(d)s, %(yyyy)s'

    Pattern can contain literal strings in single quotes:

    >>> parse_pattern("H:mm' Uhr 'z").format
    '%(H)s:%(mm)s Uhr %(z)s'

    An actual single quote can be used by using two adjacent single quote
    characters:

    >>> parse_pattern("hh' o''clock'").format
    "%(hh)s o'clock"

    A value that already is a ``DateTimePattern`` is returned unchanged;
    anything else is handed to ``_cached_parse_pattern``.

    :param pattern: the formatting pattern to parse
    """
    already_parsed = isinstance(pattern, DateTimePattern)
    return pattern if already_parsed else _cached_parse_pattern(pattern)

1699 

1700 

@lru_cache(maxsize=1024)
def _cached_parse_pattern(pattern: str) -> DateTimePattern:
    """Build a ``DateTimePattern`` from a pattern string (results are LRU-cached).

    Literal text has ``%`` doubled and every field becomes a ``%(<field>)s``
    placeholder, so the joined result can be used with ``%``-style mapping
    interpolation.

    :param pattern: the formatting pattern to parse
    :raise ValueError: if a field is repeated a number of times that
                       ``PATTERN_CHARS`` does not allow for its character
    :raise NotImplementedError: if the tokenizer yields an unknown token type
    """
    pieces = []

    for kind, payload in tokenize_pattern(pattern):
        if kind == "field":
            symbol, width = payload
            field = symbol * width
            allowed_widths = PATTERN_CHARS[symbol]
            # A falsy entry (``None``) means any width is acceptable.
            if allowed_widths and width not in allowed_widths:
                raise ValueError(f"Invalid length for field: {field!r}")
            pieces.append(f"%({field})s")
        elif kind == "chars":
            # Escape percent signs so they survive %-interpolation later.
            pieces.append(payload.replace('%', '%%'))
        else:
            raise NotImplementedError(f"Unknown token type: {kind}")

    return DateTimePattern(pattern, ''.join(pieces))

1717 

1718 

def tokenize_pattern(pattern: str) -> list[tuple[str, str | tuple[str, int]]]:
    """
    Tokenize date format patterns.

    Returns a list of (token_type, token_value) tuples.

    ``token_type`` may be either "chars" or "field".

    For "chars" tokens, the value is the literal value.

    For "field" tokens, the value is a tuple of (field character, repetition count).

    Text inside single quotes is taken literally, and two adjacent single
    quotes stand for one literal quote character.  If a quote is opened but
    never closed, the remaining text is still emitted as literal characters
    rather than being dropped.

    :param pattern: Pattern string
    :type pattern: str
    :rtype: list[tuple]
    """
    tokens = []
    literal = []  # pending literal characters, not yet emitted as a token
    quoted = None  # characters of the currently open quote; None outside quotes
    field_char = ''  # pattern character of the field being collected, if any
    field_len = 0  # how often ``field_char`` has been repeated so far

    def flush_literal():
        # NUL is the internal stand-in for an escaped quote (see below).
        tokens.append(('chars', ''.join(literal).replace('\0', "'")))
        literal.clear()

    def flush_field():
        nonlocal field_char, field_len
        tokens.append(('field', (field_char, field_len)))
        field_char = ''
        field_len = 0

    # Escaped quotes ('') are swapped for NUL up front so that every
    # remaining single quote unambiguously opens or closes a quoted section.
    for char in pattern.replace("''", '\0'):
        if quoted is not None:
            if char == "'":  # end of quote
                literal.extend(quoted)
                quoted = None
            else:  # inside quote
                quoted.append(char)
        elif char == "'":  # quote started
            if field_char:
                flush_field()
            elif literal:
                flush_literal()
            quoted = []
        elif char in PATTERN_CHARS:
            if literal:
                flush_literal()
            if char == field_char:
                field_len += 1
            else:
                if field_char:
                    flush_field()
                field_char = char
                field_len = 1
        else:
            if field_char:
                flush_field()
            literal.append(char)

    if quoted:
        # Unterminated quote: keep its text as literal characters instead of
        # silently discarding it.
        literal.extend(quoted)

    if field_char:
        flush_field()
    elif literal:
        flush_literal()

    return tokens

1786 

1787 

def untokenize_pattern(tokens: Iterable[tuple[str, str | tuple[str, int]]]) -> str:
    """
    Turn a date format pattern token stream back into a string.

    This is the reverse operation of ``tokenize_pattern``.

    "field" tokens are written as their character repeated by the count.
    "chars" tokens are wrapped in single quotes when they contain a pattern
    character; literal single quotes are always doubled so that they are not
    read back as the start of a quoted section.  Tokens of any other type
    are ignored.

    :type tokens: Iterable[tuple]
    :rtype: str
    """
    output = []
    for tok_type, tok_value in tokens:
        if tok_type == "field":
            output.append(tok_value[0] * tok_value[1])
        elif tok_type == "chars":
            # A bare apostrophe would open a quote when the result is parsed
            # again, so it has to be escaped whether or not we add quotes.
            escaped = tok_value.replace("'", "''")
            if any(ch in PATTERN_CHARS for ch in tok_value):
                output.append(f"'{escaped}'")
            else:  # No need to quote
                output.append(escaped)
    return "".join(output)

1807 

1808 

def split_interval_pattern(pattern: str) -> list[str]:
    """
    Split an interval-describing datetime pattern into multiple pieces.

    > The pattern is then designed to be broken up into two pieces by determining the first repeating field.
    - https://www.unicode.org/reports/tr35/tr35-dates.html#intervalFormats

    >>> split_interval_pattern('E d.M. \u2013 E d.M.')
    ['E d.M. \u2013 ', 'E d.M.']
    >>> split_interval_pattern("Y 'text' Y 'more text'")
    ["Y 'text '", "Y 'more text'"]
    >>> split_interval_pattern("E, MMM d \u2013 E")
    ['E, MMM d \u2013 ', 'E']
    >>> split_interval_pattern("MMM d")
    ['MMM d']
    >>> split_interval_pattern("y G")
    ['y G']
    >>> split_interval_pattern("MMM d \u2013 d")
    ['MMM d \u2013 ', 'd']

    :param pattern: Interval pattern string
    :return: list of "subpatterns"
    """
    current = []
    groups = [current]
    seen = set()

    for kind, payload in tokenize_pattern(pattern):
        if kind == "field":
            symbol = payload[0]
            if symbol in seen:
                # A repeated field character starts the next sub-pattern;
                # the fields seen so far are forgotten at that point.
                current = []
                groups.append(current)
                seen.clear()
            seen.add(symbol)
        current.append((kind, payload))

    return [untokenize_pattern(group) for group in groups]

1845 

1846 

def match_skeleton(skeleton: str, options: Iterable[str], allow_different_fields: bool = False) -> str | None:
    """
    Find the closest match for the given datetime skeleton among the options given.

    This uses the rules outlined in the TR35 document.

    >>> match_skeleton('yMMd', ('yMd', 'yMMMd'))
    'yMd'

    >>> match_skeleton('yMMd', ('jyMMd',), allow_different_fields=True)
    'jyMMd'

    >>> match_skeleton('yMMd', ('qyMMd',), allow_different_fields=False)

    >>> match_skeleton('hmz', ('hmv',))
    'hmv'

    :param skeleton: The skeleton to match
    :type skeleton: str
    :param options: An iterable of other skeletons to match against
    :type options: Iterable[str]
    :param allow_different_fields: Whether an option may contain fields the
                                   skeleton lacks (or vice versa); such
                                   options are penalized instead of rejected
    :type allow_different_fields: bool
    :return: The closest skeleton match, or if no match was found, None.
    :rtype: str|None
    """

    # TODO: maybe implement pattern expansion?

    # Based on the implementation in
    # http://source.icu-project.org/repos/icu/icu4j/trunk/main/classes/core/src/com/ibm/icu/text/DateIntervalInfo.java

    # Filter out falsy values and sort for stability; when `interval_formats` is passed in, there may be a None key.
    candidates = sorted(filter(None, options))

    # Without any 'z' option to match against, fall back to matching on 'v'.
    if 'z' in skeleton and all('z' not in candidate for candidate in candidates):
        skeleton = skeleton.replace('z', 'v')

    def field_widths(pattern):
        # Map each field character of the pattern to its repetition count.
        return {
            value[0]: value[1]
            for kind, value in tokenize_pattern(pattern)
            if kind == "field"
        }

    wanted = field_widths(skeleton)

    def distance_to(candidate):
        # Return the weighted distance between the skeleton and `candidate`,
        # or None when the candidate must be rejected outright.
        offered = field_widths(candidate)
        total = 0
        for field in PATTERN_CHARS:
            input_width = wanted.get(field, 0)
            opt_width = offered.get(field, 0)
            if input_width == opt_width:
                continue
            if 0 in (input_width, opt_width):
                if not allow_different_fields:  # This one is not okay
                    return None
                total += 0x1000  # Magic weight constant for "entirely different fields"
            elif field == 'M' and (input_width > 2) != (opt_width > 2):
                total += 0x100  # Magic weight for "text turns into a number"
            else:
                total += abs(input_width - opt_width)
        return total

    best_skeleton = None
    best_distance = None
    for candidate in candidates:
        distance = distance_to(candidate)
        if distance is None:  # Rejected because of differing fields
            continue

        if best_skeleton is None or distance < best_distance:
            best_skeleton = candidate
            best_distance = distance

        if distance == 0:  # Found a perfect match!
            break

    return best_skeleton