"""This module provides a method to parse an ISO 8601:2004 date string to a
python datetime.date instance.

It supports all basic, extended and expanded formats as described in the ISO
standard. Its only limitations are those of the Python datetime.date
implementation, which does not support dates before 0001-01-01.
"""
8
9import re
10from datetime import date, time, timedelta
11from typing import Union
12
13from isodate.duration import Duration
14from isodate.isoerror import ISO8601Error
15from isodate.isostrf import DATE_EXT_COMPLETE, strftime
16
# A dictionary to cache pre-compiled regular expressions.
# Each set of regular expressions is identified by the number of year digits
# allowed and by whether a plus/minus sign is required or not. (The sign
# requirement is selectable only for 4 digit years.)
DATE_REGEX_CACHE: dict[tuple[int, bool], list[re.Pattern[str]]] = {}


def build_date_regexps(yeardigits: int = 4, expanded: bool = False) -> list[re.Pattern[str]]:
    """Compile set of regular expressions to parse ISO dates.

    The expressions are created only if they are not already in
    DATE_REGEX_CACHE; otherwise the cached list is returned.

    It is necessary to fix the number of year digits, else it is not possible
    to automatically distinguish between various ISO date formats.

    ISO 8601 allows more than 4 digit years, on prior agreement, but then a +/-
    sign is required (expanded format). To support +/- sign for 4 digit years,
    the expanded parameter needs to be set to True.

    @param yeardigits: how many digits are used to represent a year
    @param expanded: if True a leading +/- sign is required. This parameter
                     is forced to True, if yeardigits != 4

    @return: the list of compiled patterns stored in DATE_REGEX_CACHE for
             this combination; every pattern is anchored to the whole string
    """
    if yeardigits != 4:
        expanded = True
    key = (yeardigits, expanded)
    if key not in DATE_REGEX_CACHE:
        # ISO 8601 expanded DATE formats allow an arbitrary number of year
        # digits with a leading +/- sign. The sign group is repeated exactly
        # once (required) or zero times (not accepted).
        sign = 1 if expanded else 0
        # Every format except the century one starts with the same sign/year
        # prefix, so it is built once and shared.
        year = r"(?P<sign>[+-]){%d}(?P<year>[0-9]{%d})" % (sign, yeardigits)
        century = r"(?P<sign>[+-]){%d}(?P<century>[0-9]{%d})" % (sign, yeardigits - 2)
        formats = (
            # 1. complete dates:
            #    YYYY-MM-DD or +- YYYYYY-MM-DD... extended date format
            year + r"-(?P<month>[0-9]{2})-(?P<day>[0-9]{2})",
            #    YYYYMMDD or +- YYYYYYMMDD... basic date format
            year + r"(?P<month>[0-9]{2})(?P<day>[0-9]{2})",
            # 2. complete week dates:
            #    YYYY-Www-D or +-YYYYYY-Www-D ... extended week date
            year + r"-W(?P<week>[0-9]{2})-(?P<day>[0-9]{1})",
            #    YYYYWwwD or +-YYYYYYWwwD ... basic week date
            year + r"W(?P<week>[0-9]{2})(?P<day>[0-9]{1})",
            # 3. ordinal dates:
            #    YYYY-DDD or +-YYYYYY-DDD ... extended format
            year + r"-(?P<day>[0-9]{3})",
            #    YYYYDDD or +-YYYYYYDDD ... basic format
            year + r"(?P<day>[0-9]{3})",
            # 4. week dates:
            #    YYYY-Www or +-YYYYYY-Www ... extended reduced accuracy week date
            year + r"-W(?P<week>[0-9]{2})",
            #    YYYYWww or +-YYYYYYWww ... basic reduced accuracy week date
            year + r"W(?P<week>[0-9]{2})",
            # 5. month dates:
            #    YYYY-MM or +-YYYYYY-MM ... reduced accuracy specific month
            year + r"-(?P<month>[0-9]{2})",
            #    YYYYMM or +-YYYYYYMM ... basic incomplete month date format
            year + r"(?P<month>[0-9]{2})",
            # 6. year dates:
            #    YYYY or +-YYYYYY ... reduced accuracy specific year
            year,
            # 7. century dates:
            #    YY or +-YYYY ... reduced accuracy specific century
            century,
        )
        # \A ... \Z anchors each expression to the complete input string.
        DATE_REGEX_CACHE[key] = [re.compile(r"\A" + text + r"\Z") for text in formats]
    return DATE_REGEX_CACHE[key]
104
105
def parse_date(
    datestring: str,
    yeardigits: int = 4,
    expanded: bool = False,
    defaultmonth: int = 1,
    defaultday: int = 1,
) -> date:
    """Parse an ISO 8601 date string into a datetime.date object.

    As the datetime.date implementation is limited to dates starting from
    0001-01-01, negative dates (BC) and year 0 can not be parsed by this
    method.

    For incomplete dates, the missing parts are filled in from defaultmonth
    and defaultday (both 1 by default). For instance if only a century is
    given, this method returns defaultday of defaultmonth in year 1 of this
    century. An incomplete week date resolves to the Monday of that week.

    supported formats: (expanded formats are shown with 6 digits for year)
      YYYYMMDD    +-YYYYYYMMDD      basic complete date
      YYYY-MM-DD  +-YYYYYY-MM-DD    extended complete date
      YYYYWwwD    +-YYYYYYWwwD      basic complete week date
      YYYY-Www-D  +-YYYYYY-Www-D    extended complete week date
      YYYYDDD     +-YYYYYYDDD       basic ordinal date
      YYYY-DDD    +-YYYYYY-DDD      extended ordinal date
      YYYYWww     +-YYYYYYWww       basic incomplete week date
      YYYY-Www    +-YYYYYY-Www      extended incomplete week date
      YYYYMM      +-YYYYYYMM        basic incomplete month date
      YYYY-MM     +-YYYYYY-MM       incomplete month date
      YYYY        +-YYYYYY          incomplete year date
      YY          +-YYYY            incomplete century date

    @param datestring: the ISO date string to parse
    @param yeardigits: how many digits are used to represent a year
    @param expanded: if True then a leading +/- sign is required. This
                     parameter is forced to True, if yeardigits != 4
    @param defaultmonth: month used when the string carries no month
                     (century and year dates) or a month of "00"
    @param defaultday: day used when the string carries no day (century,
                     year and month dates)

    @return: a datetime.date instance represented by datestring
    @raise ISO8601Error: if this function can not parse the datestring
    @raise ValueError: if datestring can not be represented by datetime.date,
                     including ordinal days and week dates that do not exist
                     in the given year
    """
    if yeardigits != 4:
        expanded = True
    for pattern in build_date_regexps(yeardigits, expanded):
        match = pattern.match(datestring)
        if match is None:
            continue
        # groupdict() only holds the groups defined by the pattern that
        # matched, so key membership tells which date format was recognised.
        groups = match.groupdict()
        # FIXME: negative dates not possible with python standard types
        sign = -1 if groups["sign"] == "-" else 1
        if "century" in groups:
            return date(sign * (int(groups["century"]) * 100 + 1), defaultmonth, defaultday)
        year = sign * int(groups["year"])
        if "month" in groups:  # month- or complete date
            day = defaultday if groups.get("day") is None else int(groups["day"])
            # A month of "00" is falsy and falls back to defaultmonth.
            return date(year, int(groups["month"]) or defaultmonth, day)
        first_of_year = date(year, 1, 1)
        if "week" in groups:  # complete or incomplete week date
            week = int(groups["week"])
            weekday = int(groups.get("day") or 1)
            _, first_week, first_weekday = first_of_year.isocalendar()
            # If January 1st already lies in ISO week 1, that week must not
            # be counted twice, so one week less is added.
            result = first_of_year + timedelta(
                weeks=week - (1 if first_week == 1 else 0),
                days=weekday - first_weekday,
            )
            # Week numbers or weekdays outside the year's real range would
            # silently land in a neighbouring week or year; reject them.
            if tuple(result.isocalendar()) != (year, week, weekday):
                raise ValueError("Week date out of range: %r" % datestring)
            return result
        if "day" in groups:  # ordinal date
            result = first_of_year + timedelta(days=int(groups["day"]) - 1)
            # Day 000 or a day past the end of the year rolls over into
            # another year; reject it instead of returning that date.
            if result.year != year:
                raise ValueError("Ordinal day out of range: %r" % datestring)
            return result
        # year date
        return first_of_year.replace(month=defaultmonth, day=defaultday)
    raise ISO8601Error("Unrecognised ISO 8601 date format: %r" % datestring)
182
183
def date_isoformat(
    tdate: Union[timedelta, Duration, time, date],
    format: str = DATE_EXT_COMPLETE,
    yeardigits: int = 4,
) -> str:
    """Render tdate as a string.

    Convenience wrapper: all three arguments are handed on unchanged to
    isodate.isostrf.strftime. The only thing decided here is the default
    format, which is Date-Extended-Complete.
    """
    formatted = strftime(tdate, format, yeardigits)
    return formatted