1"""
This module provides a method to parse an ISO 8601:2004 date string to a
Python datetime.date instance.
4
It supports all basic, extended and expanded formats as described in the ISO
standard. The only limitations it has are those of the Python datetime.date
implementation, which does not support dates before 0001-01-01.
8"""
9
10import re
11from datetime import date, timedelta
12
13from isodate.isoerror import ISO8601Error
14from isodate.isostrf import DATE_EXT_COMPLETE, strftime
15
DATE_REGEX_CACHE = {}
# A dictionary to cache pre-compiled regular expressions.
# Each set of regular expressions is identified by the number of year digits
# allowed and by whether a plus/minus sign is required. (The sign requirement
# is changeable only for 4 digit years.)
21
22
def build_date_regexps(yeardigits=4, expanded=False):
    """
    Compile set of regular expressions to parse ISO dates. The expressions will
    be created only if they are not already in DATE_REGEX_CACHE.

    It is necessary to fix the number of year digits, else it is not possible
    to automatically distinguish between various ISO date formats.

    ISO 8601 allows more than 4 digit years, on prior agreement, but then a +/-
    sign is required (expanded format). To support +/- sign for 4 digit years,
    the expanded parameter needs to be set to True.

    @param yeardigits: how many digits are used to represent a year
    @param expanded: if True a leading +/- sign is required. This parameter
                     is forced to True, if yeardigits != 4

    @return: list of compiled patterns, complete dates first and the century
             pattern last; repeated calls with the same effective arguments
             return the same cached list object
    """
    if yeardigits != 4:
        expanded = True
    cache_key = (yeardigits, expanded)
    if cache_key not in DATE_REGEX_CACHE:
        # ISO 8601 expanded DATE formats allow an arbitrary number of year
        # digits with a leading +/- sign. The sign group is always part of the
        # pattern; its repetition count (1 or 0) makes it mandatory or
        # forbidden, so groupdict() always contains a "sign" key.
        sign = r"(?P<sign>[+-]){%d}" % (1 if expanded else 0)
        year = sign + r"(?P<year>[0-9]{%d})" % (yeardigits,)

        # Everything that may follow the (signed) year, in the order the
        # patterns are stored in the returned list.
        year_suffixes = (
            # 1. complete dates:
            #    YYYY-MM-DD or +- YYYYYY-MM-DD... extended date format
            r"-(?P<month>[0-9]{2})-(?P<day>[0-9]{2})",
            #    YYYYMMDD or +- YYYYYYMMDD... basic date format
            r"(?P<month>[0-9]{2})(?P<day>[0-9]{2})",
            # 2. complete week dates:
            #    YYYY-Www-D or +-YYYYYY-Www-D ... extended week date
            r"-W(?P<week>[0-9]{2})-(?P<day>[0-9]{1})",
            #    YYYYWwwD or +-YYYYYYWwwD ... basic week date
            r"W(?P<week>[0-9]{2})(?P<day>[0-9]{1})",
            # 3. ordinal dates:
            #    YYYY-DDD or +-YYYYYY-DDD ... extended format
            r"-(?P<day>[0-9]{3})",
            #    YYYYDDD or +-YYYYYYDDD ... basic format
            r"(?P<day>[0-9]{3})",
            # 4. week dates:
            #    YYYY-Www or +-YYYYYY-Www ... extended reduced accuracy week date
            r"-W(?P<week>[0-9]{2})",
            #    YYYYWww or +-YYYYYYWww ... basic reduced accuracy week date
            r"W(?P<week>[0-9]{2})",
            # 5. month dates:
            #    YYYY-MM or +-YYYYYY-MM ... reduced accuracy specific month
            r"-(?P<month>[0-9]{2})",
            #    YYYYMM or +-YYYYYYMM ... basic incomplete month date format
            r"(?P<month>[0-9]{2})",
            # 6. year dates:
            #    YYYY or +-YYYYYY ... reduced accuracy specific year
            r"",
        )
        regex_texts = [year + suffix for suffix in year_suffixes]

        # 7. century dates:
        #    YY or +-YYYY ... reduced accuracy specific century
        # A century is the year without its last two digits. With two or fewer
        # year digits nothing is left, and the repetition count would become
        # zero or negative (a pattern matching a bare sign or literal "{-1}"
        # text), so no century pattern is generated in that case.
        if yeardigits > 2:
            regex_texts.append(sign + r"(?P<century>[0-9]{%d})" % (yeardigits - 2,))

        # Anchor every pattern so that only the complete string can match.
        DATE_REGEX_CACHE[cache_key] = [
            re.compile(r"\A" + text + r"\Z") for text in regex_texts
        ]
    return DATE_REGEX_CACHE[cache_key]
117
118
def parse_date(datestring, yeardigits=4, expanded=False, defaultmonth=1, defaultday=1):
    """
    Parse an ISO 8601 date string into a datetime.date object.

    As the datetime.date implementation is limited to dates starting from
    0001-01-01, negative dates (BC) and year 0 can not be parsed by this
    method.

    For incomplete dates, this method chooses the first day for it. For
    instance if only a century is given, this method returns the 1st of
    January in year 1 of this century. The month and day used for the
    missing parts can be changed with defaultmonth and defaultday.

    supported formats: (expanded formats are shown with 6 digits for year)
      YYYYMMDD    +-YYYYYYMMDD      basic complete date
      YYYY-MM-DD  +-YYYYYY-MM-DD    extended complete date
      YYYYWwwD    +-YYYYYYWwwD      basic complete week date
      YYYY-Www-D  +-YYYYYY-Www-D    extended complete week date
      YYYYDDD     +-YYYYYYDDD       basic ordinal date
      YYYY-DDD    +-YYYYYY-DDD      extended ordinal date
      YYYYWww     +-YYYYYYWww       basic incomplete week date
      YYYY-Www    +-YYYYYY-Www      extended incomplete week date
      YYYYMM      +-YYYYYYMM        basic incomplete month date
      YYYY-MM     +-YYYYYY-MM       incomplete month date
      YYYY        +-YYYYYY          incomplete year date
      YY          +-YYYY            incomplete century date

    @param datestring: the ISO date string to parse
    @param yeardigits: how many digits are used to represent a year
    @param expanded: if True then a leading +/- sign is required. This
                     parameter is forced to True, if yeardigits != 4
    @param defaultmonth: month used when the string carries no month
                     (century and year dates) or a month of "00"
    @param defaultday: day of month used when the string carries no day
                     (century, year and month dates)

    @return: a datetime.date instance represented by datestring
    @raise ISO8601Error: if this function can not parse the datestring
    @raise ValueError: if datestring can not be represented by datetime.date,
                     or if a week number (01-53), a day of week (1-7) or an
                     ordinal day (001-366) is out of range
    """
    if yeardigits != 4:
        expanded = True
    for pattern in build_date_regexps(yeardigits, expanded):
        match = pattern.match(datestring)
        if match is None:
            continue
        # Possible keys: sign, century, year, month, week, day. Which of them
        # are present depends on the pattern that matched.
        groups = match.groupdict()
        # FIXME: negative dates not possible with python standard types
        sign = -1 if groups["sign"] == "-" else 1

        if "century" in groups:
            # First year of the century, e.g. "19" -> 1901.
            return date(
                sign * (int(groups["century"]) * 100 + 1), defaultmonth, defaultday
            )

        year = sign * int(groups["year"])

        if "month" in groups:
            # year-month or complete date; a month of "00" falls back to
            # defaultmonth.
            day = defaultday if groups.get("day") is None else int(groups["day"])
            return date(year, int(groups["month"]) or defaultmonth, day)

        # week date, ordinal date or plain year: computed relative to Jan 1st
        jan1 = date(year, 1, 1)

        if "week" in groups:
            week = int(groups["week"])
            # reduced accuracy week dates carry no day: use Monday
            weekday = int(groups.get("day") or 1)
            if not 1 <= week <= 53:
                raise ValueError("ISO week must be in 01..53: %r" % datestring)
            if not 1 <= weekday <= 7:
                raise ValueError("ISO day of week must be in 1..7: %r" % datestring)
            _, jan1_week, jan1_weekday = jan1.isocalendar()
            # If Jan 1st already lies in ISO week 1, week N starts N-1 weeks
            # after the Monday of that week; otherwise Jan 1st belongs to the
            # last week of the previous year and week N starts N weeks after
            # that week's Monday. Week 53 of a 52-week year is not rejected
            # and rolls over into the following year.
            weeks = week - 1 if jan1_week == 1 else week
            return jan1 + timedelta(weeks=weeks, days=weekday - jan1_weekday)

        if "day" in groups:  # ordinal date
            ordinal = int(groups["day"])
            if not 1 <= ordinal <= 366:
                raise ValueError("ordinal day must be in 001..366: %r" % datestring)
            # Day 366 of a non-leap year is not rejected and rolls over to
            # Jan 1st of the following year.
            return jan1 + timedelta(days=ordinal - 1)

        # year date
        return jan1.replace(month=defaultmonth, day=defaultday)
    raise ISO8601Error("Unrecognised ISO 8601 date format: %r" % datestring)
194
195
def date_isoformat(tdate, format=DATE_EXT_COMPLETE, yeardigits=4):
    """
    Format a date as a string.

    Convenience wrapper that hands all of its arguments on to
    isodate.isostrf.strftime unchanged.

    @param tdate: the value to format
    @param format: format passed to strftime; defaults to
                   Date-Extended-Complete (DATE_EXT_COMPLETE)
    @param yeardigits: passed through to strftime

    @return: the result of strftime
    """
    formatted = strftime(tdate, format, yeardigits)
    return formatted