/src/libsoup/libsoup/soup-date-utils.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright (C) 2020 Igalia, S.L. |
3 | | * |
4 | | * This library is free software; you can redistribute it and/or |
5 | | * modify it under the terms of the GNU Library General Public |
6 | | * License as published by the Free Software Foundation; either |
7 | | * version 2 of the License, or (at your option) any later version. |
8 | | * |
9 | | * This library is distributed in the hope that it will be useful, |
10 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
12 | | * Library General Public License for more details. |
13 | | * |
14 | | * You should have received a copy of the GNU Library General Public License |
15 | | * along with this library; see the file COPYING.LIB. If not, write to |
16 | | * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
17 | | * Boston, MA 02110-1301, USA. |
18 | | */ |
19 | | |
20 | | #ifdef HAVE_CONFIG_H |
21 | | #include <config.h> |
22 | | #endif |
23 | | |
24 | | #include <stdlib.h> |
25 | | |
26 | | #include "soup-date-utils.h" |
27 | | #include "soup-date-utils-private.h" |
28 | | |
29 | | /** |
30 | | * soup_date_time_is_past: |
31 | | * @date: a #GDateTime |
32 | | * |
33 | | * Determines if @date is in the past. |
34 | | * |
35 | | * Returns: %TRUE if @date is in the past |
36 | | */ |
37 | | gboolean |
38 | | soup_date_time_is_past (GDateTime *date) |
39 | 0 | { |
40 | 0 | g_return_val_if_fail (date != NULL, TRUE); |
41 | | |
42 | | /* optimization */ |
43 | 0 | if (g_date_time_get_year (date) < 2025) |
44 | 0 | return TRUE; |
45 | | |
46 | 0 | return g_date_time_to_unix (date) < time (NULL); |
47 | 0 | } |
48 | | |
49 | | /** |
50 | | * SoupDateFormat: |
51 | | * @SOUP_DATE_HTTP: RFC 1123 format, used by the HTTP "Date" header. Eg |
52 | | * "Sun, 06 Nov 1994 08:49:37 GMT". |
53 | | * @SOUP_DATE_COOKIE: The format for the "Expires" timestamp in the |
54 | | * Netscape cookie specification. Eg, "Sun, 06-Nov-1994 08:49:37 GMT". |
55 | | * |
56 | | * Date formats that [func@date_time_to_string] can use. |
57 | | * |
58 | | * @SOUP_DATE_HTTP and @SOUP_DATE_COOKIE always coerce the time to |
59 | | * UTC. |
60 | | * |
61 | | * This enum may be extended with more values in future releases. |
62 | | **/ |
63 | | |
/* English month abbreviations, January first, as used in HTTP and
 * cookie date strings. Do not internationalize.
 */
static const char *const months[] = {
	"Jan",
	"Feb",
	"Mar",
	"Apr",
	"May",
	"Jun",
	"Jul",
	"Aug",
	"Sep",
	"Oct",
	"Nov",
	"Dec"
};

/* English weekday abbreviations, Monday first.
 * Do not internationalize.
 */
static const char *const days[] = {
	"Mon",
	"Tue",
	"Wed",
	"Thu",
	"Fri",
	"Sat",
	"Sun"
};
74 | | |
75 | | /** |
76 | | * soup_date_time_to_string: |
77 | | * @date: a #GDateTime |
78 | | * @format: the format to generate the date in |
79 | | * |
80 | | * Converts @date to a string in the format described by @format. |
81 | | * |
82 | | * Returns: (transfer full): @date as a string or %NULL |
83 | | **/ |
84 | | char * |
85 | | soup_date_time_to_string (GDateTime *date, |
86 | | SoupDateFormat format) |
87 | 0 | { |
88 | 0 | g_return_val_if_fail (date != NULL, NULL); |
89 | | |
90 | 0 | if (format == SOUP_DATE_HTTP || format == SOUP_DATE_COOKIE) { |
91 | | /* HTTP and COOKIE formats require UTC timestamp, so coerce |
92 | | * @date if it's non-UTC. |
93 | | */ |
94 | 0 | GDateTime *utcdate = g_date_time_to_utc (date); |
95 | 0 | char *date_format; |
96 | 0 | char *formatted_date; |
97 | |
|
98 | 0 | if (!utcdate) |
99 | 0 | return NULL; |
100 | | |
101 | | // We insert days/months ourselves to avoid locale specific formatting |
102 | 0 | if (format == SOUP_DATE_HTTP) { |
103 | | /* "Sun, 06 Nov 1994 08:49:37 GMT" */ |
104 | 0 | date_format = g_strdup_printf ("%s, %%d %s %%Y %%T GMT", |
105 | 0 | days[g_date_time_get_day_of_week (utcdate) - 1], |
106 | 0 | months[g_date_time_get_month (utcdate) - 1]); |
107 | 0 | } else { |
108 | | /* "Sun, 06-Nov-1994 08:49:37 GMT" */ |
109 | 0 | date_format = g_strdup_printf ("%s, %%d-%s-%%Y %%T GMT", |
110 | 0 | days[g_date_time_get_day_of_week (utcdate) - 1], |
111 | 0 | months[g_date_time_get_month (utcdate) - 1]); |
112 | 0 | } |
113 | |
|
114 | 0 | formatted_date = g_date_time_format (utcdate, (const char*)date_format); |
115 | 0 | g_date_time_unref (utcdate); |
116 | 0 | g_free (date_format); |
117 | 0 | return formatted_date; |
118 | 0 | } |
119 | | |
120 | 0 | g_return_val_if_reached (NULL); |
121 | 0 | } |
122 | | |
123 | | static inline gboolean |
124 | | parse_day (int *day, const char **date_string) |
125 | 13.8k | { |
126 | 13.8k | char *end; |
127 | | |
128 | 13.8k | *day = strtoul (*date_string, &end, 10); |
129 | 13.8k | if (end == (char *)*date_string) |
130 | 1.31k | return FALSE; |
131 | | |
132 | 17.1k | while (*end == ' ' || *end == '-') |
133 | 4.64k | end++; |
134 | 12.5k | *date_string = end; |
135 | 12.5k | return *day >= 1 && *day <= 31; |
136 | 13.8k | } |
137 | | |
138 | | static inline gboolean |
139 | | parse_month (int *month, const char **date_string) |
140 | 12.6k | { |
141 | 12.6k | int i; |
142 | | |
143 | 66.0k | for (i = 0; i < G_N_ELEMENTS (months); i++) { |
144 | 65.5k | if (!g_ascii_strncasecmp (*date_string, months[i], 3)) { |
145 | 12.1k | *month = i + 1; |
146 | 12.1k | *date_string += 3; |
147 | 14.2k | while (**date_string == ' ' || **date_string == '-') |
148 | 2.04k | (*date_string)++; |
149 | 12.1k | return TRUE; |
150 | 12.1k | } |
151 | 65.5k | } |
152 | 495 | return FALSE; |
153 | 12.6k | } |
154 | | |
155 | | static inline gboolean |
156 | | parse_year (int *year, const char **date_string) |
157 | 9.09k | { |
158 | 9.09k | char *end; |
159 | | |
160 | 9.09k | *year = strtoul (*date_string, &end, 10); |
161 | 9.09k | if (end == (char *)*date_string) |
162 | 501 | return FALSE; |
163 | | |
164 | 8.59k | if (end == (char *)*date_string + 2) { |
165 | 1.07k | if (*year < 70) |
166 | 663 | *year += 2000; |
167 | 408 | else |
168 | 408 | *year += 1900; |
169 | 7.52k | } else if (end == (char *)*date_string + 3) |
170 | 409 | *year += 1900; |
171 | | |
172 | 12.6k | while (*end == ' ' || *end == '-') |
173 | 4.07k | end++; |
174 | 8.59k | *date_string = end; |
175 | 8.59k | return *year > 0 && *year < 9999; |
176 | 9.09k | } |
177 | | |
178 | | static inline gboolean |
179 | | parse_time (int *hour, int *minute, int *second, const char **date_string) |
180 | 10.9k | { |
181 | 10.9k | char *p, *end; |
182 | | |
183 | 10.9k | *hour = strtoul (*date_string, &end, 10); |
184 | 10.9k | if (end == (char *)*date_string || *end++ != ':') |
185 | 1.58k | return FALSE; |
186 | 9.40k | p = end; |
187 | 9.40k | *minute = strtoul (p, &end, 10); |
188 | 9.40k | if (end == p || *end++ != ':') |
189 | 813 | return FALSE; |
190 | 8.59k | p = end; |
191 | 8.59k | *second = strtoul (p, &end, 10); |
192 | 8.59k | if (end == p) |
193 | 390 | return FALSE; |
194 | 8.20k | p = end; |
195 | | |
196 | 9.45k | while (*p == ' ') |
197 | 1.25k | p++; |
198 | 8.20k | *date_string = p; |
199 | 8.20k | return *hour >= 0 && *hour < 24 && *minute >= 0 && *minute < 60 && *second >= 0 && *second < 60; |
200 | 8.59k | } |
201 | | |
202 | | static inline gboolean |
203 | | parse_timezone (GTimeZone **timezone, const char **date_string) |
204 | 4.43k | { |
205 | 4.43k | gint32 offset_minutes; |
206 | 4.43k | gboolean utc; |
207 | | |
208 | 4.43k | if (!**date_string) { |
209 | 924 | utc = FALSE; |
210 | 924 | offset_minutes = 0; |
211 | 3.50k | } else if (**date_string == '+' || **date_string == '-') { |
212 | 1.68k | gulong val; |
213 | 1.68k | int sign = (**date_string == '+') ? 1 : -1; |
214 | 1.68k | val = strtoul (*date_string + 1, (char **)date_string, 10); |
215 | 1.68k | if (val > 9999) |
216 | 381 | return FALSE; |
217 | 1.30k | if (**date_string == ':') { |
218 | 847 | gulong val2 = strtoul (*date_string + 1, (char **)date_string, 10); |
219 | 847 | if (val > 99 || val2 > 99) |
220 | 603 | return FALSE; |
221 | 244 | val = 60 * val + val2; |
222 | 244 | } else |
223 | 459 | val = 60 * (val / 100) + (val % 100); |
224 | 703 | offset_minutes = sign * val; |
225 | 703 | utc = (sign == -1) && !val; |
226 | 1.82k | } else if (**date_string == 'Z') { |
227 | 195 | offset_minutes = 0; |
228 | 195 | utc = TRUE; |
229 | 195 | (*date_string)++; |
230 | 1.62k | } else if (!strcmp (*date_string, "GMT") || |
231 | 1.43k | !strcmp (*date_string, "UTC")) { |
232 | 390 | offset_minutes = 0; |
233 | 390 | utc = TRUE; |
234 | 390 | (*date_string) += 3; |
235 | 1.23k | } else if (strchr ("ECMP", **date_string) && |
236 | 888 | ((*date_string)[1] == 'D' || (*date_string)[1] == 'S') && |
237 | 656 | (*date_string)[2] == 'T') { |
238 | 390 | offset_minutes = -60 * (5 * strcspn ("ECMP", *date_string)); |
239 | 390 | if ((*date_string)[1] == 'D') |
240 | 195 | offset_minutes += 60; |
241 | 390 | utc = FALSE; |
242 | 390 | } else |
243 | 846 | return FALSE; |
244 | | |
245 | 2.60k | if (utc) |
246 | 981 | *timezone = g_time_zone_new_utc (); |
247 | 1.62k | else |
248 | 1.62k | *timezone = g_time_zone_new_offset (offset_minutes * 60); |
249 | 2.60k | return TRUE; |
250 | 4.43k | } |
251 | | |
252 | | static GDateTime * |
253 | | parse_textual_date (const char *date_string) |
254 | 14.0k | { |
255 | 14.0k | int month, day, year, hour, minute, second; |
256 | 14.0k | GTimeZone *tz = NULL; |
257 | 14.0k | GDateTime *date; |
258 | | |
259 | | /* If it starts with a word, it must be a weekday, which we skip */ |
260 | 14.0k | if (g_ascii_isalpha (*date_string)) { |
261 | 10.9k | while (g_ascii_isalpha (*date_string)) |
262 | 5.66k | date_string++; |
263 | 5.29k | if (*date_string == ',') |
264 | 630 | date_string++; |
265 | 9.99k | while (g_ascii_isspace (*date_string)) |
266 | 4.70k | date_string++; |
267 | 5.29k | } |
268 | | |
269 | | /* If there's now another word, this must be an asctime-date */ |
270 | 14.0k | if (g_ascii_isalpha (*date_string)) { |
271 | | /* (Sun) Nov 6 08:49:37 1994 */ |
272 | 4.74k | if (!parse_month (&month, &date_string) || |
273 | 4.51k | !parse_day (&day, &date_string) || |
274 | 3.92k | !parse_time (&hour, &minute, &second, &date_string) || |
275 | 1.46k | !parse_year (&year, &date_string) || |
276 | 896 | !g_date_valid_dmy (day, month, year)) |
277 | 4.04k | return NULL; |
278 | | |
279 | | /* There shouldn't be a timezone, but check anyway */ |
280 | 701 | parse_timezone (&tz, &date_string); |
281 | 9.33k | } else { |
282 | | /* Non-asctime date, so some variation of |
283 | | * (Sun,) 06 Nov 1994 08:49:37 GMT |
284 | | */ |
285 | 9.33k | if (!parse_day (&day, &date_string) || |
286 | 7.90k | !parse_month (&month, &date_string) || |
287 | 7.63k | !parse_year (&year, &date_string) || |
288 | 7.06k | !parse_time (&hour, &minute, &second, &date_string) || |
289 | 3.92k | !g_date_valid_dmy (day, month, year)) |
290 | 5.59k | return NULL; |
291 | | |
292 | | /* This time there *should* be a timezone, but we |
293 | | * survive if there isn't. |
294 | | */ |
295 | 3.73k | parse_timezone (&tz, &date_string); |
296 | 3.73k | } |
297 | | |
298 | 4.43k | if (!tz) |
299 | 1.83k | tz = g_time_zone_new_utc (); |
300 | | |
301 | 4.43k | date = g_date_time_new (tz, year, month, day, hour, minute, second); |
302 | 4.43k | g_time_zone_unref (tz); |
303 | | |
304 | 4.43k | return date; |
305 | 14.0k | } |
306 | | |
307 | | /** |
308 | | * soup_date_time_new_from_http_string: |
309 | | * @date_string: The date as a string |
310 | | * |
311 | | * Parses @date_string and tries to extract a date from it. |
312 | | * |
313 | | * This recognizes all of the "HTTP-date" formats from RFC 2616, RFC 2822 dates, |
314 | | * and reasonable approximations thereof. (Eg, it is lenient about whitespace, |
315 | | * leading "0"s, etc.) |
316 | | * |
317 | | * Returns: (nullable): a new #GDateTime, or %NULL if @date_string |
318 | | * could not be parsed. |
319 | | **/ |
320 | | GDateTime * |
321 | | soup_date_time_new_from_http_string (const char *date_string) |
322 | 14.2k | { |
323 | 14.2k | g_return_val_if_fail (date_string != NULL, NULL); |
324 | | |
325 | 14.6k | while (g_ascii_isspace (*date_string)) |
326 | 388 | date_string++; |
327 | | |
328 | | /* If it starts with a digit, it's either an ISO 8601 date, or |
329 | | * an RFC2822 date without the optional weekday; in the later |
330 | | * case, there will be a month name later on, so look for one |
331 | | * of the month-start letters. |
332 | | * Previous versions of this library supported parsing iso8601 strings |
333 | | * however g_date_time_new_from_iso8601() should be used now. Just |
334 | | * catch those in case for testing. |
335 | | */ |
336 | 14.2k | if (G_UNLIKELY (g_ascii_isdigit (*date_string) && !strpbrk (date_string, "JFMASOND"))) { |
337 | 202 | g_debug ("Unsupported format passed to soup_date_time_new_from_http_string(): %s", date_string); |
338 | 202 | return NULL; |
339 | 202 | } |
340 | | |
341 | 14.0k | return parse_textual_date (date_string); |
342 | 14.2k | } |