/rust/registry/src/index.crates.io-1949cf8c6b5b557f/jiff-0.2.16/src/fmt/rfc9557.rs
Line | Count | Source |
1 | | /*! |
2 | | This module provides parsing facilities for [RFC 9557] extensions to |
3 | | [RFC 3339]. |
4 | | |
5 | | This only provides internal helper routines that can be used in other parsers. |
6 | | Namely, RFC 9557 is just a backward compatible expansion to RFC 3339. |
7 | | |
8 | | The parser in this module checks for full syntactic validity of the annotation |
9 | | syntax defined in RFC 9557. However, Jiff doesn't make use of any of these |
10 | | annotations except for time zone annotations. So for example, |
11 | | `2024-05-25T13:33:00-05[America/New_York][foo=bar]` is valid, but the parser |
12 | | will only expose the `America/New_York` annotation. |
13 | | |
14 | | Note though that even for things that are ignored, validity |
15 | | and criticality are still respected. So for example, |
16 | | `2024-05-25T13:33:00-05[America/New_York][!foo=bar]` will fail to parse because |
17 | | of the `!` indicating that consumers must take action on the annotation, |
18 | | including by returning an error if it isn't supported. |
19 | | |
20 | | [RFC 3339]: https://www.rfc-editor.org/rfc/rfc3339 |
21 | | [RFC 9557]: https://www.rfc-editor.org/rfc/rfc9557.html |
22 | | */ |
23 | | |
24 | | // Here's the specific part of Temporal's grammar that is implemented below |
25 | | // (which should match what's in RFC 9557): |
26 | | // |
27 | | // TimeZoneAnnotation ::: |
28 | | // [ AnnotationCriticalFlag[opt] TimeZoneIdentifier ] |
29 | | // |
30 | | // Annotations ::: |
31 | | // Annotation Annotations[opt] |
32 | | // |
33 | | // AnnotationCriticalFlag ::: |
34 | | // ! |
35 | | // |
36 | | // TimeZoneIdentifier ::: |
37 | | // TimeZoneUTCOffsetName |
38 | | // TimeZoneIANAName |
39 | | // |
40 | | // TimeZoneIANAName ::: |
41 | | // TimeZoneIANANameComponent |
42 | | // TimeZoneIANAName / TimeZoneIANANameComponent |
43 | | // |
44 | | // TimeZoneIANANameComponent ::: |
45 | | // TZLeadingChar |
46 | | // TimeZoneIANANameComponent TZChar |
47 | | // |
48 | | // Annotation ::: |
49 | | // [ AnnotationCriticalFlag[opt] AnnotationKey = AnnotationValue ] |
50 | | // |
51 | | // AnnotationKey ::: |
52 | | // AKeyLeadingChar |
53 | | // AnnotationKey AKeyChar |
54 | | // |
55 | | // AnnotationValue ::: |
56 | | // AnnotationValueComponent |
57 | | // AnnotationValueComponent - AnnotationValue |
58 | | // |
59 | | // AnnotationValueComponent ::: |
60 | | // Alpha AnnotationValueComponent[opt] |
61 | | // DecimalDigit AnnotationValueComponent[opt] |
62 | | // |
63 | | // AKeyLeadingChar ::: |
64 | | // LowercaseAlpha |
65 | | // _ |
66 | | // |
67 | | // AKeyChar ::: |
68 | | // AKeyLeadingChar |
69 | | // DecimalDigit |
70 | | // - |
71 | | // |
72 | | // TZLeadingChar ::: |
73 | | // Alpha |
74 | | // . |
75 | | // _ |
76 | | // |
77 | | // TZChar ::: |
78 | | // TZLeadingChar |
79 | | // DecimalDigit |
80 | | // - |
81 | | // + |
82 | | // |
83 | | // DecimalDigit :: one of |
84 | | // 0 1 2 3 4 5 6 7 8 9 |
85 | | // |
86 | | // Alpha ::: one of |
87 | | // A B C D E F G H I J K L M N O P Q R S T U V W X Y Z |
88 | | // a b c d e f g h i j k l m n o p q r s t u v w x y z |
89 | | // |
90 | | // LowercaseAlpha ::: one of |
91 | | // a b c d e f g h i j k l m n o p q r s t u v w x y z |
92 | | // |
93 | | // # N.B. This is handled by src/fmt/offset.rs, so we don't expand it here. |
94 | | // TimeZoneUTCOffsetName ::: |
95 | | // UTCOffsetMinutePrecision |
96 | | |
97 | | use crate::{ |
98 | | error::{err, Error}, |
99 | | fmt::{ |
100 | | offset::{self, ParsedOffset}, |
101 | | temporal::{TimeZoneAnnotation, TimeZoneAnnotationKind}, |
102 | | Parsed, |
103 | | }, |
104 | | util::{escape, parse}, |
105 | | }; |
106 | | |
107 | | /// The result of parsing RFC 9557 annotations. |
108 | | /// |
109 | | /// Currently, this only provides access to a parsed time zone annotation, if |
110 | | /// present. While the parser does validate all other key/value annotations, |
111 | | /// Jiff doesn't make use of them and thus does not expose them here. They are |
112 | | /// only validated at a syntax level. |
113 | | #[derive(Debug)] |
114 | | pub(crate) struct ParsedAnnotations<'i> { |
115 | | /// The original input that all of the annotations were parsed from. |
116 | | /// |
117 | | /// N.B. This is currently unused, but potentially useful, so we leave it. |
118 | | #[allow(dead_code)] |
119 | | input: escape::Bytes<'i>, |
120 | | /// An optional time zone annotation that was extracted from the input. |
121 | | time_zone: Option<ParsedTimeZone<'i>>, |
122 | | // While we parse/validate them, we don't support any other annotations |
123 | | // at time of writing. Temporal supports calendar annotations, but I'm |
124 | | // not sure Jiff will ever go down that route. |
125 | | } |
126 | | |
127 | | impl<'i> ParsedAnnotations<'i> { |
128 | | /// Return an empty parsed annotations. |
129 | 0 | pub(crate) fn none() -> ParsedAnnotations<'static> { |
130 | 0 | ParsedAnnotations { input: escape::Bytes(&[]), time_zone: None } |
131 | 0 | } |
132 | | |
133 | | /// Turns the parsed time zone, if any, into a structured time zone |
134 | | /// annotation. If no time zone annotation was found, returns `Ok(None)`. |
135 | | /// |
136 | | /// This can return an error if the parsed offset could not be converted |
137 | | /// to a `crate::tz::Offset`. |
138 | 0 | pub(crate) fn to_time_zone_annotation( |
139 | 0 | &self, |
140 | 0 | ) -> Result<Option<TimeZoneAnnotation<'i>>, Error> { |
141 | 0 | let Some(ref parsed) = self.time_zone else { return Ok(None) }; |
142 | 0 | Ok(Some(parsed.to_time_zone_annotation()?)) |
143 | 0 | } |
144 | | } |
145 | | |
146 | | /// The result of parsing a time zone annotation. |
147 | | #[derive(Debug)] |
148 | | enum ParsedTimeZone<'i> { |
149 | | /// The name of an IANA time zone was found. |
150 | | Named { |
151 | | /// Whether the critical flag was seen. |
152 | | critical: bool, |
153 | | /// The parsed name. |
154 | | name: &'i str, |
155 | | }, |
156 | | /// A specific UTC numeric offset was found. |
157 | | Offset { |
158 | | /// Whether the critical flag was seen. |
159 | | critical: bool, |
160 | | /// The parsed UTC offset. |
161 | | offset: ParsedOffset, |
162 | | }, |
163 | | } |
164 | | |
165 | | impl<'i> ParsedTimeZone<'i> { |
166 | | /// Turns this parsed time zone into a structured time zone annotation. |
167 | | /// |
168 | | /// This can return an error if the parsed offset could not be converted |
169 | | /// to a `crate::tz::Offset`. |
170 | | /// |
171 | | /// This also includes a flag of whether the annotation is "critical" or |
172 | | /// not. |
173 | 0 | pub(crate) fn to_time_zone_annotation( |
174 | 0 | &self, |
175 | 0 | ) -> Result<TimeZoneAnnotation<'i>, Error> { |
176 | 0 | let (kind, critical) = match *self { |
177 | 0 | ParsedTimeZone::Named { name, critical } => { |
178 | 0 | let kind = TimeZoneAnnotationKind::from(name); |
179 | 0 | (kind, critical) |
180 | | } |
181 | 0 | ParsedTimeZone::Offset { ref offset, critical } => { |
182 | 0 | let kind = TimeZoneAnnotationKind::Offset(offset.to_offset()?); |
183 | 0 | (kind, critical) |
184 | | } |
185 | | }; |
186 | 0 | Ok(TimeZoneAnnotation { kind, critical }) |
187 | 0 | } |
188 | | } |
189 | | |
190 | | /// A parser for RFC 9557 annotations. |
191 | | #[derive(Debug)] |
192 | | pub(crate) struct Parser { |
193 | | /// There are currently no configuration options for this parser. |
194 | | _priv: (), |
195 | | } |
196 | | |
197 | | impl Parser { |
198 | | /// Create a new RFC 9557 annotation parser with the default configuration. |
199 | 0 | pub(crate) const fn new() -> Parser { |
200 | 0 | Parser { _priv: () } |
201 | 0 | } |
202 | | |
203 | | /// Parse RFC 9557 annotations from the start of `input`. |
204 | | /// |
205 | | /// This only parses annotations when `input` starts with an `[`. |
206 | | /// |
207 | | /// Note that the result returned only provides access to the time zone |
208 | | /// annotation (if it was present). All other annotations are parsed and |
209 | | /// checked for validity, but are not accessible from `ParsedAnnotations` |
210 | | /// since Jiff does not make use of them. |
211 | 0 | pub(crate) fn parse<'i>( |
212 | 0 | &self, |
213 | 0 | input: &'i [u8], |
214 | 0 | ) -> Result<Parsed<'i, ParsedAnnotations<'i>>, Error> { |
215 | 0 | let mkslice = parse::slicer(input); |
216 | | |
217 | 0 | let Parsed { value: time_zone, mut input } = |
218 | 0 | self.parse_time_zone_annotation(input)?; |
219 | | loop { |
220 | | // We don't actually do anything with any annotation that isn't |
221 | | // a time zone, but we do parse them to ensure validity and to |
222 | | // be able to fail when a critical flag is set. Otherwise, we know |
223 | | // we're done if parsing an annotation doesn't consume any input. |
224 | 0 | let Parsed { value: did_consume, input: unconsumed } = |
225 | 0 | self.parse_annotation(input)?; |
226 | 0 | if !did_consume { |
227 | 0 | break; |
228 | 0 | } |
229 | 0 | input = unconsumed; |
230 | | } |
231 | | |
232 | 0 | let value = ParsedAnnotations { |
233 | 0 | input: escape::Bytes(mkslice(input)), |
234 | 0 | time_zone, |
235 | 0 | }; |
236 | 0 | Ok(Parsed { value, input }) |
237 | 0 | } |
238 | | |
239 | 0 | fn parse_time_zone_annotation<'i>( |
240 | 0 | &self, |
241 | 0 | mut input: &'i [u8], |
242 | 0 | ) -> Result<Parsed<'i, Option<ParsedTimeZone<'i>>>, Error> { |
243 | 0 | let unconsumed = input; |
244 | 0 | if input.is_empty() || input[0] != b'[' { |
245 | 0 | return Ok(Parsed { value: None, input: unconsumed }); |
246 | 0 | } |
247 | 0 | input = &input[1..]; |
248 | | |
249 | 0 | let critical = input.starts_with(b"!"); |
250 | 0 | if critical { |
251 | 0 | input = &input[1..]; |
252 | 0 | } |
253 | | |
254 | | // If we're starting with a `+` or `-`, then we know we MUST have a |
255 | | // time zone offset annotation. It can't be anything else since neither |
256 | | // an IANA annotation nor a generic key/value annotation can begin with |
257 | | // a `+` or a `-`. |
258 | 0 | if input.starts_with(b"+") || input.starts_with(b"-") { |
259 | | const P: offset::Parser = |
260 | | offset::Parser::new().zulu(false).subminute(false); |
261 | | |
262 | 0 | let Parsed { value: offset, input } = P.parse(input)?; |
263 | 0 | let Parsed { input, .. } = |
264 | 0 | self.parse_tz_annotation_close(input)?; |
265 | 0 | let value = Some(ParsedTimeZone::Offset { critical, offset }); |
266 | 0 | return Ok(Parsed { value, input }); |
267 | 0 | } |
268 | | |
269 | | // At this point, we know it's impossible to see an offset. But we |
270 | | // could still see *either* an IANA time zone annotation or a more |
271 | | // generic key-value annotation. We don't know yet. In the latter case, |
272 | | // we'll eventually see an `=` sign. But since IANA time zone names |
273 | | // represent a superset of generic keys, we just parse what we can. |
274 | | // Once we stop, we can check for an `=`. |
275 | 0 | let mkiana = parse::slicer(input); |
276 | 0 | let Parsed { mut input, .. } = |
277 | 0 | self.parse_tz_annotation_iana_name(input)?; |
278 | | // Now that we've parsed the first IANA name component, if this were |
279 | | // actually a generic key/value annotation, the `=` *must* appear here. |
280 | | // Otherwise, we assume we are trying to parse an IANA annotation as it |
281 | | // is the only other possibility and likely the most common case. |
282 | 0 | if input.starts_with(b"=") { |
283 | | // Pretend like we parsed nothing and let the caller try to parse |
284 | | // a generic key/value annotation. |
285 | 0 | return Ok(Parsed { value: None, input: unconsumed }); |
286 | 0 | } |
287 | 0 | while input.starts_with(b"/") { |
288 | 0 | input = &input[1..]; |
289 | 0 | let Parsed { input: unconsumed, .. } = |
290 | 0 | self.parse_tz_annotation_iana_name(input)?; |
291 | 0 | input = unconsumed; |
292 | | } |
293 | | // This is OK because all bytes in an IANA TZ annotation are guaranteed |
294 | | // to be ASCII, or else we wouldn't be here. If this turns out to be |
295 | | // a perf issue, we can do an unchecked conversion here. But I figured |
296 | | // it would be better to start conservative. |
297 | 0 | let iana_name = core::str::from_utf8(mkiana(input)).expect("ASCII"); |
298 | 0 | let time_zone = |
299 | 0 | Some(ParsedTimeZone::Named { critical, name: iana_name }); |
300 | | // And finally, parse the closing bracket. |
301 | 0 | let Parsed { input, .. } = self.parse_tz_annotation_close(input)?; |
302 | 0 | Ok(Parsed { value: time_zone, input }) |
303 | 0 | } |
304 | | |
305 | 0 | fn parse_annotation<'i>( |
306 | 0 | &self, |
307 | 0 | mut input: &'i [u8], |
308 | 0 | ) -> Result<Parsed<'i, bool>, Error> { |
309 | 0 | if input.is_empty() || input[0] != b'[' { |
310 | 0 | return Ok(Parsed { value: false, input }); |
311 | 0 | } |
312 | 0 | input = &input[1..]; |
313 | | |
314 | 0 | let critical = input.starts_with(b"!"); |
315 | 0 | if critical { |
316 | 0 | input = &input[1..]; |
317 | 0 | } |
318 | | |
319 | 0 | let Parsed { value: key, input } = self.parse_annotation_key(input)?; |
320 | 0 | let Parsed { input, .. } = self.parse_annotation_separator(input)?; |
321 | 0 | let Parsed { input, .. } = self.parse_annotation_values(input)?; |
322 | 0 | let Parsed { input, .. } = self.parse_annotation_close(input)?; |
323 | | |
324 | | // If the critical flag is set, then we automatically return an error |
325 | | // because we don't support any non-time-zone annotations. When the |
326 | | // critical flag isn't set, we're "permissive" and just validate that |
327 | | // the syntax is correct (as we've already done at this point). |
328 | 0 | if critical { |
329 | 0 | return Err(err!( |
330 | 0 | "found unsupported RFC 9557 annotation with key {key:?} \ |
331 | 0 | with the critical flag ('!') set", |
332 | 0 | key = escape::Bytes(key), |
333 | 0 | )); |
334 | 0 | } |
335 | | |
336 | 0 | Ok(Parsed { value: true, input }) |
337 | 0 | } |
338 | | |
339 | 0 | fn parse_tz_annotation_iana_name<'i>( |
340 | 0 | &self, |
341 | 0 | input: &'i [u8], |
342 | 0 | ) -> Result<Parsed<'i, &'i [u8]>, Error> { |
343 | 0 | let mkname = parse::slicer(input); |
344 | 0 | let Parsed { mut input, .. } = |
345 | 0 | self.parse_tz_annotation_leading_char(input)?; |
346 | | loop { |
347 | 0 | let Parsed { value: did_consume, input: unconsumed } = |
348 | 0 | self.parse_tz_annotation_char(input); |
349 | 0 | if !did_consume { |
350 | 0 | break; |
351 | 0 | } |
352 | 0 | input = unconsumed; |
353 | | } |
354 | 0 | Ok(Parsed { value: mkname(input), input }) |
355 | 0 | } |
356 | | |
357 | 0 | fn parse_annotation_key<'i>( |
358 | 0 | &self, |
359 | 0 | input: &'i [u8], |
360 | 0 | ) -> Result<Parsed<'i, &'i [u8]>, Error> { |
361 | 0 | let mkkey = parse::slicer(input); |
362 | 0 | let Parsed { mut input, .. } = |
363 | 0 | self.parse_annotation_key_leading_char(input)?; |
364 | | loop { |
365 | 0 | let Parsed { value: did_consume, input: unconsumed } = |
366 | 0 | self.parse_annotation_key_char(input); |
367 | 0 | if !did_consume { |
368 | 0 | break; |
369 | 0 | } |
370 | 0 | input = unconsumed; |
371 | | } |
372 | 0 | Ok(Parsed { value: mkkey(input), input }) |
373 | 0 | } |
374 | | |
375 | | // N.B. If we ever actually need the values, this should probably return a |
376 | | // `Vec<&'i [u8]>`. (Well, no, because that wouldn't be good for core-only |
377 | | // configurations. So it will probably need to be something else. But, |
378 | | // probably Jiff will never care about other values.) |
379 | 0 | fn parse_annotation_values<'i>( |
380 | 0 | &self, |
381 | 0 | input: &'i [u8], |
382 | 0 | ) -> Result<Parsed<'i, ()>, Error> { |
383 | 0 | let Parsed { mut input, .. } = self.parse_annotation_value(input)?; |
384 | 0 | while input.starts_with(b"-") { |
385 | 0 | input = &input[1..]; |
386 | 0 | let Parsed { input: unconsumed, .. } = |
387 | 0 | self.parse_annotation_value(input)?; |
388 | 0 | input = unconsumed; |
389 | | } |
390 | 0 | Ok(Parsed { value: (), input }) |
391 | 0 | } |
392 | | |
393 | 0 | fn parse_annotation_value<'i>( |
394 | 0 | &self, |
395 | 0 | input: &'i [u8], |
396 | 0 | ) -> Result<Parsed<'i, &'i [u8]>, Error> { |
397 | 0 | let mkvalue = parse::slicer(input); |
398 | 0 | let Parsed { mut input, .. } = |
399 | 0 | self.parse_annotation_value_leading_char(input)?; |
400 | | loop { |
401 | 0 | let Parsed { value: did_consume, input: unconsumed } = |
402 | 0 | self.parse_annotation_value_char(input); |
403 | 0 | if !did_consume { |
404 | 0 | break; |
405 | 0 | } |
406 | 0 | input = unconsumed; |
407 | | } |
408 | 0 | let value = mkvalue(input); |
409 | 0 | Ok(Parsed { value, input }) |
410 | 0 | } |
411 | | |
412 | 0 | fn parse_tz_annotation_leading_char<'i>( |
413 | 0 | &self, |
414 | 0 | input: &'i [u8], |
415 | 0 | ) -> Result<Parsed<'i, ()>, Error> { |
416 | 0 | if input.is_empty() { |
417 | 0 | return Err(err!( |
418 | 0 | "expected the start of an RFC 9557 annotation or IANA \ |
419 | 0 | time zone component name, but found end of input instead", |
420 | 0 | )); |
421 | 0 | } |
422 | 0 | if !matches!(input[0], b'_' | b'.' | b'A'..=b'Z' | b'a'..=b'z') { |
423 | 0 | return Err(err!( |
424 | 0 | "expected ASCII alphabetic byte (or underscore or period) \ |
425 | 0 | at the start of an RFC 9557 annotation or time zone \ |
426 | 0 | component name, but found {:?} instead", |
427 | 0 | escape::Byte(input[0]), |
428 | 0 | )); |
429 | 0 | } |
430 | 0 | Ok(Parsed { value: (), input: &input[1..] }) |
431 | 0 | } |
432 | | |
433 | 0 | fn parse_tz_annotation_char<'i>( |
434 | 0 | &self, |
435 | 0 | input: &'i [u8], |
436 | 0 | ) -> Parsed<'i, bool> { |
437 | 0 | let is_tz_annotation_char = |byte| { |
438 | 0 | matches!( |
439 | 0 | byte, |
440 | 0 | b'_' | b'.' | b'+' | b'-' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z', |
441 | | ) |
442 | 0 | }; |
443 | 0 | if input.is_empty() || !is_tz_annotation_char(input[0]) { |
444 | 0 | return Parsed { value: false, input }; |
445 | 0 | } |
446 | 0 | Parsed { value: true, input: &input[1..] } |
447 | 0 | } |
448 | | |
449 | 0 | fn parse_annotation_key_leading_char<'i>( |
450 | 0 | &self, |
451 | 0 | input: &'i [u8], |
452 | 0 | ) -> Result<Parsed<'i, ()>, Error> { |
453 | 0 | if input.is_empty() { |
454 | 0 | return Err(err!( |
455 | 0 | "expected the start of an RFC 9557 annotation key, \ |
456 | 0 | but found end of input instead", |
457 | 0 | )); |
458 | 0 | } |
459 | 0 | if !matches!(input[0], b'_' | b'a'..=b'z') { |
460 | 0 | return Err(err!( |
461 | 0 | "expected lowercase alphabetic byte (or underscore) \ |
462 | 0 | at the start of an RFC 9557 annotation key, \ |
463 | 0 | but found {:?} instead", |
464 | 0 | escape::Byte(input[0]), |
465 | 0 | )); |
466 | 0 | } |
467 | 0 | Ok(Parsed { value: (), input: &input[1..] }) |
468 | 0 | } |
469 | | |
470 | 0 | fn parse_annotation_key_char<'i>( |
471 | 0 | &self, |
472 | 0 | input: &'i [u8], |
473 | 0 | ) -> Parsed<'i, bool> { |
474 | 0 | let is_annotation_key_char = |
475 | 0 | |byte| matches!(byte, b'_' | b'-' | b'0'..=b'9' | b'a'..=b'z'); |
476 | 0 | if input.is_empty() || !is_annotation_key_char(input[0]) { |
477 | 0 | return Parsed { value: false, input }; |
478 | 0 | } |
479 | 0 | Parsed { value: true, input: &input[1..] } |
480 | 0 | } |
481 | | |
482 | 0 | fn parse_annotation_value_leading_char<'i>( |
483 | 0 | &self, |
484 | 0 | input: &'i [u8], |
485 | 0 | ) -> Result<Parsed<'i, ()>, Error> { |
486 | 0 | if input.is_empty() { |
487 | 0 | return Err(err!( |
488 | 0 | "expected the start of an RFC 9557 annotation value, \ |
489 | 0 | but found end of input instead", |
490 | 0 | )); |
491 | 0 | } |
492 | 0 | if !matches!(input[0], b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z') { |
493 | 0 | return Err(err!( |
494 | 0 | "expected alphanumeric ASCII byte \ |
495 | 0 | at the start of an RFC 9557 annotation value, \ |
496 | 0 | but found {:?} instead", |
497 | 0 | escape::Byte(input[0]), |
498 | 0 | )); |
499 | 0 | } |
500 | 0 | Ok(Parsed { value: (), input: &input[1..] }) |
501 | 0 | } |
502 | | |
503 | 0 | fn parse_annotation_value_char<'i>( |
504 | 0 | &self, |
505 | 0 | input: &'i [u8], |
506 | 0 | ) -> Parsed<'i, bool> { |
507 | 0 | let is_annotation_value_char = |
508 | 0 | |byte| matches!(byte, b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z'); |
509 | 0 | if input.is_empty() || !is_annotation_value_char(input[0]) { |
510 | 0 | return Parsed { value: false, input }; |
511 | 0 | } |
512 | 0 | Parsed { value: true, input: &input[1..] } |
513 | 0 | } |
514 | | |
515 | 0 | fn parse_annotation_separator<'i>( |
516 | 0 | &self, |
517 | 0 | input: &'i [u8], |
518 | 0 | ) -> Result<Parsed<'i, ()>, Error> { |
519 | 0 | if input.is_empty() { |
520 | 0 | return Err(err!( |
521 | 0 | "expected an '=' after parsing an RFC 9557 annotation key, \ |
522 | 0 | but found end of input instead", |
523 | 0 | )); |
524 | 0 | } |
525 | 0 | if input[0] != b'=' { |
526 | | // If we see a /, then it's likely the user was trying to insert a |
527 | | // time zone annotation in the wrong place. |
528 | 0 | return Err(if input[0] == b'/' { |
529 | 0 | err!( |
530 | 0 | "expected an '=' after parsing an RFC 9557 annotation \ |
531 | 0 | key, but found / instead (time zone annotations must \ |
532 | 0 | come first)", |
533 | | ) |
534 | | } else { |
535 | 0 | err!( |
536 | 0 | "expected an '=' after parsing an RFC 9557 annotation \ |
537 | 0 | key, but found {:?} instead", |
538 | 0 | escape::Byte(input[0]), |
539 | | ) |
540 | | }); |
541 | 0 | } |
542 | 0 | Ok(Parsed { value: (), input: &input[1..] }) |
543 | 0 | } |
544 | | |
545 | 0 | fn parse_annotation_close<'i>( |
546 | 0 | &self, |
547 | 0 | input: &'i [u8], |
548 | 0 | ) -> Result<Parsed<'i, ()>, Error> { |
549 | 0 | if input.is_empty() { |
550 | 0 | return Err(err!( |
551 | 0 | "expected an ']' after parsing an RFC 9557 annotation key \ |
552 | 0 | and value, but found end of input instead", |
553 | 0 | )); |
554 | 0 | } |
555 | 0 | if input[0] != b']' { |
556 | 0 | return Err(err!( |
557 | 0 | "expected an ']' after parsing an RFC 9557 annotation key \ |
558 | 0 | and value, but found {:?} instead", |
559 | 0 | escape::Byte(input[0]), |
560 | 0 | )); |
561 | 0 | } |
562 | 0 | Ok(Parsed { value: (), input: &input[1..] }) |
563 | 0 | } |
564 | | |
565 | 0 | fn parse_tz_annotation_close<'i>( |
566 | 0 | &self, |
567 | 0 | input: &'i [u8], |
568 | 0 | ) -> Result<Parsed<'i, ()>, Error> { |
569 | 0 | if input.is_empty() { |
570 | 0 | return Err(err!( |
571 | 0 | "expected an ']' after parsing an RFC 9557 time zone \ |
572 | 0 | annotation, but found end of input instead", |
573 | 0 | )); |
574 | 0 | } |
575 | 0 | if input[0] != b']' { |
576 | 0 | return Err(err!( |
577 | 0 | "expected an ']' after parsing an RFC 9557 time zone \ |
578 | 0 | annotation, but found {:?} instead", |
579 | 0 | escape::Byte(input[0]), |
580 | 0 | )); |
581 | 0 | } |
582 | 0 | Ok(Parsed { value: (), input: &input[1..] }) |
583 | 0 | } |
584 | | } |
585 | | |
586 | | #[cfg(test)] |
587 | | mod tests { |
588 | | use super::*; |
589 | | |
590 | | #[test] |
591 | | fn ok_time_zone() { |
592 | | if crate::tz::db().is_definitively_empty() { |
593 | | return; |
594 | | } |
595 | | |
596 | | let p = |input| { |
597 | | Parser::new() |
598 | | .parse(input) |
599 | | .unwrap() |
600 | | .value |
601 | | .to_time_zone_annotation() |
602 | | .unwrap() |
603 | | .map(|ann| (ann.to_time_zone().unwrap(), ann.is_critical())) |
604 | | }; |
605 | | |
606 | | insta::assert_debug_snapshot!(p(b"[America/New_York]"), @r###" |
607 | | Some( |
608 | | ( |
609 | | TimeZone( |
610 | | TZif( |
611 | | "America/New_York", |
612 | | ), |
613 | | ), |
614 | | false, |
615 | | ), |
616 | | ) |
617 | | "###); |
618 | | insta::assert_debug_snapshot!(p(b"[!America/New_York]"), @r###" |
619 | | Some( |
620 | | ( |
621 | | TimeZone( |
622 | | TZif( |
623 | | "America/New_York", |
624 | | ), |
625 | | ), |
626 | | true, |
627 | | ), |
628 | | ) |
629 | | "###); |
630 | | insta::assert_debug_snapshot!(p(b"[america/new_york]"), @r###" |
631 | | Some( |
632 | | ( |
633 | | TimeZone( |
634 | | TZif( |
635 | | "America/New_York", |
636 | | ), |
637 | | ), |
638 | | false, |
639 | | ), |
640 | | ) |
641 | | "###); |
642 | | insta::assert_debug_snapshot!(p(b"[+25:59]"), @r###" |
643 | | Some( |
644 | | ( |
645 | | TimeZone( |
646 | | 25:59:00, |
647 | | ), |
648 | | false, |
649 | | ), |
650 | | ) |
651 | | "###); |
652 | | insta::assert_debug_snapshot!(p(b"[-25:59]"), @r###" |
653 | | Some( |
654 | | ( |
655 | | TimeZone( |
656 | | -25:59:00, |
657 | | ), |
658 | | false, |
659 | | ), |
660 | | ) |
661 | | "###); |
662 | | } |
663 | | |
664 | | #[test] |
665 | | fn ok_empty() { |
666 | | let p = |input| Parser::new().parse(input).unwrap(); |
667 | | |
668 | | insta::assert_debug_snapshot!(p(b""), @r###" |
669 | | Parsed { |
670 | | value: ParsedAnnotations { |
671 | | input: "", |
672 | | time_zone: None, |
673 | | }, |
674 | | input: "", |
675 | | } |
676 | | "###); |
677 | | insta::assert_debug_snapshot!(p(b"blah"), @r###" |
678 | | Parsed { |
679 | | value: ParsedAnnotations { |
680 | | input: "", |
681 | | time_zone: None, |
682 | | }, |
683 | | input: "blah", |
684 | | } |
685 | | "###); |
686 | | } |
687 | | |
688 | | #[test] |
689 | | fn ok_unsupported() { |
690 | | let p = |input| Parser::new().parse(input).unwrap(); |
691 | | |
692 | | insta::assert_debug_snapshot!( |
693 | | p(b"[u-ca=chinese]"), |
694 | | @r###" |
695 | | Parsed { |
696 | | value: ParsedAnnotations { |
697 | | input: "[u-ca=chinese]", |
698 | | time_zone: None, |
699 | | }, |
700 | | input: "", |
701 | | } |
702 | | "###, |
703 | | ); |
704 | | insta::assert_debug_snapshot!( |
705 | | p(b"[u-ca=chinese-japanese]"), |
706 | | @r###" |
707 | | Parsed { |
708 | | value: ParsedAnnotations { |
709 | | input: "[u-ca=chinese-japanese]", |
710 | | time_zone: None, |
711 | | }, |
712 | | input: "", |
713 | | } |
714 | | "###, |
715 | | ); |
716 | | insta::assert_debug_snapshot!( |
717 | | p(b"[u-ca=chinese-japanese-russian]"), |
718 | | @r###" |
719 | | Parsed { |
720 | | value: ParsedAnnotations { |
721 | | input: "[u-ca=chinese-japanese-russian]", |
722 | | time_zone: None, |
723 | | }, |
724 | | input: "", |
725 | | } |
726 | | "###, |
727 | | ); |
728 | | } |
729 | | |
730 | | #[test] |
731 | | fn ok_iana() { |
732 | | let p = |input| Parser::new().parse(input).unwrap(); |
733 | | |
734 | | insta::assert_debug_snapshot!(p(b"[America/New_York]"), @r###" |
735 | | Parsed { |
736 | | value: ParsedAnnotations { |
737 | | input: "[America/New_York]", |
738 | | time_zone: Some( |
739 | | Named { |
740 | | critical: false, |
741 | | name: "America/New_York", |
742 | | }, |
743 | | ), |
744 | | }, |
745 | | input: "", |
746 | | } |
747 | | "###); |
748 | | insta::assert_debug_snapshot!(p(b"[!America/New_York]"), @r###" |
749 | | Parsed { |
750 | | value: ParsedAnnotations { |
751 | | input: "[!America/New_York]", |
752 | | time_zone: Some( |
753 | | Named { |
754 | | critical: true, |
755 | | name: "America/New_York", |
756 | | }, |
757 | | ), |
758 | | }, |
759 | | input: "", |
760 | | } |
761 | | "###); |
762 | | insta::assert_debug_snapshot!(p(b"[UTC]"), @r###" |
763 | | Parsed { |
764 | | value: ParsedAnnotations { |
765 | | input: "[UTC]", |
766 | | time_zone: Some( |
767 | | Named { |
768 | | critical: false, |
769 | | name: "UTC", |
770 | | }, |
771 | | ), |
772 | | }, |
773 | | input: "", |
774 | | } |
775 | | "###); |
776 | | insta::assert_debug_snapshot!(p(b"[.._foo_../.0+-]"), @r###" |
777 | | Parsed { |
778 | | value: ParsedAnnotations { |
779 | | input: "[.._foo_../.0+-]", |
780 | | time_zone: Some( |
781 | | Named { |
782 | | critical: false, |
783 | | name: ".._foo_../.0+-", |
784 | | }, |
785 | | ), |
786 | | }, |
787 | | input: "", |
788 | | } |
789 | | "###); |
790 | | } |
791 | | |
792 | | #[test] |
793 | | fn ok_offset() { |
794 | | let p = |input| Parser::new().parse(input).unwrap(); |
795 | | |
796 | | insta::assert_debug_snapshot!(p(b"[-00]"), @r###" |
797 | | Parsed { |
798 | | value: ParsedAnnotations { |
799 | | input: "[-00]", |
800 | | time_zone: Some( |
801 | | Offset { |
802 | | critical: false, |
803 | | offset: ParsedOffset { |
804 | | kind: Numeric( |
805 | | -00, |
806 | | ), |
807 | | }, |
808 | | }, |
809 | | ), |
810 | | }, |
811 | | input: "", |
812 | | } |
813 | | "###); |
814 | | insta::assert_debug_snapshot!(p(b"[+00]"), @r###" |
815 | | Parsed { |
816 | | value: ParsedAnnotations { |
817 | | input: "[+00]", |
818 | | time_zone: Some( |
819 | | Offset { |
820 | | critical: false, |
821 | | offset: ParsedOffset { |
822 | | kind: Numeric( |
823 | | +00, |
824 | | ), |
825 | | }, |
826 | | }, |
827 | | ), |
828 | | }, |
829 | | input: "", |
830 | | } |
831 | | "###); |
832 | | insta::assert_debug_snapshot!(p(b"[-05]"), @r###" |
833 | | Parsed { |
834 | | value: ParsedAnnotations { |
835 | | input: "[-05]", |
836 | | time_zone: Some( |
837 | | Offset { |
838 | | critical: false, |
839 | | offset: ParsedOffset { |
840 | | kind: Numeric( |
841 | | -05, |
842 | | ), |
843 | | }, |
844 | | }, |
845 | | ), |
846 | | }, |
847 | | input: "", |
848 | | } |
849 | | "###); |
850 | | insta::assert_debug_snapshot!(p(b"[!+05:12]"), @r###" |
851 | | Parsed { |
852 | | value: ParsedAnnotations { |
853 | | input: "[!+05:12]", |
854 | | time_zone: Some( |
855 | | Offset { |
856 | | critical: true, |
857 | | offset: ParsedOffset { |
858 | | kind: Numeric( |
859 | | +05:12, |
860 | | ), |
861 | | }, |
862 | | }, |
863 | | ), |
864 | | }, |
865 | | input: "", |
866 | | } |
867 | | "###); |
868 | | } |
869 | | |
870 | | #[test] |
871 | | fn ok_iana_unsupported() { |
872 | | let p = |input| Parser::new().parse(input).unwrap(); |
873 | | |
874 | | insta::assert_debug_snapshot!( |
875 | | p(b"[America/New_York][u-ca=chinese-japanese-russian]"), |
876 | | @r###" |
877 | | Parsed { |
878 | | value: ParsedAnnotations { |
879 | | input: "[America/New_York][u-ca=chinese-japanese-russian]", |
880 | | time_zone: Some( |
881 | | Named { |
882 | | critical: false, |
883 | | name: "America/New_York", |
884 | | }, |
885 | | ), |
886 | | }, |
887 | | input: "", |
888 | | } |
889 | | "###, |
890 | | ); |
891 | | } |
892 | | |
893 | | #[test] |
894 | | fn err_iana() { |
895 | | insta::assert_snapshot!( |
896 | | Parser::new().parse(b"[0/Foo]").unwrap_err(), |
897 | | @r###"expected ASCII alphabetic byte (or underscore or period) at the start of an RFC 9557 annotation or time zone component name, but found "0" instead"###, |
898 | | ); |
899 | | insta::assert_snapshot!( |
900 | | Parser::new().parse(b"[Foo/0Bar]").unwrap_err(), |
901 | | @r###"expected ASCII alphabetic byte (or underscore or period) at the start of an RFC 9557 annotation or time zone component name, but found "0" instead"###, |
902 | | ); |
903 | | } |
904 | | |
/// Checks the errors reported for malformed or unsupported numeric UTC
/// offsets inside a bracketed annotation: a sign with no digits, hours
/// outside the accepted range, and offsets carrying (fractional) seconds.
#[test]
fn err_offset() {
    // Every case below is expected to fail, so only the error is kept.
    let fail = |input: &[u8]| Parser::new().parse(input).unwrap_err();

    // Input ends right after the sign.
    insta::assert_snapshot!(
        fail(b"[+"),
        @r###"failed to parse hours in UTC numeric offset "+": expected two digit hour after sign, but found end of input"###,
    );
    // Hours out of range, positive sign.
    insta::assert_snapshot!(
        fail(b"[+26]"),
        @r###"failed to parse hours in UTC numeric offset "+26]": offset hours are not valid: parameter 'hours' with value 26 is not in the required range of 0..=25"###,
    );
    // Hours out of range, negative sign.
    insta::assert_snapshot!(
        fail(b"[-26]"),
        @r###"failed to parse hours in UTC numeric offset "-26]": offset hours are not valid: parameter 'hours' with value 26 is not in the required range of 0..=25"###,
    );
    // Seconds are present in the offset.
    insta::assert_snapshot!(
        fail(b"[+05:12:34]"),
        @r###"subminute precision for UTC numeric offset "+05:12:34]" is not enabled in this context (must provide only integral minutes)"###,
    );
    // Fractional seconds are present in the offset.
    insta::assert_snapshot!(
        fail(b"[+05:12:34.123456789]"),
        @r###"subminute precision for UTC numeric offset "+05:12:34.123456789]" is not enabled in this context (must provide only integral minutes)"###,
    );
}
928 | | |
/// Checks that an annotation with a key the parser does not support is
/// rejected when it carries the critical flag (`!`).
#[test]
fn err_critical_unsupported() {
    let critical_unknown_key =
        Parser::new().parse(b"[!u-ca=chinese]").unwrap_err();
    insta::assert_snapshot!(
        critical_unknown_key,
        @r###"found unsupported RFC 9557 annotation with key "u-ca" with the critical flag ('!') set"###,
    );
}
936 | | |
/// Checks the errors reported when the first byte after `[` is missing or
/// invalid. The message differs between the first bracket (annotation or
/// time zone name) and a bracket following `[Foo]` (annotation key).
#[test]
fn err_key_leading_char() {
    // Every case below is expected to fail, so only the error is kept.
    let fail = |input: &[u8]| Parser::new().parse(input).unwrap_err();

    // First bracket: input ends right after `[`.
    insta::assert_snapshot!(
        fail(b"["),
        @"expected the start of an RFC 9557 annotation or IANA time zone component name, but found end of input instead",
    );
    // First bracket: invalid leading byte.
    insta::assert_snapshot!(
        fail(b"[&"),
        @r###"expected ASCII alphabetic byte (or underscore or period) at the start of an RFC 9557 annotation or time zone component name, but found "&" instead"###,
    );
    // Second bracket: input ends right after `[`.
    insta::assert_snapshot!(
        fail(b"[Foo]["),
        @"expected the start of an RFC 9557 annotation key, but found end of input instead",
    );
    // Second bracket: invalid leading byte.
    insta::assert_snapshot!(
        fail(b"[Foo][&"),
        @r###"expected lowercase alphabetic byte (or underscore) at the start of an RFC 9557 annotation key, but found "&" instead"###,
    );
}
956 | | |
/// Checks the errors reported when the byte expected after a name is
/// missing or wrong: `]` after a name in the first bracket, and `=` after
/// a key in a bracket following `[Foo]`.
#[test]
fn err_separator() {
    // Every case below is expected to fail, so only the error is kept.
    let fail = |input: &[u8]| Parser::new().parse(input).unwrap_err();

    // First bracket: a `]` is expected after the name.
    insta::assert_snapshot!(
        fail(b"[abc"),
        @"expected an ']' after parsing an RFC 9557 time zone annotation, but found end of input instead",
    );
    insta::assert_snapshot!(
        fail(b"[_abc"),
        @"expected an ']' after parsing an RFC 9557 time zone annotation, but found end of input instead",
    );
    insta::assert_snapshot!(
        fail(b"[abc^"),
        @r###"expected an ']' after parsing an RFC 9557 time zone annotation, but found "^" instead"###,
    );

    // Second bracket: an `=` is expected after the key.
    insta::assert_snapshot!(
        fail(b"[Foo][abc"),
        @"expected an '=' after parsing an RFC 9557 annotation key, but found end of input instead",
    );
    insta::assert_snapshot!(
        fail(b"[Foo][_abc"),
        @"expected an '=' after parsing an RFC 9557 annotation key, but found end of input instead",
    );
    insta::assert_snapshot!(
        fail(b"[Foo][abc^"),
        @r###"expected an '=' after parsing an RFC 9557 annotation key, but found "^" instead"###,
    );
}
984 | | |
/// Checks the errors reported when the value after `key=` is missing or
/// starts with a byte that is not ASCII alphanumeric.
#[test]
fn err_value() {
    // Every case below is expected to fail, so only the error is kept.
    let fail = |input: &[u8]| Parser::new().parse(input).unwrap_err();

    // Input ends right after the `=`.
    insta::assert_snapshot!(
        fail(b"[abc="),
        @"expected the start of an RFC 9557 annotation value, but found end of input instead",
    );
    insta::assert_snapshot!(
        fail(b"[_abc="),
        @"expected the start of an RFC 9557 annotation value, but found end of input instead",
    );
    // The value begins with an invalid byte.
    insta::assert_snapshot!(
        fail(b"[abc=^"),
        @r###"expected alphanumeric ASCII byte at the start of an RFC 9557 annotation value, but found "^" instead"###,
    );
    // The value is empty: the bracket closes immediately.
    insta::assert_snapshot!(
        fail(b"[abc=]"),
        @r###"expected alphanumeric ASCII byte at the start of an RFC 9557 annotation value, but found "]" instead"###,
    );
}
1004 | | |
/// Checks the errors reported when a `key=value` annotation is not
/// terminated by `]`.
#[test]
fn err_close() {
    // Every case below is expected to fail, so only the error is kept.
    let fail = |input: &[u8]| Parser::new().parse(input).unwrap_err();

    // Input ends right after the value.
    insta::assert_snapshot!(
        fail(b"[abc=123"),
        @"expected an ']' after parsing an RFC 9557 annotation key and value, but found end of input instead",
    );
    // An unexpected byte follows the value.
    insta::assert_snapshot!(
        fail(b"[abc=123*"),
        @r###"expected an ']' after parsing an RFC 9557 annotation key and value, but found "*" instead"###,
    );
}
1016 | | |
/// Checks the error reported when a syntactically valid time zone
/// annotation names a zone that cannot be found in the time zone database.
#[cfg(feature = "std")]
#[test]
fn err_time_zone_db_lookup() {
    // The error message snapshotted below can vary based on tzdb config,
    // so the check only runs when the database is not definitively empty.
    if !crate::tz::db().is_definitively_empty() {
        // Parsing succeeds; only the later database lookup is expected to
        // fail.
        let parsed = Parser::new().parse(b"[Foo]").unwrap();
        let annotation =
            parsed.value.to_time_zone_annotation().unwrap().unwrap();
        let lookup_err = annotation.to_time_zone().unwrap_err();

        insta::assert_snapshot!(
            lookup_err,
            @"failed to find time zone `Foo` in time zone database",
        );
    }
}
1043 | | |
/// Checks that a second bracketed time zone name is rejected, with an
/// error noting that time zone annotations must come first.
#[test]
fn err_repeated_time_zone() {
    let repeated = Parser::new()
        .parse(b"[america/new_york][america/new_york]")
        .unwrap_err();
    insta::assert_snapshot!(
        repeated,
        @"expected an '=' after parsing an RFC 9557 annotation key, but found / instead (time zone annotations must come first)",
    );
}
1052 | | } |