/src/chrono/src/format/scan.rs
Line | Count | Source |
1 | | // This is a part of Chrono. |
2 | | // See README.md and LICENSE.txt for details. |
3 | | |
4 | | /*! |
5 | | * Various scanning routines for the parser. |
6 | | */ |
7 | | |
8 | | use super::{INVALID, OUT_OF_RANGE, ParseResult, TOO_SHORT}; |
9 | | use crate::Weekday; |
10 | | |
11 | | /// Tries to parse the non-negative number from `min` to `max` digits. |
12 | | /// |
13 | | /// The absence of digits at all is an unconditional error. |
14 | | /// More than `max` digits are consumed up to the first `max` digits. |
15 | | /// Any number that does not fit in `i64` is an error. |
16 | | #[inline] |
17 | 56.6k | pub(super) fn number(s: &str, min: usize, max: usize) -> ParseResult<(&str, i64)> { |
18 | 56.6k | assert!(min <= max); |
19 | | |
20 | | // We are only interested in ascii numbers, so we can work with the `str` as bytes. We stop on |
21 | | // the first non-numeric byte, which may be another ascii character or beginning of multi-byte |
22 | | // UTF-8 character. |
23 | 56.6k | let bytes = s.as_bytes(); |
24 | 56.6k | if bytes.len() < min { |
25 | 355 | return Err(TOO_SHORT); |
26 | 56.2k | } |
27 | | |
28 | 56.2k | let mut n = 0i64; |
29 | 1.76M | for (i, c) in bytes.iter().take(max).cloned().enumerate() { |
30 | | // cloned() = copied() |
31 | 1.76M | if !c.is_ascii_digit() { |
32 | 35.6k | if i < min { |
33 | 934 | return Err(INVALID); |
34 | | } else { |
35 | 34.7k | return Ok((&s[i..], n)); |
36 | | } |
37 | 1.72M | } |
38 | | |
39 | 1.72M | n = match n.checked_mul(10).and_then(|n| n.checked_add((c - b'0') as i64)) { |
40 | 1.72M | Some(n) => n, |
41 | 20 | None => return Err(OUT_OF_RANGE), |
42 | | }; |
43 | | } |
44 | | |
45 | 20.6k | Ok((&s[core::cmp::min(max, bytes.len())..], n)) |
46 | 56.6k | } |
47 | | |
48 | | /// Tries to consume at least one digits as a fractional second. |
49 | | /// Returns the number of whole nanoseconds (0--999,999,999). |
50 | 13.1k | pub(super) fn nanosecond(s: &str) -> ParseResult<(&str, u32)> { |
51 | | // record the number of digits consumed for later scaling. |
52 | 13.1k | let origlen = s.len(); |
53 | 13.1k | let (s, v) = number(s, 1, 9)?; |
54 | 13.1k | let v = u32::try_from(v).expect("999,999,999 should fit u32"); |
55 | 13.1k | let consumed = origlen - s.len(); |
56 | | |
57 | | // scale the number accordingly. |
58 | | const SCALE: [u32; 10] = |
59 | | [0, 100_000_000, 10_000_000, 1_000_000, 100_000, 10_000, 1_000, 100, 10, 1]; |
60 | 13.1k | let v = v.checked_mul(SCALE[consumed]).ok_or(OUT_OF_RANGE)?; |
61 | | |
62 | | // if there are more than 9 digits, skip next digits. |
63 | 20.2k | let s = s.trim_start_matches(|c: char| c.is_ascii_digit()); |
64 | | |
65 | 13.1k | Ok((s, v)) |
66 | 13.1k | } |
67 | | |
68 | | /// Tries to consume a fixed number of digits as a fractional second. |
69 | | /// Returns the number of whole nanoseconds (0--999,999,999). |
70 | 2.89k | pub(super) fn nanosecond_fixed(s: &str, digits: usize) -> ParseResult<(&str, i64)> { |
71 | | // record the number of digits consumed for later scaling. |
72 | 2.89k | let (s, v) = number(s, digits, digits)?; |
73 | | |
74 | | // scale the number accordingly. |
75 | | static SCALE: [i64; 10] = |
76 | | [0, 100_000_000, 10_000_000, 1_000_000, 100_000, 10_000, 1_000, 100, 10, 1]; |
77 | 2.51k | let v = v.checked_mul(SCALE[digits]).ok_or(OUT_OF_RANGE)?; |
78 | | |
79 | 2.51k | Ok((s, v)) |
80 | 2.89k | } |
81 | | |
82 | | /// Tries to parse the month index (0 through 11) with the first three ASCII letters. |
83 | 3.64k | pub(super) fn short_month0(s: &str) -> ParseResult<(&str, u8)> { |
84 | 3.64k | if s.len() < 3 { |
85 | 42 | return Err(TOO_SHORT); |
86 | 3.60k | } |
87 | 3.60k | let buf = s.as_bytes(); |
88 | 3.60k | let month0 = match (buf[0] | 32, buf[1] | 32, buf[2] | 32) { |
89 | 603 | (b'j', b'a', b'n') => 0, |
90 | 121 | (b'f', b'e', b'b') => 1, |
91 | 278 | (b'm', b'a', b'r') => 2, |
92 | 88 | (b'a', b'p', b'r') => 3, |
93 | 120 | (b'm', b'a', b'y') => 4, |
94 | 126 | (b'j', b'u', b'n') => 5, |
95 | 208 | (b'j', b'u', b'l') => 6, |
96 | 95 | (b'a', b'u', b'g') => 7, |
97 | 154 | (b's', b'e', b'p') => 8, |
98 | 572 | (b'o', b'c', b't') => 9, |
99 | 386 | (b'n', b'o', b'v') => 10, |
100 | 621 | (b'd', b'e', b'c') => 11, |
101 | 233 | _ => return Err(INVALID), |
102 | | }; |
103 | 3.37k | Ok((&s[3..], month0)) |
104 | 3.64k | } |
105 | | |
106 | | /// Tries to parse the weekday with the first three ASCII letters. |
107 | 5.21k | pub(super) fn short_weekday(s: &str) -> ParseResult<(&str, Weekday)> { |
108 | 5.21k | if s.len() < 3 { |
109 | 138 | return Err(TOO_SHORT); |
110 | 5.07k | } |
111 | 5.07k | let buf = s.as_bytes(); |
112 | 5.07k | let weekday = match (buf[0] | 32, buf[1] | 32, buf[2] | 32) { |
113 | 147 | (b'm', b'o', b'n') => Weekday::Mon, |
114 | 170 | (b't', b'u', b'e') => Weekday::Tue, |
115 | 81 | (b'w', b'e', b'd') => Weekday::Wed, |
116 | 280 | (b't', b'h', b'u') => Weekday::Thu, |
117 | 1.01k | (b'f', b'r', b'i') => Weekday::Fri, |
118 | 390 | (b's', b'a', b't') => Weekday::Sat, |
119 | 196 | (b's', b'u', b'n') => Weekday::Sun, |
120 | 2.80k | _ => return Err(INVALID), |
121 | | }; |
122 | 2.27k | Ok((&s[3..], weekday)) |
123 | 5.21k | } |
124 | | |
125 | | /// Tries to parse the month index (0 through 11) with short or long month names. |
126 | | /// It prefers long month names to short month names when both are possible. |
127 | 1.46k | pub(super) fn short_or_long_month0(s: &str) -> ParseResult<(&str, u8)> { |
128 | | // lowercased month names, minus first three chars |
129 | | static LONG_MONTH_SUFFIXES: [&[u8]; 12] = [ |
130 | | b"uary", b"ruary", b"ch", b"il", b"", b"e", b"y", b"ust", b"tember", b"ober", b"ember", |
131 | | b"ember", |
132 | | ]; |
133 | | |
134 | 1.46k | let (mut s, month0) = short_month0(s)?; |
135 | | |
136 | | // tries to consume the suffix if possible |
137 | 1.38k | let suffix = LONG_MONTH_SUFFIXES[month0 as usize]; |
138 | 1.38k | if s.len() >= suffix.len() && s.as_bytes()[..suffix.len()].eq_ignore_ascii_case(suffix) { |
139 | 220 | s = &s[suffix.len()..]; |
140 | 1.16k | } |
141 | | |
142 | 1.38k | Ok((s, month0)) |
143 | 1.46k | } |
144 | | |
145 | | /// Tries to parse the weekday with short or long weekday names. |
146 | | /// It prefers long weekday names to short weekday names when both are possible. |
147 | 1.49k | pub(super) fn short_or_long_weekday(s: &str) -> ParseResult<(&str, Weekday)> { |
148 | | // lowercased weekday names, minus first three chars |
149 | | static LONG_WEEKDAY_SUFFIXES: [&[u8]; 7] = |
150 | | [b"day", b"sday", b"nesday", b"rsday", b"day", b"urday", b"day"]; |
151 | | |
152 | 1.49k | let (mut s, weekday) = short_weekday(s)?; |
153 | | |
154 | | // tries to consume the suffix if possible |
155 | 1.44k | let suffix = LONG_WEEKDAY_SUFFIXES[weekday.num_days_from_monday() as usize]; |
156 | 1.44k | if s.len() >= suffix.len() && s.as_bytes()[..suffix.len()].eq_ignore_ascii_case(suffix) { |
157 | 523 | s = &s[suffix.len()..]; |
158 | 919 | } |
159 | | |
160 | 1.44k | Ok((s, weekday)) |
161 | 1.49k | } |
162 | | |
163 | | /// Tries to consume exactly one given character. |
164 | 2.45k | pub(super) fn char(s: &str, c1: u8) -> ParseResult<&str> { |
165 | 2.45k | match s.as_bytes().first() { |
166 | 2.41k | Some(&c) if c == c1 => Ok(&s[1..]), |
167 | 1.10k | Some(_) => Err(INVALID), |
168 | 41 | None => Err(TOO_SHORT), |
169 | | } |
170 | 2.45k | } |
171 | | |
172 | | /// Tries to consume one or more whitespace. |
173 | 5.83k | pub(super) fn space(s: &str) -> ParseResult<&str> { |
174 | 5.83k | let s_ = s.trim_start(); |
175 | 5.83k | if s_.len() < s.len() { |
176 | 5.01k | Ok(s_) |
177 | 822 | } else if s.is_empty() { |
178 | 118 | Err(TOO_SHORT) |
179 | | } else { |
180 | 704 | Err(INVALID) |
181 | | } |
182 | 5.83k | } |
183 | | |
184 | | /// Consumes any number (including zero) of colon or spaces. |
185 | 1.86k | pub(crate) fn colon_or_space(s: &str) -> ParseResult<&str> { |
186 | 3.85k | Ok(s.trim_start_matches(|c: char| c == ':' || c.is_whitespace())) |
187 | 1.86k | } |
188 | | |
189 | | /// Parse a timezone from `s` and return the offset in seconds. |
190 | | /// |
191 | | /// The `consume_colon` function is used to parse a mandatory or optional `:` |
192 | | /// separator between hours offset and minutes offset. |
193 | | /// |
194 | | /// The `allow_missing_minutes` flag allows the timezone minutes offset to be |
195 | | /// missing from `s`. |
196 | | /// |
197 | | /// The `allow_tz_minus_sign` flag allows the timezone offset negative character |
198 | | /// to also be `−` MINUS SIGN (U+2212) in addition to the typical |
199 | | /// ASCII-compatible `-` HYPHEN-MINUS (U+2D). |
200 | | /// This is part of [RFC 3339 & ISO 8601]. |
201 | | /// |
202 | | /// [RFC 3339 & ISO 8601]: https://en.wikipedia.org/w/index.php?title=ISO_8601&oldid=1114309368#Time_offsets_from_UTC |
203 | 5.89k | pub(crate) fn timezone_offset<F>( |
204 | 5.89k | mut s: &str, |
205 | 5.89k | mut consume_colon: F, |
206 | 5.89k | allow_zulu: bool, |
207 | 5.89k | allow_missing_minutes: bool, |
208 | 5.89k | allow_tz_minus_sign: bool, |
209 | 5.89k | ) -> ParseResult<(&str, i32)> |
210 | 5.89k | where |
211 | 5.89k | F: FnMut(&str) -> ParseResult<&str>, |
212 | | { |
213 | 5.89k | if allow_zulu { |
214 | 3.47k | if let Some(&b'Z' | &b'z') = s.as_bytes().first() { |
215 | 2.37k | return Ok((&s[1..], 0)); |
216 | 1.09k | } |
217 | 2.42k | } |
218 | | |
219 | 4.89k | const fn digits(s: &str) -> ParseResult<(u8, u8)> { |
220 | 4.89k | let b = s.as_bytes(); |
221 | 4.89k | if b.len() < 2 { Err(TOO_SHORT) } else { Ok((b[0], b[1])) } |
222 | 4.89k | } |
223 | 3.51k | let negative = match s.chars().next() { |
224 | | Some('+') => { |
225 | | // PLUS SIGN (U+2B) |
226 | 634 | s = &s['+'.len_utf8()..]; |
227 | | |
228 | 634 | false |
229 | | } |
230 | | Some('-') => { |
231 | | // HYPHEN-MINUS (U+2D) |
232 | 1.85k | s = &s['-'.len_utf8()..]; |
233 | | |
234 | 1.85k | true |
235 | | } |
236 | | Some('−') => { |
237 | | // MINUS SIGN (U+2212) |
238 | 106 | if !allow_tz_minus_sign { |
239 | 1 | return Err(INVALID); |
240 | 105 | } |
241 | 105 | s = &s['−'.len_utf8()..]; |
242 | | |
243 | 105 | true |
244 | | } |
245 | 614 | Some(_) => return Err(INVALID), |
246 | 310 | None => return Err(TOO_SHORT), |
247 | | }; |
248 | | |
249 | | // hours (00--99) |
250 | 2.59k | let hours = match digits(s)? { |
251 | 2.47k | (h1 @ b'0'..=b'9', h2 @ b'0'..=b'9') => i32::from((h1 - b'0') * 10 + (h2 - b'0')), |
252 | 124 | _ => return Err(INVALID), |
253 | | }; |
254 | 2.37k | s = &s[2..]; |
255 | | |
256 | | // colons (and possibly other separators) |
257 | 2.37k | s = consume_colon(s)?; |
258 | | |
259 | | // minutes (00--59) |
260 | | // if the next two items are digits then we have to add minutes |
261 | 2.30k | let minutes = if let Ok(ds) = digits(s) { |
262 | 2.11k | match ds { |
263 | 2.06k | (m1 @ b'0'..=b'5', m2 @ b'0'..=b'9') => i32::from((m1 - b'0') * 10 + (m2 - b'0')), |
264 | 167 | (b'6'..=b'9', b'0'..=b'9') => return Err(OUT_OF_RANGE), |
265 | 245 | _ => return Err(INVALID), |
266 | | } |
267 | 191 | } else if allow_missing_minutes { |
268 | 75 | 0 |
269 | | } else { |
270 | 116 | return Err(TOO_SHORT); |
271 | | }; |
272 | 1.92k | s = match s.len() { |
273 | 1.92k | len if len >= 2 => &s[2..], |
274 | 74 | 0 => s, |
275 | 1 | _ => return Err(TOO_SHORT), |
276 | | }; |
277 | | |
278 | 1.92k | let seconds = hours * 3600 + minutes * 60; |
279 | 1.92k | Ok((s, if negative { -seconds } else { seconds })) |
280 | 5.89k | } chrono::format::scan::timezone_offset::<chrono::format::scan::timezone_offset_2822::{closure#2}>Line | Count | Source | 203 | 474 | pub(crate) fn timezone_offset<F>( | 204 | 474 | mut s: &str, | 205 | 474 | mut consume_colon: F, | 206 | 474 | allow_zulu: bool, | 207 | 474 | allow_missing_minutes: bool, | 208 | 474 | allow_tz_minus_sign: bool, | 209 | 474 | ) -> ParseResult<(&str, i32)> | 210 | 474 | where | 211 | 474 | F: FnMut(&str) -> ParseResult<&str>, | 212 | | { | 213 | 474 | if allow_zulu { | 214 | 0 | if let Some(&b'Z' | &b'z') = s.as_bytes().first() { | 215 | 0 | return Ok((&s[1..], 0)); | 216 | 0 | } | 217 | 474 | } | 218 | | | 219 | | const fn digits(s: &str) -> ParseResult<(u8, u8)> { | 220 | | let b = s.as_bytes(); | 221 | | if b.len() < 2 { Err(TOO_SHORT) } else { Ok((b[0], b[1])) } | 222 | | } | 223 | 474 | let negative = match s.chars().next() { | 224 | | Some('+') => { | 225 | | // PLUS SIGN (U+2B) | 226 | 162 | s = &s['+'.len_utf8()..]; | 227 | | | 228 | 162 | false | 229 | | } | 230 | | Some('-') => { | 231 | | // HYPHEN-MINUS (U+2D) | 232 | 135 | s = &s['-'.len_utf8()..]; | 233 | | | 234 | 135 | true | 235 | | } | 236 | | Some('−') => { | 237 | | // MINUS SIGN (U+2212) | 238 | 1 | if !allow_tz_minus_sign { | 239 | 1 | return Err(INVALID); | 240 | 0 | } | 241 | 0 | s = &s['−'.len_utf8()..]; | 242 | | | 243 | 0 | true | 244 | | } | 245 | 157 | Some(_) => return Err(INVALID), | 246 | 19 | None => return Err(TOO_SHORT), | 247 | | }; | 248 | | | 249 | | // hours (00--99) | 250 | 297 | let hours = match digits(s)? 
{ | 251 | 273 | (h1 @ b'0'..=b'9', h2 @ b'0'..=b'9') => i32::from((h1 - b'0') * 10 + (h2 - b'0')), | 252 | 30 | _ => return Err(INVALID), | 253 | | }; | 254 | 247 | s = &s[2..]; | 255 | | | 256 | | // colons (and possibly other separators) | 257 | 247 | s = consume_colon(s)?; | 258 | | | 259 | | // minutes (00--59) | 260 | | // if the next two items are digits then we have to add minutes | 261 | 247 | let minutes = if let Ok(ds) = digits(s) { | 262 | 233 | match ds { | 263 | 213 | (m1 @ b'0'..=b'5', m2 @ b'0'..=b'9') => i32::from((m1 - b'0') * 10 + (m2 - b'0')), | 264 | 57 | (b'6'..=b'9', b'0'..=b'9') => return Err(OUT_OF_RANGE), | 265 | 89 | _ => return Err(INVALID), | 266 | | } | 267 | 14 | } else if allow_missing_minutes { | 268 | 0 | 0 | 269 | | } else { | 270 | 14 | return Err(TOO_SHORT); | 271 | | }; | 272 | 136 | s = match s.len() { | 273 | 136 | len if len >= 2 => &s[2..], | 274 | 0 | 0 => s, | 275 | 0 | _ => return Err(TOO_SHORT), | 276 | | }; | 277 | | | 278 | 136 | let seconds = hours * 3600 + minutes * 60; | 279 | 136 | Ok((s, if negative { -seconds } else { seconds })) | 280 | 474 | } |
chrono::format::scan::timezone_offset::<chrono::format::parse::parse_rfc3339::{closure#0}>Line | Count | Source | 203 | 796 | pub(crate) fn timezone_offset<F>( | 204 | 796 | mut s: &str, | 205 | 796 | mut consume_colon: F, | 206 | 796 | allow_zulu: bool, | 207 | 796 | allow_missing_minutes: bool, | 208 | 796 | allow_tz_minus_sign: bool, | 209 | 796 | ) -> ParseResult<(&str, i32)> | 210 | 796 | where | 211 | 796 | F: FnMut(&str) -> ParseResult<&str>, | 212 | | { | 213 | 796 | if allow_zulu { | 214 | 796 | if let Some(&b'Z' | &b'z') = s.as_bytes().first() { | 215 | 48 | return Ok((&s[1..], 0)); | 216 | 748 | } | 217 | 0 | } | 218 | | | 219 | | const fn digits(s: &str) -> ParseResult<(u8, u8)> { | 220 | | let b = s.as_bytes(); | 221 | | if b.len() < 2 { Err(TOO_SHORT) } else { Ok((b[0], b[1])) } | 222 | | } | 223 | 748 | let negative = match s.chars().next() { | 224 | | Some('+') => { | 225 | | // PLUS SIGN (U+2B) | 226 | 83 | s = &s['+'.len_utf8()..]; | 227 | | | 228 | 83 | false | 229 | | } | 230 | | Some('-') => { | 231 | | // HYPHEN-MINUS (U+2D) | 232 | 206 | s = &s['-'.len_utf8()..]; | 233 | | | 234 | 206 | true | 235 | | } | 236 | | Some('−') => { | 237 | | // MINUS SIGN (U+2212) | 238 | 39 | if !allow_tz_minus_sign { | 239 | 0 | return Err(INVALID); | 240 | 39 | } | 241 | 39 | s = &s['−'.len_utf8()..]; | 242 | | | 243 | 39 | true | 244 | | } | 245 | 294 | Some(_) => return Err(INVALID), | 246 | 126 | None => return Err(TOO_SHORT), | 247 | | }; | 248 | | | 249 | | // hours (00--99) | 250 | 328 | let hours = match digits(s)? 
{ | 251 | 287 | (h1 @ b'0'..=b'9', h2 @ b'0'..=b'9') => i32::from((h1 - b'0') * 10 + (h2 - b'0')), | 252 | 42 | _ => return Err(INVALID), | 253 | | }; | 254 | 258 | s = &s[2..]; | 255 | | | 256 | | // colons (and possibly other separators) | 257 | 258 | s = consume_colon(s)?; | 258 | | | 259 | | // minutes (00--59) | 260 | | // if the next two items are digits then we have to add minutes | 261 | 186 | let minutes = if let Ok(ds) = digits(s) { | 262 | 177 | match ds { | 263 | 165 | (m1 @ b'0'..=b'5', m2 @ b'0'..=b'9') => i32::from((m1 - b'0') * 10 + (m2 - b'0')), | 264 | 27 | (b'6'..=b'9', b'0'..=b'9') => return Err(OUT_OF_RANGE), | 265 | 54 | _ => return Err(INVALID), | 266 | | } | 267 | 9 | } else if allow_missing_minutes { | 268 | 0 | 0 | 269 | | } else { | 270 | 9 | return Err(TOO_SHORT); | 271 | | }; | 272 | 120 | s = match s.len() { | 273 | 120 | len if len >= 2 => &s[2..], | 274 | 0 | 0 => s, | 275 | 0 | _ => return Err(TOO_SHORT), | 276 | | }; | 277 | | | 278 | 120 | let seconds = hours * 3600 + minutes * 60; | 279 | 120 | Ok((s, if negative { -seconds } else { seconds })) | 280 | 796 | } |
chrono::format::scan::timezone_offset::<chrono::format::scan::colon_or_space> Line | Count | Source | 203 | 4.62k | pub(crate) fn timezone_offset<F>( | 204 | 4.62k | mut s: &str, | 205 | 4.62k | mut consume_colon: F, | 206 | 4.62k | allow_zulu: bool, | 207 | 4.62k | allow_missing_minutes: bool, | 208 | 4.62k | allow_tz_minus_sign: bool, | 209 | 4.62k | ) -> ParseResult<(&str, i32)> | 210 | 4.62k | where | 211 | 4.62k | F: FnMut(&str) -> ParseResult<&str>, | 212 | | { | 213 | 4.62k | if allow_zulu { | 214 | 2.68k | if let Some(&b'Z' | &b'z') = s.as_bytes().first() { | 215 | 2.33k | return Ok((&s[1..], 0)); | 216 | 349 | } | 217 | 1.94k | } | 218 | | | 219 | | const fn digits(s: &str) -> ParseResult<(u8, u8)> { | 220 | | let b = s.as_bytes(); | 221 | | if b.len() < 2 { Err(TOO_SHORT) } else { Ok((b[0], b[1])) } | 222 | | } | 223 | 2.29k | let negative = match s.chars().next() { | 224 | | Some('+') => { | 225 | | // PLUS SIGN (U+2B) | 226 | 389 | s = &s['+'.len_utf8()..]; | 227 | | | 228 | 389 | false | 229 | | } | 230 | | Some('-') => { | 231 | | // HYPHEN-MINUS (U+2D) | 232 | 1.51k | s = &s['-'.len_utf8()..]; | 233 | | | 234 | 1.51k | true | 235 | | } | 236 | | Some('−') => { | 237 | | // MINUS SIGN (U+2212) | 238 | 66 | if !allow_tz_minus_sign { | 239 | 0 | return Err(INVALID); | 240 | 66 | } | 241 | 66 | s = &s['−'.len_utf8()..]; | 242 | | | 243 | 66 | true | 244 | | } | 245 | 163 | Some(_) => return Err(INVALID), | 246 | 165 | None => return Err(TOO_SHORT), | 247 | | }; | 248 | | | 249 | | // hours (00--99) | 250 | 1.96k | let hours = match digits(s)? 
{ | 251 | 1.91k | (h1 @ b'0'..=b'9', h2 @ b'0'..=b'9') => i32::from((h1 - b'0') * 10 + (h2 - b'0')), | 252 | 52 | _ => return Err(INVALID), | 253 | | }; | 254 | 1.86k | s = &s[2..]; | 255 | | | 256 | | // colons (and possibly other separators) | 257 | 1.86k | s = consume_colon(s)?; | 258 | | | 259 | | // minutes (00--59) | 260 | | // if the next two items are digits then we have to add minutes | 261 | 1.86k | let minutes = if let Ok(ds) = digits(s) { | 262 | 1.70k | match ds { | 263 | 1.68k | (m1 @ b'0'..=b'5', m2 @ b'0'..=b'9') => i32::from((m1 - b'0') * 10 + (m2 - b'0')), | 264 | 83 | (b'6'..=b'9', b'0'..=b'9') => return Err(OUT_OF_RANGE), | 265 | 102 | _ => return Err(INVALID), | 266 | | } | 267 | 168 | } else if allow_missing_minutes { | 268 | 75 | 0 | 269 | | } else { | 270 | 93 | return Err(TOO_SHORT); | 271 | | }; | 272 | 1.66k | s = match s.len() { | 273 | 1.66k | len if len >= 2 => &s[2..], | 274 | 74 | 0 => s, | 275 | 1 | _ => return Err(TOO_SHORT), | 276 | | }; | 277 | | | 278 | 1.66k | let seconds = hours * 3600 + minutes * 60; | 279 | 1.66k | Ok((s, if negative { -seconds } else { seconds })) | 280 | 4.62k | } |
|
281 | | |
282 | | /// Same as `timezone_offset` but also allows for RFC 2822 legacy timezones. |
283 | | /// May return `None` which indicates an insufficient offset data (i.e. `-0000`). |
284 | | /// See [RFC 2822 Section 4.3]. |
285 | | /// |
286 | | /// [RFC 2822 Section 4.3]: https://tools.ietf.org/html/rfc2822#section-4.3 |
287 | 1.02k | pub(super) fn timezone_offset_2822(s: &str) -> ParseResult<(&str, i32)> { |
288 | | // tries to parse legacy time zone names |
289 | 4.10M | let upto = s.as_bytes().iter().position(|&c| !c.is_ascii_alphabetic()).unwrap_or(s.len()); |
290 | 1.02k | if upto > 0 { |
291 | 551 | let name = &s.as_bytes()[..upto]; |
292 | 551 | let s = &s[upto..]; |
293 | 551 | let offset_hours = |o| Ok((s, o * 3600)); |
294 | | // RFC 2822 requires support for some named North America timezones, a small subset of all |
295 | | // named timezones. |
296 | 551 | if name.eq_ignore_ascii_case(b"gmt") |
297 | 550 | || name.eq_ignore_ascii_case(b"ut") |
298 | 547 | || name.eq_ignore_ascii_case(b"z") |
299 | | { |
300 | 67 | return offset_hours(0); |
301 | 484 | } else if name.eq_ignore_ascii_case(b"edt") { |
302 | 4 | return offset_hours(-4); |
303 | 480 | } else if name.eq_ignore_ascii_case(b"est") || name.eq_ignore_ascii_case(b"cdt") { |
304 | 10 | return offset_hours(-5); |
305 | 470 | } else if name.eq_ignore_ascii_case(b"cst") || name.eq_ignore_ascii_case(b"mdt") { |
306 | 13 | return offset_hours(-6); |
307 | 457 | } else if name.eq_ignore_ascii_case(b"mst") || name.eq_ignore_ascii_case(b"pdt") { |
308 | 31 | return offset_hours(-7); |
309 | 426 | } else if name.eq_ignore_ascii_case(b"pst") { |
310 | 6 | return offset_hours(-8); |
311 | 420 | } else if name.len() == 1 { |
312 | 253 | if let b'a'..=b'i' | b'k'..=b'y' | b'A'..=b'I' | b'K'..=b'Y' = name[0] { |
313 | | // recommended by RFC 2822: consume but treat it as -0000 |
314 | 250 | return Ok((s, 0)); |
315 | 3 | } |
316 | 167 | } |
317 | 170 | Err(INVALID) |
318 | | } else { |
319 | 474 | timezone_offset(s, |s| Ok(s), false, false, false) |
320 | | } |
321 | 1.02k | } |
322 | | |
323 | | /// Tries to consume an RFC2822 comment including preceding ` `. |
324 | | /// |
325 | | /// Returns the remaining string after the closing parenthesis. |
326 | 2.15k | pub(super) fn comment_2822(s: &str) -> ParseResult<(&str, ())> { |
327 | | use CommentState::*; |
328 | | |
329 | 2.15k | let s = s.trim_start(); |
330 | | |
331 | 2.15k | let mut state = Start; |
332 | 4.66M | for (i, c) in s.bytes().enumerate() { |
333 | 4.66M | state = match (state, c) { |
334 | 1.73k | (Start, b'(') => Next(1), |
335 | 1.63k | (Next(1), b')') => return Ok((&s[i + 1..], ())), |
336 | 609 | (Next(depth), b'\\') => Escape(depth), |
337 | 1.15M | (Next(depth), b'(') => Next(depth + 1), |
338 | 6.30k | (Next(depth), b')') => Next(depth - 1), |
339 | 3.49M | (Next(depth), _) | (Escape(depth), _) => Next(depth), |
340 | 113 | _ => return Err(INVALID), |
341 | | }; |
342 | | } |
343 | | |
344 | 404 | Err(TOO_SHORT) |
345 | 2.15k | } |
346 | | |
347 | | enum CommentState { |
348 | | Start, |
349 | | Next(usize), |
350 | | Escape(usize), |
351 | | } |
352 | | |
#[cfg(test)]
mod tests {
    use super::{
        comment_2822, nanosecond, nanosecond_fixed, short_or_long_month0, short_or_long_weekday,
        timezone_offset_2822,
    };
    use crate::Weekday;
    use crate::format::{INVALID, TOO_SHORT};

    /// Comments may nest, may contain escapes and may be preceded by whitespace.
    #[test]
    fn test_rfc2822_comments() {
        // (input, expected remainder or error)
        let cases = [
            ("", Err(TOO_SHORT)),
            (" ", Err(TOO_SHORT)),
            ("x", Err(INVALID)),
            ("(", Err(TOO_SHORT)),
            ("()", Ok("")),
            (" \r\n\t()", Ok("")),
            ("() ", Ok(" ")),
            ("()z", Ok("z")),
            ("(x)", Ok("")),
            ("(())", Ok("")),
            ("((()))", Ok("")),
            ("(x(x(x)x)x)", Ok("")),
            ("( x ( x ( x ) x ) x )", Ok("")),
            (r"(\)", Err(TOO_SHORT)),
            (r"(\()", Ok("")),
            (r"(\))", Ok("")),
            (r"(\\)", Ok("")),
            ("(()())", Ok("")),
            ("( x ( x ) x ( x ) x )", Ok("")),
        ];

        for (test_in, expected) in cases.iter() {
            let actual = comment_2822(test_in).map(|(s, _)| s);
            assert_eq!(
                *expected, actual,
                "{test_in:?} expected to produce {expected:?}, but produced {actual:?}."
            );
        }
    }

    /// Named zones are matched case-insensitively; numeric offsets are still accepted.
    #[test]
    fn test_timezone_offset_2822() {
        let accepted = [("cSt", -21600), ("pSt", -28800), ("mSt", -25200), ("-1551", -57060)];
        for (input, seconds) in accepted {
            assert_eq!(timezone_offset_2822(input).unwrap(), ("", seconds));
        }
        assert_eq!(timezone_offset_2822("Gp"), Err(INVALID));
    }

    /// Mixed-case abbreviations parse; a non-ASCII tail is left untouched.
    #[test]
    fn test_short_or_long_month0() {
        let accepted = [
            ("JUn", "", 5),
            ("mAy", "", 4),
            ("AuG", "", 7),
            ("Aprâ", "â", 3),
            ("JUl", "", 6),
            ("mAr", "", 2),
            ("Jan", "", 0),
        ];
        for (input, rest, month0) in accepted {
            assert_eq!(short_or_long_month0(input).unwrap(), (rest, month0));
        }
    }

    /// An incomplete long name only consumes the three-letter abbreviation.
    #[test]
    fn test_short_or_long_weekday() {
        assert_eq!(short_or_long_weekday("sAtu").unwrap(), ("u", Weekday::Sat));
        assert_eq!(short_or_long_weekday("thu").unwrap(), ("", Weekday::Thu));
    }

    /// Zero requested digits succeed on empty input; one requested digit does not.
    #[test]
    fn test_nanosecond_fixed() {
        assert_eq!(nanosecond_fixed("", 0usize).unwrap(), ("", 0));
        assert!(nanosecond_fixed("", 1usize).is_err());
    }

    /// A single digit is scaled to tenths of a second; a non-ASCII tail is left untouched.
    #[test]
    fn test_nanosecond() {
        assert_eq!(nanosecond("2Ù").unwrap(), ("Ù", 200000000));
        assert_eq!(nanosecond("8").unwrap(), ("", 800000000));
    }
}