/rust/registry/src/index.crates.io-1949cf8c6b5b557f/sfv-0.14.0/src/parser.rs
Line | Count | Source |
1 | | use std::{borrow::Cow, string::String as StdString}; |
2 | | |
3 | | use crate::{ |
4 | | error, utils, |
5 | | visitor::{ |
6 | | DictionaryVisitor, EntryVisitor, InnerListVisitor, ItemVisitor, ListVisitor, |
7 | | ParameterVisitor, |
8 | | }, |
9 | | BareItemFromInput, Date, Decimal, Integer, KeyRef, Num, SFVResult, String, StringRef, TokenRef, |
10 | | Version, |
11 | | }; |
12 | | |
13 | 3.22M | fn parse_item<'de>( |
14 | 3.22M | parser: &mut Parser<'de>, |
15 | 3.22M | visitor: impl ItemVisitor<'de>, |
16 | 3.22M | ) -> Result<(), error::Repr> { |
17 | | // https://httpwg.org/specs/rfc9651.html#parse-item |
18 | 3.22M | let param_visitor = visitor.bare_item(parser.parse_bare_item()?)?; |
19 | 3.22M | parser.parse_parameters(param_visitor) |
20 | 3.22M | } sfv::parser::parse_item::<indexmap::map::core::entry::Entry<sfv::key::Key, sfv::parsed::ListEntry>> Line | Count | Source | 13 | 80.9k | fn parse_item<'de>( | 14 | 80.9k | parser: &mut Parser<'de>, | 15 | 80.9k | visitor: impl ItemVisitor<'de>, | 16 | 80.9k | ) -> Result<(), error::Repr> { | 17 | | // https://httpwg.org/specs/rfc9651.html#parse-item | 18 | 80.9k | let param_visitor = visitor.bare_item(parser.parse_bare_item()?)?; | 19 | 79.5k | parser.parse_parameters(param_visitor) | 20 | 80.9k | } |
Unexecuted instantiation: sfv::parser::parse_item::<&mut alloc::vec::Vec<sfv::parsed::ListEntry>> Unexecuted instantiation: sfv::parser::parse_item::<&mut sfv::parsed::Item> sfv::parser::parse_item::<&mut sfv::parsed::InnerList> Line | Count | Source | 13 | 3.14M | fn parse_item<'de>( | 14 | 3.14M | parser: &mut Parser<'de>, | 15 | 3.14M | visitor: impl ItemVisitor<'de>, | 16 | 3.14M | ) -> Result<(), error::Repr> { | 17 | | // https://httpwg.org/specs/rfc9651.html#parse-item | 18 | 3.14M | let param_visitor = visitor.bare_item(parser.parse_bare_item()?)?; | 19 | 3.14M | parser.parse_parameters(param_visitor) | 20 | 3.14M | } |
|
21 | | |
22 | 6.49k | fn parse_comma_separated<'de>( |
23 | 6.49k | parser: &mut Parser<'de>, |
24 | 6.49k | mut parse_member: impl FnMut(&mut Parser<'de>) -> Result<(), error::Repr>, |
25 | 6.49k | ) -> Result<(), error::Repr> { |
26 | 624k | while parser.peek().is_some() { |
27 | 624k | parse_member(parser)?; |
28 | | |
29 | 621k | parser.consume_ows_chars(); |
30 | | |
31 | 621k | if parser.peek().is_none() { |
32 | 2.57k | return Ok(()); |
33 | 619k | } |
34 | | |
35 | 619k | let comma_index = parser.index; |
36 | | |
37 | 619k | if let Some(c) = parser.peek() { |
38 | 619k | if c != b',' { |
39 | 872 | return Err(error::Repr::TrailingCharactersAfterMember(parser.index)); |
40 | 618k | } |
41 | 618k | parser.next(); |
42 | 0 | } |
43 | | |
44 | 618k | parser.consume_ows_chars(); |
45 | | |
46 | 618k | if parser.peek().is_none() { |
47 | | // Report the error at the position of the comma itself, rather |
48 | | // than at the end of input. |
49 | 77 | return Err(error::Repr::TrailingComma(comma_index)); |
50 | 618k | } |
51 | | } |
52 | | |
53 | 22 | Ok(()) |
54 | 6.49k | } Unexecuted instantiation: sfv::parser::parse_comma_separated::<<sfv::parser::Parser>::parse_list_with_visitor<alloc::vec::Vec<sfv::parsed::ListEntry>>::{closure#0}::{closure#0}>sfv::parser::parse_comma_separated::<<sfv::parser::Parser>::parse_dictionary_with_visitor<indexmap::map::IndexMap<sfv::key::Key, sfv::parsed::ListEntry>>::{closure#0}::{closure#0}>Line | Count | Source | 22 | 6.49k | fn parse_comma_separated<'de>( | 23 | 6.49k | parser: &mut Parser<'de>, | 24 | 6.49k | mut parse_member: impl FnMut(&mut Parser<'de>) -> Result<(), error::Repr>, | 25 | 6.49k | ) -> Result<(), error::Repr> { | 26 | 624k | while parser.peek().is_some() { | 27 | 624k | parse_member(parser)?; | 28 | | | 29 | 621k | parser.consume_ows_chars(); | 30 | | | 31 | 621k | if parser.peek().is_none() { | 32 | 2.57k | return Ok(()); | 33 | 619k | } | 34 | | | 35 | 619k | let comma_index = parser.index; | 36 | | | 37 | 619k | if let Some(c) = parser.peek() { | 38 | 619k | if c != b',' { | 39 | 872 | return Err(error::Repr::TrailingCharactersAfterMember(parser.index)); | 40 | 618k | } | 41 | 618k | parser.next(); | 42 | 0 | } | 43 | | | 44 | 618k | parser.consume_ows_chars(); | 45 | | | 46 | 618k | if parser.peek().is_none() { | 47 | | // Report the error at the position of the comma itself, rather | 48 | | // than at the end of input. | 49 | 77 | return Err(error::Repr::TrailingComma(comma_index)); | 50 | 618k | } | 51 | | } | 52 | | | 53 | 22 | Ok(()) | 54 | 6.49k | } |
|
55 | | |
56 | | /// Exposes methods for parsing input into a structured field value. |
57 | | #[must_use] |
58 | | pub struct Parser<'de> { |
59 | | input: &'de [u8], |
60 | | index: usize, |
61 | | version: Version, |
62 | | } |
63 | | |
64 | | impl<'de> Parser<'de> { |
65 | | /// Creates a parser from the given input with [`Version::Rfc9651`]. |
66 | 6.49k | pub fn new(input: &'de (impl ?Sized + AsRef<[u8]>)) -> Self { |
67 | 6.49k | Self { |
68 | 6.49k | input: input.as_ref(), |
69 | 6.49k | index: 0, |
70 | 6.49k | version: Version::Rfc9651, |
71 | 6.49k | } |
72 | 6.49k | } <sfv::parser::Parser>::new::<[u8]> Line | Count | Source | 66 | 6.49k | pub fn new(input: &'de (impl ?Sized + AsRef<[u8]>)) -> Self { | 67 | 6.49k | Self { | 68 | 6.49k | input: input.as_ref(), | 69 | 6.49k | index: 0, | 70 | 6.49k | version: Version::Rfc9651, | 71 | 6.49k | } | 72 | 6.49k | } |
Unexecuted instantiation: <sfv::parser::Parser>::new::<_> |
73 | | |
74 | | /// Sets the parser's version and returns the parser. |
75 | 0 | pub fn with_version(mut self, version: Version) -> Self { |
76 | 0 | self.version = version; |
77 | 0 | self |
78 | 0 | } |
79 | | |
80 | | /// Parses a structured field value. |
81 | | /// |
82 | | /// # Errors |
83 | | /// When the parsing process is unsuccessful. |
84 | | #[cfg(feature = "parsed-types")] |
85 | 6.49k | pub fn parse<T: crate::FieldType>(self) -> SFVResult<T> { |
86 | 6.49k | T::parse(self) |
87 | 6.49k | } <sfv::parser::Parser>::parse::<indexmap::map::IndexMap<sfv::key::Key, sfv::parsed::ListEntry>> Line | Count | Source | 85 | 6.49k | pub fn parse<T: crate::FieldType>(self) -> SFVResult<T> { | 86 | 6.49k | T::parse(self) | 87 | 6.49k | } |
Unexecuted instantiation: <sfv::parser::Parser>::parse::<_> |
88 | | |
89 | | /// Parses input into a structured field value of `Dictionary` type, using |
90 | | /// the given visitor. |
91 | | #[cfg_attr( |
92 | | feature = "parsed-types", |
93 | | doc = r#" |
94 | | |
95 | | This can also be used to parse a dictionary that is split into multiple lines by merging |
96 | | them into an existing structure: |
97 | | |
98 | | ``` |
99 | | # use sfv::{Dictionary, FieldType, Parser}; |
100 | | # fn main() -> Result<(), sfv::Error> { |
101 | | let mut dict: Dictionary = Parser::new("a=1").parse()?; |
102 | | |
103 | | Parser::new("b=2").parse_dictionary_with_visitor(&mut dict)?; |
104 | | |
105 | | assert_eq!( |
106 | | dict.serialize().as_deref(), |
107 | | Some("a=1, b=2"), |
108 | | ); |
109 | | # Ok(()) |
110 | | # } |
111 | | ``` |
112 | | "# |
113 | | )] |
114 | | /// |
115 | | /// # Errors |
116 | | /// When the parsing process is unsuccessful, including any error raised by a visitor. |
117 | 6.49k | pub fn parse_dictionary_with_visitor( |
118 | 6.49k | self, |
119 | 6.49k | visitor: &mut (impl ?Sized + DictionaryVisitor<'de>), |
120 | 6.49k | ) -> SFVResult<()> { |
121 | | // https://httpwg.org/specs/rfc9651.html#parse-dictionary |
122 | 6.49k | self.parse_internal(move |parser| { |
123 | 624k | parse_comma_separated(parser, |parser| { |
124 | | // Note: It is up to the visitor to properly handle duplicate keys. |
125 | 624k | let entry_visitor = visitor.entry(parser.parse_key()?)?; |
126 | | |
127 | 624k | if let Some(b'=') = parser.peek() { |
128 | 105k | parser.next(); |
129 | 105k | parser.parse_list_entry(entry_visitor) |
130 | | } else { |
131 | 519k | let param_visitor = entry_visitor.bare_item(BareItemFromInput::from(true))?; |
132 | 519k | parser.parse_parameters(param_visitor) |
133 | | } |
134 | 624k | }) |
135 | 6.49k | }) |
136 | 6.49k | } |
137 | | |
138 | | /// Parses input into a structured field value of `List` type, using the |
139 | | /// given visitor. |
140 | | #[allow(clippy::needless_raw_string_hashes)] // false positive: https://github.com/rust-lang/rust-clippy/issues/11737 |
141 | | #[cfg_attr( |
142 | | feature = "parsed-types", |
143 | | doc = r##" |
144 | | |
145 | | This can also be used to parse a list that is split into multiple lines by merging them |
146 | | into an existing structure: |
147 | | ``` |
148 | | # use sfv::{FieldType, List, Parser}; |
149 | | # fn main() -> Result<(), sfv::Error> { |
150 | | let mut list: List = Parser::new("11, (12 13)").parse()?; |
151 | | |
152 | | Parser::new(r#""foo", "bar""#).parse_list_with_visitor(&mut list)?; |
153 | | |
154 | | assert_eq!( |
155 | | list.serialize().as_deref(), |
156 | | Some(r#"11, (12 13), "foo", "bar""#), |
157 | | ); |
158 | | # Ok(()) |
159 | | # } |
160 | | ``` |
161 | | "## |
162 | | )] |
163 | | /// |
164 | | /// # Errors |
165 | | /// When the parsing process is unsuccessful, including any error raised by a visitor. |
166 | 0 | pub fn parse_list_with_visitor( |
167 | 0 | self, |
168 | 0 | visitor: &mut (impl ?Sized + ListVisitor<'de>), |
169 | 0 | ) -> SFVResult<()> { |
170 | | // https://httpwg.org/specs/rfc9651.html#parse-list |
171 | 0 | self.parse_internal(|parser| { |
172 | 0 | parse_comma_separated(parser, |parser| parser.parse_list_entry(visitor.entry()?)) |
173 | 0 | }) |
174 | 0 | } |
175 | | |
176 | | /// Parses input into a structured field value of `Item` type, using the |
177 | | /// given visitor. |
178 | | /// |
179 | | /// # Errors |
180 | | /// When the parsing process is unsuccessful, including any error raised by a visitor. |
181 | 0 | pub fn parse_item_with_visitor(self, visitor: impl ItemVisitor<'de>) -> SFVResult<()> { |
182 | 0 | self.parse_internal(|parser| parse_item(parser, visitor)) |
183 | 0 | } |
184 | | |
185 | 223M | fn peek(&self) -> Option<u8> { |
186 | 223M | self.input.get(self.index).copied() |
187 | 223M | } |
188 | | |
189 | 107M | fn next(&mut self) -> Option<u8> { |
190 | 107M | self.peek().inspect(|_| self.index += 1) |
191 | 107M | } |
192 | | |
193 | | // Shared driver for the public parse methods: skips leading spaces, runs `f`, |
194 | | // skips trailing spaces, then reports an error if any input remains. |
195 | 6.49k | fn parse_internal( |
196 | 6.49k | mut self, |
197 | 6.49k | f: impl FnOnce(&mut Self) -> Result<(), error::Repr>, |
198 | 6.49k | ) -> SFVResult<()> { |
199 | | // https://httpwg.org/specs/rfc9651.html#text-parse |
200 | | |
201 | 6.49k | self.consume_sp_chars(); |
202 | | |
203 | 6.49k | f(&mut self)?; |
204 | | |
205 | 2.60k | self.consume_sp_chars(); |
206 | | |
207 | 2.60k | if self.peek().is_some() { |
208 | 0 | return Err(error::Repr::TrailingCharactersAfterParsedValue(self.index).into()); |
209 | 2.60k | } |
210 | | |
211 | 2.60k | Ok(()) |
212 | 6.49k | } Unexecuted instantiation: <sfv::parser::Parser>::parse_internal::<<sfv::parser::Parser>::parse_item_with_visitor<&mut sfv::parsed::Item>::{closure#0}>Unexecuted instantiation: <sfv::parser::Parser>::parse_internal::<<sfv::parser::Parser>::parse_list_with_visitor<alloc::vec::Vec<sfv::parsed::ListEntry>>::{closure#0}><sfv::parser::Parser>::parse_internal::<<sfv::parser::Parser>::parse_dictionary_with_visitor<indexmap::map::IndexMap<sfv::key::Key, sfv::parsed::ListEntry>>::{closure#0}>Line | Count | Source | 195 | 6.49k | fn parse_internal( | 196 | 6.49k | mut self, | 197 | 6.49k | f: impl FnOnce(&mut Self) -> Result<(), error::Repr>, | 198 | 6.49k | ) -> SFVResult<()> { | 199 | | // https://httpwg.org/specs/rfc9651.html#text-parse | 200 | | | 201 | 6.49k | self.consume_sp_chars(); | 202 | | | 203 | 6.49k | f(&mut self)?; | 204 | | | 205 | 2.60k | self.consume_sp_chars(); | 206 | | | 207 | 2.60k | if self.peek().is_some() { | 208 | 0 | return Err(error::Repr::TrailingCharactersAfterParsedValue(self.index).into()); | 209 | 2.60k | } | 210 | | | 211 | 2.60k | Ok(()) | 212 | 6.49k | } |
|
213 | | |
214 | 105k | fn parse_list_entry(&mut self, visitor: impl EntryVisitor<'de>) -> Result<(), error::Repr> { |
215 | | // https://httpwg.org/specs/rfc9651.html#parse-item-or-list |
216 | | // ListEntry represents a tuple (item_or_inner_list, parameters) |
217 | | |
218 | 105k | match self.peek() { |
219 | 24.0k | Some(b'(') => self.parse_inner_list(visitor.inner_list()?), |
220 | 80.9k | _ => parse_item(self, visitor), |
221 | | } |
222 | 105k | } <sfv::parser::Parser>::parse_list_entry::<indexmap::map::core::entry::Entry<sfv::key::Key, sfv::parsed::ListEntry>> Line | Count | Source | 214 | 105k | fn parse_list_entry(&mut self, visitor: impl EntryVisitor<'de>) -> Result<(), error::Repr> { | 215 | | // https://httpwg.org/specs/rfc9651.html#parse-item-or-list | 216 | | // ListEntry represents a tuple (item_or_inner_list, parameters) | 217 | | | 218 | 105k | match self.peek() { | 219 | 24.0k | Some(b'(') => self.parse_inner_list(visitor.inner_list()?), | 220 | 80.9k | _ => parse_item(self, visitor), | 221 | | } | 222 | 105k | } |
Unexecuted instantiation: <sfv::parser::Parser>::parse_list_entry::<&mut alloc::vec::Vec<sfv::parsed::ListEntry>> |
223 | | |
224 | 24.0k | pub(crate) fn parse_inner_list( |
225 | 24.0k | &mut self, |
226 | 24.0k | mut visitor: impl InnerListVisitor<'de>, |
227 | 24.0k | ) -> Result<(), error::Repr> { |
228 | | // https://httpwg.org/specs/rfc9651.html#parse-innerlist |
229 | | |
230 | 24.0k | if Some(b'(') != self.peek() { |
231 | 0 | return Err(error::Repr::ExpectedStartOfInnerList(self.index)); |
232 | 24.0k | } |
233 | | |
234 | 24.0k | self.next(); |
235 | | |
236 | 3.16M | while self.peek().is_some() { |
237 | 3.16M | self.consume_sp_chars(); |
238 | | |
239 | 3.16M | if Some(b')') == self.peek() { |
240 | 22.9k | self.next(); |
241 | 22.9k | let param_visitor = visitor.finish()?; |
242 | 22.9k | return self.parse_parameters(param_visitor); |
243 | 3.14M | } |
244 | | |
245 | 3.14M | parse_item(self, visitor.item()?)?; |
246 | | |
247 | 3.14M | if let Some(c) = self.peek() { |
248 | 3.14M | if c != b' ' && c != b')' { |
249 | 260 | return Err(error::Repr::ExpectedInnerListDelimiter(self.index)); |
250 | 3.14M | } |
251 | 263 | } |
252 | | } |
253 | | |
254 | 296 | Err(error::Repr::UnterminatedInnerList(self.index)) |
255 | 24.0k | } |
256 | | |
257 | 3.29M | pub(crate) fn parse_bare_item(&mut self) -> Result<BareItemFromInput<'de>, error::Repr> { |
258 | | // https://httpwg.org/specs/rfc9651.html#parse-bare-item |
259 | | |
260 | 3.29M | Ok(match self.peek() { |
261 | 4.80k | Some(b'?') => BareItemFromInput::Boolean(self.parse_bool()?), |
262 | 76.3k | Some(b'"') => BareItemFromInput::String(self.parse_string()?), |
263 | 34.6k | Some(b':') => BareItemFromInput::ByteSequence(self.parse_byte_sequence()?), |
264 | 1.52k | Some(b'@') => BareItemFromInput::Date(self.parse_date()?), |
265 | 33.4k | Some(b'%') => BareItemFromInput::DisplayString(self.parse_display_string()?), |
266 | 3.14M | Some(c) if utils::is_allowed_start_token_char(c) => { |
267 | 2.78M | BareItemFromInput::Token(self.parse_token()?) |
268 | | } |
269 | 363k | Some(c) if c == b'-' || c.is_ascii_digit() => match self.parse_number()? { |
270 | 51.7k | Num::Decimal(val) => BareItemFromInput::Decimal(val), |
271 | 311k | Num::Integer(val) => BareItemFromInput::Integer(val), |
272 | | }, |
273 | 225 | _ => return Err(error::Repr::ExpectedStartOfBareItem(self.index)), |
274 | | }) |
275 | 3.29M | } |
276 | | |
277 | 4.80k | pub(crate) fn parse_bool(&mut self) -> Result<bool, error::Repr> { |
278 | | // https://httpwg.org/specs/rfc9651.html#parse-boolean |
279 | | |
280 | 4.80k | if self.peek() != Some(b'?') { |
281 | 0 | return Err(error::Repr::ExpectedStartOfBoolean(self.index)); |
282 | 4.80k | } |
283 | | |
284 | 4.80k | self.next(); |
285 | | |
286 | 4.80k | match self.peek() { |
287 | | Some(b'0') => { |
288 | 1.48k | self.next(); |
289 | 1.48k | Ok(false) |
290 | | } |
291 | | Some(b'1') => { |
292 | 3.29k | self.next(); |
293 | 3.29k | Ok(true) |
294 | | } |
295 | 18 | _ => Err(error::Repr::ExpectedBoolean(self.index)), |
296 | | } |
297 | 4.80k | } |
298 | | |
299 | 76.3k | pub(crate) fn parse_string(&mut self) -> Result<Cow<'de, StringRef>, error::Repr> { |
300 | | // https://httpwg.org/specs/rfc9651.html#parse-string |
301 | | |
302 | 76.3k | if self.peek() != Some(b'"') { |
303 | 0 | return Err(error::Repr::ExpectedStartOfString(self.index)); |
304 | 76.3k | } |
305 | | |
306 | 76.3k | self.next(); |
307 | | |
308 | 76.3k | let start = self.index; |
309 | 76.3k | let mut output = Cow::Borrowed(&[] as &[u8]); |
310 | | |
311 | 16.0M | while let Some(curr_char) = self.peek() { |
312 | 16.0M | match curr_char { |
313 | | b'"' => { |
314 | 75.9k | self.next(); |
315 | | // TODO: The UTF-8 validation is redundant with the preceding character checks, but |
316 | | // its removal is only possible with unsafe code. |
317 | 75.9k | return Ok(match output { |
318 | 72.0k | Cow::Borrowed(output) => { |
319 | 72.0k | let output = std::str::from_utf8(output).unwrap(); |
320 | 72.0k | Cow::Borrowed(StringRef::from_str(output).unwrap()) |
321 | | } |
322 | 3.80k | Cow::Owned(output) => { |
323 | 3.80k | let output = StdString::from_utf8(output).unwrap(); |
324 | 3.80k | Cow::Owned(String::from_string(output).unwrap()) |
325 | | } |
326 | | }); |
327 | | } |
328 | 15.9M | 0x00..=0x1f | 0x7f..=0xff => { |
329 | 92 | return Err(error::Repr::InvalidStringCharacter(self.index)); |
330 | | } |
331 | | b'\\' => { |
332 | 11.7k | self.next(); |
333 | 11.7k | match self.peek() { |
334 | 11.7k | Some(c @ (b'\\' | b'"')) => { |
335 | 11.7k | self.next(); |
336 | 11.7k | output.to_mut().push(c); |
337 | 11.7k | } |
338 | 12 | None => return Err(error::Repr::UnterminatedEscapeSequence(self.index)), |
339 | 33 | Some(_) => return Err(error::Repr::InvalidEscapeSequence(self.index)), |
340 | | } |
341 | | } |
342 | | _ => { |
343 | 15.9M | self.next(); |
344 | 15.9M | match output { |
345 | 13.7M | Cow::Borrowed(ref mut output) => *output = &self.input[start..self.index], |
346 | 2.20M | Cow::Owned(ref mut output) => output.push(curr_char), |
347 | | } |
348 | | } |
349 | | } |
350 | | } |
351 | 274 | Err(error::Repr::UnterminatedString(self.index)) |
352 | 76.3k | } |
353 | | |
354 | 3.92M | fn parse_non_empty_str( |
355 | 3.92M | &mut self, |
356 | 3.92M | is_allowed_start_char: impl FnOnce(u8) -> bool, |
357 | 3.92M | is_allowed_inner_char: impl Fn(u8) -> bool, |
358 | 3.92M | ) -> Option<&'de str> { |
359 | 3.92M | let start = self.index; |
360 | | |
361 | 3.92M | match self.peek() { |
362 | 3.92M | Some(c) if is_allowed_start_char(c) => { |
363 | 3.92M | self.next(); |
364 | 3.92M | } |
365 | 283 | _ => return None, |
366 | | } |
367 | | |
368 | | loop { |
369 | 44.1M | match self.peek() { |
370 | 44.1M | Some(c) if is_allowed_inner_char(c) => { |
371 | 40.2M | self.next(); |
372 | 40.2M | } |
373 | | // TODO: The UTF-8 validation is redundant with the preceding character checks, but |
374 | | // its removal is only possible with unsafe code. |
375 | 3.92M | _ => return Some(std::str::from_utf8(&self.input[start..self.index]).unwrap()), |
376 | | } |
377 | | } |
378 | 3.92M | } <sfv::parser::Parser>::parse_non_empty_str::<sfv::utils::is_allowed_start_key_char, sfv::utils::is_allowed_inner_key_char> Line | Count | Source | 354 | 1.14M | fn parse_non_empty_str( | 355 | 1.14M | &mut self, | 356 | 1.14M | is_allowed_start_char: impl FnOnce(u8) -> bool, | 357 | 1.14M | is_allowed_inner_char: impl Fn(u8) -> bool, | 358 | 1.14M | ) -> Option<&'de str> { | 359 | 1.14M | let start = self.index; | 360 | | | 361 | 1.14M | match self.peek() { | 362 | 1.14M | Some(c) if is_allowed_start_char(c) => { | 363 | 1.14M | self.next(); | 364 | 1.14M | } | 365 | 283 | _ => return None, | 366 | | } | 367 | | | 368 | | loop { | 369 | 34.0M | match self.peek() { | 370 | 34.0M | Some(c) if is_allowed_inner_char(c) => { | 371 | 32.8M | self.next(); | 372 | 32.8M | } | 373 | | // TODO: The UTF-8 validation is redundant with the preceding character checks, but | 374 | | // its removal is only possible with unsafe code. | 375 | 1.14M | _ => return Some(std::str::from_utf8(&self.input[start..self.index]).unwrap()), | 376 | | } | 377 | | } | 378 | 1.14M | } |
<sfv::parser::Parser>::parse_non_empty_str::<sfv::utils::is_allowed_start_token_char, sfv::utils::is_allowed_inner_token_char> Line | Count | Source | 354 | 2.78M | fn parse_non_empty_str( | 355 | 2.78M | &mut self, | 356 | 2.78M | is_allowed_start_char: impl FnOnce(u8) -> bool, | 357 | 2.78M | is_allowed_inner_char: impl Fn(u8) -> bool, | 358 | 2.78M | ) -> Option<&'de str> { | 359 | 2.78M | let start = self.index; | 360 | | | 361 | 2.78M | match self.peek() { | 362 | 2.78M | Some(c) if is_allowed_start_char(c) => { | 363 | 2.78M | self.next(); | 364 | 2.78M | } | 365 | 0 | _ => return None, | 366 | | } | 367 | | | 368 | | loop { | 369 | 10.1M | match self.peek() { | 370 | 10.1M | Some(c) if is_allowed_inner_char(c) => { | 371 | 7.34M | self.next(); | 372 | 7.34M | } | 373 | | // TODO: The UTF-8 validation is redundant with the preceding character checks, but | 374 | | // its removal is only possible with unsafe code. | 375 | 2.78M | _ => return Some(std::str::from_utf8(&self.input[start..self.index]).unwrap()), | 376 | | } | 377 | | } | 378 | 2.78M | } |
|
379 | | |
380 | 2.78M | pub(crate) fn parse_token(&mut self) -> Result<&'de TokenRef, error::Repr> { |
381 | | // https://httpwg.org/specs/rfc9651.html#parse-token |
382 | | |
383 | 2.78M | match self.parse_non_empty_str( |
384 | 2.78M | utils::is_allowed_start_token_char, |
385 | 2.78M | utils::is_allowed_inner_token_char, |
386 | 2.78M | ) { |
387 | 0 | None => Err(error::Repr::ExpectedStartOfToken(self.index)), |
388 | 2.78M | Some(str) => Ok(TokenRef::from_validated_str(str)), |
389 | | } |
390 | 2.78M | } |
391 | | |
392 | 34.6k | pub(crate) fn parse_byte_sequence(&mut self) -> Result<Vec<u8>, error::Repr> { |
393 | | // https://httpwg.org/specs/rfc9651.html#parse-binary |
394 | | |
395 | 34.6k | if self.peek() != Some(b':') { |
396 | 0 | return Err(error::Repr::ExpectedStartOfByteSequence(self.index)); |
397 | 34.6k | } |
398 | | |
399 | 34.6k | self.next(); |
400 | 34.6k | let start = self.index; |
401 | | |
402 | | loop { |
403 | 19.9M | match self.next() { |
404 | 34.6k | Some(b':') => break, |
405 | 19.9M | Some(_) => {} |
406 | 28 | None => return Err(error::Repr::UnterminatedByteSequence(self.index)), |
407 | | } |
408 | | } |
409 | | |
410 | 34.6k | let colon_index = self.index - 1; |
411 | | |
412 | 34.6k | match base64::Engine::decode(&utils::BASE64, &self.input[start..colon_index]) { |
413 | 34.0k | Ok(content) => Ok(content), |
414 | 588 | Err(err) => { |
415 | 588 | let index = match err { |
416 | 469 | base64::DecodeError::InvalidByte(offset, _) |
417 | 469 | | base64::DecodeError::InvalidLastSymbol(offset, _) => start + offset, |
418 | | // Report these two at the position of the last base64 |
419 | | // character, since they correspond to errors in the input |
420 | | // as a whole. |
421 | | base64::DecodeError::InvalidLength(_) | base64::DecodeError::InvalidPadding => { |
422 | 119 | colon_index - 1 |
423 | | } |
424 | | }; |
425 | | |
426 | 588 | Err(error::Repr::InvalidByteSequence(index)) |
427 | | } |
428 | | } |
429 | 34.6k | } |
430 | | |
431 | 364k | pub(crate) fn parse_number(&mut self) -> Result<Num, error::Repr> { |
432 | | // https://httpwg.org/specs/rfc9651.html#parse-number |
433 | | |
434 | 711k | fn char_to_i64(c: u8) -> i64 { |
435 | 711k | i64::from(c - b'0') |
436 | 711k | } |
437 | | |
438 | 364k | let sign = if let Some(b'-') = self.peek() { |
439 | 2.83k | self.next(); |
440 | 2.83k | -1 |
441 | | } else { |
442 | 361k | 1 |
443 | | }; |
444 | | |
445 | 364k | let mut magnitude = match self.peek() { |
446 | 364k | Some(c @ b'0'..=b'9') => { |
447 | 364k | self.next(); |
448 | 364k | char_to_i64(c) |
449 | | } |
450 | 54 | _ => return Err(error::Repr::ExpectedDigit(self.index)), |
451 | | }; |
452 | | |
453 | 364k | let mut digits = 1; |
454 | | |
455 | | loop { |
456 | 589k | match self.peek() { |
457 | | Some(b'.') => { |
458 | 51.7k | if digits > 12 { |
459 | 5 | return Err(error::Repr::TooManyDigitsBeforeDecimalPoint(self.index)); |
460 | 51.7k | } |
461 | 51.7k | self.next(); |
462 | 51.7k | break; |
463 | | } |
464 | 228k | Some(c @ b'0'..=b'9') => { |
465 | 224k | digits += 1; |
466 | 224k | if digits > 15 { |
467 | 9 | return Err(error::Repr::TooManyDigits(self.index)); |
468 | 224k | } |
469 | 224k | self.next(); |
470 | 224k | magnitude = magnitude * 10 + char_to_i64(c); |
471 | | } |
472 | 312k | _ => return Ok(Num::Integer(Integer::try_from(sign * magnitude).unwrap())), |
473 | | } |
474 | | } |
475 | | |
476 | 51.7k | magnitude *= 1000; |
477 | 51.7k | let mut scale = 100; |
478 | | |
479 | 173k | while let Some(c @ b'0'..=b'9') = self.peek() { |
480 | 121k | if scale == 0 { |
481 | 15 | return Err(error::Repr::TooManyDigitsAfterDecimalPoint(self.index)); |
482 | 121k | } |
483 | | |
484 | 121k | self.next(); |
485 | 121k | magnitude += char_to_i64(c) * scale; |
486 | 121k | scale /= 10; |
487 | | } |
488 | | |
489 | 51.7k | if scale == 100 { |
490 | | // Report the error at the position of the decimal point itself, |
491 | | // rather than the next position. |
492 | 45 | Err(error::Repr::TrailingDecimalPoint(self.index - 1)) |
493 | | } else { |
494 | 51.7k | Ok(Num::Decimal(Decimal::from_integer_scaled_1000( |
495 | 51.7k | Integer::try_from(sign * magnitude).unwrap(), |
496 | 51.7k | ))) |
497 | | } |
498 | 364k | } |
499 | | |
500 | 1.52k | pub(crate) fn parse_date(&mut self) -> Result<Date, error::Repr> { |
501 | | // https://httpwg.org/specs/rfc9651.html#parse-date |
502 | | |
503 | 1.52k | if self.peek() != Some(b'@') { |
504 | 0 | return Err(error::Repr::ExpectedStartOfDate(self.index)); |
505 | 1.52k | } |
506 | | |
507 | 1.52k | match self.version { |
508 | 0 | Version::Rfc8941 => return Err(error::Repr::Rfc8941Date(self.index)), |
509 | 1.52k | Version::Rfc9651 => {} |
510 | | } |
511 | | |
512 | 1.52k | let start = self.index; |
513 | 1.52k | self.next(); |
514 | | |
515 | 1.52k | match self.parse_number()? { |
516 | 1.48k | Num::Integer(seconds) => Ok(Date::from_unix_seconds(seconds)), |
517 | 3 | Num::Decimal(_) => Err(error::Repr::NonIntegerDate(start)), |
518 | | } |
519 | 1.52k | } |
520 | | |
521 | 33.4k | pub(crate) fn parse_display_string(&mut self) -> Result<Cow<'de, str>, error::Repr> { |
522 | | // https://httpwg.org/specs/rfc9651.html#parse-display |
523 | | |
524 | 33.4k | if self.peek() != Some(b'%') { |
525 | 0 | return Err(error::Repr::ExpectedStartOfDisplayString(self.index)); |
526 | 33.4k | } |
527 | | |
528 | 33.4k | match self.version { |
529 | 0 | Version::Rfc8941 => return Err(error::Repr::Rfc8941DisplayString(self.index)), |
530 | 33.4k | Version::Rfc9651 => {} |
531 | | } |
532 | | |
533 | 33.4k | self.next(); |
534 | | |
535 | 33.4k | if self.peek() != Some(b'"') { |
536 | 43 | return Err(error::Repr::ExpectedQuote(self.index)); |
537 | 33.4k | } |
538 | | |
539 | 33.4k | self.next(); |
540 | | |
541 | 33.4k | let start = self.index; |
542 | 33.4k | let mut output = Cow::Borrowed(&[] as &[u8]); |
543 | | |
544 | 21.3M | while let Some(curr_char) = self.peek() { |
545 | 21.3M | match curr_char { |
546 | | b'"' => { |
547 | 32.8k | self.next(); |
548 | 32.8k | return match output { |
549 | 24.8k | Cow::Borrowed(output) => match std::str::from_utf8(output) { |
550 | 24.8k | Ok(output) => Ok(Cow::Borrowed(output)), |
551 | 0 | Err(err) => Err(error::Repr::InvalidUtf8InDisplayString( |
552 | 0 | start + err.valid_up_to(), |
553 | 0 | )), |
554 | | }, |
555 | 8.00k | Cow::Owned(output) => match StdString::from_utf8(output) { |
556 | 7.88k | Ok(output) => Ok(Cow::Owned(output)), |
557 | 119 | Err(err) => Err(error::Repr::InvalidUtf8InDisplayString( |
558 | 119 | start + err.utf8_error().valid_up_to(), |
559 | 119 | )), |
560 | | }, |
561 | | }; |
562 | | } |
563 | 21.2M | 0x00..=0x1f | 0x7f..=0xff => { |
564 | 96 | return Err(error::Repr::InvalidDisplayStringCharacter(self.index)); |
565 | | } |
566 | | b'%' => { |
567 | 26.8k | self.next(); |
568 | | |
569 | 26.8k | let mut octet = 0; |
570 | | |
571 | 26.8k | for _ in 0..2 { |
572 | 53.5k | octet = (octet << 4) |
573 | 53.5k | + match self.peek() { |
574 | 53.4k | Some(c @ b'0'..=b'9') => { |
575 | 45.7k | self.next(); |
576 | 45.7k | c - b'0' |
577 | | } |
578 | 7.67k | Some(c @ b'a'..=b'f') => { |
579 | 7.63k | self.next(); |
580 | 7.63k | c - b'a' + 10 |
581 | | } |
582 | | None => { |
583 | 46 | return Err(error::Repr::UnterminatedEscapeSequence(self.index)) |
584 | | } |
585 | | Some(_) => { |
586 | 127 | return Err(error::Repr::InvalidEscapeSequence(self.index)) |
587 | | } |
588 | | }; |
589 | | } |
590 | | |
591 | 26.6k | output.to_mut().push(octet); |
592 | | } |
593 | | _ => { |
594 | 21.2M | self.next(); |
595 | 21.2M | match output { |
596 | 19.3M | Cow::Borrowed(ref mut output) => *output = &self.input[start..self.index], |
597 | 1.90M | Cow::Owned(ref mut output) => output.push(curr_char), |
598 | | } |
599 | | } |
600 | | } |
601 | | } |
602 | 275 | Err(error::Repr::UnterminatedDisplayString(self.index)) |
603 | 33.4k | } |
604 | | |
605 | 3.76M | pub(crate) fn parse_parameters( |
606 | 3.76M | &mut self, |
607 | 3.76M | mut visitor: impl ParameterVisitor<'de>, |
608 | 3.76M | ) -> Result<(), error::Repr> { |
609 | | // https://httpwg.org/specs/rfc9651.html#parse-param |
610 | | |
611 | 4.28M | while let Some(b';') = self.peek() { |
612 | 520k | self.next(); |
613 | 520k | self.consume_sp_chars(); |
614 | | |
615 | 520k | let param_name = self.parse_key()?; |
616 | 520k | let param_value = match self.peek() { |
617 | | Some(b'=') => { |
618 | 69.3k | self.next(); |
619 | 69.3k | self.parse_bare_item()? |
620 | | } |
621 | 451k | _ => BareItemFromInput::Boolean(true), |
622 | | }; |
623 | | // Note: It is up to the visitor to properly handle duplicate keys. |
624 | 520k | visitor.parameter(param_name, param_value)?; |
625 | | } |
626 | | |
627 | 3.76M | visitor.finish()?; |
628 | 3.76M | Ok(()) |
629 | 3.76M | } |
630 | | |
631 | 1.14M | pub(crate) fn parse_key(&mut self) -> Result<&'de KeyRef, error::Repr> { |
632 | | // https://httpwg.org/specs/rfc9651.html#parse-key |
633 | | |
634 | 1.14M | match self.parse_non_empty_str( |
635 | 1.14M | utils::is_allowed_start_key_char, |
636 | 1.14M | utils::is_allowed_inner_key_char, |
637 | 1.14M | ) { |
638 | 283 | None => Err(error::Repr::ExpectedStartOfKey(self.index)), |
639 | 1.14M | Some(str) => Ok(KeyRef::from_validated_str(str)), |
640 | | } |
641 | 1.14M | } |
642 | | |
643 | 1.24M | fn consume_ows_chars(&mut self) { |
644 | 1.25M | while let Some(b' ' | b'\t') = self.peek() { |
645 | 9.87k | self.next(); |
646 | 9.87k | } |
647 | 1.24M | } |
648 | | |
649 | 3.69M | fn consume_sp_chars(&mut self) { |
650 | 7.17M | while let Some(b' ') = self.peek() { |
651 | 3.47M | self.next(); |
652 | 3.47M | } |
653 | 3.69M | } |
654 | | |
655 | | #[cfg(test)] |
656 | | pub(crate) fn remaining(&self) -> &[u8] { |
657 | | &self.input[self.index..] |
658 | | } |
659 | | } |