/rust/registry/src/index.crates.io-1949cf8c6b5b557f/mime-0.3.17/src/parse.rs
Line | Count | Source |
1 | | #[allow(unused, deprecated)] |
2 | | use std::ascii::AsciiExt; |
3 | | use std::error::Error; |
4 | | use std::fmt; |
5 | | use std::iter::Enumerate; |
6 | | use std::str::Bytes; |
7 | | |
8 | | use super::{Mime, MimeIter, Source, ParamSource, Indexed, CHARSET, UTF_8}; |
9 | | |
/// Reasons a media type string can be rejected by the parser in this module.
#[derive(Debug)]
pub enum ParseError {
    /// The input ended before a `/` separating type and subtype was seen.
    MissingSlash,
    /// The input ended while a parameter name was still waiting for its `=`.
    MissingEqual,
    /// The input ended inside a quoted parameter value.
    MissingQuote,
    /// A byte was found that is not allowed at its position.
    InvalidToken {
        /// Byte offset of the offending byte within the parsed string.
        pos: usize,
        /// The offending byte itself.
        byte: u8,
    },
}
20 | | |
21 | | impl ParseError { |
22 | 0 | fn s(&self) -> &str { |
23 | | use self::ParseError::*; |
24 | | |
25 | 0 | match *self { |
26 | 0 | MissingSlash => "a slash (/) was missing between the type and subtype", |
27 | 0 | MissingEqual => "an equals sign (=) was missing between a parameter and its value", |
28 | 0 | MissingQuote => "a quote (\") was missing from a parameter value", |
29 | 0 | InvalidToken { .. } => "an invalid token was encountered", |
30 | | } |
31 | 0 | } |
32 | | } |
33 | | |
34 | | impl fmt::Display for ParseError { |
35 | 0 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
36 | 0 | if let ParseError::InvalidToken { pos, byte } = *self { |
37 | 0 | write!(f, "{}, {:X} at position {}", self.s(), byte, pos) |
38 | | } else { |
39 | 0 | f.write_str(self.s()) |
40 | | } |
41 | 0 | } |
42 | | } |
43 | | |
44 | | impl Error for ParseError { |
45 | | // Minimum Rust is 1.15, Error::description was still required then |
46 | | #[allow(deprecated)] |
47 | 0 | fn description(&self) -> &str { |
48 | 0 | self.s() |
49 | 0 | } |
50 | | } |
51 | | |
52 | | impl<'a> MimeIter<'a> { |
53 | | /// A new iterator over mimes or media types |
54 | 0 | pub fn new(s: &'a str) -> Self { |
55 | 0 | Self { |
56 | 0 | pos: 0, |
57 | 0 | source: s, |
58 | 0 | } |
59 | 0 | } |
60 | | } |
61 | | |
62 | | impl<'a> Iterator for MimeIter<'a> { |
63 | | type Item = Result<Mime, &'a str>; |
64 | | |
65 | 0 | fn next(&mut self) -> Option<Self::Item> { |
66 | 0 | let start = self.pos; |
67 | 0 | let len = self.source.bytes().len(); |
68 | | |
69 | 0 | if start >= len { |
70 | 0 | return None |
71 | 0 | } |
72 | | |
73 | | // Try parsing the whole remaining slice, until the end |
74 | 0 | match parse(&self.source[start ..len]) { |
75 | 0 | Ok(value) => { |
76 | 0 | self.pos = len; |
77 | 0 | Some(Ok(value)) |
78 | | } |
79 | 0 | Err(ParseError::InvalidToken { pos, .. }) => { |
80 | | // The first token is immediately found to be wrong by `parse`. Skip it |
81 | 0 | if pos == 0 { |
82 | 0 | self.pos += 1; |
83 | 0 | return self.next() |
84 | 0 | } |
85 | 0 | let slice = &self.source[start .. start + pos]; |
86 | | // Try parsing the longest slice (until the first invalid token) |
87 | 0 | return match parse(slice) { |
88 | 0 | Ok(mime) => { |
89 | 0 | self.pos = start + pos + 1; |
90 | 0 | Some(Ok(mime)) |
91 | | } |
92 | | Err(_) => { |
93 | 0 | if start + pos < len { |
94 | | // Skip this invalid slice, |
95 | | // try parsing the remaining slice in the next iteration |
96 | 0 | self.pos = start + pos; |
97 | 0 | Some(Err(slice)) |
98 | | } else { |
99 | 0 | None |
100 | | } |
101 | | } |
102 | | } |
103 | | } |
104 | | // Do not process any other error condition: the slice is malformed and |
105 | | // no character is found to be invalid: a character is missing |
106 | 0 | Err(_) => None, |
107 | | } |
108 | 0 | } |
109 | | } |
110 | | |
111 | 0 | pub fn parse(s: &str) -> Result<Mime, ParseError> { |
112 | 0 | if s == "*/*" { |
113 | 0 | return Ok(::STAR_STAR); |
114 | 0 | } |
115 | | |
116 | 0 | let mut iter = s.bytes().enumerate(); |
117 | | // toplevel |
118 | | let mut start; |
119 | | let slash; |
120 | | loop { |
121 | 0 | match iter.next() { |
122 | 0 | Some((_, c)) if is_token(c) => (), |
123 | 0 | Some((i, b'/')) if i > 0 => { |
124 | 0 | slash = i; |
125 | 0 | start = i + 1; |
126 | 0 | break; |
127 | | }, |
128 | 0 | None => return Err(ParseError::MissingSlash), // EOF and no toplevel is no Mime |
129 | 0 | Some((pos, byte)) => return Err(ParseError::InvalidToken { |
130 | 0 | pos: pos, |
131 | 0 | byte: byte, |
132 | 0 | }) |
133 | | }; |
134 | | |
135 | | } |
136 | | |
137 | | // sublevel |
138 | 0 | let mut plus = None; |
139 | | loop { |
140 | 0 | match iter.next() { |
141 | 0 | Some((i, b'+')) if i > start => { |
142 | 0 | plus = Some(i); |
143 | 0 | }, |
144 | 0 | Some((i, b';')) if i > start => { |
145 | 0 | start = i; |
146 | 0 | break; |
147 | | }, |
148 | 0 | Some((_, c)) if is_token(c) => (), |
149 | | None => { |
150 | 0 | return Ok(Mime { |
151 | 0 | source: Source::Dynamic(s.to_ascii_lowercase()), |
152 | 0 | slash: slash, |
153 | 0 | plus: plus, |
154 | 0 | params: ParamSource::None, |
155 | 0 | }); |
156 | | }, |
157 | 0 | Some((pos, byte)) => return Err(ParseError::InvalidToken { |
158 | 0 | pos: pos, |
159 | 0 | byte: byte, |
160 | 0 | }) |
161 | | }; |
162 | | } |
163 | | |
164 | | // params |
165 | 0 | let params = params_from_str(s, &mut iter, start)?; |
166 | | |
167 | 0 | let src = match params { |
168 | 0 | ParamSource::Utf8(_) => s.to_ascii_lowercase(), |
169 | 0 | ParamSource::Custom(semicolon, ref indices) => lower_ascii_with_params(s, semicolon, indices), |
170 | | ParamSource::None => { |
171 | | // Chop off the empty list |
172 | 0 | s[..start].to_ascii_lowercase() |
173 | | } |
174 | | }; |
175 | | |
176 | 0 | Ok(Mime { |
177 | 0 | source: Source::Dynamic(src), |
178 | 0 | slash: slash, |
179 | 0 | plus: plus, |
180 | 0 | params: params, |
181 | 0 | }) |
182 | 0 | } |
183 | | |
184 | | |
185 | 0 | fn params_from_str(s: &str, iter: &mut Enumerate<Bytes>, mut start: usize) -> Result<ParamSource, ParseError> { |
186 | 0 | let semicolon = start; |
187 | 0 | start += 1; |
188 | 0 | let mut params = ParamSource::None; |
189 | 0 | 'params: while start < s.len() { |
190 | | let name; |
191 | | // name |
192 | | 'name: loop { |
193 | 0 | match iter.next() { |
194 | 0 | Some((i, b' ')) if i == start => { |
195 | 0 | start = i + 1; |
196 | 0 | continue 'params; |
197 | | }, |
198 | 0 | Some((_, c)) if is_token(c) => (), |
199 | 0 | Some((i, b'=')) if i > start => { |
200 | 0 | name = Indexed(start, i); |
201 | 0 | start = i + 1; |
202 | 0 | break 'name; |
203 | | }, |
204 | 0 | None => return Err(ParseError::MissingEqual), |
205 | 0 | Some((pos, byte)) => return Err(ParseError::InvalidToken { |
206 | 0 | pos: pos, |
207 | 0 | byte: byte, |
208 | 0 | }), |
209 | | } |
210 | | } |
211 | | |
212 | | let value; |
213 | | // values must be restrict-name-char or "anything goes" |
214 | 0 | let mut is_quoted = false; |
215 | | |
216 | | 'value: loop { |
217 | 0 | if is_quoted { |
218 | 0 | match iter.next() { |
219 | 0 | Some((i, b'"')) if i > start => { |
220 | 0 | value = Indexed(start, i); |
221 | 0 | break 'value; |
222 | | }, |
223 | 0 | Some((_, c)) if is_restricted_quoted_char(c) => (), |
224 | 0 | None => return Err(ParseError::MissingQuote), |
225 | 0 | Some((pos, byte)) => return Err(ParseError::InvalidToken { |
226 | 0 | pos: pos, |
227 | 0 | byte: byte, |
228 | 0 | }), |
229 | | } |
230 | | } else { |
231 | 0 | match iter.next() { |
232 | 0 | Some((i, b'"')) if i == start => { |
233 | 0 | is_quoted = true; |
234 | 0 | start = i + 1; |
235 | 0 | }, |
236 | 0 | Some((_, c)) if is_token(c) => (), |
237 | 0 | Some((i, b';')) if i > start => { |
238 | 0 | value = Indexed(start, i); |
239 | 0 | start = i + 1; |
240 | 0 | break 'value; |
241 | | } |
242 | | None => { |
243 | 0 | value = Indexed(start, s.len()); |
244 | 0 | start = s.len(); |
245 | 0 | break 'value; |
246 | | }, |
247 | | |
248 | 0 | Some((pos, byte)) => return Err(ParseError::InvalidToken { |
249 | 0 | pos: pos, |
250 | 0 | byte: byte, |
251 | 0 | }), |
252 | | } |
253 | | } |
254 | | } |
255 | | |
256 | 0 | if is_quoted { |
257 | | 'ws: loop { |
258 | 0 | match iter.next() { |
259 | 0 | Some((i, b';')) => { |
260 | | // next param |
261 | 0 | start = i + 1; |
262 | 0 | break 'ws; |
263 | | }, |
264 | 0 | Some((_, b' ')) => { |
265 | 0 | // skip whitespace |
266 | 0 | }, |
267 | | None => { |
268 | | // eof |
269 | 0 | start = s.len(); |
270 | 0 | break 'ws; |
271 | | }, |
272 | 0 | Some((pos, byte)) => return Err(ParseError::InvalidToken { |
273 | 0 | pos: pos, |
274 | 0 | byte: byte, |
275 | 0 | }), |
276 | | } |
277 | | } |
278 | 0 | } |
279 | | |
280 | 0 | match params { |
281 | 0 | ParamSource::Utf8(i) => { |
282 | 0 | let i = i + 2; |
283 | 0 | let charset = Indexed(i, "charset".len() + i); |
284 | 0 | let utf8 = Indexed(charset.1 + 1, charset.1 + "utf-8".len() + 1); |
285 | 0 | params = ParamSource::Custom(semicolon, vec![ |
286 | 0 | (charset, utf8), |
287 | 0 | (name, value), |
288 | 0 | ]); |
289 | 0 | }, |
290 | 0 | ParamSource::Custom(_, ref mut vec) => { |
291 | 0 | vec.push((name, value)); |
292 | 0 | }, |
293 | | ParamSource::None => { |
294 | 0 | if semicolon + 2 == name.0 && CHARSET == &s[name.0..name.1] { |
295 | 0 | if UTF_8 == &s[value.0..value.1] { |
296 | 0 | params = ParamSource::Utf8(semicolon); |
297 | 0 | continue 'params; |
298 | 0 | } |
299 | 0 | } |
300 | 0 | params = ParamSource::Custom(semicolon, vec![(name, value)]); |
301 | | }, |
302 | | } |
303 | | } |
304 | 0 | Ok(params) |
305 | 0 | } |
306 | | |
307 | 0 | fn lower_ascii_with_params(s: &str, semi: usize, params: &[(Indexed, Indexed)]) -> String { |
308 | 0 | let mut owned = s.to_owned(); |
309 | 0 | owned[..semi].make_ascii_lowercase(); |
310 | | |
311 | 0 | for &(ref name, ref value) in params { |
312 | 0 | owned[name.0..name.1].make_ascii_lowercase(); |
313 | | // Since we just converted this part of the string to lowercase, |
314 | | // we can skip the `Name == &str` unicase check and do a faster |
315 | | // memcmp instead. |
316 | 0 | if &owned[name.0..name.1] == CHARSET.source { |
317 | 0 | owned[value.0..value.1].make_ascii_lowercase(); |
318 | 0 | } |
319 | | } |
320 | | |
321 | 0 | owned |
322 | 0 | } |
323 | | |
324 | | // From [RFC6838](http://tools.ietf.org/html/rfc6838#section-4.2): |
325 | | // |
326 | | // > All registered media types MUST be assigned top-level type and |
327 | | // > subtype names. The combination of these names serves to uniquely |
328 | | // > identify the media type, and the subtype name facet (or the absence |
329 | | // > of one) identifies the registration tree. Both top-level type and |
330 | | // > subtype names are case-insensitive. |
331 | | // > |
332 | | // > Type and subtype names MUST conform to the following ABNF: |
333 | | // > |
334 | | // > type-name = restricted-name |
335 | | // > subtype-name = restricted-name |
336 | | // > |
337 | | // > restricted-name = restricted-name-first *126restricted-name-chars |
338 | | // > restricted-name-first = ALPHA / DIGIT |
339 | | // > restricted-name-chars = ALPHA / DIGIT / "!" / "#" / |
340 | | // > "$" / "&" / "-" / "^" / "_" |
341 | | // > restricted-name-chars =/ "." ; Characters before first dot always |
342 | | // > ; specify a facet name |
343 | | // > restricted-name-chars =/ "+" ; Characters after last plus always |
344 | | // > ; specify a structured syntax suffix |
345 | | |
346 | | // However, [HTTP](https://tools.ietf.org/html/rfc7231#section-3.1.1.1): |
347 | | // |
348 | | // > media-type = type "/" subtype *( OWS ";" OWS parameter ) |
349 | | // > type = token |
350 | | // > subtype = token |
351 | | // > parameter = token "=" ( token / quoted-string ) |
352 | | // |
353 | | // Where token is defined as: |
354 | | // |
355 | | // > token = 1*tchar |
356 | | // > tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." / |
357 | | // > "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA |
358 | | // |
359 | | // So, clearly, ¯\_(ツ)_/¯ |
360 | | |
// Turns a comma-terminated list of 0/1 flags into an array of `bool`s
// (any non-zero flag becomes `true`).
macro_rules! byte_map {
    ($($flag:expr,)*) => {
        [$($flag != 0,)*]
    };
}

// Lookup table indexed by byte value: `true` for bytes that are `tchar`s
// (allowed in an HTTP token). One row per 16 byte values.
static TOKEN_MAP: [bool; 256] = byte_map![
    // 0x00 - 0x1F: control characters
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 0x20: SP ! " # $ % & ' ( ) * + , - . /
    0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0,
    // 0x30: 0-9 : ; < = > ?
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
    // 0x40: @ A-O
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    // 0x50: P-Z [ \ ] ^ _
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1,
    // 0x60: ` a-o
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    // 0x70: p-z { | } ~ DEL
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0,
    // 0x80 - 0xFF: non-ASCII bytes are never token bytes
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
];

/// Reports whether `c` may appear in a token, by table lookup.
fn is_token(c: u8) -> bool {
    TOKEN_MAP[usize::from(c)]
}
389 | | |
/// Reports whether `c` is accepted inside a quoted parameter value:
/// every byte except the ASCII control characters (0-31) and DEL (127).
fn is_restricted_quoted_char(c: u8) -> bool {
    let is_control = c < b' ' || c == 0x7F;
    !is_control
}
393 | | |
// Checks every entry of `TOKEN_MAP` against the `tchar` set spelled out
// explicitly: ASCII letters, digits and the listed punctuation.
#[test]
#[allow(warnings)] // ... ranges deprecated
fn test_lookup_tables() {
    const SYMBOLS: &'static [u8] = b"!#$%&'*+-.^_`|~";

    for (idx, &valid) in TOKEN_MAP.iter().enumerate() {
        let byte = idx as u8;
        let should = match byte {
            b'a'...b'z' | b'A'...b'Z' | b'0'...b'9' => true,
            other => SYMBOLS.contains(&other),
        };
        assert_eq!(valid, should, "{:?} ({}) should be {}", byte as char, byte, should);
    }
}
423 | | |
// Well-formed inputs: every entry is yielded as `Ok`, then the iterator ends.
#[test]
fn test_parse_iterator() {
    let json = parse("application/json").unwrap();

    // Two comma-separated entries.
    let mut pair = MimeIter::new("application/json, application/json");
    assert_eq!(pair.next().unwrap().unwrap(), json);
    assert_eq!(pair.next().unwrap().unwrap(), json);
    assert_eq!(pair.next(), None);

    // A single entry, without and with an empty trailing parameter list.
    for input in &["application/json", "application/json; "] {
        let mut single = MimeIter::new(*input);
        assert_eq!(single.next().unwrap().unwrap(), json);
        assert_eq!(single.next(), None);
    }
}
439 | | |
// A malformed entry between two valid ones is reported as `Err` holding the
// offending slice, and iteration continues with the entry after it.
#[test]
fn test_parse_iterator_invalid() {
    let json = parse("application/json").unwrap();
    let mut entries = MimeIter::new("application/json, invalid, application/json");

    let first = entries.next().unwrap();
    assert_eq!(first.unwrap(), json);

    let second = entries.next().unwrap();
    assert_eq!(second.unwrap_err(), "invalid");

    let third = entries.next().unwrap();
    assert_eq!(third.unwrap(), json);

    assert_eq!(entries.next(), None);
}
448 | | |
// No entry of the input is a valid mime.
//
// The previous input, "application/json, text/html", consists of two valid
// entries, so the first item was `Ok` and `unwrap_err()` panicked. This input
// really contains nothing valid.
#[test]
fn test_parse_iterator_all_invalid() {
    let mut iter = MimeIter::new("invalid, also-invalid");
    // The text before the first separator is reported as an invalid slice.
    assert_eq!(iter.next().unwrap().unwrap_err(), "invalid");
    // The trailing entry is malformed without containing an invalid byte
    // (its slash is missing), which ends the iteration.
    assert_eq!(iter.next(), None);
}