/rust/registry/src/index.crates.io-1949cf8c6b5b557f/proc-macro2-1.0.101/src/parse.rs
Line | Count | Source |
1 | | use crate::fallback::{ |
2 | | self, is_ident_continue, is_ident_start, Group, Ident, LexError, Literal, Span, TokenStream, |
3 | | TokenStreamBuilder, |
4 | | }; |
5 | | use crate::{Delimiter, Punct, Spacing, TokenTree}; |
6 | | use core::char; |
7 | | use core::str::{Bytes, CharIndices, Chars}; |
8 | | |
/// A cheap, copyable position in the text being lexed.
///
/// `rest` is the part of the input that has not been consumed yet. When the
/// crate is compiled with `cfg(span_locations)`, `off` is moved forward by the
/// number of `char`s consumed on every call to `advance`.
#[derive(Copy, Clone, Eq, PartialEq)]
pub(crate) struct Cursor<'a> {
    pub(crate) rest: &'a str,
    #[cfg(span_locations)]
    pub(crate) off: u32,
}

impl<'a> Cursor<'a> {
    /// Returns a cursor moved `bytes` bytes further into the input.
    ///
    /// `bytes` is passed straight to `str::split_at`, so it must be a char
    /// boundary inside `rest`; otherwise the call panics.
    pub(crate) fn advance(&self, bytes: usize) -> Cursor<'a> {
        let (_consumed, remaining) = self.rest.split_at(bytes);
        Cursor {
            rest: remaining,
            #[cfg(span_locations)]
            off: self.off + _consumed.chars().count() as u32,
        }
    }

    /// True when the remaining input begins with the string `s`.
    pub(crate) fn starts_with(&self, s: &str) -> bool {
        self.rest.starts_with(s)
    }

    /// True when the remaining input begins with the character `ch`.
    pub(crate) fn starts_with_char(&self, ch: char) -> bool {
        self.rest.starts_with(ch)
    }

    /// True when the first remaining character satisfies the predicate `f`.
    pub(crate) fn starts_with_fn<Pattern>(&self, f: Pattern) -> bool
    where
        Pattern: FnMut(char) -> bool,
    {
        self.rest.starts_with(f)
    }

    /// True when there is no input left.
    pub(crate) fn is_empty(&self) -> bool {
        self.rest.is_empty()
    }

    /// Number of bytes left in the input.
    fn len(&self) -> usize {
        self.rest.len()
    }

    /// The remaining input viewed as raw bytes.
    fn as_bytes(&self) -> &'a [u8] {
        self.rest.as_bytes()
    }

    /// Iterator over the bytes of the remaining input.
    fn bytes(&self) -> Bytes<'a> {
        self.rest.bytes()
    }

    /// Iterator over the characters of the remaining input.
    fn chars(&self) -> Chars<'a> {
        self.rest.chars()
    }

    /// Iterator over `(byte offset, char)` pairs of the remaining input.
    fn char_indices(&self) -> CharIndices<'a> {
        self.rest.char_indices()
    }

    /// Consumes the literal text `tag` if the input starts with it.
    ///
    /// Returns the cursor just past `tag`, or `Reject` when the input does not
    /// begin with `tag`.
    fn parse(&self, tag: &str) -> Result<Cursor<'a>, Reject> {
        if !self.starts_with(tag) {
            return Err(Reject);
        }
        Ok(self.advance(tag.len()))
    }
}

/// Data-less error meaning "the input does not match at this position".
pub(crate) struct Reject;
/// Outcome of one parsing step: the cursor after the match together with the
/// parsed value, or `Reject`.
type PResult<'a, O> = Result<(Cursor<'a>, O), Reject>;
76 | | |
77 | 19.1M | fn skip_whitespace(input: Cursor) -> Cursor { |
78 | 19.1M | let mut s = input; |
79 | | |
80 | 20.4M | while !s.is_empty() { |
81 | 20.4M | let byte = s.as_bytes()[0]; |
82 | 20.4M | if byte == b'/' { |
83 | 161k | if s.starts_with("//") |
84 | 12.0k | && (!s.starts_with("///") || s.starts_with("////")) |
85 | 11.0k | && !s.starts_with("//!") |
86 | | { |
87 | 7.53k | let (cursor, _) = take_until_newline_or_eof(s); |
88 | 7.53k | s = cursor; |
89 | 7.53k | continue; |
90 | 153k | } else if s.starts_with("/**/") { |
91 | 180 | s = s.advance(4); |
92 | 180 | continue; |
93 | 153k | } else if s.starts_with("/*") |
94 | 495 | && (!s.starts_with("/**") || s.starts_with("/***")) |
95 | 324 | && !s.starts_with("/*!") |
96 | | { |
97 | 297 | match block_comment(s) { |
98 | 289 | Ok((rest, _)) => { |
99 | 289 | s = rest; |
100 | 289 | continue; |
101 | | } |
102 | 8 | Err(Reject) => return s, |
103 | | } |
104 | 153k | } |
105 | 20.3M | } |
106 | 19.1M | match byte { |
107 | 20.2M | b' ' | 0x09..=0x0d => { |
108 | 1.33M | s = s.advance(1); |
109 | 1.33M | continue; |
110 | | } |
111 | 19.1M | b if b.is_ascii() => {} |
112 | | _ => { |
113 | 438 | let ch = s.chars().next().unwrap(); |
114 | 438 | if is_whitespace(ch) { |
115 | 180 | s = s.advance(ch.len_utf8()); |
116 | 180 | continue; |
117 | 258 | } |
118 | | } |
119 | | } |
120 | 19.1M | return s; |
121 | | } |
122 | 155 | s |
123 | 19.1M | } |
124 | | |
125 | 495 | fn block_comment(input: Cursor) -> PResult<&str> { |
126 | 495 | if !input.starts_with("/*") { |
127 | 0 | return Err(Reject); |
128 | 495 | } |
129 | | |
130 | 495 | let mut depth = 0usize; |
131 | 495 | let bytes = input.as_bytes(); |
132 | 495 | let mut i = 0usize; |
133 | 495 | let upper = bytes.len() - 1; |
134 | | |
135 | 14.1M | while i < upper { |
136 | 14.1M | if bytes[i] == b'/' && bytes[i + 1] == b'*' { |
137 | 1.40M | depth += 1; |
138 | 1.40M | i += 1; // eat '*' |
139 | 12.7M | } else if bytes[i] == b'*' && bytes[i + 1] == b'/' { |
140 | 20.0k | depth -= 1; |
141 | 20.0k | if depth == 0 { |
142 | 479 | return Ok((input.advance(i + 2), &input.rest[..i + 2])); |
143 | 19.5k | } |
144 | 19.5k | i += 1; // eat '/' |
145 | 12.7M | } |
146 | 14.1M | i += 1; |
147 | | } |
148 | | |
149 | 16 | Err(Reject) |
150 | 495 | } |
151 | | |
/// Reports whether `ch` is treated as whitespace by the lexer.
fn is_whitespace(ch: char) -> bool {
    // Rust treats left-to-right mark and right-to-left mark as whitespace
    matches!(ch, '\u{200e}' | '\u{200f}') || ch.is_whitespace()
}
156 | | |
157 | 3.03M | fn word_break(input: Cursor) -> Result<Cursor, Reject> { |
158 | 3.03M | match input.chars().next() { |
159 | 3.03M | Some(ch) if is_ident_continue(ch) => Err(Reject), |
160 | 3.03M | Some(_) | None => Ok(input), |
161 | | } |
162 | 3.03M | } |
163 | | |
164 | | // Rustc's representation of a macro expansion error in expression position or |
165 | | // type position. |
166 | | const ERROR: &str = "(/*ERROR*/)"; |
167 | | |
168 | 435 | pub(crate) fn token_stream(mut input: Cursor) -> Result<TokenStream, LexError> { |
169 | 435 | let mut trees = TokenStreamBuilder::new(); |
170 | 435 | let mut stack = Vec::new(); |
171 | | |
172 | | loop { |
173 | 19.1M | input = skip_whitespace(input); |
174 | | |
175 | 19.1M | if let Ok((rest, ())) = doc_comment(input, &mut trees) { |
176 | 4.67k | input = rest; |
177 | 4.67k | continue; |
178 | 19.1M | } |
179 | | |
180 | | #[cfg(span_locations)] |
181 | | let lo = input.off; |
182 | | |
183 | 19.1M | let first = match input.bytes().next() { |
184 | 19.1M | Some(first) => first, |
185 | 155 | None => match stack.last() { |
186 | 138 | None => return Ok(trees.build()), |
187 | | #[cfg(span_locations)] |
188 | | Some((lo, _frame)) => { |
189 | | return Err(LexError { |
190 | | span: Span { lo: *lo, hi: *lo }, |
191 | | }) |
192 | | } |
193 | | #[cfg(not(span_locations))] |
194 | 17 | Some(_frame) => return Err(LexError { span: Span {} }), |
195 | | }, |
196 | | }; |
197 | | |
198 | 5.40M | if let Some(open_delimiter) = match first { |
199 | 4.08M | b'(' if !input.starts_with(ERROR) => Some(Delimiter::Parenthesis), |
200 | 1.31M | b'[' => Some(Delimiter::Bracket), |
201 | 3.28k | b'{' => Some(Delimiter::Brace), |
202 | 13.7M | _ => None, |
203 | 5.40M | } { |
204 | 5.40M | input = input.advance(1); |
205 | 5.40M | let frame = (open_delimiter, trees); |
206 | 5.40M | #[cfg(span_locations)] |
207 | 5.40M | let frame = (lo, frame); |
208 | 5.40M | stack.push(frame); |
209 | 5.40M | trees = TokenStreamBuilder::new(); |
210 | 13.7M | } else if let Some(close_delimiter) = match first { |
211 | 531k | b')' => Some(Delimiter::Parenthesis), |
212 | 3.57k | b']' => Some(Delimiter::Bracket), |
213 | 140 | b'}' => Some(Delimiter::Brace), |
214 | 13.1M | _ => None, |
215 | | } { |
216 | 534k | let frame = match stack.pop() { |
217 | 534k | Some(frame) => frame, |
218 | 6 | None => return Err(lex_error(input)), |
219 | | }; |
220 | | #[cfg(span_locations)] |
221 | | let (lo, frame) = frame; |
222 | 534k | let (open_delimiter, outer) = frame; |
223 | 534k | if open_delimiter != close_delimiter { |
224 | 3 | return Err(lex_error(input)); |
225 | 534k | } |
226 | 534k | input = input.advance(1); |
227 | 534k | let mut g = Group::new(open_delimiter, trees.build()); |
228 | 534k | g.set_span(Span { |
229 | 534k | #[cfg(span_locations)] |
230 | 534k | lo, |
231 | 534k | #[cfg(span_locations)] |
232 | 534k | hi: input.off, |
233 | 534k | }); |
234 | 534k | trees = outer; |
235 | 534k | trees.push_token_from_parser(TokenTree::Group(crate::Group::_new_fallback(g))); |
236 | | } else { |
237 | 13.1M | let (rest, mut tt) = match leaf_token(input) { |
238 | 13.1M | Ok((rest, tt)) => (rest, tt), |
239 | 271 | Err(Reject) => return Err(lex_error(input)), |
240 | | }; |
241 | 13.1M | tt.set_span(crate::Span::_new_fallback(Span { |
242 | 13.1M | #[cfg(span_locations)] |
243 | 13.1M | lo, |
244 | 13.1M | #[cfg(span_locations)] |
245 | 13.1M | hi: rest.off, |
246 | 13.1M | })); |
247 | 13.1M | trees.push_token_from_parser(tt); |
248 | 13.1M | input = rest; |
249 | | } |
250 | | } |
251 | 435 | } |
252 | | |
253 | 280 | fn lex_error(cursor: Cursor) -> LexError { |
254 | | #[cfg(not(span_locations))] |
255 | 280 | let _ = cursor; |
256 | 280 | LexError { |
257 | 280 | span: Span { |
258 | 280 | #[cfg(span_locations)] |
259 | 280 | lo: cursor.off, |
260 | 280 | #[cfg(span_locations)] |
261 | 280 | hi: cursor.off, |
262 | 280 | }, |
263 | 280 | } |
264 | 280 | } |
265 | | |
266 | 13.1M | fn leaf_token(input: Cursor) -> PResult<TokenTree> { |
267 | 13.1M | if let Ok((input, l)) = literal(input) { |
268 | | // must be parsed before ident |
269 | 3.09M | Ok((input, TokenTree::Literal(crate::Literal::_new_fallback(l)))) |
270 | 10.0M | } else if let Ok((input, p)) = punct(input) { |
271 | 8.54M | Ok((input, TokenTree::Punct(p))) |
272 | 1.53M | } else if let Ok((input, i)) = ident(input) { |
273 | 1.53M | Ok((input, TokenTree::Ident(i))) |
274 | 342 | } else if input.starts_with(ERROR) { |
275 | 71 | let rest = input.advance(ERROR.len()); |
276 | 71 | let repr = crate::Literal::_new_fallback(Literal::_new(ERROR.to_owned())); |
277 | 71 | Ok((rest, TokenTree::Literal(repr))) |
278 | | } else { |
279 | 271 | Err(Reject) |
280 | | } |
281 | 13.1M | } |
282 | | |
283 | 1.53M | fn ident(input: Cursor) -> PResult<crate::Ident> { |
284 | 1.53M | if [ |
285 | 1.53M | "r\"", "r#\"", "r##", "b\"", "b\'", "br\"", "br#", "c\"", "cr\"", "cr#", |
286 | 1.53M | ] |
287 | 1.53M | .iter() |
288 | 15.3M | .any(|prefix| input.starts_with(prefix)) |
289 | | { |
290 | 70 | Err(Reject) |
291 | | } else { |
292 | 1.53M | ident_any(input) |
293 | | } |
294 | 1.53M | } |
295 | | |
296 | 1.53M | fn ident_any(input: Cursor) -> PResult<crate::Ident> { |
297 | 1.53M | let raw = input.starts_with("r#"); |
298 | 1.53M | let rest = input.advance((raw as usize) << 1); |
299 | | |
300 | 1.53M | let (rest, sym) = ident_not_raw(rest)?; |
301 | | |
302 | 1.53M | if !raw { |
303 | 1.53M | let ident = |
304 | 1.53M | crate::Ident::_new_fallback(Ident::new_unchecked(sym, fallback::Span::call_site())); |
305 | 1.53M | return Ok((rest, ident)); |
306 | 1.40k | } |
307 | | |
308 | 1.40k | match sym { |
309 | 1.40k | "_" | "super" | "self" | "Self" | "crate" => return Err(Reject), |
310 | 1.40k | _ => {} |
311 | | } |
312 | | |
313 | 1.40k | let ident = |
314 | 1.40k | crate::Ident::_new_fallback(Ident::new_raw_unchecked(sym, fallback::Span::call_site())); |
315 | 1.40k | Ok((rest, ident)) |
316 | 1.53M | } |
317 | | |
318 | 1.60M | fn ident_not_raw(input: Cursor) -> PResult<&str> { |
319 | 1.60M | let mut chars = input.char_indices(); |
320 | | |
321 | 1.60M | match chars.next() { |
322 | 1.60M | Some((_, ch)) if is_ident_start(ch) => {} |
323 | 58.3k | _ => return Err(Reject), |
324 | | } |
325 | | |
326 | 1.54M | let mut end = input.len(); |
327 | 14.7M | for (i, ch) in chars { |
328 | 14.7M | if !is_ident_continue(ch) { |
329 | 1.54M | end = i; |
330 | 1.54M | break; |
331 | 13.1M | } |
332 | | } |
333 | | |
334 | 1.54M | Ok((input.advance(end), &input.rest[..end])) |
335 | 1.60M | } |
336 | | |
337 | 13.1M | pub(crate) fn literal(input: Cursor) -> PResult<Literal> { |
338 | 13.1M | let rest = literal_nocapture(input)?; |
339 | 3.09M | let end = input.len() - rest.len(); |
340 | 3.09M | Ok((rest, Literal::_new(input.rest[..end].to_string()))) |
341 | 13.1M | } |
342 | | |
343 | 13.1M | fn literal_nocapture(input: Cursor) -> Result<Cursor, Reject> { |
344 | 13.1M | if let Ok(ok) = string(input) { |
345 | 57.3k | Ok(ok) |
346 | 13.1M | } else if let Ok(ok) = byte_string(input) { |
347 | 90 | Ok(ok) |
348 | 13.1M | } else if let Ok(ok) = c_string(input) { |
349 | 109 | Ok(ok) |
350 | 13.1M | } else if let Ok(ok) = byte(input) { |
351 | 161 | Ok(ok) |
352 | 13.1M | } else if let Ok(ok) = character(input) { |
353 | 558 | Ok(ok) |
354 | 13.1M | } else if let Ok(ok) = float(input) { |
355 | 1.17M | Ok(ok) |
356 | 11.9M | } else if let Ok(ok) = int(input) { |
357 | 1.85M | Ok(ok) |
358 | | } else { |
359 | 10.0M | Err(Reject) |
360 | | } |
361 | 13.1M | } |
362 | | |
363 | 58.2k | fn literal_suffix(input: Cursor) -> Cursor { |
364 | 58.2k | match ident_not_raw(input) { |
365 | 168 | Ok((input, _)) => input, |
366 | 58.1k | Err(Reject) => input, |
367 | | } |
368 | 58.2k | } |
369 | | |
370 | 13.1M | fn string(input: Cursor) -> Result<Cursor, Reject> { |
371 | 13.1M | if let Ok(input) = input.parse("\"") { |
372 | 57.1k | cooked_string(input) |
373 | 13.1M | } else if let Ok(input) = input.parse("r") { |
374 | 159k | raw_string(input) |
375 | | } else { |
376 | 12.9M | Err(Reject) |
377 | | } |
378 | 13.1M | } |
379 | | |
380 | 57.1k | fn cooked_string(mut input: Cursor) -> Result<Cursor, Reject> { |
381 | 57.1k | let mut chars = input.char_indices(); |
382 | | |
383 | 3.56M | while let Some((i, ch)) = chars.next() { |
384 | 3.56M | match ch { |
385 | | '"' => { |
386 | 57.1k | let input = input.advance(i + 1); |
387 | 57.1k | return Ok(literal_suffix(input)); |
388 | | } |
389 | 412 | '\r' => match chars.next() { |
390 | 409 | Some((_, '\n')) => {} |
391 | 3 | _ => break, |
392 | | }, |
393 | 1.01k | '\\' => match chars.next() { |
394 | | Some((_, 'x')) => { |
395 | 447 | backslash_x_char(&mut chars)?; |
396 | | } |
397 | 502 | Some((_, 'n' | 'r' | 't' | '\\' | '\'' | '"' | '0')) => {} |
398 | | Some((_, 'u')) => { |
399 | 2 | backslash_u(&mut chars)?; |
400 | | } |
401 | 64 | Some((newline, ch @ ('\n' | '\r'))) => { |
402 | 64 | input = input.advance(newline + 1); |
403 | 64 | trailing_backslash(&mut input, ch as u8)?; |
404 | 61 | chars = input.char_indices(); |
405 | | } |
406 | 3 | _ => break, |
407 | | }, |
408 | 3.50M | _ch => {} |
409 | | } |
410 | | } |
411 | 13 | Err(Reject) |
412 | 57.1k | } |
413 | | |
414 | 159k | fn raw_string(input: Cursor) -> Result<Cursor, Reject> { |
415 | 159k | let (input, delimiter) = delimiter_of_raw_string(input)?; |
416 | 218 | let mut bytes = input.bytes().enumerate(); |
417 | 3.16M | while let Some((i, byte)) = bytes.next() { |
418 | 3.90k | match byte { |
419 | 3.90k | b'"' if input.rest[i + 1..].starts_with(delimiter) => { |
420 | 210 | let rest = input.advance(i + 1 + delimiter.len()); |
421 | 210 | return Ok(literal_suffix(rest)); |
422 | | } |
423 | 13.4k | b'\r' => match bytes.next() { |
424 | 13.4k | Some((_, b'\n')) => {} |
425 | 1 | _ => break, |
426 | | }, |
427 | 3.15M | _ => {} |
428 | | } |
429 | | } |
430 | 8 | Err(Reject) |
431 | 159k | } |
432 | | |
433 | 13.1M | fn byte_string(input: Cursor) -> Result<Cursor, Reject> { |
434 | 13.1M | if let Ok(input) = input.parse("b\"") { |
435 | 64 | cooked_byte_string(input) |
436 | 13.1M | } else if let Ok(input) = input.parse("br") { |
437 | 361 | raw_byte_string(input) |
438 | | } else { |
439 | 13.1M | Err(Reject) |
440 | | } |
441 | 13.1M | } |
442 | | |
443 | 64 | fn cooked_byte_string(mut input: Cursor) -> Result<Cursor, Reject> { |
444 | 64 | let mut bytes = input.bytes().enumerate(); |
445 | 4.59M | while let Some((offset, b)) = bytes.next() { |
446 | 4.54M | match b { |
447 | | b'"' => { |
448 | 46 | let input = input.advance(offset + 1); |
449 | 46 | return Ok(literal_suffix(input)); |
450 | | } |
451 | 43.1k | b'\r' => match bytes.next() { |
452 | 43.1k | Some((_, b'\n')) => {} |
453 | 2 | _ => break, |
454 | | }, |
455 | 6.53k | b'\\' => match bytes.next() { |
456 | | Some((_, b'x')) => { |
457 | 2.77k | backslash_x_byte(&mut bytes)?; |
458 | | } |
459 | 3.73k | Some((_, b'n' | b'r' | b't' | b'\\' | b'0' | b'\'' | b'"')) => {} |
460 | 16 | Some((newline, b @ (b'\n' | b'\r'))) => { |
461 | 16 | input = input.advance(newline + 1); |
462 | 16 | trailing_backslash(&mut input, b)?; |
463 | 15 | bytes = input.bytes().enumerate(); |
464 | | } |
465 | 1 | _ => break, |
466 | | }, |
467 | 4.54M | b if b.is_ascii() => {} |
468 | 1 | _ => break, |
469 | | } |
470 | | } |
471 | 14 | Err(Reject) |
472 | 64 | } |
473 | | |
474 | 160k | fn delimiter_of_raw_string(input: Cursor) -> PResult<&str> { |
475 | 561k | for (i, byte) in input.bytes().enumerate() { |
476 | 561k | match byte { |
477 | | b'"' => { |
478 | 298 | if i > 255 { |
479 | | // https://github.com/rust-lang/rust/pull/95251 |
480 | 0 | return Err(Reject); |
481 | 298 | } |
482 | 298 | return Ok((input.advance(i + 1), &input.rest[..i])); |
483 | | } |
484 | 401k | b'#' => {} |
485 | 159k | _ => break, |
486 | | } |
487 | | } |
488 | 159k | Err(Reject) |
489 | 160k | } |
490 | | |
491 | 361 | fn raw_byte_string(input: Cursor) -> Result<Cursor, Reject> { |
492 | 361 | let (input, delimiter) = delimiter_of_raw_string(input)?; |
493 | 51 | let mut bytes = input.bytes().enumerate(); |
494 | 2.55M | while let Some((i, byte)) = bytes.next() { |
495 | 130k | match byte { |
496 | 130k | b'"' if input.rest[i + 1..].starts_with(delimiter) => { |
497 | 44 | let rest = input.advance(i + 1 + delimiter.len()); |
498 | 44 | return Ok(literal_suffix(rest)); |
499 | | } |
500 | 8.41k | b'\r' => match bytes.next() { |
501 | 8.41k | Some((_, b'\n')) => {} |
502 | 1 | _ => break, |
503 | | }, |
504 | 2.54M | other => { |
505 | 2.54M | if !other.is_ascii() { |
506 | 0 | break; |
507 | 2.54M | } |
508 | | } |
509 | | } |
510 | | } |
511 | 7 | Err(Reject) |
512 | 361 | } |
513 | | |
514 | 13.1M | fn c_string(input: Cursor) -> Result<Cursor, Reject> { |
515 | 13.1M | if let Ok(input) = input.parse("c\"") { |
516 | 97 | cooked_c_string(input) |
517 | 13.1M | } else if let Ok(input) = input.parse("cr") { |
518 | 369 | raw_c_string(input) |
519 | | } else { |
520 | 13.1M | Err(Reject) |
521 | | } |
522 | 13.1M | } |
523 | | |
524 | 369 | fn raw_c_string(input: Cursor) -> Result<Cursor, Reject> { |
525 | 369 | let (input, delimiter) = delimiter_of_raw_string(input)?; |
526 | 29 | let mut bytes = input.bytes().enumerate(); |
527 | 2.28M | while let Some((i, byte)) = bytes.next() { |
528 | 263k | match byte { |
529 | 263k | b'"' if input.rest[i + 1..].starts_with(delimiter) => { |
530 | 20 | let rest = input.advance(i + 1 + delimiter.len()); |
531 | 20 | return Ok(literal_suffix(rest)); |
532 | | } |
533 | 12.4k | b'\r' => match bytes.next() { |
534 | 12.4k | Some((_, b'\n')) => {} |
535 | 0 | _ => break, |
536 | | }, |
537 | 5 | b'\0' => break, |
538 | 2.27M | _ => {} |
539 | | } |
540 | | } |
541 | 9 | Err(Reject) |
542 | 369 | } |
543 | | |
544 | 97 | fn cooked_c_string(mut input: Cursor) -> Result<Cursor, Reject> { |
545 | 97 | let mut chars = input.char_indices(); |
546 | | |
547 | 1.11M | while let Some((i, ch)) = chars.next() { |
548 | 1.11M | match ch { |
549 | | '"' => { |
550 | 89 | let input = input.advance(i + 1); |
551 | 89 | return Ok(literal_suffix(input)); |
552 | | } |
553 | 1 | '\r' => match chars.next() { |
554 | 0 | Some((_, '\n')) => {} |
555 | 1 | _ => break, |
556 | | }, |
557 | 30 | '\\' => match chars.next() { |
558 | | Some((_, 'x')) => { |
559 | 7 | backslash_x_nonzero(&mut chars)?; |
560 | | } |
561 | 11 | Some((_, 'n' | 'r' | 't' | '\\' | '\'' | '"')) => {} |
562 | | Some((_, 'u')) => { |
563 | 0 | if backslash_u(&mut chars)? == '\0' { |
564 | 0 | break; |
565 | 0 | } |
566 | | } |
567 | 12 | Some((newline, ch @ ('\n' | '\r'))) => { |
568 | 12 | input = input.advance(newline + 1); |
569 | 12 | trailing_backslash(&mut input, ch as u8)?; |
570 | 5 | chars = input.char_indices(); |
571 | | } |
572 | 0 | _ => break, |
573 | | }, |
574 | 0 | '\0' => break, |
575 | 1.11M | _ch => {} |
576 | | } |
577 | | } |
578 | 1 | Err(Reject) |
579 | 97 | } |
580 | | |
581 | 13.1M | fn byte(input: Cursor) -> Result<Cursor, Reject> { |
582 | 13.1M | let input = input.parse("b'")?; |
583 | 175 | let mut bytes = input.bytes().enumerate(); |
584 | 175 | let ok = match bytes.next().map(|(_, b)| b) { |
585 | 153 | Some(b'\\') => match bytes.next().map(|(_, b)| b) { |
586 | 74 | Some(b'x') => backslash_x_byte(&mut bytes).is_ok(), |
587 | 78 | Some(b'n' | b'r' | b't' | b'\\' | b'0' | b'\'' | b'"') => true, |
588 | 1 | _ => false, |
589 | | }, |
590 | 22 | b => b.is_some(), |
591 | | }; |
592 | 175 | if !ok { |
593 | 4 | return Err(Reject); |
594 | 171 | } |
595 | 171 | let (offset, _) = bytes.next().ok_or(Reject)?; |
596 | 169 | if !input.chars().as_str().is_char_boundary(offset) { |
597 | 0 | return Err(Reject); |
598 | 169 | } |
599 | 169 | let input = input.advance(offset).parse("'")?; |
600 | 161 | Ok(literal_suffix(input)) |
601 | 13.1M | } |
602 | | |
603 | 13.1M | fn character(input: Cursor) -> Result<Cursor, Reject> { |
604 | 13.1M | let input = input.parse("'")?; |
605 | 1.35k | let mut chars = input.char_indices(); |
606 | 1.35k | let ok = match chars.next().map(|(_, ch)| ch) { |
607 | 360 | Some('\\') => match chars.next().map(|(_, ch)| ch) { |
608 | 353 | Some('x') => backslash_x_char(&mut chars).is_ok(), |
609 | 0 | Some('u') => backslash_u(&mut chars).is_ok(), |
610 | 7 | Some('n' | 'r' | 't' | '\\' | '0' | '\'' | '"') => true, |
611 | 0 | _ => false, |
612 | | }, |
613 | 999 | ch => ch.is_some(), |
614 | | }; |
615 | 1.35k | if !ok { |
616 | 1 | return Err(Reject); |
617 | 1.35k | } |
618 | 1.35k | let (idx, _) = chars.next().ok_or(Reject)?; |
619 | 1.35k | let input = input.advance(idx).parse("'")?; |
620 | 558 | Ok(literal_suffix(input)) |
621 | 13.1M | } |
622 | | |
/// Pulls the next `(index, item)` pair from `$chars` and evaluates to the item
/// when it matches `$pat`. If the iterator is exhausted or the item does not
/// match, the *enclosing function* returns `Err(Reject)`.
macro_rules! next_ch {
    ($chars:ident @ $pat:pat) => {
        match $chars.next() {
            Some((_, ch)) => {
                if let $pat = ch {
                    ch
                } else {
                    return Err(Reject);
                }
            }
            None => return Err(Reject),
        }
    };
}
634 | | |
635 | 800 | fn backslash_x_char<I>(chars: &mut I) -> Result<(), Reject> |
636 | 800 | where |
637 | 800 | I: Iterator<Item = (usize, char)>, |
638 | | { |
639 | 800 | next_ch!(chars @ '0'..='7'); |
640 | 798 | next_ch!(chars @ '0'..='9' | 'a'..='f' | 'A'..='F'); |
641 | 797 | Ok(()) |
642 | 800 | } |
643 | | |
644 | 2.85k | fn backslash_x_byte<I>(chars: &mut I) -> Result<(), Reject> |
645 | 2.85k | where |
646 | 2.85k | I: Iterator<Item = (usize, u8)>, |
647 | | { |
648 | 2.85k | next_ch!(chars @ b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F'); |
649 | 2.85k | next_ch!(chars @ b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F'); |
650 | 2.84k | Ok(()) |
651 | 2.85k | } |
652 | | |
653 | 7 | fn backslash_x_nonzero<I>(chars: &mut I) -> Result<(), Reject> |
654 | 7 | where |
655 | 7 | I: Iterator<Item = (usize, char)>, |
656 | | { |
657 | 7 | let first = next_ch!(chars @ '0'..='9' | 'a'..='f' | 'A'..='F'); |
658 | 7 | let second = next_ch!(chars @ '0'..='9' | 'a'..='f' | 'A'..='F'); |
659 | 7 | if first == '0' && second == '0' { |
660 | 0 | Err(Reject) |
661 | | } else { |
662 | 7 | Ok(()) |
663 | | } |
664 | 7 | } |
665 | | |
666 | 2 | fn backslash_u<I>(chars: &mut I) -> Result<char, Reject> |
667 | 2 | where |
668 | 2 | I: Iterator<Item = (usize, char)>, |
669 | | { |
670 | 2 | next_ch!(chars @ '{'); |
671 | 0 | let mut value = 0; |
672 | 0 | let mut len = 0; |
673 | 0 | for (_, ch) in chars { |
674 | 0 | let digit = match ch { |
675 | 0 | '0'..='9' => ch as u8 - b'0', |
676 | 0 | 'a'..='f' => 10 + ch as u8 - b'a', |
677 | 0 | 'A'..='F' => 10 + ch as u8 - b'A', |
678 | 0 | '_' if len > 0 => continue, |
679 | 0 | '}' if len > 0 => return char::from_u32(value).ok_or(Reject), |
680 | 0 | _ => break, |
681 | | }; |
682 | 0 | if len == 6 { |
683 | 0 | break; |
684 | 0 | } |
685 | 0 | value *= 0x10; |
686 | 0 | value += u32::from(digit); |
687 | 0 | len += 1; |
688 | | } |
689 | 0 | Err(Reject) |
690 | 2 | } |
691 | | |
692 | 92 | fn trailing_backslash(input: &mut Cursor, mut last: u8) -> Result<(), Reject> { |
693 | 92 | let mut whitespace = input.bytes().enumerate(); |
694 | | loop { |
695 | 6.57M | if last == b'\r' && whitespace.next().map_or(true, |(_, b)| b != b'\n') { |
696 | 8 | return Err(Reject); |
697 | 6.57M | } |
698 | 6.57M | match whitespace.next() { |
699 | 6.57M | Some((_, b @ (b' ' | b'\t' | b'\n' | b'\r'))) => { |
700 | 6.57M | last = b; |
701 | 6.57M | } |
702 | 81 | Some((offset, _)) => { |
703 | 81 | *input = input.advance(offset); |
704 | 81 | return Ok(()); |
705 | | } |
706 | 3 | None => return Err(Reject), |
707 | | } |
708 | | } |
709 | 92 | } |
710 | | |
711 | 13.1M | fn float(input: Cursor) -> Result<Cursor, Reject> { |
712 | 13.1M | let mut rest = float_digits(input)?; |
713 | 1.17M | if let Some(ch) = rest.chars().next() { |
714 | 1.17M | if is_ident_start(ch) { |
715 | 1.79k | rest = ident_not_raw(rest)?.0; |
716 | 1.17M | } |
717 | 13 | } |
718 | 1.17M | word_break(rest) |
719 | 13.1M | } |
720 | | |
721 | 13.1M | fn float_digits(input: Cursor) -> Result<Cursor, Reject> { |
722 | 13.1M | let mut chars = input.chars().peekable(); |
723 | 13.1M | match chars.next() { |
724 | 13.1M | Some(ch) if '0' <= ch && ch <= '9' => {} |
725 | 10.0M | _ => return Err(Reject), |
726 | | } |
727 | | |
728 | 3.03M | let mut len = 1; |
729 | 3.03M | let mut has_dot = false; |
730 | 3.03M | let mut has_exp = false; |
731 | 22.3M | while let Some(&ch) = chars.peek() { |
732 | 22.3M | match ch { |
733 | 18.3M | '0'..='9' | '_' => { |
734 | 18.1M | chars.next(); |
735 | 18.1M | len += 1; |
736 | 18.1M | } |
737 | | '.' => { |
738 | 2.12M | if has_dot { |
739 | 936k | break; |
740 | 1.18M | } |
741 | 1.18M | chars.next(); |
742 | 1.18M | if chars |
743 | 1.18M | .peek() |
744 | 1.18M | .map_or(false, |&ch| ch == '.' || is_ident_start(ch)) |
745 | | { |
746 | 7.30k | return Err(Reject); |
747 | 1.17M | } |
748 | 1.17M | len += 1; |
749 | 1.17M | has_dot = true; |
750 | | } |
751 | | 'e' | 'E' => { |
752 | 5.36k | chars.next(); |
753 | 5.36k | len += 1; |
754 | 5.36k | has_exp = true; |
755 | 5.36k | break; |
756 | | } |
757 | 2.08M | _ => break, |
758 | | } |
759 | | } |
760 | | |
761 | 3.02M | if !(has_dot || has_exp) { |
762 | 1.84M | return Err(Reject); |
763 | 1.18M | } |
764 | | |
765 | 1.18M | if has_exp { |
766 | 5.36k | let token_before_exp = if has_dot { |
767 | 80 | Ok(input.advance(len - 1)) |
768 | | } else { |
769 | 5.28k | Err(Reject) |
770 | | }; |
771 | 5.36k | let mut has_sign = false; |
772 | 5.36k | let mut has_exp_value = false; |
773 | 2.68M | while let Some(&ch) = chars.peek() { |
774 | 2.68M | match ch { |
775 | | '+' | '-' => { |
776 | 1.95k | if has_exp_value { |
777 | 679 | break; |
778 | 1.27k | } |
779 | 1.27k | if has_sign { |
780 | 2 | return token_before_exp; |
781 | 1.27k | } |
782 | 1.27k | chars.next(); |
783 | 1.27k | len += 1; |
784 | 1.27k | has_sign = true; |
785 | | } |
786 | 1.97M | '0'..='9' => { |
787 | 1.96M | chars.next(); |
788 | 1.96M | len += 1; |
789 | 1.96M | has_exp_value = true; |
790 | 1.96M | } |
791 | 714k | '_' => { |
792 | 714k | chars.next(); |
793 | 714k | len += 1; |
794 | 714k | } |
795 | 4.67k | _ => break, |
796 | | } |
797 | | } |
798 | 5.36k | if !has_exp_value { |
799 | 4.19k | return token_before_exp; |
800 | 1.17k | } |
801 | 1.17M | } |
802 | | |
803 | 1.17M | Ok(input.advance(len)) |
804 | 13.1M | } |
805 | | |
806 | 11.9M | fn int(input: Cursor) -> Result<Cursor, Reject> { |
807 | 11.9M | let mut rest = digits(input)?; |
808 | 1.85M | if let Some(ch) = rest.chars().next() { |
809 | 1.85M | if is_ident_start(ch) { |
810 | 5.96k | rest = ident_not_raw(rest)?.0; |
811 | 1.85M | } |
812 | 22 | } |
813 | 1.85M | word_break(rest) |
814 | 11.9M | } |
815 | | |
816 | 11.9M | fn digits(mut input: Cursor) -> Result<Cursor, Reject> { |
817 | 11.9M | let base = if input.starts_with("0x") { |
818 | 53 | input = input.advance(2); |
819 | 53 | 16 |
820 | 11.9M | } else if input.starts_with("0o") { |
821 | 5 | input = input.advance(2); |
822 | 5 | 8 |
823 | 11.9M | } else if input.starts_with("0b") { |
824 | 199 | input = input.advance(2); |
825 | 199 | 2 |
826 | | } else { |
827 | 11.9M | 10 |
828 | | }; |
829 | | |
830 | 11.9M | let mut len = 0; |
831 | 11.9M | let mut empty = true; |
832 | 25.3M | for b in input.bytes() { |
833 | 25.3M | match b { |
834 | 17.0M | b'0'..=b'9' => { |
835 | 8.91M | let digit = (b - b'0') as u64; |
836 | 8.91M | if digit >= base { |
837 | 0 | return Err(Reject); |
838 | 8.91M | } |
839 | | } |
840 | 1.31M | b'a'..=b'f' => { |
841 | 1.00M | let digit = 10 + (b - b'a') as u64; |
842 | 1.00M | if digit >= base { |
843 | 115k | break; |
844 | 889k | } |
845 | | } |
846 | 5.02M | b'A'..=b'F' => { |
847 | 1.09M | let digit = 10 + (b - b'A') as u64; |
848 | 1.09M | if digit >= base { |
849 | 1.09M | break; |
850 | 18 | } |
851 | | } |
852 | | b'_' => { |
853 | 3.58M | if empty && base == 10 { |
854 | 61 | return Err(Reject); |
855 | 3.58M | } |
856 | 3.58M | len += 1; |
857 | 3.58M | continue; |
858 | | } |
859 | 10.7M | _ => break, |
860 | | } |
861 | 9.80M | len += 1; |
862 | 9.80M | empty = false; |
863 | | } |
864 | 11.9M | if empty { |
865 | 10.0M | Err(Reject) |
866 | | } else { |
867 | 1.85M | Ok(input.advance(len)) |
868 | | } |
869 | 11.9M | } |
870 | | |
871 | 10.0M | fn punct(input: Cursor) -> PResult<Punct> { |
872 | 10.0M | let (rest, ch) = punct_char(input)?; |
873 | 8.54M | if ch == '\'' { |
874 | 801 | let (after_lifetime, _ident) = ident_any(rest)?; |
875 | 787 | if after_lifetime.starts_with_char('\'') |
876 | 787 | || (after_lifetime.starts_with_char('#') && !rest.starts_with("r#")) |
877 | | { |
878 | 7 | Err(Reject) |
879 | | } else { |
880 | 780 | Ok((rest, Punct::new('\'', Spacing::Joint))) |
881 | | } |
882 | | } else { |
883 | 8.54M | let kind = match punct_char(rest) { |
884 | 3.96M | Ok(_) => Spacing::Joint, |
885 | 4.58M | Err(Reject) => Spacing::Alone, |
886 | | }; |
887 | 8.54M | Ok((rest, Punct::new(ch, kind))) |
888 | | } |
889 | 10.0M | } |
890 | | |
891 | 18.6M | fn punct_char(input: Cursor) -> PResult<char> { |
892 | 18.6M | if input.starts_with("//") || input.starts_with("/*") { |
893 | | // Do not accept `/` of a comment as a punct. |
894 | 727 | return Err(Reject); |
895 | 18.6M | } |
896 | | |
897 | 18.6M | let mut chars = input.chars(); |
898 | 18.6M | let first = match chars.next() { |
899 | 18.6M | Some(ch) => ch, |
900 | | None => { |
901 | 18 | return Err(Reject); |
902 | | } |
903 | | }; |
904 | 18.6M | let recognized = "~!@#$%^&*-=+|;:,<.>/?'"; |
905 | 18.6M | if recognized.contains(first) { |
906 | 12.5M | Ok((input.advance(first.len_utf8()), first)) |
907 | | } else { |
908 | 6.11M | Err(Reject) |
909 | | } |
910 | 18.6M | } |
911 | | |
912 | 19.1M | fn doc_comment<'a>(input: Cursor<'a>, trees: &mut TokenStreamBuilder) -> PResult<'a, ()> { |
913 | | #[cfg(span_locations)] |
914 | | let lo = input.off; |
915 | 19.1M | let (rest, (comment, inner)) = doc_comment_contents(input)?; |
916 | 4.71k | let fallback_span = Span { |
917 | 4.71k | #[cfg(span_locations)] |
918 | 4.71k | lo, |
919 | 4.71k | #[cfg(span_locations)] |
920 | 4.71k | hi: rest.off, |
921 | 4.71k | }; |
922 | 4.71k | let span = crate::Span::_new_fallback(fallback_span); |
923 | | |
924 | 4.71k | let mut scan_for_bare_cr = comment; |
925 | 5.01k | while let Some(cr) = scan_for_bare_cr.find('\r') { |
926 | 331 | let rest = &scan_for_bare_cr[cr + 1..]; |
927 | 331 | if !rest.starts_with('\n') { |
928 | 33 | return Err(Reject); |
929 | 298 | } |
930 | 298 | scan_for_bare_cr = rest; |
931 | | } |
932 | | |
933 | 4.67k | let mut pound = Punct::new('#', Spacing::Alone); |
934 | 4.67k | pound.set_span(span); |
935 | 4.67k | trees.push_token_from_parser(TokenTree::Punct(pound)); |
936 | | |
937 | 4.67k | if inner { |
938 | 3.53k | let mut bang = Punct::new('!', Spacing::Alone); |
939 | 3.53k | bang.set_span(span); |
940 | 3.53k | trees.push_token_from_parser(TokenTree::Punct(bang)); |
941 | 3.53k | } |
942 | | |
943 | 4.67k | let doc_ident = crate::Ident::_new_fallback(Ident::new_unchecked("doc", fallback_span)); |
944 | 4.67k | let mut equal = Punct::new('=', Spacing::Alone); |
945 | 4.67k | equal.set_span(span); |
946 | 4.67k | let mut literal = crate::Literal::_new_fallback(Literal::string(comment)); |
947 | 4.67k | literal.set_span(span); |
948 | 4.67k | let mut bracketed = TokenStreamBuilder::with_capacity(3); |
949 | 4.67k | bracketed.push_token_from_parser(TokenTree::Ident(doc_ident)); |
950 | 4.67k | bracketed.push_token_from_parser(TokenTree::Punct(equal)); |
951 | 4.67k | bracketed.push_token_from_parser(TokenTree::Literal(literal)); |
952 | 4.67k | let group = Group::new(Delimiter::Bracket, bracketed.build()); |
953 | 4.67k | let mut group = crate::Group::_new_fallback(group); |
954 | 4.67k | group.set_span(span); |
955 | 4.67k | trees.push_token_from_parser(TokenTree::Group(group)); |
956 | | |
957 | 4.67k | Ok((rest, ())) |
958 | 19.1M | } |
959 | | |
960 | 19.1M | fn doc_comment_contents(input: Cursor) -> PResult<(&str, bool)> { |
961 | 19.1M | if input.starts_with("//!") { |
962 | 3.51k | let input = input.advance(3); |
963 | 3.51k | let (input, s) = take_until_newline_or_eof(input); |
964 | 3.51k | Ok((input, (s, true))) |
965 | 19.1M | } else if input.starts_with("/*!") { |
966 | 27 | let (input, s) = block_comment(input)?; |
967 | 24 | Ok((input, (&s[3..s.len() - 2], true))) |
968 | 19.1M | } else if input.starts_with("///") { |
969 | 1.00k | let input = input.advance(3); |
970 | 1.00k | if input.starts_with_char('/') { |
971 | 0 | return Err(Reject); |
972 | 1.00k | } |
973 | 1.00k | let (input, s) = take_until_newline_or_eof(input); |
974 | 1.00k | Ok((input, (s, false))) |
975 | 19.1M | } else if input.starts_with("/**") && !input.rest[3..].starts_with('*') { |
976 | 171 | let (input, s) = block_comment(input)?; |
977 | 166 | Ok((input, (&s[3..s.len() - 2], false))) |
978 | | } else { |
979 | 19.1M | Err(Reject) |
980 | | } |
981 | 19.1M | } |
982 | | |
983 | 12.0k | fn take_until_newline_or_eof(input: Cursor) -> (Cursor, &str) { |
984 | 12.0k | let chars = input.char_indices(); |
985 | | |
986 | 27.9M | for (i, ch) in chars { |
987 | 27.9M | if ch == '\n' { |
988 | 6.92k | return (input.advance(i), &input.rest[..i]); |
989 | 27.9M | } else if ch == '\r' && input.rest[i + 1..].starts_with('\n') { |
990 | 5.08k | return (input.advance(i + 1), &input.rest[..i]); |
991 | 27.9M | } |
992 | | } |
993 | | |
994 | 57 | (input.advance(input.len()), input.rest) |
995 | 12.0k | } |