/rust/registry/src/index.crates.io-6f17d22bba15001f/regex-syntax-0.8.5/src/debug.rs
Line | Count | Source (jump to first uncovered line) |
/// Wrapper around one byte whose `fmt::Debug` output is the byte in escaped
/// form.
pub(crate) struct Byte(pub(crate) u8);

impl core::fmt::Debug for Byte {
    /// Writes the byte as `core::ascii::escape_default` would render it,
    /// except that a space is shown as `' '` and the hex digits of a `\xNN`
    /// escape are upper-cased.
    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
        // A bare space is nearly invisible in debug output, so it is shown
        // quoted instead of going through the generic escaping below.
        if self.0 == b' ' {
            return f.write_str("' '");
        }
        // The longest escape produced by ascii::escape_default is `\xNN`, so
        // 10 bytes is more than enough room.
        let mut buf = [0u8; 10];
        let mut used = 0;
        for (pos, escaped) in core::ascii::escape_default(self.0).enumerate() {
            // Only positions past the two-byte `\x` prefix can hold hex
            // digits; upper-case those so `\xab` is rendered as `\xAB`.
            let is_hex_letter = pos >= 2 && matches!(escaped, b'a'..=b'f');
            buf[used] = if is_hex_letter {
                escaped.to_ascii_uppercase()
            } else {
                escaped
            };
            used += 1;
        }
        // The escaped form is ASCII, so it is always valid UTF-8.
        f.write_str(core::str::from_utf8(&buf[..used]).unwrap())
    }
}
27 | | |
28 | | /// A type that provides a human readable debug impl for arbitrary bytes. |
29 | | /// |
30 | | /// This generally works best when the bytes are presumed to be mostly UTF-8, |
31 | | /// but will work for anything. |
32 | | /// |
33 | | /// N.B. This is copied nearly verbatim from regex-automata. Sigh. |
34 | | pub(crate) struct Bytes<'a>(pub(crate) &'a [u8]); |
35 | | |
36 | | impl<'a> core::fmt::Debug for Bytes<'a> { |
37 | 0 | fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { |
38 | 0 | write!(f, "\"")?; |
39 | | // This is a sad re-implementation of a similar impl found in bstr. |
40 | 0 | let mut bytes = self.0; |
41 | 0 | while let Some(result) = utf8_decode(bytes) { |
42 | 0 | let ch = match result { |
43 | 0 | Ok(ch) => ch, |
44 | 0 | Err(byte) => { |
45 | 0 | write!(f, r"\x{:02x}", byte)?; |
46 | 0 | bytes = &bytes[1..]; |
47 | 0 | continue; |
48 | | } |
49 | | }; |
50 | 0 | bytes = &bytes[ch.len_utf8()..]; |
51 | 0 | match ch { |
52 | 0 | '\0' => write!(f, "\\0")?, |
53 | | // ASCII control characters except \0, \n, \r, \t |
54 | 0 | '\x01'..='\x08' |
55 | | | '\x0b' |
56 | | | '\x0c' |
57 | 0 | | '\x0e'..='\x19' |
58 | | | '\x7f' => { |
59 | 0 | write!(f, "\\x{:02x}", u32::from(ch))?; |
60 | | } |
61 | | '\n' | '\r' | '\t' | _ => { |
62 | 0 | write!(f, "{}", ch.escape_debug())?; |
63 | | } |
64 | | } |
65 | | } |
66 | 0 | write!(f, "\"")?; |
67 | 0 | Ok(()) |
68 | 0 | } |
69 | | } |
70 | | |
/// Decodes a single UTF-8 encoded codepoint from the front of `bytes`.
///
/// * `None` is returned if and only if `bytes` is empty.
/// * `Some(Ok(ch))` is returned when `bytes` begins with a valid encoding of
///   `ch`.
/// * `Some(Err(b))` is returned otherwise, where `b` is the first byte of
///   `bytes`.
pub(crate) fn utf8_decode(bytes: &[u8]) -> Option<Result<char, u8>> {
    /// Maps a leading byte to the length of the sequence it introduces, or
    /// `None` when the byte cannot begin a sequence.
    fn sequence_len(lead: u8) -> Option<usize> {
        match lead {
            // ASCII.
            0x00..=0x7F => Some(1),
            // Continuation byte (0b10xx_xxxx): never a valid start.
            0x80..=0xBF => None,
            0xC0..=0xDF => Some(2),
            0xE0..=0xEF => Some(3),
            0xF0..=0xF7 => Some(4),
            0xF8..=0xFF => None,
        }
    }

    let first = *bytes.first()?;
    let width = match sequence_len(first) {
        Some(width) if width <= bytes.len() => width,
        // Either not a leading byte, or the slice is too short to hold the
        // whole sequence.
        _ => return Some(Err(first)),
    };
    if width == 1 {
        return Some(Ok(char::from(first)));
    }
    // Let the standard library validate the multi-byte sequence (overlong
    // forms, surrogates, bad continuation bytes, ...).
    let decoded = core::str::from_utf8(&bytes[..width])
        .ok()
        .and_then(|s| s.chars().next());
    Some(decoded.ok_or(first))
}