/rust/registry/src/index.crates.io-6f17d22bba15001f/regex-automata-0.4.9/src/util/escape.rs
Line | Count | Source (jump to first uncovered line) |
1 | | /*! |
2 | | Provides convenience routines for escaping raw bytes. |
3 | | |
4 | | Since this crate tends to deal with `&[u8]` everywhere and the default |
5 | | `Debug` implementation just shows decimal integers, it makes debugging those |
6 | | representations quite difficult. This module provides types that show `&[u8]` |
7 | | as if it were a string, with invalid UTF-8 escaped into its byte-by-byte hex |
8 | | representation. |
9 | | */ |
10 | | |
11 | | use crate::util::utf8; |
12 | | |
/// A thin wrapper that gives a single `u8` a readable `Debug` rendering.
///
/// The wrapped byte is interpreted as ASCII. Printable bytes are shown as-is
/// (the space character is quoted so it stays visible), while everything else
/// is shown as an escape sequence, with hex escapes using upper-case digits
/// (e.g. `\xAB`).
#[derive(Clone, Copy)]
pub struct DebugByte(pub u8);

impl core::fmt::Debug for DebugByte {
    /// Writes the escaped form of the wrapped byte to `f`.
    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
        let DebugByte(byte) = *self;

        // A bare space is nearly invisible in debug output, so it is shown
        // wrapped in single quotes instead.
        if byte == b' ' {
            return f.write_str("' '");
        }

        // Scratch space for the escaped form; 10 bytes comfortably holds
        // anything produced by ascii::escape_default.
        let mut buf = [0u8; 10];
        let mut used = 0;
        for (pos, raw) in core::ascii::escape_default(byte).enumerate() {
            // Positions 0 and 1 hold the leading `\x` of a hex escape (or a
            // short escape such as `\n`), so only later positions can be hex
            // digits. Those are upper-cased: \xab becomes \xAB.
            let is_lower_hex_digit = pos >= 2 && matches!(raw, b'a'..=b'f');
            buf[used] = if is_lower_hex_digit {
                raw.to_ascii_uppercase()
            } else {
                raw
            };
            used += 1;
        }

        // escape_default only ever yields ASCII, so this is valid UTF-8.
        f.write_str(core::str::from_utf8(&buf[..used]).unwrap())
    }
}
43 | | |
44 | | /// Provides a convenient `Debug` implementation for `&[u8]`. |
45 | | /// |
46 | | /// This generally works best when the bytes are presumed to be mostly UTF-8, |
47 | | /// but will work for anything. For any bytes that aren't UTF-8, they are |
48 | | /// emitted as hex escape sequences. |
49 | | pub struct DebugHaystack<'a>(pub &'a [u8]); |
50 | | |
51 | | impl<'a> core::fmt::Debug for DebugHaystack<'a> { |
52 | 0 | fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { |
53 | 0 | write!(f, "\"")?; |
54 | | // This is a sad re-implementation of a similar impl found in bstr. |
55 | 0 | let mut bytes = self.0; |
56 | 0 | while let Some(result) = utf8::decode(bytes) { |
57 | 0 | let ch = match result { |
58 | 0 | Ok(ch) => ch, |
59 | 0 | Err(byte) => { |
60 | 0 | write!(f, r"\x{:02x}", byte)?; |
61 | 0 | bytes = &bytes[1..]; |
62 | 0 | continue; |
63 | | } |
64 | | }; |
65 | 0 | bytes = &bytes[ch.len_utf8()..]; |
66 | 0 | match ch { |
67 | 0 | '\0' => write!(f, "\\0")?, |
68 | | // ASCII control characters except \0, \n, \r, \t |
69 | 0 | '\x01'..='\x08' |
70 | | | '\x0b' |
71 | | | '\x0c' |
72 | 0 | | '\x0e'..='\x19' |
73 | | | '\x7f' => { |
74 | 0 | write!(f, "\\x{:02x}", u32::from(ch))?; |
75 | | } |
76 | | '\n' | '\r' | '\t' | _ => { |
77 | 0 | write!(f, "{}", ch.escape_debug())?; |
78 | | } |
79 | | } |
80 | | } |
81 | 0 | write!(f, "\"")?; |
82 | 0 | Ok(()) |
83 | 0 | } |
84 | | } |