Coverage Report

Created: 2026-04-01 06:56

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/regex/regex-syntax/src/debug.rs
Line
Count
Source
1
/// A newtype around one byte whose `fmt::Debug` output is an escaped,
/// human-readable rendering of that byte.
pub(crate) struct Byte(pub(crate) u8);

impl core::fmt::Debug for Byte {
    /// Writes the escaped form of the wrapped byte to `f`.
    ///
    /// A space is shown as `' '`; every other byte is rendered through
    /// `core::ascii::escape_default`, with any lowercase hex digits of the
    /// escape converted to uppercase.
    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
        // A bare space is nearly invisible in debug output, so it gets
        // quoted instead of going through the generic escaping below.
        if self.0 == b' ' {
            return f.write_str("' '");
        }
        // 10 bytes is enough to hold anything ascii::escape_default emits.
        let mut buf = [0u8; 10];
        let mut written = 0;
        for (pos, byte) in core::ascii::escape_default(self.0).enumerate() {
            // From the third output byte onwards, turn a-f into A-F so that
            // \xab is displayed as \xAB.
            let is_lower_hex = pos >= 2 && (b'a'..=b'f').contains(&byte);
            buf[written] =
                if is_lower_hex { byte.to_ascii_uppercase() } else { byte };
            written += 1;
        }
        f.write_str(core::str::from_utf8(&buf[..written]).unwrap())
    }
}
27
28
/// A type that provides a human readable debug impl for arbitrary bytes.
29
///
30
/// This generally works best when the bytes are presumed to be mostly UTF-8,
31
/// but will work for anything.
32
///
33
/// N.B. This is copied nearly verbatim from regex-automata. Sigh.
34
pub(crate) struct Bytes<'a>(pub(crate) &'a [u8]);
35
36
impl<'a> core::fmt::Debug for Bytes<'a> {
37
0
    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
38
0
        write!(f, "\"")?;
39
        // This is a sad re-implementation of a similar impl found in bstr.
40
0
        let mut bytes = self.0;
41
0
        while let Some(result) = utf8_decode(bytes) {
42
0
            let ch = match result {
43
0
                Ok(ch) => ch,
44
0
                Err(byte) => {
45
0
                    write!(f, r"\x{byte:02x}")?;
46
0
                    bytes = &bytes[1..];
47
0
                    continue;
48
                }
49
            };
50
0
            bytes = &bytes[ch.len_utf8()..];
51
0
            match ch {
52
0
                '\0' => write!(f, "\\0")?,
53
                // ASCII control characters except \0, \n, \r, \t
54
0
                '\x01'..='\x08'
55
                | '\x0b'
56
                | '\x0c'
57
0
                | '\x0e'..='\x19'
58
                | '\x7f' => {
59
0
                    write!(f, "\\x{:02x}", u32::from(ch))?;
60
                }
61
                '\n' | '\r' | '\t' | _ => {
62
0
                    write!(f, "{}", ch.escape_debug())?;
63
                }
64
            }
65
        }
66
0
        write!(f, "\"")?;
67
0
        Ok(())
68
0
    }
69
}
70
71
/// Decodes the UTF-8 encoded codepoint at the start of `bytes`.
///
/// Returns `Some(Ok(ch))` when `bytes` begins with a valid encoding of `ch`,
/// and `Some(Err(b))`, where `b` is the first byte, when it does not (bad
/// lead byte, truncated sequence, or otherwise invalid encoding).
///
/// `None` is returned if and only if `bytes` is empty.
pub(crate) fn utf8_decode(bytes: &[u8]) -> Option<Result<char, u8>> {
    let first = *bytes.first()?;
    // Classify the lead byte to learn how long the encoding claims to be.
    let width = match first {
        // ASCII is always a complete one-byte codepoint.
        0x00..=0x7F => return Some(Ok(char::from(first))),
        0xC0..=0xDF => 2,
        0xE0..=0xEF => 3,
        0xF0..=0xF7 => 4,
        // Continuation bytes (0x80..=0xBF) and 0xF8..=0xFF cannot start
        // a sequence.
        _ => return Some(Err(first)),
    };
    // A checked slice covers the truncated case; from_utf8 performs the
    // actual validation of the candidate sequence.
    let decoded = bytes
        .get(..width)
        .and_then(|prefix| core::str::from_utf8(prefix).ok())
        .and_then(|s| s.chars().next());
    Some(decoded.ok_or(first))
}