/rust/registry/src/index.crates.io-6f17d22bba15001f/regex-syntax-0.8.5/src/debug.rs

Source (jump to first uncovered line)
/// Newtype around a single `u8` whose `fmt::Debug` output is an escaped,
/// human-readable rendering of that byte.
pub(crate) struct Byte(pub(crate) u8);

impl core::fmt::Debug for Byte {
    /// Writes the escaped form of the wrapped byte.
    ///
    /// A space is rendered as `' '` (quoted) so that it stays visible; every
    /// other byte goes through `core::ascii::escape_default`, with the hex
    /// digits of a `\xNN` escape upper-cased.
    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
        let Byte(byte) = *self;
        // A bare space is nearly invisible in debug output, so quote it.
        if byte == b' ' {
            return f.write_str("' '");
        }
        // `escape_default` yields at most four bytes (`\xNN`), so a ten byte
        // scratch buffer is more than sufficient.
        let mut scratch = [0u8; 10];
        let mut used = 0;
        for (pos, escaped) in core::ascii::escape_default(byte).enumerate() {
            // Only `\xNN` escapes are longer than two bytes, so anything at
            // position two or later is a hex digit: turn `\xab` into `\xAB`.
            let is_lower_hex = pos >= 2 && matches!(escaped, b'a'..=b'f');
            scratch[used] = if is_lower_hex {
                escaped.to_ascii_uppercase()
            } else {
                escaped
            };
            used += 1;
        }
        // The escaped output is pure ASCII, hence always valid UTF-8.
        f.write_str(core::str::from_utf8(&scratch[..used]).unwrap())
    }
}

/// A type that provides a human readable debug impl for arbitrary bytes.
///
/// This generally works best when the bytes are presumed to be mostly UTF-8,
/// but will work for anything.
///
/// N.B. This is copied nearly verbatim from regex-automata.
pub(crate) struct Bytes<'a>(pub(crate) &'a [u8]);

impl<'a> core::fmt::Debug for Bytes<'a> {
    /// Writes the bytes as a double-quoted, escaped string.
    ///
    /// The slice is decoded one codepoint at a time with `utf8_decode`:
    ///
    /// * a byte that does not start a valid UTF-8 sequence is written as
    ///   `\xNN` (lowercase hex) and decoding resumes at the next byte;
    /// * NUL is written as `\0`;
    /// * every other ASCII control character except `\n`, `\r` and `\t`
    ///   (that is, 0x01-0x08, 0x0B, 0x0C, 0x0E-0x1F and 0x7F) is written as
    ///   `\xNN`;
    /// * everything else is written via `char::escape_debug`.
    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
        write!(f, "\"")?;
        // This is a re-implementation of a similar impl found in bstr.
        let mut bytes = self.0;
        while let Some(result) = utf8_decode(bytes) {
            let ch = match result {
                Ok(ch) => ch,
                Err(byte) => {
                    // Not valid UTF-8 here: emit the raw byte and skip
                    // exactly one byte before trying to decode again.
                    write!(f, r"\x{:02x}", byte)?;
                    bytes = &bytes[1..];
                    continue;
                }
            };
            bytes = &bytes[ch.len_utf8()..];
            match ch {
                '\0' => write!(f, "\\0")?,
                // ASCII control characters except \0, \n, \r, \t. The upper
                // bound is 0x1F (not 0x19) so that 0x1A-0x1F also get the
                // compact `\xNN` form instead of `\u{1a}` and friends.
                '\x01'..='\x08'
                | '\x0b'
                | '\x0c'
                | '\x0e'..='\x1f'
                | '\x7f' => {
                    write!(f, "\\x{:02x}", u32::from(ch))?;
                }
                // Includes \n, \r and \t, which `escape_debug` renders with
                // their conventional backslash escapes.
                _ => write!(f, "{}", ch.escape_debug())?,
            }
        }
        write!(f, "\"")
    }
}

/// Decodes the UTF-8 encoded codepoint at the front of `bytes`.
///
/// Returns:
///
/// * `None` if and only if `bytes` is empty;
/// * `Some(Ok(ch))` when `bytes` begins with a valid encoding of `ch`;
/// * `Some(Err(b))`, where `b` is the first byte, when no valid encoding of
///   a codepoint exists at the beginning of `bytes`.
pub(crate) fn utf8_decode(bytes: &[u8]) -> Option<Result<char, u8>> {
    let lead = *bytes.first()?;
    // The lead byte alone determines how long the encoding claims to be.
    let width = match lead {
        // ASCII decodes directly; no further validation required.
        0x00..=0x7F => return Some(Ok(char::from(lead))),
        0xC0..=0xDF => 2,
        0xE0..=0xEF => 3,
        0xF0..=0xF7 => 4,
        // 0x80..=0xBF are continuation bytes and 0xF8..=0xFF never begin a
        // sequence, so neither can start a codepoint.
        _ => return Some(Err(lead)),
    };
    // A truncated sequence (`get` yields `None`) and an invalid one
    // (`from_utf8` fails) are both reported via the lead byte.
    let decoded = bytes
        .get(..width)
        .and_then(|prefix| core::str::from_utf8(prefix).ok())
        .and_then(|text| text.chars().next());
    Some(decoded.ok_or(lead))
}

Coverage Report

Created: 2024-10-13 06:30

Line	Count	Source (jump to first uncovered line)
1		/// A type that wraps a single byte with a convenient fmt::Debug impl that
2		/// escapes the byte.
3		pub(crate) struct Byte(pub(crate) u8);
4
5		impl core::fmt::Debug for Byte {
6	0	fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
7	0	// Special case ASCII space. It's too hard to read otherwise, so
8	0	// put quotes around it. I sometimes wonder whether just '\x20' would
9	0	// be better...
10	0	if self.0 == b' ' {
11	0	return write!(f, "' '");
12	0	}
13	0	// 10 bytes is enough to cover any output from ascii::escape_default.
14	0	let mut bytes = [0u8; 10];
15	0	let mut len = 0;
16	0	for (i, mut b) in core::ascii::escape_default(self.0).enumerate() {
17		// capitalize \xab to \xAB
18	0	if i >= 2 && b'a' <= b && b <= b'f' {
19	0	b -= 32;
20	0	}
21	0	bytes[len] = b;
22	0	len += 1;
23		}
24	0	write!(f, "{}", core::str::from_utf8(&bytes[..len]).unwrap())
25	0	}
26		}
27
28		/// A type that provides a human readable debug impl for arbitrary bytes.
29		///
30		/// This generally works best when the bytes are presumed to be mostly UTF-8,
31		/// but will work for anything.
32		///
33		/// N.B. This is copied nearly verbatim from regex-automata. Sigh.
34		pub(crate) struct Bytes<'a>(pub(crate) &'a [u8]);
35
36		impl<'a> core::fmt::Debug for Bytes<'a> {
37	0	fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
38	0	write!(f, "\"")?;
39		// This is a sad re-implementation of a similar impl found in bstr.
40	0	let mut bytes = self.0;
41	0	while let Some(result) = utf8_decode(bytes) {
42	0	let ch = match result {
43	0	Ok(ch) => ch,
44	0	Err(byte) => {
45	0	write!(f, r"\x{:02x}", byte)?;
46	0	bytes = &bytes[1..];
47	0	continue;
48		}
49		};
50	0	bytes = &bytes[ch.len_utf8()..];
51	0	match ch {
52	0	'\0' => write!(f, "\\0")?,
53		// ASCII control characters except \0, \n, \r, \t
54	0	'\x01'..='\x08'
55		\| '\x0b'
56		\| '\x0c'
57	0	\| '\x0e'..='\x19'
58		\| '\x7f' => {
59	0	write!(f, "\\x{:02x}", u32::from(ch))?;
60		}
61		'\n' \| '\r' \| '\t' \| _ => {
62	0	write!(f, "{}", ch.escape_debug())?;
63		}
64		}
65		}
66	0	write!(f, "\"")?;
67	0	Ok(())
68	0	}
69		}
70
71		/// Decodes the next UTF-8 encoded codepoint from the given byte slice.
72		///
73		/// If no valid encoding of a codepoint exists at the beginning of the given
74		/// byte slice, then the first byte is returned instead.
75		///
76		/// This returns `None` if and only if `bytes` is empty.
77	9	pub(crate) fn utf8_decode(bytes: &[u8]) -> Option<Result<char, u8>> {
78	9	fn len(byte: u8) -> Option<usize> {
79	9	if byte <= 0x7F {
80	9	return Some(1);
81	0	} else if byte & 0b1100_0000 == 0b1000_0000 {
82	0	return None;
83	0	} else if byte <= 0b1101_1111 {
84	0	Some(2)
85	0	} else if byte <= 0b1110_1111 {
86	0	Some(3)
87	0	} else if byte <= 0b1111_0111 {
88	0	Some(4)
89		} else {
90	0	None
91		}
92	9	}
93
94	9	if bytes.is_empty() {
95	0	return None;
96	9	}
97	9	let len = match len(bytes[0]) {
98	0	None => return Some(Err(bytes[0])),
99	9	Some(len) if len > bytes.len() => return Some(Err(bytes[0])),
100	9	Some(1) => return Some(Ok(char::from(bytes[0]))),
101	0	Some(len) => len,
102	0	};
103	0	match core::str::from_utf8(&bytes[..len]) {
104	0	Ok(s) => Some(Ok(s.chars().next().unwrap())),
105	0	Err(_) => Some(Err(bytes[0])),
106		}
107	9	}