/rust/registry/src/index.crates.io-6f17d22bba15001f/jiff-0.1.8/src/util/escape.rs

Source (jump to first uncovered line)
/*!
Provides convenience routines for escaping raw bytes.

This was copied from `regex-automata` with a few light edits.
*/

/// Provides convenient `Display` and `Debug` implementations for a `u8`.
///
/// Both impls treat the byte as ASCII and emit a human readable
/// representation of it. If the byte isn't printable ASCII, then it's
/// emitted as an escape sequence (for example, `\n` or `\xAB`). The `Debug`
/// impl additionally wraps the output in double quotes.
#[derive(Clone, Copy)]
pub struct Byte(pub u8);

impl core::fmt::Display for Byte {
    /// Writes the byte using `core::ascii::escape_default`, except that the
    /// two digits of a `\xNN` hex escape are upper-cased.
    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
        if self.0 == b' ' {
            return f.write_str(" ");
        }
        // An escape produced by ascii::escape_default is at most four bytes
        // long (`\xNN`), so this scratch buffer has plenty of room.
        let mut buf = [0u8; 10];
        let mut written = 0;
        let escaped = core::ascii::escape_default(self.0).enumerate();
        for (slot, (pos, unit)) in buf.iter_mut().zip(escaped) {
            // Positions 0 and 1 hold either the byte itself or the `\` plus
            // the escape letter; only hex digits ever appear at position 2
            // or later, and those are what get capitalized (\xab -> \xAB).
            *slot = if pos >= 2 { unit.to_ascii_uppercase() } else { unit };
            written += 1;
        }
        // The escape sequence is pure ASCII, hence always valid UTF-8.
        write!(f, "{}", core::str::from_utf8(&buf[..written]).unwrap())
    }
}

impl core::fmt::Debug for Byte {
    /// Writes the `Display` representation of the byte surrounded by double
    /// quotes.
    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
        f.write_str("\"")?;
        core::fmt::Display::fmt(self, f)?;
        f.write_str("\"")
    }
}

/// Provides convenient `Display` and `Debug` implementations for `&[u8]`.
///
/// This generally works best when the bytes are presumed to be mostly UTF-8,
/// but will work for anything. Any bytes that aren't valid UTF-8 are emitted
/// as hex escape sequences. The `Debug` impl additionally wraps the output
/// in double quotes.
pub struct Bytes<'a>(pub &'a [u8]);

impl<'a> core::fmt::Display for Bytes<'a> {
    /// Writes the bytes as text, escaping anything that isn't printable.
    ///
    /// The slice is decoded one codepoint at a time. Bytes that don't start
    /// a valid UTF-8 sequence and ASCII control characters are written as
    /// `\xNN` hex escapes (NUL is written as `\0`); everything else goes
    /// through `char::escape_debug`.
    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
        // This is a sad re-implementation of a similar impl found in bstr.
        let mut bytes = self.0;
        while let Some(result) = utf8_decode(bytes) {
            let ch = match result {
                Ok(ch) => ch,
                Err(byte) => {
                    // Not the start of a valid UTF-8 sequence: escape just
                    // this one byte and resume decoding at the next one.
                    write!(f, r"\x{:02x}", byte)?;
                    bytes = &bytes[1..];
                    continue;
                }
            };
            bytes = &bytes[ch.len_utf8()..];
            match ch {
                '\0' => write!(f, "\\0")?,
                // ASCII control characters except \0, \n, \r, \t. The upper
                // bound is 0x1f so that 0x1a..=0x1f are hex escaped like the
                // rest of the control range instead of falling through to
                // `escape_debug`, which would print them as `\u{1a}` etc.
                '\x01'..='\x08'
                | '\x0b'
                | '\x0c'
                | '\x0e'..='\x1f'
                | '\x7f' => {
                    write!(f, "\\x{:02x}", u32::from(ch))?;
                }
                // Includes \n, \r and \t, which `escape_debug` writes using
                // their conventional backslash escapes.
                _ => write!(f, "{}", ch.escape_debug())?,
            }
        }
        Ok(())
    }
}

impl<'a> core::fmt::Debug for Bytes<'a> {
    /// Writes the `Display` representation of the bytes surrounded by double
    /// quotes.
    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
        f.write_str("\"")?;
        core::fmt::Display::fmt(self, f)?;
        f.write_str("\"")
    }
}

/// Decodes the UTF-8 encoded codepoint at the start of the given byte slice.
///
/// On success, the decoded `char` is returned as `Some(Ok(..))`. If the
/// slice does not begin with a valid encoding of a codepoint, then the first
/// byte of the slice is returned as `Some(Err(..))` instead.
///
/// This returns `None` if and only if `bytes` is empty.
///
/// This never panics.
///
/// *WARNING*: This is not designed for performance. If you're looking for a
/// fast UTF-8 decoder, this is not it. If you feel like you need one in this
/// crate, then please file an issue and discuss your use case.
fn utf8_decode(bytes: &[u8]) -> Option<Result<char, u8>> {
    let &first = bytes.first()?;
    let width = match utf8_len(first) {
        // ASCII needs no further validation.
        Some(1) => return Some(Ok(char::from(first))),
        Some(width) if width <= bytes.len() => width,
        // Either not a leading byte at all, or the slice ends before the
        // sequence announced by the leading byte is complete.
        _ => return Some(Err(first)),
    };
    // Let the standard library do the real validation (continuation bytes,
    // overlong forms, surrogates, out-of-range values).
    let decoded = core::str::from_utf8(&bytes[..width])
        // A successfully validated, non-empty prefix holds exactly one char.
        .map(|s| s.chars().next().unwrap())
        .map_err(|_| first);
    Some(decoded)
}

/// Given a UTF-8 leading byte, this returns the total number of code units
/// in the encoded codepoint that it introduces.
///
/// If the given byte cannot begin an encoded codepoint (it is a continuation
/// byte or is `0xF8` or above), then this returns `None`.
///
/// Only the bit pattern of the leading byte is inspected here; whether the
/// full sequence is actually valid UTF-8 is not checked.
fn utf8_len(byte: u8) -> Option<usize> {
    match byte {
        // 0xxxxxxx: ASCII.
        0x00..=0x7F => Some(1),
        // 10xxxxxx: continuation byte, never a leading byte.
        0x80..=0xBF => None,
        // 110xxxxx
        0xC0..=0xDF => Some(2),
        // 1110xxxx
        0xE0..=0xEF => Some(3),
        // 11110xxx
        0xF0..=0xF7 => Some(4),
        // 11111xxx: not used by UTF-8.
        0xF8..=0xFF => None,
    }
}

Line	Count	Source (jump to first uncovered line)
1		/*!
2		Provides convenience routines for escaping raw bytes.
3
4		This was copied from `regex-automata` with a few light edits.
5		*/
6
7		/// Provides a convenient `Debug` implementation for a `u8`.
8		///
9		/// The `Debug` impl treats the byte as an ASCII, and emits a human readable
10		/// representation of it. If the byte isn't ASCII, then it's emitted as a hex
11		/// escape sequence.
12		#[derive(Clone, Copy)]
13		pub struct Byte(pub u8);
14
15		impl core::fmt::Display for Byte {
16	39.6k	fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
17	39.6k	if self.0 == b' ' {
18	5.85k	return write!(f, " ");
19	33.8k	}
20	33.8k	// 10 bytes is enough to cover any output from ascii::escape_default.
21	33.8k	let mut bytes = [0u8; 10];
22	33.8k	let mut len = 0;
23	44.9k	for (i, mut b) in core::ascii::escape_default(self.0).enumerate() {
24		// capitalize \xab to \xAB
25	44.9k	if i >= 2 && b'a' <= b && b <= b'f' {
26	2.53k	b -= 32;
27	42.3k	}
28	44.9k	bytes[len] = b;
29	44.9k	len += 1;
30		}
31	33.8k	write!(f, "{}", core::str::from_utf8(&bytes[..len]).unwrap())
32	39.6k	}
33		}
34
35		impl core::fmt::Debug for Byte {
36	38.5k	fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
37	38.5k	write!(f, "\"")?;
38	38.5k	core::fmt::Display::fmt(self, f)?;
39	38.5k	write!(f, "\"")?;
40	38.5k	Ok(())
41	38.5k	}
42		}
43
44		/// Provides a convenient `Debug` implementation for `&[u8]`.
45		///
46		/// This generally works best when the bytes are presumed to be mostly UTF-8,
47		/// but will work for anything. For any bytes that aren't UTF-8, they are
48		/// emitted as hex escape sequences.
49		pub struct Bytes<'a>(pub &'a [u8]);
50
51		impl<'a> core::fmt::Display for Bytes<'a> {
52	23.0k	fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
53	23.0k	// This is a sad re-implementation of a similar impl found in bstr.
54	23.0k	let mut bytes = self.0;
55	17.8M	while let Some(result) = utf8_decode(bytes) {
56	17.8M	let ch = match result {
57	17.8M	Ok(ch) => ch,
58	1.27k	Err(byte) => {
59	1.27k	write!(f, r"\x{:02x}", byte)?;
60	1.27k	bytes = &bytes[1..];
61	1.27k	continue;
62		}
63		};
64	17.8M	bytes = &bytes[ch.len_utf8()..];
65	17.8M	match ch {
66	5.21M	'\0' => write!(f, "\\0")?,
67		// ASCII control characters except \0, \n, \r, \t
68	9.11M	'\x01'..='\x08'
69		\| '\x0b'
70		\| '\x0c'
71	9.00M	\| '\x0e'..='\x19'
72		\| '\x7f' => {
73	1.07M	write!(f, "\\x{:02x}", u32::from(ch))?;
74		}
75		'\n' \| '\r' \| '\t' \| _ => {
76	11.5M	write!(f, "{}", ch.escape_debug())?;
77		}
78		}
79		}
80	23.0k	Ok(())
81	23.0k	}
82		}
83
84		impl<'a> core::fmt::Debug for Bytes<'a> {
85	22.8k	fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
86	22.8k	write!(f, "\"")?;
87	22.8k	core::fmt::Display::fmt(self, f)?;
88	22.8k	write!(f, "\"")?;
89	22.8k	Ok(())
90	22.8k	}
91		}
92
93		/// Decodes the next UTF-8 encoded codepoint from the given byte slice.
94		///
95		/// If no valid encoding of a codepoint exists at the beginning of the given
96		/// byte slice, then the first byte is returned instead.
97		///
98		/// This returns `None` if and only if `bytes` is empty.
99		///
100		/// This never panics.
101		///
102		/// WARNING: This is not designed for performance. If you're looking for a
103		/// fast UTF-8 decoder, this is not it. If you feel like you need one in this
104		/// crate, then please file an issue and discuss your use case.
105	17.8M	fn utf8_decode(bytes: &[u8]) -> Option<Result<char, u8>> {
106	17.8M	if bytes.is_empty() {
107	23.0k	return None;
108	17.8M	}
109	17.8M	let len = match utf8_len(bytes[0]) {
110	560	None => return Some(Err(bytes[0])),
111	17.8M	Some(len) if len > bytes.len() => return Some(Err(bytes[0])),
112	17.8M	Some(1) => return Some(Ok(char::from(bytes[0]))),
113	15.0k	Some(len) => len,
114	15.0k	};
115	15.0k	match core::str::from_utf8(&bytes[..len]) {
116	15.0k	Ok(s) => Some(Ok(s.chars().next().unwrap())),
117	0	Err(_) => Some(Err(bytes[0])),
118		}
119	17.8M	}
120
121		/// Given a UTF-8 leading byte, this returns the total number of code units
122		/// in the following encoded codepoint.
123		///
124		/// If the given byte is not a valid UTF-8 leading byte, then this returns
125		/// `None`.
126	17.8M	fn utf8_len(byte: u8) -> Option<usize> {
127	17.8M	if byte <= 0x7F {
128	17.8M	return Some(1);
129	16.3k	} else if byte & 0b1100_0000 == 0b1000_0000 {
130	560	return None;
131	15.7k	} else if byte <= 0b1101_1111 {
132	6.45k	Some(2)
133	9.30k	} else if byte <= 0b1110_1111 {
134	1.90k	Some(3)
135	7.39k	} else if byte <= 0b1111_0111 {
136	7.39k	Some(4)
137		} else {
138	0	None
139		}
140	17.8M	}

Coverage Report

Created: 2024-08-22 06:13