Coverage Report

Created: 2024-08-22 06:13

/rust/registry/src/index.crates.io-6f17d22bba15001f/jiff-0.1.8/src/util/escape.rs
Line
Count
Source (jump to first uncovered line)
1
/*!
2
Provides convenience routines for escaping raw bytes.
3
4
This was copied from `regex-automata` with a few light edits.
5
*/
6
7
/// A single byte wrapped so that it can be printed in a human readable way.
///
/// Formatting goes through `core::ascii::escape_default`: printable ASCII
/// is written as-is, the usual short escapes (such as `\n` or `\\`) are used
/// where they exist, and every other byte becomes a `\xNN` hex escape whose
/// hex digits are upper-cased. The `Debug` impl prints the same text as
/// `Display`, wrapped in double quotes.
#[derive(Clone, Copy)]
pub struct Byte(pub u8);

impl core::fmt::Display for Byte {
    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
        // A space is written verbatim.
        if self.0 == b' ' {
            return f.write_str(" ");
        }
        // The longest sequence produced by ascii::escape_default easily fits
        // in 10 bytes.
        let mut buf = [0u8; 10];
        let mut used = 0;
        for (pos, raw) in core::ascii::escape_default(self.0).enumerate() {
            // Positions 0 and 1 hold the backslash and the escape letter, so
            // only the trailing hex digits are upper-cased (\xab -> \xAB).
            let is_lower_hex = pos >= 2 && (b'a'..=b'f').contains(&raw);
            buf[used] = if is_lower_hex { raw.to_ascii_uppercase() } else { raw };
            used += 1;
        }
        // escape_default only ever yields ASCII, hence valid UTF-8.
        f.write_str(core::str::from_utf8(&buf[..used]).unwrap())
    }
}

impl core::fmt::Debug for Byte {
    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
        // Same text as `Display`, surrounded by double quotes.
        f.write_str("\"")?;
        core::fmt::Display::fmt(self, f)?;
        f.write_str("\"")
    }
}
43
44
/// Provides a convenient `Debug` implementation for `&[u8]`.
45
///
46
/// This generally works best when the bytes are presumed to be mostly UTF-8,
47
/// but will work for anything. For any bytes that aren't UTF-8, they are
48
/// emitted as hex escape sequences.
49
pub struct Bytes<'a>(pub &'a [u8]);
50
51
impl<'a> core::fmt::Display for Bytes<'a> {
52
23.0k
    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
53
23.0k
        // This is a sad re-implementation of a similar impl found in bstr.
54
23.0k
        let mut bytes = self.0;
55
17.8M
        while let Some(result) = utf8_decode(bytes) {
56
17.8M
            let ch = match result {
57
17.8M
                Ok(ch) => ch,
58
1.27k
                Err(byte) => {
59
1.27k
                    write!(f, r"\x{:02x}", byte)?;
60
1.27k
                    bytes = &bytes[1..];
61
1.27k
                    continue;
62
                }
63
            };
64
17.8M
            bytes = &bytes[ch.len_utf8()..];
65
17.8M
            match ch {
66
5.21M
                '\0' => write!(f, "\\0")?,
67
                // ASCII control characters except \0, \n, \r, \t
68
9.11M
                '\x01'..='\x08'
69
                | '\x0b'
70
                | '\x0c'
71
9.00M
                | '\x0e'..='\x19'
72
                | '\x7f' => {
73
1.07M
                    write!(f, "\\x{:02x}", u32::from(ch))?;
74
                }
75
                '\n' | '\r' | '\t' | _ => {
76
11.5M
                    write!(f, "{}", ch.escape_debug())?;
77
                }
78
            }
79
        }
80
23.0k
        Ok(())
81
23.0k
    }
82
}
83
84
impl<'a> core::fmt::Debug for Bytes<'a> {
85
22.8k
    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
86
22.8k
        write!(f, "\"")?;
87
22.8k
        core::fmt::Display::fmt(self, f)?;
88
22.8k
        write!(f, "\"")?;
89
22.8k
        Ok(())
90
22.8k
    }
91
}
92
93
/// Decodes the next UTF-8 encoded codepoint from the given byte slice.
94
///
95
/// If no valid encoding of a codepoint exists at the beginning of the given
96
/// byte slice, then the first byte is returned instead.
97
///
98
/// This returns `None` if and only if `bytes` is empty.
99
///
100
/// This never panics.
101
///
102
/// *WARNING*: This is not designed for performance. If you're looking for a
103
/// fast UTF-8 decoder, this is not it. If you feel like you need one in this
104
/// crate, then please file an issue and discuss your use case.
105
17.8M
fn utf8_decode(bytes: &[u8]) -> Option<Result<char, u8>> {
106
17.8M
    if bytes.is_empty() {
107
23.0k
        return None;
108
17.8M
    }
109
17.8M
    let len = match utf8_len(bytes[0]) {
110
560
        None => return Some(Err(bytes[0])),
111
17.8M
        Some(len) if len > bytes.len() => return Some(Err(bytes[0])),
112
17.8M
        Some(1) => return Some(Ok(char::from(bytes[0]))),
113
15.0k
        Some(len) => len,
114
15.0k
    };
115
15.0k
    match core::str::from_utf8(&bytes[..len]) {
116
15.0k
        Ok(s) => Some(Ok(s.chars().next().unwrap())),
117
0
        Err(_) => Some(Err(bytes[0])),
118
    }
119
17.8M
}
120
121
/// Given a UTF-8 leading byte, this returns the total number of code units
/// in the following encoded codepoint.
///
/// If the given byte is not a valid UTF-8 leading byte, then this returns
/// `None`. That covers continuation bytes (`0x80..=0xBF`), the bytes `0xC0`
/// and `0xC1` (which could only begin overlong two-byte encodings) and
/// `0xF5..=0xFF` (which could only begin encodings above U+10FFFF).
fn utf8_len(byte: u8) -> Option<usize> {
    match byte {
        0x00..=0x7F => Some(1),
        0xC2..=0xDF => Some(2),
        0xE0..=0xEF => Some(3),
        0xF0..=0xF4 => Some(4),
        // Continuation bytes and bytes that never appear in valid UTF-8.
        _ => None,
    }
}