Coverage Report

Created: 2024-08-22 06:13

/rust/registry/src/index.crates.io-6f17d22bba15001f/bstr-1.10.0/src/escape_bytes.rs
Line
Count
Source (jump to first uncovered line)
1
/// An iterator of `char` values that represent an escaping of arbitrary bytes.
2
///
3
/// The lifetime parameter `'a` refers to the lifetime of the bytes being
4
/// escaped.
5
///
6
/// This iterator is created by the
7
/// [`ByteSlice::escape_bytes`](crate::ByteSlice::escape_bytes) method.
8
#[derive(Clone, Debug)]
9
pub struct EscapeBytes<'a> {
10
    remaining: &'a [u8],
11
    state: EscapeState,
12
}
13
14
impl<'a> EscapeBytes<'a> {
15
0
    pub(crate) fn new(bytes: &'a [u8]) -> EscapeBytes {
16
0
        EscapeBytes { remaining: bytes, state: EscapeState::Start }
17
0
    }
Unexecuted instantiation: <bstr::escape_bytes::EscapeBytes>::new
Unexecuted instantiation: <bstr::escape_bytes::EscapeBytes>::new
Unexecuted instantiation: <bstr::escape_bytes::EscapeBytes>::new
Unexecuted instantiation: <bstr::escape_bytes::EscapeBytes>::new
18
}
19
20
impl<'a> Iterator for EscapeBytes<'a> {
21
    type Item = char;
22
23
    #[inline]
24
0
    fn next(&mut self) -> Option<char> {
25
0
        use self::EscapeState::*;
26
0
27
0
        match self.state {
28
            Start => {
29
0
                let byte = match crate::decode_utf8(self.remaining) {
30
0
                    (None, 0) => return None,
31
                    // If we see invalid UTF-8 or ASCII, then we always just
32
                    // peel one byte off. If it's printable ASCII, we'll pass
33
                    // it through as-is below. Otherwise, below, it will get
34
                    // escaped in some way.
35
                    (None, _) | (Some(_), 1) => {
36
0
                        let byte = self.remaining[0];
37
0
                        self.remaining = &self.remaining[1..];
38
0
                        byte
39
                    }
40
                    // For any valid UTF-8 that is not ASCII, we pass it
41
                    // through as-is. We don't do any Unicode escaping.
42
0
                    (Some(ch), size) => {
43
0
                        self.remaining = &self.remaining[size..];
44
0
                        return Some(ch);
45
                    }
46
                };
47
0
                self.state = match byte {
48
0
                    0x21..=0x5B | 0x5D..=0x7E => {
49
0
                        return Some(char::from(byte))
50
                    }
51
0
                    b'\0' => SpecialEscape('0'),
52
0
                    b'\n' => SpecialEscape('n'),
53
0
                    b'\r' => SpecialEscape('r'),
54
0
                    b'\t' => SpecialEscape('t'),
55
0
                    b'\\' => SpecialEscape('\\'),
56
0
                    _ => HexEscapeX(byte),
57
                };
58
0
                Some('\\')
59
            }
60
0
            SpecialEscape(ch) => {
61
0
                self.state = Start;
62
0
                Some(ch)
63
            }
64
0
            HexEscapeX(byte) => {
65
0
                self.state = HexEscapeHighNybble(byte);
66
0
                Some('x')
67
            }
68
0
            HexEscapeHighNybble(byte) => {
69
0
                self.state = HexEscapeLowNybble(byte);
70
0
                let nybble = byte >> 4;
71
0
                Some(hexdigit_to_char(nybble))
72
            }
73
0
            HexEscapeLowNybble(byte) => {
74
0
                self.state = Start;
75
0
                let nybble = byte & 0xF;
76
0
                Some(hexdigit_to_char(nybble))
77
            }
78
        }
79
0
    }
Unexecuted instantiation: <bstr::escape_bytes::EscapeBytes as core::iter::traits::iterator::Iterator>::next
Unexecuted instantiation: <bstr::escape_bytes::EscapeBytes as core::iter::traits::iterator::Iterator>::next
Unexecuted instantiation: <bstr::escape_bytes::EscapeBytes as core::iter::traits::iterator::Iterator>::next
Unexecuted instantiation: <bstr::escape_bytes::EscapeBytes as core::iter::traits::iterator::Iterator>::next
80
}
81
82
impl<'a> core::fmt::Display for EscapeBytes<'a> {
83
0
    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
84
        use core::fmt::Write;
85
0
        for ch in self.clone() {
86
0
            f.write_char(ch)?;
87
        }
88
0
        Ok(())
89
0
    }
Unexecuted instantiation: <bstr::escape_bytes::EscapeBytes as core::fmt::Display>::fmt
Unexecuted instantiation: <bstr::escape_bytes::EscapeBytes as core::fmt::Display>::fmt
Unexecuted instantiation: <bstr::escape_bytes::EscapeBytes as core::fmt::Display>::fmt
Unexecuted instantiation: <bstr::escape_bytes::EscapeBytes as core::fmt::Display>::fmt
90
}
91
92
/// The state used by the FSM in the escaping iterator.
93
#[derive(Clone, Debug)]
94
enum EscapeState {
95
    /// Read and remove the next byte from 'remaining'. If 'remaining' is
96
    /// empty, then return None. Otherwise, escape the byte according to the
97
    /// following rules or emit it as-is.
98
    ///
99
    /// If it's \n, \r, \t, \\ or \0, then emit a '\' and set the current
100
    /// state to 'SpecialEscape(n | r | t | \ | 0)'. Otherwise, if the 'byte'
101
    /// is not in [\x21-\x5B\x5D-\x7E], then emit a '\' and set the state to
102
    /// 'HexEscapeX(byte)'.
103
    Start,
104
    /// Emit the given codepoint as is. This assumes '\' has just been emitted.
105
    /// Then set the state to 'Start'.
106
    SpecialEscape(char),
107
    /// Emit the 'x' part of a hex escape. This assumes '\' has just been
108
    /// emitted. Then set the state to 'HexEscapeHighNybble(byte)'.
109
    HexEscapeX(u8),
110
    /// Emit the high nybble of the byte as a hexadecimal digit. This
111
    /// assumes '\x' has just been emitted. Then set the state to
112
    /// 'HexEscapeLowNybble(byte)'.
113
    HexEscapeHighNybble(u8),
114
    /// Emit the low nybble of the byte as a hexadecimal digit. This assumes
115
    /// '\xZ' has just been emitted, where 'Z' is the high nybble of this byte.
116
    /// Then set the state to 'Start'.
117
    HexEscapeLowNybble(u8),
118
}
119
120
/// An iterator of `u8` values that represent an unescaping of a sequence of
121
/// codepoints.
122
///
123
/// The type parameter `I` refers to the iterator of codepoints that is
124
/// unescaped.
125
///
126
/// Currently this iterator is not exposed in the crate API, and instead all
127
/// we expose is a `ByteVec::unescape` method, which of course requires an
128
/// alloc. That's the most convenient form of this, but in theory, we could
129
/// expose this for core-only use cases too. I'm just not quite sure what the
130
/// API should be.
131
#[derive(Clone, Debug)]
132
#[cfg(feature = "alloc")]
133
pub(crate) struct UnescapeBytes<I> {
134
    it: I,
135
    state: UnescapeState,
136
}
137
138
#[cfg(feature = "alloc")]
139
impl<I: Iterator<Item = char>> UnescapeBytes<I> {
140
0
    pub(crate) fn new<T: IntoIterator<IntoIter = I>>(
141
0
        t: T,
142
0
    ) -> UnescapeBytes<I> {
143
0
        UnescapeBytes { it: t.into_iter(), state: UnescapeState::Start }
144
0
    }
Unexecuted instantiation: <bstr::escape_bytes::UnescapeBytes<_>>::new::<_>
Unexecuted instantiation: <bstr::escape_bytes::UnescapeBytes<_>>::new::<_>
Unexecuted instantiation: <bstr::escape_bytes::UnescapeBytes<_>>::new::<_>
Unexecuted instantiation: <bstr::escape_bytes::UnescapeBytes<_>>::new::<_>
145
}
146
147
#[cfg(feature = "alloc")]
148
impl<I: Iterator<Item = char>> Iterator for UnescapeBytes<I> {
149
    type Item = u8;
150
151
0
    fn next(&mut self) -> Option<u8> {
152
        use self::UnescapeState::*;
153
154
0
        loop {
155
0
            match self.state {
156
                Start => {
157
0
                    let ch = self.it.next()?;
158
0
                    match ch {
159
0
                        '\\' => {
160
0
                            self.state = Escape;
161
0
                        }
162
0
                        ch => {
163
0
                            self.state = UnescapeState::bytes(&[], ch);
164
0
                        }
165
                    }
166
                }
167
0
                Bytes { buf, mut cur, len } => {
168
0
                    let byte = buf[cur];
169
0
                    cur += 1;
170
0
                    if cur >= len {
171
0
                        self.state = Start;
172
0
                    } else {
173
0
                        self.state = Bytes { buf, cur, len };
174
0
                    }
175
0
                    return Some(byte);
176
                }
177
                Escape => {
178
0
                    let ch = match self.it.next() {
179
0
                        Some(ch) => ch,
180
                        None => {
181
0
                            self.state = Start;
182
0
                            // Incomplete escape sequences unescape as
183
0
                            // themselves.
184
0
                            return Some(b'\\');
185
                        }
186
                    };
187
0
                    match ch {
188
                        '0' => {
189
0
                            self.state = Start;
190
0
                            return Some(b'\x00');
191
                        }
192
                        '\\' => {
193
0
                            self.state = Start;
194
0
                            return Some(b'\\');
195
                        }
196
                        'r' => {
197
0
                            self.state = Start;
198
0
                            return Some(b'\r');
199
                        }
200
                        'n' => {
201
0
                            self.state = Start;
202
0
                            return Some(b'\n');
203
                        }
204
                        't' => {
205
0
                            self.state = Start;
206
0
                            return Some(b'\t');
207
                        }
208
0
                        'x' => {
209
0
                            self.state = HexFirst;
210
0
                        }
211
0
                        ch => {
212
0
                            // An invalid escape sequence unescapes as itself.
213
0
                            self.state = UnescapeState::bytes(&[b'\\'], ch);
214
0
                        }
215
                    }
216
                }
217
                HexFirst => {
218
0
                    let ch = match self.it.next() {
219
0
                        Some(ch) => ch,
220
                        None => {
221
                            // An incomplete escape sequence unescapes as
222
                            // itself.
223
0
                            self.state = UnescapeState::bytes_raw(&[b'x']);
224
0
                            return Some(b'\\');
225
                        }
226
                    };
227
0
                    match ch {
228
0
                        '0'..='9' | 'A'..='F' | 'a'..='f' => {
229
0
                            self.state = HexSecond(ch);
230
0
                        }
231
0
                        ch => {
232
0
                            // An invalid escape sequence unescapes as itself.
233
0
                            self.state = UnescapeState::bytes(&[b'x'], ch);
234
0
                            return Some(b'\\');
235
                        }
236
                    }
237
                }
238
0
                HexSecond(first) => {
239
0
                    let second = match self.it.next() {
240
0
                        Some(ch) => ch,
241
                        None => {
242
                            // An incomplete escape sequence unescapes as
243
                            // itself.
244
0
                            self.state = UnescapeState::bytes(&[b'x'], first);
245
0
                            return Some(b'\\');
246
                        }
247
                    };
248
0
                    match second {
249
0
                        '0'..='9' | 'A'..='F' | 'a'..='f' => {
250
0
                            self.state = Start;
251
0
                            let hinybble = char_to_hexdigit(first);
252
0
                            let lonybble = char_to_hexdigit(second);
253
0
                            let byte = hinybble << 4 | lonybble;
254
0
                            return Some(byte);
255
                        }
256
0
                        ch => {
257
0
                            // An invalid escape sequence unescapes as itself.
258
0
                            self.state =
259
0
                                UnescapeState::bytes2(&[b'x'], first, ch);
260
0
                            return Some(b'\\');
261
                        }
262
                    }
263
                }
264
            }
265
        }
266
0
    }
Unexecuted instantiation: <bstr::escape_bytes::UnescapeBytes<_> as core::iter::traits::iterator::Iterator>::next
Unexecuted instantiation: <bstr::escape_bytes::UnescapeBytes<_> as core::iter::traits::iterator::Iterator>::next
Unexecuted instantiation: <bstr::escape_bytes::UnescapeBytes<_> as core::iter::traits::iterator::Iterator>::next
Unexecuted instantiation: <bstr::escape_bytes::UnescapeBytes<_> as core::iter::traits::iterator::Iterator>::next
267
}
268
269
/// The state used by the FSM in the unescaping iterator.
270
#[derive(Clone, Debug)]
271
#[cfg(feature = "alloc")]
272
enum UnescapeState {
273
    /// The start state. Look for an escape sequence, otherwise emit the next
274
    /// codepoint as-is.
275
    Start,
276
    /// Emit the byte at `buf[cur]`.
277
    ///
278
    /// This state should never be created when `cur >= len`. That is, when
279
    /// this state is visited, it is assumed that `cur < len`.
280
    Bytes { buf: [u8; 11], cur: usize, len: usize },
281
    /// This state is entered after a `\` is seen.
282
    Escape,
283
    /// This state is entered after a `\x` is seen.
284
    HexFirst,
285
    /// This state is entered after a `\xN` is seen, where `N` is in
286
    /// `[0-9A-Fa-f]`. The given codepoint corresponds to `N`.
287
    HexSecond(char),
288
}
289
290
#[cfg(feature = "alloc")]
291
impl UnescapeState {
292
    /// Create a new `Bytes` variant with the given slice.
293
    ///
294
    /// # Panics
295
    ///
296
    /// Panics if `bytes.len() > 11`.
297
0
    fn bytes_raw(bytes: &[u8]) -> UnescapeState {
298
0
        // This can be increased, you just need to make sure 'buf' in the
299
0
        // 'Bytes' state has enough room.
300
0
        assert!(bytes.len() <= 11, "no more than 11 bytes allowed");
301
0
        let mut buf = [0; 11];
302
0
        buf[..bytes.len()].copy_from_slice(bytes);
303
0
        UnescapeState::Bytes { buf, cur: 0, len: bytes.len() }
304
0
    }
Unexecuted instantiation: <bstr::escape_bytes::UnescapeState>::bytes_raw
Unexecuted instantiation: <bstr::escape_bytes::UnescapeState>::bytes_raw
Unexecuted instantiation: <bstr::escape_bytes::UnescapeState>::bytes_raw
Unexecuted instantiation: <bstr::escape_bytes::UnescapeState>::bytes_raw
305
306
    /// Create a new `Bytes` variant with the prefix byte slice, followed by
307
    /// the UTF-8 encoding of the given char.
308
    ///
309
    /// # Panics
310
    ///
311
    /// Panics if `prefix.len() > 3`.
312
0
    fn bytes(prefix: &[u8], ch: char) -> UnescapeState {
313
0
        // This can be increased, you just need to make sure 'buf' in the
314
0
        // 'Bytes' state has enough room.
315
0
        assert!(prefix.len() <= 3, "no more than 3 bytes allowed");
316
0
        let mut buf = [0; 11];
317
0
        buf[..prefix.len()].copy_from_slice(prefix);
318
0
        let chlen = ch.encode_utf8(&mut buf[prefix.len()..]).len();
319
0
        UnescapeState::Bytes { buf, cur: 0, len: prefix.len() + chlen }
320
0
    }
Unexecuted instantiation: <bstr::escape_bytes::UnescapeState>::bytes
Unexecuted instantiation: <bstr::escape_bytes::UnescapeState>::bytes
Unexecuted instantiation: <bstr::escape_bytes::UnescapeState>::bytes
Unexecuted instantiation: <bstr::escape_bytes::UnescapeState>::bytes
321
322
    /// Create a new `Bytes` variant with the prefix byte slice, followed by
323
    /// the UTF-8 encoding of `ch1` and then `ch2`.
324
    ///
325
    /// # Panics
326
    ///
327
    /// Panics if `prefix.len() > 3`.
328
0
    fn bytes2(prefix: &[u8], ch1: char, ch2: char) -> UnescapeState {
329
0
        // This can be increased, you just need to make sure 'buf' in the
330
0
        // 'Bytes' state has enough room.
331
0
        assert!(prefix.len() <= 3, "no more than 3 bytes allowed");
332
0
        let mut buf = [0; 11];
333
0
        buf[..prefix.len()].copy_from_slice(prefix);
334
0
        let len1 = ch1.encode_utf8(&mut buf[prefix.len()..]).len();
335
0
        let len2 = ch2.encode_utf8(&mut buf[prefix.len() + len1..]).len();
336
0
        UnescapeState::Bytes { buf, cur: 0, len: prefix.len() + len1 + len2 }
337
0
    }
Unexecuted instantiation: <bstr::escape_bytes::UnescapeState>::bytes2
Unexecuted instantiation: <bstr::escape_bytes::UnescapeState>::bytes2
Unexecuted instantiation: <bstr::escape_bytes::UnescapeState>::bytes2
Unexecuted instantiation: <bstr::escape_bytes::UnescapeState>::bytes2
338
}
339
340
/// Convert the given codepoint to its corresponding hexadecimal digit.
341
///
342
/// # Panics
343
///
344
/// This panics if `ch` is not in `[0-9A-Fa-f]`.
345
#[cfg(feature = "alloc")]
346
0
fn char_to_hexdigit(ch: char) -> u8 {
347
0
    u8::try_from(ch.to_digit(16).unwrap()).unwrap()
348
0
}
Unexecuted instantiation: bstr::escape_bytes::char_to_hexdigit
Unexecuted instantiation: bstr::escape_bytes::char_to_hexdigit
Unexecuted instantiation: bstr::escape_bytes::char_to_hexdigit
Unexecuted instantiation: bstr::escape_bytes::char_to_hexdigit
349
350
/// Convert the given hexadecimal digit to its corresponding codepoint.
351
///
352
/// # Panics
353
///
354
/// This panics when `digit > 15`.
355
0
fn hexdigit_to_char(digit: u8) -> char {
356
0
    char::from_digit(u32::from(digit), 16).unwrap().to_ascii_uppercase()
357
0
}
Unexecuted instantiation: bstr::escape_bytes::hexdigit_to_char
Unexecuted instantiation: bstr::escape_bytes::hexdigit_to_char
Unexecuted instantiation: bstr::escape_bytes::hexdigit_to_char
Unexecuted instantiation: bstr::escape_bytes::hexdigit_to_char
358
359
#[cfg(all(test, feature = "std"))]
360
mod tests {
361
    use alloc::string::{String, ToString};
362
363
    use crate::BString;
364
365
    use super::*;
366
367
    #[allow(non_snake_case)]
368
    fn B<B: AsRef<[u8]>>(bytes: B) -> BString {
369
        BString::from(bytes.as_ref())
370
    }
371
372
    fn e<B: AsRef<[u8]>>(bytes: B) -> String {
373
        EscapeBytes::new(bytes.as_ref()).to_string()
374
    }
375
376
    fn u(string: &str) -> BString {
377
        UnescapeBytes::new(string.chars()).collect()
378
    }
379
380
    #[test]
381
    fn escape() {
382
        assert_eq!(r"a", e(br"a"));
383
        assert_eq!(r"\\x61", e(br"\x61"));
384
        assert_eq!(r"a", e(b"\x61"));
385
        assert_eq!(r"~", e(b"\x7E"));
386
        assert_eq!(r"\x7F", e(b"\x7F"));
387
388
        assert_eq!(r"\n", e(b"\n"));
389
        assert_eq!(r"\r", e(b"\r"));
390
        assert_eq!(r"\t", e(b"\t"));
391
        assert_eq!(r"\\", e(b"\\"));
392
        assert_eq!(r"\0", e(b"\0"));
393
        assert_eq!(r"\0", e(b"\x00"));
394
395
        assert_eq!(r"\x88", e(b"\x88"));
396
        assert_eq!(r"\x8F", e(b"\x8F"));
397
        assert_eq!(r"\xF8", e(b"\xF8"));
398
        assert_eq!(r"\xFF", e(b"\xFF"));
399
400
        assert_eq!(r"\xE2", e(b"\xE2"));
401
        assert_eq!(r"\xE2\x98", e(b"\xE2\x98"));
402
        assert_eq!(r"☃", e(b"\xE2\x98\x83"));
403
404
        assert_eq!(r"\xF0", e(b"\xF0"));
405
        assert_eq!(r"\xF0\x9F", e(b"\xF0\x9F"));
406
        assert_eq!(r"\xF0\x9F\x92", e(b"\xF0\x9F\x92"));
407
        assert_eq!(r"💩", e(b"\xF0\x9F\x92\xA9"));
408
    }
409
410
    #[test]
411
    fn unescape() {
412
        assert_eq!(B(r"a"), u(r"a"));
413
        assert_eq!(B(r"\x61"), u(r"\\x61"));
414
        assert_eq!(B(r"a"), u(r"\x61"));
415
        assert_eq!(B(r"~"), u(r"\x7E"));
416
        assert_eq!(B(b"\x7F"), u(r"\x7F"));
417
418
        assert_eq!(B(b"\n"), u(r"\n"));
419
        assert_eq!(B(b"\r"), u(r"\r"));
420
        assert_eq!(B(b"\t"), u(r"\t"));
421
        assert_eq!(B(b"\\"), u(r"\\"));
422
        assert_eq!(B(b"\0"), u(r"\0"));
423
        assert_eq!(B(b"\0"), u(r"\x00"));
424
425
        assert_eq!(B(b"\x88"), u(r"\x88"));
426
        assert_eq!(B(b"\x8F"), u(r"\x8F"));
427
        assert_eq!(B(b"\xF8"), u(r"\xF8"));
428
        assert_eq!(B(b"\xFF"), u(r"\xFF"));
429
430
        assert_eq!(B(b"\xE2"), u(r"\xE2"));
431
        assert_eq!(B(b"\xE2\x98"), u(r"\xE2\x98"));
432
        assert_eq!(B("☃"), u(r"\xE2\x98\x83"));
433
434
        assert_eq!(B(b"\xF0"), u(r"\xf0"));
435
        assert_eq!(B(b"\xF0\x9F"), u(r"\xf0\x9f"));
436
        assert_eq!(B(b"\xF0\x9F\x92"), u(r"\xf0\x9f\x92"));
437
        assert_eq!(B("💩"), u(r"\xf0\x9f\x92\xa9"));
438
    }
439
440
    #[test]
441
    fn unescape_weird() {
442
        assert_eq!(B(b"\\"), u(r"\"));
443
        assert_eq!(B(b"\\"), u(r"\\"));
444
        assert_eq!(B(b"\\x"), u(r"\x"));
445
        assert_eq!(B(b"\\xA"), u(r"\xA"));
446
447
        assert_eq!(B(b"\\xZ"), u(r"\xZ"));
448
        assert_eq!(B(b"\\xZZ"), u(r"\xZZ"));
449
        assert_eq!(B(b"\\i"), u(r"\i"));
450
        assert_eq!(B(b"\\u"), u(r"\u"));
451
        assert_eq!(B(b"\\u{2603}"), u(r"\u{2603}"));
452
    }
453
}