/rust/registry/src/index.crates.io-6f17d22bba15001f/bstr-1.10.0/src/escape_bytes.rs

Source (jump to first uncovered line)
/// An iterator of `char` values that represent an escaping of arbitrary bytes.
///
/// The lifetime parameter `'a` refers to the lifetime of the bytes being
/// escaped.
///
/// This iterator is created by the
/// [`ByteSlice::escape_bytes`](crate::ByteSlice::escape_bytes) method.
#[derive(Clone, Debug)]
pub struct EscapeBytes<'a> {
    /// The bytes that have not been consumed by the iterator yet. The slice
    /// shrinks from the front as bytes (or whole codepoints) are escaped.
    remaining: &'a [u8],
    /// Where the iterator is within the escape sequence it is currently
    /// emitting, if any.
    state: EscapeState,
}

impl<'a> EscapeBytes<'a> {
    /// Create a new escaping iterator over the given bytes.
    ///
    /// The iterator begins in the `Start` state with all of `bytes` still
    /// waiting to be escaped. The returned iterator borrows `bytes` for
    /// `'a`, which is now spelled out in the return type instead of being
    /// hidden behind an elided lifetime.
    pub(crate) fn new(bytes: &'a [u8]) -> EscapeBytes<'a> {
        EscapeBytes { remaining: bytes, state: EscapeState::Start }
    }
}

impl<'a> Iterator for EscapeBytes<'a> {
    type Item = char;

    #[inline]
    fn next(&mut self) -> Option<char> {
        use self::EscapeState::*;

        match self.state {
            Start => {
                let byte = match crate::decode_utf8(self.remaining) {
                    (None, 0) => return None,
                    // If we see invalid UTF-8 or ASCII, then we always just
                    // peel one byte off. If it's printable ASCII, we'll pass
                    // it through as-is below. Otherwise, below, it will get
                    // escaped in some way.
                    (None, _) | (Some(_), 1) => {
                        let byte = self.remaining[0];
                        self.remaining = &self.remaining[1..];
                        byte
                    }
                    // For any valid UTF-8 that is not ASCII, we pass it
                    // through as-is. We don't do any Unicode escaping.
                    (Some(ch), size) => {
                        self.remaining = &self.remaining[size..];
                        return Some(ch);
                    }
                };
                self.state = match byte {
                    0x21..=0x5B | 0x5D..=0x7E => {
                        return Some(char::from(byte))
                    }
                    b'\0' => SpecialEscape('0'),
                    b'\n' => SpecialEscape('n'),
                    b'\r' => SpecialEscape('r'),
                    b'\t' => SpecialEscape('t'),
                    b'\\' => SpecialEscape('\\'),
                    _ => HexEscapeX(byte),
                };
                Some('\\')
            }
            SpecialEscape(ch) => {
                self.state = Start;
                Some(ch)
            }
            HexEscapeX(byte) => {
                self.state = HexEscapeHighNybble(byte);
                Some('x')
            }
            HexEscapeHighNybble(byte) => {
                self.state = HexEscapeLowNybble(byte);
                let nybble = byte >> 4;
                Some(hexdigit_to_char(nybble))
            }
            HexEscapeLowNybble(byte) => {
                self.state = Start;
                let nybble = byte & 0xF;
                Some(hexdigit_to_char(nybble))
            }
        }
    }
}

impl<'a> core::fmt::Display for EscapeBytes<'a> {
    /// Write the escaped form of the bytes to `f`, one `char` at a time.
    ///
    /// The iterator is cloned first, so formatting never advances `self`.
    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
        use core::fmt::Write;

        // Stops at the first failed write and propagates that error.
        self.clone().try_for_each(|ch| f.write_char(ch))
    }
}

/// The state used by the FSM in the escaping iterator.
///
/// Each call to the iterator's `next` emits exactly one `char` and moves to
/// the state that describes what must be emitted on the following call.
#[derive(Clone, Debug)]
enum EscapeState {
    /// Read and remove the next byte from 'remaining'. If 'remaining' is
    /// empty, then return None. Otherwise, escape the byte according to the
    /// following rules or emit it as-is.
    ///
    /// If it's \n, \r, \t, \\ or \0, then emit a '\' and set the current
    /// state to 'SpecialEscape(n | r | t | \ | 0)'. Otherwise, if the 'byte'
    /// is not in [\x21-\x5B\x5D-\x7E], then emit a '\' and set the state to
    /// 'HexEscapeX(byte)'. Bytes inside that range are emitted as-is and the
    /// state remains 'Start'.
    Start,
    /// Emit the given codepoint as is. This assumes '\' has just been emitted.
    /// Then set the state to 'Start'.
    SpecialEscape(char),
    /// Emit the 'x' part of a hex escape. This assumes '\' has just been
    /// emitted. Then set the state to 'HexEscapeHighNybble(byte)'.
    HexEscapeX(u8),
    /// Emit the high nybble of the byte as a hexadecimal digit. This
    /// assumes '\x' has just been emitted. Then set the state to
    /// 'HexEscapeLowNybble(byte)'.
    HexEscapeHighNybble(u8),
    /// Emit the low nybble of the byte as a hexadecimal digit. This assumes
    /// '\xZ' has just been emitted, where 'Z' is the high nybble of this byte.
    /// Then set the state to 'Start'.
    HexEscapeLowNybble(u8),
}

/// An iterator of `u8` values that represent an unescaping of a sequence of
/// codepoints.
///
/// The type parameter `I` refers to the iterator of codepoints that is
/// unescaped.
///
/// Currently this iterator is not exposed in the crate API, and instead all
/// we expose is a `ByteVec::unescape` method. Which of course requires an
/// alloc. That's the most convenient form of this, but in theory, we could
/// expose this for core-only use cases too. I'm just not quite sure what the
/// API should be.
#[derive(Clone, Debug)]
#[cfg(feature = "alloc")]
pub(crate) struct UnescapeBytes<I> {
    /// The source of codepoints that are being unescaped.
    it: I,
    /// Tracks a partially read escape sequence, or bytes that have been
    /// produced but not yet yielded.
    state: UnescapeState,
}

#[cfg(feature = "alloc")]
impl<I: Iterator<Item = char>> UnescapeBytes<I> {
    /// Build an unescaping iterator from anything that can be turned into an
    /// iterator of codepoints. The iterator starts out in the `Start` state.
    pub(crate) fn new<T: IntoIterator<IntoIter = I>>(
        t: T,
    ) -> UnescapeBytes<I> {
        let it = t.into_iter();
        let state = UnescapeState::Start;
        UnescapeBytes { it, state }
    }
}

#[cfg(feature = "alloc")]
impl<I: Iterator<Item = char>> Iterator for UnescapeBytes<I> {
    type Item = u8;

    /// Yield the next unescaped byte, or `None` when the underlying
    /// codepoints are exhausted and nothing is left buffered.
    ///
    /// Recognized escapes are `\0`, `\\`, `\r`, `\n`, `\t` and `\xNN`.
    /// Anything else that begins with a backslash, including an escape cut
    /// short by the end of input, is yielded verbatim.
    fn next(&mut self) -> Option<u8> {
        use self::UnescapeState::*;

        loop {
            match self.state {
                Start => {
                    // An exhausted input ends the iteration right here.
                    self.state = match self.it.next()? {
                        '\\' => Escape,
                        // Any other codepoint is emitted as its UTF-8
                        // encoding via the `Bytes` state.
                        other => UnescapeState::bytes(&[], other),
                    };
                }
                Bytes { buf, cur, len } => {
                    let byte = buf[cur];
                    let next = cur + 1;
                    self.state = if next < len {
                        Bytes { buf, cur: next, len }
                    } else {
                        Start
                    };
                    return Some(byte);
                }
                Escape => {
                    let ch = match self.it.next() {
                        None => {
                            // Incomplete escape sequences unescape as
                            // themselves.
                            self.state = Start;
                            return Some(b'\\');
                        }
                        Some(ch) => ch,
                    };
                    // The single-char escapes map directly to one byte.
                    let simple = match ch {
                        '0' => Some(b'\x00'),
                        '\\' => Some(b'\\'),
                        'r' => Some(b'\r'),
                        'n' => Some(b'\n'),
                        't' => Some(b'\t'),
                        _ => None,
                    };
                    if let Some(byte) = simple {
                        self.state = Start;
                        return Some(byte);
                    }
                    self.state = if ch == 'x' {
                        HexFirst
                    } else {
                        // An invalid escape sequence unescapes as itself.
                        UnescapeState::bytes(&[b'\\'], ch)
                    };
                }
                HexFirst => match self.it.next() {
                    Some(digit) if digit.is_ascii_hexdigit() => {
                        self.state = HexSecond(digit);
                    }
                    Some(other) => {
                        // An invalid escape sequence unescapes as itself.
                        self.state = UnescapeState::bytes(&[b'x'], other);
                        return Some(b'\\');
                    }
                    None => {
                        // An incomplete escape sequence unescapes as
                        // itself.
                        self.state = UnescapeState::bytes_raw(&[b'x']);
                        return Some(b'\\');
                    }
                },
                HexSecond(first) => {
                    let byte = match self.it.next() {
                        Some(second) if second.is_ascii_hexdigit() => {
                            self.state = Start;
                            let high = char_to_hexdigit(first);
                            let low = char_to_hexdigit(second);
                            high << 4 | low
                        }
                        Some(other) => {
                            // An invalid escape sequence unescapes as
                            // itself.
                            self.state =
                                UnescapeState::bytes2(&[b'x'], first, other);
                            b'\\'
                        }
                        None => {
                            // An incomplete escape sequence unescapes as
                            // itself.
                            self.state =
                                UnescapeState::bytes(&[b'x'], first);
                            b'\\'
                        }
                    };
                    return Some(byte);
                }
            }
        }
    }
}

/// The state used by the FSM in the unescaping iterator.
#[derive(Clone, Debug)]
#[cfg(feature = "alloc")]
enum UnescapeState {
    /// The start state. Look for an escape sequence, otherwise emit the next
    /// codepoint as-is.
    Start,
    /// Emit the byte at `buf[cur]`.
    ///
    /// This state should never be created when `cur >= len`. That is, when
    /// this state is visited, it is assumed that `cur < len`.
    ///
    /// Only `buf[..len]` is meaningful. The capacity of 11 is enough for the
    /// largest sequence the constructors build: a prefix of at most 3 bytes
    /// followed by the UTF-8 encodings of two codepoints (at most 4 bytes
    /// each).
    Bytes { buf: [u8; 11], cur: usize, len: usize },
    /// This state is entered after a `\` is seen.
    Escape,
    /// This state is entered after a `\x` is seen.
    HexFirst,
    /// This state is entered after a `\xN` is seen, where `N` is in
    /// `[0-9A-Fa-f]`. The given codepoint corresponds to `N`.
    HexSecond(char),
}

#[cfg(feature = "alloc")]
impl UnescapeState {
    /// Build a `Bytes` state that will emit exactly the given bytes.
    ///
    /// # Panics
    ///
    /// Panics if `bytes.len() > 11`.
    fn bytes_raw(bytes: &[u8]) -> UnescapeState {
        let len = bytes.len();
        // The limit is the capacity of 'buf' in the 'Bytes' state. Raising
        // it requires growing that array as well.
        assert!(len <= 11, "no more than 11 bytes allowed");
        let mut buf = [0u8; 11];
        buf[..len].copy_from_slice(bytes);
        UnescapeState::Bytes { buf, cur: 0, len }
    }

    /// Build a `Bytes` state that will emit `prefix` and then the UTF-8
    /// encoding of `ch`.
    ///
    /// # Panics
    ///
    /// Panics if `prefix.len() > 3`.
    fn bytes(prefix: &[u8], ch: char) -> UnescapeState {
        // The limit keeps the prefix plus encoded codepoints within 'buf' in
        // the 'Bytes' state. Raising it requires growing that array as well.
        assert!(prefix.len() <= 3, "no more than 3 bytes allowed");
        let mut buf = [0u8; 11];
        let (head, tail) = buf.split_at_mut(prefix.len());
        head.copy_from_slice(prefix);
        let len = prefix.len() + ch.encode_utf8(tail).len();
        UnescapeState::Bytes { buf, cur: 0, len }
    }

    /// Build a `Bytes` state that will emit `prefix`, then the UTF-8
    /// encoding of `ch1`, then the UTF-8 encoding of `ch2`.
    ///
    /// # Panics
    ///
    /// Panics if `prefix.len() > 3`.
    fn bytes2(prefix: &[u8], ch1: char, ch2: char) -> UnescapeState {
        // The limit keeps the prefix plus encoded codepoints within 'buf' in
        // the 'Bytes' state. Raising it requires growing that array as well.
        assert!(prefix.len() <= 3, "no more than 3 bytes allowed");
        let mut buf = [0u8; 11];
        let mut len = prefix.len();
        buf[..len].copy_from_slice(prefix);
        // Append each codepoint's encoding directly after what has been
        // written so far.
        for ch in [ch1, ch2].iter() {
            len += ch.encode_utf8(&mut buf[len..]).len();
        }
        UnescapeState::Bytes { buf, cur: 0, len }
    }
}

/// Return the numeric value (0 through 15) of a hexadecimal digit codepoint.
///
/// Both uppercase and lowercase letters are accepted.
///
/// # Panics
///
/// This panics if `ch` is not in `[0-9A-Fa-f]`.
#[cfg(feature = "alloc")]
fn char_to_hexdigit(ch: char) -> u8 {
    let value = ch.to_digit(16).unwrap();
    // A base-16 digit is at most 15, so the narrowing cannot fail.
    u8::try_from(value).unwrap()
}

/// Return the uppercase hexadecimal codepoint (`0`-`9`, `A`-`F`) for a
/// nybble value.
///
/// # Panics
///
/// This panics when `digit > 15`.
fn hexdigit_to_char(digit: u8) -> char {
    // Indexing the 16-entry table is what rejects values above 15.
    let table = b"0123456789ABCDEF";
    char::from(table[usize::from(digit)])
}

// Unit tests for the escaping and unescaping iterators. The `escape` and
// `unescape` tests mirror each other case by case.
#[cfg(all(test, feature = "std"))]
mod tests {
    use alloc::string::{String, ToString};

    use crate::BString;

    use super::*;

    /// Shorthand for building the expected `BString` from anything that is
    /// byte-like.
    #[allow(non_snake_case)]
    fn B<B: AsRef<[u8]>>(bytes: B) -> BString {
        BString::from(bytes.as_ref())
    }

    /// Escape the given bytes and collect the result into a `String`.
    fn e<B: AsRef<[u8]>>(bytes: B) -> String {
        EscapeBytes::new(bytes.as_ref()).to_string()
    }

    /// Unescape the given string and collect the result into a `BString`.
    fn u(string: &str) -> BString {
        UnescapeBytes::new(string.chars()).collect()
    }

    #[test]
    fn escape() {
        // Printable ASCII passes through; a literal backslash is doubled and
        // DEL is hex escaped.
        assert_eq!(r"a", e(br"a"));
        assert_eq!(r"\\x61", e(br"\x61"));
        assert_eq!(r"a", e(b"\x61"));
        assert_eq!(r"~", e(b"\x7E"));
        assert_eq!(r"\x7F", e(b"\x7F"));

        // Bytes with a dedicated short escape.
        assert_eq!(r"\n", e(b"\n"));
        assert_eq!(r"\r", e(b"\r"));
        assert_eq!(r"\t", e(b"\t"));
        assert_eq!(r"\\", e(b"\\"));
        assert_eq!(r"\0", e(b"\0"));
        assert_eq!(r"\0", e(b"\x00"));

        // Lone non-ASCII bytes are invalid UTF-8 and get hex escaped.
        assert_eq!(r"\x88", e(b"\x88"));
        assert_eq!(r"\x8F", e(b"\x8F"));
        assert_eq!(r"\xF8", e(b"\xF8"));
        assert_eq!(r"\xFF", e(b"\xFF"));

        // Truncated three-byte sequence is escaped byte by byte; the complete
        // one passes through as the codepoint.
        assert_eq!(r"\xE2", e(b"\xE2"));
        assert_eq!(r"\xE2\x98", e(b"\xE2\x98"));
        assert_eq!(r"☃", e(b"\xE2\x98\x83"));

        // Same as above, for a four-byte sequence.
        assert_eq!(r"\xF0", e(b"\xF0"));
        assert_eq!(r"\xF0\x9F", e(b"\xF0\x9F"));
        assert_eq!(r"\xF0\x9F\x92", e(b"\xF0\x9F\x92"));
        assert_eq!(r"💩", e(b"\xF0\x9F\x92\xA9"));
    }

    #[test]
    fn unescape() {
        // Plain text, an escaped backslash and hex escapes of ASCII bytes.
        assert_eq!(B(r"a"), u(r"a"));
        assert_eq!(B(r"\x61"), u(r"\\x61"));
        assert_eq!(B(r"a"), u(r"\x61"));
        assert_eq!(B(r"~"), u(r"\x7E"));
        assert_eq!(B(b"\x7F"), u(r"\x7F"));

        // The dedicated short escapes.
        assert_eq!(B(b"\n"), u(r"\n"));
        assert_eq!(B(b"\r"), u(r"\r"));
        assert_eq!(B(b"\t"), u(r"\t"));
        assert_eq!(B(b"\\"), u(r"\\"));
        assert_eq!(B(b"\0"), u(r"\0"));
        assert_eq!(B(b"\0"), u(r"\x00"));

        // Hex escapes of non-ASCII bytes yield the raw byte.
        assert_eq!(B(b"\x88"), u(r"\x88"));
        assert_eq!(B(b"\x8F"), u(r"\x8F"));
        assert_eq!(B(b"\xF8"), u(r"\xF8"));
        assert_eq!(B(b"\xFF"), u(r"\xFF"));

        // Hex escapes can spell out a UTF-8 sequence one byte at a time.
        assert_eq!(B(b"\xE2"), u(r"\xE2"));
        assert_eq!(B(b"\xE2\x98"), u(r"\xE2\x98"));
        assert_eq!(B("☃"), u(r"\xE2\x98\x83"));

        // Lowercase hex digits are accepted too.
        assert_eq!(B(b"\xF0"), u(r"\xf0"));
        assert_eq!(B(b"\xF0\x9F"), u(r"\xf0\x9f"));
        assert_eq!(B(b"\xF0\x9F\x92"), u(r"\xf0\x9f\x92"));
        assert_eq!(B("💩"), u(r"\xf0\x9f\x92\xa9"));
    }

    #[test]
    fn unescape_weird() {
        // Escape sequences cut short by the end of input come back verbatim.
        assert_eq!(B(b"\\"), u(r"\"));
        assert_eq!(B(b"\\"), u(r"\\"));
        assert_eq!(B(b"\\x"), u(r"\x"));
        assert_eq!(B(b"\\xA"), u(r"\xA"));

        // Invalid or unsupported escape sequences come back verbatim.
        assert_eq!(B(b"\\xZ"), u(r"\xZ"));
        assert_eq!(B(b"\\xZZ"), u(r"\xZZ"));
        assert_eq!(B(b"\\i"), u(r"\i"));
        assert_eq!(B(b"\\u"), u(r"\u"));
        assert_eq!(B(b"\\u{2603}"), u(r"\u{2603}"));
    }
}

Coverage Report

Created: 2024-08-22 06:13

Line	Count	Source (jump to first uncovered line)
1		/// An iterator of `char` values that represent an escaping of arbitrary bytes.
2		///
3		/// The lifetime parameter `'a` refers to the lifetime of the bytes being
4		/// escaped.
5		///
6		/// This iterator is created by the
7		/// [`ByteSlice::escape_bytes`](crate::ByteSlice::escape_bytes) method.
8		#[derive(Clone, Debug)]
9		pub struct EscapeBytes<'a> {
10		remaining: &'a [u8],
11		state: EscapeState,
12		}
13
14		impl<'a> EscapeBytes<'a> {
15	0	pub(crate) fn new(bytes: &'a [u8]) -> EscapeBytes {
16	0	EscapeBytes { remaining: bytes, state: EscapeState::Start }
17	0	} Unexecuted instantiation: <bstr::escape_bytes::EscapeBytes>::new Unexecuted instantiation: <bstr::escape_bytes::EscapeBytes>::new Unexecuted instantiation: <bstr::escape_bytes::EscapeBytes>::new Unexecuted instantiation: <bstr::escape_bytes::EscapeBytes>::new
18		}
19
20		impl<'a> Iterator for EscapeBytes<'a> {
21		type Item = char;
22
23		#[inline]
24	0	fn next(&mut self) -> Option<char> {
25	0	use self::EscapeState::*;
26	0
27	0	match self.state {
28		Start => {
29	0	let byte = match crate::decode_utf8(self.remaining) {
30	0	(None, 0) => return None,
31		// If we see invalid UTF-8 or ASCII, then we always just
32		// peel one byte off. If it's printable ASCII, we'll pass
33		// it through as-is below. Otherwise, below, it will get
34		// escaped in some way.
35		(None, _) \| (Some(_), 1) => {
36	0	let byte = self.remaining[0];
37	0	self.remaining = &self.remaining[1..];
38	0	byte
39		}
40		// For any valid UTF-8 that is not ASCII, we pass it
41		// through as-is. We don't do any Unicode escaping.
42	0	(Some(ch), size) => {
43	0	self.remaining = &self.remaining[size..];
44	0	return Some(ch);
45		}
46		};
47	0	self.state = match byte {
48	0	0x21..=0x5B \| 0x5D..=0x7E => {
49	0	return Some(char::from(byte))
50		}
51	0	b'\0' => SpecialEscape('0'),
52	0	b'\n' => SpecialEscape('n'),
53	0	b'\r' => SpecialEscape('r'),
54	0	b'\t' => SpecialEscape('t'),
55	0	b'\\' => SpecialEscape('\\'),
56	0	_ => HexEscapeX(byte),
57		};
58	0	Some('\\')
59		}
60	0	SpecialEscape(ch) => {
61	0	self.state = Start;
62	0	Some(ch)
63		}
64	0	HexEscapeX(byte) => {
65	0	self.state = HexEscapeHighNybble(byte);
66	0	Some('x')
67		}
68	0	HexEscapeHighNybble(byte) => {
69	0	self.state = HexEscapeLowNybble(byte);
70	0	let nybble = byte >> 4;
71	0	Some(hexdigit_to_char(nybble))
72		}
73	0	HexEscapeLowNybble(byte) => {
74	0	self.state = Start;
75	0	let nybble = byte & 0xF;
76	0	Some(hexdigit_to_char(nybble))
77		}
78		}
79	0	} Unexecuted instantiation: <bstr::escape_bytes::EscapeBytes as core::iter::traits::iterator::Iterator>::next Unexecuted instantiation: <bstr::escape_bytes::EscapeBytes as core::iter::traits::iterator::Iterator>::next Unexecuted instantiation: <bstr::escape_bytes::EscapeBytes as core::iter::traits::iterator::Iterator>::next Unexecuted instantiation: <bstr::escape_bytes::EscapeBytes as core::iter::traits::iterator::Iterator>::next
80		}
81
82		impl<'a> core::fmt::Display for EscapeBytes<'a> {
83	0	fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
84		use core::fmt::Write;
85	0	for ch in self.clone() {
86	0	f.write_char(ch)?;
87		}
88	0	Ok(())
89	0	} Unexecuted instantiation: <bstr::escape_bytes::EscapeBytes as core::fmt::Display>::fmt Unexecuted instantiation: <bstr::escape_bytes::EscapeBytes as core::fmt::Display>::fmt Unexecuted instantiation: <bstr::escape_bytes::EscapeBytes as core::fmt::Display>::fmt Unexecuted instantiation: <bstr::escape_bytes::EscapeBytes as core::fmt::Display>::fmt
90		}
91
92		/// The state used by the FSM in the escaping iterator.
93		#[derive(Clone, Debug)]
94		enum EscapeState {
95		/// Read and remove the next byte from 'remaining'. If 'remaining' is
96		/// empty, then return None. Otherwise, escape the byte according to the
97		/// following rules or emit it as-is.
98		///
99		/// If it's \n, \r, \t, \\ or \0, then emit a '\' and set the current
100		/// state to 'SpecialEscape(n \| r \| t \| \ \| 0)'. Otherwise, if the 'byte'
101		/// is not in [\x21-\x5B\x5D-\x7E], then emit a '\' and set the state to
102		/// to 'HexEscapeX(byte)'.
103		Start,
104		/// Emit the given codepoint as is. This assumes '\' has just been emitted.
105		/// Then set the state to 'Start'.
106		SpecialEscape(char),
107		/// Emit the 'x' part of a hex escape. This assumes '\' has just been
108		/// emitted. Then set the state to 'HexEscapeHighNybble(byte)'.
109		HexEscapeX(u8),
110		/// Emit the high nybble of the byte as a hexadecimal digit. This
111		/// assumes '\x' has just been emitted. Then set the state to
112		/// 'HexEscapeLowNybble(byte)'.
113		HexEscapeHighNybble(u8),
114		/// Emit the low nybble of the byte as a hexadecimal digit. This assume
115		/// '\xZ' has just been emitted, where 'Z' is the high nybble of this byte.
116		/// Then set the state to 'Start'.
117		HexEscapeLowNybble(u8),
118		}
119
120		/// An iterator of `u8` values that represent an unescaping of a sequence of
121		/// codepoints.
122		///
123		/// The type parameter `I` refers to the iterator of codepoints that is
124		/// unescaped.
125		///
126		/// Currently this iterator is not exposed in the crate API, and instead all
127		/// we expose is a `ByteVec::unescape` method. Which of course requires an
128		/// alloc. That's the most convenient form of this, but in theory, we could
129		/// expose this for core-only use cases too. I'm just not quite sure what the
130		/// API should be.
131		#[derive(Clone, Debug)]
132		#[cfg(feature = "alloc")]
133		pub(crate) struct UnescapeBytes<I> {
134		it: I,
135		state: UnescapeState,
136		}
137
138		#[cfg(feature = "alloc")]
139		impl<I: Iterator<Item = char>> UnescapeBytes<I> {
140	0	pub(crate) fn new<T: IntoIterator<IntoIter = I>>(
141	0	t: T,
142	0	) -> UnescapeBytes<I> {
143	0	UnescapeBytes { it: t.into_iter(), state: UnescapeState::Start }
144	0	} Unexecuted instantiation: <bstr::escape_bytes::UnescapeBytes<_>>::new::<_> Unexecuted instantiation: <bstr::escape_bytes::UnescapeBytes<_>>::new::<_> Unexecuted instantiation: <bstr::escape_bytes::UnescapeBytes<_>>::new::<_> Unexecuted instantiation: <bstr::escape_bytes::UnescapeBytes<_>>::new::<_>
145		}
146
147		#[cfg(feature = "alloc")]
148		impl<I: Iterator<Item = char>> Iterator for UnescapeBytes<I> {
149		type Item = u8;
150
151	0	fn next(&mut self) -> Option<u8> {
152		use self::UnescapeState::*;
153
154	0	loop {
155	0	match self.state {
156		Start => {
157	0	let ch = self.it.next()?;
158	0	match ch {
159	0	'\\' => {
160	0	self.state = Escape;
161	0	}
162	0	ch => {
163	0	self.state = UnescapeState::bytes(&[], ch);
164	0	}
165		}
166		}
167	0	Bytes { buf, mut cur, len } => {
168	0	let byte = buf[cur];
169	0	cur += 1;
170	0	if cur >= len {
171	0	self.state = Start;
172	0	} else {
173	0	self.state = Bytes { buf, cur, len };
174	0	}
175	0	return Some(byte);
176		}
177		Escape => {
178	0	let ch = match self.it.next() {
179	0	Some(ch) => ch,
180		None => {
181	0	self.state = Start;
182	0	// Incomplete escape sequences unescape as
183	0	// themselves.
184	0	return Some(b'\\');
185		}
186		};
187	0	match ch {
188		'0' => {
189	0	self.state = Start;
190	0	return Some(b'\x00');
191		}
192		'\\' => {
193	0	self.state = Start;
194	0	return Some(b'\\');
195		}
196		'r' => {
197	0	self.state = Start;
198	0	return Some(b'\r');
199		}
200		'n' => {
201	0	self.state = Start;
202	0	return Some(b'\n');
203		}
204		't' => {
205	0	self.state = Start;
206	0	return Some(b'\t');
207		}
208	0	'x' => {
209	0	self.state = HexFirst;
210	0	}
211	0	ch => {
212	0	// An invalid escape sequence unescapes as itself.
213	0	self.state = UnescapeState::bytes(&[b'\\'], ch);
214	0	}
215		}
216		}
217		HexFirst => {
218	0	let ch = match self.it.next() {
219	0	Some(ch) => ch,
220		None => {
221		// An incomplete escape sequence unescapes as
222		// itself.
223	0	self.state = UnescapeState::bytes_raw(&[b'x']);
224	0	return Some(b'\\');
225		}
226		};
227	0	match ch {
228	0	'0'..='9' \| 'A'..='F' \| 'a'..='f' => {
229	0	self.state = HexSecond(ch);
230	0	}
231	0	ch => {
232	0	// An invalid escape sequence unescapes as itself.
233	0	self.state = UnescapeState::bytes(&[b'x'], ch);
234	0	return Some(b'\\');
235		}
236		}
237		}
238	0	HexSecond(first) => {
239	0	let second = match self.it.next() {
240	0	Some(ch) => ch,
241		None => {
242		// An incomplete escape sequence unescapes as
243		// itself.
244	0	self.state = UnescapeState::bytes(&[b'x'], first);
245	0	return Some(b'\\');
246		}
247		};
248	0	match second {
249	0	'0'..='9' \| 'A'..='F' \| 'a'..='f' => {
250	0	self.state = Start;
251	0	let hinybble = char_to_hexdigit(first);
252	0	let lonybble = char_to_hexdigit(second);
253	0	let byte = hinybble << 4 \| lonybble;
254	0	return Some(byte);
255		}
256	0	ch => {
257	0	// An invalid escape sequence unescapes as itself.
258	0	self.state =
259	0	UnescapeState::bytes2(&[b'x'], first, ch);
260	0	return Some(b'\\');
261		}
262		}
263		}
264		}
265		}
266	0	} Unexecuted instantiation: <bstr::escape_bytes::UnescapeBytes<_> as core::iter::traits::iterator::Iterator>::next Unexecuted instantiation: <bstr::escape_bytes::UnescapeBytes<_> as core::iter::traits::iterator::Iterator>::next Unexecuted instantiation: <bstr::escape_bytes::UnescapeBytes<_> as core::iter::traits::iterator::Iterator>::next Unexecuted instantiation: <bstr::escape_bytes::UnescapeBytes<_> as core::iter::traits::iterator::Iterator>::next
267		}
268
269		/// The state used by the FSM in the unescaping iterator.
270		#[derive(Clone, Debug)]
271		#[cfg(feature = "alloc")]
272		enum UnescapeState {
273		/// The start state. Look for an escape sequence, otherwise emit the next
274		/// codepoint as-is.
275		Start,
276		/// Emit the byte at `buf[cur]`.
277		///
278		/// This state should never be created when `cur >= len`. That is, when
279		/// this state is visited, it is assumed that `cur < len`.
280		Bytes { buf: [u8; 11], cur: usize, len: usize },
281		/// This state is entered after a `\` is seen.
282		Escape,
283		/// This state is entered after a `\x` is seen.
284		HexFirst,
285		/// This state is entered after a `\xN` is seen, where `N` is in
286		/// `[0-9A-Fa-f]`. The given codepoint corresponds to `N`.
287		HexSecond(char),
288		}
289
290		#[cfg(feature = "alloc")]
291		impl UnescapeState {
292		/// Create a new `Bytes` variant with the given slice.
293		///
294		/// # Panics
295		///
296		/// Panics if `bytes.len() > 11`.
297	0	fn bytes_raw(bytes: &[u8]) -> UnescapeState {
298	0	// This can be increased, you just need to make sure 'buf' in the
299	0	// 'Bytes' state has enough room.
300	0	assert!(bytes.len() <= 11, "no more than 11 bytes allowed");
301	0	let mut buf = [0; 11];
302	0	buf[..bytes.len()].copy_from_slice(bytes);
303	0	UnescapeState::Bytes { buf, cur: 0, len: bytes.len() }
304	0	} Unexecuted instantiation: <bstr::escape_bytes::UnescapeState>::bytes_raw Unexecuted instantiation: <bstr::escape_bytes::UnescapeState>::bytes_raw Unexecuted instantiation: <bstr::escape_bytes::UnescapeState>::bytes_raw Unexecuted instantiation: <bstr::escape_bytes::UnescapeState>::bytes_raw
305
306		/// Create a new `Bytes` variant with the prefix byte slice, followed by
307		/// the UTF-8 encoding of the given char.
308		///
309		/// # Panics
310		///
311		/// Panics if `prefix.len() > 3`.
312	0	fn bytes(prefix: &[u8], ch: char) -> UnescapeState {
313	0	// This can be increased, you just need to make sure 'buf' in the
314	0	// 'Bytes' state has enough room.
315	0	assert!(prefix.len() <= 3, "no more than 3 bytes allowed");
316	0	let mut buf = [0; 11];
317	0	buf[..prefix.len()].copy_from_slice(prefix);
318	0	let chlen = ch.encode_utf8(&mut buf[prefix.len()..]).len();
319	0	UnescapeState::Bytes { buf, cur: 0, len: prefix.len() + chlen }
320	0	} Unexecuted instantiation: <bstr::escape_bytes::UnescapeState>::bytes Unexecuted instantiation: <bstr::escape_bytes::UnescapeState>::bytes Unexecuted instantiation: <bstr::escape_bytes::UnescapeState>::bytes Unexecuted instantiation: <bstr::escape_bytes::UnescapeState>::bytes
321
322		/// Create a new `Bytes` variant with the prefix byte slice, followed by
323		/// the UTF-8 encoding of `ch1` and then `ch2`.
324		///
325		/// # Panics
326		///
327		/// Panics if `prefix.len() > 3`.
328	0	fn bytes2(prefix: &[u8], ch1: char, ch2: char) -> UnescapeState {
329	0	// This can be increased, you just need to make sure 'buf' in the
330	0	// 'Bytes' state has enough room.
331	0	assert!(prefix.len() <= 3, "no more than 3 bytes allowed");
332	0	let mut buf = [0; 11];
333	0	buf[..prefix.len()].copy_from_slice(prefix);
334	0	let len1 = ch1.encode_utf8(&mut buf[prefix.len()..]).len();
335	0	let len2 = ch2.encode_utf8(&mut buf[prefix.len() + len1..]).len();
336	0	UnescapeState::Bytes { buf, cur: 0, len: prefix.len() + len1 + len2 }
337	0	} Unexecuted instantiation: <bstr::escape_bytes::UnescapeState>::bytes2 Unexecuted instantiation: <bstr::escape_bytes::UnescapeState>::bytes2 Unexecuted instantiation: <bstr::escape_bytes::UnescapeState>::bytes2 Unexecuted instantiation: <bstr::escape_bytes::UnescapeState>::bytes2
338		}
339
340		/// Convert the given codepoint to its corresponding hexadecimal digit.
341		///
342		/// # Panics
343		///
344		/// This panics if `ch` is not in `[0-9A-Fa-f]`.
345		#[cfg(feature = "alloc")]
346	0	fn char_to_hexdigit(ch: char) -> u8 {
347	0	u8::try_from(ch.to_digit(16).unwrap()).unwrap()
348	0	} Unexecuted instantiation: bstr::escape_bytes::char_to_hexdigit Unexecuted instantiation: bstr::escape_bytes::char_to_hexdigit Unexecuted instantiation: bstr::escape_bytes::char_to_hexdigit Unexecuted instantiation: bstr::escape_bytes::char_to_hexdigit
349
350		/// Convert the given hexadecimal digit to its corresponding codepoint.
351		///
352		/// # Panics
353		///
354		/// This panics when `digit > 15`.
355	0	fn hexdigit_to_char(digit: u8) -> char {
356	0	char::from_digit(u32::from(digit), 16).unwrap().to_ascii_uppercase()
357	0	} Unexecuted instantiation: bstr::escape_bytes::hexdigit_to_char Unexecuted instantiation: bstr::escape_bytes::hexdigit_to_char Unexecuted instantiation: bstr::escape_bytes::hexdigit_to_char Unexecuted instantiation: bstr::escape_bytes::hexdigit_to_char
358
359		#[cfg(all(test, feature = "std"))]
360		mod tests {
361		use alloc::string::{String, ToString};
362
363		use crate::BString;
364
365		use super::*;
366
367		#[allow(non_snake_case)]
368		fn B<B: AsRef<[u8]>>(bytes: B) -> BString {
369		BString::from(bytes.as_ref())
370		}
371
372		fn e<B: AsRef<[u8]>>(bytes: B) -> String {
373		EscapeBytes::new(bytes.as_ref()).to_string()
374		}
375
376		fn u(string: &str) -> BString {
377		UnescapeBytes::new(string.chars()).collect()
378		}
379
380		#[test]
381		fn escape() {
382		assert_eq!(r"a", e(br"a"));
383		assert_eq!(r"\\x61", e(br"\x61"));
384		assert_eq!(r"a", e(b"\x61"));
385		assert_eq!(r"~", e(b"\x7E"));
386		assert_eq!(r"\x7F", e(b"\x7F"));
387
388		assert_eq!(r"\n", e(b"\n"));
389		assert_eq!(r"\r", e(b"\r"));
390		assert_eq!(r"\t", e(b"\t"));
391		assert_eq!(r"\\", e(b"\\"));
392		assert_eq!(r"\0", e(b"\0"));
393		assert_eq!(r"\0", e(b"\x00"));
394
395		assert_eq!(r"\x88", e(b"\x88"));
396		assert_eq!(r"\x8F", e(b"\x8F"));
397		assert_eq!(r"\xF8", e(b"\xF8"));
398		assert_eq!(r"\xFF", e(b"\xFF"));
399
400		assert_eq!(r"\xE2", e(b"\xE2"));
401		assert_eq!(r"\xE2\x98", e(b"\xE2\x98"));
402		assert_eq!(r"☃", e(b"\xE2\x98\x83"));
403
404		assert_eq!(r"\xF0", e(b"\xF0"));
405		assert_eq!(r"\xF0\x9F", e(b"\xF0\x9F"));
406		assert_eq!(r"\xF0\x9F\x92", e(b"\xF0\x9F\x92"));
407		assert_eq!(r"💩", e(b"\xF0\x9F\x92\xA9"));
408		}
409
410		#[test]
411		fn unescape() {
412		assert_eq!(B(r"a"), u(r"a"));
413		assert_eq!(B(r"\x61"), u(r"\\x61"));
414		assert_eq!(B(r"a"), u(r"\x61"));
415		assert_eq!(B(r"~"), u(r"\x7E"));
416		assert_eq!(B(b"\x7F"), u(r"\x7F"));
417
418		assert_eq!(B(b"\n"), u(r"\n"));
419		assert_eq!(B(b"\r"), u(r"\r"));
420		assert_eq!(B(b"\t"), u(r"\t"));
421		assert_eq!(B(b"\\"), u(r"\\"));
422		assert_eq!(B(b"\0"), u(r"\0"));
423		assert_eq!(B(b"\0"), u(r"\x00"));
424
425		assert_eq!(B(b"\x88"), u(r"\x88"));
426		assert_eq!(B(b"\x8F"), u(r"\x8F"));
427		assert_eq!(B(b"\xF8"), u(r"\xF8"));
428		assert_eq!(B(b"\xFF"), u(r"\xFF"));
429
430		assert_eq!(B(b"\xE2"), u(r"\xE2"));
431		assert_eq!(B(b"\xE2\x98"), u(r"\xE2\x98"));
432		assert_eq!(B("☃"), u(r"\xE2\x98\x83"));
433
434		assert_eq!(B(b"\xF0"), u(r"\xf0"));
435		assert_eq!(B(b"\xF0\x9F"), u(r"\xf0\x9f"));
436		assert_eq!(B(b"\xF0\x9F\x92"), u(r"\xf0\x9f\x92"));
437		assert_eq!(B("💩"), u(r"\xf0\x9f\x92\xa9"));
438		}
439
440		#[test]
441		fn unescape_weird() {
442		assert_eq!(B(b"\\"), u(r"\"));
443		assert_eq!(B(b"\\"), u(r"\\"));
444		assert_eq!(B(b"\\x"), u(r"\x"));
445		assert_eq!(B(b"\\xA"), u(r"\xA"));
446
447		assert_eq!(B(b"\\xZ"), u(r"\xZ"));
448		assert_eq!(B(b"\\xZZ"), u(r"\xZZ"));
449		assert_eq!(B(b"\\i"), u(r"\i"));
450		assert_eq!(B(b"\\u"), u(r"\u"));
451		assert_eq!(B(b"\\u{2603}"), u(r"\u{2603}"));
452		}
453		}