/rust/registry/src/index.crates.io-1949cf8c6b5b557f/anstream-0.6.21/src/adapter/strip.rs

Source
use anstyle_parse::state::state_change;
use anstyle_parse::state::Action;
use anstyle_parse::state::State;

/// Strip ANSI escapes from a `&str`, returning the printable content
///
/// This can be used to take output from a program that includes escape sequences and write it
/// somewhere that does not easily support them, such as a log file.
///
/// The returned [`StrippedStr`] is an [`Iterator`] over the printable `&str` segments of `data`.
/// It also implements [`Display`](std::fmt::Display) and provides [`StrippedStr::to_string`] for
/// gathering every segment into one [`String`].
///
/// For non-contiguous data, see [`StripStr`].
///
/// # Example
///
/// ```rust
/// let styled_text = "\x1b[32mfoo\x1b[m bar";
/// let plain_str = anstream::adapter::strip_str(styled_text).to_string();
/// assert_eq!(plain_str, "foo bar");
/// ```
#[inline]
pub fn strip_str(data: &str) -> StrippedStr<'_> {
    StrippedStr::new(data)
}

/// See [`strip_str`]
///
/// Iterating yields the printable `&str` segments of the wrapped text, one run at a time.
#[derive(Default, Clone, Debug, PartialEq, Eq)]
pub struct StrippedStr<'s> {
    // Input that has not been consumed yet; it always originates from a `&str`
    bytes: &'s [u8],
    // Escape-sequence parser state carried between calls to `next`
    state: State,
}

impl<'s> StrippedStr<'s> {
    /// Wrap `data` with the escape parser starting out in [`State::Ground`]
    #[inline]
    fn new(data: &'s str) -> Self {
        let bytes = data.as_bytes();
        let state = State::Ground;
        Self { bytes, state }
    }

    /// Create a [`String`] of the printable content
    ///
    /// The buffer is sized for the whole remaining input up front, and the stripped output is
    /// never longer than that. `self` is not advanced; a copy of the iterator is drained
    /// instead.
    #[inline]
    #[allow(clippy::inherent_to_string_shadow_display)] // Single-allocation implementation
    pub fn to_string(&self) -> String {
        let mut stripped = String::with_capacity(self.bytes.len());
        // Appending each printable segment is exactly what formatting with `{}` would write
        stripped.extend(self.clone());
        stripped
    }
}

impl std::fmt::Display for StrippedStr<'_> {
    /// **Note:** this does *not* exhaust the [`Iterator`]
    ///
    /// A copy of the iterator is walked and each printable segment is formatted in turn with the
    /// caller's formatter, stopping at the first formatting error.
    #[inline]
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        self.clone()
            .try_for_each(|segment| std::fmt::Display::fmt(segment, f))
    }
}

/// Yields each run of printable text found in the wrapped `&str`
impl<'s> Iterator for StrippedStr<'s> {
    type Item = &'s str;

    /// Advance past the next non-printable run and return the printable run that follows it,
    /// or `None` when no printable bytes are found
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        next_str(&mut self.bytes, &mut self.state)
    }
}

/// Incrementally strip non-contiguous data
///
/// Feed each piece of text to [`StripStr::strip_next`]; the parser state lives in this value
/// rather than in the per-piece iterator.
#[derive(Default, Clone, Debug, PartialEq, Eq)]
pub struct StripStr {
    // Escape-sequence parser state shared by every iterator handed out by `strip_next`
    state: State,
}

impl StripStr {
    /// Initial state
    pub fn new() -> Self {
        Self::default()
    }

    /// Strip the next segment of data
    ///
    /// The returned iterator yields the printable `&str` runs of `data` and mutably borrows
    /// this value's parser state while it is alive.
    pub fn strip_next<'s>(&'s mut self, data: &'s str) -> StripStrIter<'s> {
        let state = &mut self.state;
        let bytes = data.as_bytes();
        StripStrIter { bytes, state }
    }
}

/// See [`StripStr`]
///
/// Iterator over the printable `&str` runs of one segment passed to [`StripStr::strip_next`].
#[derive(Debug, PartialEq, Eq)]
pub struct StripStrIter<'s> {
    // Unconsumed remainder of the segment; it always originates from a `&str`
    bytes: &'s [u8],
    // Parser state borrowed from the owning `StripStr`
    state: &'s mut State,
}

/// Yields each run of printable text found in the current segment
impl<'s> Iterator for StripStrIter<'s> {
    type Item = &'s str;

    /// Return the next printable run, or `None` when no printable bytes are found
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        next_str(&mut self.bytes, self.state)
    }
}

/// Pull the next run of printable text off the front of `bytes`
///
/// `bytes` must be (the remainder of) the bytes of a `&str`. On return it has been advanced past
/// everything that was consumed: the skipped non-printable prefix and the returned run.
///
/// `state` is the escape-sequence parser state. When the input runs out while the parser is
/// still inside an escape sequence, `state` is left where the parser stopped so that a later
/// call with the following chunk continues that sequence instead of printing its tail.
///
/// Returns `None` when no printable bytes are found in what was left of `bytes`.
#[inline]
fn next_str<'s>(bytes: &mut &'s [u8], state: &mut State) -> Option<&'s str> {
    // Phase 1: drive the parser over the non-printable prefix until a printable byte shows up.
    let offset = bytes.iter().copied().position(|b| {
        let (next_state, action) = state_change(*state, b);
        // `State::Anywhere` is treated as "stay in the current state"
        if next_state != State::Anywhere {
            *state = next_state;
        }
        is_printable_bytes(action, b)
    });
    let (_, next) = bytes.split_at(offset.unwrap_or(bytes.len()));
    *bytes = next;
    if offset.is_some() {
        // Printable text follows and phase 2 classifies it from the ground state (multi-byte
        // characters are handled there through their continuation bytes), so start over from
        // `Ground`. Without a printable byte the chunk ended mid-sequence and the state must
        // survive for the next chunk.
        *state = State::Ground;
    }

    // Phase 2: take bytes for as long as they are printable or continue a multi-byte character.
    let offset = bytes.iter().copied().position(|b| {
        let (_next_state, action) = state_change(State::Ground, b);
        !(is_printable_bytes(action, b) || is_utf8_continuation(b))
    });
    let (printable, next) = bytes.split_at(offset.unwrap_or(bytes.len()));
    *bytes = next;
    if printable.is_empty() {
        None
    } else {
        // SAFETY: the input came from a `&str`; the run starts at a printable byte (never a
        // continuation byte) and keeps all continuation bytes that follow, so it is cut on
        // character boundaries.
        let printable = unsafe {
            from_utf8_unchecked(
                printable,
                "`bytes` was validated as UTF-8, the parser preserves UTF-8 continuations",
            )
        };
        Some(printable)
    }
}

/// Reinterpret `bytes` as a `&str`, validating only in builds with debug assertions
///
/// With debug assertions enabled the bytes are checked and a failure panics with
/// `safety_justification` as the message; otherwise no check is performed.
///
/// # Safety
///
/// `bytes` must be valid UTF-8.
#[inline]
unsafe fn from_utf8_unchecked<'b>(bytes: &'b [u8], safety_justification: &'static str) -> &'b str {
    if cfg!(debug_assertions) {
        // Catch problems more quickly when testing
        return std::str::from_utf8(bytes).expect(safety_justification);
    }
    // SAFETY: the caller guarantees that `bytes` is valid UTF-8
    unsafe { std::str::from_utf8_unchecked(bytes) }
}

/// Whether `b` has the `10xxxxxx` bit pattern of a UTF-8 continuation byte (`0x80..=0xbf`)
#[inline]
fn is_utf8_continuation(b: u8) -> bool {
    const TOP_TWO_BITS: u8 = 0b1100_0000;
    const CONTINUATION_TAG: u8 = 0b1000_0000;
    (b & TOP_TWO_BITS) == CONTINUATION_TAG
}

/// Strip ANSI escapes from bytes, returning the printable content
///
/// This can be used to take output from a program that includes escape sequences and write it
/// somewhere that does not easily support them, such as a log file.
///
/// The returned [`StrippedBytes`] is an [`Iterator`] over the printable byte runs of `data`;
/// [`StrippedBytes::into_vec`] gathers them into a single buffer.
///
/// # Example
///
/// ```rust
/// let styled_text = "\x1b[32mfoo\x1b[m bar";
/// let plain_str = anstream::adapter::strip_bytes(styled_text.as_bytes()).into_vec();
/// assert_eq!(plain_str.as_slice(), &b"foo bar"[..]);
/// ```
#[inline]
pub fn strip_bytes(data: &[u8]) -> StrippedBytes<'_> {
    StrippedBytes::new(data)
}

/// See [`strip_bytes`]
///
/// Iterating yields the printable byte runs of the wrapped slice, one run at a time.
#[derive(Default, Clone, Debug, PartialEq, Eq)]
pub struct StrippedBytes<'s> {
    // Input that has not been consumed yet
    bytes: &'s [u8],
    // Escape-sequence parser state carried between calls to `next`
    state: State,
    // Tracks progress through a multi-byte UTF-8 character
    utf8parser: Utf8Parser,
}

impl<'s> StrippedBytes<'s> {
    /// See [`strip_bytes`]
    #[inline]
    pub fn new(bytes: &'s [u8]) -> Self {
        Self {
            bytes,
            state: State::Ground,
            utf8parser: Default::default(),
        }
    }

    /// Strip the next slice of bytes
    ///
    /// Used when the content is in several non-contiguous slices
    ///
    /// # Panic
    ///
    /// May panic if it is not exhausted / empty
    #[inline]
    pub fn extend(&mut self, bytes: &'s [u8]) {
        debug_assert!(
            self.is_empty(),
            "current bytes must be processed to ensure we end at the right state"
        );
        self.bytes = bytes;
    }

    /// Report the bytes has been exhausted
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.bytes.is_empty()
    }

    /// Create a [`Vec`] of the printable content
    #[inline]
    pub fn into_vec(self) -> Vec<u8> {
        let mut stripped = Vec::with_capacity(self.bytes.len());
        for printable in self {
            stripped.extend(printable);
        }
        stripped
    }
}

/// Yields each run of printable bytes found in the current slice
impl<'s> Iterator for StrippedBytes<'s> {
    type Item = &'s [u8];

    /// Return the next printable run, or `None` when no printable bytes are found
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        next_bytes(&mut self.bytes, &mut self.state, &mut self.utf8parser)
    }
}

/// Incrementally strip non-contiguous data
///
/// Feed each slice to [`StripBytes::strip_next`]; both the escape parser state and the UTF-8
/// decoding progress live in this value, so they persist from one slice to the next.
#[derive(Default, Clone, Debug, PartialEq, Eq)]
pub struct StripBytes {
    // Escape-sequence parser state shared by every iterator handed out by `strip_next`
    state: State,
    // Progress through a multi-byte UTF-8 character, which may span slices
    utf8parser: Utf8Parser,
}

impl StripBytes {
    /// Initial state
    pub fn new() -> Self {
        Self::default()
    }

    /// Strip the next segment of data
    ///
    /// The returned iterator yields the printable byte runs of `bytes` and mutably borrows this
    /// value's parser state while it is alive.
    pub fn strip_next<'s>(&'s mut self, bytes: &'s [u8]) -> StripBytesIter<'s> {
        let Self { state, utf8parser } = self;
        StripBytesIter {
            bytes,
            state,
            utf8parser,
        }
    }
}

/// See [`StripBytes`]
///
/// Iterator over the printable byte runs of one slice passed to [`StripBytes::strip_next`].
#[derive(Debug, PartialEq, Eq)]
pub struct StripBytesIter<'s> {
    // Unconsumed remainder of the slice
    bytes: &'s [u8],
    // Parser state borrowed from the owning `StripBytes`
    state: &'s mut State,
    // UTF-8 decoding progress borrowed from the owning `StripBytes`
    utf8parser: &'s mut Utf8Parser,
}

/// Yields each run of printable bytes found in the current slice
impl<'s> Iterator for StripBytesIter<'s> {
    type Item = &'s [u8];

    /// Return the next printable run, or `None` when no printable bytes are found
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        next_bytes(&mut self.bytes, self.state, self.utf8parser)
    }
}

/// Pull the next run of printable bytes off the front of `bytes`
///
/// `bytes` is advanced past everything that was consumed: the skipped non-printable prefix and
/// the returned run. `state` and `utf8parser` are updated as bytes are examined and are not
/// reset on exit, so a caller can keep passing them in for later slices.
///
/// Returns `None` when no printable bytes are found in what was left of `bytes`.
#[inline]
fn next_bytes<'s>(
    bytes: &mut &'s [u8],
    state: &mut State,
    utf8parser: &mut Utf8Parser,
) -> Option<&'s [u8]> {
    // Phase 1: skip the non-printable prefix.
    let offset = bytes.iter().copied().position(|b| {
        if *state == State::Utf8 {
            // Already inside a multi-byte character (left over from an earlier call): nothing
            // to skip, the very first byte belongs to the printable run
            true
        } else {
            let (next_state, action) = state_change(*state, b);
            // `State::Anywhere` is treated as "stay in the current state"
            if next_state != State::Anywhere {
                *state = next_state;
            }
            is_printable_bytes(action, b)
        }
    });
    let (_, next) = bytes.split_at(offset.unwrap_or(bytes.len()));
    *bytes = next;

    // Phase 2: find where the printable run ends.
    let offset = bytes.iter().copied().position(|b| {
        if *state == State::Utf8 {
            // Feed the byte to the UTF-8 parser; once it reports the character as finished
            // (decoded or rejected) go back to the ground state. The byte stays in the run
            // either way.
            if utf8parser.add(b) {
                *state = State::Ground;
            }
            false
        } else {
            // Outside of a multi-byte character every byte is classified from the ground state
            let (next_state, action) = state_change(State::Ground, b);
            if next_state != State::Anywhere {
                *state = next_state;
            }
            if *state == State::Utf8 {
                // First byte of a multi-byte character: start decoding and keep it in the run
                utf8parser.add(b);
                false
            } else {
                // The run ends at the first byte that is not printable
                !is_printable_bytes(action, b)
            }
        }
    });
    let (printable, next) = bytes.split_at(offset.unwrap_or(bytes.len()));
    *bytes = next;
    if printable.is_empty() {
        None
    } else {
        Some(printable)
    }
}

/// Thin wrapper around [`utf8parse::Parser`] that only reports when a character has ended
#[derive(Default, Clone, Debug, PartialEq, Eq)]
pub(crate) struct Utf8Parser {
    // Underlying incremental decoder; it is fed one byte at a time
    utf8_parser: utf8parse::Parser,
}

impl Utf8Parser {
    /// Feed one byte to the decoder
    ///
    /// Returns `true` when the decoder signalled either a decoded codepoint or an invalid
    /// sequence while processing `byte`, and `false` when it signalled neither.
    fn add(&mut self, byte: u8) -> bool {
        let mut finished = false;
        self.utf8_parser
            .advance(&mut VtUtf8Receiver(&mut finished), byte);
        finished
    }
}

/// Receiver that sets the borrowed flag to `true` whenever the UTF-8 decoder calls back into it
struct VtUtf8Receiver<'a>(&'a mut bool);

/// Both callbacks only raise the flag; the decoded character itself is discarded
impl utf8parse::Receiver for VtUtf8Receiver<'_> {
    /// Called for a decoded codepoint; records that the character has ended
    fn codepoint(&mut self, _: char) {
        *self.0 = true;
    }

    /// Called for an invalid sequence; it is recorded the same way as a decoded codepoint
    fn invalid_sequence(&mut self) {
        *self.0 = true;
    }
}

/// Decide whether a byte, given the parser `action` it produced, belongs in the stripped output
///
/// Printable are: printed bytes other than DEL, the first byte of a multi-byte UTF-8 character,
/// and executed control bytes that are ASCII whitespace.
#[inline]
fn is_printable_bytes(action: Action, byte: u8) -> bool {
    // VT320 considered 0x7f to be `Print`able but we expect to be working in UTF-8 systems and not
    // ISO Latin-1, making it DEL and non-printable
    const DEL: u8 = 0x7f;

    // Continuations aren't included as they may also be control codes, requiring more context
    match action {
        Action::Print => byte != DEL,
        Action::BeginUtf8 => true,
        Action::Execute => byte.is_ascii_whitespace(),
        _ => false,
    }
}

#[cfg(test)]
mod test {
    use super::*;
    use proptest::prelude::*;

    /// Model based off full parser
    ///
    /// Runs the complete `anstyle_parse` parser over `bytes`, keeping printed characters and
    /// executed ASCII whitespace.
    fn parser_strip(bytes: &[u8]) -> String {
        #[derive(Default)]
        struct Strip(String);
        impl Strip {
            fn with_capacity(capacity: usize) -> Self {
                Self(String::with_capacity(capacity))
            }
        }
        impl anstyle_parse::Perform for Strip {
            fn print(&mut self, c: char) {
                self.0.push(c);
            }

            fn execute(&mut self, byte: u8) {
                if byte.is_ascii_whitespace() {
                    self.0.push(byte as char);
                }
            }
        }

        let mut stripped = Strip::with_capacity(bytes.len());
        let mut parser = anstyle_parse::Parser::<anstyle_parse::DefaultCharAccumulator>::new();
        for byte in bytes {
            parser.advance(&mut stripped, *byte);
        }
        stripped.0
    }

    /// Model verifying incremental parsing
    ///
    /// Feeds `s` to a single [`StripStr`] one character at a time.
    fn strip_char(mut s: &str) -> String {
        let mut result = String::new();
        let mut state = StripStr::new();
        while !s.is_empty() {
            let mut indices = s.char_indices();
            indices.next(); // current
            // Start of the second character, or the end of `s` when there is only one left
            let offset = indices.next().map(|(i, _)| i).unwrap_or(s.len());
            let (current, remainder) = s.split_at(offset);
            for printable in state.strip_next(current) {
                result.push_str(printable);
            }
            s = remainder;
        }
        result
    }

    /// Model verifying incremental parsing
    ///
    /// Feeds `s` to a single [`StripBytes`] one byte at a time.
    fn strip_byte(s: &[u8]) -> Vec<u8> {
        let mut result = Vec::new();
        let mut state = StripBytes::default();
        for current in s.chunks(1) {
            for printable in state.strip_next(current) {
                result.extend(printable);
            }
        }
        result
    }

    #[test]
    fn test_strip_bytes_multibyte() {
        let bytes = [240, 145, 141, 139];
        let expected = parser_strip(&bytes);
        let actual = String::from_utf8(strip_bytes(&bytes).into_vec()).unwrap();
        assert_eq!(expected, actual);
    }

    #[test]
    fn test_strip_byte_multibyte() {
        let bytes = [240, 145, 141, 139];
        let expected = parser_strip(&bytes);
        let actual = String::from_utf8(strip_byte(&bytes)).unwrap();
        assert_eq!(expected, actual);
    }

    #[test]
    fn test_strip_str_del() {
        let input = std::str::from_utf8(&[0x7f]).unwrap();
        let expected = "";
        let actual = strip_str(input).to_string();
        assert_eq!(expected, actual);
    }

    #[test]
    fn test_strip_byte_del() {
        let bytes = [0x7f];
        let expected = "";
        let actual = String::from_utf8(strip_byte(&bytes)).unwrap();
        assert_eq!(expected, actual);
    }

    #[test]
    fn test_strip_str_handles_broken_sequence() {
        // valid utf8: \xc3\xb6 then \x1b then \xf0\x9f\x98\x80
        let s = "ö\x1b😀hello😀goodbye";
        let mut it = strip_str(s);
        assert_eq!("ö", it.next().unwrap());
        assert_eq!("ello😀goodbye", it.next().unwrap());
    }

    proptest! {
        #[test]
        #[cfg_attr(miri, ignore)]  // See https://github.com/AltSysrq/proptest/issues/253
        fn strip_str_no_escapes(s in "\\PC*") {
            let expected = parser_strip(s.as_bytes());
            let actual = strip_str(&s).to_string();
            assert_eq!(expected, actual);
        }

        #[test]
        #[cfg_attr(miri, ignore)]  // See https://github.com/AltSysrq/proptest/issues/253
        fn strip_char_no_escapes(s in "\\PC*") {
            let expected = parser_strip(s.as_bytes());
            let actual = strip_char(&s);
            assert_eq!(expected, actual);
        }

        #[test]
        #[cfg_attr(miri, ignore)]  // See https://github.com/AltSysrq/proptest/issues/253
        fn strip_bytes_no_escapes(s in "\\PC*") {
            dbg!(&s);
            dbg!(s.as_bytes());
            let expected = parser_strip(s.as_bytes());
            let actual = String::from_utf8(strip_bytes(s.as_bytes()).into_vec()).unwrap();
            assert_eq!(expected, actual);
        }

        #[test]
        #[cfg_attr(miri, ignore)]  // See https://github.com/AltSysrq/proptest/issues/253
        fn strip_byte_no_escapes(s in "\\PC*") {
            dbg!(&s);
            dbg!(s.as_bytes());
            let expected = parser_strip(s.as_bytes());
            let actual = String::from_utf8(strip_byte(s.as_bytes())).unwrap();
            assert_eq!(expected, actual);
        }
    }
}

Coverage Report

Created: 2025-12-31 06:24

Line	Count	Source
1		use anstyle_parse::state::state_change;
2		use anstyle_parse::state::Action;
3		use anstyle_parse::state::State;
4
5		/// Strip ANSI escapes from a `&str`, returning the printable content
6		///
7		/// This can be used to take output from a program that includes escape sequences and write it
8		/// somewhere that does not easily support them, such as a log file.
9		///
10		/// For non-contiguous data, see [`StripStr`].
11		///
12		/// # Example
13		///
14		/// ```rust
15		/// use std::io::Write as _;
16		///
17		/// let styled_text = "\x1b[32mfoo\x1b[m bar";
18		/// let plain_str = anstream::adapter::strip_str(&styled_text).to_string();
19		/// assert_eq!(plain_str, "foo bar");
20		/// ```
21		#[inline]
22	0	pub fn strip_str(data: &str) -> StrippedStr<'_> {
23	0	StrippedStr::new(data)
24	0	}
25
26		/// See [`strip_str`]
27		#[derive(Default, Clone, Debug, PartialEq, Eq)]
28		pub struct StrippedStr<'s> {
29		bytes: &'s [u8],
30		state: State,
31		}
32
33		impl<'s> StrippedStr<'s> {
34		#[inline]
35	0	fn new(data: &'s str) -> Self {
36	0	Self {
37	0	bytes: data.as_bytes(),
38	0	state: State::Ground,
39	0	}
40	0	}
41
42		/// Create a [`String`] of the printable content
43		#[inline]
44		#[allow(clippy::inherent_to_string_shadow_display)] // Single-allocation implementation
45	0	pub fn to_string(&self) -> String {
46		use std::fmt::Write as _;
47	0	let mut stripped = String::with_capacity(self.bytes.len());
48	0	let _ = write!(&mut stripped, "{self}");
49	0	stripped
50	0	}
51		}
52
53		impl std::fmt::Display for StrippedStr<'_> {
54		/// Note: this does not exhaust the [`Iterator`]
55		#[inline]
56	0	fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
57	0	let iter = Self {
58	0	bytes: self.bytes,
59	0	state: self.state,
60	0	};
61	0	for printable in iter {
62	0	printable.fmt(f)?;
63		}
64	0	Ok(())
65	0	}
66		}
67
68		impl<'s> Iterator for StrippedStr<'s> {
69		type Item = &'s str;
70
71		#[inline]
72	0	fn next(&mut self) -> Option<Self::Item> {
73	0	next_str(&mut self.bytes, &mut self.state)
74	0	}
75		}
76
77		/// Incrementally strip non-contiguous data
78		#[derive(Default, Clone, Debug, PartialEq, Eq)]
79		pub struct StripStr {
80		state: State,
81		}
82
83		impl StripStr {
84		/// Initial state
85	0	pub fn new() -> Self {
86	0	Default::default()
87	0	}
88
89		/// Strip the next segment of data
90	0	pub fn strip_next<'s>(&'s mut self, data: &'s str) -> StripStrIter<'s> {
91	0	StripStrIter {
92	0	bytes: data.as_bytes(),
93	0	state: &mut self.state,
94	0	}
95	0	}
96		}
97
98		/// See [`StripStr`]
99		#[derive(Debug, PartialEq, Eq)]
100		pub struct StripStrIter<'s> {
101		bytes: &'s [u8],
102		state: &'s mut State,
103		}
104
105		impl<'s> Iterator for StripStrIter<'s> {
106		type Item = &'s str;
107
108		#[inline]
109	0	fn next(&mut self) -> Option<Self::Item> {
110	0	next_str(&mut self.bytes, self.state)
111	0	}
112		}
113
114		#[inline]
115	0	fn next_str<'s>(bytes: &mut &'s [u8], state: &mut State) -> Option<&'s str> {
116	0	let offset = bytes.iter().copied().position(\|b\| {
117	0	let (next_state, action) = state_change(*state, b);
118	0	if next_state != State::Anywhere {
119	0	*state = next_state;
120	0	}
121	0	is_printable_bytes(action, b)
122	0	});
123	0	let (_, next) = bytes.split_at(offset.unwrap_or(bytes.len()));
124	0	*bytes = next;
125	0	*state = State::Ground;
126
127	0	let offset = bytes.iter().copied().position(\|b\| {
128	0	let (_next_state, action) = state_change(State::Ground, b);
129	0	!(is_printable_bytes(action, b) \|\| is_utf8_continuation(b))
130	0	});
131	0	let (printable, next) = bytes.split_at(offset.unwrap_or(bytes.len()));
132	0	*bytes = next;
133	0	if printable.is_empty() {
134	0	None
135		} else {
136	0	let printable = unsafe {
137	0	from_utf8_unchecked(
138	0	printable,
139		"`bytes` was validated as UTF-8, the parser preserves UTF-8 continuations",
140		)
141		};
142	0	Some(printable)
143		}
144	0	}
145
146		#[inline]
147	0	unsafe fn from_utf8_unchecked<'b>(bytes: &'b [u8], safety_justification: &'static str) -> &'b str {
148		unsafe {
149	0	if cfg!(debug_assertions) {
150		// Catch problems more quickly when testing
151	0	std::str::from_utf8(bytes).expect(safety_justification)
152		} else {
153	0	std::str::from_utf8_unchecked(bytes)
154		}
155		}
156	0	}
157
158		#[inline]
159	0	fn is_utf8_continuation(b: u8) -> bool {
160	0	matches!(b, 0x80..=0xbf)
161	0	}
162
163		/// Strip ANSI escapes from bytes, returning the printable content
164		///
165		/// This can be used to take output from a program that includes escape sequences and write it
166		/// somewhere that does not easily support them, such as a log file.
167		///
168		/// # Example
169		///
170		/// ```rust
171		/// use std::io::Write as _;
172		///
173		/// let styled_text = "\x1b[32mfoo\x1b[m bar";
174		/// let plain_str = anstream::adapter::strip_bytes(styled_text.as_bytes()).into_vec();
175		/// assert_eq!(plain_str.as_slice(), &b"foo bar"[..]);
176		/// ```
177		#[inline]
178	0	pub fn strip_bytes(data: &[u8]) -> StrippedBytes<'_> {
179	0	StrippedBytes::new(data)
180	0	}
181
182		/// See [`strip_bytes`]
183		#[derive(Default, Clone, Debug, PartialEq, Eq)]
184		pub struct StrippedBytes<'s> {
185		bytes: &'s [u8],
186		state: State,
187		utf8parser: Utf8Parser,
188		}
189
190		impl<'s> StrippedBytes<'s> {
191		/// See [`strip_bytes`]
192		#[inline]
193	0	pub fn new(bytes: &'s [u8]) -> Self {
194	0	Self {
195	0	bytes,
196	0	state: State::Ground,
197	0	utf8parser: Default::default(),
198	0	}
199	0	}
200
201		/// Strip the next slice of bytes
202		///
203		/// Used when the content is in several non-contiguous slices
204		///
205		/// # Panic
206		///
207		/// May panic if it is not exhausted / empty
208		#[inline]
209	0	pub fn extend(&mut self, bytes: &'s [u8]) {
210	0	debug_assert!(
211	0	self.is_empty(),
212	0	"current bytes must be processed to ensure we end at the right state"
213		);
214	0	self.bytes = bytes;
215	0	}
216
217		/// Report the bytes has been exhausted
218		#[inline]
219	0	pub fn is_empty(&self) -> bool {
220	0	self.bytes.is_empty()
221	0	}
222
223		/// Create a [`Vec`] of the printable content
224		#[inline]
225	0	pub fn into_vec(self) -> Vec<u8> {
226	0	let mut stripped = Vec::with_capacity(self.bytes.len());
227	0	for printable in self {
228	0	stripped.extend(printable);
229	0	}
230	0	stripped
231	0	}
232		}
233
234		impl<'s> Iterator for StrippedBytes<'s> {
235		type Item = &'s [u8];
236
237		#[inline]
238	0	fn next(&mut self) -> Option<Self::Item> {
239	0	next_bytes(&mut self.bytes, &mut self.state, &mut self.utf8parser)
240	0	}
241		}
242
243		/// Incrementally strip non-contiguous data
244		#[derive(Default, Clone, Debug, PartialEq, Eq)]
245		pub struct StripBytes {
246		state: State,
247		utf8parser: Utf8Parser,
248		}
249
250		impl StripBytes {
251		/// Initial state
252	0	pub fn new() -> Self {
253	0	Default::default()
254	0	}
255
256		/// Strip the next segment of data
257	0	pub fn strip_next<'s>(&'s mut self, bytes: &'s [u8]) -> StripBytesIter<'s> {
258	0	StripBytesIter {
259	0	bytes,
260	0	state: &mut self.state,
261	0	utf8parser: &mut self.utf8parser,
262	0	}
263	0	}
264		}
265
266		/// See [`StripBytes`]
267		#[derive(Debug, PartialEq, Eq)]
268		pub struct StripBytesIter<'s> {
269		bytes: &'s [u8],
270		state: &'s mut State,
271		utf8parser: &'s mut Utf8Parser,
272		}
273
274		impl<'s> Iterator for StripBytesIter<'s> {
275		type Item = &'s [u8];
276
277		#[inline]
278	0	fn next(&mut self) -> Option<Self::Item> {
279	0	next_bytes(&mut self.bytes, self.state, self.utf8parser)
280	0	}
281		}
282
283		#[inline]
284	0	fn next_bytes<'s>(
285	0	bytes: &mut &'s [u8],
286	0	state: &mut State,
287	0	utf8parser: &mut Utf8Parser,
288	0	) -> Option<&'s [u8]> {
289	0	let offset = bytes.iter().copied().position(\|b\| {
290	0	if *state == State::Utf8 {
291	0	true
292		} else {
293	0	let (next_state, action) = state_change(*state, b);
294	0	if next_state != State::Anywhere {
295	0	*state = next_state;
296	0	}
297	0	is_printable_bytes(action, b)
298		}
299	0	});
300	0	let (_, next) = bytes.split_at(offset.unwrap_or(bytes.len()));
301	0	*bytes = next;
302
303	0	let offset = bytes.iter().copied().position(\|b\| {
304	0	if *state == State::Utf8 {
305	0	if utf8parser.add(b) {
306	0	*state = State::Ground;
307	0	}
308	0	false
309		} else {
310	0	let (next_state, action) = state_change(State::Ground, b);
311	0	if next_state != State::Anywhere {
312	0	*state = next_state;
313	0	}
314	0	if *state == State::Utf8 {
315	0	utf8parser.add(b);
316	0	false
317		} else {
318	0	!is_printable_bytes(action, b)
319		}
320		}
321	0	});
322	0	let (printable, next) = bytes.split_at(offset.unwrap_or(bytes.len()));
323	0	*bytes = next;
324	0	if printable.is_empty() {
325	0	None
326		} else {
327	0	Some(printable)
328		}
329	0	}
330
331		#[derive(Default, Clone, Debug, PartialEq, Eq)]
332		pub(crate) struct Utf8Parser {
333		utf8_parser: utf8parse::Parser,
334		}
335
336		impl Utf8Parser {
337	0	fn add(&mut self, byte: u8) -> bool {
338	0	let mut b = false;
339	0	let mut receiver = VtUtf8Receiver(&mut b);
340	0	self.utf8_parser.advance(&mut receiver, byte);
341	0	b
342	0	}
343		}
344
345		struct VtUtf8Receiver<'a>(&'a mut bool);
346
347		impl utf8parse::Receiver for VtUtf8Receiver<'_> {
348	0	fn codepoint(&mut self, _: char) {
349	0	*self.0 = true;
350	0	}
351
352	0	fn invalid_sequence(&mut self) {
353	0	*self.0 = true;
354	0	}
355		}
356
357		#[inline]
358	0	fn is_printable_bytes(action: Action, byte: u8) -> bool {
359		// VT320 considered 0x7f to be `Print`able but we expect to be working in UTF-8 systems and not
360		// ISO Latin-1, making it DEL and non-printable
361		const DEL: u8 = 0x7f;
362
363		// Continuations aren't included as they may also be control codes, requiring more context
364	0	(action == Action::Print && byte != DEL)
365	0	\|\| action == Action::BeginUtf8
366	0	\|\| (action == Action::Execute && byte.is_ascii_whitespace())
367	0	}
368
369		#[cfg(test)]
370		mod test {
371		use super::*;
372		use proptest::prelude::*;
373
374		/// Model based off full parser
375		fn parser_strip(bytes: &[u8]) -> String {
376		#[derive(Default)]
377		struct Strip(String);
378		impl Strip {
379		fn with_capacity(capacity: usize) -> Self {
380		Self(String::with_capacity(capacity))
381		}
382		}
383		impl anstyle_parse::Perform for Strip {
384		fn print(&mut self, c: char) {
385		self.0.push(c);
386		}
387
388		fn execute(&mut self, byte: u8) {
389		if byte.is_ascii_whitespace() {
390		self.0.push(byte as char);
391		}
392		}
393		}
394
395		let mut stripped = Strip::with_capacity(bytes.len());
396		let mut parser = anstyle_parse::Parser::<anstyle_parse::DefaultCharAccumulator>::new();
397		for byte in bytes {
398		parser.advance(&mut stripped, *byte);
399		}
400		stripped.0
401		}
402
403		/// Model verifying incremental parsing
404		fn strip_char(mut s: &str) -> String {
405		let mut result = String::new();
406		let mut state = StripStr::new();
407		while !s.is_empty() {
408		let mut indices = s.char_indices();
409		indices.next(); // current
410		let offset = indices.next().map(\|(i, _)\| i).unwrap_or_else(\|\| s.len());
411		let (current, remainder) = s.split_at(offset);
412		for printable in state.strip_next(current) {
413		result.push_str(printable);
414		}
415		s = remainder;
416		}
417		result
418		}
419
420		/// Model verifying incremental parsing
421		fn strip_byte(s: &[u8]) -> Vec<u8> {
422		let mut result = Vec::new();
423		let mut state = StripBytes::default();
424		for start in 0..s.len() {
425		let current = &s[start..=start];
426		for printable in state.strip_next(current) {
427		result.extend(printable);
428		}
429		}
430		result
431		}
432
433		#[test]
434		fn test_strip_bytes_multibyte() {
435		let bytes = [240, 145, 141, 139];
436		let expected = parser_strip(&bytes);
437		let actual = String::from_utf8(strip_bytes(&bytes).into_vec()).unwrap();
438		assert_eq!(expected, actual);
439		}
440
441		#[test]
442		fn test_strip_byte_multibyte() {
443		let bytes = [240, 145, 141, 139];
444		let expected = parser_strip(&bytes);
445		let actual = String::from_utf8(strip_byte(&bytes).clone()).unwrap();
446		assert_eq!(expected, actual);
447		}
448
449		#[test]
450		fn test_strip_str_del() {
451		let input = std::str::from_utf8(&[0x7f]).unwrap();
452		let expected = "";
453		let actual = strip_str(input).to_string();
454		assert_eq!(expected, actual);
455		}
456
457		#[test]
458		fn test_strip_byte_del() {
459		let bytes = [0x7f];
460		let expected = "";
461		let actual = String::from_utf8(strip_byte(&bytes).clone()).unwrap();
462		assert_eq!(expected, actual);
463		}
464
465		#[test]
466		fn test_strip_str_handles_broken_sequence() {
467		// valid utf8: \xc3\xb6 then \x1b then \xf0\x9f\x98\x80
468		let s = "ö\x1b😀hello😀goodbye";
469		let mut it = strip_str(s);
470		assert_eq!("ö", it.next().unwrap());
471		assert_eq!("ello😀goodbye", it.next().unwrap());
472		}
473
474		proptest! {
475		#[test]
476		#[cfg_attr(miri, ignore)] // See https://github.com/AltSysrq/proptest/issues/253
477		fn strip_str_no_escapes(s in "\\PC*") {
478		let expected = parser_strip(s.as_bytes());
479		let actual = strip_str(&s).to_string();
480		assert_eq!(expected, actual);
481		}
482
483		#[test]
484		#[cfg_attr(miri, ignore)] // See https://github.com/AltSysrq/proptest/issues/253
485		fn strip_char_no_escapes(s in "\\PC*") {
486		let expected = parser_strip(s.as_bytes());
487		let actual = strip_char(&s);
488		assert_eq!(expected, actual);
489		}
490
491		#[test]
492		#[cfg_attr(miri, ignore)] // See https://github.com/AltSysrq/proptest/issues/253
493		fn strip_bytes_no_escapes(s in "\\PC*") {
494		dbg!(&s);
495		dbg!(s.as_bytes());
496		let expected = parser_strip(s.as_bytes());
497		let actual = String::from_utf8(strip_bytes(s.as_bytes()).into_vec()).unwrap();
498		assert_eq!(expected, actual);
499		}
500
501		#[test]
502		#[cfg_attr(miri, ignore)] // See https://github.com/AltSysrq/proptest/issues/253
503		fn strip_byte_no_escapes(s in "\\PC*") {
504		dbg!(&s);
505		dbg!(s.as_bytes());
506		let expected = parser_strip(s.as_bytes());
507		let actual = String::from_utf8(strip_byte(s.as_bytes()).clone()).unwrap();
508		assert_eq!(expected, actual);
509		}
510		}
511		}