/rust/registry/src/index.crates.io-1949cf8c6b5b557f/anstream-0.6.21/src/adapter/strip.rs
Line | Count | Source |
1 | | use anstyle_parse::state::state_change; |
2 | | use anstyle_parse::state::Action; |
3 | | use anstyle_parse::state::State; |
4 | | |
5 | | /// Strip ANSI escapes from a `&str`, returning the printable content |
6 | | /// |
7 | | /// This can be used to take output from a program that includes escape sequences and write it |
8 | | /// somewhere that does not easily support them, such as a log file. |
9 | | /// |
10 | | /// For non-contiguous data, see [`StripStr`]. |
11 | | /// |
12 | | /// # Example |
13 | | /// |
14 | | /// ```rust |
15 | | /// use std::io::Write as _; |
16 | | /// |
17 | | /// let styled_text = "\x1b[32mfoo\x1b[m bar"; |
18 | | /// let plain_str = anstream::adapter::strip_str(&styled_text).to_string(); |
19 | | /// assert_eq!(plain_str, "foo bar"); |
20 | | /// ``` |
21 | | #[inline] |
22 | 0 | pub fn strip_str(data: &str) -> StrippedStr<'_> { |
23 | 0 | StrippedStr::new(data) |
24 | 0 | } |
25 | | |
26 | | /// See [`strip_str`] |
27 | | #[derive(Default, Clone, Debug, PartialEq, Eq)] |
28 | | pub struct StrippedStr<'s> { |
29 | | bytes: &'s [u8], |
30 | | state: State, |
31 | | } |
32 | | |
33 | | impl<'s> StrippedStr<'s> { |
34 | | #[inline] |
35 | 0 | fn new(data: &'s str) -> Self { |
36 | 0 | Self { |
37 | 0 | bytes: data.as_bytes(), |
38 | 0 | state: State::Ground, |
39 | 0 | } |
40 | 0 | } |
41 | | |
42 | | /// Create a [`String`] of the printable content |
43 | | #[inline] |
44 | | #[allow(clippy::inherent_to_string_shadow_display)] // Single-allocation implementation |
45 | 0 | pub fn to_string(&self) -> String { |
46 | | use std::fmt::Write as _; |
47 | 0 | let mut stripped = String::with_capacity(self.bytes.len()); |
48 | 0 | let _ = write!(&mut stripped, "{self}"); |
49 | 0 | stripped |
50 | 0 | } |
51 | | } |
52 | | |
53 | | impl std::fmt::Display for StrippedStr<'_> { |
54 | | /// **Note:** this does *not* exhaust the [`Iterator`] |
55 | | #[inline] |
56 | 0 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { |
57 | 0 | let iter = Self { |
58 | 0 | bytes: self.bytes, |
59 | 0 | state: self.state, |
60 | 0 | }; |
61 | 0 | for printable in iter { |
62 | 0 | printable.fmt(f)?; |
63 | | } |
64 | 0 | Ok(()) |
65 | 0 | } |
66 | | } |
67 | | |
68 | | impl<'s> Iterator for StrippedStr<'s> { |
69 | | type Item = &'s str; |
70 | | |
71 | | #[inline] |
72 | 0 | fn next(&mut self) -> Option<Self::Item> { |
73 | 0 | next_str(&mut self.bytes, &mut self.state) |
74 | 0 | } |
75 | | } |
76 | | |
77 | | /// Incrementally strip non-contiguous data |
78 | | #[derive(Default, Clone, Debug, PartialEq, Eq)] |
79 | | pub struct StripStr { |
80 | | state: State, |
81 | | } |
82 | | |
83 | | impl StripStr { |
84 | | /// Initial state |
85 | 0 | pub fn new() -> Self { |
86 | 0 | Default::default() |
87 | 0 | } |
88 | | |
89 | | /// Strip the next segment of data |
90 | 0 | pub fn strip_next<'s>(&'s mut self, data: &'s str) -> StripStrIter<'s> { |
91 | 0 | StripStrIter { |
92 | 0 | bytes: data.as_bytes(), |
93 | 0 | state: &mut self.state, |
94 | 0 | } |
95 | 0 | } |
96 | | } |
97 | | |
98 | | /// See [`StripStr`] |
99 | | #[derive(Debug, PartialEq, Eq)] |
100 | | pub struct StripStrIter<'s> { |
101 | | bytes: &'s [u8], |
102 | | state: &'s mut State, |
103 | | } |
104 | | |
105 | | impl<'s> Iterator for StripStrIter<'s> { |
106 | | type Item = &'s str; |
107 | | |
108 | | #[inline] |
109 | 0 | fn next(&mut self) -> Option<Self::Item> { |
110 | 0 | next_str(&mut self.bytes, self.state) |
111 | 0 | } |
112 | | } |
113 | | |
114 | | #[inline] |
115 | 0 | fn next_str<'s>(bytes: &mut &'s [u8], state: &mut State) -> Option<&'s str> { |
116 | 0 | let offset = bytes.iter().copied().position(|b| { |
117 | 0 | let (next_state, action) = state_change(*state, b); |
118 | 0 | if next_state != State::Anywhere { |
119 | 0 | *state = next_state; |
120 | 0 | } |
121 | 0 | is_printable_bytes(action, b) |
122 | 0 | }); |
123 | 0 | let (_, next) = bytes.split_at(offset.unwrap_or(bytes.len())); |
124 | 0 | *bytes = next; |
125 | 0 | *state = State::Ground; |
126 | | |
127 | 0 | let offset = bytes.iter().copied().position(|b| { |
128 | 0 | let (_next_state, action) = state_change(State::Ground, b); |
129 | 0 | !(is_printable_bytes(action, b) || is_utf8_continuation(b)) |
130 | 0 | }); |
131 | 0 | let (printable, next) = bytes.split_at(offset.unwrap_or(bytes.len())); |
132 | 0 | *bytes = next; |
133 | 0 | if printable.is_empty() { |
134 | 0 | None |
135 | | } else { |
136 | 0 | let printable = unsafe { |
137 | 0 | from_utf8_unchecked( |
138 | 0 | printable, |
139 | | "`bytes` was validated as UTF-8, the parser preserves UTF-8 continuations", |
140 | | ) |
141 | | }; |
142 | 0 | Some(printable) |
143 | | } |
144 | 0 | } |
145 | | |
/// Reinterpret `bytes` as a `&str`, validating only when `debug_assertions` are enabled
///
/// With `debug_assertions` enabled the bytes are checked and a failure panics with
/// `safety_justification` as the message, to catch problems more quickly when testing.
/// Otherwise the conversion is unchecked.
///
/// # Safety
///
/// `bytes` must be valid UTF-8.
#[inline]
unsafe fn from_utf8_unchecked<'b>(bytes: &'b [u8], safety_justification: &'static str) -> &'b str {
    if cfg!(debug_assertions) {
        // Catch problems more quickly when testing
        std::str::from_utf8(bytes).expect(safety_justification)
    } else {
        // SAFETY: the caller guarantees that `bytes` is valid UTF-8
        unsafe { std::str::from_utf8_unchecked(bytes) }
    }
}
157 | | |
/// Report whether `b` has the `10xxxxxx` bit pattern (`0x80..=0xbf`) of a UTF-8 continuation byte
#[inline]
fn is_utf8_continuation(b: u8) -> bool {
    const TAG_MASK: u8 = 0b1100_0000;
    const CONTINUATION_TAG: u8 = 0b1000_0000;
    b & TAG_MASK == CONTINUATION_TAG
}
162 | | |
163 | | /// Strip ANSI escapes from bytes, returning the printable content |
164 | | /// |
165 | | /// This can be used to take output from a program that includes escape sequences and write it |
166 | | /// somewhere that does not easily support them, such as a log file. |
167 | | /// |
168 | | /// # Example |
169 | | /// |
170 | | /// ```rust |
171 | | /// use std::io::Write as _; |
172 | | /// |
173 | | /// let styled_text = "\x1b[32mfoo\x1b[m bar"; |
174 | | /// let plain_str = anstream::adapter::strip_bytes(styled_text.as_bytes()).into_vec(); |
175 | | /// assert_eq!(plain_str.as_slice(), &b"foo bar"[..]); |
176 | | /// ``` |
177 | | #[inline] |
178 | 0 | pub fn strip_bytes(data: &[u8]) -> StrippedBytes<'_> { |
179 | 0 | StrippedBytes::new(data) |
180 | 0 | } |
181 | | |
182 | | /// See [`strip_bytes`] |
183 | | #[derive(Default, Clone, Debug, PartialEq, Eq)] |
184 | | pub struct StrippedBytes<'s> { |
185 | | bytes: &'s [u8], |
186 | | state: State, |
187 | | utf8parser: Utf8Parser, |
188 | | } |
189 | | |
190 | | impl<'s> StrippedBytes<'s> { |
191 | | /// See [`strip_bytes`] |
192 | | #[inline] |
193 | 0 | pub fn new(bytes: &'s [u8]) -> Self { |
194 | 0 | Self { |
195 | 0 | bytes, |
196 | 0 | state: State::Ground, |
197 | 0 | utf8parser: Default::default(), |
198 | 0 | } |
199 | 0 | } |
200 | | |
201 | | /// Strip the next slice of bytes |
202 | | /// |
203 | | /// Used when the content is in several non-contiguous slices |
204 | | /// |
205 | | /// # Panic |
206 | | /// |
207 | | /// May panic if it is not exhausted / empty |
208 | | #[inline] |
209 | 0 | pub fn extend(&mut self, bytes: &'s [u8]) { |
210 | 0 | debug_assert!( |
211 | 0 | self.is_empty(), |
212 | 0 | "current bytes must be processed to ensure we end at the right state" |
213 | | ); |
214 | 0 | self.bytes = bytes; |
215 | 0 | } |
216 | | |
217 | | /// Report the bytes has been exhausted |
218 | | #[inline] |
219 | 0 | pub fn is_empty(&self) -> bool { |
220 | 0 | self.bytes.is_empty() |
221 | 0 | } |
222 | | |
223 | | /// Create a [`Vec`] of the printable content |
224 | | #[inline] |
225 | 0 | pub fn into_vec(self) -> Vec<u8> { |
226 | 0 | let mut stripped = Vec::with_capacity(self.bytes.len()); |
227 | 0 | for printable in self { |
228 | 0 | stripped.extend(printable); |
229 | 0 | } |
230 | 0 | stripped |
231 | 0 | } |
232 | | } |
233 | | |
234 | | impl<'s> Iterator for StrippedBytes<'s> { |
235 | | type Item = &'s [u8]; |
236 | | |
237 | | #[inline] |
238 | 0 | fn next(&mut self) -> Option<Self::Item> { |
239 | 0 | next_bytes(&mut self.bytes, &mut self.state, &mut self.utf8parser) |
240 | 0 | } |
241 | | } |
242 | | |
243 | | /// Incrementally strip non-contiguous data |
244 | | #[derive(Default, Clone, Debug, PartialEq, Eq)] |
245 | | pub struct StripBytes { |
246 | | state: State, |
247 | | utf8parser: Utf8Parser, |
248 | | } |
249 | | |
250 | | impl StripBytes { |
251 | | /// Initial state |
252 | 0 | pub fn new() -> Self { |
253 | 0 | Default::default() |
254 | 0 | } |
255 | | |
256 | | /// Strip the next segment of data |
257 | 0 | pub fn strip_next<'s>(&'s mut self, bytes: &'s [u8]) -> StripBytesIter<'s> { |
258 | 0 | StripBytesIter { |
259 | 0 | bytes, |
260 | 0 | state: &mut self.state, |
261 | 0 | utf8parser: &mut self.utf8parser, |
262 | 0 | } |
263 | 0 | } |
264 | | } |
265 | | |
266 | | /// See [`StripBytes`] |
267 | | #[derive(Debug, PartialEq, Eq)] |
268 | | pub struct StripBytesIter<'s> { |
269 | | bytes: &'s [u8], |
270 | | state: &'s mut State, |
271 | | utf8parser: &'s mut Utf8Parser, |
272 | | } |
273 | | |
274 | | impl<'s> Iterator for StripBytesIter<'s> { |
275 | | type Item = &'s [u8]; |
276 | | |
277 | | #[inline] |
278 | 0 | fn next(&mut self) -> Option<Self::Item> { |
279 | 0 | next_bytes(&mut self.bytes, self.state, self.utf8parser) |
280 | 0 | } |
281 | | } |
282 | | |
283 | | #[inline] |
284 | 0 | fn next_bytes<'s>( |
285 | 0 | bytes: &mut &'s [u8], |
286 | 0 | state: &mut State, |
287 | 0 | utf8parser: &mut Utf8Parser, |
288 | 0 | ) -> Option<&'s [u8]> { |
289 | 0 | let offset = bytes.iter().copied().position(|b| { |
290 | 0 | if *state == State::Utf8 { |
291 | 0 | true |
292 | | } else { |
293 | 0 | let (next_state, action) = state_change(*state, b); |
294 | 0 | if next_state != State::Anywhere { |
295 | 0 | *state = next_state; |
296 | 0 | } |
297 | 0 | is_printable_bytes(action, b) |
298 | | } |
299 | 0 | }); |
300 | 0 | let (_, next) = bytes.split_at(offset.unwrap_or(bytes.len())); |
301 | 0 | *bytes = next; |
302 | | |
303 | 0 | let offset = bytes.iter().copied().position(|b| { |
304 | 0 | if *state == State::Utf8 { |
305 | 0 | if utf8parser.add(b) { |
306 | 0 | *state = State::Ground; |
307 | 0 | } |
308 | 0 | false |
309 | | } else { |
310 | 0 | let (next_state, action) = state_change(State::Ground, b); |
311 | 0 | if next_state != State::Anywhere { |
312 | 0 | *state = next_state; |
313 | 0 | } |
314 | 0 | if *state == State::Utf8 { |
315 | 0 | utf8parser.add(b); |
316 | 0 | false |
317 | | } else { |
318 | 0 | !is_printable_bytes(action, b) |
319 | | } |
320 | | } |
321 | 0 | }); |
322 | 0 | let (printable, next) = bytes.split_at(offset.unwrap_or(bytes.len())); |
323 | 0 | *bytes = next; |
324 | 0 | if printable.is_empty() { |
325 | 0 | None |
326 | | } else { |
327 | 0 | Some(printable) |
328 | | } |
329 | 0 | } |
330 | | |
331 | | #[derive(Default, Clone, Debug, PartialEq, Eq)] |
332 | | pub(crate) struct Utf8Parser { |
333 | | utf8_parser: utf8parse::Parser, |
334 | | } |
335 | | |
336 | | impl Utf8Parser { |
337 | 0 | fn add(&mut self, byte: u8) -> bool { |
338 | 0 | let mut b = false; |
339 | 0 | let mut receiver = VtUtf8Receiver(&mut b); |
340 | 0 | self.utf8_parser.advance(&mut receiver, byte); |
341 | 0 | b |
342 | 0 | } |
343 | | } |
344 | | |
345 | | struct VtUtf8Receiver<'a>(&'a mut bool); |
346 | | |
347 | | impl utf8parse::Receiver for VtUtf8Receiver<'_> { |
348 | 0 | fn codepoint(&mut self, _: char) { |
349 | 0 | *self.0 = true; |
350 | 0 | } |
351 | | |
352 | 0 | fn invalid_sequence(&mut self) { |
353 | 0 | *self.0 = true; |
354 | 0 | } |
355 | | } |
356 | | |
357 | | #[inline] |
358 | 0 | fn is_printable_bytes(action: Action, byte: u8) -> bool { |
359 | | // VT320 considered 0x7f to be `Print`able but we expect to be working in UTF-8 systems and not |
360 | | // ISO Latin-1, making it DEL and non-printable |
361 | | const DEL: u8 = 0x7f; |
362 | | |
363 | | // Continuations aren't included as they may also be control codes, requiring more context |
364 | 0 | (action == Action::Print && byte != DEL) |
365 | 0 | || action == Action::BeginUtf8 |
366 | 0 | || (action == Action::Execute && byte.is_ascii_whitespace()) |
367 | 0 | } |
368 | | |
#[cfg(test)]
mod test {
    use super::*;
    use proptest::prelude::*;

    /// Model based off full parser
    fn parser_strip(bytes: &[u8]) -> String {
        #[derive(Default)]
        struct Strip(String);
        impl Strip {
            fn with_capacity(capacity: usize) -> Self {
                Self(String::with_capacity(capacity))
            }
        }
        impl anstyle_parse::Perform for Strip {
            fn print(&mut self, c: char) {
                self.0.push(c);
            }

            fn execute(&mut self, byte: u8) {
                if byte.is_ascii_whitespace() {
                    self.0.push(byte as char);
                }
            }
        }

        let mut stripped = Strip::with_capacity(bytes.len());
        let mut parser = anstyle_parse::Parser::<anstyle_parse::DefaultCharAccumulator>::new();
        for byte in bytes {
            parser.advance(&mut stripped, *byte);
        }
        stripped.0
    }

    /// Model verifying incremental parsing, feeding one `char` at a time
    fn strip_char(mut s: &str) -> String {
        let mut result = String::new();
        let mut state = StripStr::new();
        while !s.is_empty() {
            let mut indices = s.char_indices();
            indices.next(); // current
            let offset = indices.next().map(|(i, _)| i).unwrap_or_else(|| s.len());
            let (current, remainder) = s.split_at(offset);
            for printable in state.strip_next(current) {
                result.push_str(printable);
            }
            s = remainder;
        }
        result
    }

    /// Model verifying incremental parsing, feeding one byte at a time
    fn strip_byte(s: &[u8]) -> Vec<u8> {
        let mut result = Vec::new();
        let mut state = StripBytes::default();
        for current in s.chunks(1) {
            for printable in state.strip_next(current) {
                result.extend(printable);
            }
        }
        result
    }

    #[test]
    fn test_strip_bytes_multibyte() {
        let bytes = [240, 145, 141, 139];
        let expected = parser_strip(&bytes);
        let actual = String::from_utf8(strip_bytes(&bytes).into_vec()).unwrap();
        assert_eq!(expected, actual);
    }

    #[test]
    fn test_strip_byte_multibyte() {
        let bytes = [240, 145, 141, 139];
        let expected = parser_strip(&bytes);
        let actual = String::from_utf8(strip_byte(&bytes)).unwrap();
        assert_eq!(expected, actual);
    }

    #[test]
    fn test_strip_str_del() {
        let input = std::str::from_utf8(&[0x7f]).unwrap();
        let expected = "";
        let actual = strip_str(input).to_string();
        assert_eq!(expected, actual);
    }

    #[test]
    fn test_strip_byte_del() {
        let bytes = [0x7f];
        let expected = "";
        let actual = String::from_utf8(strip_byte(&bytes)).unwrap();
        assert_eq!(expected, actual);
    }

    #[test]
    fn test_strip_str_handles_broken_sequence() {
        // valid utf8: \xc3\xb6 then \x1b then \xf0\x9f\x98\x80
        let s = "ö\x1b😀hello😀goodbye";
        let mut it = strip_str(s);
        assert_eq!("ö", it.next().unwrap());
        assert_eq!("ello😀goodbye", it.next().unwrap());
    }

    proptest! {
        #[test]
        #[cfg_attr(miri, ignore)] // See https://github.com/AltSysrq/proptest/issues/253
        fn strip_str_no_escapes(s in "\\PC*") {
            let expected = parser_strip(s.as_bytes());
            let actual = strip_str(&s).to_string();
            assert_eq!(expected, actual);
        }

        #[test]
        #[cfg_attr(miri, ignore)] // See https://github.com/AltSysrq/proptest/issues/253
        fn strip_char_no_escapes(s in "\\PC*") {
            let expected = parser_strip(s.as_bytes());
            let actual = strip_char(&s);
            assert_eq!(expected, actual);
        }

        #[test]
        #[cfg_attr(miri, ignore)] // See https://github.com/AltSysrq/proptest/issues/253
        fn strip_bytes_no_escapes(s in "\\PC*") {
            let expected = parser_strip(s.as_bytes());
            let actual = String::from_utf8(strip_bytes(s.as_bytes()).into_vec()).unwrap();
            // The raw bytes are reported on failure instead of being dumped for every case
            assert_eq!(expected, actual, "input bytes: {:?}", s.as_bytes());
        }

        #[test]
        #[cfg_attr(miri, ignore)] // See https://github.com/AltSysrq/proptest/issues/253
        fn strip_byte_no_escapes(s in "\\PC*") {
            let expected = parser_strip(s.as_bytes());
            let actual = String::from_utf8(strip_byte(s.as_bytes())).unwrap();
            // The raw bytes are reported on failure instead of being dumped for every case
            assert_eq!(expected, actual, "input bytes: {:?}", s.as_bytes());
        }
    }
}