Coverage Report

Created: 2025-12-31 06:24

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/rust/registry/src/index.crates.io-1949cf8c6b5b557f/anstream-0.6.21/src/adapter/strip.rs
Line
Count
Source
1
use anstyle_parse::state::state_change;
2
use anstyle_parse::state::Action;
3
use anstyle_parse::state::State;
4
5
/// Strip ANSI escapes from a `&str`, returning the printable content
6
///
7
/// This can be used to take output from a program that includes escape sequences and write it
8
/// somewhere that does not easily support them, such as a log file.
9
///
10
/// For non-contiguous data, see [`StripStr`].
11
///
12
/// # Example
13
///
14
/// ```rust
15
/// use std::io::Write as _;
16
///
17
/// let styled_text = "\x1b[32mfoo\x1b[m bar";
18
/// let plain_str = anstream::adapter::strip_str(&styled_text).to_string();
19
/// assert_eq!(plain_str, "foo bar");
20
/// ```
21
#[inline]
22
0
pub fn strip_str(data: &str) -> StrippedStr<'_> {
23
0
    StrippedStr::new(data)
24
0
}
25
26
/// See [`strip_str`]
27
#[derive(Default, Clone, Debug, PartialEq, Eq)]
28
pub struct StrippedStr<'s> {
29
    bytes: &'s [u8],
30
    state: State,
31
}
32
33
impl<'s> StrippedStr<'s> {
34
    #[inline]
35
0
    fn new(data: &'s str) -> Self {
36
0
        Self {
37
0
            bytes: data.as_bytes(),
38
0
            state: State::Ground,
39
0
        }
40
0
    }
41
42
    /// Create a [`String`] of the printable content
43
    #[inline]
44
    #[allow(clippy::inherent_to_string_shadow_display)] // Single-allocation implementation
45
0
    pub fn to_string(&self) -> String {
46
        use std::fmt::Write as _;
47
0
        let mut stripped = String::with_capacity(self.bytes.len());
48
0
        let _ = write!(&mut stripped, "{self}");
49
0
        stripped
50
0
    }
51
}
52
53
impl std::fmt::Display for StrippedStr<'_> {
54
    /// **Note:** this does *not* exhaust the [`Iterator`]
55
    #[inline]
56
0
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
57
0
        let iter = Self {
58
0
            bytes: self.bytes,
59
0
            state: self.state,
60
0
        };
61
0
        for printable in iter {
62
0
            printable.fmt(f)?;
63
        }
64
0
        Ok(())
65
0
    }
66
}
67
68
impl<'s> Iterator for StrippedStr<'s> {
69
    type Item = &'s str;
70
71
    #[inline]
72
0
    fn next(&mut self) -> Option<Self::Item> {
73
0
        next_str(&mut self.bytes, &mut self.state)
74
0
    }
75
}
76
77
/// Incrementally strip non-contiguous data
78
#[derive(Default, Clone, Debug, PartialEq, Eq)]
79
pub struct StripStr {
80
    state: State,
81
}
82
83
impl StripStr {
84
    /// Initial state
85
0
    pub fn new() -> Self {
86
0
        Default::default()
87
0
    }
88
89
    /// Strip the next segment of data
90
0
    pub fn strip_next<'s>(&'s mut self, data: &'s str) -> StripStrIter<'s> {
91
0
        StripStrIter {
92
0
            bytes: data.as_bytes(),
93
0
            state: &mut self.state,
94
0
        }
95
0
    }
96
}
97
98
/// See [`StripStr`]
99
#[derive(Debug, PartialEq, Eq)]
100
pub struct StripStrIter<'s> {
101
    bytes: &'s [u8],
102
    state: &'s mut State,
103
}
104
105
impl<'s> Iterator for StripStrIter<'s> {
106
    type Item = &'s str;
107
108
    #[inline]
109
0
    fn next(&mut self) -> Option<Self::Item> {
110
0
        next_str(&mut self.bytes, self.state)
111
0
    }
112
}
113
114
#[inline]
115
0
fn next_str<'s>(bytes: &mut &'s [u8], state: &mut State) -> Option<&'s str> {
116
0
    let offset = bytes.iter().copied().position(|b| {
117
0
        let (next_state, action) = state_change(*state, b);
118
0
        if next_state != State::Anywhere {
119
0
            *state = next_state;
120
0
        }
121
0
        is_printable_bytes(action, b)
122
0
    });
123
0
    let (_, next) = bytes.split_at(offset.unwrap_or(bytes.len()));
124
0
    *bytes = next;
125
0
    *state = State::Ground;
126
127
0
    let offset = bytes.iter().copied().position(|b| {
128
0
        let (_next_state, action) = state_change(State::Ground, b);
129
0
        !(is_printable_bytes(action, b) || is_utf8_continuation(b))
130
0
    });
131
0
    let (printable, next) = bytes.split_at(offset.unwrap_or(bytes.len()));
132
0
    *bytes = next;
133
0
    if printable.is_empty() {
134
0
        None
135
    } else {
136
0
        let printable = unsafe {
137
0
            from_utf8_unchecked(
138
0
                printable,
139
                "`bytes` was validated as UTF-8, the parser preserves UTF-8 continuations",
140
            )
141
        };
142
0
        Some(printable)
143
    }
144
0
}
145
146
/// Reinterpret `bytes` as a `&str`, validating only when `debug_assertions` are enabled
///
/// With `debug_assertions` on, the bytes are checked and a failure panics with
/// `safety_justification` as the message, so a broken invariant is caught while testing.
/// Otherwise the bytes are converted without any check.
///
/// # Safety
///
/// `bytes` must be valid UTF-8.
#[inline]
unsafe fn from_utf8_unchecked<'b>(bytes: &'b [u8], safety_justification: &'static str) -> &'b str {
    if cfg!(debug_assertions) {
        // Catch problems more quickly when testing
        std::str::from_utf8(bytes).expect(safety_justification)
    } else {
        // SAFETY: the caller guarantees that `bytes` is valid UTF-8
        unsafe { std::str::from_utf8_unchecked(bytes) }
    }
}
157
158
/// Report whether `b` has the `10xx_xxxx` bit pattern, i.e. lies in `0x80..=0xbf`
#[inline]
fn is_utf8_continuation(b: u8) -> bool {
    (b & 0b1100_0000) == 0b1000_0000
}
162
163
/// Strip ANSI escapes from bytes, returning the printable content
164
///
165
/// This can be used to take output from a program that includes escape sequences and write it
166
/// somewhere that does not easily support them, such as a log file.
167
///
168
/// # Example
169
///
170
/// ```rust
171
/// use std::io::Write as _;
172
///
173
/// let styled_text = "\x1b[32mfoo\x1b[m bar";
174
/// let plain_str = anstream::adapter::strip_bytes(styled_text.as_bytes()).into_vec();
175
/// assert_eq!(plain_str.as_slice(), &b"foo bar"[..]);
176
/// ```
177
#[inline]
178
0
pub fn strip_bytes(data: &[u8]) -> StrippedBytes<'_> {
179
0
    StrippedBytes::new(data)
180
0
}
181
182
/// See [`strip_bytes`]
183
#[derive(Default, Clone, Debug, PartialEq, Eq)]
184
pub struct StrippedBytes<'s> {
185
    bytes: &'s [u8],
186
    state: State,
187
    utf8parser: Utf8Parser,
188
}
189
190
impl<'s> StrippedBytes<'s> {
191
    /// See [`strip_bytes`]
192
    #[inline]
193
0
    pub fn new(bytes: &'s [u8]) -> Self {
194
0
        Self {
195
0
            bytes,
196
0
            state: State::Ground,
197
0
            utf8parser: Default::default(),
198
0
        }
199
0
    }
200
201
    /// Strip the next slice of bytes
202
    ///
203
    /// Used when the content is in several non-contiguous slices
204
    ///
205
    /// # Panic
206
    ///
207
    /// May panic if it is not exhausted / empty
208
    #[inline]
209
0
    pub fn extend(&mut self, bytes: &'s [u8]) {
210
0
        debug_assert!(
211
0
            self.is_empty(),
212
0
            "current bytes must be processed to ensure we end at the right state"
213
        );
214
0
        self.bytes = bytes;
215
0
    }
216
217
    /// Report the bytes has been exhausted
218
    #[inline]
219
0
    pub fn is_empty(&self) -> bool {
220
0
        self.bytes.is_empty()
221
0
    }
222
223
    /// Create a [`Vec`] of the printable content
224
    #[inline]
225
0
    pub fn into_vec(self) -> Vec<u8> {
226
0
        let mut stripped = Vec::with_capacity(self.bytes.len());
227
0
        for printable in self {
228
0
            stripped.extend(printable);
229
0
        }
230
0
        stripped
231
0
    }
232
}
233
234
impl<'s> Iterator for StrippedBytes<'s> {
235
    type Item = &'s [u8];
236
237
    #[inline]
238
0
    fn next(&mut self) -> Option<Self::Item> {
239
0
        next_bytes(&mut self.bytes, &mut self.state, &mut self.utf8parser)
240
0
    }
241
}
242
243
/// Incrementally strip non-contiguous data
244
#[derive(Default, Clone, Debug, PartialEq, Eq)]
245
pub struct StripBytes {
246
    state: State,
247
    utf8parser: Utf8Parser,
248
}
249
250
impl StripBytes {
251
    /// Initial state
252
0
    pub fn new() -> Self {
253
0
        Default::default()
254
0
    }
255
256
    /// Strip the next segment of data
257
0
    pub fn strip_next<'s>(&'s mut self, bytes: &'s [u8]) -> StripBytesIter<'s> {
258
0
        StripBytesIter {
259
0
            bytes,
260
0
            state: &mut self.state,
261
0
            utf8parser: &mut self.utf8parser,
262
0
        }
263
0
    }
264
}
265
266
/// See [`StripBytes`]
267
#[derive(Debug, PartialEq, Eq)]
268
pub struct StripBytesIter<'s> {
269
    bytes: &'s [u8],
270
    state: &'s mut State,
271
    utf8parser: &'s mut Utf8Parser,
272
}
273
274
impl<'s> Iterator for StripBytesIter<'s> {
275
    type Item = &'s [u8];
276
277
    #[inline]
278
0
    fn next(&mut self) -> Option<Self::Item> {
279
0
        next_bytes(&mut self.bytes, self.state, self.utf8parser)
280
0
    }
281
}
282
283
#[inline]
284
0
fn next_bytes<'s>(
285
0
    bytes: &mut &'s [u8],
286
0
    state: &mut State,
287
0
    utf8parser: &mut Utf8Parser,
288
0
) -> Option<&'s [u8]> {
289
0
    let offset = bytes.iter().copied().position(|b| {
290
0
        if *state == State::Utf8 {
291
0
            true
292
        } else {
293
0
            let (next_state, action) = state_change(*state, b);
294
0
            if next_state != State::Anywhere {
295
0
                *state = next_state;
296
0
            }
297
0
            is_printable_bytes(action, b)
298
        }
299
0
    });
300
0
    let (_, next) = bytes.split_at(offset.unwrap_or(bytes.len()));
301
0
    *bytes = next;
302
303
0
    let offset = bytes.iter().copied().position(|b| {
304
0
        if *state == State::Utf8 {
305
0
            if utf8parser.add(b) {
306
0
                *state = State::Ground;
307
0
            }
308
0
            false
309
        } else {
310
0
            let (next_state, action) = state_change(State::Ground, b);
311
0
            if next_state != State::Anywhere {
312
0
                *state = next_state;
313
0
            }
314
0
            if *state == State::Utf8 {
315
0
                utf8parser.add(b);
316
0
                false
317
            } else {
318
0
                !is_printable_bytes(action, b)
319
            }
320
        }
321
0
    });
322
0
    let (printable, next) = bytes.split_at(offset.unwrap_or(bytes.len()));
323
0
    *bytes = next;
324
0
    if printable.is_empty() {
325
0
        None
326
    } else {
327
0
        Some(printable)
328
    }
329
0
}
330
331
#[derive(Default, Clone, Debug, PartialEq, Eq)]
332
pub(crate) struct Utf8Parser {
333
    utf8_parser: utf8parse::Parser,
334
}
335
336
impl Utf8Parser {
337
0
    fn add(&mut self, byte: u8) -> bool {
338
0
        let mut b = false;
339
0
        let mut receiver = VtUtf8Receiver(&mut b);
340
0
        self.utf8_parser.advance(&mut receiver, byte);
341
0
        b
342
0
    }
343
}
344
345
struct VtUtf8Receiver<'a>(&'a mut bool);
346
347
impl utf8parse::Receiver for VtUtf8Receiver<'_> {
348
0
    fn codepoint(&mut self, _: char) {
349
0
        *self.0 = true;
350
0
    }
351
352
0
    fn invalid_sequence(&mut self) {
353
0
        *self.0 = true;
354
0
    }
355
}
356
357
#[inline]
358
0
fn is_printable_bytes(action: Action, byte: u8) -> bool {
359
    // VT320 considered 0x7f to be `Print`able but we expect to be working in UTF-8 systems and not
360
    // ISO Latin-1, making it DEL and non-printable
361
    const DEL: u8 = 0x7f;
362
363
    // Continuations aren't included as they may also be control codes, requiring more context
364
0
    (action == Action::Print && byte != DEL)
365
0
        || action == Action::BeginUtf8
366
0
        || (action == Action::Execute && byte.is_ascii_whitespace())
367
0
}
368
369
#[cfg(test)]
mod test {
    use super::*;
    use proptest::prelude::*;

    /// Model based off full parser
    ///
    /// Collects every printed character plus every executed ASCII-whitespace byte.
    fn parser_strip(bytes: &[u8]) -> String {
        #[derive(Default)]
        struct Strip(String);
        impl Strip {
            fn with_capacity(capacity: usize) -> Self {
                Self(String::with_capacity(capacity))
            }
        }
        impl anstyle_parse::Perform for Strip {
            fn print(&mut self, c: char) {
                self.0.push(c);
            }

            fn execute(&mut self, byte: u8) {
                if byte.is_ascii_whitespace() {
                    self.0.push(byte as char);
                }
            }
        }

        let mut stripped = Strip::with_capacity(bytes.len());
        let mut parser = anstyle_parse::Parser::<anstyle_parse::DefaultCharAccumulator>::new();
        for byte in bytes {
            parser.advance(&mut stripped, *byte);
        }
        stripped.0
    }

    /// Model verifying incremental parsing
    ///
    /// Feeds the input to a single [`StripStr`] one `char` at a time.
    fn strip_char(mut s: &str) -> String {
        let mut result = String::new();
        let mut state = StripStr::new();
        while !s.is_empty() {
            let mut indices = s.char_indices();
            indices.next(); // current
            let offset = indices.next().map(|(i, _)| i).unwrap_or_else(|| s.len());
            let (current, remainder) = s.split_at(offset);
            for printable in state.strip_next(current) {
                result.push_str(printable);
            }
            s = remainder;
        }
        result
    }

    /// Model verifying incremental parsing
    ///
    /// Feeds the input to a single [`StripBytes`] one byte at a time.
    fn strip_byte(s: &[u8]) -> Vec<u8> {
        let mut result = Vec::new();
        let mut state = StripBytes::default();
        for current in s.chunks(1) {
            for printable in state.strip_next(current) {
                result.extend(printable);
            }
        }
        result
    }

    #[test]
    fn test_strip_bytes_multibyte() {
        let bytes = [240, 145, 141, 139];
        let expected = parser_strip(&bytes);
        let actual = String::from_utf8(strip_bytes(&bytes).into_vec()).unwrap();
        assert_eq!(expected, actual);
    }

    #[test]
    fn test_strip_byte_multibyte() {
        let bytes = [240, 145, 141, 139];
        let expected = parser_strip(&bytes);
        let actual = String::from_utf8(strip_byte(&bytes)).unwrap();
        assert_eq!(expected, actual);
    }

    #[test]
    fn test_strip_str_del() {
        let input = std::str::from_utf8(&[0x7f]).unwrap();
        let expected = "";
        let actual = strip_str(input).to_string();
        assert_eq!(expected, actual);
    }

    #[test]
    fn test_strip_byte_del() {
        let bytes = [0x7f];
        let expected = "";
        let actual = String::from_utf8(strip_byte(&bytes)).unwrap();
        assert_eq!(expected, actual);
    }

    #[test]
    fn test_strip_str_handles_broken_sequence() {
        // valid utf8: \xc3\xb6 then \x1b then \xf0\x9f\x98\x80
        let s = "ö\x1b😀hello😀goodbye";
        let mut it = strip_str(s);
        assert_eq!("ö", it.next().unwrap());
        assert_eq!("ello😀goodbye", it.next().unwrap());
    }

    proptest! {
        #[test]
        #[cfg_attr(miri, ignore)]  // See https://github.com/AltSysrq/proptest/issues/253
        fn strip_str_no_escapes(s in "\\PC*") {
            let expected = parser_strip(s.as_bytes());
            let actual = strip_str(&s).to_string();
            assert_eq!(expected, actual);
        }

        #[test]
        #[cfg_attr(miri, ignore)]  // See https://github.com/AltSysrq/proptest/issues/253
        fn strip_char_no_escapes(s in "\\PC*") {
            let expected = parser_strip(s.as_bytes());
            let actual = strip_char(&s);
            assert_eq!(expected, actual);
        }

        #[test]
        #[cfg_attr(miri, ignore)]  // See https://github.com/AltSysrq/proptest/issues/253
        fn strip_bytes_no_escapes(s in "\\PC*") {
            dbg!(&s);
            dbg!(s.as_bytes());
            let expected = parser_strip(s.as_bytes());
            let actual = String::from_utf8(strip_bytes(s.as_bytes()).into_vec()).unwrap();
            assert_eq!(expected, actual);
        }

        #[test]
        #[cfg_attr(miri, ignore)]  // See https://github.com/AltSysrq/proptest/issues/253
        fn strip_byte_no_escapes(s in "\\PC*") {
            dbg!(&s);
            dbg!(s.as_bytes());
            let expected = parser_strip(s.as_bytes());
            let actual = String::from_utf8(strip_byte(s.as_bytes())).unwrap();
            assert_eq!(expected, actual);
        }
    }
}