Coverage Report

Created: 2026-02-18 06:58

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/rust/registry/src/index.crates.io-1949cf8c6b5b557f/anstyle-parse-0.2.7/src/lib.rs
Line
Count
Source
1
//! Parser for implementing virtual terminal emulators
2
//!
3
//! [`Parser`] is implemented according to [Paul Williams' ANSI parser
4
//! state machine]. The state machine doesn't assign meaning to the parsed data
5
//! and is thus not itself sufficient for writing a terminal emulator. Instead,
6
//! it is expected that an implementation of [`Perform`] is provided which does
7
//! something useful with the parsed data. The [`Parser`] handles the book
8
//! keeping, and the [`Perform`] gets to simply handle actions.
9
//!
10
//! # Examples
11
//!
12
//! For an example of using the [`Parser`] please see the examples folder. The example included
13
//! there simply logs all the actions [`Perform`] does. One quick thing to see it in action is to
14
//! pipe `vim` into it
15
//!
16
//! ```sh
17
//! cargo build --release --example parselog
18
//! vim | target/release/examples/parselog
19
//! ```
20
//!
21
//! Just type `:q` to exit.
22
//!
23
//! # Differences from original state machine description
24
//!
25
//! * UTF-8 Support for Input
26
//! * OSC Strings can be terminated by 0x07
27
//! * Only supports 7-bit codes. Some 8-bit codes are still supported, but they no longer work in
28
//!   all states.
29
//!
30
//! [Paul Williams' ANSI parser state machine]: https://vt100.net/emu/dec_ansi_parser
31
#![cfg_attr(not(test), no_std)]
32
#![cfg_attr(docsrs, feature(doc_auto_cfg))]
33
#![allow(missing_docs)]
34
#![warn(clippy::print_stderr)]
35
#![warn(clippy::print_stdout)]
36
37
#[cfg(not(feature = "core"))]
38
extern crate alloc;
39
40
use core::mem::MaybeUninit;
41
42
#[cfg(feature = "core")]
43
use arrayvec::ArrayVec;
44
#[cfg(feature = "utf8")]
45
use utf8parse as utf8;
46
47
mod params;
48
pub mod state;
49
50
pub use params::{Params, ParamsIter};
51
52
use state::{state_change, Action, State};
53
54
const MAX_INTERMEDIATES: usize = 2;
55
const MAX_OSC_PARAMS: usize = 16;
56
#[cfg(feature = "core")]
57
const MAX_OSC_RAW: usize = 1024;
58
59
/// Parser for raw _VTE_ protocol which delegates actions to a [`Perform`]
60
#[allow(unused_qualifications)]
61
#[derive(Default, Clone, Debug, PartialEq, Eq)]
62
pub struct Parser<C = DefaultCharAccumulator> {
63
    state: State,
64
    intermediates: [u8; MAX_INTERMEDIATES],
65
    intermediate_idx: usize,
66
    params: Params,
67
    param: u16,
68
    #[cfg(feature = "core")]
69
    osc_raw: ArrayVec<u8, MAX_OSC_RAW>,
70
    #[cfg(not(feature = "core"))]
71
    osc_raw: alloc::vec::Vec<u8>,
72
    osc_params: [(usize, usize); MAX_OSC_PARAMS],
73
    osc_num_params: usize,
74
    ignoring: bool,
75
    utf8_parser: C,
76
}
77
78
impl<C> Parser<C>
79
where
80
    C: CharAccumulator,
81
{
82
    /// Create a new Parser
83
0
    pub fn new() -> Parser {
84
0
        Parser::default()
85
0
    }
86
87
    #[inline]
88
0
    fn params(&self) -> &Params {
89
0
        &self.params
90
0
    }
91
92
    #[inline]
93
0
    fn intermediates(&self) -> &[u8] {
94
0
        &self.intermediates[..self.intermediate_idx]
95
0
    }
96
97
    /// Advance the parser state
98
    ///
99
    /// Requires a [`Perform`] in case `byte` triggers an action
100
    #[inline]
101
0
    pub fn advance<P: Perform>(&mut self, performer: &mut P, byte: u8) {
102
        // Utf8 characters are handled out-of-band.
103
0
        if let State::Utf8 = self.state {
104
0
            self.process_utf8(performer, byte);
105
0
            return;
106
0
        }
107
108
0
        let (state, action) = state_change(self.state, byte);
109
0
        self.perform_state_change(performer, state, action, byte);
110
0
    }
111
112
    #[inline]
113
0
    fn process_utf8<P>(&mut self, performer: &mut P, byte: u8)
114
0
    where
115
0
        P: Perform,
116
    {
117
0
        if let Some(c) = self.utf8_parser.add(byte) {
118
0
            performer.print(c);
119
0
            self.state = State::Ground;
120
0
        }
121
0
    }
122
123
    #[inline]
124
0
    fn perform_state_change<P>(&mut self, performer: &mut P, state: State, action: Action, byte: u8)
125
0
    where
126
0
        P: Perform,
127
    {
128
0
        match state {
129
0
            State::Anywhere => {
130
0
                // Just run the action
131
0
                self.perform_action(performer, action, byte);
132
0
            }
133
0
            state => {
134
0
                match self.state {
135
0
                    State::DcsPassthrough => {
136
0
                        self.perform_action(performer, Action::Unhook, byte);
137
0
                    }
138
0
                    State::OscString => {
139
0
                        self.perform_action(performer, Action::OscEnd, byte);
140
0
                    }
141
0
                    _ => (),
142
                }
143
144
0
                match action {
145
0
                    Action::Nop => (),
146
0
                    action => {
147
0
                        self.perform_action(performer, action, byte);
148
0
                    }
149
                }
150
151
0
                match state {
152
0
                    State::CsiEntry | State::DcsEntry | State::Escape => {
153
0
                        self.perform_action(performer, Action::Clear, byte);
154
0
                    }
155
0
                    State::DcsPassthrough => {
156
0
                        self.perform_action(performer, Action::Hook, byte);
157
0
                    }
158
0
                    State::OscString => {
159
0
                        self.perform_action(performer, Action::OscStart, byte);
160
0
                    }
161
0
                    _ => (),
162
                }
163
164
                // Assume the new state
165
0
                self.state = state;
166
            }
167
        }
168
0
    }
169
170
    /// Separate method for `osc_dispatch` that borrows self as read-only
171
    ///
172
    /// The aliasing is needed here for multiple slices into `self.osc_raw`
173
    #[inline]
174
0
    fn osc_dispatch<P: Perform>(&self, performer: &mut P, byte: u8) {
175
0
        let mut slices: [MaybeUninit<&[u8]>; MAX_OSC_PARAMS] =
176
0
            unsafe { MaybeUninit::uninit().assume_init() };
177
178
0
        for (i, slice) in slices.iter_mut().enumerate().take(self.osc_num_params) {
179
0
            let indices = self.osc_params[i];
180
0
            *slice = MaybeUninit::new(&self.osc_raw[indices.0..indices.1]);
181
0
        }
182
183
0
        unsafe {
184
0
            let num_params = self.osc_num_params;
185
0
            let params = &slices[..num_params] as *const [MaybeUninit<&[u8]>] as *const [&[u8]];
186
0
            performer.osc_dispatch(&*params, byte == 0x07);
187
0
        }
188
0
    }
189
190
    #[inline]
191
0
    fn perform_action<P: Perform>(&mut self, performer: &mut P, action: Action, byte: u8) {
192
0
        match action {
193
0
            Action::Print => performer.print(byte as char),
194
0
            Action::Execute => performer.execute(byte),
195
            Action::Hook => {
196
0
                if self.params.is_full() {
197
0
                    self.ignoring = true;
198
0
                } else {
199
0
                    self.params.push(self.param);
200
0
                }
201
202
0
                performer.hook(self.params(), self.intermediates(), self.ignoring, byte);
203
            }
204
0
            Action::Put => performer.put(byte),
205
0
            Action::OscStart => {
206
0
                self.osc_raw.clear();
207
0
                self.osc_num_params = 0;
208
0
            }
209
            Action::OscPut => {
210
                #[cfg(feature = "core")]
211
                {
212
                    if self.osc_raw.is_full() {
213
                        return;
214
                    }
215
                }
216
217
0
                let idx = self.osc_raw.len();
218
219
                // Param separator
220
0
                if byte == b';' {
221
0
                    let param_idx = self.osc_num_params;
222
0
                    match param_idx {
223
                        // Only process up to MAX_OSC_PARAMS
224
0
                        MAX_OSC_PARAMS => return,
225
226
                        // First param is special - 0 to current byte index
227
0
                        0 => {
228
0
                            self.osc_params[param_idx] = (0, idx);
229
0
                        }
230
231
                        // All other params depend on previous indexing
232
0
                        _ => {
233
0
                            let prev = self.osc_params[param_idx - 1];
234
0
                            let begin = prev.1;
235
0
                            self.osc_params[param_idx] = (begin, idx);
236
0
                        }
237
                    }
238
239
0
                    self.osc_num_params += 1;
240
0
                } else {
241
0
                    self.osc_raw.push(byte);
242
0
                }
243
            }
244
            Action::OscEnd => {
245
0
                let param_idx = self.osc_num_params;
246
0
                let idx = self.osc_raw.len();
247
248
0
                match param_idx {
249
                    // Finish last parameter if not already maxed
250
0
                    MAX_OSC_PARAMS => (),
251
252
                    // First param is special - 0 to current byte index
253
0
                    0 => {
254
0
                        self.osc_params[param_idx] = (0, idx);
255
0
                        self.osc_num_params += 1;
256
0
                    }
257
258
                    // All other params depend on previous indexing
259
0
                    _ => {
260
0
                        let prev = self.osc_params[param_idx - 1];
261
0
                        let begin = prev.1;
262
0
                        self.osc_params[param_idx] = (begin, idx);
263
0
                        self.osc_num_params += 1;
264
0
                    }
265
                }
266
0
                self.osc_dispatch(performer, byte);
267
            }
268
0
            Action::Unhook => performer.unhook(),
269
            Action::CsiDispatch => {
270
0
                if self.params.is_full() {
271
0
                    self.ignoring = true;
272
0
                } else {
273
0
                    self.params.push(self.param);
274
0
                }
275
276
0
                performer.csi_dispatch(self.params(), self.intermediates(), self.ignoring, byte);
277
            }
278
0
            Action::EscDispatch => {
279
0
                performer.esc_dispatch(self.intermediates(), self.ignoring, byte);
280
0
            }
281
            Action::Collect => {
282
0
                if self.intermediate_idx == MAX_INTERMEDIATES {
283
0
                    self.ignoring = true;
284
0
                } else {
285
0
                    self.intermediates[self.intermediate_idx] = byte;
286
0
                    self.intermediate_idx += 1;
287
0
                }
288
            }
289
            Action::Param => {
290
0
                if self.params.is_full() {
291
0
                    self.ignoring = true;
292
0
                    return;
293
0
                }
294
295
0
                if byte == b';' {
296
0
                    self.params.push(self.param);
297
0
                    self.param = 0;
298
0
                } else if byte == b':' {
299
0
                    self.params.extend(self.param);
300
0
                    self.param = 0;
301
0
                } else {
302
0
                    // Continue collecting bytes into param
303
0
                    self.param = self.param.saturating_mul(10);
304
0
                    self.param = self.param.saturating_add((byte - b'0') as u16);
305
0
                }
306
            }
307
0
            Action::Clear => {
308
0
                // Reset everything on ESC/CSI/DCS entry
309
0
                self.intermediate_idx = 0;
310
0
                self.ignoring = false;
311
0
                self.param = 0;
312
0
313
0
                self.params.clear();
314
0
            }
315
0
            Action::BeginUtf8 => self.process_utf8(performer, byte),
316
0
            Action::Ignore => (),
317
0
            Action::Nop => (),
318
        }
319
0
    }
320
}
321
322
/// Build a `char` out of bytes
323
pub trait CharAccumulator: Default {
324
    /// Build a `char` out of bytes
325
    ///
326
    /// Return `None` when more data is needed
327
    fn add(&mut self, byte: u8) -> Option<char>;
328
}
329
330
/// Most flexible [`CharAccumulator`] for [`Parser`] based on active features
331
#[cfg(feature = "utf8")]
332
pub type DefaultCharAccumulator = Utf8Parser;
333
#[cfg(not(feature = "utf8"))]
334
pub type DefaultCharAccumulator = AsciiParser;
335
336
/// Only allow parsing 7-bit ASCII
337
#[allow(clippy::exhaustive_structs)]
338
#[derive(Default, Clone, Debug, PartialEq, Eq)]
339
pub struct AsciiParser;
340
341
impl CharAccumulator for AsciiParser {
342
0
    fn add(&mut self, _byte: u8) -> Option<char> {
343
0
        unreachable!("multi-byte UTF8 characters are unsupported")
344
    }
345
}
346
347
/// Allow parsing UTF-8
348
#[cfg(feature = "utf8")]
349
#[derive(Default, Clone, Debug, PartialEq, Eq)]
350
pub struct Utf8Parser {
351
    utf8_parser: utf8::Parser,
352
}
353
354
#[cfg(feature = "utf8")]
355
impl CharAccumulator for Utf8Parser {
356
0
    fn add(&mut self, byte: u8) -> Option<char> {
357
0
        let mut c = None;
358
0
        let mut receiver = VtUtf8Receiver(&mut c);
359
0
        self.utf8_parser.advance(&mut receiver, byte);
360
0
        c
361
0
    }
362
}
363
364
#[cfg(feature = "utf8")]
365
struct VtUtf8Receiver<'a>(&'a mut Option<char>);
366
367
#[cfg(feature = "utf8")]
368
impl utf8::Receiver for VtUtf8Receiver<'_> {
369
0
    fn codepoint(&mut self, c: char) {
370
0
        *self.0 = Some(c);
371
0
    }
372
373
0
    fn invalid_sequence(&mut self) {
374
0
        *self.0 = Some('�');
375
0
    }
376
}
377
378
/// Performs actions requested by the [`Parser`]
379
///
380
/// Actions in this case mean, for example, handling a CSI escape sequence describing cursor
381
/// movement, or simply printing characters to the screen.
382
///
383
/// The methods on this type correspond to actions described in
384
/// <http://vt100.net/emu/dec_ansi_parser>. I've done my best to describe them in
385
/// a useful way in my own words for completeness, but the site should be
386
/// referenced if something isn't clear. If the site disappears at some point in
387
/// the future, consider checking archive.org.
388
pub trait Perform {
389
    /// Draw a character to the screen and update states.
390
0
    fn print(&mut self, _c: char) {}
391
392
    /// Execute a C0 or C1 control function.
393
0
    fn execute(&mut self, _byte: u8) {}
394
395
    /// Invoked when a final character arrives in first part of device control string.
396
    ///
397
    /// The control function should be determined from the private marker, final character, and
398
    /// execute with a parameter list. A handler should be selected for remaining characters in the
399
    /// string; the handler function should subsequently be called by `put` for every character in
400
    /// the control string.
401
    ///
402
    /// The `ignore` flag indicates that more than two intermediates arrived and
403
    /// subsequent characters were ignored.
404
0
    fn hook(&mut self, _params: &Params, _intermediates: &[u8], _ignore: bool, _action: u8) {}
405
406
    /// Pass bytes as part of a device control string to the handle chosen in `hook`. C0 controls
407
    /// will also be passed to the handler.
408
0
    fn put(&mut self, _byte: u8) {}
409
410
    /// Called when a device control string is terminated.
411
    ///
412
    /// The previously selected handler should be notified that the DCS has
413
    /// terminated.
414
0
    fn unhook(&mut self) {}
415
416
    /// Dispatch an operating system command.
417
0
    fn osc_dispatch(&mut self, _params: &[&[u8]], _bell_terminated: bool) {}
418
419
    /// A final character has arrived for a CSI sequence
420
    ///
421
    /// The `ignore` flag indicates that either more than two intermediates arrived
422
    /// or the number of parameters exceeded the maximum supported length,
423
    /// and subsequent characters were ignored.
424
0
    fn csi_dispatch(
425
0
        &mut self,
426
0
        _params: &Params,
427
0
        _intermediates: &[u8],
428
0
        _ignore: bool,
429
0
        _action: u8,
430
0
    ) {
431
0
    }
432
433
    /// The final character of an escape sequence has arrived.
434
    ///
435
    /// The `ignore` flag indicates that more than two intermediates arrived and
436
    /// subsequent characters were ignored.
437
0
    fn esc_dispatch(&mut self, _intermediates: &[u8], _ignore: bool, _byte: u8) {}
438
}
439
440
#[doc = include_str!("../README.md")]
441
#[cfg(doctest)]
442
pub struct ReadmeDoctests;