/rust/registry/src/index.crates.io-1949cf8c6b5b557f/anstyle-parse-0.2.7/src/lib.rs
Line | Count | Source |
1 | | //! Parser for implementing virtual terminal emulators |
2 | | //! |
3 | | //! [`Parser`] is implemented according to [Paul Williams' ANSI parser |
4 | | //! state machine]. The state machine doesn't assign meaning to the parsed data |
5 | | //! and is thus not itself sufficient for writing a terminal emulator. Instead, |
6 | | //! it is expected that an implementation of [`Perform`] is provided which does |
7 | | //! something useful with the parsed data. The [`Parser`] handles the |
8 | | //! bookkeeping, and the [`Perform`] gets to simply handle actions. |
9 | | //! |
10 | | //! # Examples |
11 | | //! |
12 | | //! For an example of using the [`Parser`] please see the examples folder. The example included |
13 | | //! there simply logs all the actions [`Perform`] does. A quick way to see it in action is to |
14 | | //! pipe `vim` into it: |
15 | | //! |
16 | | //! ```sh |
17 | | //! cargo build --release --example parselog |
18 | | //! vim | target/release/examples/parselog |
19 | | //! ``` |
20 | | //! |
21 | | //! Just type `:q` to exit. |
22 | | //! |
23 | | //! # Differences from original state machine description |
24 | | //! |
25 | | //! * UTF-8 Support for Input |
26 | | //! * OSC Strings can be terminated by 0x07 |
27 | | //! * Only supports 7-bit codes. Some 8-bit codes are still supported, but they no longer work in |
28 | | //! all states. |
29 | | //! |
30 | | //! [Paul Williams' ANSI parser state machine]: https://vt100.net/emu/dec_ansi_parser |
31 | | #![cfg_attr(not(test), no_std)] |
32 | | #![cfg_attr(docsrs, feature(doc_auto_cfg))] |
33 | | #![allow(missing_docs)] |
34 | | #![warn(clippy::print_stderr)] |
35 | | #![warn(clippy::print_stdout)] |
36 | | |
37 | | #[cfg(not(feature = "core"))] |
38 | | extern crate alloc; |
39 | | |
40 | | use core::mem::MaybeUninit; |
41 | | |
42 | | #[cfg(feature = "core")] |
43 | | use arrayvec::ArrayVec; |
44 | | #[cfg(feature = "utf8")] |
45 | | use utf8parse as utf8; |
46 | | |
47 | | mod params; |
48 | | pub mod state; |
49 | | |
50 | | pub use params::{Params, ParamsIter}; |
51 | | |
52 | | use state::{state_change, Action, State}; |
53 | | |
// Maximum number of intermediate bytes stored per sequence; collecting one more
// sets the parser's `ignoring` flag instead of storing the byte.
const MAX_INTERMEDIATES: usize = 2;
// Maximum number of `;`-separated OSC parameters whose ranges are recorded.
const MAX_OSC_PARAMS: usize = 16;
// Capacity of the fixed-size OSC byte buffer used with the `core` feature;
// once it is full, further OSC bytes are dropped.
#[cfg(feature = "core")]
const MAX_OSC_RAW: usize = 1024;
58 | | |
/// Parser for raw _VTE_ protocol which delegates actions to a [`Perform`]
///
/// `C` is the [`CharAccumulator`] that assembles bytes into `char`s while the
/// parser is in the UTF-8 state.
#[allow(unused_qualifications)]
#[derive(Default, Clone, Debug, PartialEq, Eq)]
pub struct Parser<C = DefaultCharAccumulator> {
    /// Current state of the state machine.
    state: State,
    /// Intermediate bytes collected for the sequence in progress.
    intermediates: [u8; MAX_INTERMEDIATES],
    /// Number of valid entries at the front of `intermediates`.
    intermediate_idx: usize,
    /// Parameters completed so far for the sequence in progress.
    params: Params,
    /// Parameter currently being accumulated digit by digit (saturating).
    param: u16,
    /// Raw OSC payload bytes, with the `;` separators left out.
    #[cfg(feature = "core")]
    osc_raw: ArrayVec<u8, MAX_OSC_RAW>,
    /// Raw OSC payload bytes, with the `;` separators left out.
    #[cfg(not(feature = "core"))]
    osc_raw: alloc::vec::Vec<u8>,
    /// `(start, end)` byte ranges into `osc_raw`, one per OSC parameter.
    osc_params: [(usize, usize); MAX_OSC_PARAMS],
    /// Number of valid entries at the front of `osc_params`.
    osc_num_params: usize,
    /// Set when intermediates or parameters overflowed their storage; handed to
    /// the [`Perform`] dispatch callbacks as the `ignore` flag.
    ignoring: bool,
    /// Accumulator fed with each byte while in the UTF-8 state.
    utf8_parser: C,
}
77 | | |
78 | | impl<C> Parser<C> |
79 | | where |
80 | | C: CharAccumulator, |
81 | | { |
82 | | /// Create a new Parser |
83 | 0 | pub fn new() -> Parser { |
84 | 0 | Parser::default() |
85 | 0 | } |
86 | | |
87 | | #[inline] |
88 | 0 | fn params(&self) -> &Params { |
89 | 0 | &self.params |
90 | 0 | } |
91 | | |
92 | | #[inline] |
93 | 0 | fn intermediates(&self) -> &[u8] { |
94 | 0 | &self.intermediates[..self.intermediate_idx] |
95 | 0 | } |
96 | | |
97 | | /// Advance the parser state |
98 | | /// |
99 | | /// Requires a [`Perform`] in case `byte` triggers an action |
100 | | #[inline] |
101 | 0 | pub fn advance<P: Perform>(&mut self, performer: &mut P, byte: u8) { |
102 | | // Utf8 characters are handled out-of-band. |
103 | 0 | if let State::Utf8 = self.state { |
104 | 0 | self.process_utf8(performer, byte); |
105 | 0 | return; |
106 | 0 | } |
107 | | |
108 | 0 | let (state, action) = state_change(self.state, byte); |
109 | 0 | self.perform_state_change(performer, state, action, byte); |
110 | 0 | } |
111 | | |
112 | | #[inline] |
113 | 0 | fn process_utf8<P>(&mut self, performer: &mut P, byte: u8) |
114 | 0 | where |
115 | 0 | P: Perform, |
116 | | { |
117 | 0 | if let Some(c) = self.utf8_parser.add(byte) { |
118 | 0 | performer.print(c); |
119 | 0 | self.state = State::Ground; |
120 | 0 | } |
121 | 0 | } |
122 | | |
123 | | #[inline] |
124 | 0 | fn perform_state_change<P>(&mut self, performer: &mut P, state: State, action: Action, byte: u8) |
125 | 0 | where |
126 | 0 | P: Perform, |
127 | | { |
128 | 0 | match state { |
129 | 0 | State::Anywhere => { |
130 | 0 | // Just run the action |
131 | 0 | self.perform_action(performer, action, byte); |
132 | 0 | } |
133 | 0 | state => { |
134 | 0 | match self.state { |
135 | 0 | State::DcsPassthrough => { |
136 | 0 | self.perform_action(performer, Action::Unhook, byte); |
137 | 0 | } |
138 | 0 | State::OscString => { |
139 | 0 | self.perform_action(performer, Action::OscEnd, byte); |
140 | 0 | } |
141 | 0 | _ => (), |
142 | | } |
143 | | |
144 | 0 | match action { |
145 | 0 | Action::Nop => (), |
146 | 0 | action => { |
147 | 0 | self.perform_action(performer, action, byte); |
148 | 0 | } |
149 | | } |
150 | | |
151 | 0 | match state { |
152 | 0 | State::CsiEntry | State::DcsEntry | State::Escape => { |
153 | 0 | self.perform_action(performer, Action::Clear, byte); |
154 | 0 | } |
155 | 0 | State::DcsPassthrough => { |
156 | 0 | self.perform_action(performer, Action::Hook, byte); |
157 | 0 | } |
158 | 0 | State::OscString => { |
159 | 0 | self.perform_action(performer, Action::OscStart, byte); |
160 | 0 | } |
161 | 0 | _ => (), |
162 | | } |
163 | | |
164 | | // Assume the new state |
165 | 0 | self.state = state; |
166 | | } |
167 | | } |
168 | 0 | } |
169 | | |
170 | | /// Separate method for `osc_dispatch` that borrows self as read-only |
171 | | /// |
172 | | /// The aliasing is needed here for multiple slices into `self.osc_raw` |
173 | | #[inline] |
174 | 0 | fn osc_dispatch<P: Perform>(&self, performer: &mut P, byte: u8) { |
175 | 0 | let mut slices: [MaybeUninit<&[u8]>; MAX_OSC_PARAMS] = |
176 | 0 | unsafe { MaybeUninit::uninit().assume_init() }; |
177 | | |
178 | 0 | for (i, slice) in slices.iter_mut().enumerate().take(self.osc_num_params) { |
179 | 0 | let indices = self.osc_params[i]; |
180 | 0 | *slice = MaybeUninit::new(&self.osc_raw[indices.0..indices.1]); |
181 | 0 | } |
182 | | |
183 | 0 | unsafe { |
184 | 0 | let num_params = self.osc_num_params; |
185 | 0 | let params = &slices[..num_params] as *const [MaybeUninit<&[u8]>] as *const [&[u8]]; |
186 | 0 | performer.osc_dispatch(&*params, byte == 0x07); |
187 | 0 | } |
188 | 0 | } |
189 | | |
190 | | #[inline] |
191 | 0 | fn perform_action<P: Perform>(&mut self, performer: &mut P, action: Action, byte: u8) { |
192 | 0 | match action { |
193 | 0 | Action::Print => performer.print(byte as char), |
194 | 0 | Action::Execute => performer.execute(byte), |
195 | | Action::Hook => { |
196 | 0 | if self.params.is_full() { |
197 | 0 | self.ignoring = true; |
198 | 0 | } else { |
199 | 0 | self.params.push(self.param); |
200 | 0 | } |
201 | | |
202 | 0 | performer.hook(self.params(), self.intermediates(), self.ignoring, byte); |
203 | | } |
204 | 0 | Action::Put => performer.put(byte), |
205 | 0 | Action::OscStart => { |
206 | 0 | self.osc_raw.clear(); |
207 | 0 | self.osc_num_params = 0; |
208 | 0 | } |
209 | | Action::OscPut => { |
210 | | #[cfg(feature = "core")] |
211 | | { |
212 | | if self.osc_raw.is_full() { |
213 | | return; |
214 | | } |
215 | | } |
216 | | |
217 | 0 | let idx = self.osc_raw.len(); |
218 | | |
219 | | // Param separator |
220 | 0 | if byte == b';' { |
221 | 0 | let param_idx = self.osc_num_params; |
222 | 0 | match param_idx { |
223 | | // Only process up to MAX_OSC_PARAMS |
224 | 0 | MAX_OSC_PARAMS => return, |
225 | | |
226 | | // First param is special - 0 to current byte index |
227 | 0 | 0 => { |
228 | 0 | self.osc_params[param_idx] = (0, idx); |
229 | 0 | } |
230 | | |
231 | | // All other params depend on previous indexing |
232 | 0 | _ => { |
233 | 0 | let prev = self.osc_params[param_idx - 1]; |
234 | 0 | let begin = prev.1; |
235 | 0 | self.osc_params[param_idx] = (begin, idx); |
236 | 0 | } |
237 | | } |
238 | | |
239 | 0 | self.osc_num_params += 1; |
240 | 0 | } else { |
241 | 0 | self.osc_raw.push(byte); |
242 | 0 | } |
243 | | } |
244 | | Action::OscEnd => { |
245 | 0 | let param_idx = self.osc_num_params; |
246 | 0 | let idx = self.osc_raw.len(); |
247 | | |
248 | 0 | match param_idx { |
249 | | // Finish last parameter if not already maxed |
250 | 0 | MAX_OSC_PARAMS => (), |
251 | | |
252 | | // First param is special - 0 to current byte index |
253 | 0 | 0 => { |
254 | 0 | self.osc_params[param_idx] = (0, idx); |
255 | 0 | self.osc_num_params += 1; |
256 | 0 | } |
257 | | |
258 | | // All other params depend on previous indexing |
259 | 0 | _ => { |
260 | 0 | let prev = self.osc_params[param_idx - 1]; |
261 | 0 | let begin = prev.1; |
262 | 0 | self.osc_params[param_idx] = (begin, idx); |
263 | 0 | self.osc_num_params += 1; |
264 | 0 | } |
265 | | } |
266 | 0 | self.osc_dispatch(performer, byte); |
267 | | } |
268 | 0 | Action::Unhook => performer.unhook(), |
269 | | Action::CsiDispatch => { |
270 | 0 | if self.params.is_full() { |
271 | 0 | self.ignoring = true; |
272 | 0 | } else { |
273 | 0 | self.params.push(self.param); |
274 | 0 | } |
275 | | |
276 | 0 | performer.csi_dispatch(self.params(), self.intermediates(), self.ignoring, byte); |
277 | | } |
278 | 0 | Action::EscDispatch => { |
279 | 0 | performer.esc_dispatch(self.intermediates(), self.ignoring, byte); |
280 | 0 | } |
281 | | Action::Collect => { |
282 | 0 | if self.intermediate_idx == MAX_INTERMEDIATES { |
283 | 0 | self.ignoring = true; |
284 | 0 | } else { |
285 | 0 | self.intermediates[self.intermediate_idx] = byte; |
286 | 0 | self.intermediate_idx += 1; |
287 | 0 | } |
288 | | } |
289 | | Action::Param => { |
290 | 0 | if self.params.is_full() { |
291 | 0 | self.ignoring = true; |
292 | 0 | return; |
293 | 0 | } |
294 | | |
295 | 0 | if byte == b';' { |
296 | 0 | self.params.push(self.param); |
297 | 0 | self.param = 0; |
298 | 0 | } else if byte == b':' { |
299 | 0 | self.params.extend(self.param); |
300 | 0 | self.param = 0; |
301 | 0 | } else { |
302 | 0 | // Continue collecting bytes into param |
303 | 0 | self.param = self.param.saturating_mul(10); |
304 | 0 | self.param = self.param.saturating_add((byte - b'0') as u16); |
305 | 0 | } |
306 | | } |
307 | 0 | Action::Clear => { |
308 | 0 | // Reset everything on ESC/CSI/DCS entry |
309 | 0 | self.intermediate_idx = 0; |
310 | 0 | self.ignoring = false; |
311 | 0 | self.param = 0; |
312 | 0 |
|
313 | 0 | self.params.clear(); |
314 | 0 | } |
315 | 0 | Action::BeginUtf8 => self.process_utf8(performer, byte), |
316 | 0 | Action::Ignore => (), |
317 | 0 | Action::Nop => (), |
318 | | } |
319 | 0 | } |
320 | | } |
321 | | |
/// Build a `char` out of bytes
///
/// [`Parser`] feeds an implementation one byte at a time while it is in its
/// UTF-8 state and prints each `char` the accumulator yields.
pub trait CharAccumulator: Default {
    /// Build a `char` out of bytes
    ///
    /// Return `None` when more data is needed
    fn add(&mut self, byte: u8) -> Option<char>;
}
329 | | |
/// Most flexible [`CharAccumulator`] for [`Parser`] based on active features
#[cfg(feature = "utf8")]
pub type DefaultCharAccumulator = Utf8Parser;
/// Most flexible [`CharAccumulator`] for [`Parser`] based on active features
///
/// Without the `utf8` feature this is the ASCII-only accumulator.
#[cfg(not(feature = "utf8"))]
pub type DefaultCharAccumulator = AsciiParser;
335 | | |
/// Only allow parsing 7-bit ASCII
#[allow(clippy::exhaustive_structs)]
#[derive(Default, Clone, Debug, PartialEq, Eq)]
pub struct AsciiParser;

impl CharAccumulator for AsciiParser {
    /// Never produces a `char`.
    ///
    /// # Panics
    ///
    /// Always panics: this accumulator is not expected to be fed any bytes.
    fn add(&mut self, _byte: u8) -> Option<char> {
        unreachable!("multi-byte UTF8 characters are unsupported")
    }
}
346 | | |
/// Allow parsing UTF-8
#[cfg(feature = "utf8")]
#[derive(Default, Clone, Debug, PartialEq, Eq)]
pub struct Utf8Parser {
    /// Incremental decoder that `add` feeds one byte at a time.
    utf8_parser: utf8::Parser,
}

#[cfg(feature = "utf8")]
impl CharAccumulator for Utf8Parser {
    /// Feed one byte to the decoder.
    ///
    /// Returns whatever the decoder reported for this byte through the
    /// receiver, or `None` if it reported nothing.
    fn add(&mut self, byte: u8) -> Option<char> {
        let mut decoded = None;
        self.utf8_parser
            .advance(&mut VtUtf8Receiver(&mut decoded), byte);
        decoded
    }
}
363 | | |
// Adapter that stores whatever the UTF-8 decoder reports into the borrowed
// `Option<char>` slot, so `Utf8Parser::add` can return it.
#[cfg(feature = "utf8")]
struct VtUtf8Receiver<'a>(&'a mut Option<char>);

#[cfg(feature = "utf8")]
impl utf8::Receiver for VtUtf8Receiver<'_> {
    // A complete code point was decoded: hand it back as is.
    fn codepoint(&mut self, c: char) {
        *self.0 = Some(c);
    }

    // The bytes did not form valid UTF-8: substitute a placeholder character
    // rather than reporting nothing.
    fn invalid_sequence(&mut self) {
        *self.0 = Some('�');
    }
}
377 | | |
/// Performs actions requested by the [`Parser`]
///
/// Actions in this case mean, for example, handling a CSI escape sequence describing cursor
/// movement, or simply printing characters to the screen.
///
/// The methods on this type correspond to actions described in
/// <http://vt100.net/emu/dec_ansi_parser>. I've done my best to describe them in
/// a useful way in my own words for completeness, but the site should be
/// referenced if something isn't clear. If the site disappears at some point in
/// the future, consider checking archive.org.
///
/// Every method has an empty default body, so an implementation only needs to
/// override the actions it cares about.
pub trait Perform {
    /// Draw a character to the screen and update states.
    fn print(&mut self, _c: char) {}

    /// Execute a C0 or C1 control function.
    fn execute(&mut self, _byte: u8) {}

    /// Invoked when a final character arrives in first part of device control string.
    ///
    /// The control function should be determined from the private marker, final character, and
    /// execute with a parameter list. A handler should be selected for remaining characters in the
    /// string; the handler function should subsequently be called by `put` for every character in
    /// the control string.
    ///
    /// The `ignore` flag indicates that either more than two intermediates arrived
    /// or the number of parameters exceeded the maximum supported length,
    /// and subsequent characters were ignored.
    fn hook(&mut self, _params: &Params, _intermediates: &[u8], _ignore: bool, _action: u8) {}

    /// Pass bytes as part of a device control string to the handler chosen in `hook`. C0 controls
    /// will also be passed to the handler.
    fn put(&mut self, _byte: u8) {}

    /// Called when a device control string is terminated.
    ///
    /// The previously selected handler should be notified that the DCS has
    /// terminated.
    fn unhook(&mut self) {}

    /// Dispatch an operating system command.
    ///
    /// `_params` holds the `;`-separated parameters of the command as byte
    /// slices. `_bell_terminated` is `true` when the string was ended by the
    /// byte 0x07.
    fn osc_dispatch(&mut self, _params: &[&[u8]], _bell_terminated: bool) {}

    /// A final character has arrived for a CSI sequence
    ///
    /// The `ignore` flag indicates that either more than two intermediates arrived
    /// or the number of parameters exceeded the maximum supported length,
    /// and subsequent characters were ignored.
    fn csi_dispatch(
        &mut self,
        _params: &Params,
        _intermediates: &[u8],
        _ignore: bool,
        _action: u8,
    ) {
    }

    /// The final character of an escape sequence has arrived.
    ///
    /// The `ignore` flag indicates that more than two intermediates arrived and
    /// subsequent characters were ignored.
    fn esc_dispatch(&mut self, _intermediates: &[u8], _ignore: bool, _byte: u8) {}
}
439 | | |
// Exists only under `cfg(doctest)`: attaching the README as this item's
// documentation lets the code samples in the README be checked as doctests.
#[doc = include_str!("../README.md")]
#[cfg(doctest)]
pub struct ReadmeDoctests;