Coverage Report

Created: 2025-09-27 06:56

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/rust/registry/src/index.crates.io-1949cf8c6b5b557f/utf8parse-0.2.2/src/lib.rs
Line
Count
Source
1
//! A table-driven UTF-8 Parser
2
//!
3
//! This module implements a table-driven UTF-8 parser which should
4
//! theoretically contain the minimal number of branches (1). The only branch is
5
//! on the `Action` returned from unpacking a transition.
6
#![deny(clippy::all, clippy::if_not_else, clippy::enum_glob_use)]
7
#![cfg_attr(all(feature = "nightly", test), feature(test))]
8
#![no_std]
9
10
use core::char;
11
12
mod types;
13
14
use types::{Action, State};
15
16
/// Sink for the events produced while decoding a byte stream.
///
/// The parser reports each outcome through exactly one of these callbacks:
/// a fully decoded character, or notice that the bytes seen so far did not
/// form a valid sequence.
pub trait Receiver {
    /// Invoked with every character that was decoded successfully.
    fn codepoint(&mut self, _: char);

    /// Invoked when the parser detects an invalid byte sequence.
    fn invalid_sequence(&mut self);
}
24
25
/// A parser for Utf8 Characters
26
///
27
/// Repeatedly call `advance` with bytes to emit Utf8 characters
28
#[derive(Clone, Default, PartialEq, Eq, Debug)]
29
pub struct Parser {
30
    point: u32,
31
    state: State,
32
}
33
34
/// Mask applied to continuation bytes before their bits are merged into the
/// codepoint: keeps the low six bits (`0b0011_1111`) and clears the top two.
const CONTINUATION_MASK: u8 = 0x3F;
36
37
impl Parser {
38
    /// Create a new Parser
39
    pub fn new() -> Parser {
40
        Parser { point: 0, state: State::Ground }
41
    }
42
43
    /// Advance the parser
44
    ///
45
    /// The provider receiver will be called whenever a codepoint is completed or an invalid
46
    /// sequence is detected.
47
0
    pub fn advance<R>(&mut self, receiver: &mut R, byte: u8)
48
0
    where
49
0
        R: Receiver,
50
    {
51
0
        let (state, action) = self.state.advance(byte);
52
0
        self.perform_action(receiver, byte, action);
53
0
        self.state = state;
54
0
    }
Unexecuted instantiation: <utf8parse::Parser>::advance::<anstream::adapter::strip::VtUtf8Receiver>
Unexecuted instantiation: <utf8parse::Parser>::advance::<anstyle_parse::VtUtf8Receiver>
55
56
0
    fn perform_action<R>(&mut self, receiver: &mut R, byte: u8, action: Action)
57
0
    where
58
0
        R: Receiver,
59
    {
60
0
        match action {
61
0
            Action::InvalidSequence => {
62
0
                self.point = 0;
63
0
                receiver.invalid_sequence();
64
0
            },
65
0
            Action::EmitByte => {
66
0
                receiver.codepoint(byte as char);
67
0
            },
68
0
            Action::SetByte1 => {
69
0
                let point = self.point | ((byte & CONTINUATION_MASK) as u32);
70
0
                let c = unsafe { char::from_u32_unchecked(point) };
71
0
                self.point = 0;
72
0
73
0
                receiver.codepoint(c);
74
0
            },
75
0
            Action::SetByte2 => {
76
0
                self.point |= ((byte & CONTINUATION_MASK) as u32) << 6;
77
0
            },
78
0
            Action::SetByte2Top => {
79
0
                self.point |= ((byte & 0b0001_1111) as u32) << 6;
80
0
            },
81
0
            Action::SetByte3 => {
82
0
                self.point |= ((byte & CONTINUATION_MASK) as u32) << 12;
83
0
            },
84
0
            Action::SetByte3Top => {
85
0
                self.point |= ((byte & 0b0000_1111) as u32) << 12;
86
0
            },
87
0
            Action::SetByte4 => {
88
0
                self.point |= ((byte & 0b0000_0111) as u32) << 18;
89
0
            },
90
        }
91
0
    }
Unexecuted instantiation: <utf8parse::Parser>::perform_action::<anstream::adapter::strip::VtUtf8Receiver>
Unexecuted instantiation: <utf8parse::Parser>::perform_action::<anstyle_parse::VtUtf8Receiver>
92
}
93
94
#[cfg(all(feature = "nightly", test))]
mod benches {
    extern crate std;
    extern crate test;

    use self::test::{black_box, Bencher};
    use super::{Parser, Receiver};

    // Input shared by both benchmarks, embedded at compile time.
    static UTF8_DEMO: &[u8] = include_bytes!("../tests/UTF-8-demo.txt");

    // Receiver used for benchmarking: decoded characters are only passed to
    // `black_box`, and invalid sequences are ignored.
    impl Receiver for () {
        fn codepoint(&mut self, c: char) {
            black_box(c);
        }

        fn invalid_sequence(&mut self) {}
    }

    /// Runs every byte of the demo text through this crate's parser.
    #[bench]
    fn parse_bench_utf8_demo(b: &mut Bencher) {
        let mut parser = Parser::new();

        b.iter(|| UTF8_DEMO.iter().for_each(|&byte| parser.advance(&mut (), byte)))
    }

    /// Decodes the same text with `std::str::from_utf8` plus `chars()`.
    #[bench]
    fn std_string_parse_utf8(b: &mut Bencher) {
        b.iter(|| {
            let text = std::str::from_utf8(UTF8_DEMO).unwrap();
            text.chars().for_each(|c| {
                black_box(c);
            });
        });
    }
}