Coverage Report

Created: 2025-10-13 06:48

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/rust/registry/src/index.crates.io-1949cf8c6b5b557f/simdutf8-0.1.5/src/basic.rs
Line
Count
Source
1
//! The `basic` API flavor provides barebones UTF-8 checking at the highest speed.
2
//!
3
//! It is fastest on valid UTF-8, but only checks for errors after processing the whole byte sequence
4
//! and does not provide detailed information if the data is not valid UTF-8. [`Utf8Error`] is a zero-sized error struct.
5
//!
6
//! If you need detailed error information use the functions from the [`crate::compat`] module instead.
7
8
use core::str::{from_utf8_unchecked, from_utf8_unchecked_mut};
9
10
use crate::implementation::validate_utf8_basic;
11
12
/// Simple zero-sized UTF-8 error.
13
///
14
/// No information is provided where the error occurred or how long the invalid byte
15
/// sequence is.
16
#[derive(Copy, Eq, PartialEq, Clone, Debug)]
17
pub struct Utf8Error;
18
19
impl core::fmt::Display for Utf8Error {
20
0
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
21
0
        f.write_str("invalid utf-8 sequence")
22
0
    }
23
}
24
25
#[cfg(feature = "std")]
26
impl std::error::Error for Utf8Error {}
27
28
/// Analogue to [`std::str::from_utf8()`].
29
///
30
/// Checks if the passed byte sequence is valid UTF-8 and returns an
31
/// [`std::str`] reference to the passed byte slice wrapped in `Ok()` if it is.
32
///
33
/// # Errors
34
/// Will return the zero-sized Err([`Utf8Error`]) if the input contains invalid UTF-8.
35
#[inline]
36
15.4M
pub fn from_utf8(input: &[u8]) -> Result<&str, Utf8Error> {
37
    unsafe {
38
15.4M
        validate_utf8_basic(input)?;
39
15.2M
        Ok(from_utf8_unchecked(input))
40
    }
41
15.4M
}
simdutf8::basic::from_utf8
Line
Count
Source
36
15.4M
pub fn from_utf8(input: &[u8]) -> Result<&str, Utf8Error> {
37
    unsafe {
38
15.4M
        validate_utf8_basic(input)?;
39
15.2M
        Ok(from_utf8_unchecked(input))
40
    }
41
15.4M
}
Unexecuted instantiation: simdutf8::basic::from_utf8
42
43
/// Analogue to [`std::str::from_utf8_mut()`].
44
///
45
/// Checks if the passed mutable byte sequence is valid UTF-8 and returns a mutable
46
/// [`std::str`] reference to the passed byte slice wrapped in `Ok()` if it is.
47
///
48
/// # Errors
49
/// Will return the zero-sized Err([`Utf8Error`]) if the input contains invalid UTF-8.
50
#[inline]
51
0
pub fn from_utf8_mut(input: &mut [u8]) -> Result<&mut str, Utf8Error> {
52
    unsafe {
53
0
        validate_utf8_basic(input)?;
54
0
        Ok(from_utf8_unchecked_mut(input))
55
    }
56
0
}
57
58
/// Allows direct access to the platform-specific unsafe validation implementations.
59
#[cfg(feature = "public_imp")]
60
pub mod imp {
61
    use crate::basic;
62
63
    /// A low-level interface for streaming validation of UTF-8 data. It is meant to be integrated
64
    /// in high-performance data processing pipelines.
65
    ///
66
    /// Data can be streamed in arbitrarily-sized chunks using the [`Self::update()`] method. There is
67
    /// no way to find out if the input so far was valid UTF-8 during the validation. Only when
68
    /// the validation is completed with the [`Self::finalize()`] method the result of the validation is
69
    /// returned. Use [`ChunkedUtf8Validator`] if possible for highest performance.
70
    ///
71
    /// This implementation requires CPU SIMD features specified by the module it resides in.
72
    /// It is undefined behavior to use it if the required CPU features are not available which
73
    /// is why all trait methods are `unsafe`.
74
    ///
75
    /// General usage:
76
    /// ```rust
77
    /// use simdutf8::basic::imp::Utf8Validator;
78
    /// use std::io::{stdin, Read, Result};
79
    ///
80
    /// # #[cfg(target_arch = "x86_64")]
81
    /// fn main() -> Result<()> {
82
    ///     unsafe {
83
    ///         if !std::is_x86_feature_detected!("avx2") {
84
    ///             panic!("This example only works with CPUs supporting AVX 2");
85
    ///         }
86
    ///
87
    ///         let mut validator = simdutf8::basic::imp::x86::avx2::Utf8ValidatorImp::new();
88
    ///         let mut buf = vec![0; 8192];
89
    ///         loop {
90
    ///             let bytes_read = stdin().read(buf.as_mut())?;
91
    ///             if bytes_read == 0 {
92
    ///                 break;
93
    ///             }
94
    ///             validator.update(&buf[..bytes_read]);
95
    ///         }
96
    ///
97
    ///         if validator.finalize().is_ok() {
98
    ///             println!("Input is valid UTF-8");
99
    ///         } else {
100
    ///             println!("Input is not valid UTF-8");
101
    ///         }
102
    ///     }
103
    ///
104
    ///     Ok(())
105
    /// }
106
    ///
107
    /// # #[cfg(not(target_arch = "x86_64"))]
108
    /// # fn main() { }
109
    /// ```
110
    ///
111
    pub trait Utf8Validator {
112
        /// Creates a new validator.
113
        ///
114
        /// # Safety
115
        /// This implementation requires CPU SIMD features specified by the module it resides in.
116
        /// It is undefined behavior to call it if the required CPU features are not available.
117
        #[must_use]
118
        unsafe fn new() -> Self
119
        where
120
            Self: Sized;
121
122
        /// Updates the validator with `input`.
123
        ///
124
        /// # Safety
125
        /// This implementation requires CPU SIMD features specified by the module it resides in.
126
        /// It is undefined behavior to call it if the required CPU features are not available.
127
        unsafe fn update(&mut self, input: &[u8]);
128
129
        /// Finishes the validation and returns `Ok(())` if the input was valid UTF-8.
130
        ///
131
        /// # Errors
132
        /// A [`basic::Utf8Error`] is returned if the input was not valid UTF-8. No
133
        /// further information about the location of the error is provided.
134
        ///
135
        /// # Safety
136
        /// This implementation requires CPU SIMD features specified by the module it resides in.
137
        /// It is undefined behavior to call it if the required CPU features are not available.
138
        unsafe fn finalize(self) -> core::result::Result<(), basic::Utf8Error>;
139
    }
140
141
    /// Like [`Utf8Validator`] this low-level API is for streaming validation of UTF-8 data.
142
    ///
143
    /// It has additional restrictions imposed on how the input is passed in to allow
144
    /// validation with as little overhead as possible.
145
    ///
146
    /// To feed it data you need to call the [`Self::update_from_chunks()`] method which takes slices which
147
    /// have to be a multiple of 64 bytes long. The method will panic otherwise.  There is
148
    /// no way to find out if the input so far was valid UTF-8 during the validation. Only when
149
    /// the validation is completed with the [`Self::finalize()`] method the result of the validation is
150
    /// returned.
151
    ///
152
    /// The `Self::finalize()` method can be fed the rest of the data. There is no restriction on the
153
    /// data passed to it.
154
    ///
155
    /// This implementation requires CPU SIMD features specified by the module it resides in.
156
    /// It is undefined behavior to use it if the required CPU features are not available which
157
    /// is why all trait methods are `unsafe`.
158
    pub trait ChunkedUtf8Validator {
159
        /// Creates a new validator.
160
        ///
161
        /// # Safety
162
        /// This implementation requires CPU SIMD features specified by the module it resides in.
163
        /// It is undefined behavior to call it if the required CPU features are not available.
164
        #[must_use]
165
        unsafe fn new() -> Self
166
        where
167
            Self: Sized;
168
169
        /// Updates the validator with `input`.
170
        ///
171
        /// # Panics
172
        /// If `input.len()` is not a multiple of 64.
173
        ///
174
        /// # Safety
175
        /// This implementation requires CPU SIMD features specified by the module it resides in.
176
        /// It is undefined behavior to call it if the required CPU features are not available.
177
        unsafe fn update_from_chunks(&mut self, input: &[u8]);
178
179
        /// Updates the validator with remaining input if any. There is no restriction on the
180
        /// data provided.
181
        ///
182
        /// Finishes the validation and returns `Ok(())` if the input was valid UTF-8.
183
        ///
184
        /// # Errors
185
        /// A [`basic::Utf8Error`] is returned if the input was not valid UTF-8. No
186
        /// further information about the location of the error is provided.
187
        ///
188
        /// # Safety
189
        /// This implementation requires CPU SIMD features specified by the module it resides in.
190
        /// It is undefined behavior to call it if the required CPU features are not available.
191
        unsafe fn finalize(
192
            self,
193
            remaining_input: core::option::Option<&[u8]>,
194
        ) -> core::result::Result<(), basic::Utf8Error>;
195
    }
196
197
    /// Includes the x86/x86-64 SIMD implementations.
198
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
199
    pub mod x86 {
200
        /// Includes the validation implementation for AVX 2-compatible CPUs.
201
        ///
202
        /// Using the provided functionality on CPUs which do not support AVX 2 is undefined
203
        /// behavior and will very likely cause a crash.
204
        pub mod avx2 {
205
            pub use crate::implementation::x86::avx2::validate_utf8_basic as validate_utf8;
206
            pub use crate::implementation::x86::avx2::ChunkedUtf8ValidatorImp;
207
            pub use crate::implementation::x86::avx2::Utf8ValidatorImp;
208
        }
209
        /// Includes the validation implementation for SSE 4.2-compatible CPUs.
210
        ///
211
        /// Using the provided functionality on CPUs which do not support SSE 4.2 is undefined
212
        /// behavior and will very likely cause a crash.
213
        pub mod sse42 {
214
            pub use crate::implementation::x86::sse42::validate_utf8_basic as validate_utf8;
215
            pub use crate::implementation::x86::sse42::ChunkedUtf8ValidatorImp;
216
            pub use crate::implementation::x86::sse42::Utf8ValidatorImp;
217
        }
218
    }
219
220
    /// Includes the aarch64 SIMD implementations.
221
    #[cfg(all(feature = "aarch64_neon", target_arch = "aarch64"))]
222
    pub mod aarch64 {
223
        /// Includes the Neon-based validation implementation for aarch64 CPUs.
224
        ///
225
        /// Should be supported on all ARM64 CPUs. If it is not supported by the operating
226
        /// system using it is undefined behavior and will likely cause a crash.
227
        pub mod neon {
228
            pub use crate::implementation::aarch64::neon::validate_utf8_basic as validate_utf8;
229
            pub use crate::implementation::aarch64::neon::ChunkedUtf8ValidatorImp;
230
            pub use crate::implementation::aarch64::neon::Utf8ValidatorImp;
231
        }
232
    }
233
234
    /// Includes the wasm32 SIMD implementations.
235
    #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
236
    pub mod wasm32 {
237
        /// Includes the simd128-based validation implementation for WASM runtimes.
238
        ///
239
        /// Using the provided functionality on WASM runtimes that do not support SIMD
240
        /// instructions will likely cause a crash.
241
        pub mod simd128 {
242
            pub use crate::implementation::wasm32::simd128::validate_utf8_basic as validate_utf8;
243
            pub use crate::implementation::wasm32::simd128::ChunkedUtf8ValidatorImp;
244
            pub use crate::implementation::wasm32::simd128::Utf8ValidatorImp;
245
        }
246
    }
247
}