/rust/registry/src/index.crates.io-1949cf8c6b5b557f/simdutf8-0.1.5/src/basic.rs
Line | Count | Source |
1 | | //! The `basic` API flavor provides barebones UTF-8 checking at the highest speed. |
2 | | //! |
3 | | //! It is fastest on valid UTF-8, but only checks for errors after processing the whole byte sequence |
4 | | //! and does not provide detailed information if the data is not valid UTF-8. [`Utf8Error`] is a zero-sized error struct. |
5 | | //! |
6 | | //! If you need detailed error information use the functions from the [`crate::compat`] module instead. |
7 | | |
8 | | use core::str::{from_utf8_unchecked, from_utf8_unchecked_mut}; |
9 | | |
10 | | use crate::implementation::validate_utf8_basic; |
11 | | |
12 | | /// Simple zero-sized UTF-8 error. |
13 | | /// |
14 | | /// No information is provided about where the error occurred or how long the invalid |
15 | | /// byte sequence is. |
16 | | #[derive(Copy, Eq, PartialEq, Clone, Debug)] |
17 | | pub struct Utf8Error; |
18 | | |
19 | | impl core::fmt::Display for Utf8Error { |
20 | 0 | fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { |
21 | 0 | f.write_str("invalid utf-8 sequence") |
22 | 0 | } |
23 | | } |
24 | | |
25 | | #[cfg(feature = "std")] |
26 | | impl std::error::Error for Utf8Error {} |
27 | | |
28 | | /// Analogue to [`std::str::from_utf8()`]. |
29 | | /// |
30 | | /// Checks if the passed byte sequence is valid UTF-8 and returns an |
31 | | /// [`std::str`] reference to the passed byte slice wrapped in `Ok()` if it is. |
32 | | /// |
33 | | /// # Errors |
34 | | /// Will return the zero-sized Err([`Utf8Error`]) if the input contains invalid UTF-8. |
35 | | #[inline] |
36 | 15.4M | pub fn from_utf8(input: &[u8]) -> Result<&str, Utf8Error> { |
37 | | unsafe { |
38 | 15.4M | validate_utf8_basic(input)?; |
39 | 15.2M | Ok(from_utf8_unchecked(input)) |
40 | | } |
41 | 15.4M | } |
simdutf8::basic::from_utf8
Line | Count | Source |
36 | 15.4M | pub fn from_utf8(input: &[u8]) -> Result<&str, Utf8Error> { |
37 | | unsafe { |
38 | 15.4M | validate_utf8_basic(input)?; |
39 | 15.2M | Ok(from_utf8_unchecked(input)) |
40 | | } |
41 | 15.4M | } |
Unexecuted instantiation: simdutf8::basic::from_utf8 |
42 | | |
43 | | /// Analogue to [`std::str::from_utf8_mut()`]. |
44 | | /// |
45 | | /// Checks if the passed mutable byte sequence is valid UTF-8 and returns a mutable |
46 | | /// [`std::str`] reference to the passed byte slice wrapped in `Ok()` if it is. |
47 | | /// |
48 | | /// # Errors |
49 | | /// Will return the zero-sized Err([`Utf8Error`]) if the input contains invalid UTF-8. |
50 | | #[inline] |
51 | 0 | pub fn from_utf8_mut(input: &mut [u8]) -> Result<&mut str, Utf8Error> { |
52 | | unsafe { |
53 | 0 | validate_utf8_basic(input)?; |
54 | 0 | Ok(from_utf8_unchecked_mut(input)) |
55 | | } |
56 | 0 | } |
57 | | |
58 | | /// Allows direct access to the platform-specific unsafe validation implementations. |
59 | | #[cfg(feature = "public_imp")] |
60 | | pub mod imp { |
61 | | use crate::basic; |
62 | | |
63 | | /// A low-level interface for streaming validation of UTF-8 data. It is meant to be integrated |
64 | | /// in high-performance data processing pipelines. |
65 | | /// |
66 | | /// Data can be streamed in arbitrarily-sized chunks using the [`Self::update()`] method. There is |
67 | | /// no way to find out if the input so far was valid UTF-8 during the validation. The result |
68 | | /// is only returned once the validation is completed with the [`Self::finalize()`] method. |
69 | | /// Use [`ChunkedUtf8Validator`] if possible for highest performance. |
70 | | /// |
71 | | /// This implementation requires CPU SIMD features specified by the module it resides in. |
72 | | /// It is undefined behavior to use it if the required CPU features are not available which |
73 | | /// is why all trait methods are `unsafe`. |
74 | | /// |
75 | | /// General usage: |
76 | | /// ```rust |
77 | | /// use simdutf8::basic::imp::Utf8Validator; |
78 | | /// use std::io::{stdin, Read, Result}; |
79 | | /// |
80 | | /// # #[cfg(target_arch = "x86_64")] |
81 | | /// fn main() -> Result<()> { |
82 | | /// unsafe { |
83 | | /// if !std::is_x86_feature_detected!("avx2") { |
84 | | /// panic!("This example only works with CPUs supporting AVX 2"); |
85 | | /// } |
86 | | /// |
87 | | /// let mut validator = simdutf8::basic::imp::x86::avx2::Utf8ValidatorImp::new(); |
88 | | /// let mut buf = vec![0; 8192]; |
89 | | /// loop { |
90 | | /// let bytes_read = stdin().read(buf.as_mut())?; |
91 | | /// if bytes_read == 0 { |
92 | | /// break; |
93 | | /// } |
94 | | /// validator.update(&buf[..bytes_read]); |
95 | | /// } |
96 | | /// |
97 | | /// if validator.finalize().is_ok() { |
98 | | /// println!("Input is valid UTF-8"); |
99 | | /// } else { |
100 | | /// println!("Input is not valid UTF-8"); |
101 | | /// } |
102 | | /// } |
103 | | /// |
104 | | /// Ok(()) |
105 | | /// } |
106 | | /// |
107 | | /// # #[cfg(not(target_arch = "x86_64"))] |
108 | | /// # fn main() { } |
109 | | /// ``` |
110 | | /// |
111 | | pub trait Utf8Validator { |
112 | | /// Creates a new validator. |
113 | | /// |
114 | | /// # Safety |
115 | | /// This implementation requires CPU SIMD features specified by the module it resides in. |
116 | | /// It is undefined behavior to call it if the required CPU features are not available. |
117 | | #[must_use] |
118 | | unsafe fn new() -> Self |
119 | | where |
120 | | Self: Sized; |
121 | | |
122 | | /// Updates the validator with `input`. |
123 | | /// |
124 | | /// # Safety |
125 | | /// This implementation requires CPU SIMD features specified by the module it resides in. |
126 | | /// It is undefined behavior to call it if the required CPU features are not available. |
127 | | unsafe fn update(&mut self, input: &[u8]); |
128 | | |
129 | | /// Finishes the validation and returns `Ok(())` if the input was valid UTF-8. |
130 | | /// |
131 | | /// # Errors |
132 | | /// A [`basic::Utf8Error`] is returned if the input was not valid UTF-8. No |
133 | | /// further information about the location of the error is provided. |
134 | | /// |
135 | | /// # Safety |
136 | | /// This implementation requires CPU SIMD features specified by the module it resides in. |
137 | | /// It is undefined behavior to call it if the required CPU features are not available. |
138 | | unsafe fn finalize(self) -> core::result::Result<(), basic::Utf8Error>; |
139 | | } |
140 | | |
141 | | /// Like [`Utf8Validator`] this low-level API is for streaming validation of UTF-8 data. |
142 | | /// |
143 | | /// It has additional restrictions imposed on how the input is passed in to allow |
144 | | /// validation with as little overhead as possible. |
145 | | /// |
146 | | /// To feed it data you need to call the [`Self::update_from_chunks()`] method which takes slices which |
147 | | /// have to be a multiple of 64 bytes long. The method will panic otherwise. There is |
148 | | /// no way to find out if the input so far was valid UTF-8 during the validation. The |
149 | | /// result of the validation is only returned once it is completed with the |
150 | | /// [`Self::finalize()`] method. |
151 | | /// |
152 | | /// The `Self::finalize()` method can be fed the rest of the data. There is no restriction on the |
153 | | /// data passed to it. |
154 | | /// |
155 | | /// This implementation requires CPU SIMD features specified by the module it resides in. |
156 | | /// It is undefined behavior to use it if the required CPU features are not available which |
157 | | /// is why all trait methods are `unsafe`. |
158 | | pub trait ChunkedUtf8Validator { |
159 | | /// Creates a new validator. |
160 | | /// |
161 | | /// # Safety |
162 | | /// This implementation requires CPU SIMD features specified by the module it resides in. |
163 | | /// It is undefined behavior to call it if the required CPU features are not available. |
164 | | #[must_use] |
165 | | unsafe fn new() -> Self |
166 | | where |
167 | | Self: Sized; |
168 | | |
169 | | /// Updates the validator with `input`. |
170 | | /// |
171 | | /// # Panics |
172 | | /// If `input.len()` is not a multiple of 64. |
173 | | /// |
174 | | /// # Safety |
175 | | /// This implementation requires CPU SIMD features specified by the module it resides in. |
176 | | /// It is undefined behavior to call it if the required CPU features are not available. |
177 | | unsafe fn update_from_chunks(&mut self, input: &[u8]); |
178 | | |
179 | | /// Updates the validator with remaining input if any. There is no restriction on the |
180 | | /// data provided. |
181 | | /// |
182 | | /// Finishes the validation and returns `Ok(())` if the input was valid UTF-8. |
183 | | /// |
184 | | /// # Errors |
185 | | /// A [`basic::Utf8Error`] is returned if the input was not valid UTF-8. No |
186 | | /// further information about the location of the error is provided. |
187 | | /// |
188 | | /// # Safety |
189 | | /// This implementation requires CPU SIMD features specified by the module it resides in. |
190 | | /// It is undefined behavior to call it if the required CPU features are not available. |
191 | | unsafe fn finalize( |
192 | | self, |
193 | | remaining_input: core::option::Option<&[u8]>, |
194 | | ) -> core::result::Result<(), basic::Utf8Error>; |
195 | | } |
196 | | |
197 | | /// Includes the x86/x86-64 SIMD implementations. |
198 | | #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] |
199 | | pub mod x86 { |
200 | | /// Includes the validation implementation for AVX 2-compatible CPUs. |
201 | | /// |
202 | | /// Using the provided functionality on CPUs which do not support AVX 2 is undefined |
203 | | /// behavior and will very likely cause a crash. |
204 | | pub mod avx2 { |
205 | | pub use crate::implementation::x86::avx2::validate_utf8_basic as validate_utf8; |
206 | | pub use crate::implementation::x86::avx2::ChunkedUtf8ValidatorImp; |
207 | | pub use crate::implementation::x86::avx2::Utf8ValidatorImp; |
208 | | } |
209 | | /// Includes the validation implementation for SSE 4.2-compatible CPUs. |
210 | | /// |
211 | | /// Using the provided functionality on CPUs which do not support SSE 4.2 is undefined |
212 | | /// behavior and will very likely cause a crash. |
213 | | pub mod sse42 { |
214 | | pub use crate::implementation::x86::sse42::validate_utf8_basic as validate_utf8; |
215 | | pub use crate::implementation::x86::sse42::ChunkedUtf8ValidatorImp; |
216 | | pub use crate::implementation::x86::sse42::Utf8ValidatorImp; |
217 | | } |
218 | | } |
219 | | |
220 | | /// Includes the aarch64 SIMD implementations. |
221 | | #[cfg(all(feature = "aarch64_neon", target_arch = "aarch64"))] |
222 | | pub mod aarch64 { |
223 | | /// Includes the Neon-based validation implementation for aarch64 CPUs. |
224 | | /// |
225 | | /// Should be supported on all ARM64 CPUs. If it is not supported by the operating |
226 | | /// system, using it is undefined behavior and will likely cause a crash. |
227 | | pub mod neon { |
228 | | pub use crate::implementation::aarch64::neon::validate_utf8_basic as validate_utf8; |
229 | | pub use crate::implementation::aarch64::neon::ChunkedUtf8ValidatorImp; |
230 | | pub use crate::implementation::aarch64::neon::Utf8ValidatorImp; |
231 | | } |
232 | | } |
233 | | |
234 | | /// Includes the wasm32 SIMD implementations. |
235 | | #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))] |
236 | | pub mod wasm32 { |
237 | | /// Includes the simd128-based validation implementation for WASM runtimes. |
238 | | /// |
239 | | /// Using the provided functionality on WASM runtimes that do not support SIMD |
240 | | /// instructions will likely cause a crash. |
241 | | pub mod simd128 { |
242 | | pub use crate::implementation::wasm32::simd128::validate_utf8_basic as validate_utf8; |
243 | | pub use crate::implementation::wasm32::simd128::ChunkedUtf8ValidatorImp; |
244 | | pub use crate::implementation::wasm32::simd128::Utf8ValidatorImp; |
245 | | } |
246 | | } |
247 | | } |