/rust/registry/src/index.crates.io-6f17d22bba15001f/simdutf8-0.1.5/src/compat.rs
Line | Count | Source (jump to first uncovered line) |
1 | | //! The `compat` API flavor provides full compatibility with [`std::str::from_utf8()`] and detailed validation errors. |
2 | | //! |
3 | | //! In particular, [`from_utf8()`] |
4 | | //! returns an [`Utf8Error`], which has the [`valid_up_to()`](Utf8Error#method.valid_up_to) and |
5 | | //! [`error_len()`](Utf8Error#method.error_len) methods. The first is useful for verification of streamed data. The |
6 | | //! second is useful e.g. for replacing invalid byte sequences with a replacement character. |
7 | | //! |
8 | | //! The functions in this module also fail early: errors are checked on-the-fly as the string is processed and once |
9 | | //! an invalid UTF-8 sequence is encountered, it returns without processing the rest of the data. |
10 | | //! This comes at a slight performance penalty compared to the [`crate::basic`] module if the input is valid UTF-8. |
11 | | |
12 | | use core::fmt::Display; |
13 | | use core::fmt::Formatter; |
14 | | |
15 | | use core::str::{from_utf8_unchecked, from_utf8_unchecked_mut}; |
16 | | |
17 | | use crate::implementation::validate_utf8_compat; |
18 | | |
/// Describes where and how UTF-8 validation failed, mirroring [`std::str::Utf8Error`].
///
/// Two facts are recorded: the position at which validation stopped and, when known, the
/// length of the offending byte sequence.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct Utf8Error {
    /// Index reported by [`Utf8Error::valid_up_to()`].
    pub(crate) valid_up_to: usize,
    /// Length of the invalid sequence; `None` is rendered by `Display` as an incomplete sequence.
    pub(crate) error_len: Option<u8>,
}

impl Utf8Error {
    /// Analogue to [`std::str::Utf8Error::valid_up_to()`](std::str::Utf8Error#method.valid_up_to).
    ///
    /// Returns the recorded index at which the invalid or incomplete sequence starts. This
    /// is the same index that the `Display` output reports.
    #[inline]
    #[must_use]
    pub fn valid_up_to(&self) -> usize {
        self.valid_up_to
    }

    /// Analogue to [`std::str::Utf8Error::error_len()`](std::str::Utf8Error#method.error_len).
    ///
    /// Returns `Some(len)` when an invalid sequence of `len` bytes was recorded and `None`
    /// when the error is an incomplete sequence. The length is stored as a `u8` and widened
    /// losslessly to `usize` here.
    #[inline]
    #[must_use]
    pub fn error_len(&self) -> Option<usize> {
        self.error_len.map(usize::from)
    }
}

impl Display for Utf8Error {
    /// Writes a one-line, human-readable description of the error.
    fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
        match self.error_len {
            Some(error_len) => write!(
                f,
                "invalid utf-8 sequence of {} bytes from index {}",
                error_len, self.valid_up_to
            ),
            None => write!(
                f,
                "incomplete utf-8 byte sequence from index {}",
                self.valid_up_to
            ),
        }
    }
}

// The standard error trait is only implemented when the `std` feature is enabled.
#[cfg(feature = "std")]
impl std::error::Error for Utf8Error {}
69 | | |
70 | | /// Analogue to [`std::str::from_utf8()`]. |
71 | | /// |
72 | | /// Checks if the passed byte sequence is valid UTF-8 and returns an |
73 | | /// [`std::str`] reference to the passed byte slice wrapped in `Ok()` if it is. |
74 | | /// |
75 | | /// # Errors |
76 | | /// Will return Err([`Utf8Error`]) on if the input contains invalid UTF-8 with |
77 | | /// detailed error information. |
78 | | #[inline] |
79 | 0 | pub fn from_utf8(input: &[u8]) -> Result<&str, Utf8Error> { |
80 | 0 | unsafe { |
81 | 0 | validate_utf8_compat(input)?; |
82 | 0 | Ok(from_utf8_unchecked(input)) |
83 | | } |
84 | 0 | } |
85 | | |
86 | | /// Analogue to [`std::str::from_utf8_mut()`]. |
87 | | /// |
88 | | /// Checks if the passed mutable byte sequence is valid UTF-8 and returns a mutable |
89 | | /// [`std::str`] reference to the passed byte slice wrapped in `Ok()` if it is. |
90 | | /// |
91 | | /// # Errors |
92 | | /// Will return Err([`Utf8Error`]) on if the input contains invalid UTF-8 with |
93 | | /// detailed error information. |
94 | | #[inline] |
95 | 0 | pub fn from_utf8_mut(input: &mut [u8]) -> Result<&mut str, Utf8Error> { |
96 | 0 | unsafe { |
97 | 0 | validate_utf8_compat(input)?; |
98 | 0 | Ok(from_utf8_unchecked_mut(input)) |
99 | | } |
100 | 0 | } |
101 | | |
/// Direct access to the platform-specific unsafe validation implementations.
///
/// Only compiled when the `public_imp` feature is enabled. Each leaf module re-exports its
/// backend's `validate_utf8_compat` function under the name `validate_utf8`.
#[cfg(feature = "public_imp")]
pub mod imp {
    /// SIMD implementations for x86 and x86-64 targets.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    pub mod x86 {
        use crate::implementation::x86 as backend;

        /// Validation implementation for AVX 2-compatible CPUs.
        pub mod avx2 {
            pub use super::backend::avx2::validate_utf8_compat as validate_utf8;
        }

        /// Validation implementation for SSE 4.2-compatible CPUs.
        pub mod sse42 {
            pub use super::backend::sse42::validate_utf8_compat as validate_utf8;
        }
    }

    /// SIMD implementations for aarch64 targets (requires the `aarch64_neon` feature).
    #[cfg(all(feature = "aarch64_neon", target_arch = "aarch64"))]
    pub mod aarch64 {
        use crate::implementation::aarch64 as backend;

        /// Validation implementation for Neon SIMD.
        pub mod neon {
            pub use super::backend::neon::validate_utf8_compat as validate_utf8;
        }
    }

    /// SIMD implementations for wasm32 targets built with the `simd128` target feature.
    #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
    pub mod wasm32 {
        use crate::implementation::wasm32 as backend;

        /// Validation implementation for WASM simd128.
        pub mod simd128 {
            pub use super::backend::simd128::validate_utf8_compat as validate_utf8;
        }
    }
}