Coverage Report

Created: 2025-07-12 06:37

/rust/registry/src/index.crates.io-6f17d22bba15001f/simdutf8-0.1.5/src/compat.rs
Line
Count
Source (jump to first uncovered line)
1
//! The `compat` API flavor provides full compatibility with [`std::str::from_utf8()`] and detailed validation errors.
2
//!
3
//! In particular, [`from_utf8()`]
4
//! returns a [`Utf8Error`], which has the [`valid_up_to()`](Utf8Error#method.valid_up_to) and
5
//! [`error_len()`](Utf8Error#method.error_len) methods. The first is useful for verification of streamed data. The
6
//! second is useful e.g. for replacing invalid byte sequences with a replacement character.
7
//!
8
//! The functions in this module also fail early: errors are checked on-the-fly as the string is processed and once
9
//! an invalid UTF-8 sequence is encountered, they return without processing the rest of the data.
10
//! This comes at a slight performance penalty compared to the [`crate::basic`] module if the input is valid UTF-8.
11
12
use core::fmt::Display;
13
use core::fmt::Formatter;
14
15
use core::str::{from_utf8_unchecked, from_utf8_unchecked_mut};
16
17
use crate::implementation::validate_utf8_compat;
18
19
/// UTF-8 error information compatible with [`std::str::Utf8Error`].
20
///
21
/// Contains information on the location of the encountered validation error and the length of the
22
/// invalid UTF-8 sequence.
23
#[derive(Copy, Eq, PartialEq, Clone, Debug)]
24
pub struct Utf8Error {
25
    pub(crate) valid_up_to: usize,
26
    pub(crate) error_len: Option<u8>,
27
}
28
29
impl Utf8Error {
30
    /// Analogue to [`std::str::Utf8Error::valid_up_to()`](std::str::Utf8Error#method.valid_up_to).
31
    ///
32
    /// ...
33
    #[inline]
34
    #[must_use]
35
0
    pub fn valid_up_to(&self) -> usize {
36
0
        self.valid_up_to
37
0
    }
38
39
    /// Analogue to [`std::str::Utf8Error::error_len()`](std::str::Utf8Error#method.error_len).
40
    ///
41
    /// ...
42
    #[inline]
43
    #[must_use]
44
0
    pub fn error_len(&self) -> Option<usize> {
45
0
        self.error_len.map(|len| len as usize)
46
0
    }
47
}
48
49
impl Display for Utf8Error {
50
0
    fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
51
0
        if let Some(error_len) = self.error_len {
52
0
            write!(
53
0
                f,
54
0
                "invalid utf-8 sequence of {} bytes from index {}",
55
0
                error_len, self.valid_up_to
56
0
            )
57
        } else {
58
0
            write!(
59
0
                f,
60
0
                "incomplete utf-8 byte sequence from index {}",
61
0
                self.valid_up_to
62
0
            )
63
        }
64
0
    }
65
}
66
67
#[cfg(feature = "std")]
68
impl std::error::Error for Utf8Error {}
69
70
/// Analogue to [`std::str::from_utf8()`].
71
///
72
/// Checks if the passed byte sequence is valid UTF-8 and returns an
73
/// [`std::str`] reference to the passed byte slice wrapped in `Ok()` if it is.
74
///
75
/// # Errors
76
/// Will return Err([`Utf8Error`]) if the input contains invalid UTF-8 with
77
/// detailed error information.
78
#[inline]
79
0
pub fn from_utf8(input: &[u8]) -> Result<&str, Utf8Error> {
80
0
    unsafe {
81
0
        validate_utf8_compat(input)?;
82
0
        Ok(from_utf8_unchecked(input))
83
    }
84
0
}
85
86
/// Analogue to [`std::str::from_utf8_mut()`].
87
///
88
/// Checks if the passed mutable byte sequence is valid UTF-8 and returns a mutable
89
/// [`std::str`] reference to the passed byte slice wrapped in `Ok()` if it is.
90
///
91
/// # Errors
92
/// Will return Err([`Utf8Error`]) if the input contains invalid UTF-8 with
93
/// detailed error information.
94
#[inline]
95
0
pub fn from_utf8_mut(input: &mut [u8]) -> Result<&mut str, Utf8Error> {
96
0
    unsafe {
97
0
        validate_utf8_compat(input)?;
98
0
        Ok(from_utf8_unchecked_mut(input))
99
    }
100
0
}
101
102
/// Allows direct access to the platform-specific unsafe validation implementations.
103
#[cfg(feature = "public_imp")]
104
pub mod imp {
105
    /// Includes the x86/x86-64 SIMD implementations.
106
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
107
    pub mod x86 {
108
        /// Includes the validation implementation for AVX 2-compatible CPUs.
109
        pub mod avx2 {
110
            pub use crate::implementation::x86::avx2::validate_utf8_compat as validate_utf8;
111
        }
112
        /// Includes the validation implementation for SSE 4.2-compatible CPUs.
113
        pub mod sse42 {
114
            pub use crate::implementation::x86::sse42::validate_utf8_compat as validate_utf8;
115
        }
116
    }
117
118
    /// Includes the aarch64 SIMD implementations.
119
    #[cfg(all(feature = "aarch64_neon", target_arch = "aarch64"))]
120
    pub mod aarch64 {
121
        /// Includes the validation implementation for Neon SIMD.
122
        pub mod neon {
123
            pub use crate::implementation::aarch64::neon::validate_utf8_compat as validate_utf8;
124
        }
125
    }
126
127
    /// Includes the wasm32 SIMD implementations.
128
    #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
129
    pub mod wasm32 {
130
        /// Includes the validation implementation for WASM simd128.
131
        pub mod simd128 {
132
            pub use crate::implementation::wasm32::simd128::validate_utf8_compat as validate_utf8;
133
        }
134
    }
135
}