Coverage Report

Created: 2024-10-16 07:58

/rust/registry/src/index.crates.io-6f17d22bba15001f/simdutf8-0.1.4/src/compat.rs
Line
Count
Source (jump to first uncovered line)
1
//! The `compat` API flavor provides full compatibility with [`std::str::from_utf8()`] and detailed validation errors.
2
//!
3
//! In particular, [`from_utf8()`]
4
//! returns an [`Utf8Error`], which has the [`valid_up_to()`](Utf8Error#method.valid_up_to) and
5
//! [`error_len()`](Utf8Error#method.error_len) methods. The first is useful for verification of streamed data. The
6
//! second is useful e.g. for replacing invalid byte sequences with a replacement character.
7
//!
8
//! The functions in this module also fail early: errors are checked on-the-fly as the string is processed and once
9
//! an invalid UTF-8 sequence is encountered, it returns without processing the rest of the data.
10
//! This comes at a slight performance penalty compared to the [`crate::basic`] module if the input is valid UTF-8.
11
12
use core::fmt::Display;
13
use core::fmt::Formatter;
14
15
use core::str::{from_utf8_unchecked, from_utf8_unchecked_mut};
16
17
use crate::implementation::validate_utf8_compat;
18
19
/// UTF-8 error information compatible with [`std::str::Utf8Error`].
20
///
21
/// Contains information on the location of the encountered validation error and the length of the
22
/// invalid UTF-8 sequence.
23
#[derive(Copy, Eq, PartialEq, Clone, Debug)]
24
pub struct Utf8Error {
25
    /// Byte index reported by `valid_up_to()`; the `Display` output prints it as the index the
    /// invalid or incomplete sequence starts from.
    pub(crate) valid_up_to: usize,
26
    /// Length in bytes of the invalid sequence. When this is `None`, the `Display` output
    /// describes the error as an incomplete byte sequence instead.
    pub(crate) error_len: Option<u8>,
27
}
28
29
impl Utf8Error {
30
    /// Analogue to [`std::str::Utf8Error::valid_up_to()`](std::str::Utf8Error#method.valid_up_to).
31
    ///
32
    /// Returns the stored `valid_up_to` index unchanged; the `Display` output reports the same
    /// value as the index the offending sequence starts from.
33
    #[inline]
34
    #[must_use]
35
    #[allow(clippy::missing_const_for_fn)] // would not provide any benefit
36
0
    pub fn valid_up_to(&self) -> usize {
37
0
        self.valid_up_to
38
0
    }
39
40
    /// Analogue to [`std::str::Utf8Error::error_len()`](std::str::Utf8Error#method.error_len).
41
    ///
42
    /// Returns the stored error length widened from `u8` to `usize`, or `None` when no length
    /// is stored (the `Display` output then reports an incomplete byte sequence).
43
    #[inline]
44
    #[must_use]
45
0
    pub fn error_len(&self) -> Option<usize> {
46
0
        self.error_len.map(|len| len as usize)
47
0
    }
48
}
49
50
// Human-readable rendering of the error. Two message forms exist, selected by whether an
// error length is stored.
impl Display for Utf8Error {
51
0
    fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
52
0
        // A stored length means a complete but invalid sequence: report its length and start.
        if let Some(error_len) = self.error_len {
53
0
            write!(
54
0
                f,
55
0
                "invalid utf-8 sequence of {} bytes from index {}",
56
0
                error_len, self.valid_up_to
57
0
            )
58
        } else {
59
0
            // No stored length: only the starting index is reported.
            write!(
60
0
                f,
61
0
                "incomplete utf-8 byte sequence from index {}",
62
0
                self.valid_up_to
63
0
            )
64
        }
65
0
    }
66
}
67
68
// Marker implementation of the standard error trait, compiled only when the `std` feature is
// enabled. The body is empty, so every trait method keeps its default implementation.
#[cfg(feature = "std")]
69
impl std::error::Error for Utf8Error {}
70
71
/// Analogue to [`std::str::from_utf8()`].
72
///
73
/// Checks if the passed byte sequence is valid UTF-8 and returns an
74
/// [`std::str`] reference to the passed byte slice wrapped in `Ok()` if it is.
75
///
76
/// # Errors
77
/// Will return Err([`Utf8Error`]) with detailed error information if the input contains
78
/// invalid UTF-8.
79
#[inline]
80
0
pub fn from_utf8(input: &[u8]) -> Result<&str, Utf8Error> {
81
0
    // SAFETY: `from_utf8_unchecked` is reached only after `validate_utf8_compat` returned `Ok`
    // for this same slice; on `Err` the `?` returns early.
    // NOTE(review): the block also covers the `validate_utf8_compat` call itself; any safety
    // requirements of that function are defined outside this file and should be confirmed there.
    unsafe {
82
0
        validate_utf8_compat(input)?;
83
0
        Ok(from_utf8_unchecked(input))
84
    }
85
0
}
86
87
/// Analogue to [`std::str::from_utf8_mut()`].
88
///
89
/// Checks if the passed mutable byte sequence is valid UTF-8 and returns a mutable
90
/// [`std::str`] reference to the passed byte slice wrapped in `Ok()` if it is.
91
///
92
/// # Errors
93
/// Will return Err([`Utf8Error`]) with detailed error information if the input contains
94
/// invalid UTF-8.
95
#[inline]
96
0
pub fn from_utf8_mut(input: &mut [u8]) -> Result<&mut str, Utf8Error> {
97
0
    // SAFETY: `from_utf8_unchecked_mut` is reached only after `validate_utf8_compat` returned
    // `Ok` for this same slice; on `Err` the `?` returns early.
    // NOTE(review): the block also covers the `validate_utf8_compat` call itself; any safety
    // requirements of that function are defined outside this file and should be confirmed there.
    unsafe {
98
0
        validate_utf8_compat(input)?;
99
0
        Ok(from_utf8_unchecked_mut(input))
100
    }
101
0
}
102
103
/// Allows direct access to the platform-specific unsafe validation implementations.
///
/// Only compiled when the `public_imp` feature is enabled. Each leaf module re-exports the
/// corresponding backend's `validate_utf8_compat` function under the name `validate_utf8`.
104
#[cfg(feature = "public_imp")]
105
pub mod imp {
106
    /// Includes the x86/x86-64 SIMD implementations.
    // NOTE(review): the outer `all(...)` below wraps a single predicate and is redundant.
107
    #[cfg(all(any(target_arch = "x86", target_arch = "x86_64")))]
108
    pub mod x86 {
109
        /// Includes the validation implementation for AVX 2-compatible CPUs.
110
        pub mod avx2 {
111
            pub use crate::implementation::x86::avx2::validate_utf8_compat as validate_utf8;
112
        }
113
        /// Includes the validation implementation for SSE 4.2-compatible CPUs.
114
        pub mod sse42 {
115
            pub use crate::implementation::x86::sse42::validate_utf8_compat as validate_utf8;
116
        }
117
    }
118
119
    /// Includes the aarch64 SIMD implementations.
    ///
    /// Requires both the `aarch64_neon` feature and an `aarch64` target.
120
    #[cfg(all(feature = "aarch64_neon", target_arch = "aarch64"))]
121
    pub mod aarch64 {
122
        /// Includes the validation implementation for Neon SIMD.
123
        pub mod neon {
124
            pub use crate::implementation::aarch64::neon::validate_utf8_compat as validate_utf8;
125
        }
126
    }
127
128
    /// Includes the wasm32 SIMD implementations.
    ///
    /// Requires a `wasm32` target compiled with the `simd128` target feature.
129
    #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
130
    pub mod wasm32 {
131
        /// Includes the validation implementation for WASM simd128.
132
        pub mod simd128 {
133
            pub use crate::implementation::wasm32::simd128::validate_utf8_compat as validate_utf8;
134
        }
135
    }
136
}