Coverage Report

Created: 2025-11-16 07:09

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/rust/registry/src/index.crates.io-1949cf8c6b5b557f/potential_utf-0.1.4/src/ustr.rs
Line
Count
Source
1
// This file is part of ICU4X. For terms of use, please see the file
2
// called LICENSE at the top level of the ICU4X source tree
3
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5
#[cfg(feature = "alloc")]
6
use alloc::boxed::Box;
7
use core::cmp::Ordering;
8
use core::fmt;
9
use core::ops::Deref;
10
11
/// A byte slice that is expected to be a UTF-8 string but does not enforce that invariant.
12
///
13
/// Use this type instead of `str` if you don't need to enforce UTF-8 during deserialization. For
14
/// example, strings that are keys of a map don't need to ever be reified as `str`s.
15
///
16
/// [`PotentialUtf8`] derefs to `[u8]`. To obtain a `str`, use [`Self::try_as_str()`].
17
///
18
/// The main advantage of this type over `[u8]` is that it serializes as a string in
19
/// human-readable formats like JSON.
20
///
21
/// # Examples
22
///
23
/// Using a [`PotentialUtf8`] as the key of a [`ZeroMap`]:
24
///
25
/// ```
26
/// use potential_utf::PotentialUtf8;
27
/// use zerovec::ZeroMap;
28
///
29
/// // This map is cheap to deserialize, as we don't need to perform UTF-8 validation.
30
/// let map: ZeroMap<PotentialUtf8, u8> = [
31
///     (PotentialUtf8::from_bytes(b"abc"), 11),
32
///     (PotentialUtf8::from_bytes(b"def"), 22),
33
///     (PotentialUtf8::from_bytes(b"ghi"), 33),
34
/// ]
35
/// .into_iter()
36
/// .collect();
37
///
38
/// let key = "abc";
39
/// let value = map.get_copied(PotentialUtf8::from_str(key));
40
/// assert_eq!(Some(11), value);
41
/// ```
42
///
43
/// [`ZeroMap`]: zerovec::ZeroMap
44
#[repr(transparent)]
45
#[derive(PartialEq, Eq, PartialOrd, Ord)]
46
#[allow(clippy::exhaustive_structs)] // transparent newtype
47
pub struct PotentialUtf8(pub [u8]);
48
49
impl fmt::Debug for PotentialUtf8 {
50
0
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
51
        // Debug as a string if possible
52
0
        match self.try_as_str() {
53
0
            Ok(s) => fmt::Debug::fmt(s, f),
54
0
            Err(_) => fmt::Debug::fmt(&self.0, f),
55
        }
56
0
    }
57
}
58
59
impl PotentialUtf8 {
60
    /// Create a [`PotentialUtf8`] from a byte slice.
61
    #[inline]
62
0
    pub const fn from_bytes(other: &[u8]) -> &Self {
63
        // Safety: PotentialUtf8 is transparent over [u8]
64
0
        unsafe { core::mem::transmute(other) }
65
0
    }
66
67
    /// Create a [`PotentialUtf8`] from a string slice.
68
    #[inline]
69
0
    pub const fn from_str(s: &str) -> &Self {
70
0
        Self::from_bytes(s.as_bytes())
71
0
    }
72
73
    /// Create a [`PotentialUtf8`] from boxed bytes.
74
    ///
75
    /// ✨ *Enabled with the `alloc` Cargo feature.*
76
    #[inline]
77
    #[cfg(feature = "alloc")]
78
    pub fn from_boxed_bytes(other: Box<[u8]>) -> Box<Self> {
79
        // Safety: PotentialUtf8 is transparent over [u8]
80
        unsafe { core::mem::transmute(other) }
81
    }
82
83
    /// Create a [`PotentialUtf8`] from a boxed `str`.
84
    ///
85
    /// ✨ *Enabled with the `alloc` Cargo feature.*
86
    #[inline]
87
    #[cfg(feature = "alloc")]
88
    pub fn from_boxed_str(other: Box<str>) -> Box<Self> {
89
        Self::from_boxed_bytes(other.into_boxed_bytes())
90
    }
91
92
    /// Get the bytes from a [`PotentialUtf8`].
93
    #[inline]
94
0
    pub const fn as_bytes(&self) -> &[u8] {
95
0
        &self.0
96
0
    }
97
98
    /// Attempt to convert a [`PotentialUtf8`] to a `str`.
99
    ///
100
    /// # Examples
101
    ///
102
    /// ```
103
    /// use potential_utf::PotentialUtf8;
104
    ///
105
    /// static A: &PotentialUtf8 = PotentialUtf8::from_bytes(b"abc");
106
    ///
107
    /// let b = A.try_as_str().unwrap();
108
    /// assert_eq!(b, "abc");
109
    /// ```
110
    // Note: `core::str::from_utf8` is const starting in Rust 1.63, so this could become a `const fn`
111
    #[inline]
112
0
    pub fn try_as_str(&self) -> Result<&str, core::str::Utf8Error> {
113
0
        core::str::from_utf8(&self.0)
114
0
    }
115
}
116
117
impl<'a> From<&'a str> for &'a PotentialUtf8 {
118
    #[inline]
119
0
    fn from(other: &'a str) -> Self {
120
0
        PotentialUtf8::from_str(other)
121
0
    }
122
}
123
124
impl PartialEq<str> for PotentialUtf8 {
125
0
    fn eq(&self, other: &str) -> bool {
126
0
        self.eq(Self::from_str(other))
127
0
    }
128
}
129
130
impl PartialOrd<str> for PotentialUtf8 {
131
0
    fn partial_cmp(&self, other: &str) -> Option<Ordering> {
132
0
        self.partial_cmp(Self::from_str(other))
133
0
    }
134
}
135
136
impl PartialEq<PotentialUtf8> for str {
137
0
    fn eq(&self, other: &PotentialUtf8) -> bool {
138
0
        PotentialUtf8::from_str(self).eq(other)
139
0
    }
140
}
141
142
impl PartialOrd<PotentialUtf8> for str {
143
0
    fn partial_cmp(&self, other: &PotentialUtf8) -> Option<Ordering> {
144
0
        PotentialUtf8::from_str(self).partial_cmp(other)
145
0
    }
146
}
147
148
#[cfg(feature = "alloc")]
149
impl From<Box<str>> for Box<PotentialUtf8> {
150
    #[inline]
151
    fn from(other: Box<str>) -> Self {
152
        PotentialUtf8::from_boxed_str(other)
153
    }
154
}
155
156
impl Deref for PotentialUtf8 {
157
    type Target = [u8];
158
0
    fn deref(&self) -> &Self::Target {
159
0
        &self.0
160
0
    }
161
}
162
163
/// This impl requires enabling the optional `zerovec` and `alloc` Cargo features
164
#[cfg(all(feature = "zerovec", feature = "alloc"))]
165
impl<'a> zerovec::maps::ZeroMapKV<'a> for PotentialUtf8 {
166
    type Container = zerovec::VarZeroVec<'a, PotentialUtf8>;
167
    type Slice = zerovec::VarZeroSlice<PotentialUtf8>;
168
    type GetType = PotentialUtf8;
169
    type OwnedType = Box<PotentialUtf8>;
170
}
171
172
// Safety (based on the safety checklist on the VarULE trait):
173
//  1. PotentialUtf8 does not include any uninitialized or padding bytes (transparent over a ULE)
174
//  2. PotentialUtf8 is aligned to 1 byte (transparent over a ULE)
175
//  3. The impl of `validate_bytes()` returns an error if any byte is not valid (impossible)
176
//  4. The impl of `validate_bytes()` returns an error if the slice cannot be used in its entirety (impossible)
177
//  5. The impl of `from_bytes_unchecked()` returns a reference to the same data (returns the argument directly)
178
//  6. All other methods are defaulted
179
//  7. `[T]` byte equality is semantic equality (transparent over a ULE)
180
/// This impl requires enabling the optional `zerovec` Cargo feature
181
#[cfg(feature = "zerovec")]
182
unsafe impl zerovec::ule::VarULE for PotentialUtf8 {
183
    #[inline]
184
0
    fn validate_bytes(_: &[u8]) -> Result<(), zerovec::ule::UleError> {
185
0
        Ok(())
186
0
    }
187
    #[inline]
188
0
    unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self {
189
0
        PotentialUtf8::from_bytes(bytes)
190
0
    }
191
}
192
193
/// This impl requires enabling the optional `serde` Cargo feature
194
#[cfg(feature = "serde")]
195
impl serde_core::Serialize for PotentialUtf8 {
196
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
197
    where
198
        S: serde_core::Serializer,
199
    {
200
        use serde_core::ser::Error;
201
        let s = self
202
            .try_as_str()
203
            .map_err(|_| S::Error::custom("invalid UTF-8 in PotentialUtf8"))?;
204
        if serializer.is_human_readable() {
205
            serializer.serialize_str(s)
206
        } else {
207
            serializer.serialize_bytes(s.as_bytes())
208
        }
209
    }
210
}
211
212
/// This impl requires enabling the optional `serde` and `alloc` Cargo features
213
#[cfg(all(feature = "serde", feature = "alloc"))]
214
impl<'de> serde_core::Deserialize<'de> for Box<PotentialUtf8> {
215
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
216
    where
217
        D: serde_core::Deserializer<'de>,
218
    {
219
        if deserializer.is_human_readable() {
220
            let boxed_str = Box::<str>::deserialize(deserializer)?;
221
            Ok(PotentialUtf8::from_boxed_str(boxed_str))
222
        } else {
223
            let boxed_bytes = Box::<[u8]>::deserialize(deserializer)?;
224
            Ok(PotentialUtf8::from_boxed_bytes(boxed_bytes))
225
        }
226
    }
227
}
228
229
/// This impl requires enabling the optional `serde` Cargo feature
230
#[cfg(feature = "serde")]
231
impl<'de, 'a> serde_core::Deserialize<'de> for &'a PotentialUtf8
232
where
233
    'de: 'a,
234
{
235
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
236
    where
237
        D: serde_core::Deserializer<'de>,
238
    {
239
        if deserializer.is_human_readable() {
240
            let s = <&str>::deserialize(deserializer)?;
241
            Ok(PotentialUtf8::from_str(s))
242
        } else {
243
            let bytes = <&[u8]>::deserialize(deserializer)?;
244
            Ok(PotentialUtf8::from_bytes(bytes))
245
        }
246
    }
247
}
248
249
#[repr(transparent)]
250
#[derive(PartialEq, Eq, PartialOrd, Ord)]
251
#[allow(clippy::exhaustive_structs)] // transparent newtype
252
pub struct PotentialUtf16(pub [u16]);
253
254
impl fmt::Debug for PotentialUtf16 {
255
0
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
256
        // Debug as a string if possible
257
0
        for c in char::decode_utf16(self.0.iter().copied()) {
258
0
            match c {
259
0
                Ok(c) => write!(f, "{c}")?,
260
0
                Err(e) => write!(f, "\\0x{:x}", e.unpaired_surrogate())?,
261
            }
262
        }
263
0
        Ok(())
264
0
    }
265
}
266
267
impl PotentialUtf16 {
268
    /// Create a [`PotentialUtf16`] from a u16 slice.
269
    #[inline]
270
0
    pub const fn from_slice(other: &[u16]) -> &Self {
271
        // Safety: PotentialUtf16 is transparent over [u16]
272
0
        unsafe { core::mem::transmute(other) }
273
0
    }
274
275
0
    pub fn chars(&self) -> impl Iterator<Item = char> + '_ {
276
0
        char::decode_utf16(self.0.iter().copied()).map(|c| c.unwrap_or(char::REPLACEMENT_CHARACTER))
277
0
    }
278
}