Coverage Report

Created: 2025-10-28 06:23

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/rust/registry/src/index.crates.io-1949cf8c6b5b557f/potential_utf-0.1.3/src/ustr.rs
Line
Count
Source
1
// This file is part of ICU4X. For terms of use, please see the file
2
// called LICENSE at the top level of the ICU4X source tree
3
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5
#[cfg(feature = "alloc")]
6
use alloc::boxed::Box;
7
use core::cmp::Ordering;
8
use core::fmt;
9
use core::ops::Deref;
10
11
/// A byte slice that is expected to be a UTF-8 string but does not enforce that invariant.
12
///
13
/// Use this type instead of `str` if you don't need to enforce UTF-8 during deserialization. For
14
/// example, strings that are keys of a map don't need to ever be reified as `str`s.
15
///
16
/// [`PotentialUtf8`] derefs to `[u8]`. To obtain a `str`, use [`Self::try_as_str()`].
17
///
18
/// The main advantage of this type over `[u8]` is that it serializes as a string in
19
/// human-readable formats like JSON.
20
///
21
/// # Examples
22
///
23
/// Using a [`PotentialUtf8`] as the key of a [`ZeroMap`]:
24
///
25
/// ```
26
/// use potential_utf::PotentialUtf8;
27
/// use zerovec::ZeroMap;
28
///
29
/// // This map is cheap to deserialize, as we don't need to perform UTF-8 validation.
30
/// let map: ZeroMap<PotentialUtf8, u8> = [
31
///     (PotentialUtf8::from_bytes(b"abc"), 11),
32
///     (PotentialUtf8::from_bytes(b"def"), 22),
33
///     (PotentialUtf8::from_bytes(b"ghi"), 33),
34
/// ]
35
/// .into_iter()
36
/// .collect();
37
///
38
/// let key = "abc";
39
/// let value = map.get_copied(PotentialUtf8::from_str(key));
40
/// assert_eq!(Some(11), value);
41
/// ```
42
///
43
/// [`ZeroMap`]: zerovec::ZeroMap
44
#[repr(transparent)]
#[derive(PartialEq, Eq, PartialOrd, Ord)]
#[allow(clippy::exhaustive_structs)] // transparent newtype
pub struct PotentialUtf8(pub [u8]);

impl fmt::Debug for PotentialUtf8 {
    /// Formats the value like a `str` when the bytes are valid UTF-8, and like
    /// a plain byte slice otherwise.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Debug as a string if possible
        match self.try_as_str() {
            Ok(s) => fmt::Debug::fmt(s, f),
            Err(_) => fmt::Debug::fmt(&self.0, f),
        }
    }
}

impl PotentialUtf8 {
    /// Create a [`PotentialUtf8`] from a byte slice.
    ///
    /// The bytes are not validated; use [`Self::try_as_str()`] to check them.
    #[inline]
    pub const fn from_bytes(other: &[u8]) -> &Self {
        // SAFETY: `PotentialUtf8` is `#[repr(transparent)]` over `[u8]`, so the
        // cast keeps the same address and slice length, and the resulting
        // reference is bound to the lifetime of `other` by the signature.
        unsafe { &*(other as *const [u8] as *const Self) }
    }

    /// Create a [`PotentialUtf8`] from a string slice.
    #[inline]
    pub const fn from_str(s: &str) -> &Self {
        Self::from_bytes(s.as_bytes())
    }

    /// Create a [`PotentialUtf8`] from boxed bytes.
    #[inline]
    #[cfg(feature = "alloc")]
    pub fn from_boxed_bytes(other: Box<[u8]>) -> Box<Self> {
        // SAFETY: `PotentialUtf8` is `#[repr(transparent)]` over `[u8]`, so the
        // pointer obtained from `Box::into_raw` describes a valid, uniquely
        // owned `PotentialUtf8` with the same layout, which `Box::from_raw`
        // takes ownership of again.
        unsafe { Box::from_raw(Box::into_raw(other) as *mut Self) }
    }

    /// Create a [`PotentialUtf8`] from a boxed `str`.
    #[inline]
    #[cfg(feature = "alloc")]
    pub fn from_boxed_str(other: Box<str>) -> Box<Self> {
        Self::from_boxed_bytes(other.into_boxed_bytes())
    }

    /// Get the bytes from a [`PotentialUtf8`].
    #[inline]
    pub const fn as_bytes(&self) -> &[u8] {
        &self.0
    }

    /// Attempt to convert a [`PotentialUtf8`] to a `str`.
    ///
    /// # Errors
    ///
    /// Returns the [`core::str::Utf8Error`] produced by
    /// [`core::str::from_utf8`] if the bytes are not valid UTF-8.
    ///
    /// # Examples
    ///
    /// ```
    /// use potential_utf::PotentialUtf8;
    ///
    /// static A: &PotentialUtf8 = PotentialUtf8::from_bytes(b"abc");
    ///
    /// let b = A.try_as_str().unwrap();
    /// assert_eq!(b, "abc");
    /// ```
    // Note: this is const starting in 1.63
    #[inline]
    pub fn try_as_str(&self) -> Result<&str, core::str::Utf8Error> {
        core::str::from_utf8(&self.0)
    }
}
112
113
impl<'a> From<&'a str> for &'a PotentialUtf8 {
114
    #[inline]
115
0
    fn from(other: &'a str) -> Self {
116
0
        PotentialUtf8::from_str(other)
117
0
    }
118
}
119
120
impl PartialEq<str> for PotentialUtf8 {
121
0
    fn eq(&self, other: &str) -> bool {
122
0
        self.eq(Self::from_str(other))
123
0
    }
124
}
125
126
impl PartialOrd<str> for PotentialUtf8 {
127
0
    fn partial_cmp(&self, other: &str) -> Option<Ordering> {
128
0
        self.partial_cmp(Self::from_str(other))
129
0
    }
130
}
131
132
impl PartialEq<PotentialUtf8> for str {
    /// Compares the UTF-8 bytes of `self` with the bytes stored in `other`.
    fn eq(&self, other: &PotentialUtf8) -> bool {
        *self.as_bytes() == other.0
    }
}
137
138
impl PartialOrd<PotentialUtf8> for str {
139
0
    fn partial_cmp(&self, other: &PotentialUtf8) -> Option<Ordering> {
140
0
        PotentialUtf8::from_str(self).partial_cmp(other)
141
0
    }
142
}
143
144
#[cfg(feature = "alloc")]
impl From<Box<str>> for Box<PotentialUtf8> {
    /// Converts an owned `str` into an owned [`PotentialUtf8`] by way of its
    /// boxed bytes.
    #[inline]
    fn from(other: Box<str>) -> Self {
        let boxed_bytes = other.into_boxed_bytes();
        PotentialUtf8::from_boxed_bytes(boxed_bytes)
    }
}
151
152
impl Deref for PotentialUtf8 {
153
    type Target = [u8];
154
0
    fn deref(&self) -> &Self::Target {
155
0
        &self.0
156
0
    }
157
}
158
159
/// This impl requires enabling the optional `zerovec` Cargo feature
#[cfg(all(feature = "zerovec", feature = "alloc"))]
impl<'a> zerovec::maps::ZeroMapKV<'a> for PotentialUtf8 {
    // Variable-length values: stored in `VarZeroVec`/`VarZeroSlice`, read back
    // as unsized `Self`, and owned as `Box<Self>`.
    type Container = zerovec::VarZeroVec<'a, Self>;
    type Slice = zerovec::VarZeroSlice<Self>;
    type GetType = Self;
    type OwnedType = Box<Self>;
}
167
168
// Safety (based on the safety checklist on the VarULE trait):
169
//  1. PotentialUtf8 does not include any uninitialized or padding bytes (transparent over a ULE)
170
//  2. PotentialUtf8 is aligned to 1 byte (transparent over a ULE)
171
//  3. The impl of `validate_bytes()` returns an error if any byte is not valid (impossible)
172
//  4. The impl of `validate_bytes()` returns an error if the slice cannot be used in its entirety (impossible)
173
//  5. The impl of `from_bytes_unchecked()` returns a reference to the same data (returns the argument directly)
174
//  6. All other methods are defaulted
175
//  7. `[T]` byte equality is semantic equality (transparent over a ULE)
176
/// This impl requires enabling the optional `zerovec` Cargo feature
#[cfg(feature = "zerovec")]
unsafe impl zerovec::ule::VarULE for PotentialUtf8 {
    /// Accepts every byte slice: no validation is performed.
    #[inline]
    fn validate_bytes(_bytes: &[u8]) -> Result<(), zerovec::ule::UleError> {
        Ok(())
    }

    /// Reinterprets `bytes` as a [`PotentialUtf8`] via the safe
    /// `from_bytes` constructor.
    #[inline]
    unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self {
        Self::from_bytes(bytes)
    }
}
188
189
/// This impl requires enabling the optional `serde` Cargo feature
#[cfg(feature = "serde")]
impl serde::Serialize for PotentialUtf8 {
    /// Serializes as a string in human-readable formats and as bytes
    /// otherwise; in both cases the contents must be valid UTF-8, or a custom
    /// serializer error is returned.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        use serde::ser::Error;
        // Validation happens first, so invalid UTF-8 is rejected regardless of
        // the output format.
        match self.try_as_str() {
            Ok(text) if serializer.is_human_readable() => serializer.serialize_str(text),
            Ok(text) => serializer.serialize_bytes(text.as_bytes()),
            Err(_) => Err(S::Error::custom("invalid UTF-8 in PotentialUtf8")),
        }
    }
}
207
208
/// This impl requires enabling the optional `serde` Cargo feature
#[cfg(all(feature = "serde", feature = "alloc"))]
impl<'de> serde::Deserialize<'de> for Box<PotentialUtf8> {
    /// Reads a boxed `str` from human-readable formats and boxed bytes from
    /// all other formats, then wraps the result without further validation.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        let boxed = if deserializer.is_human_readable() {
            let text = <Box<str> as serde::Deserialize>::deserialize(deserializer)?;
            PotentialUtf8::from_boxed_str(text)
        } else {
            let raw = <Box<[u8]> as serde::Deserialize>::deserialize(deserializer)?;
            PotentialUtf8::from_boxed_bytes(raw)
        };
        Ok(boxed)
    }
}
224
225
/// This impl requires enabling the optional `serde` Cargo feature
#[cfg(feature = "serde")]
impl<'de, 'a> serde::Deserialize<'de> for &'a PotentialUtf8
where
    'de: 'a,
{
    /// Borrows a `&str` from human-readable formats and a `&[u8]` from all
    /// other formats, then wraps the borrowed data without further validation.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        let borrowed = if deserializer.is_human_readable() {
            let text = <&str as serde::Deserialize>::deserialize(deserializer)?;
            PotentialUtf8::from_str(text)
        } else {
            let raw = <&[u8] as serde::Deserialize>::deserialize(deserializer)?;
            PotentialUtf8::from_bytes(raw)
        };
        Ok(borrowed)
    }
}
244
245
/// A slice of `u16` code units that is expected to be a UTF-16 string but does
/// not enforce that invariant.
///
/// Use [`Self::chars()`] to decode the contents; unpaired surrogates are
/// replaced rather than reported as errors.
#[repr(transparent)]
#[derive(PartialEq, Eq, PartialOrd, Ord)]
#[allow(clippy::exhaustive_structs)] // transparent newtype
pub struct PotentialUtf16(pub [u16]);

impl fmt::Debug for PotentialUtf16 {
    /// Writes each decoded character as-is; an unpaired surrogate is written
    /// as `\0x` followed by its value in lowercase hexadecimal.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Debug as a string if possible
        for c in char::decode_utf16(self.0.iter().copied()) {
            match c {
                Ok(c) => write!(f, "{c}")?,
                Err(e) => write!(f, "\\0x{:x}", e.unpaired_surrogate())?,
            }
        }
        Ok(())
    }
}

impl PotentialUtf16 {
    /// Create a [`PotentialUtf16`] from a u16 slice.
    ///
    /// The code units are not validated.
    #[inline]
    pub const fn from_slice(other: &[u16]) -> &Self {
        // SAFETY: `PotentialUtf16` is `#[repr(transparent)]` over `[u16]`, so
        // the cast keeps the same address and slice length, and the resulting
        // reference is bound to the lifetime of `other` by the signature.
        unsafe { &*(other as *const [u16] as *const Self) }
    }

    /// Iterates over the decoded `char`s, substituting U+FFFD
    /// ([`char::REPLACEMENT_CHARACTER`]) for every unpaired surrogate.
    pub fn chars(&self) -> impl Iterator<Item = char> + '_ {
        char::decode_utf16(self.0.iter().copied()).map(|c| c.unwrap_or(char::REPLACEMENT_CHARACTER))
    }
}