Coverage Report

Created: 2025-10-29 07:05

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/rust/registry/src/index.crates.io-1949cf8c6b5b557f/zerovec-0.10.4/src/ule/unvalidated.rs
Line
Count
Source
1
// This file is part of ICU4X. For terms of use, please see the file
2
// called LICENSE at the top level of the ICU4X source tree
3
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5
use super::{AsULE, RawBytesULE, VarULE};
6
use crate::ule::EqULE;
7
use crate::{map::ZeroMapKV, VarZeroSlice, VarZeroVec, ZeroVecError};
8
use alloc::boxed::Box;
9
use core::cmp::Ordering;
10
use core::fmt;
11
use core::ops::Deref;
12
13
/// A byte slice that is expected to be a UTF-8 string but does not enforce that invariant.
14
///
15
/// Use this type instead of `str` if you don't need to enforce UTF-8 during deserialization. For
16
/// example, strings that are keys of a map don't need to ever be reified as `str`s.
17
///
18
/// [`UnvalidatedStr`] derefs to `[u8]`. To obtain a `str`, use [`Self::try_as_str()`].
19
///
20
/// The main advantage of this type over `[u8]` is that it serializes as a string in
21
/// human-readable formats like JSON.
22
///
23
/// # Examples
24
///
25
/// Using an [`UnvalidatedStr`] as the key of a [`ZeroMap`]:
26
///
27
/// ```
28
/// use zerovec::ule::UnvalidatedStr;
29
/// use zerovec::ZeroMap;
30
///
31
/// let map: ZeroMap<UnvalidatedStr, usize> = [
32
///     (UnvalidatedStr::from_str("abc"), 11),
33
///     (UnvalidatedStr::from_str("def"), 22),
34
///     (UnvalidatedStr::from_str("ghi"), 33),
35
/// ]
36
/// .into_iter()
37
/// .collect();
38
///
39
/// let key = "abc";
40
/// let value = map.get_copied_by(|uvstr| uvstr.as_bytes().cmp(key.as_bytes()));
41
/// assert_eq!(Some(11), value);
42
/// ```
43
///
44
/// [`ZeroMap`]: crate::ZeroMap
45
#[repr(transparent)]
46
#[derive(PartialEq, Eq, PartialOrd, Ord)]
47
#[allow(clippy::exhaustive_structs)] // transparent newtype
48
pub struct UnvalidatedStr([u8]);
49
50
impl fmt::Debug for UnvalidatedStr {
51
0
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
52
        // Debug as a string if possible
53
0
        match self.try_as_str() {
54
0
            Ok(s) => fmt::Debug::fmt(s, f),
55
0
            Err(_) => fmt::Debug::fmt(&self.0, f),
56
        }
57
0
    }
58
}
59
60
impl UnvalidatedStr {
61
    /// Create a [`UnvalidatedStr`] from a byte slice.
62
    #[inline]
63
0
    pub const fn from_bytes(other: &[u8]) -> &Self {
64
        // Safety: UnvalidatedStr is transparent over [u8]
65
0
        unsafe { core::mem::transmute(other) }
66
0
    }
Unexecuted instantiation: <zerovec::ule::unvalidated::UnvalidatedStr>::from_bytes
Unexecuted instantiation: <zerovec::ule::unvalidated::UnvalidatedStr>::from_bytes
Unexecuted instantiation: <zerovec::ule::unvalidated::UnvalidatedStr>::from_bytes
67
68
    /// Create a [`UnvalidatedStr`] from a string slice.
69
    #[inline]
70
0
    pub const fn from_str(s: &str) -> &Self {
71
0
        Self::from_bytes(s.as_bytes())
72
0
    }
Unexecuted instantiation: <zerovec::ule::unvalidated::UnvalidatedStr>::from_str
Unexecuted instantiation: <zerovec::ule::unvalidated::UnvalidatedStr>::from_str
73
74
    /// Create a [`UnvalidatedStr`] from boxed bytes.
75
    #[inline]
76
0
    pub fn from_boxed_bytes(other: Box<[u8]>) -> Box<Self> {
77
        // Safety: UnvalidatedStr is transparent over [u8]
78
0
        unsafe { core::mem::transmute(other) }
79
0
    }
Unexecuted instantiation: <zerovec::ule::unvalidated::UnvalidatedStr>::from_boxed_bytes
Unexecuted instantiation: <zerovec::ule::unvalidated::UnvalidatedStr>::from_boxed_bytes
80
81
    /// Create a [`UnvalidatedStr`] from a boxed `str`.
82
    #[inline]
83
0
    pub fn from_boxed_str(other: Box<str>) -> Box<Self> {
84
0
        Self::from_boxed_bytes(other.into_boxed_bytes())
85
0
    }
86
87
    /// Get the bytes from a [`UnvalidatedStr`].
88
    #[inline]
89
0
    pub const fn as_bytes(&self) -> &[u8] {
90
0
        &self.0
91
0
    }
92
93
    /// Attempt to convert a [`UnvalidatedStr`] to a `str`.
94
    ///
95
    /// # Examples
96
    ///
97
    /// ```
98
    /// use zerovec::ule::UnvalidatedStr;
99
    ///
100
    /// static A: &UnvalidatedStr = UnvalidatedStr::from_bytes(b"abc");
101
    ///
102
    /// let b = A.try_as_str().unwrap();
103
    /// assert_eq!(b, "abc");
104
    /// ```
105
    // Note: `core::str::from_utf8` is `const` starting in Rust 1.63
106
    #[inline]
107
0
    pub fn try_as_str(&self) -> Result<&str, core::str::Utf8Error> {
108
0
        core::str::from_utf8(&self.0)
109
0
    }
110
}
111
112
impl<'a> From<&'a str> for &'a UnvalidatedStr {
113
    #[inline]
114
0
    fn from(other: &'a str) -> Self {
115
0
        UnvalidatedStr::from_str(other)
116
0
    }
117
}
118
119
impl From<Box<str>> for Box<UnvalidatedStr> {
120
    #[inline]
121
0
    fn from(other: Box<str>) -> Self {
122
0
        UnvalidatedStr::from_boxed_str(other)
123
0
    }
124
}
125
126
impl Deref for UnvalidatedStr {
127
    type Target = [u8];
128
0
    fn deref(&self) -> &Self::Target {
129
0
        &self.0
130
0
    }
131
}
132
133
impl<'a> ZeroMapKV<'a> for UnvalidatedStr {
134
    type Container = VarZeroVec<'a, UnvalidatedStr>;
135
    type Slice = VarZeroSlice<UnvalidatedStr>;
136
    type GetType = UnvalidatedStr;
137
    type OwnedType = Box<UnvalidatedStr>;
138
}
139
140
// Safety (based on the safety checklist on the VarULE trait):
141
//  1. UnvalidatedStr does not include any uninitialized or padding bytes (transparent over a ULE)
142
//  2. UnvalidatedStr is aligned to 1 byte (transparent over a ULE)
143
//  3. The impl of `validate_byte_slice()` returns an error if any byte is not valid (impossible)
144
//  4. The impl of `validate_byte_slice()` returns an error if the slice cannot be used in its entirety (impossible)
145
//  5. The impl of `from_byte_slice_unchecked()` returns a reference to the same data (returns the argument directly)
146
//  6. All other methods are defaulted
147
//  7. `[T]` byte equality is semantic equality (transparent over a ULE)
148
unsafe impl VarULE for UnvalidatedStr {
149
    #[inline]
150
0
    fn validate_byte_slice(_: &[u8]) -> Result<(), ZeroVecError> {
151
0
        Ok(())
152
0
    }
Unexecuted instantiation: <zerovec::ule::unvalidated::UnvalidatedStr as zerovec::ule::VarULE>::validate_byte_slice
Unexecuted instantiation: <zerovec::ule::unvalidated::UnvalidatedStr as zerovec::ule::VarULE>::validate_byte_slice
153
    #[inline]
154
0
    unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &Self {
155
0
        UnvalidatedStr::from_bytes(bytes)
156
0
    }
Unexecuted instantiation: <zerovec::ule::unvalidated::UnvalidatedStr as zerovec::ule::VarULE>::from_byte_slice_unchecked
Unexecuted instantiation: <zerovec::ule::unvalidated::UnvalidatedStr as zerovec::ule::VarULE>::from_byte_slice_unchecked
Unexecuted instantiation: <zerovec::ule::unvalidated::UnvalidatedStr as zerovec::ule::VarULE>::from_byte_slice_unchecked
157
}
158
159
/// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate
160
#[cfg(feature = "serde")]
161
impl serde::Serialize for UnvalidatedStr {
162
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
163
    where
164
        S: serde::Serializer,
165
    {
166
        use serde::ser::Error;
167
        let s = self
168
            .try_as_str()
169
            .map_err(|_| S::Error::custom("invalid UTF-8 in UnvalidatedStr"))?;
170
        if serializer.is_human_readable() {
171
            serializer.serialize_str(s)
172
        } else {
173
            serializer.serialize_bytes(s.as_bytes())
174
        }
175
    }
176
}
177
178
/// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate
179
#[cfg(feature = "serde")]
180
impl<'de> serde::Deserialize<'de> for Box<UnvalidatedStr> {
181
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
182
    where
183
        D: serde::Deserializer<'de>,
184
    {
185
        if deserializer.is_human_readable() {
186
            let boxed_str = Box::<str>::deserialize(deserializer)?;
187
            Ok(UnvalidatedStr::from_boxed_str(boxed_str))
188
        } else {
189
            let boxed_bytes = Box::<[u8]>::deserialize(deserializer)?;
190
            Ok(UnvalidatedStr::from_boxed_bytes(boxed_bytes))
191
        }
192
    }
193
}
194
195
/// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate
196
#[cfg(feature = "serde")]
197
impl<'de, 'a> serde::Deserialize<'de> for &'a UnvalidatedStr
198
where
199
    'de: 'a,
200
{
201
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
202
    where
203
        D: serde::Deserializer<'de>,
204
    {
205
        if deserializer.is_human_readable() {
206
            let s = <&str>::deserialize(deserializer)?;
207
            Ok(UnvalidatedStr::from_str(s))
208
        } else {
209
            let bytes = <&[u8]>::deserialize(deserializer)?;
210
            Ok(UnvalidatedStr::from_bytes(bytes))
211
        }
212
    }
213
}
214
215
/// A u8 array of little-endian data that is expected to be a Unicode scalar value, but is not
216
/// validated as such.
217
///
218
/// Use this type instead of `char` when you want to deal with data that is expected to be valid
219
/// Unicode scalar values, but you want control over when or if you validate that assumption.
220
///
221
/// # Examples
222
///
223
/// ```
224
/// use zerovec::ule::UnvalidatedChar;
225
/// use zerovec::{ZeroSlice, ZeroVec};
226
///
227
/// // data known to be little-endian three-byte chunks of valid Unicode scalar values
228
/// let data = [0x68, 0x00, 0x00, 0x69, 0x00, 0x00, 0x4B, 0xF4, 0x01];
229
/// // ground truth expectation
230
/// let real = ['h', 'i', '👋'];
231
///
232
/// let chars: &ZeroSlice<UnvalidatedChar> = ZeroSlice::parse_byte_slice(&data).expect("invalid data length");
233
/// let parsed: Vec<_> = chars.iter().map(|c| unsafe { c.to_char_unchecked() }).collect();
234
/// assert_eq!(&parsed, &real);
235
///
236
/// let real_chars: ZeroVec<_> = real.iter().copied().map(UnvalidatedChar::from_char).collect();
237
/// let serialized_data = chars.as_bytes();
238
/// assert_eq!(serialized_data, &data);
239
/// ```
240
#[repr(transparent)]
241
#[derive(PartialEq, Eq, Clone, Copy, Hash)]
242
pub struct UnvalidatedChar([u8; 3]);
243
244
impl UnvalidatedChar {
245
    /// Create a [`UnvalidatedChar`] from a `char`.
246
    ///
247
    /// # Examples
248
    ///
249
    /// ```
250
    /// use zerovec::ule::UnvalidatedChar;
251
    ///
252
    /// let a = UnvalidatedChar::from_char('a');
253
    /// assert_eq!(a.try_to_char().unwrap(), 'a');
254
    /// ```
255
    #[inline]
256
0
    pub const fn from_char(c: char) -> Self {
257
0
        let [u0, u1, u2, _u3] = (c as u32).to_le_bytes();
258
0
        Self([u0, u1, u2])
259
0
    }
260
261
    #[inline]
262
    #[doc(hidden)]
263
0
    pub const fn from_u24(c: u32) -> Self {
264
0
        let [u0, u1, u2, _u3] = c.to_le_bytes();
265
0
        Self([u0, u1, u2])
266
0
    }
267
268
    /// Attempt to convert a [`UnvalidatedChar`] to a `char`.
269
    ///
270
    /// # Examples
271
    ///
272
    /// ```
273
    /// use zerovec::ule::{AsULE, UnvalidatedChar};
274
    ///
275
    /// let a = UnvalidatedChar::from_char('a');
276
    /// assert_eq!(a.try_to_char(), Ok('a'));
277
    ///
278
    /// let b = UnvalidatedChar::from_unaligned([0xFF, 0xFF, 0xFF].into());
279
    /// assert!(matches!(b.try_to_char(), Err(_)));
280
    /// ```
281
    #[inline]
282
0
    pub fn try_to_char(self) -> Result<char, core::char::CharTryFromError> {
283
0
        let [u0, u1, u2] = self.0;
284
0
        char::try_from(u32::from_le_bytes([u0, u1, u2, 0]))
285
0
    }
286
287
    /// Convert a [`UnvalidatedChar`] to a `char`, returning [`char::REPLACEMENT_CHARACTER`]
288
    /// if the `UnvalidatedChar` does not represent a valid Unicode scalar value.
289
    ///
290
    /// # Examples
291
    ///
292
    /// ```
293
    /// use zerovec::ule::{AsULE, UnvalidatedChar};
294
    ///
295
    /// let a = UnvalidatedChar::from_unaligned([0xFF, 0xFF, 0xFF].into());
296
    /// assert_eq!(a.to_char_lossy(), char::REPLACEMENT_CHARACTER);
297
    /// ```
298
    #[inline]
299
0
    pub fn to_char_lossy(self) -> char {
300
0
        self.try_to_char().unwrap_or(char::REPLACEMENT_CHARACTER)
301
0
    }
302
303
    /// Convert a [`UnvalidatedChar`] to a `char` without checking that it is
304
    /// a valid Unicode scalar value.
305
    ///
306
    /// # Safety
307
    ///
308
    /// The `UnvalidatedChar` must be a valid Unicode scalar value in little-endian order.
309
    ///
310
    /// # Examples
311
    ///
312
    /// ```
313
    /// use zerovec::ule::UnvalidatedChar;
314
    ///
315
    /// let a = UnvalidatedChar::from_char('a');
316
    /// assert_eq!(unsafe { a.to_char_unchecked() }, 'a');
317
    /// ```
318
    #[inline]
319
0
    pub unsafe fn to_char_unchecked(self) -> char {
320
0
        let [u0, u1, u2] = self.0;
321
0
        char::from_u32_unchecked(u32::from_le_bytes([u0, u1, u2, 0]))
322
0
    }
323
}
324
325
impl RawBytesULE<3> {
326
    /// Converts a [`UnvalidatedChar`] to its ULE type. This is equivalent to calling
327
    /// [`AsULE::to_unaligned`].
328
    #[inline]
329
0
    pub const fn from_unvalidated_char(uc: UnvalidatedChar) -> Self {
330
0
        RawBytesULE(uc.0)
331
0
    }
332
}
333
334
impl AsULE for UnvalidatedChar {
335
    type ULE = RawBytesULE<3>;
336
337
    #[inline]
338
0
    fn to_unaligned(self) -> Self::ULE {
339
0
        RawBytesULE(self.0)
340
0
    }
341
342
    #[inline]
343
0
    fn from_unaligned(unaligned: Self::ULE) -> Self {
344
0
        Self(unaligned.0)
345
0
    }
346
}
347
348
// Safety: UnvalidatedChar stores exactly the three little-endian bytes (valid char or not)
349
// that its AsULE::ULE type (RawBytesULE<3>) stores, so both have the same byte representation
350
unsafe impl EqULE for UnvalidatedChar {}
351
352
impl fmt::Debug for UnvalidatedChar {
353
0
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
354
        // Debug as a char if possible
355
0
        match self.try_to_char() {
356
0
            Ok(c) => fmt::Debug::fmt(&c, f),
357
0
            Err(_) => fmt::Debug::fmt(&self.0, f),
358
        }
359
0
    }
360
}
361
362
impl PartialOrd for UnvalidatedChar {
363
0
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
364
0
        Some(self.cmp(other))
365
0
    }
366
}
367
368
impl Ord for UnvalidatedChar {
369
    // custom implementation: a derived Ord would compare the little-endian bytes
    // lexicographically (least-significant byte first), which is not numeric order
370
0
    fn cmp(&self, other: &Self) -> Ordering {
371
0
        let [a0, a1, a2] = self.0;
372
0
        let a = u32::from_le_bytes([a0, a1, a2, 0]);
373
0
        let [b0, b1, b2] = other.0;
374
0
        let b = u32::from_le_bytes([b0, b1, b2, 0]);
375
0
        a.cmp(&b)
376
0
    }
377
}
378
379
impl From<char> for UnvalidatedChar {
380
    #[inline]
381
0
    fn from(value: char) -> Self {
382
0
        Self::from_char(value)
383
0
    }
384
}
385
386
impl TryFrom<UnvalidatedChar> for char {
387
    type Error = core::char::CharTryFromError;
388
389
    #[inline]
390
0
    fn try_from(value: UnvalidatedChar) -> Result<char, Self::Error> {
391
0
        value.try_to_char()
392
0
    }
393
}
394
395
/// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate
396
#[cfg(feature = "serde")]
397
impl serde::Serialize for UnvalidatedChar {
398
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
399
    where
400
        S: serde::Serializer,
401
    {
402
        use serde::ser::Error;
403
        let c = self
404
            .try_to_char()
405
            .map_err(|_| S::Error::custom("invalid Unicode scalar value in UnvalidatedChar"))?;
406
        if serializer.is_human_readable() {
407
            serializer.serialize_char(c)
408
        } else {
409
            self.0.serialize(serializer)
410
        }
411
    }
412
}
413
414
/// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate
415
#[cfg(feature = "serde")]
416
impl<'de> serde::Deserialize<'de> for UnvalidatedChar {
417
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
418
    where
419
        D: serde::Deserializer<'de>,
420
    {
421
        if deserializer.is_human_readable() {
422
            let c = <char>::deserialize(deserializer)?;
423
            Ok(UnvalidatedChar::from_char(c))
424
        } else {
425
            let bytes = <[u8; 3]>::deserialize(deserializer)?;
426
            Ok(UnvalidatedChar(bytes))
427
        }
428
    }
429
}
430
431
#[cfg(feature = "databake")]
432
impl databake::Bake for UnvalidatedChar {
433
    fn bake(&self, env: &databake::CrateEnv) -> databake::TokenStream {
434
        match self.try_to_char() {
435
            Ok(ch) => {
436
                env.insert("zerovec");
437
                let ch = ch.bake(env);
438
                databake::quote! {
439
                    zerovec::ule::UnvalidatedChar::from_char(#ch)
440
                }
441
            }
442
            Err(_) => {
443
                env.insert("zerovec");
444
                let u24 = u32::from_le_bytes([self.0[0], self.0[1], self.0[2], 0]);
445
                databake::quote! {
446
                    zerovec::ule::UnvalidatedChar::from_u24(#u24)
447
                }
448
            }
449
        }
450
    }
451
}
452
453
#[cfg(test)]
454
mod test {
455
    use super::*;
456
    use crate::ZeroVec;
457
458
    #[test]
459
    fn test_serde_fail() {
460
        let uc = UnvalidatedChar([0xFF, 0xFF, 0xFF]);
461
        serde_json::to_string(&uc).expect_err("serialize invalid char bytes");
462
        bincode::serialize(&uc).expect_err("serialize invalid char bytes");
463
    }
464
465
    #[test]
466
    fn test_serde_json() {
467
        let c = '🙃';
468
        let uc = UnvalidatedChar::from_char(c);
469
        let json_ser = serde_json::to_string(&uc).unwrap();
470
471
        assert_eq!(json_ser, r#""🙃""#);
472
473
        let json_de: UnvalidatedChar = serde_json::from_str(&json_ser).unwrap();
474
475
        assert_eq!(uc, json_de);
476
    }
477
478
    #[test]
479
    fn test_serde_bincode() {
480
        let c = '🙃';
481
        let uc = UnvalidatedChar::from_char(c);
482
        let bytes_ser = bincode::serialize(&uc).unwrap();
483
484
        assert_eq!(bytes_ser, [0x43, 0xF6, 0x01]);
485
486
        let bytes_de: UnvalidatedChar = bincode::deserialize(&bytes_ser).unwrap();
487
488
        assert_eq!(uc, bytes_de);
489
    }
490
491
    #[test]
492
    fn test_representation() {
493
        let chars = ['w', 'ω', '文', '𑄃', '🙃'];
494
495
        // backed by [UnvalidatedChar]
496
        let uvchars: Vec<_> = chars
497
            .iter()
498
            .copied()
499
            .map(UnvalidatedChar::from_char)
500
            .collect();
501
        // backed by [RawBytesULE<3>]
502
        let zvec: ZeroVec<_> = uvchars.clone().into_iter().collect();
503
504
        let ule_bytes = zvec.as_bytes();
505
        let uvbytes;
506
        unsafe {
507
            let ptr = &uvchars[..] as *const _ as *const u8;
508
            uvbytes = core::slice::from_raw_parts(ptr, ule_bytes.len());
509
        }
510
511
        // UnvalidatedChar is defined as little-endian, so this must be true on all platforms
512
        // also asserts that to_unaligned/from_unaligned are no-ops
513
        assert_eq!(uvbytes, ule_bytes);
514
515
        assert_eq!(
516
            &[119, 0, 0, 201, 3, 0, 135, 101, 0, 3, 17, 1, 67, 246, 1],
517
            ule_bytes
518
        );
519
    }
520
521
    #[test]
522
    fn test_char_bake() {
523
        databake::test_bake!(UnvalidatedChar, const: crate::ule::UnvalidatedChar::from_char('b'), zerovec);
524
        // surrogate code point
525
        databake::test_bake!(UnvalidatedChar, const: crate::ule::UnvalidatedChar::from_u24(55296u32), zerovec);
526
    }
527
}