Coverage Report

Created: 2025-05-08 06:13

/rust/registry/src/index.crates.io-6f17d22bba15001f/zerovec-0.10.4/src/ule/chars.rs
Line
Count
Source (jump to first uncovered line)
1
// This file is part of ICU4X. For terms of use, please see the file
2
// called LICENSE at the top level of the ICU4X source tree
3
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5
#![allow(clippy::upper_case_acronyms)]
6
//! ULE implementation for the `char` type.
7
8
use super::*;
9
use crate::impl_ule_from_array;
10
use core::cmp::Ordering;
11
use core::convert::TryFrom;
12
13
/// A u8 array of little-endian data corresponding to a Unicode scalar value.
14
///
15
/// The bytes of a `CharULE` are guaranteed to represent a little-endian-encoded u32 that is a
16
/// valid `char` and can be converted without validation.
17
///
18
/// # Examples
19
///
20
/// Convert a `char` to a `CharULE` and back again:
21
///
22
/// ```
23
/// use zerovec::ule::{AsULE, CharULE, ULE};
24
///
25
/// let c1 = '𑄃';
26
/// let ule = c1.to_unaligned();
27
/// assert_eq!(CharULE::as_byte_slice(&[ule]), &[0x03, 0x11, 0x01]);
28
/// let c2 = char::from_unaligned(ule);
29
/// assert_eq!(c1, c2);
30
/// ```
31
///
32
/// Attempt to parse invalid bytes to a `CharULE`:
33
///
34
/// ```
35
/// use zerovec::ule::{CharULE, ULE};
36
///
37
/// let bytes: &[u8] = &[0xFF, 0xFF, 0xFF, 0xFF];
38
/// CharULE::parse_byte_slice(bytes).expect_err("Invalid bytes");
39
/// ```
40
#[repr(transparent)]
41
#[derive(Debug, PartialEq, Eq, Clone, Copy, Hash)]
42
pub struct CharULE([u8; 3]);
43
44
impl CharULE {
45
    /// Converts a [`char`] to a [`CharULE`]. This is equivalent to calling
46
    /// [`AsULE::to_unaligned()`]
47
    ///
48
    /// See the type-level documentation for [`CharULE`] for more information.
49
    #[inline]
50
0
    pub const fn from_aligned(c: char) -> Self {
51
0
        let [u0, u1, u2, _u3] = (c as u32).to_le_bytes();
52
0
        Self([u0, u1, u2])
53
0
    }
54
55
    impl_ule_from_array!(char, CharULE, Self([0; 3]));
56
}
57
58
// Safety (based on the safety checklist on the ULE trait):
59
//  1. CharULE does not include any uninitialized or padding bytes.
60
//     (achieved by `#[repr(transparent)]` on a type that satisfies this invariant)
61
//  2. CharULE is aligned to 1 byte.
62
//     (achieved by `#[repr(transparent)]` on a type that satisfies this invariant)
63
//  3. The impl of validate_byte_slice() returns an error if any byte is not valid.
64
//  4. The impl of validate_byte_slice() returns an error if there are extra bytes.
65
//  5. The other ULE methods use the default impl.
66
//  6. CharULE byte equality is semantic equality
67
unsafe impl ULE for CharULE {
68
    #[inline]
69
0
    fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> {
70
0
        if bytes.len() % 3 != 0 {
71
0
            return Err(ZeroVecError::length::<Self>(bytes.len()));
72
0
        }
73
        // Validate the bytes
74
0
        for chunk in bytes.chunks_exact(3) {
75
            // TODO: Use slice::as_chunks() when stabilized
76
            #[allow(clippy::indexing_slicing)]
77
            // Won't panic because the chunks are always 3 bytes long
78
0
            let u = u32::from_le_bytes([chunk[0], chunk[1], chunk[2], 0]);
79
0
            char::try_from(u).map_err(|_| ZeroVecError::parse::<Self>())?;
80
        }
81
0
        Ok(())
82
0
    }
83
}
84
85
impl AsULE for char {
86
    type ULE = CharULE;
87
88
    #[inline]
89
0
    fn to_unaligned(self) -> Self::ULE {
90
0
        CharULE::from_aligned(self)
91
0
    }
92
93
    #[inline]
94
6.59k
    fn from_unaligned(unaligned: Self::ULE) -> Self {
95
6.59k
        // Safe because the bytes of CharULE are defined to represent a valid Unicode scalar value.
96
6.59k
        unsafe {
97
6.59k
            Self::from_u32_unchecked(u32::from_le_bytes([
98
6.59k
                unaligned.0[0],
99
6.59k
                unaligned.0[1],
100
6.59k
                unaligned.0[2],
101
6.59k
                0,
102
6.59k
            ]))
103
6.59k
        }
104
6.59k
    }
<char as zerovec::ule::AsULE>::from_unaligned
Line
Count
Source
94
6.59k
    fn from_unaligned(unaligned: Self::ULE) -> Self {
95
6.59k
        // Safe because the bytes of CharULE are defined to represent a valid Unicode scalar value.
96
6.59k
        unsafe {
97
6.59k
            Self::from_u32_unchecked(u32::from_le_bytes([
98
6.59k
                unaligned.0[0],
99
6.59k
                unaligned.0[1],
100
6.59k
                unaligned.0[2],
101
6.59k
                0,
102
6.59k
            ]))
103
6.59k
        }
104
6.59k
    }
Unexecuted instantiation: <char as zerovec::ule::AsULE>::from_unaligned
Unexecuted instantiation: <char as zerovec::ule::AsULE>::from_unaligned
105
}
106
107
impl PartialOrd for CharULE {
108
0
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
109
0
        Some(self.cmp(other))
110
0
    }
111
}
112
113
impl Ord for CharULE {
114
0
    fn cmp(&self, other: &Self) -> Ordering {
115
0
        char::from_unaligned(*self).cmp(&char::from_unaligned(*other))
116
0
    }
117
}
118
119
#[cfg(test)]
120
mod test {
121
    use super::*;
122
123
    #[test]
124
    fn test_from_array() {
125
        const CHARS: [char; 2] = ['a', '🙃'];
126
        const CHARS_ULE: [CharULE; 2] = CharULE::from_array(CHARS);
127
        assert_eq!(
128
            CharULE::as_byte_slice(&CHARS_ULE),
129
            &[0x61, 0x00, 0x00, 0x43, 0xF6, 0x01]
130
        );
131
    }
132
133
    #[test]
134
    fn test_from_array_zst() {
135
        const CHARS: [char; 0] = [];
136
        const CHARS_ULE: [CharULE; 0] = CharULE::from_array(CHARS);
137
        let bytes = CharULE::as_byte_slice(&CHARS_ULE);
138
        let empty: &[u8] = &[];
139
        assert_eq!(bytes, empty);
140
    }
141
142
    #[test]
143
    fn test_parse() {
144
        // 1-byte, 2-byte, 3-byte, and two 4-byte character in UTF-8 (not as relevant in UTF-32)
145
        let chars = ['w', 'ω', '文', '𑄃', '🙃'];
146
        let char_ules: Vec<CharULE> = chars.iter().copied().map(char::to_unaligned).collect();
147
        let char_bytes: &[u8] = CharULE::as_byte_slice(&char_ules);
148
149
        // Check parsing
150
        let parsed_ules: &[CharULE] = CharULE::parse_byte_slice(char_bytes).unwrap();
151
        assert_eq!(char_ules, parsed_ules);
152
        let parsed_chars: Vec<char> = parsed_ules
153
            .iter()
154
            .copied()
155
            .map(char::from_unaligned)
156
            .collect();
157
        assert_eq!(&chars, parsed_chars.as_slice());
158
159
        // Compare to golden expected data
160
        assert_eq!(
161
            &[119, 0, 0, 201, 3, 0, 135, 101, 0, 3, 17, 1, 67, 246, 1],
162
            char_bytes
163
        );
164
    }
165
166
    #[test]
167
    fn test_failures() {
168
        // 119 and 120 are valid, but not 0xD800 (high surrogate)
169
        let u32s = [119, 0xD800, 120];
170
        let u32_ules: Vec<RawBytesULE<4>> = u32s
171
            .iter()
172
            .copied()
173
            .map(<u32 as AsULE>::to_unaligned)
174
            .collect();
175
        let u32_bytes: &[u8] = RawBytesULE::<4>::as_byte_slice(&u32_ules);
176
        let parsed_ules_result = CharULE::parse_byte_slice(u32_bytes);
177
        assert!(parsed_ules_result.is_err());
178
179
        // 0x20FFFF is out of range for a char
180
        let u32s = [0x20FFFF];
181
        let u32_ules: Vec<RawBytesULE<4>> = u32s
182
            .iter()
183
            .copied()
184
            .map(<u32 as AsULE>::to_unaligned)
185
            .collect();
186
        let u32_bytes: &[u8] = RawBytesULE::<4>::as_byte_slice(&u32_ules);
187
        let parsed_ules_result = CharULE::parse_byte_slice(u32_bytes);
188
        assert!(parsed_ules_result.is_err());
189
    }
190
}