/rust/registry/src/index.crates.io-6f17d22bba15001f/zerovec-0.10.4/src/ule/chars.rs
Line | Count | Source (jump to first uncovered line) |
1 | | // This file is part of ICU4X. For terms of use, please see the file |
2 | | // called LICENSE at the top level of the ICU4X source tree |
3 | | // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). |
4 | | |
5 | | #![allow(clippy::upper_case_acronyms)] |
6 | | //! ULE implementation for the `char` type. |
7 | | |
8 | | use super::*; |
9 | | use crate::impl_ule_from_array; |
10 | | use core::cmp::Ordering; |
11 | | use core::convert::TryFrom; |
12 | | |
/// A three-byte, little-endian encoding of a Unicode scalar value.
///
/// The bytes of a `CharULE` are guaranteed to be the three low-order bytes of a
/// little-endian-encoded u32 (whose fourth byte is zero) that is a valid `char`, so they can be
/// converted back to a `char` without validation.
///
/// # Examples
///
/// Convert a `char` to a `CharULE` and back again:
///
/// ```
/// use zerovec::ule::{AsULE, CharULE, ULE};
///
/// let c1 = '𑄃';
/// let ule = c1.to_unaligned();
/// assert_eq!(CharULE::as_byte_slice(&[ule]), &[0x03, 0x11, 0x01]);
/// let c2 = char::from_unaligned(ule);
/// assert_eq!(c1, c2);
/// ```
///
/// Attempt to parse invalid bytes to a `CharULE`:
///
/// ```
/// use zerovec::ule::{CharULE, ULE};
///
/// // The length is acceptable (one 3-byte element), but 0xFFFFFF is above `char::MAX`.
/// let bytes: &[u8] = &[0xFF, 0xFF, 0xFF];
/// CharULE::parse_byte_slice(bytes).expect_err("Invalid bytes");
///
/// // A length that is not a multiple of three is rejected as well.
/// let bytes: &[u8] = &[0xFF, 0xFF, 0xFF, 0xFF];
/// CharULE::parse_byte_slice(bytes).expect_err("Invalid length");
/// ```
#[repr(transparent)]
#[derive(Debug, PartialEq, Eq, Clone, Copy, Hash)]
pub struct CharULE([u8; 3]);
43 | | |
44 | | impl CharULE { |
45 | | /// Converts a [`char`] to a [`CharULE`]. This is equivalent to calling |
46 | | /// [`AsULE::to_unaligned()`] |
47 | | /// |
48 | | /// See the type-level documentation for [`CharULE`] for more information. |
49 | | #[inline] |
50 | 0 | pub const fn from_aligned(c: char) -> Self { |
51 | 0 | let [u0, u1, u2, _u3] = (c as u32).to_le_bytes(); |
52 | 0 | Self([u0, u1, u2]) |
53 | 0 | } |
54 | | |
55 | | impl_ule_from_array!(char, CharULE, Self([0; 3])); |
56 | | } |
57 | | |
58 | | // Safety (based on the safety checklist on the ULE trait): |
59 | | // 1. CharULE does not include any uninitialized or padding bytes. |
60 | | // (achieved by `#[repr(transparent)]` on a type that satisfies this invariant) |
61 | | // 2. CharULE is aligned to 1 byte. |
62 | | // (achieved by `#[repr(transparent)]` on a type that satisfies this invariant) |
63 | | // 3. The impl of validate_byte_slice() returns an error if any byte is not valid. |
64 | | // 4. The impl of validate_byte_slice() returns an error if there are extra bytes. |
65 | | // 5. The other ULE methods use the default impl. |
66 | | // 6. CharULE byte equality is semantic equality |
67 | | unsafe impl ULE for CharULE { |
68 | | #[inline] |
69 | 0 | fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> { |
70 | 0 | if bytes.len() % 3 != 0 { |
71 | 0 | return Err(ZeroVecError::length::<Self>(bytes.len())); |
72 | 0 | } |
73 | | // Validate the bytes |
74 | 0 | for chunk in bytes.chunks_exact(3) { |
75 | | // TODO: Use slice::as_chunks() when stabilized |
76 | | #[allow(clippy::indexing_slicing)] |
77 | | // Won't panic because the chunks are always 3 bytes long |
78 | 0 | let u = u32::from_le_bytes([chunk[0], chunk[1], chunk[2], 0]); |
79 | 0 | char::try_from(u).map_err(|_| ZeroVecError::parse::<Self>())?; |
80 | | } |
81 | 0 | Ok(()) |
82 | 0 | } |
83 | | } |
84 | | |
85 | | impl AsULE for char { |
86 | | type ULE = CharULE; |
87 | | |
88 | | #[inline] |
89 | 0 | fn to_unaligned(self) -> Self::ULE { |
90 | 0 | CharULE::from_aligned(self) |
91 | 0 | } |
92 | | |
93 | | #[inline] |
94 | 6.59k | fn from_unaligned(unaligned: Self::ULE) -> Self { |
95 | 6.59k | // Safe because the bytes of CharULE are defined to represent a valid Unicode scalar value. |
96 | 6.59k | unsafe { |
97 | 6.59k | Self::from_u32_unchecked(u32::from_le_bytes([ |
98 | 6.59k | unaligned.0[0], |
99 | 6.59k | unaligned.0[1], |
100 | 6.59k | unaligned.0[2], |
101 | 6.59k | 0, |
102 | 6.59k | ])) |
103 | 6.59k | } |
104 | 6.59k | } <char as zerovec::ule::AsULE>::from_unaligned Line | Count | Source | 94 | 6.59k | fn from_unaligned(unaligned: Self::ULE) -> Self { | 95 | 6.59k | // Safe because the bytes of CharULE are defined to represent a valid Unicode scalar value. | 96 | 6.59k | unsafe { | 97 | 6.59k | Self::from_u32_unchecked(u32::from_le_bytes([ | 98 | 6.59k | unaligned.0[0], | 99 | 6.59k | unaligned.0[1], | 100 | 6.59k | unaligned.0[2], | 101 | 6.59k | 0, | 102 | 6.59k | ])) | 103 | 6.59k | } | 104 | 6.59k | } |
Unexecuted instantiation: <char as zerovec::ule::AsULE>::from_unaligned Unexecuted instantiation: <char as zerovec::ule::AsULE>::from_unaligned |
105 | | } |
106 | | |
107 | | impl PartialOrd for CharULE { |
108 | 0 | fn partial_cmp(&self, other: &Self) -> Option<Ordering> { |
109 | 0 | Some(self.cmp(other)) |
110 | 0 | } |
111 | | } |
112 | | |
113 | | impl Ord for CharULE { |
114 | 0 | fn cmp(&self, other: &Self) -> Ordering { |
115 | 0 | char::from_unaligned(*self).cmp(&char::from_unaligned(*other)) |
116 | 0 | } |
117 | | } |
118 | | |
#[cfg(test)]
mod test {
    use super::*;

    /// Returns the three low-order bytes of `value` in little-endian order. This is the byte
    /// layout that `CharULE` validation reads; `value` is NOT checked to be a valid `char`.
    fn le3(value: u32) -> [u8; 3] {
        let [b0, b1, b2, _] = value.to_le_bytes();
        [b0, b1, b2]
    }

    #[test]
    fn test_from_array() {
        const CHARS: [char; 2] = ['a', '🙃'];
        const CHARS_ULE: [CharULE; 2] = CharULE::from_array(CHARS);
        assert_eq!(
            CharULE::as_byte_slice(&CHARS_ULE),
            &[0x61, 0x00, 0x00, 0x43, 0xF6, 0x01]
        );
    }

    #[test]
    fn test_from_array_zst() {
        const CHARS: [char; 0] = [];
        const CHARS_ULE: [CharULE; 0] = CharULE::from_array(CHARS);
        let bytes = CharULE::as_byte_slice(&CHARS_ULE);
        let empty: &[u8] = &[];
        assert_eq!(bytes, empty);
    }

    #[test]
    fn test_parse() {
        // 1-byte, 2-byte, 3-byte, and two 4-byte characters in UTF-8 (not as relevant in UTF-32)
        let chars = ['w', 'ω', '文', '𑄃', '🙃'];
        let char_ules: Vec<CharULE> = chars.iter().copied().map(char::to_unaligned).collect();
        let char_bytes: &[u8] = CharULE::as_byte_slice(&char_ules);

        // Check parsing
        let parsed_ules: &[CharULE] = CharULE::parse_byte_slice(char_bytes).unwrap();
        assert_eq!(char_ules, parsed_ules);
        let parsed_chars: Vec<char> = parsed_ules
            .iter()
            .copied()
            .map(char::from_unaligned)
            .collect();
        assert_eq!(&chars, parsed_chars.as_slice());

        // Compare to golden expected data
        assert_eq!(
            &[119, 0, 0, 201, 3, 0, 135, 101, 0, 3, 17, 1, 67, 246, 1],
            char_bytes
        );
    }

    #[test]
    fn test_failures() {
        // 119 and 120 are valid, but not 0xD800 (high surrogate). Each value is encoded in
        // three bytes so that the chunk boundaries line up with the values under test.
        let bytes: Vec<u8> = [119, 0xD800, 120].iter().copied().flat_map(le3).collect();
        assert_eq!(bytes.len(), 9);
        assert!(CharULE::parse_byte_slice(&bytes).is_err());

        // The same input without the surrogate parses, so the surrogate is what was rejected.
        let bytes: Vec<u8> = [119, 120].iter().copied().flat_map(le3).collect();
        assert!(CharULE::parse_byte_slice(&bytes).is_ok());

        // 0x110000 (first value past `char::MAX`) and 0x20FFFF are out of range for a char
        for out_of_range in [0x110000, 0x20FFFF] {
            assert!(CharULE::parse_byte_slice(&le3(out_of_range)).is_err());
        }

        // A length that is not a multiple of three is rejected, even though the first three
        // bytes on their own would be a valid element.
        let bytes: &[u8] = &[119, 0, 0, 0];
        assert!(CharULE::parse_byte_slice(bytes).is_err());
    }
}