/rust/registry/src/index.crates.io-1949cf8c6b5b557f/potential_utf-0.1.4/src/ustr.rs
Line | Count | Source |
1 | | // This file is part of ICU4X. For terms of use, please see the file |
2 | | // called LICENSE at the top level of the ICU4X source tree |
3 | | // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). |
4 | | |
5 | | #[cfg(feature = "alloc")] |
6 | | use alloc::boxed::Box; |
7 | | use core::cmp::Ordering; |
8 | | use core::fmt; |
9 | | use core::ops::Deref; |
10 | | |
/// A byte slice that is expected to be a UTF-8 string but does not enforce that invariant.
///
/// Use this type instead of `str` if you don't need to enforce UTF-8 during deserialization. For
/// example, strings that are keys of a map don't need to ever be reified as `str`s.
///
/// [`PotentialUtf8`] derefs to `[u8]`. To obtain a `str`, use [`Self::try_as_str()`].
///
/// The main advantage of this type over `[u8]` is that it serializes as a string in
/// human-readable formats like JSON.
///
/// # Examples
///
/// Using a [`PotentialUtf8`] as the key of a [`ZeroMap`]:
///
/// ```
/// use potential_utf::PotentialUtf8;
/// use zerovec::ZeroMap;
///
/// // This map is cheap to deserialize, as we don't need to perform UTF-8 validation.
/// let map: ZeroMap<PotentialUtf8, u8> = [
///     (PotentialUtf8::from_bytes(b"abc"), 11),
///     (PotentialUtf8::from_bytes(b"def"), 22),
///     (PotentialUtf8::from_bytes(b"ghi"), 33),
/// ]
/// .into_iter()
/// .collect();
///
/// let key = "abc";
/// let value = map.get_copied(PotentialUtf8::from_str(key));
/// assert_eq!(Some(11), value);
/// ```
///
/// [`ZeroMap`]: zerovec::ZeroMap
// `repr(transparent)` gives this type the same layout as `[u8]`; the reference
// conversions in the inherent impl rely on that.
#[repr(transparent)]
// The derived comparisons compare the wrapped bytes, i.e. ordering is byte-wise.
#[derive(PartialEq, Eq, PartialOrd, Ord)]
#[allow(clippy::exhaustive_structs)] // transparent newtype
pub struct PotentialUtf8(
    /// The raw, unvalidated bytes.
    pub [u8],
);
48 | | |
49 | | impl fmt::Debug for PotentialUtf8 { |
50 | 0 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
51 | | // Debug as a string if possible |
52 | 0 | match self.try_as_str() { |
53 | 0 | Ok(s) => fmt::Debug::fmt(s, f), |
54 | 0 | Err(_) => fmt::Debug::fmt(&self.0, f), |
55 | | } |
56 | 0 | } |
57 | | } |
58 | | |
59 | | impl PotentialUtf8 { |
60 | | /// Create a [`PotentialUtf8`] from a byte slice. |
61 | | #[inline] |
62 | 0 | pub const fn from_bytes(other: &[u8]) -> &Self { |
63 | | // Safety: PotentialUtf8 is transparent over [u8] |
64 | 0 | unsafe { core::mem::transmute(other) } |
65 | 0 | } |
66 | | |
67 | | /// Create a [`PotentialUtf8`] from a string slice. |
68 | | #[inline] |
69 | 0 | pub const fn from_str(s: &str) -> &Self { |
70 | 0 | Self::from_bytes(s.as_bytes()) |
71 | 0 | } |
72 | | |
73 | | /// Create a [`PotentialUtf8`] from boxed bytes. |
74 | | /// |
75 | | /// ✨ *Enabled with the `alloc` Cargo feature.* |
76 | | #[inline] |
77 | | #[cfg(feature = "alloc")] |
78 | | pub fn from_boxed_bytes(other: Box<[u8]>) -> Box<Self> { |
79 | | // Safety: PotentialUtf8 is transparent over [u8] |
80 | | unsafe { core::mem::transmute(other) } |
81 | | } |
82 | | |
83 | | /// Create a [`PotentialUtf8`] from a boxed `str`. |
84 | | /// |
85 | | /// ✨ *Enabled with the `alloc` Cargo feature.* |
86 | | #[inline] |
87 | | #[cfg(feature = "alloc")] |
88 | | pub fn from_boxed_str(other: Box<str>) -> Box<Self> { |
89 | | Self::from_boxed_bytes(other.into_boxed_bytes()) |
90 | | } |
91 | | |
92 | | /// Get the bytes from a [`PotentialUtf8]. |
93 | | #[inline] |
94 | 0 | pub const fn as_bytes(&self) -> &[u8] { |
95 | 0 | &self.0 |
96 | 0 | } |
97 | | |
98 | | /// Attempt to convert a [`PotentialUtf8`] to a `str`. |
99 | | /// |
100 | | /// # Examples |
101 | | /// |
102 | | /// ``` |
103 | | /// use potential_utf::PotentialUtf8; |
104 | | /// |
105 | | /// static A: &PotentialUtf8 = PotentialUtf8::from_bytes(b"abc"); |
106 | | /// |
107 | | /// let b = A.try_as_str().unwrap(); |
108 | | /// assert_eq!(b, "abc"); |
109 | | /// ``` |
110 | | // Note: this is const starting in 1.63 |
111 | | #[inline] |
112 | 0 | pub fn try_as_str(&self) -> Result<&str, core::str::Utf8Error> { |
113 | 0 | core::str::from_utf8(&self.0) |
114 | 0 | } |
115 | | } |
116 | | |
117 | | impl<'a> From<&'a str> for &'a PotentialUtf8 { |
118 | | #[inline] |
119 | 0 | fn from(other: &'a str) -> Self { |
120 | 0 | PotentialUtf8::from_str(other) |
121 | 0 | } |
122 | | } |
123 | | |
124 | | impl PartialEq<str> for PotentialUtf8 { |
125 | 0 | fn eq(&self, other: &str) -> bool { |
126 | 0 | self.eq(Self::from_str(other)) |
127 | 0 | } |
128 | | } |
129 | | |
130 | | impl PartialOrd<str> for PotentialUtf8 { |
131 | 0 | fn partial_cmp(&self, other: &str) -> Option<Ordering> { |
132 | 0 | self.partial_cmp(Self::from_str(other)) |
133 | 0 | } |
134 | | } |
135 | | |
136 | | impl PartialEq<PotentialUtf8> for str { |
137 | 0 | fn eq(&self, other: &PotentialUtf8) -> bool { |
138 | 0 | PotentialUtf8::from_str(self).eq(other) |
139 | 0 | } |
140 | | } |
141 | | |
142 | | impl PartialOrd<PotentialUtf8> for str { |
143 | 0 | fn partial_cmp(&self, other: &PotentialUtf8) -> Option<Ordering> { |
144 | 0 | PotentialUtf8::from_str(self).partial_cmp(other) |
145 | 0 | } |
146 | | } |
147 | | |
#[cfg(feature = "alloc")]
impl From<Box<str>> for Box<PotentialUtf8> {
    /// Converts an owned boxed string into a boxed [`PotentialUtf8`] by handing
    /// its boxed bytes to [`PotentialUtf8::from_boxed_bytes`].
    #[inline]
    fn from(other: Box<str>) -> Self {
        PotentialUtf8::from_boxed_bytes(other.into_boxed_bytes())
    }
}
155 | | |
156 | | impl Deref for PotentialUtf8 { |
157 | | type Target = [u8]; |
158 | 0 | fn deref(&self) -> &Self::Target { |
159 | 0 | &self.0 |
160 | 0 | } |
161 | | } |
162 | | |
/// Allows [`PotentialUtf8`] to be used as a key or value type in a `zerovec` map.
///
/// This impl requires enabling both the optional `zerovec` and `alloc` Cargo features
/// (the owned form below is a `Box`).
#[cfg(all(feature = "zerovec", feature = "alloc"))]
impl<'a> zerovec::maps::ZeroMapKV<'a> for PotentialUtf8 {
    // Variable-length entries are held in a `VarZeroVec` / `VarZeroSlice`.
    type Container = zerovec::VarZeroVec<'a, PotentialUtf8>;
    type Slice = zerovec::VarZeroSlice<PotentialUtf8>;
    // Lookups yield the unsized type itself; the owned counterpart is boxed.
    type GetType = PotentialUtf8;
    type OwnedType = Box<PotentialUtf8>;
}
171 | | |
// Safety (based on the safety checklist on the VarULE trait):
// 1. PotentialUtf8 does not include any uninitialized or padding bytes (transparent over a ULE)
// 2. PotentialUtf8 is aligned to 1 byte (transparent over a ULE)
// 3. The impl of `validate_bytes()` returns an error if any byte is not valid (impossible)
// 4. The impl of `validate_bytes()` returns an error if the slice cannot be used in its entirety (impossible)
// 5. The impl of `from_bytes_unchecked()` returns a reference to the same data (returns the argument directly)
// 6. All other methods are defaulted
// 7. `[T]` byte equality is semantic equality (transparent over a ULE)
/// This impl requires enabling the optional `zerovec` Cargo feature
#[cfg(feature = "zerovec")]
unsafe impl zerovec::ule::VarULE for PotentialUtf8 {
    /// Accepts every byte slice: no UTF-8 validation is performed here, so this
    /// always returns `Ok(())`.
    #[inline]
    fn validate_bytes(_: &[u8]) -> Result<(), zerovec::ule::UleError> {
        Ok(())
    }
    /// Reinterprets `bytes` as a [`PotentialUtf8`] by delegating to
    /// [`PotentialUtf8::from_bytes`].
    #[inline]
    unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self {
        PotentialUtf8::from_bytes(bytes)
    }
}
192 | | |
/// Serializes as a string in human-readable formats and as raw bytes otherwise.
///
/// UTF-8 validity is only required where it is needed: the human-readable
/// branch fails with a custom error when the bytes are not valid UTF-8, while
/// the binary branch writes the bytes as they are. This mirrors the
/// `Deserialize` impls in this file, whose binary branch accepts arbitrary
/// bytes without validation.
///
/// This impl requires enabling the optional `serde` Cargo feature
#[cfg(feature = "serde")]
impl serde_core::Serialize for PotentialUtf8 {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde_core::Serializer,
    {
        if serializer.is_human_readable() {
            use serde_core::ser::Error;
            // A string can only be emitted if the bytes really are UTF-8.
            let s = self
                .try_as_str()
                .map_err(|_| S::Error::custom("invalid UTF-8 in PotentialUtf8"))?;
            serializer.serialize_str(s)
        } else {
            // Binary formats carry the bytes verbatim, so no validation pass is needed.
            serializer.serialize_bytes(&self.0)
        }
    }
}
211 | | |
/// Deserializes an owned [`PotentialUtf8`]: from a string in human-readable
/// formats, and from a byte sequence otherwise.
///
/// This impl requires enabling the optional `serde` Cargo feature
#[cfg(all(feature = "serde", feature = "alloc"))]
impl<'de> serde_core::Deserialize<'de> for Box<PotentialUtf8> {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde_core::Deserializer<'de>,
    {
        // Binary formats: take the bytes as-is, without UTF-8 validation.
        if !deserializer.is_human_readable() {
            return Box::<[u8]>::deserialize(deserializer).map(PotentialUtf8::from_boxed_bytes);
        }
        // Human-readable formats: the payload is read as a string.
        Box::<str>::deserialize(deserializer).map(PotentialUtf8::from_boxed_str)
    }
}
228 | | |
/// Deserializes a borrowed [`PotentialUtf8`]: from a borrowed string in
/// human-readable formats, and from borrowed bytes otherwise.
///
/// This impl requires enabling the optional `serde` Cargo feature
#[cfg(feature = "serde")]
impl<'de, 'a> serde_core::Deserialize<'de> for &'a PotentialUtf8
where
    'de: 'a,
{
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde_core::Deserializer<'de>,
    {
        // Binary formats: borrow the bytes as-is, without UTF-8 validation.
        if !deserializer.is_human_readable() {
            return <&[u8]>::deserialize(deserializer).map(PotentialUtf8::from_bytes);
        }
        // Human-readable formats: the payload is read as a borrowed string.
        <&str>::deserialize(deserializer).map(PotentialUtf8::from_str)
    }
}
248 | | |
/// A `u16` slice that is expected to be a UTF-16 string but does not enforce that invariant.
///
/// Construct one with [`PotentialUtf16::from_slice`]; decode it with
/// [`PotentialUtf16::chars`].
// `repr(transparent)` gives this type the same layout as `[u16]`; `from_slice`
// relies on that.
#[repr(transparent)]
// The derived comparisons compare the wrapped code units.
#[derive(PartialEq, Eq, PartialOrd, Ord)]
#[allow(clippy::exhaustive_structs)] // transparent newtype
pub struct PotentialUtf16(
    /// The raw, unvalidated UTF-16 code units.
    pub [u16],
);
253 | | |
254 | | impl fmt::Debug for PotentialUtf16 { |
255 | 0 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
256 | | // Debug as a string if possible |
257 | 0 | for c in char::decode_utf16(self.0.iter().copied()) { |
258 | 0 | match c { |
259 | 0 | Ok(c) => write!(f, "{c}")?, |
260 | 0 | Err(e) => write!(f, "\\0x{:x}", e.unpaired_surrogate())?, |
261 | | } |
262 | | } |
263 | 0 | Ok(()) |
264 | 0 | } |
265 | | } |
266 | | |
267 | | impl PotentialUtf16 { |
268 | | /// Create a [`PotentialUtf16`] from a u16 slice. |
269 | | #[inline] |
270 | 0 | pub const fn from_slice(other: &[u16]) -> &Self { |
271 | | // Safety: PotentialUtf16 is transparent over [u16] |
272 | 0 | unsafe { core::mem::transmute(other) } |
273 | 0 | } |
274 | | |
275 | 0 | pub fn chars(&self) -> impl Iterator<Item = char> + '_ { |
276 | 0 | char::decode_utf16(self.0.iter().copied()).map(|c| c.unwrap_or(char::REPLACEMENT_CHARACTER)) |
277 | 0 | } |
278 | | } |