/rust/registry/src/index.crates.io-1949cf8c6b5b557f/potential_utf-0.1.3/src/ustr.rs
Line | Count | Source |
1 | | // This file is part of ICU4X. For terms of use, please see the file |
2 | | // called LICENSE at the top level of the ICU4X source tree |
3 | | // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). |
4 | | |
5 | | #[cfg(feature = "alloc")] |
6 | | use alloc::boxed::Box; |
7 | | use core::cmp::Ordering; |
8 | | use core::fmt; |
9 | | use core::ops::Deref; |
10 | | |
11 | | /// A byte slice that is expected to be a UTF-8 string but does not enforce that invariant. |
12 | | /// |
13 | | /// Use this type instead of `str` if you don't need to enforce UTF-8 during deserialization. For |
14 | | /// example, strings that are keys of a map don't need to ever be reified as `str`s. |
15 | | /// |
16 | | /// [`PotentialUtf8`] derefs to `[u8]`. To obtain a `str`, use [`Self::try_as_str()`]. |
17 | | /// |
18 | | /// The main advantage of this type over `[u8]` is that it serializes as a string in |
19 | | /// human-readable formats like JSON. |
20 | | /// |
21 | | /// # Examples |
22 | | /// |
23 | | /// Using a [`PotentialUtf8`] as the key of a [`ZeroMap`]: |
24 | | /// |
25 | | /// ``` |
26 | | /// use potential_utf::PotentialUtf8; |
27 | | /// use zerovec::ZeroMap; |
28 | | /// |
29 | | /// // This map is cheap to deserialize, as we don't need to perform UTF-8 validation. |
30 | | /// let map: ZeroMap<PotentialUtf8, u8> = [ |
31 | | /// (PotentialUtf8::from_bytes(b"abc"), 11), |
32 | | /// (PotentialUtf8::from_bytes(b"def"), 22), |
33 | | /// (PotentialUtf8::from_bytes(b"ghi"), 33), |
34 | | /// ] |
35 | | /// .into_iter() |
36 | | /// .collect(); |
37 | | /// |
38 | | /// let key = "abc"; |
39 | | /// let value = map.get_copied(PotentialUtf8::from_str(key)); |
40 | | /// assert_eq!(Some(11), value); |
41 | | /// ``` |
42 | | /// |
43 | | /// [`ZeroMap`]: zerovec::ZeroMap |
44 | | #[repr(transparent)] |
45 | | #[derive(PartialEq, Eq, PartialOrd, Ord)] |
46 | | #[allow(clippy::exhaustive_structs)] // transparent newtype |
47 | | pub struct PotentialUtf8(pub [u8]); |
48 | | |
49 | | impl fmt::Debug for PotentialUtf8 { |
50 | 0 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
51 | | // Debug as a string if possible |
52 | 0 | match self.try_as_str() { |
53 | 0 | Ok(s) => fmt::Debug::fmt(s, f), |
54 | 0 | Err(_) => fmt::Debug::fmt(&self.0, f), |
55 | | } |
56 | 0 | } |
57 | | } |
58 | | |
59 | | impl PotentialUtf8 { |
60 | | /// Create a [`PotentialUtf8`] from a byte slice. |
61 | | #[inline] |
62 | 0 | pub const fn from_bytes(other: &[u8]) -> &Self { |
63 | | // Safety: PotentialUtf8 is transparent over [u8] |
64 | 0 | unsafe { core::mem::transmute(other) } |
65 | 0 | } |
66 | | |
67 | | /// Create a [`PotentialUtf8`] from a string slice. |
68 | | #[inline] |
69 | 0 | pub const fn from_str(s: &str) -> &Self { |
70 | 0 | Self::from_bytes(s.as_bytes()) |
71 | 0 | } |
72 | | |
73 | | /// Create a [`PotentialUtf8`] from boxed bytes. |
74 | | #[inline] |
75 | | #[cfg(feature = "alloc")] |
76 | | pub fn from_boxed_bytes(other: Box<[u8]>) -> Box<Self> { |
77 | | // Safety: PotentialUtf8 is transparent over [u8] |
78 | | unsafe { core::mem::transmute(other) } |
79 | | } |
80 | | |
81 | | /// Create a [`PotentialUtf8`] from a boxed `str`. |
82 | | #[inline] |
83 | | #[cfg(feature = "alloc")] |
84 | | pub fn from_boxed_str(other: Box<str>) -> Box<Self> { |
85 | | Self::from_boxed_bytes(other.into_boxed_bytes()) |
86 | | } |
87 | | |
88 | | /// Get the bytes from a [`PotentialUtf8`]. |
89 | | #[inline] |
90 | 0 | pub const fn as_bytes(&self) -> &[u8] { |
91 | 0 | &self.0 |
92 | 0 | } |
93 | | |
94 | | /// Attempt to convert a [`PotentialUtf8`] to a `str`. |
95 | | /// |
96 | | /// # Examples |
97 | | /// |
98 | | /// ``` |
99 | | /// use potential_utf::PotentialUtf8; |
100 | | /// |
101 | | /// static A: &PotentialUtf8 = PotentialUtf8::from_bytes(b"abc"); |
102 | | /// |
103 | | /// let b = A.try_as_str().unwrap(); |
104 | | /// assert_eq!(b, "abc"); |
105 | | /// ``` |
106 | | // Note: `core::str::from_utf8` is const starting in Rust 1.63 |
107 | | #[inline] |
108 | 0 | pub fn try_as_str(&self) -> Result<&str, core::str::Utf8Error> { |
109 | 0 | core::str::from_utf8(&self.0) |
110 | 0 | } |
111 | | } |
112 | | |
113 | | impl<'a> From<&'a str> for &'a PotentialUtf8 { |
114 | | #[inline] |
115 | 0 | fn from(other: &'a str) -> Self { |
116 | 0 | PotentialUtf8::from_str(other) |
117 | 0 | } |
118 | | } |
119 | | |
120 | | impl PartialEq<str> for PotentialUtf8 { |
121 | 0 | fn eq(&self, other: &str) -> bool { |
122 | 0 | self.eq(Self::from_str(other)) |
123 | 0 | } |
124 | | } |
125 | | |
126 | | impl PartialOrd<str> for PotentialUtf8 { |
127 | 0 | fn partial_cmp(&self, other: &str) -> Option<Ordering> { |
128 | 0 | self.partial_cmp(Self::from_str(other)) |
129 | 0 | } |
130 | | } |
131 | | |
132 | | impl PartialEq<PotentialUtf8> for str { |
133 | 0 | fn eq(&self, other: &PotentialUtf8) -> bool { |
134 | 0 | PotentialUtf8::from_str(self).eq(other) |
135 | 0 | } |
136 | | } |
137 | | |
138 | | impl PartialOrd<PotentialUtf8> for str { |
139 | 0 | fn partial_cmp(&self, other: &PotentialUtf8) -> Option<Ordering> { |
140 | 0 | PotentialUtf8::from_str(self).partial_cmp(other) |
141 | 0 | } |
142 | | } |
143 | | |
144 | | #[cfg(feature = "alloc")] |
145 | | impl From<Box<str>> for Box<PotentialUtf8> { |
146 | | #[inline] |
147 | | fn from(other: Box<str>) -> Self { |
148 | | PotentialUtf8::from_boxed_str(other) |
149 | | } |
150 | | } |
151 | | |
152 | | impl Deref for PotentialUtf8 { |
153 | | type Target = [u8]; |
154 | 0 | fn deref(&self) -> &Self::Target { |
155 | 0 | &self.0 |
156 | 0 | } |
157 | | } |
158 | | |
159 | | /// This impl requires enabling the optional `zerovec` and `alloc` Cargo features |
160 | | #[cfg(all(feature = "zerovec", feature = "alloc"))] |
161 | | impl<'a> zerovec::maps::ZeroMapKV<'a> for PotentialUtf8 { |
162 | | type Container = zerovec::VarZeroVec<'a, PotentialUtf8>; |
163 | | type Slice = zerovec::VarZeroSlice<PotentialUtf8>; |
164 | | type GetType = PotentialUtf8; |
165 | | type OwnedType = Box<PotentialUtf8>; |
166 | | } |
167 | | |
168 | | // Safety (based on the safety checklist on the VarULE trait): |
169 | | // 1. PotentialUtf8 does not include any uninitialized or padding bytes (transparent over a ULE) |
170 | | // 2. PotentialUtf8 is aligned to 1 byte (transparent over a ULE) |
171 | | // 3. The impl of `validate_bytes()` returns an error if any byte is not valid (impossible) |
172 | | // 4. The impl of `validate_bytes()` returns an error if the slice cannot be used in its entirety (impossible) |
173 | | // 5. The impl of `from_bytes_unchecked()` returns a reference to the same data (returns the argument directly) |
174 | | // 6. All other methods are defaulted |
175 | | // 7. `[T]` byte equality is semantic equality (transparent over a ULE) |
176 | | /// This impl requires enabling the optional `zerovec` Cargo feature |
177 | | #[cfg(feature = "zerovec")] |
178 | | unsafe impl zerovec::ule::VarULE for PotentialUtf8 { |
179 | | #[inline] |
180 | 0 | fn validate_bytes(_: &[u8]) -> Result<(), zerovec::ule::UleError> { |
181 | 0 | Ok(()) |
182 | 0 | } |
183 | | #[inline] |
184 | 0 | unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self { |
185 | 0 | PotentialUtf8::from_bytes(bytes) |
186 | 0 | } |
187 | | } |
188 | | |
189 | | /// This impl requires enabling the optional `serde` Cargo feature |
190 | | #[cfg(feature = "serde")] |
191 | | impl serde::Serialize for PotentialUtf8 { |
192 | | fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> |
193 | | where |
194 | | S: serde::Serializer, |
195 | | { |
196 | | use serde::ser::Error; |
197 | | let s = self |
198 | | .try_as_str() |
199 | | .map_err(|_| S::Error::custom("invalid UTF-8 in PotentialUtf8"))?; |
200 | | if serializer.is_human_readable() { |
201 | | serializer.serialize_str(s) |
202 | | } else { |
203 | | serializer.serialize_bytes(s.as_bytes()) |
204 | | } |
205 | | } |
206 | | } |
207 | | |
208 | | /// This impl requires enabling the optional `serde` and `alloc` Cargo features |
209 | | #[cfg(all(feature = "serde", feature = "alloc"))] |
210 | | impl<'de> serde::Deserialize<'de> for Box<PotentialUtf8> { |
211 | | fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> |
212 | | where |
213 | | D: serde::Deserializer<'de>, |
214 | | { |
215 | | if deserializer.is_human_readable() { |
216 | | let boxed_str = Box::<str>::deserialize(deserializer)?; |
217 | | Ok(PotentialUtf8::from_boxed_str(boxed_str)) |
218 | | } else { |
219 | | let boxed_bytes = Box::<[u8]>::deserialize(deserializer)?; |
220 | | Ok(PotentialUtf8::from_boxed_bytes(boxed_bytes)) |
221 | | } |
222 | | } |
223 | | } |
224 | | |
225 | | /// This impl requires enabling the optional `serde` Cargo feature |
226 | | #[cfg(feature = "serde")] |
227 | | impl<'de, 'a> serde::Deserialize<'de> for &'a PotentialUtf8 |
228 | | where |
229 | | 'de: 'a, |
230 | | { |
231 | | fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> |
232 | | where |
233 | | D: serde::Deserializer<'de>, |
234 | | { |
235 | | if deserializer.is_human_readable() { |
236 | | let s = <&str>::deserialize(deserializer)?; |
237 | | Ok(PotentialUtf8::from_str(s)) |
238 | | } else { |
239 | | let bytes = <&[u8]>::deserialize(deserializer)?; |
240 | | Ok(PotentialUtf8::from_bytes(bytes)) |
241 | | } |
242 | | } |
243 | | } |
244 | | |
245 | | #[repr(transparent)] |
246 | | #[derive(PartialEq, Eq, PartialOrd, Ord)] |
247 | | #[allow(clippy::exhaustive_structs)] // transparent newtype |
248 | | pub struct PotentialUtf16(pub [u16]); |
249 | | |
250 | | impl fmt::Debug for PotentialUtf16 { |
251 | 0 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
252 | | // Debug as a string if possible |
253 | 0 | for c in char::decode_utf16(self.0.iter().copied()) { |
254 | 0 | match c { |
255 | 0 | Ok(c) => write!(f, "{c}")?, |
256 | 0 | Err(e) => write!(f, "\\0x{:x}", e.unpaired_surrogate())?, |
257 | | } |
258 | | } |
259 | 0 | Ok(()) |
260 | 0 | } |
261 | | } |
262 | | |
263 | | impl PotentialUtf16 { |
264 | | /// Create a [`PotentialUtf16`] from a u16 slice. |
265 | | #[inline] |
266 | 0 | pub const fn from_slice(other: &[u16]) -> &Self { |
267 | | // Safety: PotentialUtf16 is transparent over [u16] |
268 | 0 | unsafe { core::mem::transmute(other) } |
269 | 0 | } |
270 | | |
271 | 0 | pub fn chars(&self) -> impl Iterator<Item = char> + '_ { |
272 | 0 | char::decode_utf16(self.0.iter().copied()).map(|c| c.unwrap_or(char::REPLACEMENT_CHARACTER)) |
273 | 0 | } |
274 | | } |