/rust/registry/src/index.crates.io-1949cf8c6b5b557f/potential_utf-0.1.3/src/ustr.rs
Line  | Count  | Source  | 
1  |  | // This file is part of ICU4X. For terms of use, please see the file  | 
2  |  | // called LICENSE at the top level of the ICU4X source tree  | 
3  |  | // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).  | 
4  |  |  | 
5  |  | #[cfg(feature = "alloc")]  | 
6  |  | use alloc::boxed::Box;  | 
7  |  | use core::cmp::Ordering;  | 
8  |  | use core::fmt;  | 
9  |  | use core::ops::Deref;  | 
10  |  |  | 
/// A byte slice that is expected to hold UTF-8 text, with no guarantee that it actually does.
///
/// Reach for this type in place of `str` when UTF-8 does not have to be enforced during
/// deserialization. Map keys are a typical case: they never need to be reified as `str`s.
///
/// [`PotentialUtf8`] derefs to `[u8]`. Call [`Self::try_as_str()`] to obtain a `str`.
///
/// Compared with a plain `[u8]`, the main benefit is that this type serializes as a string
/// in human-readable formats like JSON.
///
/// # Examples
///
/// Using a [`PotentialUtf8`] as the key of a [`ZeroMap`]:
///
/// ```
/// use potential_utf::PotentialUtf8;
/// use zerovec::ZeroMap;
///
/// // This map is cheap to deserialize, as we don't need to perform UTF-8 validation.
/// let map: ZeroMap<PotentialUtf8, u8> = [
///     (PotentialUtf8::from_bytes(b"abc"), 11),
///     (PotentialUtf8::from_bytes(b"def"), 22),
///     (PotentialUtf8::from_bytes(b"ghi"), 33),
/// ]
/// .into_iter()
/// .collect();
///
/// let key = "abc";
/// let value = map.get_copied(PotentialUtf8::from_str(key));
/// assert_eq!(Some(11), value);
/// ```
///
/// [`ZeroMap`]: zerovec::ZeroMap
#[repr(transparent)]
#[derive(PartialEq, Eq, PartialOrd, Ord)]
#[allow(clippy::exhaustive_structs)] // transparent newtype
pub struct PotentialUtf8(pub [u8]);

impl fmt::Debug for PotentialUtf8 {
    /// Formats the contents like a `str` when they are valid UTF-8, and like a `[u8]` otherwise.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        if let Ok(text) = self.try_as_str() {
            fmt::Debug::fmt(text, f)
        } else {
            // Not valid UTF-8: fall back to the raw byte list.
            fmt::Debug::fmt(&self.0, f)
        }
    }
}

impl PotentialUtf8 {
    /// Reinterpret a byte slice as a [`PotentialUtf8`]. The bytes are not inspected.
    #[inline]
    pub const fn from_bytes(other: &[u8]) -> &Self {
        // Safety: PotentialUtf8 is `repr(transparent)` over [u8], so the pointer (including its
        // length metadata) is equally valid for either type.
        unsafe { &*(other as *const [u8] as *const Self) }
    }

    /// Reinterpret a string slice as a [`PotentialUtf8`].
    #[inline]
    pub const fn from_str(s: &str) -> &Self {
        Self::from_bytes(s.as_bytes())
    }

    /// Convert boxed bytes into a boxed [`PotentialUtf8`], reusing the allocation.
    #[inline]
    #[cfg(feature = "alloc")]
    pub fn from_boxed_bytes(other: Box<[u8]>) -> Box<Self> {
        // Safety: PotentialUtf8 is `repr(transparent)` over [u8], so the allocation behind the
        // box has the layout of both types.
        unsafe { Box::from_raw(Box::into_raw(other) as *mut Self) }
    }

    /// Convert a boxed `str` into a boxed [`PotentialUtf8`].
    #[inline]
    #[cfg(feature = "alloc")]
    pub fn from_boxed_str(other: Box<str>) -> Box<Self> {
        let bytes = other.into_boxed_bytes();
        Self::from_boxed_bytes(bytes)
    }

    /// Get the underlying bytes of a [`PotentialUtf8`].
    #[inline]
    pub const fn as_bytes(&self) -> &[u8] {
        &self.0
    }

    /// Attempt to view a [`PotentialUtf8`] as a `str`.
    ///
    /// Returns the error from [`core::str::from_utf8`] if the bytes are not valid UTF-8.
    ///
    /// # Examples
    ///
    /// ```
    /// use potential_utf::PotentialUtf8;
    ///
    /// static A: &PotentialUtf8 = PotentialUtf8::from_bytes(b"abc");
    ///
    /// let b = A.try_as_str().unwrap();
    /// assert_eq!(b, "abc");
    /// ```
    // Note: this is const starting in 1.63
    #[inline]
    pub fn try_as_str(&self) -> Result<&str, core::str::Utf8Error> {
        core::str::from_utf8(self.as_bytes())
    }
}
112  |  |  | 
113  |  | impl<'a> From<&'a str> for &'a PotentialUtf8 { | 
114  |  |     #[inline]  | 
115  | 0  |     fn from(other: &'a str) -> Self { | 
116  | 0  |         PotentialUtf8::from_str(other)  | 
117  | 0  |     }  | 
118  |  | }  | 
119  |  |  | 
120  |  | impl PartialEq<str> for PotentialUtf8 { | 
121  | 0  |     fn eq(&self, other: &str) -> bool { | 
122  | 0  |         self.eq(Self::from_str(other))  | 
123  | 0  |     }  | 
124  |  | }  | 
125  |  |  | 
126  |  | impl PartialOrd<str> for PotentialUtf8 { | 
127  | 0  |     fn partial_cmp(&self, other: &str) -> Option<Ordering> { | 
128  | 0  |         self.partial_cmp(Self::from_str(other))  | 
129  | 0  |     }  | 
130  |  | }  | 
131  |  |  | 
132  |  | impl PartialEq<PotentialUtf8> for str { | 
133  | 0  |     fn eq(&self, other: &PotentialUtf8) -> bool { | 
134  | 0  |         PotentialUtf8::from_str(self).eq(other)  | 
135  | 0  |     }  | 
136  |  | }  | 
137  |  |  | 
138  |  | impl PartialOrd<PotentialUtf8> for str { | 
139  | 0  |     fn partial_cmp(&self, other: &PotentialUtf8) -> Option<Ordering> { | 
140  | 0  |         PotentialUtf8::from_str(self).partial_cmp(other)  | 
141  | 0  |     }  | 
142  |  | }  | 
143  |  |  | 
/// Converts a boxed `str` into a boxed [`PotentialUtf8`].
#[cfg(feature = "alloc")]
impl From<Box<str>> for Box<PotentialUtf8> {
    #[inline]
    fn from(other: Box<str>) -> Self {
        PotentialUtf8::from_boxed_bytes(other.into_boxed_bytes())
    }
}
151  |  |  | 
152  |  | impl Deref for PotentialUtf8 { | 
153  |  |     type Target = [u8];  | 
154  | 0  |     fn deref(&self) -> &Self::Target { | 
155  | 0  |         &self.0  | 
156  | 0  |     }  | 
157  |  | }  | 
158  |  |  | 
/// Selects the `zerovec` variable-length containers used when [`PotentialUtf8`] is a
/// `ZeroMap` key or value.
///
/// This impl requires enabling the optional `zerovec` Cargo feature (together with `alloc`)
#[cfg(all(feature = "zerovec", feature = "alloc"))]
impl<'a> zerovec::maps::ZeroMapKV<'a> for PotentialUtf8 {
    type GetType = PotentialUtf8;
    type OwnedType = Box<PotentialUtf8>;
    type Slice = zerovec::VarZeroSlice<PotentialUtf8>;
    type Container = zerovec::VarZeroVec<'a, PotentialUtf8>;
}
167  |  |  | 
// Safety (following the safety checklist on the VarULE trait):
//  1. PotentialUtf8 has no uninitialized or padding bytes (it is transparent over a ULE)
//  2. PotentialUtf8 has an alignment of 1 byte (it is transparent over a ULE)
//  3. `validate_bytes()` must return an error if any byte is not valid (impossible here)
//  4. `validate_bytes()` must return an error if the slice cannot be used in its entirety
//     (impossible here)
//  5. `from_bytes_unchecked()` returns a reference to the same data (the argument is returned
//     directly)
//  6. Every other method keeps its default implementation
//  7. `[T]` byte equality is semantic equality (it is transparent over a ULE)
/// This impl requires enabling the optional `zerovec` Cargo feature
#[cfg(feature = "zerovec")]
unsafe impl zerovec::ule::VarULE for PotentialUtf8 {
    #[inline]
    unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self {
        // Delegates to the safe inherent constructor, which performs no checks of its own.
        Self::from_bytes(bytes)
    }

    #[inline]
    fn validate_bytes(_bytes: &[u8]) -> Result<(), zerovec::ule::UleError> {
        // Every byte sequence is accepted.
        Ok(())
    }
}
188  |  |  | 
/// Serializes the contents as a string for human-readable formats and as bytes otherwise.
///
/// The bytes are checked for UTF-8 validity before anything is written, so serialization
/// fails with a custom error on invalid UTF-8 regardless of the format.
///
/// This impl requires enabling the optional `serde` Cargo feature
#[cfg(feature = "serde")]
impl serde::Serialize for PotentialUtf8 {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        let text = match self.try_as_str() {
            Ok(text) => text,
            Err(_) => {
                return Err(<S::Error as serde::ser::Error>::custom(
                    "invalid UTF-8 in PotentialUtf8",
                ));
            }
        };
        if serializer.is_human_readable() {
            serializer.serialize_str(text)
        } else {
            serializer.serialize_bytes(text.as_bytes())
        }
    }
}
207  |  |  | 
/// Deserializes an owned [`PotentialUtf8`]: from a string for human-readable formats and from
/// bytes otherwise.
///
/// This impl requires enabling the optional `serde` Cargo feature (together with `alloc`)
#[cfg(all(feature = "serde", feature = "alloc"))]
impl<'de> serde::Deserialize<'de> for Box<PotentialUtf8> {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        if deserializer.is_human_readable() {
            Box::<str>::deserialize(deserializer).map(PotentialUtf8::from_boxed_str)
        } else {
            Box::<[u8]>::deserialize(deserializer).map(PotentialUtf8::from_boxed_bytes)
        }
    }
}
224  |  |  | 
/// Deserializes a borrowed [`PotentialUtf8`]: from a borrowed string for human-readable
/// formats and from borrowed bytes otherwise.
///
/// This impl requires enabling the optional `serde` Cargo feature
#[cfg(feature = "serde")]
impl<'de, 'a> serde::Deserialize<'de> for &'a PotentialUtf8
where
    'de: 'a,
{
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        if deserializer.is_human_readable() {
            <&str>::deserialize(deserializer).map(PotentialUtf8::from_str)
        } else {
            <&[u8]>::deserialize(deserializer).map(PotentialUtf8::from_bytes)
        }
    }
}
244  |  |  | 
/// A transparent wrapper around `[u16]` whose contents are decoded as UTF-16 when formatted
/// or iterated, without requiring the code units to be well-formed.
#[repr(transparent)]
#[derive(PartialEq, Eq, PartialOrd, Ord)]
#[allow(clippy::exhaustive_structs)] // transparent newtype
pub struct PotentialUtf16(pub [u16]);

impl fmt::Debug for PotentialUtf16 {
    /// Writes each decoded character as-is; an unpaired surrogate is written as `\0x`
    /// followed by its value in lowercase hex.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Debug as a string if possible
        char::decode_utf16(self.0.iter().copied()).try_for_each(|c| match c {
            Ok(c) => write!(f, "{c}"),
            Err(e) => write!(f, "\\0x{:x}", e.unpaired_surrogate()),
        })
    }
}

impl PotentialUtf16 {
    /// Reinterpret a u16 slice as a [`PotentialUtf16`]. The code units are not inspected.
    #[inline]
    pub const fn from_slice(other: &[u16]) -> &Self {
        // Safety: PotentialUtf16 is `repr(transparent)` over [u16], so the pointer (including
        // its length metadata) is equally valid for either type.
        unsafe { &*(other as *const [u16] as *const Self) }
    }

    /// Iterate over the decoded characters, substituting
    /// [`char::REPLACEMENT_CHARACTER`] for each unpaired surrogate.
    pub fn chars(&self) -> impl Iterator<Item = char> + '_ {
        let units = self.0.iter().copied();
        char::decode_utf16(units).map(|decoded| decoded.unwrap_or(char::REPLACEMENT_CHARACTER))
    }
}