/rust/registry/src/index.crates.io-1949cf8c6b5b557f/zerovec-0.11.4/src/cow.rs
Line | Count | Source |
1 | | // This file is part of ICU4X. For terms of use, please see the file |
2 | | // called LICENSE at the top level of the ICU4X source tree |
3 | | // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). |
4 | | |
5 | | use crate::ule::{EncodeAsVarULE, UleError, VarULE}; |
6 | | #[cfg(feature = "alloc")] |
7 | | use alloc::boxed::Box; |
8 | | use core::fmt; |
9 | | use core::marker::PhantomData; |
10 | | #[cfg(feature = "alloc")] |
11 | | use core::mem::ManuallyDrop; |
12 | | use core::ops::Deref; |
13 | | use core::ptr::NonNull; |
14 | | use zerofrom::ZeroFrom; |
15 | | |
16 | | /// Copy-on-write type that efficiently represents [`VarULE`] types as their bitstream representation. |
17 | | /// |
18 | | /// The primary use case for [`VarULE`] types is the ability to store complex variable-length data structures |
19 | | /// inside variable-length collections like [`crate::VarZeroVec`]. |
20 | | /// |
21 | | /// Underlying this ability is the fact that [`VarULE`] types can be efficiently represented as a flat |
22 | | /// bytestream. |
23 | | /// |
24 | | /// In zero-copy cases, sometimes one wishes to unconditionally use this bytestream representation, for example |
25 | | /// to save stack size. A struct with five `Cow<'a, str>`s is not as stack-efficient as a single `Cow` containing |
26 | | /// the bytestream representation of, say, `Tuple5VarULE<str, str, str, str, str>`. |
27 | | /// |
28 | | /// This type helps in this case: It is logically a `Cow<'a, V>`, with some optimizations, that is guaranteed |
29 | | /// to serialize as a byte stream in machine-readable scenarios. |
30 | | /// |
31 | | /// During human-readable serialization, it will fall back to the serde impls on `V`, which ought to have |
32 | | /// a human-readable variant. |
33 | | pub struct VarZeroCow<'a, V: ?Sized> { |
34 | | /// Safety invariant: Contained slice must be a valid V |
35 | | /// It may or may not have a lifetime valid for 'a, but it must be valid for as long as this type is around. |
36 | | raw: RawVarZeroCow, |
37 | | marker1: PhantomData<&'a V>, |
38 | | #[cfg(feature = "alloc")] |
39 | | marker2: PhantomData<Box<V>>, |
40 | | } |
41 | | |
42 | | /// VarZeroCow without the `V` to simulate a dropck eyepatch |
43 | | /// (i.e., prove to rustc that the dtor is not able to observe V or 'a) |
44 | | /// |
45 | | /// This is effectively `Cow<'a, [u8]>`, with the lifetime managed externally |
46 | | struct RawVarZeroCow { |
47 | | /// Pointer to data |
48 | | /// |
49 | | /// # Safety Invariants |
50 | | /// |
51 | | /// 1. This slice must always be valid as a byte slice |
52 | | /// 2. If `owned` is true, this slice can be freed. |
53 | | /// 3. VarZeroCow, the only user of this type, will impose an additional invariant that the buffer is a valid V |
54 | | buf: NonNull<[u8]>, |
55 | | /// The buffer is `Box<[u8]>` if true |
56 | | #[cfg(feature = "alloc")] |
57 | | owned: bool, |
58 | | // Safety: We do not need any PhantomDatas here, since the Drop impl does not observe borrowed data |
59 | | // if there is any. |
60 | | } |
61 | | |
62 | | #[cfg(feature = "alloc")] |
63 | | impl Drop for RawVarZeroCow { |
64 | 0 | fn drop(&mut self) { |
65 | | // Note: this drop impl NEVER observes borrowed data (which may have already been cleaned up by the time the impl is called) |
66 | 0 | if self.owned { |
67 | 0 | unsafe { |
68 | 0 | // Safety: (Invariant 2 on buf) |
69 | 0 | // since owned is true, this is a valid Box<[u8]> and can be cleaned up |
70 | 0 | let _ = Box::<[u8]>::from_raw(self.buf.as_ptr()); |
71 | 0 | } |
72 | 0 | } |
73 | 0 | } |
74 | | } |
75 | | |
76 | | // This is mostly just a `Cow<[u8]>`, safe to implement Send and Sync on |
77 | | unsafe impl Send for RawVarZeroCow {} |
78 | | unsafe impl Sync for RawVarZeroCow {} |
79 | | |
80 | | impl Clone for RawVarZeroCow { |
81 | 0 | fn clone(&self) -> Self { |
82 | | #[cfg(feature = "alloc")] |
83 | 0 | if self.is_owned() { |
84 | | // This clones the box |
85 | 0 | let b: Box<[u8]> = self.as_bytes().into(); |
86 | 0 | let b = ManuallyDrop::new(b); |
87 | 0 | let buf: NonNull<[u8]> = (&**b).into(); |
88 | 0 | return Self { |
89 | 0 | // Invariants upheld: |
90 | 0 | // 1 & 3: The bytes came from `self` so they're a valid value and byte slice |
91 | 0 | // 2: This is owned (we cloned it), so we set owned to true. |
92 | 0 | buf, |
93 | 0 | owned: true, |
94 | 0 | }; |
95 | 0 | } |
96 | | // Unfortunately we can't just use `new_borrowed(self.deref())` since the lifetime is shorter |
97 | 0 | Self { |
98 | 0 | // Invariants upheld: |
99 | 0 | // 1 & 3: The bytes came from `self` so they're a valid value and byte slice |
100 | 0 | // 2: This is borrowed (we're sharing a borrow), so we set owned to false. |
101 | 0 | buf: self.buf, |
102 | 0 | #[cfg(feature = "alloc")] |
103 | 0 | owned: false, |
104 | 0 | } |
105 | 0 | } |
106 | | } |
107 | | |
108 | | impl<'a, V: ?Sized> Clone for VarZeroCow<'a, V> { |
109 | 0 | fn clone(&self) -> Self { |
110 | 0 | let raw = self.raw.clone(); |
111 | | // Invariant upheld: raw came from a valid VarZeroCow, so it |
112 | | // is a valid V |
113 | 0 | unsafe { Self::from_raw(raw) } |
114 | 0 | } |
115 | | } |
116 | | |
117 | | impl<'a, V: VarULE + ?Sized> VarZeroCow<'a, V> { |
118 | | /// Construct from a slice. Errors if the slice doesn't represent a valid `V` |
119 | 0 | pub fn parse_bytes(bytes: &'a [u8]) -> Result<Self, UleError> { |
120 | 0 | let val = V::parse_bytes(bytes)?; |
121 | 0 | Ok(Self::new_borrowed(val)) |
122 | 0 | } |
123 | | |
124 | | /// Construct from an owned slice. Errors if the slice doesn't represent a valid `V` |
125 | | #[cfg(feature = "alloc")] |
126 | 0 | pub fn parse_owned_bytes(bytes: Box<[u8]>) -> Result<Self, UleError> { |
127 | 0 | V::validate_bytes(&bytes)?; |
128 | 0 | let bytes = ManuallyDrop::new(bytes); |
129 | 0 | let buf: NonNull<[u8]> = (&**bytes).into(); |
130 | 0 | let raw = RawVarZeroCow { |
131 | 0 | // Invariants upheld: |
132 | 0 | // 1 & 3: `bytes` was just checked by `V::validate_bytes`, so it is a valid value and byte slice |
133 | 0 | // 2: This is owned, so we set owned to true. |
134 | 0 | buf, |
135 | 0 | owned: true, |
136 | 0 | }; |
137 | 0 | Ok(Self { |
138 | 0 | raw, |
139 | 0 | marker1: PhantomData, |
140 | 0 | #[cfg(feature = "alloc")] |
141 | 0 | marker2: PhantomData, |
142 | 0 | }) |
143 | 0 | } |
144 | | |
145 | | /// Construct from a slice that is known to represent a valid `V` |
146 | | /// |
147 | | /// # Safety |
148 | | /// |
149 | | /// `bytes` must be a valid `V`, i.e. it must successfully pass through |
150 | | /// `V::parse_bytes()` or `V::validate_bytes()`. |
151 | 0 | pub const unsafe fn from_bytes_unchecked(bytes: &'a [u8]) -> Self { |
152 | | unsafe { |
153 | | // Safety: bytes is an &T which is always non-null |
154 | 0 | let buf: NonNull<[u8]> = NonNull::new_unchecked(bytes as *const [u8] as *mut [u8]); |
155 | 0 | let raw = RawVarZeroCow { |
156 | 0 | // Invariants upheld: |
157 | 0 | // 1 & 3: Passed upstream to caller |
158 | 0 | // 2: This is borrowed, so we set owned to false. |
159 | 0 | buf, |
160 | 0 | #[cfg(feature = "alloc")] |
161 | 0 | owned: false, |
162 | 0 | }; |
163 | | // Invariant passed upstream to caller |
164 | 0 | Self::from_raw(raw) |
165 | | } |
166 | 0 | } |
167 | | |
168 | | /// Construct this from an [`EncodeAsVarULE`] version of the contained type |
169 | | /// |
170 | | /// Will always construct an owned version |
171 | | #[cfg(feature = "alloc")] |
172 | 0 | pub fn from_encodeable<E: EncodeAsVarULE<V>>(encodeable: &E) -> Self { |
173 | 0 | let b = crate::ule::encode_varule_to_box(encodeable); |
174 | 0 | Self::new_owned(b) |
175 | 0 | } |
176 | | |
177 | | /// Construct a new borrowed version of this |
178 | 0 | pub fn new_borrowed(val: &'a V) -> Self { |
179 | | unsafe { |
180 | | // Safety: val is a valid V, by type |
181 | 0 | Self::from_bytes_unchecked(val.as_bytes()) |
182 | | } |
183 | 0 | } |
184 | | |
185 | | /// Construct a new owned version of this |
186 | | #[cfg(feature = "alloc")] |
187 | 0 | pub fn new_owned(val: Box<V>) -> Self { |
188 | 0 | let val = ManuallyDrop::new(val); |
189 | 0 | let buf: NonNull<[u8]> = val.as_bytes().into(); |
190 | 0 | let raw = RawVarZeroCow { |
191 | 0 | // Invariants upheld: |
192 | 0 | // 1 & 3: The bytes came from `val` so they're a valid value and byte slice |
193 | 0 | // 2: This is owned, so we set owned to true. |
194 | 0 | buf, |
195 | 0 | #[cfg(feature = "alloc")] |
196 | 0 | owned: true, |
197 | 0 | }; |
198 | | // The bytes came from `val`, so it's a valid value |
199 | 0 | unsafe { Self::from_raw(raw) } |
200 | 0 | } |
201 | | } |
202 | | |
203 | | impl<'a, V: ?Sized> VarZeroCow<'a, V> { |
204 | | /// Whether or not this is owned |
205 | 0 | pub fn is_owned(&self) -> bool { |
206 | 0 | self.raw.is_owned() |
207 | 0 | } |
208 | | |
209 | | /// Get the byte representation of this type |
210 | | /// |
211 | | /// Is also always a valid `V` and can be passed to |
212 | | /// `V::from_bytes_unchecked()` |
213 | 0 | pub fn as_bytes(&self) -> &[u8] { |
214 | | // The valid V guarantee comes from the safety invariant on `raw` (Invariant 3 on `RawVarZeroCow::buf`) |
215 | 0 | self.raw.as_bytes() |
216 | 0 | } |
217 | | |
218 | | /// Invariant: `raw` must wrap a valid V, either owned or borrowed for 'a |
219 | 0 | const unsafe fn from_raw(raw: RawVarZeroCow) -> Self { |
220 | 0 | Self { |
221 | 0 | // Invariant passed up to caller |
222 | 0 | raw, |
223 | 0 | marker1: PhantomData, |
224 | 0 | #[cfg(feature = "alloc")] |
225 | 0 | marker2: PhantomData, |
226 | 0 | } |
227 | 0 | } |
228 | | } |
229 | | |
230 | | impl RawVarZeroCow { |
231 | | /// Whether or not this is owned |
232 | | #[inline] |
233 | 0 | pub fn is_owned(&self) -> bool { |
234 | | #[cfg(feature = "alloc")] |
235 | 0 | return self.owned; |
236 | | #[cfg(not(feature = "alloc"))] |
237 | | return false; |
238 | 0 | } |
239 | | |
240 | | /// Get the byte representation of this type |
241 | | #[inline] |
242 | 0 | pub fn as_bytes(&self) -> &[u8] { |
243 | | // Safety: Invariant 1 on self.buf |
244 | 0 | unsafe { self.buf.as_ref() } |
245 | 0 | } |
246 | | } |
247 | | |
248 | | impl<'a, V: VarULE + ?Sized> Deref for VarZeroCow<'a, V> { |
249 | | type Target = V; |
250 | 0 | fn deref(&self) -> &V { |
251 | | // Safety: The bytes are a valid V, per the safety invariant on `self.raw` (Invariant 3 on its `buf`) |
252 | 0 | unsafe { V::from_bytes_unchecked(self.as_bytes()) } |
253 | 0 | } |
254 | | } |
255 | | |
256 | | impl<'a, V: VarULE + ?Sized> From<&'a V> for VarZeroCow<'a, V> { |
257 | 0 | fn from(other: &'a V) -> Self { |
258 | 0 | Self::new_borrowed(other) |
259 | 0 | } |
260 | | } |
261 | | |
262 | | #[cfg(feature = "alloc")] |
263 | | impl<'a, V: VarULE + ?Sized> From<Box<V>> for VarZeroCow<'a, V> { |
264 | 0 | fn from(other: Box<V>) -> Self { |
265 | 0 | Self::new_owned(other) |
266 | 0 | } |
267 | | } |
268 | | |
269 | | impl<'a, V: VarULE + ?Sized + fmt::Debug> fmt::Debug for VarZeroCow<'a, V> { |
270 | 0 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> { |
271 | 0 | self.deref().fmt(f) |
272 | 0 | } |
273 | | } |
274 | | |
275 | | // Manual impls: these compare the dereferenced `V` values rather than the raw pointer and ownership flag |
276 | | impl<'a, V: VarULE + ?Sized + PartialEq> PartialEq for VarZeroCow<'a, V> { |
277 | 0 | fn eq(&self, other: &Self) -> bool { |
278 | 0 | self.deref().eq(other.deref()) |
279 | 0 | } |
280 | | } |
281 | | |
282 | | impl<'a, V: VarULE + ?Sized + Eq> Eq for VarZeroCow<'a, V> {} |
283 | | |
284 | | impl<'a, V: VarULE + ?Sized + PartialOrd> PartialOrd for VarZeroCow<'a, V> { |
285 | 0 | fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> { |
286 | 0 | self.deref().partial_cmp(other.deref()) |
287 | 0 | } |
288 | | } |
289 | | |
290 | | impl<'a, V: VarULE + ?Sized + Ord> Ord for VarZeroCow<'a, V> { |
291 | 0 | fn cmp(&self, other: &Self) -> core::cmp::Ordering { |
292 | 0 | self.deref().cmp(other.deref()) |
293 | 0 | } |
294 | | } |
295 | | |
296 | | // # Safety |
297 | | // |
298 | | // encode_var_ule_len: Produces the length of the contained bytes, which are known to be a valid V by invariant |
299 | | // |
300 | | // encode_var_ule_write: Writes the contained bytes, which are known to be a valid V by invariant |
301 | | unsafe impl<'a, V: VarULE + ?Sized> EncodeAsVarULE<V> for VarZeroCow<'a, V> { |
302 | 0 | fn encode_var_ule_as_slices<R>(&self, _: impl FnOnce(&[&[u8]]) -> R) -> R { |
303 | | // unnecessary if the other two are implemented |
304 | 0 | unreachable!() |
305 | | } |
306 | | |
307 | | #[inline] |
308 | 0 | fn encode_var_ule_len(&self) -> usize { |
309 | 0 | self.as_bytes().len() |
310 | 0 | } |
311 | | |
312 | | #[inline] |
313 | 0 | fn encode_var_ule_write(&self, dst: &mut [u8]) { |
314 | 0 | dst.copy_from_slice(self.as_bytes()) |
315 | 0 | } |
316 | | } |
317 | | |
318 | | #[cfg(feature = "serde")] |
319 | | impl<'a, V: VarULE + ?Sized + serde::Serialize> serde::Serialize for VarZeroCow<'a, V> { |
320 | | fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> |
321 | | where |
322 | | S: serde::Serializer, |
323 | | { |
324 | | if serializer.is_human_readable() { |
325 | | <V as serde::Serialize>::serialize(self.deref(), serializer) |
326 | | } else { |
327 | | serializer.serialize_bytes(self.as_bytes()) |
328 | | } |
329 | | } |
330 | | } |
331 | | |
332 | | #[cfg(feature = "serde")] |
333 | | impl<'a, 'de: 'a, V: VarULE + ?Sized> serde::Deserialize<'de> for VarZeroCow<'a, V> |
334 | | where |
335 | | Box<V>: serde::Deserialize<'de>, |
336 | | { |
337 | | fn deserialize<Des>(deserializer: Des) -> Result<Self, Des::Error> |
338 | | where |
339 | | Des: serde::Deserializer<'de>, |
340 | | { |
341 | | if deserializer.is_human_readable() { |
342 | | let b = Box::<V>::deserialize(deserializer)?; |
343 | | Ok(Self::new_owned(b)) |
344 | | } else { |
345 | | let bytes = <&[u8]>::deserialize(deserializer)?; |
346 | | Self::parse_bytes(bytes).map_err(serde::de::Error::custom) |
347 | | } |
348 | | } |
349 | | } |
350 | | |
351 | | #[cfg(feature = "databake")] |
352 | | impl<'a, V: VarULE + ?Sized> databake::Bake for VarZeroCow<'a, V> { |
353 | | fn bake(&self, env: &databake::CrateEnv) -> databake::TokenStream { |
354 | | env.insert("zerovec"); |
355 | | let bytes = self.as_bytes().bake(env); |
356 | | databake::quote! { |
357 | | // Safety: Known to come from a valid V since self.as_bytes() is always a valid V |
358 | | unsafe { |
359 | | zerovec::VarZeroCow::from_bytes_unchecked(#bytes) |
360 | | } |
361 | | } |
362 | | } |
363 | | } |
364 | | |
365 | | #[cfg(feature = "databake")] |
366 | | impl<'a, V: VarULE + ?Sized> databake::BakeSize for VarZeroCow<'a, V> { |
367 | | fn borrows_size(&self) -> usize { |
368 | | self.as_bytes().len() |
369 | | } |
370 | | } |
371 | | |
372 | | impl<'a, V: VarULE + ?Sized> ZeroFrom<'a, V> for VarZeroCow<'a, V> { |
373 | | #[inline] |
374 | 0 | fn zero_from(other: &'a V) -> Self { |
375 | 0 | Self::new_borrowed(other) |
376 | 0 | } |
377 | | } |
378 | | |
379 | | impl<'a, 'b, V: VarULE + ?Sized> ZeroFrom<'a, VarZeroCow<'b, V>> for VarZeroCow<'a, V> { |
380 | | #[inline] |
381 | 0 | fn zero_from(other: &'a VarZeroCow<'b, V>) -> Self { |
382 | 0 | Self::new_borrowed(other) |
383 | 0 | } |
384 | | } |
385 | | |
386 | | #[cfg(test)] |
387 | | mod tests { |
388 | | use super::VarZeroCow; |
389 | | use crate::ule::tuplevar::Tuple3VarULE; |
390 | | use crate::vecs::VarZeroSlice; |
391 | | #[test] |
392 | | fn test_cow_roundtrip() { |
393 | | type Messy = Tuple3VarULE<str, [u8], VarZeroSlice<str>>; |
394 | | let vec = vec!["one", "two", "three"]; |
395 | | let messy: VarZeroCow<Messy> = |
396 | | VarZeroCow::from_encodeable(&("hello", &b"g\xFF\xFFdbye"[..], vec)); |
397 | | |
398 | | assert_eq!(messy.a(), "hello"); |
399 | | assert_eq!(messy.b(), b"g\xFF\xFFdbye"); |
400 | | assert_eq!(&messy.c()[1], "two"); |
401 | | |
402 | | #[cfg(feature = "serde")] |
403 | | { |
404 | | let bincode = bincode::serialize(&messy).unwrap(); |
405 | | let deserialized: VarZeroCow<Messy> = bincode::deserialize(&bincode).unwrap(); |
406 | | assert_eq!( |
407 | | messy, deserialized, |
408 | | "Single element roundtrips with bincode" |
409 | | ); |
410 | | assert!(!deserialized.is_owned()); |
411 | | |
412 | | let json = serde_json::to_string(&messy).unwrap(); |
413 | | let deserialized: VarZeroCow<Messy> = serde_json::from_str(&json).unwrap(); |
414 | | assert_eq!(messy, deserialized, "Single element roundtrips with serde"); |
415 | | } |
416 | | } |
417 | | |
418 | | struct TwoCows<'a> { |
419 | | cow1: VarZeroCow<'a, str>, |
420 | | cow2: VarZeroCow<'a, str>, |
421 | | } |
422 | | |
423 | | #[test] |
424 | | fn test_eyepatch_works() { |
425 | | // This code should compile |
426 | | let mut two = TwoCows { |
427 | | cow1: VarZeroCow::new_borrowed("hello"), |
428 | | cow2: VarZeroCow::new_owned("world".into()), |
429 | | }; |
430 | | let three = VarZeroCow::new_borrowed(&*two.cow2); |
431 | | two.cow1 = three; |
432 | | |
433 | | // Without the eyepatch, dropck will be worried that the dtor of two.cow1 can observe the |
434 | | // data it borrowed from two.cow2, which may have already been deleted |
435 | | |
436 | | // This test will fail if you add an empty `impl<'a, V: ?Sized> Drop for VarZeroCow<'a, V>` |
437 | | } |
438 | | } |