Coverage Report

Created: 2025-12-31 07:00

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/rust/registry/src/index.crates.io-1949cf8c6b5b557f/zerovec-0.11.5/src/varzerovec/vec.rs
Line
Count
Source
1
// This file is part of ICU4X. For terms of use, please see the file
2
// called LICENSE at the top level of the ICU4X source tree
3
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5
use crate::ule::*;
6
7
use core::cmp::{Ord, Ordering, PartialOrd};
8
use core::fmt;
9
use core::ops::Deref;
10
11
use super::*;
12
13
/// A zero-copy, byte-aligned vector for variable-width types.
14
///
15
/// `VarZeroVec<T>` is designed as a drop-in replacement for `Vec<T>` in situations where it is
16
/// desirable to borrow data from an unaligned byte slice, such as zero-copy deserialization, and
17
/// where `T`'s data is variable-length (e.g. `String`)
18
///
19
/// `T` must implement [`VarULE`], which is already implemented for [`str`] and `[u8]`. For storing more
20
/// complicated series of elements, it is implemented on `ZeroSlice<T>` as well as `VarZeroSlice<T>`
21
/// for nesting. [`zerovec::make_varule`](crate::make_varule) may be used to generate
22
/// a dynamically-sized [`VarULE`] type and conversions to and from a custom type.
23
///
24
/// For example, here are some owned types and their zero-copy equivalents:
25
///
26
/// - `Vec<String>`: `VarZeroVec<'a, str>`
27
/// - `Vec<Vec<u8>>>`: `VarZeroVec<'a, [u8]>`
28
/// - `Vec<Vec<u32>>`: `VarZeroVec<'a, ZeroSlice<u32>>`
29
/// - `Vec<Vec<String>>`: `VarZeroVec<'a, VarZeroSlice<str>>`
30
///
31
/// Most of the methods on `VarZeroVec<'a, T>` come from its [`Deref`] implementation to [`VarZeroSlice<T>`](VarZeroSlice).
32
///
33
/// For creating zero-copy vectors of fixed-size types, see [`ZeroVec`](crate::ZeroVec).
34
///
35
/// `VarZeroVec<T>` behaves much like [`Cow`](alloc::borrow::Cow), where it can be constructed from
36
/// owned data (and then mutated!) but can also borrow from some buffer.
37
///
38
/// The `F` type parameter is a [`VarZeroVecFormat`] (see its docs for more details), which can be used to select the
39
/// precise format of the backing buffer with various size and performance tradeoffs. It defaults to [`Index16`].
40
///
41
/// # Bytes and Equality
42
///
43
/// Two [`VarZeroVec`]s are equal if and only if their bytes are equal, as described in the trait
44
/// [`VarULE`]. However, we do not guarantee stability of byte equality or serialization format
45
/// across major SemVer releases.
46
///
47
/// To compare a [`Vec<T>`] to a [`VarZeroVec<T>`], it is generally recommended to use
48
/// [`Iterator::eq`], since it is somewhat expensive at runtime to convert from a [`Vec<T>`] to a
49
/// [`VarZeroVec<T>`] or vice-versa.
50
///
51
/// Prior to zerovec reaching 1.0, the precise byte representation of [`VarZeroVec`] is still
52
/// under consideration, with different options along the space-time spectrum. See
53
/// [#1410](https://github.com/unicode-org/icu4x/issues/1410).
54
///
55
/// # Example
56
///
57
/// ```rust
58
/// use zerovec::VarZeroVec;
59
///
60
/// // The little-endian bytes correspond to the list of strings.
61
/// let strings = vec!["w", "ω", "文", "𑄃"];
62
///
63
/// #[derive(serde::Serialize, serde::Deserialize)]
64
/// struct Data<'a> {
65
///     #[serde(borrow)]
66
///     strings: VarZeroVec<'a, str>,
67
/// }
68
///
69
/// let data = Data {
70
///     strings: VarZeroVec::from(&strings),
71
/// };
72
///
73
/// let bincode_bytes =
74
///     bincode::serialize(&data).expect("Serialization should be successful");
75
///
76
/// // Will deserialize without allocations
77
/// let deserialized: Data = bincode::deserialize(&bincode_bytes)
78
///     .expect("Deserialization should be successful");
79
///
80
/// assert_eq!(deserialized.strings.get(2), Some("文"));
81
/// assert_eq!(deserialized.strings, &*strings);
82
/// ```
83
///
84
/// Here's another example with `ZeroSlice<T>` (similar to `[T]`):
85
///
86
/// ```rust
87
/// use zerovec::VarZeroVec;
88
/// use zerovec::ZeroSlice;
89
///
90
/// // The structured list correspond to the list of integers.
91
/// let numbers: &[&[u32]] = &[
92
///     &[12, 25, 38],
93
///     &[39179, 100],
94
///     &[42, 55555],
95
///     &[12345, 54321, 9],
96
/// ];
97
///
98
/// #[derive(serde::Serialize, serde::Deserialize)]
99
/// struct Data<'a> {
100
///     #[serde(borrow)]
101
///     vecs: VarZeroVec<'a, ZeroSlice<u32>>,
102
/// }
103
///
104
/// let data = Data {
105
///     vecs: VarZeroVec::from(numbers),
106
/// };
107
///
108
/// let bincode_bytes =
109
///     bincode::serialize(&data).expect("Serialization should be successful");
110
///
111
/// let deserialized: Data = bincode::deserialize(&bincode_bytes)
112
///     .expect("Deserialization should be successful");
113
///
114
/// assert_eq!(deserialized.vecs[0].get(1).unwrap(), 25);
115
/// assert_eq!(deserialized.vecs[1], *numbers[1]);
116
/// ```
117
///
118
/// [`VarZeroVec`]s can be nested infinitely via a similar mechanism, see the docs of [`VarZeroSlice`]
119
/// for more information.
120
///
121
/// # How it Works
122
///
123
/// `VarZeroVec<T>`, when used with non-human-readable serializers (like `bincode`), will
124
/// serialize to a specially formatted list of bytes. The format is:
125
///
126
/// -  2 bytes for `length` (interpreted as a little-endian u16)
127
/// - `2 * (length - 1)` bytes of `indices` (interpreted as little-endian u16s)
128
/// - Remaining bytes for actual `data`
129
///
130
/// The format is tweakable by setting the `F` parameter, by default it uses u16 indices and lengths but other
131
/// `VarZeroVecFormat` types can set other sizes.
132
///
133
/// Each element in the `indices` array points to the ending index of its corresponding
134
/// data part in the `data` list. The starting index can be calculated from the ending index
135
/// of the next element (or 0 for the first element). The last ending index, not stored in the array, is
136
/// the length of the `data` segment.
137
///
138
/// See [the design doc](https://github.com/unicode-org/icu4x/blob/main/utils/zerovec/design_doc.md) for more details.
139
///
140
/// [`ule`]: crate::ule
141
pub struct VarZeroVec<'a, T: ?Sized, F = Index16>(pub(crate) VarZeroVecInner<'a, T, F>);
142
143
pub(crate) enum VarZeroVecInner<'a, T: ?Sized, F = Index16> {
144
    #[cfg(feature = "alloc")]
145
    Owned(VarZeroVecOwned<T, F>),
146
    Borrowed(&'a VarZeroSlice<T, F>),
147
}
148
149
impl<'a, T: ?Sized, F> Clone for VarZeroVec<'a, T, F> {
150
0
    fn clone(&self) -> Self {
151
0
        match self.0 {
152
            #[cfg(feature = "alloc")]
153
            VarZeroVecInner::Owned(ref o) => o.clone().into(),
154
0
            VarZeroVecInner::Borrowed(b) => b.into(),
155
        }
156
0
    }
157
}
158
159
impl<T: VarULE + ?Sized, F: VarZeroVecFormat> fmt::Debug for VarZeroVec<'_, T, F>
160
where
161
    T: fmt::Debug,
162
{
163
0
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
164
0
        VarZeroSlice::fmt(self, f)
165
0
    }
Unexecuted instantiation: <zerovec::varzerovec::vec::VarZeroVec<str> as core::fmt::Debug>::fmt
Unexecuted instantiation: <zerovec::varzerovec::vec::VarZeroVec<_, _> as core::fmt::Debug>::fmt
166
}
167
168
#[cfg(feature = "alloc")]
169
impl<'a, T: ?Sized, F> From<VarZeroVecOwned<T, F>> for VarZeroVec<'a, T, F> {
170
    #[inline]
171
    fn from(other: VarZeroVecOwned<T, F>) -> Self {
172
        Self(VarZeroVecInner::Owned(other))
173
    }
174
}
175
176
impl<'a, T: ?Sized, F> From<&'a VarZeroSlice<T, F>> for VarZeroVec<'a, T, F> {
177
0
    fn from(other: &'a VarZeroSlice<T, F>) -> Self {
178
0
        Self(VarZeroVecInner::Borrowed(other))
179
0
    }
Unexecuted instantiation: <zerovec::varzerovec::vec::VarZeroVec<zerovec::zerovec::slice::ZeroSlice<icu_properties::props::Script>> as core::convert::From<&zerovec::varzerovec::slice::VarZeroSlice<zerovec::zerovec::slice::ZeroSlice<icu_properties::props::Script>>>>::from
Unexecuted instantiation: <zerovec::varzerovec::vec::VarZeroVec<str> as core::convert::From<&zerovec::varzerovec::slice::VarZeroSlice<str>>>::from
Unexecuted instantiation: <zerovec::varzerovec::vec::VarZeroVec<_, _> as core::convert::From<&zerovec::varzerovec::slice::VarZeroSlice<_, _>>>::from
180
}
181
182
#[cfg(feature = "alloc")]
183
impl<'a, T: ?Sized + VarULE, F: VarZeroVecFormat> From<VarZeroVec<'a, T, F>>
184
    for VarZeroVecOwned<T, F>
185
{
186
    #[inline]
187
    fn from(other: VarZeroVec<'a, T, F>) -> Self {
188
        match other.0 {
189
            VarZeroVecInner::Owned(o) => o,
190
            VarZeroVecInner::Borrowed(b) => b.into(),
191
        }
192
    }
193
}
194
195
impl<T: VarULE + ?Sized, F: VarZeroVecFormat> Default for VarZeroVec<'_, T, F> {
196
    #[inline]
197
0
    fn default() -> Self {
198
0
        Self::new()
199
0
    }
200
}
201
202
impl<T: VarULE + ?Sized, F: VarZeroVecFormat> Deref for VarZeroVec<'_, T, F> {
203
    type Target = VarZeroSlice<T, F>;
204
0
    fn deref(&self) -> &VarZeroSlice<T, F> {
205
0
        self.as_slice()
206
0
    }
Unexecuted instantiation: <zerovec::varzerovec::vec::VarZeroVec<zerovec::zerovec::slice::ZeroSlice<icu_properties::props::Script>> as core::ops::deref::Deref>::deref
Unexecuted instantiation: <zerovec::varzerovec::vec::VarZeroVec<str> as core::ops::deref::Deref>::deref
Unexecuted instantiation: <zerovec::varzerovec::vec::VarZeroVec<_, _> as core::ops::deref::Deref>::deref
207
}
208
209
impl<'a, T: VarULE + ?Sized, F: VarZeroVecFormat> VarZeroVec<'a, T, F> {
210
    /// Creates a new, empty `VarZeroVec<T>`.
211
    ///
212
    /// # Examples
213
    ///
214
    /// ```
215
    /// use zerovec::VarZeroVec;
216
    ///
217
    /// let vzv: VarZeroVec<str> = VarZeroVec::new();
218
    /// assert!(vzv.is_empty());
219
    /// ```
220
    #[inline]
221
0
    pub const fn new() -> Self {
222
0
        Self(VarZeroVecInner::Borrowed(VarZeroSlice::new_empty()))
223
0
    }
224
225
    /// Parse a VarZeroVec from a slice of the appropriate format
226
    ///
227
    /// Slices of the right format can be obtained via [`VarZeroSlice::as_bytes()`].
228
    ///
229
    /// # Example
230
    ///
231
    /// ```rust
232
    /// # use zerovec::VarZeroVec;
233
    ///
234
    /// let strings = vec!["foo", "bar", "baz", "quux"];
235
    /// let vec = VarZeroVec::<str>::from(&strings);
236
    ///
237
    /// assert_eq!(&vec[0], "foo");
238
    /// assert_eq!(&vec[1], "bar");
239
    /// assert_eq!(&vec[2], "baz");
240
    /// assert_eq!(&vec[3], "quux");
241
    /// ```
242
0
    pub fn parse_bytes(slice: &'a [u8]) -> Result<Self, UleError> {
243
0
        let borrowed = VarZeroSlice::<T, F>::parse_bytes(slice)?;
244
245
0
        Ok(Self(VarZeroVecInner::Borrowed(borrowed)))
246
0
    }
247
248
    /// Uses a `&[u8]` buffer as a `VarZeroVec<T>` without any verification.
249
    ///
250
    /// # Safety
251
    ///
252
    /// `bytes` need to be an output from [`VarZeroSlice::as_bytes()`].
253
0
    pub const unsafe fn from_bytes_unchecked(bytes: &'a [u8]) -> Self {
254
0
        Self(VarZeroVecInner::Borrowed(core::mem::transmute::<
255
0
            &[u8],
256
0
            &VarZeroSlice<T, F>,
257
0
        >(bytes)))
258
0
    }
259
260
    /// Convert this into a mutable vector of the owned `T` type, cloning if necessary.
261
    ///
262
    /// ✨ *Enabled with the `alloc` Cargo feature.*
263
    ///
264
    /// # Example
265
    ///
266
    /// ```rust,ignore
267
    /// # use zerovec::VarZeroVec;
268
    /// let strings = vec!["foo", "bar", "baz", "quux"];
269
    /// let mut vec = VarZeroVec::<str>::from(&strings);
270
    ///
271
    /// assert_eq!(vec.len(), 4);
272
    /// let mutvec = vec.make_mut();
273
    /// mutvec.push("lorem ipsum".into());
274
    /// mutvec[2] = "dolor sit".into();
275
    /// assert_eq!(&vec[0], "foo");
276
    /// assert_eq!(&vec[1], "bar");
277
    /// assert_eq!(&vec[2], "dolor sit");
278
    /// assert_eq!(&vec[3], "quux");
279
    /// assert_eq!(&vec[4], "lorem ipsum");
280
    /// ```
281
    //
282
    // This function is crate-public for now since we don't yet want to stabilize
283
    // the internal implementation details
284
    #[cfg(feature = "alloc")]
285
    pub fn make_mut(&mut self) -> &mut VarZeroVecOwned<T, F> {
286
        match self.0 {
287
            VarZeroVecInner::Owned(ref mut vec) => vec,
288
            VarZeroVecInner::Borrowed(slice) => {
289
                let new_self = VarZeroVecOwned::from_slice(slice);
290
                *self = new_self.into();
291
                // recursion is limited since we are guaranteed to hit the Owned branch
292
                self.make_mut()
293
            }
294
        }
295
    }
296
297
    /// Converts a borrowed ZeroVec to an owned ZeroVec. No-op if already owned.
298
    ///
299
    /// ✨ *Enabled with the `alloc` Cargo feature.*
300
    ///
301
    /// # Example
302
    ///
303
    /// ```
304
    /// # use zerovec::VarZeroVec;
305
    ///
306
    /// let strings = vec!["foo", "bar", "baz", "quux"];
307
    /// let vec = VarZeroVec::<str>::from(&strings);
308
    ///
309
    /// assert_eq!(vec.len(), 4);
310
    /// // has 'static lifetime
311
    /// let owned = vec.into_owned();
312
    /// ```
313
    #[cfg(feature = "alloc")]
314
    pub fn into_owned(mut self) -> VarZeroVec<'static, T, F> {
315
        self.make_mut();
316
        match self.0 {
317
            VarZeroVecInner::Owned(vec) => vec.into(),
318
            _ => unreachable!(),
319
        }
320
    }
321
322
    /// Obtain this `VarZeroVec` as a [`VarZeroSlice`]
323
0
    pub fn as_slice(&self) -> &VarZeroSlice<T, F> {
324
0
        match self.0 {
325
            #[cfg(feature = "alloc")]
326
            VarZeroVecInner::Owned(ref owned) => owned,
327
0
            VarZeroVecInner::Borrowed(b) => b,
328
        }
329
0
    }
Unexecuted instantiation: <zerovec::varzerovec::vec::VarZeroVec<zerovec::zerovec::slice::ZeroSlice<icu_properties::props::Script>>>::as_slice
Unexecuted instantiation: <zerovec::varzerovec::vec::VarZeroVec<str>>::as_slice
Unexecuted instantiation: <zerovec::varzerovec::vec::VarZeroVec<_, _>>::as_slice
330
331
    /// Takes the byte vector representing the encoded data of this VarZeroVec. If borrowed,
332
    /// this function allocates a byte vector and copies the borrowed bytes into it.
333
    ///
334
    /// The bytes can be passed back to [`Self::parse_bytes()`].
335
    ///
336
    /// To get a reference to the bytes without moving, see [`VarZeroSlice::as_bytes()`].
337
    ///
338
    /// ✨ *Enabled with the `alloc` Cargo feature.*
339
    ///
340
    /// # Example
341
    ///
342
    /// ```rust
343
    /// # use zerovec::VarZeroVec;
344
    ///
345
    /// let strings = vec!["foo", "bar", "baz"];
346
    /// let bytes = VarZeroVec::<str>::from(&strings).into_bytes();
347
    ///
348
    /// let mut borrowed: VarZeroVec<str> =
349
    ///     VarZeroVec::parse_bytes(&bytes).unwrap();
350
    /// assert_eq!(borrowed, &*strings);
351
    /// ```
352
    #[cfg(feature = "alloc")]
353
    pub fn into_bytes(self) -> alloc::vec::Vec<u8> {
354
        match self.0 {
355
            VarZeroVecInner::Owned(vec) => vec.into_bytes(),
356
            VarZeroVecInner::Borrowed(vec) => vec.as_bytes().to_vec(),
357
        }
358
    }
359
360
    /// Return whether the [`VarZeroVec`] is operating on owned or borrowed
361
    /// data. [`VarZeroVec::into_owned()`] and [`VarZeroVec::make_mut()`] can
362
    /// be used to force it into an owned type
363
0
    pub fn is_owned(&self) -> bool {
364
0
        match self.0 {
365
            #[cfg(feature = "alloc")]
366
            VarZeroVecInner::Owned(..) => true,
367
0
            VarZeroVecInner::Borrowed(..) => false,
368
        }
369
0
    }
370
371
    #[doc(hidden)]
372
0
    pub fn as_components<'b>(&'b self) -> VarZeroVecComponents<'b, T, F> {
373
0
        self.as_slice().as_components()
374
0
    }
375
}
376
377
#[cfg(feature = "alloc")]
378
impl<A, T, F> From<&alloc::vec::Vec<A>> for VarZeroVec<'static, T, F>
379
where
380
    T: VarULE + ?Sized,
381
    A: EncodeAsVarULE<T>,
382
    F: VarZeroVecFormat,
383
{
384
    #[inline]
385
    fn from(elements: &alloc::vec::Vec<A>) -> Self {
386
        Self::from(elements.as_slice())
387
    }
388
}
389
390
#[cfg(feature = "alloc")]
391
impl<A, T, F> From<&[A]> for VarZeroVec<'static, T, F>
392
where
393
    T: VarULE + ?Sized,
394
    A: EncodeAsVarULE<T>,
395
    F: VarZeroVecFormat,
396
{
397
    #[inline]
398
    fn from(elements: &[A]) -> Self {
399
        if elements.is_empty() {
400
            VarZeroSlice::new_empty().into()
401
        } else {
402
            #[expect(clippy::unwrap_used)] // TODO(#1410) Better story for fallibility
403
            VarZeroVecOwned::try_from_elements(elements).unwrap().into()
404
        }
405
    }
406
}
407
408
#[cfg(feature = "alloc")]
409
impl<A, T, F, const N: usize> From<&[A; N]> for VarZeroVec<'static, T, F>
410
where
411
    T: VarULE + ?Sized,
412
    A: EncodeAsVarULE<T>,
413
    F: VarZeroVecFormat,
414
{
415
    #[inline]
416
    fn from(elements: &[A; N]) -> Self {
417
        Self::from(elements.as_slice())
418
    }
419
}
420
421
impl<'a, 'b, T, F> PartialEq<VarZeroVec<'b, T, F>> for VarZeroVec<'a, T, F>
422
where
423
    T: VarULE,
424
    T: ?Sized,
425
    T: PartialEq,
426
    F: VarZeroVecFormat,
427
{
428
    #[inline]
429
0
    fn eq(&self, other: &VarZeroVec<'b, T, F>) -> bool {
430
        // VZV::from_elements used to produce a non-canonical representation of the
431
        // empty VZV, so we cannot use byte equality for empty vecs.
432
0
        if self.is_empty() || other.is_empty() {
433
0
            return self.is_empty() && other.is_empty();
434
0
        }
435
        // VarULE has an API guarantee that byte equality is semantic equality.
436
        // For non-empty VZVs, there's only a single metadata representation,
437
        // so this guarantee extends to the whole VZV representation.
438
0
        self.as_bytes().eq(other.as_bytes())
439
0
    }
440
}
441
442
impl<'a, T, F> Eq for VarZeroVec<'a, T, F>
443
where
444
    T: VarULE,
445
    T: ?Sized,
446
    T: Eq,
447
    F: VarZeroVecFormat,
448
{
449
}
450
451
impl<T, A, F> PartialEq<&'_ [A]> for VarZeroVec<'_, T, F>
452
where
453
    T: VarULE + ?Sized,
454
    T: PartialEq,
455
    A: AsRef<T>,
456
    F: VarZeroVecFormat,
457
{
458
    #[inline]
459
0
    fn eq(&self, other: &&[A]) -> bool {
460
0
        self.iter().eq(other.iter().map(|t| t.as_ref()))
461
0
    }
462
}
463
464
impl<T, A, F, const N: usize> PartialEq<[A; N]> for VarZeroVec<'_, T, F>
465
where
466
    T: VarULE + ?Sized,
467
    T: PartialEq,
468
    A: AsRef<T>,
469
    F: VarZeroVecFormat,
470
{
471
    #[inline]
472
0
    fn eq(&self, other: &[A; N]) -> bool {
473
0
        self.iter().eq(other.iter().map(|t| t.as_ref()))
474
0
    }
475
}
476
477
impl<'a, T: VarULE + ?Sized + PartialOrd, F: VarZeroVecFormat> PartialOrd for VarZeroVec<'a, T, F> {
478
0
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
479
0
        self.iter().partial_cmp(other.iter())
480
0
    }
481
}
482
483
impl<'a, T: VarULE + ?Sized + Ord, F: VarZeroVecFormat> Ord for VarZeroVec<'a, T, F> {
484
0
    fn cmp(&self, other: &Self) -> Ordering {
485
0
        self.iter().cmp(other.iter())
486
0
    }
487
}
488
489
#[test]
490
fn assert_single_empty_representation() {
491
    assert_eq!(
492
        VarZeroVec::<str>::new().as_bytes(),
493
        VarZeroVec::<str>::from(&[] as &[&str]).as_bytes()
494
    );
495
496
    use crate::map::MutableZeroVecLike;
497
    let mut vzv = VarZeroVec::<str>::from(&["hello", "world"][..]);
498
    assert_eq!(vzv.len(), 2);
499
    assert!(!vzv.as_bytes().is_empty());
500
    vzv.zvl_remove(0);
501
    assert_eq!(vzv.len(), 1);
502
    assert!(!vzv.as_bytes().is_empty());
503
    vzv.zvl_remove(0);
504
    assert_eq!(vzv.len(), 0);
505
    assert!(vzv.as_bytes().is_empty());
506
    vzv.zvl_insert(0, "something");
507
    assert_eq!(vzv.len(), 1);
508
    assert!(!vzv.as_bytes().is_empty());
509
}
510
511
#[test]
512
fn weird_empty_representation_equality() {
513
    assert_eq!(
514
        VarZeroVec::<str>::parse_bytes(&[0, 0, 0, 0]).unwrap(),
515
        VarZeroVec::<str>::parse_bytes(&[]).unwrap()
516
    );
517
}