Coverage Report

Created: 2025-10-13 06:09

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/rust/registry/src/index.crates.io-1949cf8c6b5b557f/zerovec-0.11.4/src/varzerovec/vec.rs
Line
Count
Source
1
// This file is part of ICU4X. For terms of use, please see the file
2
// called LICENSE at the top level of the ICU4X source tree
3
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5
use crate::ule::*;
6
7
use core::cmp::{Ord, Ordering, PartialOrd};
8
use core::fmt;
9
use core::ops::Deref;
10
11
use super::*;
12
13
/// A zero-copy, byte-aligned vector for variable-width types.
14
///
15
/// `VarZeroVec<T>` is designed as a drop-in replacement for `Vec<T>` in situations where it is
16
/// desirable to borrow data from an unaligned byte slice, such as zero-copy deserialization, and
17
/// where `T`'s data is variable-length (e.g. `String`)
18
///
19
/// `T` must implement [`VarULE`], which is already implemented for [`str`] and `[u8]`. For storing more
20
/// complicated series of elements, it is implemented on `ZeroSlice<T>` as well as `VarZeroSlice<T>`
21
/// for nesting. [`zerovec::make_varule`](crate::make_varule) may be used to generate
22
/// a dynamically-sized [`VarULE`] type and conversions to and from a custom type.
23
///
24
/// For example, here are some owned types and their zero-copy equivalents:
25
///
26
/// - `Vec<String>`: `VarZeroVec<'a, str>`
27
/// - `Vec<Vec<u8>>`: `VarZeroVec<'a, [u8]>`
28
/// - `Vec<Vec<u32>>`: `VarZeroVec<'a, ZeroSlice<u32>>`
29
/// - `Vec<Vec<String>>`: `VarZeroVec<'a, VarZeroSlice<str>>`
30
///
31
/// Most of the methods on `VarZeroVec<'a, T>` come from its [`Deref`] implementation to [`VarZeroSlice<T>`](VarZeroSlice).
32
///
33
/// For creating zero-copy vectors of fixed-size types, see [`ZeroVec`](crate::ZeroVec).
34
///
35
/// `VarZeroVec<T>` behaves much like [`Cow`](alloc::borrow::Cow), where it can be constructed from
36
/// owned data (and then mutated!) but can also borrow from some buffer.
37
///
38
/// The `F` type parameter is a [`VarZeroVecFormat`] (see its docs for more details), which can be used to select the
39
/// precise format of the backing buffer with various size and performance tradeoffs. It defaults to [`Index16`].
40
///
41
/// # Bytes and Equality
42
///
43
/// Two [`VarZeroVec`]s are equal if and only if their bytes are equal, as described in the trait
44
/// [`VarULE`]. However, we do not guarantee stability of byte equality or serialization format
45
/// across major SemVer releases.
46
///
47
/// To compare a [`Vec<T>`] to a [`VarZeroVec<T>`], it is generally recommended to use
48
/// [`Iterator::eq`], since it is somewhat expensive at runtime to convert from a [`Vec<T>`] to a
49
/// [`VarZeroVec<T>`] or vice-versa.
50
///
51
/// Prior to zerovec reaching 1.0, the precise byte representation of [`VarZeroVec`] is still
52
/// under consideration, with different options along the space-time spectrum. See
53
/// [#1410](https://github.com/unicode-org/icu4x/issues/1410).
54
///
55
/// # Example
56
///
57
/// ```rust
58
/// use zerovec::VarZeroVec;
59
///
60
/// // The little-endian bytes correspond to the list of strings.
61
/// let strings = vec!["w", "ω", "文", "𑄃"];
62
///
63
/// #[derive(serde::Serialize, serde::Deserialize)]
64
/// struct Data<'a> {
65
///     #[serde(borrow)]
66
///     strings: VarZeroVec<'a, str>,
67
/// }
68
///
69
/// let data = Data {
70
///     strings: VarZeroVec::from(&strings),
71
/// };
72
///
73
/// let bincode_bytes =
74
///     bincode::serialize(&data).expect("Serialization should be successful");
75
///
76
/// // Will deserialize without allocations
77
/// let deserialized: Data = bincode::deserialize(&bincode_bytes)
78
///     .expect("Deserialization should be successful");
79
///
80
/// assert_eq!(deserialized.strings.get(2), Some("文"));
81
/// assert_eq!(deserialized.strings, &*strings);
82
/// ```
83
///
84
/// Here's another example with `ZeroSlice<T>` (similar to `[T]`):
85
///
86
/// ```rust
87
/// use zerovec::VarZeroVec;
88
/// use zerovec::ZeroSlice;
89
///
90
/// // The structured list corresponds to the list of integers.
91
/// let numbers: &[&[u32]] = &[
92
///     &[12, 25, 38],
93
///     &[39179, 100],
94
///     &[42, 55555],
95
///     &[12345, 54321, 9],
96
/// ];
97
///
98
/// #[derive(serde::Serialize, serde::Deserialize)]
99
/// struct Data<'a> {
100
///     #[serde(borrow)]
101
///     vecs: VarZeroVec<'a, ZeroSlice<u32>>,
102
/// }
103
///
104
/// let data = Data {
105
///     vecs: VarZeroVec::from(numbers),
106
/// };
107
///
108
/// let bincode_bytes =
109
///     bincode::serialize(&data).expect("Serialization should be successful");
110
///
111
/// let deserialized: Data = bincode::deserialize(&bincode_bytes)
112
///     .expect("Deserialization should be successful");
113
///
114
/// assert_eq!(deserialized.vecs[0].get(1).unwrap(), 25);
115
/// assert_eq!(deserialized.vecs[1], *numbers[1]);
116
/// ```
117
///
118
/// [`VarZeroVec`]s can be nested infinitely via a similar mechanism, see the docs of [`VarZeroSlice`]
119
/// for more information.
120
///
121
/// # How it Works
122
///
123
/// `VarZeroVec<T>`, when used with non-human-readable serializers (like `bincode`), will
124
/// serialize to a specially formatted list of bytes. The format is:
125
///
126
/// -  2 bytes for `length` (interpreted as a little-endian u16)
127
/// - `2 * (length - 1)` bytes of `indices` (interpreted as little-endian u16s)
128
/// - Remaining bytes for actual `data`
129
///
130
/// The format is tweakable by setting the `F` parameter, by default it uses u16 indices and lengths but other
131
/// `VarZeroVecFormat` types can set other sizes.
132
///
133
/// Each element in the `indices` array points to the ending index of its corresponding
134
/// data part in the `data` list. The starting index can be calculated from the ending index
135
/// of the previous element (or 0 for the first element). The last ending index, not stored in the array, is
136
/// the length of the `data` segment.
137
///
138
/// See [the design doc](https://github.com/unicode-org/icu4x/blob/main/utils/zerovec/design_doc.md) for more details.
139
///
140
/// [`ule`]: crate::ule
141
pub struct VarZeroVec<'a, T: ?Sized, F = Index16>(pub(crate) VarZeroVecInner<'a, T, F>);
142
143
pub(crate) enum VarZeroVecInner<'a, T: ?Sized, F = Index16> {
144
    #[cfg(feature = "alloc")]
145
    Owned(VarZeroVecOwned<T, F>),
146
    Borrowed(&'a VarZeroSlice<T, F>),
147
}
148
149
impl<'a, T: ?Sized, F> Clone for VarZeroVec<'a, T, F> {
150
0
    fn clone(&self) -> Self {
151
0
        match self.0 {
152
            #[cfg(feature = "alloc")]
153
0
            VarZeroVecInner::Owned(ref o) => o.clone().into(),
154
0
            VarZeroVecInner::Borrowed(b) => b.into(),
155
        }
156
0
    }
157
}
158
159
impl<T: VarULE + ?Sized, F: VarZeroVecFormat> fmt::Debug for VarZeroVec<'_, T, F>
160
where
161
    T: fmt::Debug,
162
{
163
0
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
164
0
        VarZeroSlice::fmt(self, f)
165
0
    }
Unexecuted instantiation: <zerovec::varzerovec::vec::VarZeroVec<str> as core::fmt::Debug>::fmt
Unexecuted instantiation: <zerovec::varzerovec::vec::VarZeroVec<_, _> as core::fmt::Debug>::fmt
166
}
167
168
#[cfg(feature = "alloc")]
169
impl<'a, T: ?Sized, F> From<VarZeroVecOwned<T, F>> for VarZeroVec<'a, T, F> {
170
    #[inline]
171
0
    fn from(other: VarZeroVecOwned<T, F>) -> Self {
172
0
        Self(VarZeroVecInner::Owned(other))
173
0
    }
174
}
175
176
impl<'a, T: ?Sized, F> From<&'a VarZeroSlice<T, F>> for VarZeroVec<'a, T, F> {
177
0
    fn from(other: &'a VarZeroSlice<T, F>) -> Self {
178
0
        Self(VarZeroVecInner::Borrowed(other))
179
0
    }
Unexecuted instantiation: <zerovec::varzerovec::vec::VarZeroVec<zerovec::zerovec::slice::ZeroSlice<icu_properties::props::Script>> as core::convert::From<&zerovec::varzerovec::slice::VarZeroSlice<zerovec::zerovec::slice::ZeroSlice<icu_properties::props::Script>>>>::from
Unexecuted instantiation: <zerovec::varzerovec::vec::VarZeroVec<str> as core::convert::From<&zerovec::varzerovec::slice::VarZeroSlice<str>>>::from
Unexecuted instantiation: <zerovec::varzerovec::vec::VarZeroVec<_, _> as core::convert::From<&zerovec::varzerovec::slice::VarZeroSlice<_, _>>>::from
180
}
181
182
#[cfg(feature = "alloc")]
183
impl<'a, T: ?Sized + VarULE, F: VarZeroVecFormat> From<VarZeroVec<'a, T, F>>
184
    for VarZeroVecOwned<T, F>
185
{
186
    #[inline]
187
0
    fn from(other: VarZeroVec<'a, T, F>) -> Self {
188
0
        match other.0 {
189
0
            VarZeroVecInner::Owned(o) => o,
190
0
            VarZeroVecInner::Borrowed(b) => b.into(),
191
        }
192
0
    }
193
}
194
195
impl<T: VarULE + ?Sized, F: VarZeroVecFormat> Default for VarZeroVec<'_, T, F> {
196
    #[inline]
197
0
    fn default() -> Self {
198
0
        Self::new()
199
0
    }
200
}
201
202
impl<T: VarULE + ?Sized, F: VarZeroVecFormat> Deref for VarZeroVec<'_, T, F> {
203
    type Target = VarZeroSlice<T, F>;
204
0
    fn deref(&self) -> &VarZeroSlice<T, F> {
205
0
        self.as_slice()
206
0
    }
Unexecuted instantiation: <zerovec::varzerovec::vec::VarZeroVec<zerovec::zerovec::slice::ZeroSlice<icu_properties::props::Script>> as core::ops::deref::Deref>::deref
Unexecuted instantiation: <zerovec::varzerovec::vec::VarZeroVec<str> as core::ops::deref::Deref>::deref
Unexecuted instantiation: <zerovec::varzerovec::vec::VarZeroVec<_, _> as core::ops::deref::Deref>::deref
207
}
208
209
impl<'a, T: VarULE + ?Sized, F: VarZeroVecFormat> VarZeroVec<'a, T, F> {
210
    /// Creates a new, empty `VarZeroVec<T>`.
211
    ///
212
    /// # Examples
213
    ///
214
    /// ```
215
    /// use zerovec::VarZeroVec;
216
    ///
217
    /// let vzv: VarZeroVec<str> = VarZeroVec::new();
218
    /// assert!(vzv.is_empty());
219
    /// ```
220
    #[inline]
221
0
    pub const fn new() -> Self {
222
0
        Self(VarZeroVecInner::Borrowed(VarZeroSlice::new_empty()))
223
0
    }
224
225
    /// Parse a VarZeroVec from a slice of the appropriate format
226
    ///
227
    /// Slices of the right format can be obtained via [`VarZeroSlice::as_bytes()`].
228
    ///
229
    /// # Example
230
    ///
231
    /// ```rust
232
    /// # use zerovec::VarZeroVec;
233
    ///
234
    /// let strings = vec!["foo", "bar", "baz", "quux"];
235
    /// let vec = VarZeroVec::<str>::from(&strings);
236
    ///
237
    /// assert_eq!(&vec[0], "foo");
238
    /// assert_eq!(&vec[1], "bar");
239
    /// assert_eq!(&vec[2], "baz");
240
    /// assert_eq!(&vec[3], "quux");
241
    /// ```
242
0
    pub fn parse_bytes(slice: &'a [u8]) -> Result<Self, UleError> {
243
0
        let borrowed = VarZeroSlice::<T, F>::parse_bytes(slice)?;
244
245
0
        Ok(Self(VarZeroVecInner::Borrowed(borrowed)))
246
0
    }
247
248
    /// Uses a `&[u8]` buffer as a `VarZeroVec<T>` without any verification.
249
    ///
250
    /// # Safety
251
    ///
252
    /// `bytes` needs to be an output from [`VarZeroSlice::as_bytes()`].
253
0
    pub const unsafe fn from_bytes_unchecked(bytes: &'a [u8]) -> Self {
254
0
        Self(VarZeroVecInner::Borrowed(core::mem::transmute::<
255
0
            &[u8],
256
0
            &VarZeroSlice<T, F>,
257
0
        >(bytes)))
258
0
    }
259
260
    /// Convert this into a mutable vector of the owned `T` type, cloning if necessary.
261
    ///
262
    ///
263
    /// # Example
264
    ///
265
    /// ```rust,ignore
266
    /// # use zerovec::VarZeroVec;
267
    /// let strings = vec!["foo", "bar", "baz", "quux"];
268
    /// let mut vec = VarZeroVec::<str>::from(&strings);
269
    ///
270
    /// assert_eq!(vec.len(), 4);
271
    /// let mutvec = vec.make_mut();
272
    /// mutvec.push("lorem ipsum".into());
273
    /// mutvec[2] = "dolor sit".into();
274
    /// assert_eq!(&vec[0], "foo");
275
    /// assert_eq!(&vec[1], "bar");
276
    /// assert_eq!(&vec[2], "dolor sit");
277
    /// assert_eq!(&vec[3], "quux");
278
    /// assert_eq!(&vec[4], "lorem ipsum");
279
    /// ```
280
    //
281
    // This function is crate-public for now since we don't yet want to stabilize
282
    // the internal implementation details
283
    #[cfg(feature = "alloc")]
284
0
    pub fn make_mut(&mut self) -> &mut VarZeroVecOwned<T, F> {
285
0
        match self.0 {
286
0
            VarZeroVecInner::Owned(ref mut vec) => vec,
287
0
            VarZeroVecInner::Borrowed(slice) => {
288
0
                let new_self = VarZeroVecOwned::from_slice(slice);
289
0
                *self = new_self.into();
290
                // recursion is limited since we are guaranteed to hit the Owned branch
291
0
                self.make_mut()
292
            }
293
        }
294
0
    }
295
296
    /// Converts a borrowed VarZeroVec to an owned VarZeroVec. No-op if already owned.
297
    ///
298
    /// # Example
299
    ///
300
    /// ```
301
    /// # use zerovec::VarZeroVec;
302
    ///
303
    /// let strings = vec!["foo", "bar", "baz", "quux"];
304
    /// let vec = VarZeroVec::<str>::from(&strings);
305
    ///
306
    /// assert_eq!(vec.len(), 4);
307
    /// // has 'static lifetime
308
    /// let owned = vec.into_owned();
309
    /// ```
310
    #[cfg(feature = "alloc")]
311
0
    pub fn into_owned(mut self) -> VarZeroVec<'static, T, F> {
312
0
        self.make_mut();
313
0
        match self.0 {
314
0
            VarZeroVecInner::Owned(vec) => vec.into(),
315
0
            _ => unreachable!(),
316
        }
317
0
    }
318
319
    /// Obtain this `VarZeroVec` as a [`VarZeroSlice`]
320
0
    pub fn as_slice(&self) -> &VarZeroSlice<T, F> {
321
0
        match self.0 {
322
            #[cfg(feature = "alloc")]
323
0
            VarZeroVecInner::Owned(ref owned) => owned,
324
0
            VarZeroVecInner::Borrowed(b) => b,
325
        }
326
0
    }
Unexecuted instantiation: <zerovec::varzerovec::vec::VarZeroVec<zerovec::zerovec::slice::ZeroSlice<icu_properties::props::Script>>>::as_slice
Unexecuted instantiation: <zerovec::varzerovec::vec::VarZeroVec<str>>::as_slice
Unexecuted instantiation: <zerovec::varzerovec::vec::VarZeroVec<_, _>>::as_slice
327
328
    /// Takes the byte vector representing the encoded data of this VarZeroVec. If borrowed,
329
    /// this function allocates a byte vector and copies the borrowed bytes into it.
330
    ///
331
    /// The bytes can be passed back to [`Self::parse_bytes()`].
332
    ///
333
    /// To get a reference to the bytes without moving, see [`VarZeroSlice::as_bytes()`].
334
    ///
335
    /// # Example
336
    ///
337
    /// ```rust
338
    /// # use zerovec::VarZeroVec;
339
    ///
340
    /// let strings = vec!["foo", "bar", "baz"];
341
    /// let bytes = VarZeroVec::<str>::from(&strings).into_bytes();
342
    ///
343
    /// let mut borrowed: VarZeroVec<str> =
344
    ///     VarZeroVec::parse_bytes(&bytes).unwrap();
345
    /// assert_eq!(borrowed, &*strings);
346
    /// ```
347
    #[cfg(feature = "alloc")]
348
0
    pub fn into_bytes(self) -> alloc::vec::Vec<u8> {
349
0
        match self.0 {
350
            #[cfg(feature = "alloc")]
351
0
            VarZeroVecInner::Owned(vec) => vec.into_bytes(),
352
0
            VarZeroVecInner::Borrowed(vec) => vec.as_bytes().to_vec(),
353
        }
354
0
    }
355
356
    /// Return whether the [`VarZeroVec`] is operating on owned or borrowed
357
    /// data. [`VarZeroVec::into_owned()`] and [`VarZeroVec::make_mut()`] can
358
    /// be used to force it into an owned type
359
0
    pub fn is_owned(&self) -> bool {
360
0
        match self.0 {
361
            #[cfg(feature = "alloc")]
362
0
            VarZeroVecInner::Owned(..) => true,
363
0
            VarZeroVecInner::Borrowed(..) => false,
364
        }
365
0
    }
366
367
    #[doc(hidden)]
368
0
    pub fn as_components<'b>(&'b self) -> VarZeroVecComponents<'b, T, F> {
369
0
        self.as_slice().as_components()
370
0
    }
371
}
372
373
#[cfg(feature = "alloc")]
374
impl<A, T, F> From<&alloc::vec::Vec<A>> for VarZeroVec<'static, T, F>
375
where
376
    T: VarULE + ?Sized,
377
    A: EncodeAsVarULE<T>,
378
    F: VarZeroVecFormat,
379
{
380
    #[inline]
381
0
    fn from(elements: &alloc::vec::Vec<A>) -> Self {
382
0
        Self::from(elements.as_slice())
383
0
    }
384
}
385
386
#[cfg(feature = "alloc")]
387
impl<A, T, F> From<&[A]> for VarZeroVec<'static, T, F>
388
where
389
    T: VarULE + ?Sized,
390
    A: EncodeAsVarULE<T>,
391
    F: VarZeroVecFormat,
392
{
393
    #[inline]
394
0
    fn from(elements: &[A]) -> Self {
395
0
        if elements.is_empty() {
396
0
            VarZeroSlice::new_empty().into()
397
        } else {
398
            #[expect(clippy::unwrap_used)] // TODO(#1410) Better story for fallibility
399
0
            VarZeroVecOwned::try_from_elements(elements).unwrap().into()
400
        }
401
0
    }
402
}
403
404
#[cfg(feature = "alloc")]
405
impl<A, T, F, const N: usize> From<&[A; N]> for VarZeroVec<'static, T, F>
406
where
407
    T: VarULE + ?Sized,
408
    A: EncodeAsVarULE<T>,
409
    F: VarZeroVecFormat,
410
{
411
    #[inline]
412
0
    fn from(elements: &[A; N]) -> Self {
413
0
        Self::from(elements.as_slice())
414
0
    }
415
}
416
417
impl<'a, 'b, T, F> PartialEq<VarZeroVec<'b, T, F>> for VarZeroVec<'a, T, F>
418
where
419
    T: VarULE,
420
    T: ?Sized,
421
    T: PartialEq,
422
    F: VarZeroVecFormat,
423
{
424
    #[inline]
425
0
    fn eq(&self, other: &VarZeroVec<'b, T, F>) -> bool {
426
        // VZV::from_elements used to produce a non-canonical representation of the
427
        // empty VZV, so we cannot use byte equality for empty vecs.
428
0
        if self.is_empty() || other.is_empty() {
429
0
            return self.is_empty() && other.is_empty();
430
0
        }
431
        // VarULE has an API guarantee that byte equality is semantic equality.
432
        // For non-empty VZVs, there's only a single metadata representation,
433
        // so this guarantee extends to the whole VZV representation.
434
0
        self.as_bytes().eq(other.as_bytes())
435
0
    }
436
}
437
438
impl<'a, T, F> Eq for VarZeroVec<'a, T, F>
439
where
440
    T: VarULE,
441
    T: ?Sized,
442
    T: Eq,
443
    F: VarZeroVecFormat,
444
{
445
}
446
447
impl<T, A, F> PartialEq<&'_ [A]> for VarZeroVec<'_, T, F>
448
where
449
    T: VarULE + ?Sized,
450
    T: PartialEq,
451
    A: AsRef<T>,
452
    F: VarZeroVecFormat,
453
{
454
    #[inline]
455
0
    fn eq(&self, other: &&[A]) -> bool {
456
0
        self.iter().eq(other.iter().map(|t| t.as_ref()))
457
0
    }
458
}
459
460
impl<T, A, F, const N: usize> PartialEq<[A; N]> for VarZeroVec<'_, T, F>
461
where
462
    T: VarULE + ?Sized,
463
    T: PartialEq,
464
    A: AsRef<T>,
465
    F: VarZeroVecFormat,
466
{
467
    #[inline]
468
0
    fn eq(&self, other: &[A; N]) -> bool {
469
0
        self.iter().eq(other.iter().map(|t| t.as_ref()))
470
0
    }
471
}
472
473
impl<'a, T: VarULE + ?Sized + PartialOrd, F: VarZeroVecFormat> PartialOrd for VarZeroVec<'a, T, F> {
474
0
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
475
0
        self.iter().partial_cmp(other.iter())
476
0
    }
477
}
478
479
impl<'a, T: VarULE + ?Sized + Ord, F: VarZeroVecFormat> Ord for VarZeroVec<'a, T, F> {
480
0
    fn cmp(&self, other: &Self) -> Ordering {
481
0
        self.iter().cmp(other.iter())
482
0
    }
483
}
484
485
#[test]
486
fn assert_single_empty_representation() {
487
    assert_eq!(
488
        VarZeroVec::<str>::new().as_bytes(),
489
        VarZeroVec::<str>::from(&[] as &[&str]).as_bytes()
490
    );
491
492
    use crate::map::MutableZeroVecLike;
493
    let mut vzv = VarZeroVec::<str>::from(&["hello", "world"][..]);
494
    assert_eq!(vzv.len(), 2);
495
    assert!(!vzv.as_bytes().is_empty());
496
    vzv.zvl_remove(0);
497
    assert_eq!(vzv.len(), 1);
498
    assert!(!vzv.as_bytes().is_empty());
499
    vzv.zvl_remove(0);
500
    assert_eq!(vzv.len(), 0);
501
    assert!(vzv.as_bytes().is_empty());
502
    vzv.zvl_insert(0, "something");
503
    assert_eq!(vzv.len(), 1);
504
    assert!(!vzv.as_bytes().is_empty());
505
}
506
507
#[test]
508
fn weird_empty_representation_equality() {
509
    assert_eq!(
510
        VarZeroVec::<str>::parse_bytes(&[0, 0, 0, 0]).unwrap(),
511
        VarZeroVec::<str>::parse_bytes(&[]).unwrap()
512
    );
513
}