/rust/registry/src/index.crates.io-6f17d22bba15001f/zerovec-0.10.4/src/flexzerovec/slice.rs
Line | Count | Source (jump to first uncovered line) |
1 | | // This file is part of ICU4X. For terms of use, please see the file |
2 | | // called LICENSE at the top level of the ICU4X source tree |
3 | | // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). |
4 | | |
5 | | use super::FlexZeroVec; |
6 | | use crate::ZeroVecError; |
7 | | use alloc::vec::Vec; |
8 | | use core::cmp::Ordering; |
9 | | use core::fmt; |
10 | | use core::mem; |
11 | | use core::ops::Range; |
12 | | |
/// Number of bytes in a `usize` on the target platform; the maximum element width.
const USIZE_WIDTH: usize = mem::size_of::<usize>();

/// A zero-copy "slice" that efficiently represents `[usize]`.
///
/// Elements are stored little-endian, each occupying exactly `width` bytes of `data`.
#[repr(C, packed)]
pub struct FlexZeroSlice {
    // Hard Invariant: 1 <= width <= USIZE_WIDTH (which is target_pointer_width)
    // Soft Invariant: width == the width of the largest element
    width: u8,
    // Hard Invariant: data.len() % width == 0
    data: [u8],
}
24 | | |
25 | | impl fmt::Debug for FlexZeroSlice { |
26 | 0 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
27 | 0 | self.to_vec().fmt(f) |
28 | 0 | } |
29 | | } |
30 | | |
31 | | impl PartialEq for FlexZeroSlice { |
32 | 0 | fn eq(&self, other: &Self) -> bool { |
33 | 0 | self.width == other.width && self.data == other.data |
34 | 0 | } |
35 | | } |
36 | | impl Eq for FlexZeroSlice {} |
37 | | |
/// Helper function to decode a little-endian "chunk" (byte slice of a specific length)
/// into a `usize`. We cannot call `usize::from_le_bytes` directly because that function
/// requires the high bits to be set to 0.
#[inline]
pub(crate) fn chunk_to_usize(chunk: &[u8], width: usize) -> usize {
    debug_assert_eq!(chunk.len(), width);
    // Walk the bytes from most-significant to least-significant, shifting the
    // accumulator up one byte each step; absent high bytes are implicitly zero.
    chunk
        .iter()
        .rev()
        .fold(0usize, |acc, &byte| (acc << 8) | usize::from(byte))
}
49 | | |
impl FlexZeroSlice {
    /// Constructs a new empty [`FlexZeroSlice`].
    ///
    /// ```
    /// use zerovec::vecs::FlexZeroSlice;
    ///
    /// const EMPTY_SLICE: &FlexZeroSlice = FlexZeroSlice::new_empty();
    ///
    /// assert!(EMPTY_SLICE.is_empty());
    /// assert_eq!(EMPTY_SLICE.len(), 0);
    /// assert_eq!(EMPTY_SLICE.first(), None);
    /// ```
    #[inline]
    pub const fn new_empty() -> &'static Self {
        // Canonical empty representation: width byte of 1, no data bytes.
        const ARR: &[u8] = &[1u8];
        // Safety: The slice is a valid empty `FlexZeroSlice`
        unsafe { Self::from_byte_slice_unchecked(ARR) }
    }

    /// Safely constructs a [`FlexZeroSlice`] from a byte array.
    ///
    /// The first byte is the element width; the remaining bytes are the packed
    /// little-endian elements. Only the hard invariants (1 <= width <= USIZE_WIDTH,
    /// data length divisible by width) are checked here.
    ///
    /// # Examples
    ///
    /// ```
    /// use zerovec::vecs::FlexZeroSlice;
    ///
    /// const FZS: &FlexZeroSlice = match FlexZeroSlice::parse_byte_slice(&[
    ///     2, // width
    ///     0x42, 0x00, // first value
    ///     0x07, 0x09, // second value
    ///     0xFF, 0xFF, // third value
    /// ]) {
    ///     Ok(v) => v,
    ///     Err(_) => panic!("invalid bytes"),
    /// };
    ///
    /// assert!(!FZS.is_empty());
    /// assert_eq!(FZS.len(), 3);
    /// assert_eq!(FZS.first(), Some(0x0042));
    /// assert_eq!(FZS.get(0), Some(0x0042));
    /// assert_eq!(FZS.get(1), Some(0x0907));
    /// assert_eq!(FZS.get(2), Some(0xFFFF));
    /// assert_eq!(FZS.get(3), None);
    /// assert_eq!(FZS.last(), Some(0xFFFF));
    /// ```
    pub const fn parse_byte_slice(bytes: &[u8]) -> Result<&Self, ZeroVecError> {
        let (width_u8, data) = match bytes.split_first() {
            Some(v) => v,
            None => {
                // A valid byte representation is never empty: it always carries a width byte.
                return Err(ZeroVecError::InvalidLength {
                    ty: "FlexZeroSlice",
                    len: 0,
                })
            }
        };
        let width = *width_u8 as usize;
        if width < 1 || width > USIZE_WIDTH {
            return Err(ZeroVecError::ParseError {
                ty: "FlexZeroSlice",
            });
        }
        if data.len() % width != 0 {
            return Err(ZeroVecError::InvalidLength {
                ty: "FlexZeroSlice",
                len: bytes.len(),
            });
        }
        // Safety: All hard invariants have been checked.
        // Note: The soft invariant requires a linear search that we don't do here.
        Ok(unsafe { Self::from_byte_slice_unchecked(bytes) })
    }

    /// Constructs a [`FlexZeroSlice`] without checking invariants.
    ///
    /// # Panics
    ///
    /// Panics if `bytes` is empty.
    ///
    /// # Safety
    ///
    /// Must be called on a valid [`FlexZeroSlice`] byte array.
    #[inline]
    pub const unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &Self {
        // Safety: The DST of FlexZeroSlice is a pointer to the `width` element and has a metadata
        // equal to the length of the `data` field, which will be one less than the length of the
        // overall array.
        #[allow(clippy::panic)] // panic is documented in function contract
        if bytes.is_empty() {
            panic!("from_byte_slice_unchecked called with empty slice")
        }
        let slice = core::ptr::slice_from_raw_parts(bytes.as_ptr(), bytes.len() - 1);
        &*(slice as *const Self)
    }

    /// Mutable counterpart of [`Self::from_byte_slice_unchecked`].
    ///
    /// # Safety
    ///
    /// Must be called on a valid, non-empty `FlexZeroSlice` byte array.
    #[inline]
    pub(crate) unsafe fn from_byte_slice_mut_unchecked(bytes: &mut [u8]) -> &mut Self {
        // Safety: See comments in `from_byte_slice_unchecked`
        let remainder = core::ptr::slice_from_raw_parts_mut(bytes.as_mut_ptr(), bytes.len() - 1);
        &mut *(remainder as *mut Self)
    }

    /// Returns this slice as its underlying `&[u8]` byte buffer representation.
    ///
    /// Useful for serialization.
    ///
    /// # Example
    ///
    /// ```
    /// use zerovec::vecs::FlexZeroSlice;
    ///
    /// let bytes: &[u8] = &[2, 0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x80];
    /// let fzv = FlexZeroSlice::parse_byte_slice(bytes).expect("valid bytes");
    ///
    /// assert_eq!(bytes, fzv.as_bytes());
    /// ```
    #[inline]
    pub fn as_bytes(&self) -> &[u8] {
        // Safety: See comments in `from_byte_slice_unchecked`. The reverse cast: the
        // full byte buffer is the width byte plus the `data` payload.
        unsafe {
            core::slice::from_raw_parts(self as *const Self as *const u8, self.data.len() + 1)
        }
    }

    /// Borrows this `FlexZeroSlice` as a [`FlexZeroVec::Borrowed`].
    #[inline]
    pub const fn as_flexzerovec(&self) -> FlexZeroVec {
        FlexZeroVec::Borrowed(self)
    }

    /// Returns the number of elements in the `FlexZeroSlice`.
    #[inline]
    pub fn len(&self) -> usize {
        // Hard invariant guarantees data.len() is divisible by the width.
        self.data.len() / self.get_width()
    }

    /// Returns the element width in bytes as a `usize` (always in 1..=USIZE_WIDTH).
    #[inline]
    pub(crate) fn get_width(&self) -> usize {
        usize::from(self.width)
    }

    /// Returns whether there are zero elements in the `FlexZeroSlice`.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.data.len() == 0
    }

    /// Gets the element at `index`, or `None` if `index >= self.len()`.
    ///
    /// # Examples
    ///
    /// ```
    /// use zerovec::vecs::FlexZeroVec;
    ///
    /// let fzv: FlexZeroVec = [22, 33].iter().copied().collect();
    /// assert_eq!(fzv.get(0), Some(22));
    /// assert_eq!(fzv.get(1), Some(33));
    /// assert_eq!(fzv.get(2), None);
    /// ```
    #[inline]
    pub fn get(&self, index: usize) -> Option<usize> {
        if index >= self.len() {
            None
        } else {
            // Safety: index is in range (checked on the previous line).
            Some(unsafe { self.get_unchecked(index) })
        }
    }

    /// Gets the element at `index` as a chunk of bytes, or `None` if `index >= self.len()`.
    #[inline]
    pub(crate) fn get_chunk(&self, index: usize) -> Option<&[u8]> {
        let w = self.get_width();
        self.data.get(index * w..index * w + w)
    }

    /// Gets the element at `index` without checking bounds.
    ///
    /// # Safety
    ///
    /// `index` must be in-range.
    #[inline]
    pub unsafe fn get_unchecked(&self, index: usize) -> usize {
        // Special-case widths 1 and 2 for direct fixed-size reads; fall through to a
        // byte-copy into a zeroed buffer for wider (or unusual) widths.
        match self.width {
            1 => *self.data.get_unchecked(index) as usize,
            2 => {
                let ptr = self.data.as_ptr().add(index * 2);
                // Unaligned 2-byte read via ptr::read of a byte array.
                u16::from_le_bytes(core::ptr::read(ptr as *const [u8; 2])) as usize
            }
            _ => {
                let mut bytes = [0; USIZE_WIDTH];
                let w = self.get_width();
                assert!(w <= USIZE_WIDTH);
                let ptr = self.data.as_ptr().add(index * w);
                core::ptr::copy_nonoverlapping(ptr, bytes.as_mut_ptr(), w);
                usize::from_le_bytes(bytes)
            }
        }
    }

    /// Gets the first element of the slice, or `None` if the slice is empty.
    #[inline]
    pub fn first(&self) -> Option<usize> {
        let w = self.get_width();
        self.data.get(0..w).map(|chunk| chunk_to_usize(chunk, w))
    }

    /// Gets the last element of the slice, or `None` if the slice is empty.
    #[inline]
    pub fn last(&self) -> Option<usize> {
        let l = self.data.len();
        if l == 0 {
            None
        } else {
            let w = self.get_width();
            self.data
                .get(l - w..l)
                .map(|chunk| chunk_to_usize(chunk, w))
        }
    }

    /// Gets an iterator over the elements of the slice as `usize`.
    #[inline]
    pub fn iter(
        &self,
    ) -> impl DoubleEndedIterator<Item = usize> + '_ + ExactSizeIterator<Item = usize> {
        let w = self.get_width();
        self.data
            .chunks_exact(w)
            .map(move |chunk| chunk_to_usize(chunk, w))
    }

    /// Gets an iterator over pairs of elements.
    ///
    /// The second element of the final pair is `None`.
    ///
    /// # Examples
    ///
    /// ```
    /// use zerovec::vecs::FlexZeroVec;
    ///
    /// let nums: &[usize] = &[211, 281, 421, 461];
    /// let fzv: FlexZeroVec = nums.iter().copied().collect();
    ///
    /// let mut pairs_it = fzv.iter_pairs();
    ///
    /// assert_eq!(pairs_it.next(), Some((211, Some(281))));
    /// assert_eq!(pairs_it.next(), Some((281, Some(421))));
    /// assert_eq!(pairs_it.next(), Some((421, Some(461))));
    /// assert_eq!(pairs_it.next(), Some((461, None)));
    /// assert_eq!(pairs_it.next(), None);
    /// ```
    pub fn iter_pairs(&self) -> impl Iterator<Item = (usize, Option<usize>)> + '_ {
        // Zip the iterator with itself offset by one; pad the offset side with a
        // trailing None so the last element still yields a pair.
        self.iter().zip(self.iter().skip(1).map(Some).chain([None]))
    }

    /// Creates a `Vec<usize>` from a [`FlexZeroSlice`] (or `FlexZeroVec`).
    ///
    /// # Examples
    ///
    /// ```
    /// use zerovec::vecs::FlexZeroVec;
    ///
    /// let nums: &[usize] = &[211, 281, 421, 461];
    /// let fzv: FlexZeroVec = nums.iter().copied().collect();
    /// let vec: Vec<usize> = fzv.to_vec();
    ///
    /// assert_eq!(nums, vec.as_slice());
    /// ```
    #[inline]
    pub fn to_vec(&self) -> Vec<usize> {
        self.iter().collect()
    }

    /// Binary searches a sorted `FlexZeroSlice` for the given `usize` value.
    ///
    /// # Examples
    ///
    /// ```
    /// use zerovec::vecs::FlexZeroVec;
    ///
    /// let nums: &[usize] = &[211, 281, 421, 461];
    /// let fzv: FlexZeroVec = nums.iter().copied().collect();
    ///
    /// assert_eq!(fzv.binary_search(0), Err(0));
    /// assert_eq!(fzv.binary_search(211), Ok(0));
    /// assert_eq!(fzv.binary_search(250), Err(1));
    /// assert_eq!(fzv.binary_search(281), Ok(1));
    /// assert_eq!(fzv.binary_search(300), Err(2));
    /// assert_eq!(fzv.binary_search(421), Ok(2));
    /// assert_eq!(fzv.binary_search(450), Err(3));
    /// assert_eq!(fzv.binary_search(461), Ok(3));
    /// assert_eq!(fzv.binary_search(462), Err(4));
    /// ```
    #[inline]
    pub fn binary_search(&self, needle: usize) -> Result<usize, usize> {
        self.binary_search_by(|probe| probe.cmp(&needle))
    }

    /// Binary searches a sorted range of a `FlexZeroSlice` for the given `usize` value.
    ///
    /// The indices in the return value are relative to the start of the range.
    ///
    /// # Examples
    ///
    /// ```
    /// use zerovec::vecs::FlexZeroVec;
    ///
    /// // Make a FlexZeroVec with two sorted ranges: 0..3 and 3..5
    /// let nums: &[usize] = &[111, 222, 444, 333, 555];
    /// let fzv: FlexZeroVec = nums.iter().copied().collect();
    ///
    /// // Search in the first range:
    /// assert_eq!(fzv.binary_search_in_range(0, 0..3), Some(Err(0)));
    /// assert_eq!(fzv.binary_search_in_range(111, 0..3), Some(Ok(0)));
    /// assert_eq!(fzv.binary_search_in_range(199, 0..3), Some(Err(1)));
    /// assert_eq!(fzv.binary_search_in_range(222, 0..3), Some(Ok(1)));
    /// assert_eq!(fzv.binary_search_in_range(399, 0..3), Some(Err(2)));
    /// assert_eq!(fzv.binary_search_in_range(444, 0..3), Some(Ok(2)));
    /// assert_eq!(fzv.binary_search_in_range(999, 0..3), Some(Err(3)));
    ///
    /// // Search in the second range:
    /// assert_eq!(fzv.binary_search_in_range(0, 3..5), Some(Err(0)));
    /// assert_eq!(fzv.binary_search_in_range(333, 3..5), Some(Ok(0)));
    /// assert_eq!(fzv.binary_search_in_range(399, 3..5), Some(Err(1)));
    /// assert_eq!(fzv.binary_search_in_range(555, 3..5), Some(Ok(1)));
    /// assert_eq!(fzv.binary_search_in_range(999, 3..5), Some(Err(2)));
    ///
    /// // Out-of-bounds range:
    /// assert_eq!(fzv.binary_search_in_range(0, 4..6), None);
    /// ```
    #[inline]
    pub fn binary_search_in_range(
        &self,
        needle: usize,
        range: Range<usize>,
    ) -> Option<Result<usize, usize>> {
        self.binary_search_in_range_by(|probe| probe.cmp(&needle), range)
    }

    /// Binary searches a sorted `FlexZeroSlice` according to a predicate function.
    ///
    /// The predicate receives decoded element values and returns their ordering
    /// relative to the needle.
    #[inline]
    pub fn binary_search_by(
        &self,
        predicate: impl FnMut(usize) -> Ordering,
    ) -> Result<usize, usize> {
        debug_assert!(self.len() <= self.data.len());
        // Safety: self.len() <= self.data.len()
        let scaled_slice = unsafe { self.data.get_unchecked(0..self.len()) };
        self.binary_search_impl(predicate, scaled_slice)
    }

    /// Binary searches a sorted range of a `FlexZeroSlice` according to a predicate function.
    ///
    /// The indices in the return value are relative to the start of the range.
    #[inline]
    pub fn binary_search_in_range_by(
        &self,
        predicate: impl FnMut(usize) -> Ordering,
        range: Range<usize>,
    ) -> Option<Result<usize, usize>> {
        // Note: We need to check bounds separately, since `self.data.get(range)` does not return
        // bounds errors, since it is indexing directly into the upscaled data array
        if range.start > self.len() || range.end > self.len() {
            return None;
        }
        let scaled_slice = self.data.get(range)?;
        Some(self.binary_search_impl(predicate, scaled_slice))
    }

    /// Binary searches a `FlexZeroSlice` by its indices.
    ///
    /// The `predicate` function is passed in-bounds indices into the `FlexZeroSlice`.
    #[inline]
    pub fn binary_search_with_index(
        &self,
        predicate: impl FnMut(usize) -> Ordering,
    ) -> Result<usize, usize> {
        debug_assert!(self.len() <= self.data.len());
        // Safety: self.len() <= self.data.len()
        let scaled_slice = unsafe { self.data.get_unchecked(0..self.len()) };
        self.binary_search_with_index_impl(predicate, scaled_slice)
    }

    /// Binary searches a range of a `FlexZeroSlice` by its indices.
    ///
    /// The `predicate` function is passed in-bounds indices into the `FlexZeroSlice`, which are
    /// relative to the start of the entire slice.
    ///
    /// The indices in the return value are relative to the start of the range.
    #[inline]
    pub fn binary_search_in_range_with_index(
        &self,
        predicate: impl FnMut(usize) -> Ordering,
        range: Range<usize>,
    ) -> Option<Result<usize, usize>> {
        // Note: We need to check bounds separately, since `self.data.get(range)` does not return
        // bounds errors, since it is indexing directly into the upscaled data array
        if range.start > self.len() || range.end > self.len() {
            return None;
        }
        let scaled_slice = self.data.get(range)?;
        Some(self.binary_search_with_index_impl(predicate, scaled_slice))
    }

    /// Adapter: turns a value-based predicate into an index-based one by decoding
    /// the element at each probed index.
    ///
    /// # Safety
    ///
    /// `scaled_slice` must be a subslice of `self.data`
    #[inline]
    fn binary_search_impl(
        &self,
        mut predicate: impl FnMut(usize) -> Ordering,
        scaled_slice: &[u8],
    ) -> Result<usize, usize> {
        self.binary_search_with_index_impl(
            |index| {
                // Safety: The contract of `binary_search_with_index_impl` says `index` is in bounds
                let actual_probe = unsafe { self.get_unchecked(index) };
                predicate(actual_probe)
            },
            scaled_slice,
        )
    }

    /// `predicate` is passed a valid index as an argument.
    ///
    /// # Safety
    ///
    /// `scaled_slice` must be a subslice of `self.data`
    fn binary_search_with_index_impl(
        &self,
        mut predicate: impl FnMut(usize) -> Ordering,
        scaled_slice: &[u8],
    ) -> Result<usize, usize> {
        // This code is an absolute atrocity. This code is not a place of honor. This
        // code is known to the State of California to cause cancer.
        //
        // Unfortunately, the stdlib's `binary_search*` functions can only operate on slices.
        // We do not have a slice. We have something we can .get() and index on, but that is not
        // a slice.
        //
        // The `binary_search*` functions also do not have a variant where they give you the
        // element's index, which we could otherwise use to directly index `self`.
        //
        // However, `binary_search_by()` provides references to the elements of the slice being
        // iterated. Since the layout of Rust slices is well-defined, we can do pointer arithmetic
        // on these references to obtain the index being used by the search.
        //
        // It's worth noting that the _contents_ of the slice we search are irrelevant, as long as
        // it has the appropriate length: `scaled_slice` is a subslice of `self.data` with one byte
        // per logical element, so it is convenient to use here and does not require additional
        // allocations.
        //
        // The alternative to doing this is to implement our own binary search. This is
        // significantly less fun.

        // Note: We always use zero_index relative to the whole data array, even if we are
        // only searching a subslice of it.
        let zero_index = self.data.as_ptr() as *const _ as usize;
        scaled_slice.binary_search_by(|probe: &_| {
            // Note: `scaled_slice` is a slice of u8, so each element is 1 byte and the
            // pointer difference is exactly the logical element index.
            let index = probe as *const _ as usize - zero_index;
            predicate(index)
        })
    }
}
514 | | |
515 | | #[inline] |
516 | 0 | pub(crate) fn get_item_width(item_bytes: &[u8; USIZE_WIDTH]) -> usize { |
517 | 0 | USIZE_WIDTH - item_bytes.iter().rev().take_while(|b| **b == 0).count() |
518 | 0 | } |
519 | | |
/// Pre-computed information about a pending insertion operation.
///
/// Do not create one of these directly; call `get_insert_info()`.
pub(crate) struct InsertInfo {
    /// The bytes to be inserted, with zero-fill up to `USIZE_WIDTH`.
    pub item_bytes: [u8; USIZE_WIDTH],
    /// The new item width after insertion.
    pub new_width: usize,
    /// The new number of items in the vector: self.len() after insertion.
    pub new_count: usize,
    /// The new number of bytes required for the entire slice (self.data.len() + 1).
    pub new_bytes_len: usize,
}
533 | | |
impl FlexZeroSlice {
    /// Compute the [`InsertInfo`] for inserting the specified item anywhere into the vector.
    ///
    /// The width may grow if `new_item` is wider than every current element.
    ///
    /// # Panics
    ///
    /// Panics if inserting the element would require allocating more than `usize::MAX` bytes.
    pub(crate) fn get_insert_info(&self, new_item: usize) -> InsertInfo {
        let item_bytes = new_item.to_le_bytes();
        let item_width = get_item_width(&item_bytes);
        let old_width = self.get_width();
        // The slice width must accommodate both the existing elements and the new one.
        let new_width = core::cmp::max(old_width, item_width);
        let new_count = 1 + (self.data.len() / old_width);
        #[allow(clippy::unwrap_used)] // panic is documented in function contract
        let new_bytes_len = new_count
            .checked_mul(new_width)
            .unwrap()
            .checked_add(1)
            .unwrap();
        InsertInfo {
            item_bytes,
            new_width,
            new_count,
            new_bytes_len,
        }
    }

    /// This function should be called on a slice with a data array `new_data_len` long
    /// which previously held `new_count - 1` elements.
    ///
    /// After calling this function, all bytes in the slice will have been written.
    pub(crate) fn insert_impl(&mut self, insert_info: InsertInfo, insert_index: usize) {
        let InsertInfo {
            item_bytes,
            new_width,
            new_count,
            new_bytes_len,
        } = insert_info;
        debug_assert!(new_width <= USIZE_WIDTH);
        debug_assert!(new_width >= self.get_width());
        debug_assert!(insert_index < new_count);
        debug_assert_eq!(new_bytes_len, new_count * new_width + 1);
        debug_assert_eq!(new_bytes_len, self.data.len() + 1);
        // For efficiency, calculate how many items we can skip copying.
        // If the width is unchanged, items before the insertion point keep their bytes;
        // if the width grew, every item must be re-packed at the wider stride.
        let lower_i = if new_width == self.get_width() {
            insert_index
        } else {
            0
        };
        // Copy elements starting from the end into the new empty section of the vector.
        // Note: We could copy fully in place, but we need to set 0 bytes for the high bytes,
        // so we stage the new value on the stack.
        for i in (lower_i..new_count).rev() {
            let bytes_to_write = if i == insert_index {
                item_bytes
            } else {
                // Elements past the insertion point shift up by one logical slot.
                let j = if i > insert_index { i - 1 } else { i };
                debug_assert!(j < new_count - 1);
                // Safety: j is in range (assertion on previous line), and it has not been
                // overwritten yet since we are walking backwards.
                unsafe { self.get_unchecked(j).to_le_bytes() }
            };
            // Safety: The vector has capacity for `new_width` items at the new index, which is
            // later in the array than the bytes that we read above.
            unsafe {
                core::ptr::copy_nonoverlapping(
                    bytes_to_write.as_ptr(),
                    self.data.as_mut_ptr().add(new_width * i),
                    new_width,
                );
            }
        }
        // Note: `get_insert_info` caps new_width at USIZE_WIDTH, so the cast is lossless.
        self.width = new_width as u8;
    }
}
608 | | |
/// Pre-computed information about a pending removal operation.
///
/// Do not create one of these directly; call `get_remove_info()` or `get_sorted_pop_info()`.
pub(crate) struct RemoveInfo {
    /// The index of the item to be removed.
    pub remove_index: usize,
    /// The new item width after removal.
    pub new_width: usize,
    /// The new number of items in the vector: self.len() after removal.
    pub new_count: usize,
    /// The new number of bytes required for the entire slice (self.data.len() + 1).
    pub new_bytes_len: usize,
}
622 | | |
impl FlexZeroSlice {
    /// Compute the [`RemoveInfo`] for removing the item at the specified index.
    ///
    /// If the removed item was (one of) the widest, this scans the remaining elements
    /// to find the new width.
    pub(crate) fn get_remove_info(&self, remove_index: usize) -> RemoveInfo {
        debug_assert!(remove_index < self.len());
        // Safety: remove_index is in range (assertion on previous line)
        let item_bytes = unsafe { self.get_unchecked(remove_index).to_le_bytes() };
        let item_width = get_item_width(&item_bytes);
        let old_width = self.get_width();
        let old_count = self.data.len() / old_width;
        let new_width = if item_width < old_width {
            // The removed item was not the widest; the width cannot shrink.
            old_width
        } else {
            debug_assert_eq!(old_width, item_width);
            // We might be removing the widest element. If so, we need to scale down.
            let mut largest_width = 1;
            for i in 0..old_count {
                if i == remove_index {
                    continue;
                }
                // Safety: i is in range (between 0 and old_count)
                let curr_bytes = unsafe { self.get_unchecked(i).to_le_bytes() };
                let curr_width = get_item_width(&curr_bytes);
                largest_width = core::cmp::max(curr_width, largest_width);
            }
            largest_width
        };
        let new_count = old_count - 1;
        // Note: the following line won't overflow because we are making the slice shorter.
        let new_bytes_len = new_count * new_width + 1;
        RemoveInfo {
            remove_index,
            new_width,
            new_count,
            new_bytes_len,
        }
    }

    /// Returns the [`RemoveInfo`] for removing the last element. Should be called
    /// on a slice sorted in ascending order.
    ///
    /// This is more efficient than `get_remove_info()` because it doesn't require a
    /// linear traversal of the vector in order to calculate `new_width`.
    pub(crate) fn get_sorted_pop_info(&self) -> RemoveInfo {
        debug_assert!(!self.is_empty());
        let remove_index = self.len() - 1;
        let old_count = self.len();
        let new_width = if old_count == 1 {
            // Popping the only element leaves an empty slice, whose width is 1.
            1
        } else {
            // In a sorted ascending slice, the second-to-last element is the largest
            // survivor, so its width determines the new slice width.
            // Safety: the FlexZeroSlice has at least two elements
            let largest_item = unsafe { self.get_unchecked(remove_index - 1).to_le_bytes() };
            get_item_width(&largest_item)
        };
        let new_count = old_count - 1;
        // Note: the following line won't overflow because we are making the slice shorter.
        let new_bytes_len = new_count * new_width + 1;
        RemoveInfo {
            remove_index,
            new_width,
            new_count,
            new_bytes_len,
        }
    }

    /// This function should be called on a valid slice.
    ///
    /// After calling this function, the slice data should be truncated to `new_data_len` bytes.
    pub(crate) fn remove_impl(&mut self, remove_info: RemoveInfo) {
        let RemoveInfo {
            remove_index,
            new_width,
            new_count,
            ..
        } = remove_info;
        debug_assert!(new_width <= self.get_width());
        debug_assert!(new_count < self.len());
        // For efficiency, calculate how many items we can skip copying.
        // If the width is unchanged, items before the removal point keep their bytes;
        // if the width shrank, every item must be re-packed at the narrower stride.
        let lower_i = if new_width == self.get_width() {
            remove_index
        } else {
            0
        };
        // Copy elements starting from the beginning to compress the vector to fewer bytes.
        for i in lower_i..new_count {
            // Elements past the removal point shift down by one logical slot.
            let j = if i < remove_index { i } else { i + 1 };
            // Safety: j is in range because j <= new_count < self.len()
            let bytes_to_write = unsafe { self.get_unchecked(j).to_le_bytes() };
            // Safety: The bytes are being copied to a section of the array that is not after
            // the section of the array that currently holds the bytes.
            unsafe {
                core::ptr::copy_nonoverlapping(
                    bytes_to_write.as_ptr(),
                    self.data.as_mut_ptr().add(new_width * i),
                    new_width,
                );
            }
        }
        // Note: new_width <= old width <= USIZE_WIDTH, so the cast is lossless.
        self.width = new_width as u8;
    }
}