/rust/registry/src/index.crates.io-6f17d22bba15001f/roaring-0.10.10/src/bitmap/multiops.rs
Line | Count | Source (jump to first uncovered line) |
1 | | use core::{ |
2 | | cmp::Reverse, |
3 | | convert::Infallible, |
4 | | mem, |
5 | | ops::{BitOrAssign, BitXorAssign}, |
6 | | }; |
7 | | |
8 | | use alloc::borrow::Cow; |
9 | | |
10 | | use crate::{MultiOps, RoaringBitmap}; |
11 | | |
12 | | use super::{container::Container, store::Store}; |
13 | | |
14 | | #[cfg(not(feature = "std"))] |
15 | | use alloc::vec::Vec; |
16 | | |
17 | | /// When collecting bitmaps for optimizing the computation. If we don't know how many |
18 | | // elements are in the iterator we collect 10 elements. |
19 | | const BASE_COLLECT: usize = 10; |
20 | | |
21 | | /// If an iterator contain 50 elements or less we collect everything because it'll be so |
22 | | /// much faster without impacting the memory usage too much (in most cases). |
23 | | const MAX_COLLECT: usize = 50; |
24 | | |
25 | | impl<I> MultiOps<RoaringBitmap> for I |
26 | | where |
27 | | I: IntoIterator<Item = RoaringBitmap>, |
28 | | { |
29 | | type Output = RoaringBitmap; |
30 | | |
31 | 0 | fn union(self) -> Self::Output { |
32 | 0 | try_multi_or_owned(self.into_iter().map(Ok::<_, Infallible>)).unwrap() |
33 | 0 | } |
34 | | |
35 | 0 | fn intersection(self) -> Self::Output { |
36 | 0 | try_multi_and_owned(self.into_iter().map(Ok::<_, Infallible>)).unwrap() |
37 | 0 | } |
38 | | |
39 | 0 | fn difference(self) -> Self::Output { |
40 | 0 | try_multi_sub_owned(self.into_iter().map(Ok::<_, Infallible>)).unwrap() |
41 | 0 | } |
42 | | |
43 | 0 | fn symmetric_difference(self) -> Self::Output { |
44 | 0 | try_multi_xor_owned(self.into_iter().map(Ok::<_, Infallible>)).unwrap() |
45 | 0 | } |
46 | | } |
47 | | |
48 | | impl<I, E> MultiOps<Result<RoaringBitmap, E>> for I |
49 | | where |
50 | | I: IntoIterator<Item = Result<RoaringBitmap, E>>, |
51 | | { |
52 | | type Output = Result<RoaringBitmap, E>; |
53 | | |
54 | 0 | fn union(self) -> Self::Output { |
55 | 0 | try_multi_or_owned(self) |
56 | 0 | } |
57 | | |
58 | 0 | fn intersection(self) -> Self::Output { |
59 | 0 | try_multi_and_owned(self) |
60 | 0 | } |
61 | | |
62 | 0 | fn difference(self) -> Self::Output { |
63 | 0 | try_multi_sub_owned(self) |
64 | 0 | } |
65 | | |
66 | 0 | fn symmetric_difference(self) -> Self::Output { |
67 | 0 | try_multi_xor_owned(self) |
68 | 0 | } |
69 | | } |
70 | | |
71 | | impl<'a, I> MultiOps<&'a RoaringBitmap> for I |
72 | | where |
73 | | I: IntoIterator<Item = &'a RoaringBitmap>, |
74 | | { |
75 | | type Output = RoaringBitmap; |
76 | | |
77 | 0 | fn union(self) -> Self::Output { |
78 | 0 | try_multi_or_ref(self.into_iter().map(Ok::<_, Infallible>)).unwrap() |
79 | 0 | } |
80 | | |
81 | 0 | fn intersection(self) -> Self::Output { |
82 | 0 | try_multi_and_ref(self.into_iter().map(Ok::<_, Infallible>)).unwrap() |
83 | 0 | } |
84 | | |
85 | 0 | fn difference(self) -> Self::Output { |
86 | 0 | try_multi_sub_ref(self.into_iter().map(Ok::<_, Infallible>)).unwrap() |
87 | 0 | } |
88 | | |
89 | 0 | fn symmetric_difference(self) -> Self::Output { |
90 | 0 | try_multi_xor_ref(self.into_iter().map(Ok::<_, Infallible>)).unwrap() |
91 | 0 | } |
92 | | } |
93 | | |
94 | | impl<'a, I, E: 'a> MultiOps<Result<&'a RoaringBitmap, E>> for I |
95 | | where |
96 | | I: IntoIterator<Item = Result<&'a RoaringBitmap, E>>, |
97 | | { |
98 | | type Output = Result<RoaringBitmap, E>; |
99 | | |
100 | 0 | fn union(self) -> Self::Output { |
101 | 0 | try_multi_or_ref(self) |
102 | 0 | } |
103 | | |
104 | 0 | fn intersection(self) -> Self::Output { |
105 | 0 | try_multi_and_ref(self) |
106 | 0 | } |
107 | | |
108 | 0 | fn difference(self) -> Self::Output { |
109 | 0 | try_multi_sub_ref(self) |
110 | 0 | } |
111 | | |
112 | 0 | fn symmetric_difference(self) -> Self::Output { |
113 | 0 | try_multi_xor_ref(self) |
114 | 0 | } |
115 | | } |
116 | | |
117 | | #[inline] |
118 | 0 | fn try_multi_and_owned<E>( |
119 | 0 | bitmaps: impl IntoIterator<Item = Result<RoaringBitmap, E>>, |
120 | 0 | ) -> Result<RoaringBitmap, E> { |
121 | 0 | let mut iter = bitmaps.into_iter(); |
122 | | |
123 | | // We're going to take a bunch of elements at the start of the iterator and sort |
124 | | // them to reduce the size of our bitmap faster. |
125 | 0 | let mut start = collect_starting_elements(iter.by_ref())?; |
126 | 0 | start.sort_unstable_by_key(|bitmap| bitmap.containers.len()); |
127 | 0 | let mut start = start.into_iter(); |
128 | | |
129 | 0 | if let Some(mut lhs) = start.next() { |
130 | 0 | for rhs in start.map(Ok).chain(iter) { |
131 | 0 | if lhs.is_empty() { |
132 | 0 | return Ok(lhs); |
133 | 0 | } |
134 | 0 | lhs &= rhs?; |
135 | | } |
136 | | |
137 | 0 | Ok(lhs) |
138 | | } else { |
139 | 0 | Ok(RoaringBitmap::new()) |
140 | | } |
141 | 0 | } |
142 | | |
143 | | #[inline] |
144 | 0 | fn try_multi_and_ref<'a, E>( |
145 | 0 | bitmaps: impl IntoIterator<Item = Result<&'a RoaringBitmap, E>>, |
146 | 0 | ) -> Result<RoaringBitmap, E> { |
147 | 0 | let mut iter = bitmaps.into_iter(); |
148 | | |
149 | | // We're going to take a bunch of elements at the start of the iterator and sort |
150 | | // them to reduce the size of our bitmap faster. |
151 | 0 | let mut start = collect_starting_elements(iter.by_ref())?; |
152 | 0 | start.sort_unstable_by_key(|bitmap| bitmap.containers.len()); |
153 | 0 | let mut start = start.into_iter(); |
154 | | |
155 | 0 | if let Some(mut lhs) = start.next().cloned() { |
156 | 0 | for rhs in start.map(Ok).chain(iter) { |
157 | 0 | if lhs.is_empty() { |
158 | 0 | return Ok(lhs); |
159 | 0 | } |
160 | 0 | lhs &= rhs?; |
161 | | } |
162 | 0 | Ok(lhs) |
163 | | } else { |
164 | 0 | Ok(RoaringBitmap::new()) |
165 | | } |
166 | 0 | } |
167 | | |
168 | | #[inline] |
169 | 0 | fn try_multi_sub_owned<E>( |
170 | 0 | bitmaps: impl IntoIterator<Item = Result<RoaringBitmap, E>>, |
171 | 0 | ) -> Result<RoaringBitmap, E> { |
172 | 0 | let mut iter = bitmaps.into_iter(); |
173 | 0 | match iter.next().transpose()? { |
174 | 0 | Some(mut lhs) => { |
175 | 0 | for rhs in iter { |
176 | 0 | if lhs.is_empty() { |
177 | 0 | return Ok(lhs); |
178 | 0 | } |
179 | 0 | lhs -= rhs?; |
180 | | } |
181 | 0 | Ok(lhs) |
182 | | } |
183 | 0 | None => Ok(RoaringBitmap::default()), |
184 | | } |
185 | 0 | } |
186 | | |
187 | | #[inline] |
188 | 0 | fn try_multi_sub_ref<'a, E>( |
189 | 0 | bitmaps: impl IntoIterator<Item = Result<&'a RoaringBitmap, E>>, |
190 | 0 | ) -> Result<RoaringBitmap, E> { |
191 | 0 | let mut iter = bitmaps.into_iter(); |
192 | 0 | match iter.next().transpose()?.cloned() { |
193 | 0 | Some(mut lhs) => { |
194 | 0 | for rhs in iter { |
195 | 0 | if lhs.is_empty() { |
196 | 0 | return Ok(lhs); |
197 | 0 | } |
198 | 0 | lhs -= rhs?; |
199 | | } |
200 | | |
201 | 0 | Ok(lhs) |
202 | | } |
203 | 0 | None => Ok(RoaringBitmap::default()), |
204 | | } |
205 | 0 | } |
206 | | |
207 | | #[inline] |
208 | 0 | fn try_multi_or_owned<E>( |
209 | 0 | bitmaps: impl IntoIterator<Item = Result<RoaringBitmap, E>>, |
210 | 0 | ) -> Result<RoaringBitmap, E> { |
211 | 0 | let mut iter = bitmaps.into_iter(); |
212 | | |
213 | | // We're going to take a bunch of elements at the start of the iterator and |
214 | | // move the biggest one first to grow faster. |
215 | 0 | let mut start = collect_starting_elements(iter.by_ref())?; |
216 | 0 | start.sort_unstable_by_key(|bitmap| Reverse(bitmap.containers.len())); |
217 | 0 | let start_size = start.len(); |
218 | 0 | let mut start = start.into_iter(); |
219 | | |
220 | 0 | let mut containers = if let Some(c) = start.next() { |
221 | 0 | if c.is_empty() { |
222 | 0 | // everything must be empty if the max is empty |
223 | 0 | start.by_ref().nth(start_size); |
224 | 0 | } |
225 | 0 | c.containers |
226 | | } else { |
227 | 0 | return Ok(RoaringBitmap::new()); |
228 | | }; |
229 | | |
230 | 0 | for bitmap in start.map(Ok).chain(iter) { |
231 | 0 | merge_container_owned(&mut containers, bitmap?.containers, BitOrAssign::bitor_assign); |
232 | | } |
233 | | |
234 | 0 | containers.retain_mut(|container| { |
235 | 0 | if !container.is_empty() { |
236 | 0 | container.ensure_correct_store(); |
237 | 0 | true |
238 | | } else { |
239 | 0 | false |
240 | | } |
241 | 0 | }); |
242 | 0 |
|
243 | 0 | Ok(RoaringBitmap { containers }) |
244 | 0 | } |
245 | | |
246 | | #[inline] |
247 | 0 | fn try_multi_xor_owned<E>( |
248 | 0 | bitmaps: impl IntoIterator<Item = Result<RoaringBitmap, E>>, |
249 | 0 | ) -> Result<RoaringBitmap, E> { |
250 | 0 | let mut iter = bitmaps.into_iter(); |
251 | 0 | let mut containers = match iter.next().transpose()? { |
252 | 0 | None => Vec::new(), |
253 | 0 | Some(v) => v.containers, |
254 | | }; |
255 | | |
256 | 0 | for bitmap in iter { |
257 | 0 | merge_container_owned(&mut containers, bitmap?.containers, BitXorAssign::bitxor_assign); |
258 | | } |
259 | | |
260 | 0 | containers.retain_mut(|container| { |
261 | 0 | if !container.is_empty() { |
262 | 0 | container.ensure_correct_store(); |
263 | 0 | true |
264 | | } else { |
265 | 0 | false |
266 | | } |
267 | 0 | }); |
268 | 0 |
|
269 | 0 | Ok(RoaringBitmap { containers }) |
270 | 0 | } |
271 | | |
272 | 0 | fn merge_container_owned( |
273 | 0 | lhs: &mut Vec<Container>, |
274 | 0 | rhs: Vec<Container>, |
275 | 0 | op: impl Fn(&mut Store, Store), |
276 | 0 | ) { |
277 | 0 | for mut rhs in rhs { |
278 | 0 | match lhs.binary_search_by_key(&rhs.key, |c| c.key) { |
279 | 0 | Err(loc) => lhs.insert(loc, rhs), |
280 | 0 | Ok(loc) => { |
281 | 0 | let lhs = &mut lhs[loc]; |
282 | 0 | match (&lhs.store, &rhs.store) { |
283 | 0 | (Store::Array(..), Store::Array(..)) => lhs.store = lhs.store.to_bitmap(), |
284 | 0 | (Store::Array(..), Store::Bitmap(..)) => mem::swap(lhs, &mut rhs), |
285 | 0 | _ => (), |
286 | | }; |
287 | 0 | op(&mut lhs.store, rhs.store); |
288 | | } |
289 | | } |
290 | | } |
291 | 0 | } |
292 | | |
293 | | #[inline] |
294 | 0 | fn try_multi_or_ref<'a, E: 'a>( |
295 | 0 | bitmaps: impl IntoIterator<Item = Result<&'a RoaringBitmap, E>>, |
296 | 0 | ) -> Result<RoaringBitmap, E> { |
297 | 0 | // This algorithm operates on bitmaps. It must deal with arrays for which there are not (yet) |
298 | 0 | // any others with the same key. |
299 | 0 | // |
300 | 0 | // 1. Eager cloning would create useless intermediate values that might become bitmaps |
301 | 0 | // 2. Eager promoting forces disjoint containers to converted back to arrays at the end |
302 | 0 | // |
303 | 0 | // This strategy uses COW to lazily promote arrays to bitmaps as they are operated on. |
304 | 0 | // More memory efficient, negligible wall time difference benchmarks |
305 | 0 |
|
306 | 0 | // Phase 1. Borrow all the containers from the first element. |
307 | 0 | let mut iter = bitmaps.into_iter(); |
308 | 0 | let mut start = collect_starting_elements(iter.by_ref())?; |
309 | 0 | let start_size = start.len(); |
310 | 0 |
|
311 | 0 | start.sort_unstable_by_key(|bitmap| Reverse(bitmap.containers.len())); |
312 | 0 | let mut start = start.into_iter(); |
313 | 0 | let mut containers = match start.next() { |
314 | 0 | Some(c) => { |
315 | 0 | let c: Vec<Cow<Container>> = c.containers.iter().map(Cow::Borrowed).collect(); |
316 | 0 | if c.is_empty() { |
317 | 0 | // everything must be empty if the max is empty |
318 | 0 | start.by_ref().nth(start_size); |
319 | 0 | } |
320 | 0 | c |
321 | | } |
322 | | None => { |
323 | 0 | return Ok(RoaringBitmap::new()); |
324 | | } |
325 | | }; |
326 | | |
327 | | // Phase 2: Operate on the remaining containers |
328 | 0 | for bitmap in start.map(Ok).chain(iter) { |
329 | 0 | merge_container_ref(&mut containers, &bitmap?.containers, |a, b| *a |= b); |
330 | 0 | } |
331 | | |
332 | | // Phase 3: Clean up |
333 | 0 | let containers: Vec<_> = containers |
334 | 0 | .into_iter() |
335 | 0 | .filter(|container| container.len() > 0) |
336 | 0 | .map(|c| { |
337 | 0 | // Any borrowed bitmaps or arrays left over get cloned here |
338 | 0 | let mut container = c.into_owned(); |
339 | 0 | container.ensure_correct_store(); |
340 | 0 | container |
341 | 0 | }) |
342 | 0 | .collect(); |
343 | 0 |
|
344 | 0 | Ok(RoaringBitmap { containers }) |
345 | 0 | } |
346 | | |
347 | | #[inline] |
348 | 0 | fn try_multi_xor_ref<'a, E: 'a>( |
349 | 0 | bitmaps: impl IntoIterator<Item = Result<&'a RoaringBitmap, E>>, |
350 | 0 | ) -> Result<RoaringBitmap, E> { |
351 | 0 | // |
352 | 0 | // This algorithm operates on bitmaps. It must deal with arrays for which there are not (yet) |
353 | 0 | // any others with the same key. |
354 | 0 | // |
355 | 0 | // 1. Eager cloning would create useless intermediate values that might become bitmaps |
356 | 0 | // 2. Eager promoting forces disjoint containers to converted back to arrays at the end |
357 | 0 | // |
358 | 0 | // This strategy uses COW to lazily promote arrays to bitmaps as they are operated on. |
359 | 0 | // More memory efficient, negligible wall time difference benchmarks |
360 | 0 |
|
361 | 0 | // Phase 1. Borrow all the containers from the first element. |
362 | 0 | let mut iter = bitmaps.into_iter(); |
363 | 0 | let mut containers: Vec<Cow<Container>> = match iter.next().transpose()? { |
364 | 0 | None => Vec::new(), |
365 | 0 | Some(v) => v.containers.iter().map(Cow::Borrowed).collect(), |
366 | | }; |
367 | | |
368 | | // Phase 2: Operate on the remaining containers |
369 | 0 | for bitmap in iter { |
370 | 0 | merge_container_ref(&mut containers, &bitmap?.containers, |a, b| *a ^= b); |
371 | 0 | } |
372 | | |
373 | | // Phase 3: Clean up |
374 | 0 | let containers: Vec<_> = containers |
375 | 0 | .into_iter() |
376 | 0 | .filter(|container| container.len() > 0) |
377 | 0 | .map(|c| { |
378 | 0 | // Any borrowed bitmaps or arrays left over get cloned here |
379 | 0 | let mut container = c.into_owned(); |
380 | 0 | container.ensure_correct_store(); |
381 | 0 | container |
382 | 0 | }) |
383 | 0 | .collect(); |
384 | 0 |
|
385 | 0 | Ok(RoaringBitmap { containers }) |
386 | 0 | } |
387 | | |
388 | 0 | fn merge_container_ref<'a>( |
389 | 0 | containers: &mut Vec<Cow<'a, Container>>, |
390 | 0 | rhs: &'a [Container], |
391 | 0 | op: impl Fn(&mut Store, &Store), |
392 | 0 | ) { |
393 | 0 | for rhs in rhs { |
394 | 0 | match containers.binary_search_by_key(&rhs.key, |c| c.key) { |
395 | 0 | Err(loc) => { |
396 | 0 | // A container not currently in containers. Borrow it. |
397 | 0 | containers.insert(loc, Cow::Borrowed(rhs)) |
398 | | } |
399 | 0 | Ok(loc) => { |
400 | 0 | // A container that is in containers. Operate on it. |
401 | 0 | let lhs = &mut containers[loc]; |
402 | 0 | match (&lhs.store, &rhs.store) { |
403 | 0 | (Store::Array(..), Store::Array(..)) => { |
404 | 0 | // We had borrowed an array. Without cloning it, create a new bitmap |
405 | 0 | // Add all the elements to the new bitmap |
406 | 0 | let mut store = lhs.store.to_bitmap(); |
407 | 0 | op(&mut store, &rhs.store); |
408 | 0 | *lhs = Cow::Owned(Container { key: lhs.key, store }); |
409 | 0 | } |
410 | 0 | (Store::Array(..), Store::Bitmap(..)) => { |
411 | 0 | // We had borrowed an array. Copy the rhs bitmap, add lhs to it |
412 | 0 | let mut store = rhs.store.clone(); |
413 | 0 | op(&mut store, &lhs.store); |
414 | 0 | *lhs = Cow::Owned(Container { key: lhs.key, store }); |
415 | 0 | } |
416 | 0 | (Store::Bitmap(..), _) => { |
417 | 0 | // This might be a owned or borrowed bitmap. |
418 | 0 | // If it was borrowed it will clone-on-write |
419 | 0 | op(&mut lhs.to_mut().store, &rhs.store); |
420 | 0 | } |
421 | | }; |
422 | | } |
423 | | } |
424 | | } |
425 | 0 | } |
426 | | |
427 | | #[inline] |
428 | 0 | fn collect_starting_elements<I, El, Er>(iter: I) -> Result<Vec<El>, Er> |
429 | 0 | where |
430 | 0 | I: IntoIterator<Item = Result<El, Er>>, |
431 | 0 | { |
432 | 0 | let iter = iter.into_iter(); |
433 | 0 | let mut to_collect = iter.size_hint().1.unwrap_or(BASE_COLLECT); |
434 | 0 | if to_collect > MAX_COLLECT { |
435 | 0 | to_collect = BASE_COLLECT; |
436 | 0 | } |
437 | | |
438 | 0 | let mut ret = Vec::with_capacity(to_collect); |
439 | 0 | for el in iter.take(to_collect) { |
440 | 0 | ret.push(el?); |
441 | | } |
442 | | |
443 | 0 | Ok(ret) |
444 | 0 | } |