Coverage Report

Created: 2025-02-21 07:11

/rust/registry/src/index.crates.io-6f17d22bba15001f/roaring-0.10.10/src/bitmap/multiops.rs
Line
Count
Source (jump to first uncovered line)
1
use core::{
2
    cmp::Reverse,
3
    convert::Infallible,
4
    mem,
5
    ops::{BitOrAssign, BitXorAssign},
6
};
7
8
use alloc::borrow::Cow;
9
10
use crate::{MultiOps, RoaringBitmap};
11
12
use super::{container::Container, store::Store};
13
14
#[cfg(not(feature = "std"))]
15
use alloc::vec::Vec;
16
17
/// When collecting bitmaps for optimizing the computation. If we don't know how many
18
// elements are in the iterator we collect 10 elements.
19
const BASE_COLLECT: usize = 10;
20
21
/// If an iterator contain 50 elements or less we collect everything because it'll be so
22
/// much faster without impacting the memory usage too much (in most cases).
23
const MAX_COLLECT: usize = 50;
24
25
impl<I> MultiOps<RoaringBitmap> for I
26
where
27
    I: IntoIterator<Item = RoaringBitmap>,
28
{
29
    type Output = RoaringBitmap;
30
31
0
    fn union(self) -> Self::Output {
32
0
        try_multi_or_owned(self.into_iter().map(Ok::<_, Infallible>)).unwrap()
33
0
    }
34
35
0
    fn intersection(self) -> Self::Output {
36
0
        try_multi_and_owned(self.into_iter().map(Ok::<_, Infallible>)).unwrap()
37
0
    }
38
39
0
    fn difference(self) -> Self::Output {
40
0
        try_multi_sub_owned(self.into_iter().map(Ok::<_, Infallible>)).unwrap()
41
0
    }
42
43
0
    fn symmetric_difference(self) -> Self::Output {
44
0
        try_multi_xor_owned(self.into_iter().map(Ok::<_, Infallible>)).unwrap()
45
0
    }
46
}
47
48
impl<I, E> MultiOps<Result<RoaringBitmap, E>> for I
49
where
50
    I: IntoIterator<Item = Result<RoaringBitmap, E>>,
51
{
52
    type Output = Result<RoaringBitmap, E>;
53
54
0
    fn union(self) -> Self::Output {
55
0
        try_multi_or_owned(self)
56
0
    }
57
58
0
    fn intersection(self) -> Self::Output {
59
0
        try_multi_and_owned(self)
60
0
    }
61
62
0
    fn difference(self) -> Self::Output {
63
0
        try_multi_sub_owned(self)
64
0
    }
65
66
0
    fn symmetric_difference(self) -> Self::Output {
67
0
        try_multi_xor_owned(self)
68
0
    }
69
}
70
71
impl<'a, I> MultiOps<&'a RoaringBitmap> for I
72
where
73
    I: IntoIterator<Item = &'a RoaringBitmap>,
74
{
75
    type Output = RoaringBitmap;
76
77
0
    fn union(self) -> Self::Output {
78
0
        try_multi_or_ref(self.into_iter().map(Ok::<_, Infallible>)).unwrap()
79
0
    }
80
81
0
    fn intersection(self) -> Self::Output {
82
0
        try_multi_and_ref(self.into_iter().map(Ok::<_, Infallible>)).unwrap()
83
0
    }
84
85
0
    fn difference(self) -> Self::Output {
86
0
        try_multi_sub_ref(self.into_iter().map(Ok::<_, Infallible>)).unwrap()
87
0
    }
88
89
0
    fn symmetric_difference(self) -> Self::Output {
90
0
        try_multi_xor_ref(self.into_iter().map(Ok::<_, Infallible>)).unwrap()
91
0
    }
92
}
93
94
impl<'a, I, E: 'a> MultiOps<Result<&'a RoaringBitmap, E>> for I
95
where
96
    I: IntoIterator<Item = Result<&'a RoaringBitmap, E>>,
97
{
98
    type Output = Result<RoaringBitmap, E>;
99
100
0
    fn union(self) -> Self::Output {
101
0
        try_multi_or_ref(self)
102
0
    }
103
104
0
    fn intersection(self) -> Self::Output {
105
0
        try_multi_and_ref(self)
106
0
    }
107
108
0
    fn difference(self) -> Self::Output {
109
0
        try_multi_sub_ref(self)
110
0
    }
111
112
0
    fn symmetric_difference(self) -> Self::Output {
113
0
        try_multi_xor_ref(self)
114
0
    }
115
}
116
117
#[inline]
118
0
fn try_multi_and_owned<E>(
119
0
    bitmaps: impl IntoIterator<Item = Result<RoaringBitmap, E>>,
120
0
) -> Result<RoaringBitmap, E> {
121
0
    let mut iter = bitmaps.into_iter();
122
123
    // We're going to take a bunch of elements at the start of the iterator and sort
124
    // them to reduce the size of our bitmap faster.
125
0
    let mut start = collect_starting_elements(iter.by_ref())?;
126
0
    start.sort_unstable_by_key(|bitmap| bitmap.containers.len());
127
0
    let mut start = start.into_iter();
128
129
0
    if let Some(mut lhs) = start.next() {
130
0
        for rhs in start.map(Ok).chain(iter) {
131
0
            if lhs.is_empty() {
132
0
                return Ok(lhs);
133
0
            }
134
0
            lhs &= rhs?;
135
        }
136
137
0
        Ok(lhs)
138
    } else {
139
0
        Ok(RoaringBitmap::new())
140
    }
141
0
}
142
143
#[inline]
144
0
fn try_multi_and_ref<'a, E>(
145
0
    bitmaps: impl IntoIterator<Item = Result<&'a RoaringBitmap, E>>,
146
0
) -> Result<RoaringBitmap, E> {
147
0
    let mut iter = bitmaps.into_iter();
148
149
    // We're going to take a bunch of elements at the start of the iterator and sort
150
    // them to reduce the size of our bitmap faster.
151
0
    let mut start = collect_starting_elements(iter.by_ref())?;
152
0
    start.sort_unstable_by_key(|bitmap| bitmap.containers.len());
153
0
    let mut start = start.into_iter();
154
155
0
    if let Some(mut lhs) = start.next().cloned() {
156
0
        for rhs in start.map(Ok).chain(iter) {
157
0
            if lhs.is_empty() {
158
0
                return Ok(lhs);
159
0
            }
160
0
            lhs &= rhs?;
161
        }
162
0
        Ok(lhs)
163
    } else {
164
0
        Ok(RoaringBitmap::new())
165
    }
166
0
}
167
168
#[inline]
169
0
fn try_multi_sub_owned<E>(
170
0
    bitmaps: impl IntoIterator<Item = Result<RoaringBitmap, E>>,
171
0
) -> Result<RoaringBitmap, E> {
172
0
    let mut iter = bitmaps.into_iter();
173
0
    match iter.next().transpose()? {
174
0
        Some(mut lhs) => {
175
0
            for rhs in iter {
176
0
                if lhs.is_empty() {
177
0
                    return Ok(lhs);
178
0
                }
179
0
                lhs -= rhs?;
180
            }
181
0
            Ok(lhs)
182
        }
183
0
        None => Ok(RoaringBitmap::default()),
184
    }
185
0
}
186
187
#[inline]
188
0
fn try_multi_sub_ref<'a, E>(
189
0
    bitmaps: impl IntoIterator<Item = Result<&'a RoaringBitmap, E>>,
190
0
) -> Result<RoaringBitmap, E> {
191
0
    let mut iter = bitmaps.into_iter();
192
0
    match iter.next().transpose()?.cloned() {
193
0
        Some(mut lhs) => {
194
0
            for rhs in iter {
195
0
                if lhs.is_empty() {
196
0
                    return Ok(lhs);
197
0
                }
198
0
                lhs -= rhs?;
199
            }
200
201
0
            Ok(lhs)
202
        }
203
0
        None => Ok(RoaringBitmap::default()),
204
    }
205
0
}
206
207
#[inline]
208
0
fn try_multi_or_owned<E>(
209
0
    bitmaps: impl IntoIterator<Item = Result<RoaringBitmap, E>>,
210
0
) -> Result<RoaringBitmap, E> {
211
0
    let mut iter = bitmaps.into_iter();
212
213
    // We're going to take a bunch of elements at the start of the iterator and
214
    // move the biggest one first to grow faster.
215
0
    let mut start = collect_starting_elements(iter.by_ref())?;
216
0
    start.sort_unstable_by_key(|bitmap| Reverse(bitmap.containers.len()));
217
0
    let start_size = start.len();
218
0
    let mut start = start.into_iter();
219
220
0
    let mut containers = if let Some(c) = start.next() {
221
0
        if c.is_empty() {
222
0
            // everything must be empty if the max is empty
223
0
            start.by_ref().nth(start_size);
224
0
        }
225
0
        c.containers
226
    } else {
227
0
        return Ok(RoaringBitmap::new());
228
    };
229
230
0
    for bitmap in start.map(Ok).chain(iter) {
231
0
        merge_container_owned(&mut containers, bitmap?.containers, BitOrAssign::bitor_assign);
232
    }
233
234
0
    containers.retain_mut(|container| {
235
0
        if !container.is_empty() {
236
0
            container.ensure_correct_store();
237
0
            true
238
        } else {
239
0
            false
240
        }
241
0
    });
242
0
243
0
    Ok(RoaringBitmap { containers })
244
0
}
245
246
#[inline]
247
0
fn try_multi_xor_owned<E>(
248
0
    bitmaps: impl IntoIterator<Item = Result<RoaringBitmap, E>>,
249
0
) -> Result<RoaringBitmap, E> {
250
0
    let mut iter = bitmaps.into_iter();
251
0
    let mut containers = match iter.next().transpose()? {
252
0
        None => Vec::new(),
253
0
        Some(v) => v.containers,
254
    };
255
256
0
    for bitmap in iter {
257
0
        merge_container_owned(&mut containers, bitmap?.containers, BitXorAssign::bitxor_assign);
258
    }
259
260
0
    containers.retain_mut(|container| {
261
0
        if !container.is_empty() {
262
0
            container.ensure_correct_store();
263
0
            true
264
        } else {
265
0
            false
266
        }
267
0
    });
268
0
269
0
    Ok(RoaringBitmap { containers })
270
0
}
271
272
0
fn merge_container_owned(
273
0
    lhs: &mut Vec<Container>,
274
0
    rhs: Vec<Container>,
275
0
    op: impl Fn(&mut Store, Store),
276
0
) {
277
0
    for mut rhs in rhs {
278
0
        match lhs.binary_search_by_key(&rhs.key, |c| c.key) {
279
0
            Err(loc) => lhs.insert(loc, rhs),
280
0
            Ok(loc) => {
281
0
                let lhs = &mut lhs[loc];
282
0
                match (&lhs.store, &rhs.store) {
283
0
                    (Store::Array(..), Store::Array(..)) => lhs.store = lhs.store.to_bitmap(),
284
0
                    (Store::Array(..), Store::Bitmap(..)) => mem::swap(lhs, &mut rhs),
285
0
                    _ => (),
286
                };
287
0
                op(&mut lhs.store, rhs.store);
288
            }
289
        }
290
    }
291
0
}
292
293
#[inline]
294
0
fn try_multi_or_ref<'a, E: 'a>(
295
0
    bitmaps: impl IntoIterator<Item = Result<&'a RoaringBitmap, E>>,
296
0
) -> Result<RoaringBitmap, E> {
297
0
    // This algorithm operates on bitmaps. It must deal with arrays for which there are not (yet)
298
0
    // any others with the same key.
299
0
    //
300
0
    //   1. Eager cloning would create useless intermediate values that might become bitmaps
301
0
    //   2. Eager promoting forces disjoint containers to converted back to arrays at the end
302
0
    //
303
0
    // This strategy uses COW to lazily promote arrays to bitmaps as they are operated on.
304
0
    // More memory efficient, negligible wall time difference benchmarks
305
0
306
0
    // Phase 1. Borrow all the containers from the first element.
307
0
    let mut iter = bitmaps.into_iter();
308
0
    let mut start = collect_starting_elements(iter.by_ref())?;
309
0
    let start_size = start.len();
310
0
311
0
    start.sort_unstable_by_key(|bitmap| Reverse(bitmap.containers.len()));
312
0
    let mut start = start.into_iter();
313
0
    let mut containers = match start.next() {
314
0
        Some(c) => {
315
0
            let c: Vec<Cow<Container>> = c.containers.iter().map(Cow::Borrowed).collect();
316
0
            if c.is_empty() {
317
0
                // everything must be empty if the max is empty
318
0
                start.by_ref().nth(start_size);
319
0
            }
320
0
            c
321
        }
322
        None => {
323
0
            return Ok(RoaringBitmap::new());
324
        }
325
    };
326
327
    // Phase 2: Operate on the remaining containers
328
0
    for bitmap in start.map(Ok).chain(iter) {
329
0
        merge_container_ref(&mut containers, &bitmap?.containers, |a, b| *a |= b);
330
0
    }
331
332
    // Phase 3: Clean up
333
0
    let containers: Vec<_> = containers
334
0
        .into_iter()
335
0
        .filter(|container| container.len() > 0)
336
0
        .map(|c| {
337
0
            // Any borrowed bitmaps or arrays left over get cloned here
338
0
            let mut container = c.into_owned();
339
0
            container.ensure_correct_store();
340
0
            container
341
0
        })
342
0
        .collect();
343
0
344
0
    Ok(RoaringBitmap { containers })
345
0
}
346
347
#[inline]
348
0
fn try_multi_xor_ref<'a, E: 'a>(
349
0
    bitmaps: impl IntoIterator<Item = Result<&'a RoaringBitmap, E>>,
350
0
) -> Result<RoaringBitmap, E> {
351
0
    //
352
0
    // This algorithm operates on bitmaps. It must deal with arrays for which there are not (yet)
353
0
    // any others with the same key.
354
0
    //
355
0
    //   1. Eager cloning would create useless intermediate values that might become bitmaps
356
0
    //   2. Eager promoting forces disjoint containers to converted back to arrays at the end
357
0
    //
358
0
    // This strategy uses COW to lazily promote arrays to bitmaps as they are operated on.
359
0
    // More memory efficient, negligible wall time difference benchmarks
360
0
361
0
    // Phase 1. Borrow all the containers from the first element.
362
0
    let mut iter = bitmaps.into_iter();
363
0
    let mut containers: Vec<Cow<Container>> = match iter.next().transpose()? {
364
0
        None => Vec::new(),
365
0
        Some(v) => v.containers.iter().map(Cow::Borrowed).collect(),
366
    };
367
368
    // Phase 2: Operate on the remaining containers
369
0
    for bitmap in iter {
370
0
        merge_container_ref(&mut containers, &bitmap?.containers, |a, b| *a ^= b);
371
0
    }
372
373
    // Phase 3: Clean up
374
0
    let containers: Vec<_> = containers
375
0
        .into_iter()
376
0
        .filter(|container| container.len() > 0)
377
0
        .map(|c| {
378
0
            // Any borrowed bitmaps or arrays left over get cloned here
379
0
            let mut container = c.into_owned();
380
0
            container.ensure_correct_store();
381
0
            container
382
0
        })
383
0
        .collect();
384
0
385
0
    Ok(RoaringBitmap { containers })
386
0
}
387
388
0
fn merge_container_ref<'a>(
389
0
    containers: &mut Vec<Cow<'a, Container>>,
390
0
    rhs: &'a [Container],
391
0
    op: impl Fn(&mut Store, &Store),
392
0
) {
393
0
    for rhs in rhs {
394
0
        match containers.binary_search_by_key(&rhs.key, |c| c.key) {
395
0
            Err(loc) => {
396
0
                // A container not currently in containers. Borrow it.
397
0
                containers.insert(loc, Cow::Borrowed(rhs))
398
            }
399
0
            Ok(loc) => {
400
0
                // A container that is in containers. Operate on it.
401
0
                let lhs = &mut containers[loc];
402
0
                match (&lhs.store, &rhs.store) {
403
0
                    (Store::Array(..), Store::Array(..)) => {
404
0
                        // We had borrowed an array. Without cloning it, create a new bitmap
405
0
                        // Add all the elements to the new bitmap
406
0
                        let mut store = lhs.store.to_bitmap();
407
0
                        op(&mut store, &rhs.store);
408
0
                        *lhs = Cow::Owned(Container { key: lhs.key, store });
409
0
                    }
410
0
                    (Store::Array(..), Store::Bitmap(..)) => {
411
0
                        // We had borrowed an array. Copy the rhs bitmap, add lhs to it
412
0
                        let mut store = rhs.store.clone();
413
0
                        op(&mut store, &lhs.store);
414
0
                        *lhs = Cow::Owned(Container { key: lhs.key, store });
415
0
                    }
416
0
                    (Store::Bitmap(..), _) => {
417
0
                        // This might be a owned or borrowed bitmap.
418
0
                        // If it was borrowed it will clone-on-write
419
0
                        op(&mut lhs.to_mut().store, &rhs.store);
420
0
                    }
421
                };
422
            }
423
        }
424
    }
425
0
}
426
427
#[inline]
428
0
fn collect_starting_elements<I, El, Er>(iter: I) -> Result<Vec<El>, Er>
429
0
where
430
0
    I: IntoIterator<Item = Result<El, Er>>,
431
0
{
432
0
    let iter = iter.into_iter();
433
0
    let mut to_collect = iter.size_hint().1.unwrap_or(BASE_COLLECT);
434
0
    if to_collect > MAX_COLLECT {
435
0
        to_collect = BASE_COLLECT;
436
0
    }
437
438
0
    let mut ret = Vec::with_capacity(to_collect);
439
0
    for el in iter.take(to_collect) {
440
0
        ret.push(el?);
441
    }
442
443
0
    Ok(ret)
444
0
}