Coverage Report

Created: 2026-06-07 06:50

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/croaring/src/containers/mixed_andnot.c
Line
Count
Source
1
/*
2
 * mixed_andnot.c.  More methods since operation is not symmetric,
3
 * except no "wide" andnot , so no lazy options motivated.
4
 */
5
6
#include <assert.h>
7
#include <string.h>
8
9
#include <roaring/array_util.h>
10
#include <roaring/bitset_util.h>
11
#include <roaring/containers/containers.h>
12
#include <roaring/containers/convert.h>
13
#include <roaring/containers/mixed_andnot.h>
14
#include <roaring/containers/perfparameters.h>
15
16
#ifdef __cplusplus
17
extern "C" {
18
namespace roaring {
19
namespace internal {
20
#endif
21
22
/* Compute the andnot of src_1 and src_2 and write the result to
23
 * dst, a valid array container that could be the same as dst.*/
24
void array_bitset_container_andnot(const array_container_t *src_1,
25
                                   const bitset_container_t *src_2,
26
608
                                   array_container_t *dst) {
27
    // follows Java implementation as of June 2016
28
608
    if (dst->capacity < src_1->cardinality) {
29
608
        array_container_grow(dst, src_1->cardinality, false);
30
608
    }
31
608
    int32_t newcard = 0;
32
608
    const int32_t origcard = src_1->cardinality;
33
103k
    for (int i = 0; i < origcard; ++i) {
34
102k
        uint16_t key = src_1->array[i];
35
102k
        dst->array[newcard] = key;
36
102k
        newcard += 1 - bitset_container_contains(src_2, key);
37
102k
    }
38
608
    dst->cardinality = newcard;
39
608
}
40
41
/* Compute the andnot of src_1 and src_2 and write the result to
42
 * src_1 */
43
44
void array_bitset_container_iandnot(array_container_t *src_1,
45
0
                                    const bitset_container_t *src_2) {
46
0
    array_bitset_container_andnot(src_1, src_2, src_1);
47
0
}
48
49
/* Compute the andnot of src_1 and src_2 and write the result to
50
 * dst, which does not initially have a valid container.
51
 * Return true for a bitset result; false for array
52
 */
53
54
bool bitset_array_container_andnot(const bitset_container_t *src_1,
55
                                   const array_container_t *src_2,
56
0
                                   container_t **dst) {
57
    // Java did this directly, but we have option of asm or avx
58
0
    bitset_container_t *result = bitset_container_create();
59
0
    bitset_container_copy(src_1, result);
60
0
    result->cardinality =
61
0
        (int32_t)bitset_clear_list(result->words, (uint64_t)result->cardinality,
62
0
                                   src_2->array, (uint64_t)src_2->cardinality);
63
64
    // do required type conversions.
65
0
    if (result->cardinality <= DEFAULT_MAX_SIZE) {
66
0
        *dst = array_container_from_bitset(result);
67
0
        bitset_container_free(result);
68
0
        return false;
69
0
    }
70
0
    *dst = result;
71
0
    return true;
72
0
}
73
74
/* Compute the andnot of src_1 and src_2 and write the result to
75
 * dst (which has no container initially).  It will modify src_1
76
 * to be dst if the result is a bitset.  Otherwise, it will
77
 * free src_1 and dst will be a new array container.  In both
78
 * cases, the caller is responsible for deallocating dst.
79
 * Returns true iff dst is a bitset  */
80
81
bool bitset_array_container_iandnot(bitset_container_t *src_1,
82
                                    const array_container_t *src_2,
83
1.50k
                                    container_t **dst) {
84
1.50k
    *dst = src_1;
85
1.50k
    src_1->cardinality =
86
1.50k
        (int32_t)bitset_clear_list(src_1->words, (uint64_t)src_1->cardinality,
87
1.50k
                                   src_2->array, (uint64_t)src_2->cardinality);
88
89
1.50k
    if (src_1->cardinality <= DEFAULT_MAX_SIZE) {
90
198
        *dst = array_container_from_bitset(src_1);
91
198
        bitset_container_free(src_1);
92
198
        return false;  // not bitset
93
198
    } else
94
1.31k
        return true;
95
1.50k
}
96
97
/* Compute the andnot of src_1 and src_2 and write the result to
98
 * dst. Result may be either a bitset or an array container
99
 * (returns "result is bitset"). dst does not initially have
100
 * any container, but becomes either a bitset container (return
101
 * result true) or an array container.
102
 */
103
104
bool run_bitset_container_andnot(const run_container_t *src_1,
105
                                 const bitset_container_t *src_2,
106
650
                                 container_t **dst) {
107
    // follows the Java implementation as of June 2016
108
650
    int card = run_container_cardinality(src_1);
109
650
    if (card <= DEFAULT_MAX_SIZE) {
110
        // must be an array
111
650
        array_container_t *answer = array_container_create_given_capacity(card);
112
650
        answer->cardinality = 0;
113
54.9k
        for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
114
54.2k
            rle16_t rle = src_1->runs[rlepos];
115
194k
            for (int run_value = rle.value; run_value <= rle.value + rle.length;
116
140k
                 ++run_value) {
117
140k
                if (!bitset_container_get(src_2, (uint16_t)run_value)) {
118
39.5k
                    answer->array[answer->cardinality++] = (uint16_t)run_value;
119
39.5k
                }
120
140k
            }
121
54.2k
        }
122
650
        *dst = answer;
123
650
        return false;
124
650
    } else {  // we guess it will be a bitset, though have to check guess when
125
              // done
126
0
        bitset_container_t *answer = bitset_container_clone(src_2);
127
128
0
        uint32_t last_pos = 0;
129
0
        for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
130
0
            rle16_t rle = src_1->runs[rlepos];
131
132
0
            uint32_t start = rle.value;
133
0
            uint32_t end = start + rle.length + 1;
134
0
            bitset_reset_range(answer->words, last_pos, start);
135
0
            bitset_flip_range(answer->words, start, end);
136
0
            last_pos = end;
137
0
        }
138
0
        bitset_reset_range(answer->words, last_pos, (uint32_t)(1 << 16));
139
140
0
        answer->cardinality = bitset_container_compute_cardinality(answer);
141
142
0
        if (answer->cardinality <= DEFAULT_MAX_SIZE) {
143
0
            *dst = array_container_from_bitset(answer);
144
0
            bitset_container_free(answer);
145
0
            return false;  // not bitset
146
0
        }
147
0
        *dst = answer;
148
0
        return true;  // bitset
149
0
    }
150
650
}
151
152
/* Compute the andnot of src_1 and src_2 and write the result to
153
 * dst. Result may be either a bitset or an array container
154
 * (returns "result is bitset"). dst does not initially have
155
 * any container, but becomes either a bitset container (return
156
 * result true) or an array container.
157
 */
158
159
bool run_bitset_container_iandnot(run_container_t *src_1,
160
                                  const bitset_container_t *src_2,
161
0
                                  container_t **dst) {
162
    // dummy implementation
163
0
    bool ans = run_bitset_container_andnot(src_1, src_2, dst);
164
0
    run_container_free(src_1);
165
0
    return ans;
166
0
}
167
168
/* Compute the andnot of src_1 and src_2 and write the result to
169
 * dst. Result may be either a bitset or an array container
170
 * (returns "result is bitset").  dst does not initially have
171
 * any container, but becomes either a bitset container (return
172
 * result true) or an array container.
173
 */
174
175
bool bitset_run_container_andnot(const bitset_container_t *src_1,
176
                                 const run_container_t *src_2,
177
0
                                 container_t **dst) {
178
    // follows Java implementation
179
0
    bitset_container_t *result = bitset_container_create();
180
181
0
    bitset_container_copy(src_1, result);
182
0
    for (int32_t rlepos = 0; rlepos < src_2->n_runs; ++rlepos) {
183
0
        rle16_t rle = src_2->runs[rlepos];
184
0
        bitset_reset_range(result->words, rle.value,
185
0
                           rle.value + rle.length + UINT32_C(1));
186
0
    }
187
0
    result->cardinality = bitset_container_compute_cardinality(result);
188
189
0
    if (result->cardinality <= DEFAULT_MAX_SIZE) {
190
0
        *dst = array_container_from_bitset(result);
191
0
        bitset_container_free(result);
192
0
        return false;  // not bitset
193
0
    }
194
0
    *dst = result;
195
0
    return true;  // bitset
196
0
}
197
198
/* Compute the andnot of src_1 and src_2 and write the result to
199
 * dst (which has no container initially).  It will modify src_1
200
 * to be dst if the result is a bitset.  Otherwise, it will
201
 * free src_1 and dst will be a new array container.  In both
202
 * cases, the caller is responsible for deallocating dst.
203
 * Returns true iff dst is a bitset  */
204
205
bool bitset_run_container_iandnot(bitset_container_t *src_1,
206
                                  const run_container_t *src_2,
207
0
                                  container_t **dst) {
208
0
    *dst = src_1;
209
210
0
    for (int32_t rlepos = 0; rlepos < src_2->n_runs; ++rlepos) {
211
0
        rle16_t rle = src_2->runs[rlepos];
212
0
        bitset_reset_range(src_1->words, rle.value,
213
0
                           rle.value + rle.length + UINT32_C(1));
214
0
    }
215
0
    src_1->cardinality = bitset_container_compute_cardinality(src_1);
216
217
0
    if (src_1->cardinality <= DEFAULT_MAX_SIZE) {
218
0
        *dst = array_container_from_bitset(src_1);
219
0
        bitset_container_free(src_1);
220
0
        return false;  // not bitset
221
0
    } else
222
0
        return true;
223
0
}
224
225
/* helper. a_out must be a valid array container with adequate capacity.
226
 * Returns the cardinality of the output container. Partly Based on Java
227
 * implementation Util.unsignedDifference.
228
 *
229
 * TODO: Util.unsignedDifference does not use advanceUntil.  Is it cheaper
230
 * to avoid advanceUntil?
231
 */
232
233
static int run_array_array_subtract(const run_container_t *rc,
234
                                    const array_container_t *a_in,
235
1.32k
                                    array_container_t *a_out) {
236
1.32k
    int out_card = 0;
237
1.32k
    int32_t in_array_pos =
238
1.32k
        -1;  // since advanceUntil always assumes we start the search AFTER this
239
240
98.1k
    for (int rlepos = 0; rlepos < rc->n_runs; rlepos++) {
241
96.7k
        int32_t start = rc->runs[rlepos].value;
242
96.7k
        int32_t end = start + rc->runs[rlepos].length + 1;
243
244
96.7k
        in_array_pos = advanceUntil(a_in->array, in_array_pos,
245
96.7k
                                    a_in->cardinality, (uint16_t)start);
246
247
96.7k
        if (in_array_pos >= a_in->cardinality) {  // run has no items subtracted
248
66.7k
            for (int32_t i = start; i < end; ++i)
249
61.1k
                a_out->array[out_card++] = (uint16_t)i;
250
91.1k
        } else {
251
91.1k
            uint16_t next_nonincluded = a_in->array[in_array_pos];
252
91.1k
            if (next_nonincluded >= end) {
253
                // another case when run goes unaltered
254
83.7k
                for (int32_t i = start; i < end; ++i)
255
52.6k
                    a_out->array[out_card++] = (uint16_t)i;
256
31.1k
                in_array_pos--;  // ensure we see this item again if necessary
257
59.9k
            } else {
258
298k
                for (int32_t i = start; i < end; ++i)
259
238k
                    if (i != next_nonincluded)
260
104k
                        a_out->array[out_card++] = (uint16_t)i;
261
134k
                    else  // 0 should ensure  we don't match
262
134k
                        next_nonincluded =
263
134k
                            (in_array_pos + 1 >= a_in->cardinality)
264
134k
                                ? 0
265
134k
                                : a_in->array[++in_array_pos];
266
59.9k
                in_array_pos--;  // see again
267
59.9k
            }
268
91.1k
        }
269
96.7k
    }
270
1.32k
    return out_card;
271
1.32k
}
272
273
/* dst does not indicate a valid container initially.  Eventually it
274
 * can become any type of container.
275
 */
276
277
int run_array_container_andnot(const run_container_t *src_1,
278
                               const array_container_t *src_2,
279
2.00k
                               container_t **dst) {
280
    // follows the Java impl as of June 2016
281
282
2.00k
    int card = run_container_cardinality(src_1);
283
2.00k
    const int arbitrary_threshold = 32;
284
285
2.00k
    if (card <= arbitrary_threshold) {
286
378
        if (src_2->cardinality == 0) {
287
0
            *dst = run_container_clone(src_1);
288
0
            return RUN_CONTAINER_TYPE;
289
0
        }
290
        // Java's "lazyandNot.toEfficientContainer" thing
291
378
        run_container_t *answer = run_container_create_given_capacity(
292
378
            card + array_container_cardinality(src_2));
293
294
378
        int rlepos = 0;
295
378
        int xrlepos = 0;  // "x" is src_2
296
378
        rle16_t rle = src_1->runs[rlepos];
297
378
        int32_t start = rle.value;
298
378
        int32_t end = start + rle.length + 1;
299
378
        int32_t xstart = src_2->array[xrlepos];
300
301
12.5k
        while ((rlepos < src_1->n_runs) && (xrlepos < src_2->cardinality)) {
302
12.1k
            if (end <= xstart) {
303
                // output the first run
304
664
                answer->runs[answer->n_runs++] =
305
664
                    CROARING_MAKE_RLE16(start, end - start - 1);
306
664
                rlepos++;
307
664
                if (rlepos < src_1->n_runs) {
308
624
                    start = src_1->runs[rlepos].value;
309
624
                    end = start + src_1->runs[rlepos].length + 1;
310
624
                }
311
11.4k
            } else if (xstart + 1 <= start) {
312
                // exit the second run
313
7.19k
                xrlepos++;
314
7.19k
                if (xrlepos < src_2->cardinality) {
315
7.11k
                    xstart = src_2->array[xrlepos];
316
7.11k
                }
317
7.19k
            } else {
318
4.29k
                if (start < xstart) {
319
352
                    answer->runs[answer->n_runs++] =
320
352
                        CROARING_MAKE_RLE16(start, xstart - start - 1);
321
352
                }
322
4.29k
                if (xstart + 1 < end) {
323
2.60k
                    start = xstart + 1;
324
2.60k
                } else {
325
1.69k
                    rlepos++;
326
1.69k
                    if (rlepos < src_1->n_runs) {
327
1.43k
                        start = src_1->runs[rlepos].value;
328
1.43k
                        end = start + src_1->runs[rlepos].length + 1;
329
1.43k
                    }
330
1.69k
                }
331
4.29k
            }
332
12.1k
        }
333
378
        if (rlepos < src_1->n_runs) {
334
79
            answer->runs[answer->n_runs++] =
335
79
                CROARING_MAKE_RLE16(start, end - start - 1);
336
79
            rlepos++;
337
79
            if (rlepos < src_1->n_runs) {
338
27
                memcpy(answer->runs + answer->n_runs, src_1->runs + rlepos,
339
27
                       (src_1->n_runs - rlepos) * sizeof(rle16_t));
340
27
                answer->n_runs += (src_1->n_runs - rlepos);
341
27
            }
342
79
        }
343
378
        uint8_t return_type;
344
378
        *dst = convert_run_to_efficient_container(answer, &return_type);
345
378
        if (answer != *dst) run_container_free(answer);
346
378
        return return_type;
347
378
    }
348
    // else it's a bitmap or array
349
350
1.62k
    if (card <= DEFAULT_MAX_SIZE) {
351
1.32k
        array_container_t *ac = array_container_create_given_capacity(card);
352
        // nb Java code used a generic iterator-based merge to compute
353
        // difference
354
1.32k
        ac->cardinality = run_array_array_subtract(src_1, src_2, ac);
355
1.32k
        *dst = ac;
356
1.32k
        return ARRAY_CONTAINER_TYPE;
357
1.32k
    }
358
297
    bitset_container_t *ans = bitset_container_from_run(src_1);
359
297
    bool result_is_bitset = bitset_array_container_iandnot(ans, src_2, dst);
360
297
    return (result_is_bitset ? BITSET_CONTAINER_TYPE : ARRAY_CONTAINER_TYPE);
361
1.62k
}
362
363
/* Compute the andnot of src_1 and src_2 and write the result to
364
 * dst (which has no container initially).  It will modify src_1
365
 * to be dst if the result is a bitset.  Otherwise, it will
366
 * free src_1 and dst will be a new array container.  In both
367
 * cases, the caller is responsible for deallocating dst.
368
 * Returns true iff dst is a bitset  */
369
370
int run_array_container_iandnot(run_container_t *src_1,
371
                                const array_container_t *src_2,
372
765
                                container_t **dst) {
373
    // dummy implementation same as June 2016 Java
374
765
    int ans = run_array_container_andnot(src_1, src_2, dst);
375
765
    run_container_free(src_1);
376
765
    return ans;
377
765
}
378
379
/* dst must be a valid array container, allowed to be src_1 */
380
381
void array_run_container_andnot(const array_container_t *src_1,
382
                                const run_container_t *src_2,
383
801
                                array_container_t *dst) {
384
    // basically following Java impl as of June 2016
385
801
    if (src_1->cardinality > dst->capacity) {
386
754
        array_container_grow(dst, src_1->cardinality, false);
387
754
    }
388
389
801
    if (src_2->n_runs == 0) {
390
0
        memmove(dst->array, src_1->array,
391
0
                sizeof(uint16_t) * src_1->cardinality);
392
0
        dst->cardinality = src_1->cardinality;
393
0
        return;
394
0
    }
395
801
    int32_t run_start = src_2->runs[0].value;
396
801
    int32_t run_end = run_start + src_2->runs[0].length;
397
801
    int which_run = 0;
398
399
801
    uint16_t val = 0;
400
801
    int dest_card = 0;
401
174k
    for (int i = 0; i < src_1->cardinality; ++i) {
402
173k
        val = src_1->array[i];
403
173k
        if (val < run_start)
404
43.7k
            dst->array[dest_card++] = val;
405
130k
        else if (val <= run_end) {
406
93.6k
            ;  // omitted item
407
93.6k
        } else {
408
50.1k
            do {
409
50.1k
                if (which_run + 1 < src_2->n_runs) {
410
49.9k
                    ++which_run;
411
49.9k
                    run_start = src_2->runs[which_run].value;
412
49.9k
                    run_end = run_start + src_2->runs[which_run].length;
413
414
49.9k
                } else
415
188
                    run_start = run_end = (1 << 16) + 1;
416
50.1k
            } while (val > run_end);
417
36.3k
            --i;
418
36.3k
        }
419
173k
    }
420
801
    dst->cardinality = dest_card;
421
801
}
422
423
/* dst does not indicate a valid container initially.  Eventually it
424
 * can become any kind of container.
425
 */
426
427
void array_run_container_iandnot(array_container_t *src_1,
428
47
                                 const run_container_t *src_2) {
429
47
    array_run_container_andnot(src_1, src_2, src_1);
430
47
}
431
432
/* dst does not indicate a valid container initially.  Eventually it
433
 * can become any kind of container.
434
 */
435
436
int run_run_container_andnot(const run_container_t *src_1,
437
1.18k
                             const run_container_t *src_2, container_t **dst) {
438
1.18k
    run_container_t *ans = run_container_create();
439
1.18k
    run_container_andnot(src_1, src_2, ans);
440
1.18k
    uint8_t typecode_after;
441
1.18k
    *dst = convert_run_to_efficient_container_and_free(ans, &typecode_after);
442
1.18k
    return typecode_after;
443
1.18k
}
444
445
/* Compute the andnot of src_1 and src_2 and write the result to
446
 * dst (which has no container initially).  It will modify src_1
447
 * to be dst if the result is a bitset.  Otherwise, it will
448
 * free src_1 and dst will be a new array container.  In both
449
 * cases, the caller is responsible for deallocating dst.
450
 * Returns true iff dst is a bitset  */
451
452
int run_run_container_iandnot(run_container_t *src_1,
453
470
                              const run_container_t *src_2, container_t **dst) {
454
    // following Java impl as of June 2016 (dummy)
455
470
    int ans = run_run_container_andnot(src_1, src_2, dst);
456
470
    run_container_free(src_1);
457
470
    return ans;
458
470
}
459
460
/*
461
 * dst is a valid array container and may be the same as src_1
462
 */
463
464
void array_array_container_andnot(const array_container_t *src_1,
465
                                  const array_container_t *src_2,
466
2.40k
                                  array_container_t *dst) {
467
2.40k
    array_container_andnot(src_1, src_2, dst);
468
2.40k
}
469
470
/* inplace array-array andnot will always be able to reuse the space of
471
 * src_1 */
472
void array_array_container_iandnot(array_container_t *src_1,
473
3.57k
                                   const array_container_t *src_2) {
474
3.57k
    array_container_andnot(src_1, src_2, src_1);
475
3.57k
}
476
477
/* Compute the andnot of src_1 and src_2 and write the result to
478
 * dst (which has no container initially). Return value is
479
 * "dst is a bitset"
480
 */
481
482
bool bitset_bitset_container_andnot(const bitset_container_t *src_1,
483
                                    const bitset_container_t *src_2,
484
0
                                    container_t **dst) {
485
0
    bitset_container_t *ans = bitset_container_create();
486
0
    int card = bitset_container_andnot(src_1, src_2, ans);
487
0
    if (card <= DEFAULT_MAX_SIZE) {
488
0
        *dst = array_container_from_bitset(ans);
489
0
        bitset_container_free(ans);
490
0
        return false;  // not bitset
491
0
    } else {
492
0
        *dst = ans;
493
0
        return true;
494
0
    }
495
0
}
496
497
/* Compute the andnot of src_1 and src_2 and write the result to
498
 * dst (which has no container initially).  It will modify src_1
499
 * to be dst if the result is a bitset.  Otherwise, it will
500
 * free src_1 and dst will be a new array container.  In both
501
 * cases, the caller is responsible for deallocating dst.
502
 * Returns true iff dst is a bitset  */
503
504
bool bitset_bitset_container_iandnot(bitset_container_t *src_1,
505
                                     const bitset_container_t *src_2,
506
0
                                     container_t **dst) {
507
0
    int card = bitset_container_andnot(src_1, src_2, src_1);
508
0
    if (card <= DEFAULT_MAX_SIZE) {
509
0
        *dst = array_container_from_bitset(src_1);
510
0
        bitset_container_free(src_1);
511
0
        return false;  // not bitset
512
0
    } else {
513
0
        *dst = src_1;
514
        return true;
515
0
    }
516
0
}
517
518
#ifdef __cplusplus
519
}
520
}
521
}  // extern "C" { namespace roaring { namespace internal {
522
#endif