Coverage Report

Created: 2026-05-30 06:50

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/croaring/src/containers/mixed_xor.c
Line
Count
Source
1
/*
2
 * mixed_xor.c
3
 */
4
5
#include <assert.h>
6
#include <string.h>
7
8
#include <roaring/bitset_util.h>
9
#include <roaring/containers/containers.h>
10
#include <roaring/containers/convert.h>
11
#include <roaring/containers/mixed_xor.h>
12
#include <roaring/containers/perfparameters.h>
13
14
#ifdef __cplusplus
15
extern "C" {
16
namespace roaring {
17
namespace internal {
18
#endif
19
20
/* Compute the xor of src_1 and src_2 and write the result to
21
 * dst (which has no container initially).
22
 * Result is true iff dst is a bitset  */
23
bool array_bitset_container_xor(const array_container_t *src_1,
24
                                const bitset_container_t *src_2,
25
642
                                container_t **dst) {
26
642
    bitset_container_t *result = bitset_container_create();
27
642
    bitset_container_copy(src_2, result);
28
642
    result->cardinality = (int32_t)bitset_flip_list_withcard(
29
642
        result->words, result->cardinality, src_1->array, src_1->cardinality);
30
31
    // do required type conversions.
32
642
    if (result->cardinality <= DEFAULT_MAX_SIZE) {
33
124
        *dst = array_container_from_bitset(result);
34
124
        bitset_container_free(result);
35
124
        return false;  // not bitset
36
124
    }
37
518
    *dst = result;
38
518
    return true;  // bitset
39
642
}
40
41
/* Compute the xor of src_1 and src_2 and write the result to
42
 * dst. It is allowed for src_2 to be dst.  This version does not
43
 * update the cardinality of dst (it is set to BITSET_UNKNOWN_CARDINALITY).
44
 */
45
46
void array_bitset_container_lazy_xor(const array_container_t *src_1,
47
                                     const bitset_container_t *src_2,
48
0
                                     bitset_container_t *dst) {
49
0
    if (src_2 != dst) bitset_container_copy(src_2, dst);
50
0
    bitset_flip_list(dst->words, src_1->array, src_1->cardinality);
51
0
    dst->cardinality = BITSET_UNKNOWN_CARDINALITY;
52
0
}
53
54
/* Compute the xor of src_1 and src_2 and write the result to
55
 * dst. Result may be either a bitset or an array container
56
 * (returns "result is bitset"). dst does not initially have
57
 * any container, but becomes either a bitset container (return
58
 * result true) or an array container.
59
 */
60
61
bool run_bitset_container_xor(const run_container_t *src_1,
62
                              const bitset_container_t *src_2,
63
684
                              container_t **dst) {
64
684
    bitset_container_t *result = bitset_container_create();
65
66
684
    bitset_container_copy(src_2, result);
67
55.3k
    for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
68
54.6k
        rle16_t rle = src_1->runs[rlepos];
69
54.6k
        bitset_flip_range(result->words, rle.value,
70
54.6k
                          rle.value + rle.length + UINT32_C(1));
71
54.6k
    }
72
684
    result->cardinality = bitset_container_compute_cardinality(result);
73
74
684
    if (result->cardinality <= DEFAULT_MAX_SIZE) {
75
125
        *dst = array_container_from_bitset(result);
76
125
        bitset_container_free(result);
77
125
        return false;  // not bitset
78
125
    }
79
559
    *dst = result;
80
559
    return true;  // bitset
81
684
}
82
83
/* lazy xor.  Dst is initialized and may be equal to src_2.
84
 *  Result is left as a bitset container, even if actual
85
 *  cardinality would dictate an array container.
86
 */
87
88
void run_bitset_container_lazy_xor(const run_container_t *src_1,
89
                                   const bitset_container_t *src_2,
90
0
                                   bitset_container_t *dst) {
91
0
    if (src_2 != dst) bitset_container_copy(src_2, dst);
92
0
    for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
93
0
        rle16_t rle = src_1->runs[rlepos];
94
0
        bitset_flip_range(dst->words, rle.value,
95
0
                          rle.value + rle.length + UINT32_C(1));
96
0
    }
97
0
    dst->cardinality = BITSET_UNKNOWN_CARDINALITY;
98
0
}
99
100
/* dst does not indicate a valid container initially.  Eventually it
101
 * can become any kind of container.
102
 */
103
104
int array_run_container_xor(const array_container_t *src_1,
105
2.23k
                            const run_container_t *src_2, container_t **dst) {
106
    // semi following Java XOR implementation as of May 2016
107
    // the C OR implementation works quite differently and can return a run
108
    // container
109
    // TODO could optimize for full run containers.
110
111
    // use of lazy following Java impl.
112
2.23k
    const int arbitrary_threshold = 32;
113
2.23k
    if (src_1->cardinality < arbitrary_threshold) {
114
915
        run_container_t *ans = run_container_create();
115
915
        array_run_container_lazy_xor(src_1, src_2, ans);  // keeps runs.
116
915
        uint8_t typecode_after;
117
915
        *dst =
118
915
            convert_run_to_efficient_container_and_free(ans, &typecode_after);
119
915
        return typecode_after;
120
915
    }
121
122
1.31k
    int card = run_container_cardinality(src_2);
123
1.31k
    if (card <= DEFAULT_MAX_SIZE) {
124
        // Java implementation works with the array, xoring the run elements via
125
        // iterator
126
1.09k
        array_container_t *temp = array_container_from_run(src_2);
127
1.09k
        bool ret_is_bitset = array_array_container_xor(temp, src_1, dst);
128
1.09k
        array_container_free(temp);
129
1.09k
        return ret_is_bitset ? BITSET_CONTAINER_TYPE : ARRAY_CONTAINER_TYPE;
130
131
1.09k
    } else {  // guess that it will end up as a bitset
132
219
        bitset_container_t *result = bitset_container_from_run(src_2);
133
219
        bool is_bitset = bitset_array_container_ixor(result, src_1, dst);
134
        // any necessary type conversion has been done by the ixor
135
219
        int retval = (is_bitset ? BITSET_CONTAINER_TYPE : ARRAY_CONTAINER_TYPE);
136
219
        return retval;
137
219
    }
138
1.31k
}
139
140
/* Dst is a valid run container. (Can it be src_2? Let's say not.)
141
 * Leaves result as run container, even if other options are
142
 * smaller.
143
 */
144
145
void array_run_container_lazy_xor(const array_container_t *src_1,
146
                                  const run_container_t *src_2,
147
915
                                  run_container_t *dst) {
148
915
    run_container_grow(dst, src_1->cardinality + src_2->n_runs, false);
149
915
    int32_t rlepos = 0;
150
915
    int32_t arraypos = 0;
151
915
    dst->n_runs = 0;
152
153
28.8k
    while ((rlepos < src_2->n_runs) && (arraypos < src_1->cardinality)) {
154
27.9k
        if (src_2->runs[rlepos].value <= src_1->array[arraypos]) {
155
17.0k
            run_container_smart_append_exclusive(dst, src_2->runs[rlepos].value,
156
17.0k
                                                 src_2->runs[rlepos].length);
157
17.0k
            rlepos++;
158
17.0k
        } else {
159
10.8k
            run_container_smart_append_exclusive(dst, src_1->array[arraypos],
160
10.8k
                                                 0);
161
10.8k
            arraypos++;
162
10.8k
        }
163
27.9k
    }
164
3.14k
    while (arraypos < src_1->cardinality) {
165
2.22k
        run_container_smart_append_exclusive(dst, src_1->array[arraypos], 0);
166
2.22k
        arraypos++;
167
2.22k
    }
168
7.99k
    while (rlepos < src_2->n_runs) {
169
7.07k
        run_container_smart_append_exclusive(dst, src_2->runs[rlepos].value,
170
7.07k
                                             src_2->runs[rlepos].length);
171
7.07k
        rlepos++;
172
7.07k
    }
173
915
}
174
175
/* dst does not indicate a valid container initially.  Eventually it
176
 * can become any kind of container.
177
 */
178
179
int run_run_container_xor(const run_container_t *src_1,
180
55.3k
                          const run_container_t *src_2, container_t **dst) {
181
55.3k
    run_container_t *ans = run_container_create();
182
55.3k
    run_container_xor(src_1, src_2, ans);
183
55.3k
    uint8_t typecode_after;
184
55.3k
    *dst = convert_run_to_efficient_container_and_free(ans, &typecode_after);
185
55.3k
    return typecode_after;
186
55.3k
}
187
188
/*
189
 * Java implementation (as of May 2016) for array_run, run_run
190
 * and  bitset_run don't do anything different for inplace.
191
 * Could adopt the mixed_union.c approach instead (ie, using
192
 * smart_append_exclusive)
193
 *
194
 */
195
196
bool array_array_container_xor(const array_container_t *src_1,
197
                               const array_container_t *src_2,
198
8.19k
                               container_t **dst) {
199
8.19k
    int totalCardinality =
200
8.19k
        src_1->cardinality + src_2->cardinality;  // upper bound
201
8.19k
    if (totalCardinality <= DEFAULT_MAX_SIZE) {
202
7.70k
        *dst = array_container_create_given_capacity(totalCardinality);
203
7.70k
        array_container_xor(src_1, src_2, CAST_array(*dst));
204
7.70k
        return false;  // not a bitset
205
7.70k
    }
206
485
    *dst = bitset_container_from_array(src_1);
207
485
    bool returnval = true;  // expect a bitset
208
485
    bitset_container_t *ourbitset = CAST_bitset(*dst);
209
485
    ourbitset->cardinality = (uint32_t)bitset_flip_list_withcard(
210
485
        ourbitset->words, src_1->cardinality, src_2->array, src_2->cardinality);
211
485
    if (ourbitset->cardinality <= DEFAULT_MAX_SIZE) {
212
        // need to convert!
213
468
        *dst = array_container_from_bitset(ourbitset);
214
468
        bitset_container_free(ourbitset);
215
468
        returnval = false;  // not going to be a bitset
216
468
    }
217
218
485
    return returnval;
219
8.19k
}
220
221
bool array_array_container_lazy_xor(const array_container_t *src_1,
222
                                    const array_container_t *src_2,
223
0
                                    container_t **dst) {
224
0
    int totalCardinality = src_1->cardinality + src_2->cardinality;
225
    //
226
    // We assume that operations involving bitset containers will be faster than
227
    // operations involving solely array containers, except maybe when array
228
    // containers are small. Indeed, for example, it is cheap to compute the
229
    // exclusive union between an array and a bitset container, generally more
230
    // so than between a large array and another array. So it is advantageous to
231
    // favour bitset containers during the computation. Of course, if we convert
232
    // array containers eagerly to bitset containers, we may later need to
233
    // revert the bitset containers to array containerr to satisfy the Roaring
234
    // format requirements, but such one-time conversions at the end may not be
235
    // overly expensive. We arrived to this design based on extensive
236
    // benchmarking on unions. For XOR/exclusive union, we simply followed the
237
    // heuristic used by the unions (see  mixed_union.c). Further tuning is
238
    // possible.
239
    //
240
0
    if (totalCardinality <= ARRAY_LAZY_LOWERBOUND) {
241
0
        *dst = array_container_create_given_capacity(totalCardinality);
242
0
        if (*dst != NULL) array_container_xor(src_1, src_2, CAST_array(*dst));
243
0
        return false;  // not a bitset
244
0
    }
245
0
    *dst = bitset_container_from_array(src_1);
246
0
    bool returnval = true;  // expect a bitset (maybe, for XOR??)
247
0
    if (*dst != NULL) {
248
0
        bitset_container_t *ourbitset = CAST_bitset(*dst);
249
0
        bitset_flip_list(ourbitset->words, src_2->array, src_2->cardinality);
250
0
        ourbitset->cardinality = BITSET_UNKNOWN_CARDINALITY;
251
0
    }
252
0
    return returnval;
253
0
}
254
255
/* Compute the xor of src_1 and src_2 and write the result to
256
 * dst (which has no container initially). Return value is
257
 * "dst is a bitset"
258
 */
259
260
bool bitset_bitset_container_xor(const bitset_container_t *src_1,
261
                                 const bitset_container_t *src_2,
262
0
                                 container_t **dst) {
263
0
    bitset_container_t *ans = bitset_container_create();
264
0
    int card = bitset_container_xor(src_1, src_2, ans);
265
0
    if (card <= DEFAULT_MAX_SIZE) {
266
0
        *dst = array_container_from_bitset(ans);
267
0
        bitset_container_free(ans);
268
0
        return false;  // not bitset
269
0
    } else {
270
0
        *dst = ans;
271
0
        return true;
272
0
    }
273
0
}
274
275
/* Compute the xor of src_1 and src_2 and write the result to
276
 * dst (which has no container initially).  It will modify src_1
277
 * to be dst if the result is a bitset.  Otherwise, it will
278
 * free src_1 and dst will be a new array container.  In both
279
 * cases, the caller is responsible for deallocating dst.
280
 * Returns true iff dst is a bitset  */
281
282
bool bitset_array_container_ixor(bitset_container_t *src_1,
283
                                 const array_container_t *src_2,
284
219
                                 container_t **dst) {
285
219
    *dst = src_1;
286
219
    src_1->cardinality = (uint32_t)bitset_flip_list_withcard(
287
219
        src_1->words, src_1->cardinality, src_2->array, src_2->cardinality);
288
289
219
    if (src_1->cardinality <= DEFAULT_MAX_SIZE) {
290
13
        *dst = array_container_from_bitset(src_1);
291
13
        bitset_container_free(src_1);
292
13
        return false;  // not bitset
293
13
    } else
294
206
        return true;
295
219
}
296
297
/* a bunch of in-place, some of which may not *really* be inplace.
298
 * TODO: write actual inplace routine if efficiency warrants it
299
 * Anything inplace with a bitset is a good candidate
300
 */
301
302
bool bitset_bitset_container_ixor(bitset_container_t *src_1,
303
                                  const bitset_container_t *src_2,
304
1.43k
                                  container_t **dst) {
305
1.43k
    int card = bitset_container_xor(src_1, src_2, src_1);
306
1.43k
    if (card <= DEFAULT_MAX_SIZE) {
307
1.43k
        *dst = array_container_from_bitset(src_1);
308
1.43k
        bitset_container_free(src_1);
309
1.43k
        return false;  // not bitset
310
1.43k
    } else {
311
0
        *dst = src_1;
312
0
        return true;
313
0
    }
314
1.43k
}
315
316
bool array_bitset_container_ixor(array_container_t *src_1,
317
                                 const bitset_container_t *src_2,
318
89
                                 container_t **dst) {
319
89
    bool ans = array_bitset_container_xor(src_1, src_2, dst);
320
89
    array_container_free(src_1);
321
89
    return ans;
322
89
}
323
324
/* Compute the xor of src_1 and src_2 and write the result to
325
 * dst. Result may be either a bitset or an array container
326
 * (returns "result is bitset"). dst does not initially have
327
 * any container, but becomes either a bitset container (return
328
 * result true) or an array container.
329
 */
330
331
bool run_bitset_container_ixor(run_container_t *src_1,
332
                               const bitset_container_t *src_2,
333
10
                               container_t **dst) {
334
10
    bool ans = run_bitset_container_xor(src_1, src_2, dst);
335
10
    run_container_free(src_1);
336
10
    return ans;
337
10
}
338
339
bool bitset_run_container_ixor(bitset_container_t *src_1,
340
                               const run_container_t *src_2,
341
68
                               container_t **dst) {
342
68
    bool ans = run_bitset_container_xor(src_2, src_1, dst);
343
68
    bitset_container_free(src_1);
344
68
    return ans;
345
68
}
346
347
/* dst does not indicate a valid container initially.  Eventually it
348
 * can become any kind of container.
349
 */
350
351
int array_run_container_ixor(array_container_t *src_1,
352
253
                             const run_container_t *src_2, container_t **dst) {
353
253
    int ans = array_run_container_xor(src_1, src_2, dst);
354
253
    array_container_free(src_1);
355
253
    return ans;
356
253
}
357
358
int run_array_container_ixor(run_container_t *src_1,
359
                             const array_container_t *src_2,
360
53
                             container_t **dst) {
361
53
    int ans = array_run_container_xor(src_2, src_1, dst);
362
53
    run_container_free(src_1);
363
53
    return ans;
364
53
}
365
366
bool array_array_container_ixor(array_container_t *src_1,
367
                                const array_container_t *src_2,
368
4.82k
                                container_t **dst) {
369
4.82k
    bool ans = array_array_container_xor(src_1, src_2, dst);
370
4.82k
    array_container_free(src_1);
371
4.82k
    return ans;
372
4.82k
}
373
374
int run_run_container_ixor(run_container_t *src_1, const run_container_t *src_2,
375
54.7k
                           container_t **dst) {
376
54.7k
    int ans = run_run_container_xor(src_1, src_2, dst);
377
54.7k
    run_container_free(src_1);
378
54.7k
    return ans;
379
54.7k
}
380
381
#ifdef __cplusplus
382
}
383
}
384
}  // extern "C" { namespace roaring { namespace internal {
385
#endif