Coverage Report

Created: 2026-01-10 06:43

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/croaring/src/containers/convert.c
Line
Count
Source
1
#include <stdio.h>
2
3
#include <roaring/bitset_util.h>
4
#include <roaring/containers/containers.h>
5
#include <roaring/containers/convert.h>
6
#include <roaring/containers/perfparameters.h>
7
8
#if CROARING_IS_X64
9
#ifndef CROARING_COMPILER_SUPPORTS_AVX512
10
#error "CROARING_COMPILER_SUPPORTS_AVX512 needs to be defined."
11
#endif  // CROARING_COMPILER_SUPPORTS_AVX512
12
#endif
13
14
#ifdef __cplusplus
15
extern "C" {
16
namespace roaring {
17
namespace internal {
18
#endif
19
20
// file contains grubby stuff that must know impl. details of all container
21
// types.
22
0
bitset_container_t *bitset_container_from_array(const array_container_t *ac) {
23
0
    bitset_container_t *ans = bitset_container_create();
24
0
    int limit = array_container_cardinality(ac);
25
0
    for (int i = 0; i < limit; ++i) bitset_container_set(ans, ac->array[i]);
26
0
    return ans;
27
0
}
28
29
0
bitset_container_t *bitset_container_from_run(const run_container_t *arr) {
30
0
    int card = run_container_cardinality(arr);
31
0
    bitset_container_t *answer = bitset_container_create();
32
0
    for (int rlepos = 0; rlepos < arr->n_runs; ++rlepos) {
33
0
        rle16_t vl = arr->runs[rlepos];
34
0
        bitset_set_lenrange(answer->words, vl.value, vl.length);
35
0
    }
36
0
    answer->cardinality = card;
37
0
    return answer;
38
0
}
39
40
0
array_container_t *array_container_from_run(const run_container_t *arr) {
41
0
    array_container_t *answer =
42
0
        array_container_create_given_capacity(run_container_cardinality(arr));
43
0
    answer->cardinality = 0;
44
0
    for (int rlepos = 0; rlepos < arr->n_runs; ++rlepos) {
45
0
        int run_start = arr->runs[rlepos].value;
46
0
        int run_end = run_start + arr->runs[rlepos].length;
47
48
0
        for (int run_value = run_start; run_value <= run_end; ++run_value) {
49
0
            answer->array[answer->cardinality++] = (uint16_t)run_value;
50
0
        }
51
0
    }
52
0
    return answer;
53
0
}
54
55
0
array_container_t *array_container_from_bitset(const bitset_container_t *bits) {
56
0
    array_container_t *result =
57
0
        array_container_create_given_capacity(bits->cardinality);
58
0
    result->cardinality = bits->cardinality;
59
0
#if CROARING_IS_X64
60
0
#if CROARING_COMPILER_SUPPORTS_AVX512
61
0
    if (croaring_hardware_support() & ROARING_SUPPORTS_AVX512) {
62
0
        bitset_extract_setbits_avx512_uint16(
63
0
            bits->words, BITSET_CONTAINER_SIZE_IN_WORDS, result->array,
64
0
            bits->cardinality, 0);
65
0
    } else
66
0
#endif
67
0
    {
68
        //  sse version ends up being slower here
69
        // (bitset_extract_setbits_sse_uint16)
70
        // because of the sparsity of the data
71
0
        bitset_extract_setbits_uint16(
72
0
            bits->words, BITSET_CONTAINER_SIZE_IN_WORDS, result->array, 0);
73
0
    }
74
#else
75
    // If the system is not x64, then we have no accelerated function.
76
    bitset_extract_setbits_uint16(bits->words, BITSET_CONTAINER_SIZE_IN_WORDS,
77
                                  result->array, 0);
78
#endif
79
80
0
    return result;
81
0
}
82
83
/* assumes that container has adequate space.  Run from [s,e] (inclusive) */
84
0
static void add_run(run_container_t *rc, int s, int e) {
85
0
    rc->runs[rc->n_runs].value = s;
86
0
    rc->runs[rc->n_runs].length = e - s;
87
0
    rc->n_runs++;
88
0
}
89
90
0
run_container_t *run_container_from_array(const array_container_t *c) {
91
0
    int32_t n_runs = array_container_number_of_runs(c);
92
0
    run_container_t *answer = run_container_create_given_capacity(n_runs);
93
0
    int prev = -2;
94
0
    int run_start = -1;
95
0
    int32_t card = c->cardinality;
96
0
    if (card == 0) return answer;
97
0
    for (int i = 0; i < card; ++i) {
98
0
        const uint16_t cur_val = c->array[i];
99
0
        if (cur_val != prev + 1) {
100
            // new run starts; flush old one, if any
101
0
            if (run_start != -1) add_run(answer, run_start, prev);
102
0
            run_start = cur_val;
103
0
        }
104
0
        prev = c->array[i];
105
0
    }
106
    // now prev is the last seen value
107
0
    add_run(answer, run_start, prev);
108
    // assert(run_container_cardinality(answer) == c->cardinality);
109
0
    return answer;
110
0
}
111
112
/**
113
 * Convert the runcontainer to either a Bitmap or an Array Container, depending
114
 * on the cardinality.  Frees the container.
115
 * Allocates and returns new container, which caller is responsible for freeing.
116
 * It does not free the run container.
117
 */
118
container_t *convert_to_bitset_or_array_container(run_container_t *rc,
119
                                                  int32_t card,
120
0
                                                  uint8_t *resulttype) {
121
0
    if (card <= DEFAULT_MAX_SIZE) {
122
0
        array_container_t *answer = array_container_create_given_capacity(card);
123
0
        answer->cardinality = 0;
124
0
        for (int rlepos = 0; rlepos < rc->n_runs; ++rlepos) {
125
0
            uint16_t run_start = rc->runs[rlepos].value;
126
0
            uint16_t run_end = run_start + rc->runs[rlepos].length;
127
0
            for (uint16_t run_value = run_start; run_value < run_end;
128
0
                 ++run_value) {
129
0
                answer->array[answer->cardinality++] = run_value;
130
0
            }
131
0
            answer->array[answer->cardinality++] = run_end;
132
0
        }
133
0
        assert(card == answer->cardinality);
134
0
        *resulttype = ARRAY_CONTAINER_TYPE;
135
        // run_container_free(r);
136
0
        return answer;
137
0
    }
138
0
    bitset_container_t *answer = bitset_container_create();
139
0
    for (int rlepos = 0; rlepos < rc->n_runs; ++rlepos) {
140
0
        uint16_t run_start = rc->runs[rlepos].value;
141
0
        bitset_set_lenrange(answer->words, run_start, rc->runs[rlepos].length);
142
0
    }
143
0
    answer->cardinality = card;
144
0
    *resulttype = BITSET_CONTAINER_TYPE;
145
    // run_container_free(r);
146
0
    return answer;
147
0
}
148
149
/* Converts a run container to either an array or a bitset, IF it saves space.
150
 */
151
/* If a conversion occurs, the caller is responsible to free the original
152
 * container and
153
 * he becomes responsible to free the new one. */
154
container_t *convert_run_to_efficient_container(run_container_t *c,
155
0
                                                uint8_t *typecode_after) {
156
0
    int32_t size_as_run_container =
157
0
        run_container_serialized_size_in_bytes(c->n_runs);
158
159
0
    int32_t size_as_bitset_container =
160
0
        bitset_container_serialized_size_in_bytes();
161
0
    int32_t card = run_container_cardinality(c);
162
0
    int32_t size_as_array_container =
163
0
        array_container_serialized_size_in_bytes(card);
164
165
0
    int32_t min_size_non_run =
166
0
        size_as_bitset_container < size_as_array_container
167
0
            ? size_as_bitset_container
168
0
            : size_as_array_container;
169
0
    if (size_as_run_container <= min_size_non_run) {  // no conversion
170
0
        *typecode_after = RUN_CONTAINER_TYPE;
171
0
        return c;
172
0
    }
173
0
    if (card <= DEFAULT_MAX_SIZE) {
174
        // to array
175
0
        array_container_t *answer = array_container_create_given_capacity(card);
176
0
        answer->cardinality = 0;
177
0
        for (int rlepos = 0; rlepos < c->n_runs; ++rlepos) {
178
0
            int run_start = c->runs[rlepos].value;
179
0
            int run_end = run_start + c->runs[rlepos].length;
180
181
0
            for (int run_value = run_start; run_value <= run_end; ++run_value) {
182
0
                answer->array[answer->cardinality++] = (uint16_t)run_value;
183
0
            }
184
0
        }
185
0
        *typecode_after = ARRAY_CONTAINER_TYPE;
186
0
        return answer;
187
0
    }
188
189
    // else to bitset
190
0
    bitset_container_t *answer = bitset_container_create();
191
192
0
    for (int rlepos = 0; rlepos < c->n_runs; ++rlepos) {
193
0
        int start = c->runs[rlepos].value;
194
0
        int end = start + c->runs[rlepos].length;
195
0
        bitset_set_range(answer->words, start, end + 1);
196
0
    }
197
0
    answer->cardinality = card;
198
0
    *typecode_after = BITSET_CONTAINER_TYPE;
199
0
    return answer;
200
0
}
201
202
// like convert_run_to_efficient_container but frees the old result if needed
203
container_t *convert_run_to_efficient_container_and_free(
204
0
    run_container_t *c, uint8_t *typecode_after) {
205
0
    container_t *answer = convert_run_to_efficient_container(c, typecode_after);
206
0
    if (answer != c) run_container_free(c);
207
0
    return answer;
208
0
}
209
210
/* once converted, the original container is disposed here, rather than
211
   in roaring_array
212
*/
213
214
// TODO: split into run-  array-  and bitset-  subfunctions for sanity;
215
// a few function calls won't really matter.
216
217
container_t *convert_run_optimize(container_t *c, uint8_t typecode_original,
218
0
                                  uint8_t *typecode_after) {
219
0
    if (typecode_original == RUN_CONTAINER_TYPE) {
220
0
        container_t *newc =
221
0
            convert_run_to_efficient_container(CAST_run(c), typecode_after);
222
0
        if (newc != c) {
223
0
            container_free(c, typecode_original);
224
0
        }
225
0
        return newc;
226
0
    } else if (typecode_original == ARRAY_CONTAINER_TYPE) {
227
        // it might need to be converted to a run container.
228
0
        array_container_t *c_qua_array = CAST_array(c);
229
0
        int32_t n_runs = array_container_number_of_runs(c_qua_array);
230
0
        int32_t size_as_run_container =
231
0
            run_container_serialized_size_in_bytes(n_runs);
232
0
        int32_t card = array_container_cardinality(c_qua_array);
233
0
        int32_t size_as_array_container =
234
0
            array_container_serialized_size_in_bytes(card);
235
236
0
        if (size_as_run_container >= size_as_array_container) {
237
0
            *typecode_after = ARRAY_CONTAINER_TYPE;
238
0
            return c;
239
0
        }
240
        // else convert array to run container
241
0
        run_container_t *answer = run_container_create_given_capacity(n_runs);
242
0
        int prev = -2;
243
0
        int run_start = -1;
244
245
0
        assert(card > 0);
246
0
        for (int i = 0; i < card; ++i) {
247
0
            uint16_t cur_val = c_qua_array->array[i];
248
0
            if (cur_val != prev + 1) {
249
                // new run starts; flush old one, if any
250
0
                if (run_start != -1) add_run(answer, run_start, prev);
251
0
                run_start = cur_val;
252
0
            }
253
0
            prev = c_qua_array->array[i];
254
0
        }
255
0
        assert(run_start >= 0);
256
        // now prev is the last seen value
257
0
        add_run(answer, run_start, prev);
258
0
        *typecode_after = RUN_CONTAINER_TYPE;
259
0
        array_container_free(c_qua_array);
260
0
        return answer;
261
0
    } else if (typecode_original ==
262
0
               BITSET_CONTAINER_TYPE) {  // run conversions on bitset
263
        // does bitset need conversion to run?
264
0
        bitset_container_t *c_qua_bitset = CAST_bitset(c);
265
0
        int32_t n_runs = bitset_container_number_of_runs(c_qua_bitset);
266
0
        int32_t size_as_run_container =
267
0
            run_container_serialized_size_in_bytes(n_runs);
268
0
        int32_t size_as_bitset_container =
269
0
            bitset_container_serialized_size_in_bytes();
270
271
0
        if (size_as_bitset_container <= size_as_run_container) {
272
            // no conversion needed.
273
0
            *typecode_after = BITSET_CONTAINER_TYPE;
274
0
            return c;
275
0
        }
276
        // bitset to runcontainer (ported from Java  RunContainer(
277
        // BitmapContainer bc, int nbrRuns))
278
0
        assert(n_runs > 0);  // no empty bitmaps
279
0
        run_container_t *answer = run_container_create_given_capacity(n_runs);
280
281
0
        int long_ctr = 0;
282
0
        uint64_t cur_word = c_qua_bitset->words[0];
283
0
        while (true) {
284
0
            while (cur_word == UINT64_C(0) &&
285
0
                   long_ctr < BITSET_CONTAINER_SIZE_IN_WORDS - 1)
286
0
                cur_word = c_qua_bitset->words[++long_ctr];
287
288
0
            if (cur_word == UINT64_C(0)) {
289
0
                bitset_container_free(c_qua_bitset);
290
0
                *typecode_after = RUN_CONTAINER_TYPE;
291
0
                return answer;
292
0
            }
293
294
0
            int local_run_start = roaring_trailing_zeroes(cur_word);
295
0
            int run_start = local_run_start + 64 * long_ctr;
296
0
            uint64_t cur_word_with_1s = cur_word | (cur_word - 1);
297
298
0
            int run_end = 0;
299
0
            while (cur_word_with_1s == UINT64_C(0xFFFFFFFFFFFFFFFF) &&
300
0
                   long_ctr < BITSET_CONTAINER_SIZE_IN_WORDS - 1)
301
0
                cur_word_with_1s = c_qua_bitset->words[++long_ctr];
302
303
0
            if (cur_word_with_1s == UINT64_C(0xFFFFFFFFFFFFFFFF)) {
304
0
                run_end = 64 + long_ctr * 64;  // exclusive, I guess
305
0
                add_run(answer, run_start, run_end - 1);
306
0
                bitset_container_free(c_qua_bitset);
307
0
                *typecode_after = RUN_CONTAINER_TYPE;
308
0
                return answer;
309
0
            }
310
0
            int local_run_end = roaring_trailing_zeroes(~cur_word_with_1s);
311
0
            run_end = local_run_end + long_ctr * 64;
312
0
            add_run(answer, run_start, run_end - 1);
313
0
            cur_word = cur_word_with_1s & (cur_word_with_1s + 1);
314
0
        }
315
0
        return answer;
316
0
    } else {
317
0
        assert(false);
318
0
        roaring_unreachable;
319
0
        return NULL;
320
0
    }
321
0
}
322
323
container_t *container_from_run_range(const run_container_t *run, uint32_t min,
324
0
                                      uint32_t max, uint8_t *typecode_after) {
325
    // We expect most of the time to end up with a bitset container
326
0
    bitset_container_t *bitset = bitset_container_create();
327
0
    *typecode_after = BITSET_CONTAINER_TYPE;
328
0
    int32_t union_cardinality = 0;
329
0
    for (int32_t i = 0; i < run->n_runs; ++i) {
330
0
        uint32_t rle_min = run->runs[i].value;
331
0
        uint32_t rle_max = rle_min + run->runs[i].length;
332
0
        bitset_set_lenrange(bitset->words, rle_min, rle_max - rle_min);
333
0
        union_cardinality += run->runs[i].length + 1;
334
0
    }
335
0
    union_cardinality += max - min + 1;
336
0
    union_cardinality -=
337
0
        bitset_lenrange_cardinality(bitset->words, min, max - min);
338
0
    bitset_set_lenrange(bitset->words, min, max - min);
339
0
    bitset->cardinality = union_cardinality;
340
0
    if (bitset->cardinality <= DEFAULT_MAX_SIZE) {
341
        // we need to convert to an array container
342
0
        array_container_t *array = array_container_from_bitset(bitset);
343
0
        *typecode_after = ARRAY_CONTAINER_TYPE;
344
0
        bitset_container_free(bitset);
345
0
        return array;
346
0
    }
347
0
    return bitset;
348
0
}
349
350
#ifdef __cplusplus
351
}
352
}
353
}  // extern "C" { namespace roaring { namespace internal {
354
#endif