Coverage Report

Created: 2026-06-07 06:48

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/croaring/src/containers/run.c
Line
Count
Source
1
#include <stdio.h>
2
#include <stdlib.h>
3
4
#include <roaring/containers/run.h>
5
#include <roaring/memory.h>
6
#include <roaring/portability.h>
7
8
#if CROARING_IS_X64
9
#ifndef CROARING_COMPILER_SUPPORTS_AVX512
10
#error "CROARING_COMPILER_SUPPORTS_AVX512 needs to be defined."
11
#endif  // CROARING_COMPILER_SUPPORTS_AVX512
12
#endif
13
#if defined(__GNUC__) && !defined(__clang__)
14
#pragma GCC diagnostic push
15
#pragma GCC diagnostic ignored "-Wuninitialized"
16
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
17
#endif
18
#ifdef __cplusplus
19
extern "C" {
20
namespace roaring {
21
namespace internal {
22
#endif
23
24
extern inline uint16_t run_container_minimum(const run_container_t *run);
25
extern inline uint16_t run_container_maximum(const run_container_t *run);
26
extern inline int32_t interleavedBinarySearch(const rle16_t *array,
27
                                              int32_t lenarray, uint16_t ikey);
28
extern inline bool run_container_contains(const run_container_t *run,
29
                                          uint16_t pos);
30
extern inline int run_container_index_equalorlarger(const run_container_t *arr,
31
                                                    uint16_t x);
32
extern inline bool run_container_is_full(const run_container_t *run);
33
extern inline bool run_container_nonzero_cardinality(const run_container_t *rc);
34
extern inline int32_t run_container_serialized_size_in_bytes(int32_t num_runs);
35
extern inline run_container_t *run_container_create_range(uint32_t start,
36
                                                          uint32_t stop);
37
extern inline int run_container_cardinality(const run_container_t *run);
38
39
0
bool run_container_add(run_container_t *run, uint16_t pos) {
40
0
    int32_t index = interleavedBinarySearch(run->runs, run->n_runs, pos);
41
0
    if (index >= 0) return false;  // already there
42
0
    index = -index - 2;            // points to preceding value, possibly -1
43
0
    if (index >= 0) {              // possible match
44
0
        int32_t offset = pos - run->runs[index].value;
45
0
        int32_t le = run->runs[index].length;
46
0
        if (offset <= le) return false;  // already there
47
0
        if (offset == le + 1) {
48
            // we may need to fuse
49
0
            if (index + 1 < run->n_runs) {
50
0
                if (run->runs[index + 1].value == pos + 1) {
51
                    // indeed fusion is needed
52
0
                    run->runs[index].length = run->runs[index + 1].value +
53
0
                                              run->runs[index + 1].length -
54
0
                                              run->runs[index].value;
55
0
                    recoverRoomAtIndex(run, (uint16_t)(index + 1));
56
0
                    return true;
57
0
                }
58
0
            }
59
0
            run->runs[index].length++;
60
0
            return true;
61
0
        }
62
0
        if (index + 1 < run->n_runs) {
63
            // we may need to fuse
64
0
            if (run->runs[index + 1].value == pos + 1) {
65
                // indeed fusion is needed
66
0
                run->runs[index + 1].value = pos;
67
0
                run->runs[index + 1].length = run->runs[index + 1].length + 1;
68
0
                return true;
69
0
            }
70
0
        }
71
0
    }
72
0
    if (index == -1) {
73
        // we may need to extend the first run
74
0
        if (0 < run->n_runs) {
75
0
            if (run->runs[0].value == pos + 1) {
76
0
                run->runs[0].length++;
77
0
                run->runs[0].value--;
78
0
                return true;
79
0
            }
80
0
        }
81
0
    }
82
0
    makeRoomAtIndex(run, (uint16_t)(index + 1));
83
0
    run->runs[index + 1].value = pos;
84
0
    run->runs[index + 1].length = 0;
85
0
    return true;
86
0
}
87
88
/* Create a new run container. Return NULL in case of failure. */
89
0
run_container_t *run_container_create_given_capacity(int32_t size) {
90
0
    run_container_t *run;
91
    /* Allocate the run container itself. */
92
0
    if ((run = (run_container_t *)roaring_malloc(sizeof(run_container_t))) ==
93
0
        NULL) {
94
0
        return NULL;
95
0
    }
96
0
    if (size <= 0) {  // we don't want to rely on malloc(0)
97
0
        run->runs = NULL;
98
0
    } else if ((run->runs = (rle16_t *)roaring_malloc(sizeof(rle16_t) *
99
0
                                                      size)) == NULL) {
100
0
        roaring_free(run);
101
0
        return NULL;
102
0
    }
103
0
    run->capacity = size;
104
0
    run->n_runs = 0;
105
0
    return run;
106
0
}
107
108
0
int run_container_shrink_to_fit(run_container_t *src) {
109
0
    if (src->n_runs == src->capacity) return 0;  // nothing to do
110
0
    int savings = src->capacity - src->n_runs;
111
0
    src->capacity = src->n_runs;
112
0
    rle16_t *oldruns = src->runs;
113
0
    src->runs =
114
0
        (rle16_t *)roaring_realloc(oldruns, src->capacity * sizeof(rle16_t));
115
0
    if (src->runs == NULL) roaring_free(oldruns);  // should never happen?
116
0
    return savings;
117
0
}
118
/* Create a new run container. Return NULL in case of failure. */
119
0
run_container_t *run_container_create(void) {
120
0
    return run_container_create_given_capacity(RUN_DEFAULT_INIT_SIZE);
121
0
}
122
123
CROARING_ALLOW_UNALIGNED
124
0
run_container_t *run_container_clone(const run_container_t *src) {
125
0
    run_container_t *run = run_container_create_given_capacity(src->capacity);
126
0
    if (run == NULL) return NULL;
127
0
    run->capacity = src->capacity;
128
0
    run->n_runs = src->n_runs;
129
0
    memcpy(run->runs, src->runs, src->n_runs * sizeof(rle16_t));
130
0
    return run;
131
0
}
132
133
void run_container_offset(const run_container_t *c, container_t **loc,
134
0
                          container_t **hic, uint16_t offset) {
135
0
    run_container_t *lo = NULL, *hi = NULL;
136
137
0
    bool split;
138
0
    unsigned int lo_cap, hi_cap;
139
0
    int top, pivot;
140
141
0
    top = (1 << 16) - offset;
142
0
    pivot = run_container_index_equalorlarger(c, top);
143
    // pivot is the index of the first run that is >= top or -1 if no such run
144
145
0
    if (pivot >= 0) {
146
0
        split = c->runs[pivot].value < top;
147
0
        lo_cap = pivot + (split ? 1 : 0);
148
0
        hi_cap = c->n_runs - pivot;
149
0
    } else {
150
        // here pivot < 0
151
0
        split = false;
152
0
        lo_cap = c->n_runs;
153
0
        hi_cap = 0;
154
0
    }
155
156
0
    if (loc && lo_cap) {
157
0
        lo = run_container_create_given_capacity(lo_cap);
158
0
        memcpy(lo->runs, c->runs, lo_cap * sizeof(rle16_t));
159
0
        lo->n_runs = lo_cap;
160
0
        for (unsigned int i = 0; i < lo_cap; ++i) {
161
0
            lo->runs[i].value += offset;
162
0
        }
163
0
        *loc = (container_t *)lo;
164
0
    }
165
166
0
    if (hic && hi_cap) {
167
0
        hi = run_container_create_given_capacity(hi_cap);
168
0
        memcpy(hi->runs, c->runs + pivot, hi_cap * sizeof(rle16_t));
169
0
        hi->n_runs = hi_cap;
170
0
        for (unsigned int i = 0; i < hi_cap; ++i) {
171
0
            hi->runs[i].value += offset;
172
0
        }
173
0
        *hic = (container_t *)hi;
174
0
    }
175
176
    // Fix the split.
177
0
    if (split) {
178
0
        if (lo != NULL) {
179
            // Add the missing run to 'lo', exhausting length.
180
0
            lo->runs[lo->n_runs - 1].length =
181
0
                (1 << 16) - lo->runs[lo->n_runs - 1].value - 1;
182
0
        }
183
184
0
        if (hi != NULL) {
185
            // Fix the first run in 'hi'.
186
0
            hi->runs[0].length -= UINT16_MAX - hi->runs[0].value + 1;
187
0
            hi->runs[0].value = 0;
188
0
        }
189
0
    }
190
0
}
191
192
/* Free memory. */
193
0
void run_container_free(run_container_t *run) {
194
0
    if (run == NULL) return;
195
0
    roaring_free(run->runs);
196
0
    roaring_free(run);
197
0
}
198
199
0
void run_container_grow(run_container_t *run, int32_t min, bool copy) {
200
0
    int32_t newCapacity = (run->capacity == 0)   ? RUN_DEFAULT_INIT_SIZE
201
0
                          : run->capacity < 64   ? run->capacity * 2
202
0
                          : run->capacity < 1024 ? run->capacity * 3 / 2
203
0
                                                 : run->capacity * 5 / 4;
204
0
    if (newCapacity < min) newCapacity = min;
205
0
    run->capacity = newCapacity;
206
0
    assert(run->capacity >= min);
207
0
    if (copy) {
208
0
        rle16_t *oldruns = run->runs;
209
0
        run->runs = (rle16_t *)roaring_realloc(oldruns,
210
0
                                               run->capacity * sizeof(rle16_t));
211
0
        if (run->runs == NULL) roaring_free(oldruns);
212
0
    } else {
213
0
        roaring_free(run->runs);
214
0
        run->runs = (rle16_t *)roaring_malloc(run->capacity * sizeof(rle16_t));
215
0
    }
216
    // We may have run->runs == NULL.
217
0
}
218
219
/* copy one container into another */
220
0
void run_container_copy(const run_container_t *src, run_container_t *dst) {
221
0
    const int32_t n_runs = src->n_runs;
222
0
    if (src->n_runs > dst->capacity) {
223
0
        run_container_grow(dst, n_runs, false);
224
0
    }
225
0
    dst->n_runs = n_runs;
226
0
    memcpy(dst->runs, src->runs, sizeof(rle16_t) * n_runs);
227
0
}
228
229
/* Compute the union of `src_1' and `src_2' and write the result to `dst'
230
 * It is assumed that `dst' is distinct from both `src_1' and `src_2'. */
231
void run_container_union(const run_container_t *src_1,
232
0
                         const run_container_t *src_2, run_container_t *dst) {
233
    // TODO: this could be a lot more efficient
234
235
    // we start out with inexpensive checks
236
0
    const bool if1 = run_container_is_full(src_1);
237
0
    const bool if2 = run_container_is_full(src_2);
238
0
    if (if1 || if2) {
239
0
        if (if1) {
240
0
            run_container_copy(src_1, dst);
241
0
            return;
242
0
        }
243
0
        if (if2) {
244
0
            run_container_copy(src_2, dst);
245
0
            return;
246
0
        }
247
0
    }
248
0
    const int32_t neededcapacity = src_1->n_runs + src_2->n_runs;
249
0
    if (dst->capacity < neededcapacity)
250
0
        run_container_grow(dst, neededcapacity, false);
251
0
    dst->n_runs = 0;
252
0
    int32_t rlepos = 0;
253
0
    int32_t xrlepos = 0;
254
255
0
    rle16_t previousrle;
256
0
    if (src_1->runs[rlepos].value <= src_2->runs[xrlepos].value) {
257
0
        previousrle = run_container_append_first(dst, src_1->runs[rlepos]);
258
0
        rlepos++;
259
0
    } else {
260
0
        previousrle = run_container_append_first(dst, src_2->runs[xrlepos]);
261
0
        xrlepos++;
262
0
    }
263
264
0
    while ((xrlepos < src_2->n_runs) && (rlepos < src_1->n_runs)) {
265
0
        rle16_t newrl;
266
0
        if (src_1->runs[rlepos].value <= src_2->runs[xrlepos].value) {
267
0
            newrl = src_1->runs[rlepos];
268
0
            rlepos++;
269
0
        } else {
270
0
            newrl = src_2->runs[xrlepos];
271
0
            xrlepos++;
272
0
        }
273
0
        run_container_append(dst, newrl, &previousrle);
274
0
    }
275
0
    while (xrlepos < src_2->n_runs) {
276
0
        run_container_append(dst, src_2->runs[xrlepos], &previousrle);
277
0
        xrlepos++;
278
0
    }
279
0
    while (rlepos < src_1->n_runs) {
280
0
        run_container_append(dst, src_1->runs[rlepos], &previousrle);
281
0
        rlepos++;
282
0
    }
283
0
}
284
285
/* Compute the union of `src_1' and `src_2' and write the result to `src_1'
286
 */
287
void run_container_union_inplace(run_container_t *src_1,
288
0
                                 const run_container_t *src_2) {
289
    // TODO: this could be a lot more efficient
290
291
    // we start out with inexpensive checks
292
0
    const bool if1 = run_container_is_full(src_1);
293
0
    const bool if2 = run_container_is_full(src_2);
294
0
    if (if1 || if2) {
295
0
        if (if1) {
296
0
            return;
297
0
        }
298
0
        if (if2) {
299
0
            run_container_copy(src_2, src_1);
300
0
            return;
301
0
        }
302
0
    }
303
    // we move the data to the end of the current array
304
0
    const int32_t maxoutput = src_1->n_runs + src_2->n_runs;
305
0
    const int32_t neededcapacity = maxoutput + src_1->n_runs;
306
0
    if (src_1->capacity < neededcapacity)
307
0
        run_container_grow(src_1, neededcapacity, true);
308
0
    memmove(src_1->runs + maxoutput, src_1->runs,
309
0
            src_1->n_runs * sizeof(rle16_t));
310
0
    rle16_t *inputsrc1 = src_1->runs + maxoutput;
311
0
    const int32_t input1nruns = src_1->n_runs;
312
0
    src_1->n_runs = 0;
313
0
    int32_t rlepos = 0;
314
0
    int32_t xrlepos = 0;
315
316
0
    rle16_t previousrle;
317
0
    if (inputsrc1[rlepos].value <= src_2->runs[xrlepos].value) {
318
0
        previousrle = run_container_append_first(src_1, inputsrc1[rlepos]);
319
0
        rlepos++;
320
0
    } else {
321
0
        previousrle = run_container_append_first(src_1, src_2->runs[xrlepos]);
322
0
        xrlepos++;
323
0
    }
324
0
    while ((xrlepos < src_2->n_runs) && (rlepos < input1nruns)) {
325
0
        rle16_t newrl;
326
0
        if (inputsrc1[rlepos].value <= src_2->runs[xrlepos].value) {
327
0
            newrl = inputsrc1[rlepos];
328
0
            rlepos++;
329
0
        } else {
330
0
            newrl = src_2->runs[xrlepos];
331
0
            xrlepos++;
332
0
        }
333
0
        run_container_append(src_1, newrl, &previousrle);
334
0
    }
335
0
    while (xrlepos < src_2->n_runs) {
336
0
        run_container_append(src_1, src_2->runs[xrlepos], &previousrle);
337
0
        xrlepos++;
338
0
    }
339
0
    while (rlepos < input1nruns) {
340
0
        run_container_append(src_1, inputsrc1[rlepos], &previousrle);
341
0
        rlepos++;
342
0
    }
343
0
}
344
345
/* Compute the symmetric difference of `src_1' and `src_2' and write the result
346
 * to `dst'
347
 * It is assumed that `dst' is distinct from both `src_1' and `src_2'. */
348
void run_container_xor(const run_container_t *src_1,
349
0
                       const run_container_t *src_2, run_container_t *dst) {
350
    // don't bother to convert xor with full range into negation
351
    // since negation is implemented similarly
352
353
0
    const int32_t neededcapacity = src_1->n_runs + src_2->n_runs;
354
0
    if (dst->capacity < neededcapacity)
355
0
        run_container_grow(dst, neededcapacity, false);
356
357
0
    int32_t pos1 = 0;
358
0
    int32_t pos2 = 0;
359
0
    dst->n_runs = 0;
360
361
0
    while ((pos1 < src_1->n_runs) && (pos2 < src_2->n_runs)) {
362
0
        if (src_1->runs[pos1].value <= src_2->runs[pos2].value) {
363
0
            run_container_smart_append_exclusive(dst, src_1->runs[pos1].value,
364
0
                                                 src_1->runs[pos1].length);
365
0
            pos1++;
366
0
        } else {
367
0
            run_container_smart_append_exclusive(dst, src_2->runs[pos2].value,
368
0
                                                 src_2->runs[pos2].length);
369
0
            pos2++;
370
0
        }
371
0
    }
372
0
    while (pos1 < src_1->n_runs) {
373
0
        run_container_smart_append_exclusive(dst, src_1->runs[pos1].value,
374
0
                                             src_1->runs[pos1].length);
375
0
        pos1++;
376
0
    }
377
378
0
    while (pos2 < src_2->n_runs) {
379
0
        run_container_smart_append_exclusive(dst, src_2->runs[pos2].value,
380
0
                                             src_2->runs[pos2].length);
381
0
        pos2++;
382
0
    }
383
0
}
384
385
/* Compute the intersection of src_1 and src_2 and write the result to
386
 * dst. It is assumed that dst is distinct from both src_1 and src_2. */
387
void run_container_intersection(const run_container_t *src_1,
388
                                const run_container_t *src_2,
389
0
                                run_container_t *dst) {
390
0
    const bool if1 = run_container_is_full(src_1);
391
0
    const bool if2 = run_container_is_full(src_2);
392
0
    if (if1 || if2) {
393
0
        if (if1) {
394
0
            run_container_copy(src_2, dst);
395
0
            return;
396
0
        }
397
0
        if (if2) {
398
0
            run_container_copy(src_1, dst);
399
0
            return;
400
0
        }
401
0
    }
402
    // TODO: this could be a lot more efficient, could use SIMD optimizations
403
0
    const int32_t neededcapacity = src_1->n_runs + src_2->n_runs;
404
0
    if (dst->capacity < neededcapacity)
405
0
        run_container_grow(dst, neededcapacity, false);
406
0
    dst->n_runs = 0;
407
0
    int32_t rlepos = 0;
408
0
    int32_t xrlepos = 0;
409
0
    int32_t start = src_1->runs[rlepos].value;
410
0
    int32_t end = start + src_1->runs[rlepos].length + 1;
411
0
    int32_t xstart = src_2->runs[xrlepos].value;
412
0
    int32_t xend = xstart + src_2->runs[xrlepos].length + 1;
413
0
    while ((rlepos < src_1->n_runs) && (xrlepos < src_2->n_runs)) {
414
0
        if (end <= xstart) {
415
0
            ++rlepos;
416
0
            if (rlepos < src_1->n_runs) {
417
0
                start = src_1->runs[rlepos].value;
418
0
                end = start + src_1->runs[rlepos].length + 1;
419
0
            }
420
0
        } else if (xend <= start) {
421
0
            ++xrlepos;
422
0
            if (xrlepos < src_2->n_runs) {
423
0
                xstart = src_2->runs[xrlepos].value;
424
0
                xend = xstart + src_2->runs[xrlepos].length + 1;
425
0
            }
426
0
        } else {  // they overlap
427
0
            const int32_t lateststart = start > xstart ? start : xstart;
428
0
            int32_t earliestend;
429
0
            if (end == xend) {  // improbable
430
0
                earliestend = end;
431
0
                rlepos++;
432
0
                xrlepos++;
433
0
                if (rlepos < src_1->n_runs) {
434
0
                    start = src_1->runs[rlepos].value;
435
0
                    end = start + src_1->runs[rlepos].length + 1;
436
0
                }
437
0
                if (xrlepos < src_2->n_runs) {
438
0
                    xstart = src_2->runs[xrlepos].value;
439
0
                    xend = xstart + src_2->runs[xrlepos].length + 1;
440
0
                }
441
0
            } else if (end < xend) {
442
0
                earliestend = end;
443
0
                rlepos++;
444
0
                if (rlepos < src_1->n_runs) {
445
0
                    start = src_1->runs[rlepos].value;
446
0
                    end = start + src_1->runs[rlepos].length + 1;
447
0
                }
448
449
0
            } else {  // end > xend
450
0
                earliestend = xend;
451
0
                xrlepos++;
452
0
                if (xrlepos < src_2->n_runs) {
453
0
                    xstart = src_2->runs[xrlepos].value;
454
0
                    xend = xstart + src_2->runs[xrlepos].length + 1;
455
0
                }
456
0
            }
457
0
            dst->runs[dst->n_runs].value = (uint16_t)lateststart;
458
0
            dst->runs[dst->n_runs].length =
459
0
                (uint16_t)(earliestend - lateststart - 1);
460
0
            dst->n_runs++;
461
0
        }
462
0
    }
463
0
}
464
465
/* Compute the size of the intersection of src_1 and src_2 . */
466
int run_container_intersection_cardinality(const run_container_t *src_1,
467
0
                                           const run_container_t *src_2) {
468
0
    const bool if1 = run_container_is_full(src_1);
469
0
    const bool if2 = run_container_is_full(src_2);
470
0
    if (if1 || if2) {
471
0
        if (if1) {
472
0
            return run_container_cardinality(src_2);
473
0
        }
474
0
        if (if2) {
475
0
            return run_container_cardinality(src_1);
476
0
        }
477
0
    }
478
0
    int answer = 0;
479
0
    int32_t rlepos = 0;
480
0
    int32_t xrlepos = 0;
481
0
    int32_t start = src_1->runs[rlepos].value;
482
0
    int32_t end = start + src_1->runs[rlepos].length + 1;
483
0
    int32_t xstart = src_2->runs[xrlepos].value;
484
0
    int32_t xend = xstart + src_2->runs[xrlepos].length + 1;
485
0
    while ((rlepos < src_1->n_runs) && (xrlepos < src_2->n_runs)) {
486
0
        if (end <= xstart) {
487
0
            ++rlepos;
488
0
            if (rlepos < src_1->n_runs) {
489
0
                start = src_1->runs[rlepos].value;
490
0
                end = start + src_1->runs[rlepos].length + 1;
491
0
            }
492
0
        } else if (xend <= start) {
493
0
            ++xrlepos;
494
0
            if (xrlepos < src_2->n_runs) {
495
0
                xstart = src_2->runs[xrlepos].value;
496
0
                xend = xstart + src_2->runs[xrlepos].length + 1;
497
0
            }
498
0
        } else {  // they overlap
499
0
            const int32_t lateststart = start > xstart ? start : xstart;
500
0
            int32_t earliestend;
501
0
            if (end == xend) {  // improbable
502
0
                earliestend = end;
503
0
                rlepos++;
504
0
                xrlepos++;
505
0
                if (rlepos < src_1->n_runs) {
506
0
                    start = src_1->runs[rlepos].value;
507
0
                    end = start + src_1->runs[rlepos].length + 1;
508
0
                }
509
0
                if (xrlepos < src_2->n_runs) {
510
0
                    xstart = src_2->runs[xrlepos].value;
511
0
                    xend = xstart + src_2->runs[xrlepos].length + 1;
512
0
                }
513
0
            } else if (end < xend) {
514
0
                earliestend = end;
515
0
                rlepos++;
516
0
                if (rlepos < src_1->n_runs) {
517
0
                    start = src_1->runs[rlepos].value;
518
0
                    end = start + src_1->runs[rlepos].length + 1;
519
0
                }
520
521
0
            } else {  // end > xend
522
0
                earliestend = xend;
523
0
                xrlepos++;
524
0
                if (xrlepos < src_2->n_runs) {
525
0
                    xstart = src_2->runs[xrlepos].value;
526
0
                    xend = xstart + src_2->runs[xrlepos].length + 1;
527
0
                }
528
0
            }
529
0
            answer += earliestend - lateststart;
530
0
        }
531
0
    }
532
0
    return answer;
533
0
}
534
535
bool run_container_intersect(const run_container_t *src_1,
536
0
                             const run_container_t *src_2) {
537
0
    const bool if1 = run_container_is_full(src_1);
538
0
    const bool if2 = run_container_is_full(src_2);
539
0
    if (if1 || if2) {
540
0
        if (if1) {
541
0
            return !run_container_empty(src_2);
542
0
        }
543
0
        if (if2) {
544
0
            return !run_container_empty(src_1);
545
0
        }
546
0
    }
547
0
    int32_t rlepos = 0;
548
0
    int32_t xrlepos = 0;
549
0
    int32_t start = src_1->runs[rlepos].value;
550
0
    int32_t end = start + src_1->runs[rlepos].length + 1;
551
0
    int32_t xstart = src_2->runs[xrlepos].value;
552
0
    int32_t xend = xstart + src_2->runs[xrlepos].length + 1;
553
0
    while ((rlepos < src_1->n_runs) && (xrlepos < src_2->n_runs)) {
554
0
        if (end <= xstart) {
555
0
            ++rlepos;
556
0
            if (rlepos < src_1->n_runs) {
557
0
                start = src_1->runs[rlepos].value;
558
0
                end = start + src_1->runs[rlepos].length + 1;
559
0
            }
560
0
        } else if (xend <= start) {
561
0
            ++xrlepos;
562
0
            if (xrlepos < src_2->n_runs) {
563
0
                xstart = src_2->runs[xrlepos].value;
564
0
                xend = xstart + src_2->runs[xrlepos].length + 1;
565
0
            }
566
0
        } else {  // they overlap
567
0
            return true;
568
0
        }
569
0
    }
570
0
    return false;
571
0
}
572
573
/* Compute the difference of src_1 and src_2 and write the result to
574
 * dst. It is assumed that dst is distinct from both src_1 and src_2. */
575
void run_container_andnot(const run_container_t *src_1,
576
0
                          const run_container_t *src_2, run_container_t *dst) {
577
    // following Java implementation as of June 2016
578
579
0
    if (dst->capacity < src_1->n_runs + src_2->n_runs)
580
0
        run_container_grow(dst, src_1->n_runs + src_2->n_runs, false);
581
582
0
    dst->n_runs = 0;
583
584
0
    int rlepos1 = 0;
585
0
    int rlepos2 = 0;
586
0
    int32_t start = src_1->runs[rlepos1].value;
587
0
    int32_t end = start + src_1->runs[rlepos1].length + 1;
588
0
    int32_t start2 = src_2->runs[rlepos2].value;
589
0
    int32_t end2 = start2 + src_2->runs[rlepos2].length + 1;
590
591
0
    while ((rlepos1 < src_1->n_runs) && (rlepos2 < src_2->n_runs)) {
592
0
        if (end <= start2) {
593
            // output the first run
594
0
            dst->runs[dst->n_runs++] =
595
0
                CROARING_MAKE_RLE16(start, end - start - 1);
596
0
            rlepos1++;
597
0
            if (rlepos1 < src_1->n_runs) {
598
0
                start = src_1->runs[rlepos1].value;
599
0
                end = start + src_1->runs[rlepos1].length + 1;
600
0
            }
601
0
        } else if (end2 <= start) {
602
            // exit the second run
603
0
            rlepos2++;
604
0
            if (rlepos2 < src_2->n_runs) {
605
0
                start2 = src_2->runs[rlepos2].value;
606
0
                end2 = start2 + src_2->runs[rlepos2].length + 1;
607
0
            }
608
0
        } else {
609
0
            if (start < start2) {
610
0
                dst->runs[dst->n_runs++] =
611
0
                    CROARING_MAKE_RLE16(start, start2 - start - 1);
612
0
            }
613
0
            if (end2 < end) {
614
0
                start = end2;
615
0
            } else {
616
0
                rlepos1++;
617
0
                if (rlepos1 < src_1->n_runs) {
618
0
                    start = src_1->runs[rlepos1].value;
619
0
                    end = start + src_1->runs[rlepos1].length + 1;
620
0
                }
621
0
            }
622
0
        }
623
0
    }
624
0
    if (rlepos1 < src_1->n_runs) {
625
0
        dst->runs[dst->n_runs++] = CROARING_MAKE_RLE16(start, end - start - 1);
626
0
        rlepos1++;
627
0
        if (rlepos1 < src_1->n_runs) {
628
0
            memcpy(dst->runs + dst->n_runs, src_1->runs + rlepos1,
629
0
                   sizeof(rle16_t) * (src_1->n_runs - rlepos1));
630
0
            dst->n_runs += src_1->n_runs - rlepos1;
631
0
        }
632
0
    }
633
0
}
634
635
/*
636
 * Print this container using printf (useful for debugging).
637
 */
638
0
void run_container_printf(const run_container_t *cont) {
639
0
    for (int i = 0; i < cont->n_runs; ++i) {
640
0
        uint16_t run_start = cont->runs[i].value;
641
0
        uint16_t le = cont->runs[i].length;
642
0
        printf("[%d,%d]", run_start, run_start + le);
643
0
    }
644
0
}
645
646
/*
647
 * Print this container using printf as a comma-separated list of 32-bit
648
 * integers starting at base.
649
 */
650
void run_container_printf_as_uint32_array(const run_container_t *cont,
651
0
                                          uint32_t base) {
652
0
    if (cont->n_runs == 0) return;
653
0
    {
654
0
        uint32_t run_start = base + cont->runs[0].value;
655
0
        uint16_t le = cont->runs[0].length;
656
0
        printf("%u", run_start);
657
0
        for (uint32_t j = 1; j <= le; ++j) printf(",%u", run_start + j);
658
0
    }
659
0
    for (int32_t i = 1; i < cont->n_runs; ++i) {
660
0
        uint32_t run_start = base + cont->runs[i].value;
661
0
        uint16_t le = cont->runs[i].length;
662
0
        for (uint32_t j = 0; j <= le; ++j) printf(",%u", run_start + j);
663
0
    }
664
0
}
665
666
/*
667
 * Validate the container. Returns true if valid.
668
 */
669
0
bool run_container_validate(const run_container_t *run, const char **reason) {
670
0
    if (run->n_runs < 0) {
671
0
        *reason = "negative run count";
672
0
        return false;
673
0
    }
674
0
    if (run->capacity < 0) {
675
0
        *reason = "negative run capacity";
676
0
        return false;
677
0
    }
678
0
    if (run->capacity < run->n_runs) {
679
0
        *reason = "capacity less than run count";
680
0
        return false;
681
0
    }
682
683
0
    if (run->n_runs == 0) {
684
0
        *reason = "zero run count";
685
0
        return false;
686
0
    }
687
0
    if (run->runs == NULL) {
688
0
        *reason = "NULL runs";
689
0
        return false;
690
0
    }
691
692
    // Use uint32_t to avoid overflow issues on ranges that contain UINT16_MAX.
693
0
    uint32_t last_end = 0;
694
0
    for (int i = 0; i < run->n_runs; ++i) {
695
0
        uint32_t start = run->runs[i].value;
696
0
        uint32_t end = start + run->runs[i].length + 1;
697
0
        if (end <= start) {
698
0
            *reason = "run start + length overflow";
699
0
            return false;
700
0
        }
701
0
        if (end > (1 << 16)) {
702
0
            *reason = "run start + length too large";
703
0
            return false;
704
0
        }
705
0
        if (start < last_end) {
706
0
            *reason = "run start less than last end";
707
0
            return false;
708
0
        }
709
0
        if (start == last_end && last_end != 0) {
710
0
            *reason = "run start equal to last end, should have combined";
711
0
            return false;
712
0
        }
713
0
        last_end = end;
714
0
    }
715
0
    return true;
716
0
}
717
718
0
int32_t run_container_write(const run_container_t *container, char *buf) {
719
0
    uint16_t cast_16 = container->n_runs;
720
0
    uint16_t n_runs_le = croaring_htole16(cast_16);
721
0
    memcpy(buf, &n_runs_le, sizeof(uint16_t));
722
#if CROARING_IS_BIG_ENDIAN
723
    char *out = buf + sizeof(uint16_t);
724
    for (int32_t i = 0; i < container->n_runs; ++i) {
725
        uint16_t v_le = croaring_htole16(container->runs[i].value);
726
        uint16_t l_le = croaring_htole16(container->runs[i].length);
727
        memcpy(out, &v_le, sizeof(uint16_t));
728
        memcpy(out + sizeof(uint16_t), &l_le, sizeof(uint16_t));
729
        out += sizeof(rle16_t);
730
    }
731
#else
732
0
    memcpy(buf + sizeof(uint16_t), container->runs,
733
0
           container->n_runs * sizeof(rle16_t));
734
0
#endif
735
0
    return run_container_size_in_bytes(container);
736
0
}
737
738
int32_t run_container_read(int32_t cardinality, run_container_t *container,
739
0
                           const char *buf) {
740
0
    (void)cardinality;
741
0
    uint16_t cast_16;
742
0
    memcpy(&cast_16, buf, sizeof(uint16_t));
743
0
    container->n_runs = croaring_letoh16(cast_16);
744
0
    if (container->n_runs > container->capacity)
745
0
        run_container_grow(container, container->n_runs, false);
746
0
    if (container->n_runs > 0) {
747
#if CROARING_IS_BIG_ENDIAN
748
        const char *in = buf + sizeof(uint16_t);
749
        for (int32_t i = 0; i < container->n_runs; ++i) {
750
            uint16_t v_le, l_le;
751
            memcpy(&v_le, in, sizeof(uint16_t));
752
            memcpy(&l_le, in + sizeof(uint16_t), sizeof(uint16_t));
753
            container->runs[i].value = croaring_letoh16(v_le);
754
            container->runs[i].length = croaring_letoh16(l_le);
755
            in += sizeof(rle16_t);
756
        }
757
#else
758
0
        memcpy(container->runs, buf + sizeof(uint16_t),
759
0
               container->n_runs * sizeof(rle16_t));
760
0
#endif
761
0
    }
762
0
    return run_container_size_in_bytes(container);
763
0
}
764
765
bool run_container_iterate(const run_container_t *cont, uint32_t base,
766
0
                           roaring_iterator iterator, void *ptr) {
767
0
    for (int i = 0; i < cont->n_runs; ++i) {
768
0
        uint32_t run_start = base + cont->runs[i].value;
769
0
        uint16_t le = cont->runs[i].length;
770
771
0
        for (int j = 0; j <= le; ++j)
772
0
            if (!iterator(run_start + j, ptr)) return false;
773
0
    }
774
0
    return true;
775
0
}
776
777
bool run_container_iterate64(const run_container_t *cont, uint32_t base,
778
                             roaring_iterator64 iterator, uint64_t high_bits,
779
0
                             void *ptr) {
780
0
    for (int i = 0; i < cont->n_runs; ++i) {
781
0
        uint32_t run_start = base + cont->runs[i].value;
782
0
        uint16_t le = cont->runs[i].length;
783
784
0
        for (int j = 0; j <= le; ++j)
785
0
            if (!iterator(high_bits | (uint64_t)(run_start + j), ptr))
786
0
                return false;
787
0
    }
788
0
    return true;
789
0
}
790
791
bool run_container_is_subset(const run_container_t *container1,
792
0
                             const run_container_t *container2) {
793
0
    int i1 = 0, i2 = 0;
794
0
    while (i1 < container1->n_runs && i2 < container2->n_runs) {
795
0
        int start1 = container1->runs[i1].value;
796
0
        int stop1 = start1 + container1->runs[i1].length;
797
0
        int start2 = container2->runs[i2].value;
798
0
        int stop2 = start2 + container2->runs[i2].length;
799
0
        if (start1 < start2) {
800
0
            return false;
801
0
        } else {  // start1 >= start2
802
0
            if (stop1 < stop2) {
803
0
                i1++;
804
0
            } else if (stop1 == stop2) {
805
0
                i1++;
806
0
                i2++;
807
0
            } else {  // stop1 > stop2
808
0
                i2++;
809
0
            }
810
0
        }
811
0
    }
812
0
    if (i1 == container1->n_runs) {
813
0
        return true;
814
0
    } else {
815
0
        return false;
816
0
    }
817
0
}
818
819
// TODO: write smart_append_exclusive version to match the overloaded 1 param
820
// Java version (or  is it even used?)
821
822
// follows the Java implementation closely
823
// length is the rle-value.  Ie, run [10,12) uses a length value 1.
824
void run_container_smart_append_exclusive(run_container_t *src,
825
                                          const uint16_t start,
826
0
                                          const uint16_t length) {
827
0
    int old_end;
828
0
    rle16_t *last_run = src->n_runs ? src->runs + (src->n_runs - 1) : NULL;
829
0
    rle16_t *appended_last_run = src->runs + src->n_runs;
830
831
0
    if (!src->n_runs ||
832
0
        (start > (old_end = last_run->value + last_run->length + 1))) {
833
0
        *appended_last_run = CROARING_MAKE_RLE16(start, length);
834
0
        src->n_runs++;
835
0
        return;
836
0
    }
837
0
    if (old_end == start) {
838
        // we merge
839
0
        last_run->length += (length + 1);
840
0
        return;
841
0
    }
842
0
    int new_end = start + length + 1;
843
844
0
    if (start == last_run->value) {
845
        // wipe out previous
846
0
        if (new_end < old_end) {
847
0
            *last_run = CROARING_MAKE_RLE16(new_end, old_end - new_end - 1);
848
0
            return;
849
0
        } else if (new_end > old_end) {
850
0
            *last_run = CROARING_MAKE_RLE16(old_end, new_end - old_end - 1);
851
0
            return;
852
0
        } else {
853
0
            src->n_runs--;
854
0
            return;
855
0
        }
856
0
    }
857
0
    last_run->length = start - last_run->value - 1;
858
0
    if (new_end < old_end) {
859
0
        *appended_last_run =
860
0
            CROARING_MAKE_RLE16(new_end, old_end - new_end - 1);
861
0
        src->n_runs++;
862
0
    } else if (new_end > old_end) {
863
0
        *appended_last_run =
864
0
            CROARING_MAKE_RLE16(old_end, new_end - old_end - 1);
865
0
        src->n_runs++;
866
0
    }
867
0
}
868
869
bool run_container_select(const run_container_t *container,
870
                          uint32_t *start_rank, uint32_t rank,
871
0
                          uint32_t *element) {
872
0
    for (int i = 0; i < container->n_runs; i++) {
873
0
        uint16_t length = container->runs[i].length;
874
0
        if (rank <= *start_rank + length) {
875
0
            uint16_t value = container->runs[i].value;
876
0
            *element = value + rank - (*start_rank);
877
0
            return true;
878
0
        } else
879
0
            *start_rank += length + 1;
880
0
    }
881
0
    return false;
882
0
}
883
884
0
int run_container_rank(const run_container_t *container, uint16_t x) {
885
0
    int sum = 0;
886
0
    uint32_t x32 = x;
887
0
    for (int i = 0; i < container->n_runs; i++) {
888
0
        uint32_t startpoint = container->runs[i].value;
889
0
        uint32_t length = container->runs[i].length;
890
0
        uint32_t endpoint = length + startpoint;
891
0
        if (x <= endpoint) {
892
0
            if (x < startpoint) break;
893
0
            return sum + (x32 - startpoint) + 1;
894
0
        } else {
895
0
            sum += length + 1;
896
0
        }
897
0
    }
898
0
    return sum;
899
0
}
900
uint32_t run_container_rank_many(const run_container_t *container,
901
                                 uint64_t start_rank, const uint32_t *begin,
902
0
                                 const uint32_t *end, uint64_t *ans) {
903
0
    const uint16_t high = (uint16_t)((*begin) >> 16);
904
0
    const uint32_t *iter = begin;
905
0
    int sum = 0;
906
0
    int i = 0;
907
0
    for (; iter != end; iter++) {
908
0
        uint32_t x = *iter;
909
0
        uint16_t xhigh = (uint16_t)(x >> 16);
910
0
        if (xhigh != high) return iter - begin;  // stop at next container
911
912
0
        uint32_t x32 = x & 0xFFFF;
913
0
        while (i < container->n_runs) {
914
0
            uint32_t startpoint = container->runs[i].value;
915
0
            uint32_t length = container->runs[i].length;
916
0
            uint32_t endpoint = length + startpoint;
917
0
            if (x32 <= endpoint) {
918
0
                if (x32 < startpoint) {
919
0
                    *(ans++) = start_rank + sum;
920
0
                } else {
921
0
                    *(ans++) = start_rank + sum + (x32 - startpoint) + 1;
922
0
                }
923
0
                break;
924
0
            } else {
925
0
                sum += length + 1;
926
0
                i++;
927
0
            }
928
0
        }
929
0
        if (i >= container->n_runs) *(ans++) = start_rank + sum;
930
0
    }
931
932
0
    return iter - begin;
933
0
}
934
935
0
int run_container_get_index(const run_container_t *container, uint16_t x) {
936
0
    if (run_container_contains(container, x)) {
937
0
        int sum = 0;
938
0
        uint32_t x32 = x;
939
0
        for (int i = 0; i < container->n_runs; i++) {
940
0
            uint32_t startpoint = container->runs[i].value;
941
0
            uint32_t length = container->runs[i].length;
942
0
            uint32_t endpoint = length + startpoint;
943
0
            if (x <= endpoint) {
944
0
                if (x < startpoint) break;
945
0
                return sum + (x32 - startpoint);
946
0
            } else {
947
0
                sum += length + 1;
948
0
            }
949
0
        }
950
0
        return sum - 1;
951
0
    } else {
952
0
        return -1;
953
0
    }
954
0
}
955
#define CROARING_ENABLE_AVX512_RUN_CONTAINER_CARDINALITY 0
956
957
#if defined(CROARING_IS_X64) && CROARING_COMPILER_SUPPORTS_AVX512
958
959
#if CROARING_ENABLE_AVX512_RUN_CONTAINER_CARDINALITY
960
CROARING_TARGET_AVX512
961
CROARING_ALLOW_UNALIGNED
962
/* Get the cardinality of `run'. Requires an actual computation. */
963
static inline int _avx512_run_container_cardinality(
964
    const run_container_t *run) {
965
    const int32_t n_runs = run->n_runs;
966
    const rle16_t *runs = run->runs;
967
968
    int32_t k = 0;
969
    const int32_t step512 = sizeof(__m512i) / sizeof(rle16_t);
970
    __m512i total = _mm512_setzero_si512();
971
    for (; k + step512 <= n_runs; k += step512) {
972
        __m512i ymm1 = _mm512_loadu_si512((const __m512i *)(runs + k));
973
        __m512i justlengths = _mm512_srli_epi32(ymm1, 16);
974
        total = _mm512_add_epi32(total, justlengths);
975
    }
976
    if (k < n_runs) {
977
        __m512i ymm1 = _mm512_maskz_loadu_epi32((1 << (n_runs - k)) - 1,
978
                                                (const __m512i *)(runs + k));
979
        __m512i justlengths = _mm512_srli_epi32(ymm1, 16);
980
        total = _mm512_add_epi32(total, justlengths);
981
    }
982
    return _mm512_reduce_add_epi32(total) + n_runs;
983
}
984
985
CROARING_UNTARGET_AVX512
986
#endif  // CROARING_ENABLE_AVX512_RUN_CONTAINER_CARDINALITY
987
988
CROARING_TARGET_AVX2
989
CROARING_ALLOW_UNALIGNED
990
/* Get the cardinality of `run'. Requires an actual computation. */
991
0
static inline int _avx2_run_container_cardinality(const run_container_t *run) {
992
0
    const int32_t n_runs = run->n_runs;
993
0
    const rle16_t *runs = run->runs;
994
995
    /* by initializing with n_runs, we omit counting the +1 for each pair. */
996
0
    int sum = n_runs;
997
0
    int32_t k = 0;
998
0
    const int32_t step = sizeof(__m256i) / sizeof(rle16_t);
999
0
    if (n_runs > step) {
1000
0
        __m256i total = _mm256_setzero_si256();
1001
0
        for (; k + step <= n_runs; k += step) {
1002
0
            __m256i ymm1 = _mm256_lddqu_si256((const __m256i *)(runs + k));
1003
0
            __m256i justlengths = _mm256_srli_epi32(ymm1, 16);
1004
0
            total = _mm256_add_epi32(total, justlengths);
1005
0
        }
1006
        // a store might be faster than extract?
1007
0
        uint32_t buffer[sizeof(__m256i) / sizeof(rle16_t)];
1008
0
        _mm256_storeu_si256((__m256i *)buffer, total);
1009
0
        sum += (buffer[0] + buffer[1]) + (buffer[2] + buffer[3]) +
1010
0
               (buffer[4] + buffer[5]) + (buffer[6] + buffer[7]);
1011
0
    }
1012
0
    for (; k < n_runs; ++k) {
1013
0
        sum += runs[k].length;
1014
0
    }
1015
1016
0
    return sum;
1017
0
}
1018
1019
CROARING_ALLOW_UNALIGNED
1020
int _avx2_run_container_to_uint32_array(void *vout, const run_container_t *cont,
1021
0
                                        uint32_t base) {
1022
0
    int outpos = 0;
1023
0
    uint32_t *out = (uint32_t *)vout;
1024
1025
0
    for (int i = 0; i < cont->n_runs; ++i) {
1026
0
        uint32_t run_start = base + cont->runs[i].value;
1027
0
        uint16_t le = cont->runs[i].length;
1028
0
        if (le < 8) {
1029
0
            for (int j = 0; j <= le; ++j) {
1030
0
                uint32_t val = run_start + j;
1031
0
                memcpy(out + outpos, &val,
1032
0
                       sizeof(uint32_t));  // should be compiled as a MOV on x64
1033
0
                outpos++;
1034
0
            }
1035
0
        } else {
1036
0
            int j = 0;
1037
0
            __m256i run_start_v = _mm256_set1_epi32(run_start);
1038
            // [8,8,8,8....]
1039
0
            __m256i inc = _mm256_set1_epi32(8);
1040
            // used for generate sequence:
1041
            // [0, 1, 2, 3...], [8, 9, 10,...]
1042
0
            __m256i delta = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
1043
0
            for (j = 0; j + 8 <= le; j += 8) {
1044
0
                __m256i val_v = _mm256_add_epi32(run_start_v, delta);
1045
0
                _mm256_storeu_si256((__m256i *)(out + outpos), val_v);
1046
0
                delta = _mm256_add_epi32(inc, delta);
1047
0
                outpos += 8;
1048
0
            }
1049
0
            for (; j <= le; ++j) {
1050
0
                uint32_t val = run_start + j;
1051
0
                memcpy(out + outpos, &val,
1052
0
                       sizeof(uint32_t));  // should be compiled as a MOV on x64
1053
0
                outpos++;
1054
0
            }
1055
0
        }
1056
0
    }
1057
0
    return outpos;
1058
0
}
1059
1060
CROARING_UNTARGET_AVX2
1061
1062
/* Get the cardinality of `run'. Requires an actual computation. */
1063
static inline int _scalar_run_container_cardinality(
1064
0
    const run_container_t *run) {
1065
0
    const int32_t n_runs = run->n_runs;
1066
0
    const rle16_t *runs = run->runs;
1067
1068
    /* by initializing with n_runs, we omit counting the +1 for each pair. */
1069
0
    int sum = n_runs;
1070
0
    for (int k = 0; k < n_runs; ++k) {
1071
0
        sum += runs[k].length;
1072
0
    }
1073
1074
0
    return sum;
1075
0
}
1076
1077
0
int run_container_cardinality(const run_container_t *run) {
1078
    // Empirically AVX-512 is not always faster than AVX2
1079
#if CROARING_COMPILER_SUPPORTS_AVX512 && \
1080
    CROARING_ENABLE_AVX512_RUN_CONTAINER_CARDINALITY
1081
    if (croaring_hardware_support() & ROARING_SUPPORTS_AVX512) {
1082
        return _avx512_run_container_cardinality(run);
1083
    } else
1084
#endif
1085
0
        if (croaring_hardware_support() & ROARING_SUPPORTS_AVX2) {
1086
0
        return _avx2_run_container_cardinality(run);
1087
0
    } else {
1088
0
        return _scalar_run_container_cardinality(run);
1089
0
    }
1090
0
}
1091
1092
int _scalar_run_container_to_uint32_array(void *vout,
1093
                                          const run_container_t *cont,
1094
0
                                          uint32_t base) {
1095
0
    int outpos = 0;
1096
0
    uint32_t *out = (uint32_t *)vout;
1097
0
    for (int i = 0; i < cont->n_runs; ++i) {
1098
0
        uint32_t run_start = base + cont->runs[i].value;
1099
0
        uint16_t le = cont->runs[i].length;
1100
0
        for (int j = 0; j <= le; ++j) {
1101
0
            uint32_t val = run_start + j;
1102
0
            memcpy(out + outpos, &val,
1103
0
                   sizeof(uint32_t));  // should be compiled as a MOV on x64
1104
0
            outpos++;
1105
0
        }
1106
0
    }
1107
0
    return outpos;
1108
0
}
1109
1110
int run_container_to_uint32_array(void *vout, const run_container_t *cont,
1111
0
                                  uint32_t base) {
1112
0
    if (croaring_hardware_support() & ROARING_SUPPORTS_AVX2) {
1113
0
        return _avx2_run_container_to_uint32_array(vout, cont, base);
1114
0
    } else {
1115
0
        return _scalar_run_container_to_uint32_array(vout, cont, base);
1116
0
    }
1117
0
}
1118
1119
#else
1120
1121
/* Get the cardinality of `run'. Requires an actual computation. */
1122
CROARING_ALLOW_UNALIGNED
1123
int run_container_cardinality(const run_container_t *run) {
1124
    const int32_t n_runs = run->n_runs;
1125
    const rle16_t *runs = run->runs;
1126
1127
    /* by initializing with n_runs, we omit counting the +1 for each pair. */
1128
    int sum = n_runs;
1129
    for (int k = 0; k < n_runs; ++k) {
1130
        sum += runs[k].length;
1131
    }
1132
1133
    return sum;
1134
}
1135
1136
CROARING_ALLOW_UNALIGNED
1137
int run_container_to_uint32_array(void *vout, const run_container_t *cont,
1138
                                  uint32_t base) {
1139
    int outpos = 0;
1140
    uint32_t *out = (uint32_t *)vout;
1141
    for (int i = 0; i < cont->n_runs; ++i) {
1142
        uint32_t run_start = base + cont->runs[i].value;
1143
        uint16_t le = cont->runs[i].length;
1144
        for (int j = 0; j <= le; ++j) {
1145
            uint32_t val = run_start + j;
1146
            memcpy(out + outpos, &val,
1147
                   sizeof(uint32_t));  // should be compiled as a MOV on x64
1148
            outpos++;
1149
        }
1150
    }
1151
    return outpos;
1152
}
1153
1154
#endif
1155
1156
#ifdef __cplusplus
1157
}
1158
}
1159
}  // extern "C" { namespace roaring { namespace internal {
1160
#endif
1161
#if defined(__GNUC__) && !defined(__clang__)
1162
#pragma GCC diagnostic pop
1163
#endif