/src/croaring/cpp/roaring.hh
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | A C++ header for Roaring Bitmaps. |
3 | | */ |
4 | | #ifndef INCLUDE_ROARING_HH_ |
5 | | #define INCLUDE_ROARING_HH_ |
6 | | |
7 | | #include <algorithm> |
8 | | #include <cstdarg> |
9 | | #include <initializer_list> |
10 | | #include <limits> |
11 | | #include <new> |
12 | | #include <stdexcept> |
13 | | #include <string> |
14 | | |
15 | | #if !defined(ROARING_EXCEPTIONS) |
16 | | // __cpp_exceptions is required by C++98 and we require C++11 or better. |
17 | | #ifndef __cpp_exceptions |
18 | | #error "__cpp_exceptions should be defined" |
19 | | #endif |
20 | | #if __cpp_exceptions |
21 | | #define ROARING_EXCEPTIONS 1 |
22 | | #else |
23 | | #define ROARING_EXCEPTIONS 0 |
24 | | #endif |
25 | | #endif |
26 | | |
27 | | #ifndef ROARING_TERMINATE |
28 | | #if ROARING_EXCEPTIONS |
29 | 3.22k | #define ROARING_TERMINATE(_s) throw std::runtime_error(_s) |
30 | | #else |
31 | | #define ROARING_TERMINATE(_s) std::terminate() |
32 | | #endif |
33 | | #endif |
34 | | |
35 | | #define ROARING_API_NOT_IN_GLOBAL_NAMESPACE // see remarks in roaring.h |
36 | | #include <roaring/roaring.h> |
37 | | #undef ROARING_API_NOT_IN_GLOBAL_NAMESPACE |
38 | | |
39 | | #include <roaring/roaring_array.h> // roaring::internal array functions used |
40 | | |
41 | | namespace roaring { |
42 | | |
43 | | class RoaringSetBitBiDirectionalIterator; |
44 | | |
45 | | /** DEPRECATED, use `RoaringSetBitBiDirectionalIterator`. */ |
46 | | using RoaringSetBitForwardIterator = RoaringSetBitBiDirectionalIterator; |
47 | | |
48 | | /** |
49 | | * A bit of context usable with `*Bulk()` functions. |
50 | | * |
51 | | * A context may only be used with a single bitmap, and any modification to a |
52 | | * bitmap (other than modifications performed with `Bulk()` functions with the |
53 | | * context passed) will invalidate any contexts associated with that bitmap. |
54 | | */ |
55 | | class BulkContext { |
56 | | public: |
57 | | friend class Roaring; |
58 | | using roaring_bitmap_bulk_context_t = api::roaring_bulk_context_t; |
59 | 0 | BulkContext() : context_{nullptr, 0, 0, 0} {} |
60 | | |
61 | | BulkContext(const BulkContext &) = delete; |
62 | | BulkContext &operator=(const BulkContext &) = delete; |
63 | | BulkContext(BulkContext &&) noexcept = default; |
64 | | BulkContext &operator=(BulkContext &&) noexcept = default; |
65 | | |
66 | | private: |
67 | | roaring_bitmap_bulk_context_t context_; |
68 | | }; |
69 | | |
70 | | class Roaring { |
71 | | typedef api::roaring_bitmap_t roaring_bitmap_t; // class-local name alias |
72 | | |
73 | | public: |
74 | | /** |
75 | | * Create an empty bitmap in the existing memory for the class. |
76 | | * The bitmap will be in the "clear" state with no auxiliary allocations. |
77 | | */ |
78 | 9.71k | Roaring() : roaring{} { |
79 | | // The empty constructor roaring{} silences warnings from pedantic |
80 | | // static analyzers. |
81 | 9.71k | api::roaring_bitmap_init_cleared(&roaring); |
82 | 9.71k | } |
83 | | |
84 | | /** |
85 | | * Construct a bitmap from a list of 32-bit integer values. |
86 | | */ |
87 | 6.47k | Roaring(size_t n, const uint32_t *data) : Roaring() { |
88 | 6.47k | api::roaring_bitmap_add_many(&roaring, n, data); |
89 | 6.47k | } |
90 | | |
91 | | /** |
92 | | * Construct a bitmap from an initializer list. |
93 | | */ |
94 | 0 | Roaring(std::initializer_list<uint32_t> l) : Roaring() { |
95 | 0 | addMany(l.size(), l.begin()); |
96 | 0 | } |
97 | | |
98 | | /** |
99 | | * Construct a roaring object by taking control of a malloc()'d C struct. |
100 | | * |
101 | | * Passing a NULL pointer is unsafe. |
102 | | * The pointer to the C struct will be invalid after the call. |
103 | | */ |
104 | 16.2k | explicit Roaring(roaring_bitmap_t *s) noexcept : roaring(*s) { |
105 | 16.2k | roaring_free(s); // deallocate the passed-in pointer |
106 | 16.2k | } |
107 | | |
108 | | /** |
109 | | * Copy constructor. |
110 | | * It may throw std::runtime_error if there is insufficient memory. |
111 | | */ |
112 | 3.23k | Roaring(const Roaring &r) : Roaring() { |
113 | 3.23k | if (!api::roaring_bitmap_overwrite(&roaring, &r.roaring)) { |
114 | 0 | ROARING_TERMINATE("failed roaring_bitmap_overwrite in constructor"); |
115 | 0 | } |
116 | 3.23k | api::roaring_bitmap_set_copy_on_write( |
117 | 3.23k | &roaring, api::roaring_bitmap_get_copy_on_write(&r.roaring)); |
118 | 3.23k | } |
119 | | |
120 | | /** |
121 | | * Move constructor. The moved-from object remains valid but empty, i.e. |
122 | | * it behaves as though it was just freshly constructed. |
123 | | */ |
124 | 3.23k | Roaring(Roaring &&r) noexcept : roaring(r.roaring) { |
125 | | // |
126 | | // !!! This clones the bits of the roaring structure to a new location |
127 | | // and then overwrites the old bits...assuming that this will still |
128 | | // work. There are scenarios where this could break; e.g. if some of |
129 | | // those bits were pointers into the structure memory itself. If such |
130 | | // things were possible, a roaring_bitmap_move() API would be needed. |
131 | | // |
132 | 3.23k | api::roaring_bitmap_init_cleared(&r.roaring); |
133 | 3.23k | } |
134 | | |
135 | | /** |
136 | | * Construct a bitmap from a list of uint32_t values. |
137 | | */ |
138 | 0 | static Roaring bitmapOf(size_t n, ...) { |
139 | 0 | Roaring ans; |
140 | 0 | va_list vl; |
141 | 0 | va_start(vl, n); |
142 | 0 | for (size_t i = 0; i < n; i++) { |
143 | 0 | ans.add(va_arg(vl, uint32_t)); |
144 | 0 | } |
145 | 0 | va_end(vl); |
146 | 0 | return ans; |
147 | 0 | } |
148 | | |
149 | | /** |
150 | | * Copies the content of the provided bitmap, and |
151 | | * discard the current content. |
152 | | * It may throw std::runtime_error if there is insufficient memory. |
153 | | */ |
154 | 3.23k | Roaring &operator=(const Roaring &r) { |
155 | 3.23k | if (!api::roaring_bitmap_overwrite(&roaring, &r.roaring)) { |
156 | 0 | ROARING_TERMINATE("failed memory alloc in assignment"); |
157 | 0 | } |
158 | 3.23k | api::roaring_bitmap_set_copy_on_write( |
159 | 3.23k | &roaring, api::roaring_bitmap_get_copy_on_write(&r.roaring)); |
160 | 3.23k | return *this; |
161 | 3.23k | } |
162 | | |
163 | | /** |
164 | | * Moves the content of the provided bitmap, and |
165 | | * discard the current content. |
166 | | */ |
167 | 3.23k | Roaring &operator=(Roaring &&r) noexcept { |
168 | 3.23k | api::roaring_bitmap_clear(&roaring); // free this class's allocations |
169 | | |
170 | | // !!! See notes in the Move Constructor regarding roaring_bitmap_move() |
171 | | // |
172 | 3.23k | roaring = r.roaring; |
173 | 3.23k | api::roaring_bitmap_init_cleared(&r.roaring); |
174 | | |
175 | 3.23k | return *this; |
176 | 3.23k | } |
177 | | |
178 | | /** |
179 | | * Assignment from an initializer list. |
180 | | */ |
181 | 0 | Roaring &operator=(std::initializer_list<uint32_t> l) { |
182 | 0 | // Delegate to move assignment operator |
183 | 0 | *this = Roaring(l); |
184 | 0 | return *this; |
185 | 0 | } |
186 | | |
187 | | /** |
188 | | * Construct a bitmap from a list of uint32_t values. |
189 | | * E.g., bitmapOfList({1,2,3}). |
190 | | */ |
191 | 0 | static Roaring bitmapOfList(std::initializer_list<uint32_t> l) { |
192 | 0 | Roaring ans; |
193 | 0 | ans.addMany(l.size(), l.begin()); |
194 | 0 | return ans; |
195 | 0 | } |
196 | | |
197 | | /** |
198 | | * Add value x |
199 | | */ |
200 | 3.23k | void add(uint32_t x) noexcept { api::roaring_bitmap_add(&roaring, x); } |
201 | | |
202 | | /** |
203 | | * Add value x |
204 | | * Returns true if a new value was added, false if the value was already |
205 | | * existing. |
206 | | */ |
207 | 3.23k | bool addChecked(uint32_t x) noexcept { |
208 | 3.23k | return api::roaring_bitmap_add_checked(&roaring, x); |
209 | 3.23k | } |
210 | | |
211 | | /** |
212 | | * Add all values in range [min, max) |
213 | | */ |
214 | 3.23k | void addRange(const uint64_t min, const uint64_t max) noexcept { |
215 | 3.23k | return api::roaring_bitmap_add_range(&roaring, min, max); |
216 | 3.23k | } |
217 | | |
218 | | /** |
219 | | * Add all values in range [min, max] |
220 | | */ |
221 | 0 | void addRangeClosed(const uint32_t min, const uint32_t max) noexcept { |
222 | 0 | return api::roaring_bitmap_add_range_closed(&roaring, min, max); |
223 | 0 | } |
224 | | |
225 | | /** |
226 | | * Add value n_args from pointer vals |
227 | | */ |
228 | 3.23k | void addMany(size_t n_args, const uint32_t *vals) noexcept { |
229 | 3.23k | api::roaring_bitmap_add_many(&roaring, n_args, vals); |
230 | 3.23k | } |
231 | | |
232 | | /** |
233 | | * Add value val, using context from a previous insert for speed |
234 | | * optimization. |
235 | | * |
236 | | * `context` will be used to store information between calls to make bulk |
237 | | * operations faster. `context` should be default-initialized before the |
238 | | * first call to this function. |
239 | | */ |
240 | 0 | void addBulk(BulkContext &context, uint32_t x) noexcept { |
241 | 0 | api::roaring_bitmap_add_bulk(&roaring, &context.context_, x); |
242 | 0 | } |
243 | | |
244 | | /** |
245 | | * Check if item x is present, using context from a previous insert or |
246 | | * search for speed optimization. |
247 | | * |
248 | | * `context` will be used to store information between calls to make bulk |
249 | | * operations faster. `context` should be default-initialized before the |
250 | | * first call to this function. |
251 | | */ |
252 | 0 | bool containsBulk(BulkContext &context, uint32_t x) const noexcept { |
253 | 0 | return api::roaring_bitmap_contains_bulk(&roaring, &context.context_, |
254 | 0 | x); |
255 | 0 | } |
256 | | |
257 | | /** |
258 | | * Remove value x |
259 | | */ |
260 | 3.23k | void remove(uint32_t x) noexcept { |
261 | 3.23k | api::roaring_bitmap_remove(&roaring, x); |
262 | 3.23k | } |
263 | | |
264 | | /** |
265 | | * Remove value x |
266 | | * Returns true if a new value was removed, false if the value was not |
267 | | * existing. |
268 | | */ |
269 | 3.23k | bool removeChecked(uint32_t x) noexcept { |
270 | 3.23k | return api::roaring_bitmap_remove_checked(&roaring, x); |
271 | 3.23k | } |
272 | | |
273 | | /** |
274 | | * Remove all values in range [min, max) |
275 | | */ |
276 | 3.23k | void removeRange(uint64_t min, uint64_t max) noexcept { |
277 | 3.23k | return api::roaring_bitmap_remove_range(&roaring, min, max); |
278 | 3.23k | } |
279 | | |
280 | | /** |
281 | | * Remove all values in range [min, max] |
282 | | */ |
283 | 3.23k | void removeRangeClosed(uint32_t min, uint32_t max) noexcept { |
284 | 3.23k | return api::roaring_bitmap_remove_range_closed(&roaring, min, max); |
285 | 3.23k | } |
286 | | |
287 | | /** |
288 | | * Clears the bitmap. |
289 | | */ |
290 | 0 | void clear() { api::roaring_bitmap_clear(&roaring); } |
291 | | |
292 | | /** |
293 | | * Return the largest value (if not empty) |
294 | | */ |
295 | 3.23k | uint32_t maximum() const noexcept { |
296 | 3.23k | return api::roaring_bitmap_maximum(&roaring); |
297 | 3.23k | } |
298 | | |
299 | | /** |
300 | | * Return the smallest value (if not empty) |
301 | | */ |
302 | 3.23k | uint32_t minimum() const noexcept { |
303 | 3.23k | return api::roaring_bitmap_minimum(&roaring); |
304 | 3.23k | } |
305 | | |
306 | | /** |
307 | | * Check if value x is present |
308 | | */ |
309 | 3.23k | bool contains(uint32_t x) const noexcept { |
310 | 3.23k | return api::roaring_bitmap_contains(&roaring, x); |
311 | 3.23k | } |
312 | | |
313 | | /** |
314 | | * Check if all values from x (included) to y (excluded) are present |
315 | | */ |
316 | 3.23k | bool containsRange(const uint64_t x, const uint64_t y) const noexcept { |
317 | 3.23k | return api::roaring_bitmap_contains_range(&roaring, x, y); |
318 | 3.23k | } |
319 | | |
320 | | bool containsRangeClosed(const uint32_t x, |
321 | 0 | const uint32_t y) const noexcept { |
322 | 0 | return api::roaring_bitmap_contains_range_closed(&roaring, x, y); |
323 | 0 | } |
324 | | |
325 | | /** |
326 | | * Compute the intersection between the current bitmap and the provided |
327 | | * bitmap, writing the result in the current bitmap. The provided bitmap |
328 | | * is not modified. |
329 | | * |
330 | | * Performance hint: if you are computing the intersection between several |
331 | | * bitmaps, two-by-two, it is best to start with the smallest bitmap. |
332 | | */ |
333 | 3.23k | Roaring &operator&=(const Roaring &r) noexcept { |
334 | 3.23k | api::roaring_bitmap_and_inplace(&roaring, &r.roaring); |
335 | 3.23k | return *this; |
336 | 3.23k | } |
337 | | |
338 | | /** |
339 | | * Compute the difference between the current bitmap and the provided |
340 | | * bitmap, writing the result in the current bitmap. The provided bitmap |
341 | | * is not modified. |
342 | | */ |
343 | 3.23k | Roaring &operator-=(const Roaring &r) noexcept { |
344 | 3.23k | api::roaring_bitmap_andnot_inplace(&roaring, &r.roaring); |
345 | 3.23k | return *this; |
346 | 3.23k | } |
347 | | |
348 | | /** |
349 | | * Compute the union between the current bitmap and the provided bitmap, |
350 | | * writing the result in the current bitmap. The provided bitmap is not |
351 | | * modified. |
352 | | * |
353 | | * See also the fastunion function to aggregate many bitmaps more quickly. |
354 | | */ |
355 | 3.23k | Roaring &operator|=(const Roaring &r) noexcept { |
356 | 3.23k | api::roaring_bitmap_or_inplace(&roaring, &r.roaring); |
357 | 3.23k | return *this; |
358 | 3.23k | } |
359 | | |
360 | | /** |
361 | | * Compute the symmetric union between the current bitmap and the provided |
362 | | * bitmap, writing the result in the current bitmap. The provided bitmap |
363 | | * is not modified. |
364 | | */ |
365 | 3.23k | Roaring &operator^=(const Roaring &r) noexcept { |
366 | 3.23k | api::roaring_bitmap_xor_inplace(&roaring, &r.roaring); |
367 | 3.23k | return *this; |
368 | 3.23k | } |
369 | | |
370 | | /** |
371 | | * Exchange the content of this bitmap with another. |
372 | | */ |
373 | 0 | void swap(Roaring &r) noexcept { std::swap(r.roaring, roaring); } |
374 | | |
375 | | /** |
376 | | * Get the cardinality of the bitmap (number of elements). |
377 | | */ |
378 | 3.23k | uint64_t cardinality() const noexcept { |
379 | 3.23k | return api::roaring_bitmap_get_cardinality(&roaring); |
380 | 3.23k | } |
381 | | |
382 | | /** |
383 | | * Returns true if the bitmap is empty (cardinality is zero). |
384 | | */ |
385 | 6.47k | bool isEmpty() const noexcept { |
386 | 6.47k | return api::roaring_bitmap_is_empty(&roaring); |
387 | 6.47k | } |
388 | | |
389 | | /** |
390 | | * Returns true if the bitmap is full (cardinality is uint32_t max + 1). |
391 | | * we put std::numeric_limits<>::max/min in parentheses |
392 | | * to avoid a clash with the Windows.h header under Windows. |
393 | | */ |
394 | 0 | bool isFull() const noexcept { |
395 | 0 | return api::roaring_bitmap_get_cardinality(&roaring) == |
396 | 0 | ((uint64_t)(std::numeric_limits<uint32_t>::max)()) + 1; |
397 | 0 | } |
398 | | |
399 | | /** |
400 | | * Returns true if the bitmap is subset of the other. |
401 | | */ |
402 | 3.23k | bool isSubset(const Roaring &r) const noexcept { |
403 | 3.23k | return api::roaring_bitmap_is_subset(&roaring, &r.roaring); |
404 | 3.23k | } |
405 | | |
406 | | /** |
407 | | * Returns true if the bitmap is strict subset of the other. |
408 | | */ |
409 | 3.23k | bool isStrictSubset(const Roaring &r) const noexcept { |
410 | 3.23k | return api::roaring_bitmap_is_strict_subset(&roaring, &r.roaring); |
411 | 3.23k | } |
412 | | |
413 | | /** |
414 | | * Convert the bitmap to an array. Write the output to "ans", caller is |
415 | | * responsible to ensure that there is enough memory allocated |
416 | | * (e.g., ans = new uint32[mybitmap.cardinality()];) |
417 | | */ |
418 | 3.23k | void toUint32Array(uint32_t *ans) const noexcept { |
419 | 3.23k | api::roaring_bitmap_to_uint32_array(&roaring, ans); |
420 | 3.23k | } |
421 | | /** |
422 | | * To int array with pagination |
423 | | */ |
424 | | void rangeUint32Array(uint32_t *ans, size_t offset, |
425 | 0 | size_t limit) const noexcept { |
426 | 0 | api::roaring_bitmap_range_uint32_array(&roaring, offset, limit, ans); |
427 | 0 | } |
428 | | |
429 | | /** |
430 | | * Return true if the two bitmaps contain the same elements. |
431 | | */ |
432 | 6.47k | bool operator==(const Roaring &r) const noexcept { |
433 | 6.47k | return api::roaring_bitmap_equals(&roaring, &r.roaring); |
434 | 6.47k | } |
435 | | |
436 | | /** |
437 | | * Compute the negation of the roaring bitmap within the half-open interval |
438 | | * [range_start, range_end). Areas outside the interval are unchanged. |
439 | | */ |
440 | 3.23k | void flip(uint64_t range_start, uint64_t range_end) noexcept { |
441 | 3.23k | api::roaring_bitmap_flip_inplace(&roaring, range_start, range_end); |
442 | 3.23k | } |
443 | | |
444 | | /** |
445 | | * Compute the negation of the roaring bitmap within the closed interval |
446 | | * [range_start, range_end]. Areas outside the interval are unchanged. |
447 | | */ |
448 | 3.23k | void flipClosed(uint32_t range_start, uint32_t range_end) noexcept { |
449 | 3.23k | api::roaring_bitmap_flip_inplace_closed(&roaring, range_start, |
450 | 3.23k | range_end); |
451 | 3.23k | } |
452 | | |
453 | | /** |
454 | | * Remove run-length encoding even when it is more space efficient. |
455 | | * Return whether a change was applied. |
456 | | */ |
457 | 3.23k | bool removeRunCompression() noexcept { |
458 | 3.23k | return api::roaring_bitmap_remove_run_compression(&roaring); |
459 | 3.23k | } |
460 | | |
461 | | /** |
462 | | * Convert array and bitmap containers to run containers when it is more |
463 | | * efficient; also convert from run containers when more space efficient. |
464 | | * Returns true if the result has at least one run container. Additional |
465 | | * savings might be possible by calling shrinkToFit(). |
466 | | */ |
467 | 6.47k | bool runOptimize() noexcept { |
468 | 6.47k | return api::roaring_bitmap_run_optimize(&roaring); |
469 | 6.47k | } |
470 | | |
471 | | /** |
472 | | * If needed, reallocate memory to shrink the memory usage. Returns |
473 | | * the number of bytes saved. |
474 | | */ |
475 | 3.23k | size_t shrinkToFit() noexcept { |
476 | 3.23k | return api::roaring_bitmap_shrink_to_fit(&roaring); |
477 | 3.23k | } |
478 | | |
479 | | /** |
480 | | * Iterate over the bitmap elements. The function iterator is called once |
481 | | * for all the values with ptr (can be NULL) as the second parameter of |
482 | | * each call. |
483 | | * |
484 | | * roaring_iterator is simply a pointer to a function that returns bool |
485 | | * (true means that the iteration should continue while false means that it |
486 | | * should stop), and takes (uint32_t,void*) as inputs. |
487 | | */ |
488 | 3.22k | void iterate(api::roaring_iterator iterator, void *ptr) const { |
489 | 3.22k | api::roaring_iterate(&roaring, iterator, ptr); |
490 | 3.22k | } |
491 | | |
492 | | /** |
493 | | * Selects the value at index rnk in the bitmap, where the smallest value |
494 | | * is at index 0. |
495 | | * |
496 | | * If the size of the roaring bitmap is strictly greater than rank, then |
497 | | * this function returns true and sets element to the element of given rank. |
498 | | * Otherwise, it returns false. |
499 | | */ |
500 | 3.23k | bool select(uint32_t rnk, uint32_t *element) const noexcept { |
501 | 3.23k | return api::roaring_bitmap_select(&roaring, rnk, element); |
502 | 3.23k | } |
503 | | |
504 | | /** |
505 | | * Computes the size of the intersection between two bitmaps. |
506 | | */ |
507 | 0 | uint64_t and_cardinality(const Roaring &r) const noexcept { |
508 | 0 | return api::roaring_bitmap_and_cardinality(&roaring, &r.roaring); |
509 | 0 | } |
510 | | |
511 | | /** |
512 | | * Check whether the two bitmaps intersect. |
513 | | */ |
514 | 3.23k | bool intersect(const Roaring &r) const noexcept { |
515 | 3.23k | return api::roaring_bitmap_intersect(&roaring, &r.roaring); |
516 | 3.23k | } |
517 | | |
518 | | /** |
519 | | * Computes the Jaccard index between two bitmaps. (Also known as the |
520 | | * Tanimoto distance, |
521 | | * or the Jaccard similarity coefficient) |
522 | | * |
523 | | * The Jaccard index is undefined if both bitmaps are empty. |
524 | | */ |
525 | 3.23k | double jaccard_index(const Roaring &r) const noexcept { |
526 | 3.23k | return api::roaring_bitmap_jaccard_index(&roaring, &r.roaring); |
527 | 3.23k | } |
528 | | |
529 | | /** |
530 | | * Computes the size of the union between two bitmaps. |
531 | | */ |
532 | 3.23k | uint64_t or_cardinality(const Roaring &r) const noexcept { |
533 | 3.23k | return api::roaring_bitmap_or_cardinality(&roaring, &r.roaring); |
534 | 3.23k | } |
535 | | |
536 | | /** |
537 | | * Computes the size of the difference (andnot) between two bitmaps. |
538 | | */ |
539 | 3.23k | uint64_t andnot_cardinality(const Roaring &r) const noexcept { |
540 | 3.23k | return api::roaring_bitmap_andnot_cardinality(&roaring, &r.roaring); |
541 | 3.23k | } |
542 | | |
543 | | /** |
544 | | * Computes the size of the symmetric difference (andnot) between two |
545 | | * bitmaps. |
546 | | */ |
547 | 3.23k | uint64_t xor_cardinality(const Roaring &r) const noexcept { |
548 | 3.23k | return api::roaring_bitmap_xor_cardinality(&roaring, &r.roaring); |
549 | 3.23k | } |
550 | | |
551 | | /** |
552 | | * Returns the number of integers that are smaller or equal to x. |
553 | | * Thus the rank of the smallest element is one. If |
554 | | * x is smaller than the smallest element, this function will return 0. |
555 | | * The rank and select functions differ in convention: this function returns |
556 | | * 1 when ranking the smallest value, but the select function returns the |
557 | | * smallest value when using index 0. |
558 | | */ |
559 | 3.23k | uint64_t rank(uint32_t x) const noexcept { |
560 | 3.23k | return api::roaring_bitmap_rank(&roaring, x); |
561 | 3.23k | } |
562 | | |
563 | | /** |
564 | | * Get `rank()` values in bulk. The values in `[begin .. end)` must be in |
565 | | * Ascending order. possible implementation: for(auto* iter = begin; iter != |
566 | | * end; ++iter) *(ans++) = rank(*iter); |
567 | | */ |
568 | | void rank_many(const uint32_t *begin, const uint32_t *end, |
569 | 0 | uint64_t *ans) const noexcept { |
570 | 0 | return api::roaring_bitmap_rank_many(&roaring, begin, end, ans); |
571 | 0 | } |
572 | | |
573 | | /** |
574 | | * Returns the index of x in the set, index start from 0. |
575 | | * If the set doesn't contain x , this function will return -1. |
576 | | * The difference with rank function is that this function will return -1 |
577 | | * when x isn't in the set, but the rank function will return a |
578 | | * non-negative number. |
579 | | */ |
580 | 0 | int64_t getIndex(uint32_t x) const noexcept { |
581 | 0 | return api::roaring_bitmap_get_index(&roaring, x); |
582 | 0 | } |
583 | | |
584 | | /** |
585 | | * Write a bitmap to a char buffer. This is meant to be compatible with |
586 | | * the Java and Go versions. Returns how many bytes were written which |
587 | | * should be getSizeInBytes(). |
588 | | * |
589 | | * Setting the portable flag to false enable a custom format that |
590 | | * can save space compared to the portable format (e.g., for very |
591 | | * sparse bitmaps). |
592 | | * |
593 | | * Boost users can serialize bitmaps in this manner: |
594 | | * |
595 | | * BOOST_SERIALIZATION_SPLIT_FREE(Roaring) |
596 | | * namespace boost { |
597 | | * namespace serialization { |
598 | | * |
599 | | * template <class Archive> |
600 | | * void save(Archive& ar, const Roaring& bitmask, |
601 | | * const unsigned int version) { |
602 | | * std::size_t expected_size_in_bytes = bitmask.getSizeInBytes(); |
603 | | * std::vector<char> buffer(expected_size_in_bytes); |
604 | | * std::size_t size_in_bytes = bitmask.write(buffer.data()); |
605 | | * |
606 | | * ar& size_in_bytes; |
607 | | * ar& boost::serialization::make_binary_object(buffer.data(), |
608 | | * size_in_bytes); |
609 | | * } |
610 | | * template <class Archive> |
611 | | * void load(Archive& ar, Roaring& bitmask, |
612 | | * const unsigned int version) { |
613 | | * std::size_t size_in_bytes = 0; |
614 | | * ar& size_in_bytes; |
615 | | * std::vector<char> buffer(size_in_bytes); |
616 | | * ar& boost::serialization::make_binary_object(buffer.data(), |
617 | | * size_in_bytes); |
618 | | * bitmask = Roaring::readSafe(buffer.data(), size_in_bytes); |
619 | | * } |
620 | | * } // namespace serialization |
621 | | * } // namespace boost |
622 | | */ |
623 | 3.23k | size_t write(char *buf, bool portable = true) const noexcept { |
624 | 3.23k | if (portable) { |
625 | 3.23k | return api::roaring_bitmap_portable_serialize(&roaring, buf); |
626 | 3.23k | } else { |
627 | 0 | return api::roaring_bitmap_serialize(&roaring, buf); |
628 | 0 | } |
629 | 3.23k | } |
630 | | |
631 | | /** |
632 | | * Read a bitmap from a serialized version. This is meant to be compatible |
633 | | * with the Java and Go versions. |
634 | | * |
635 | | * Setting the portable flag to false enable a custom format that |
636 | | * can save space compared to the portable format (e.g., for very |
637 | | * sparse bitmaps). |
638 | | * |
639 | | * This function is unsafe in the sense that if you provide bad data, |
640 | | * many, many bytes could be read. See also readSafe. |
641 | | * |
642 | | * The function may throw std::runtime_error if a bitmap could not be read. |
643 | | * Not that even if it does not throw, the bitmap could still be unusable if |
644 | | * the loaded data does not match the portable Roaring specification: you |
645 | | * should ensure that the data you load come from a serialized bitmap. |
646 | | */ |
647 | 0 | static Roaring read(const char *buf, bool portable = true) { |
648 | 0 | roaring_bitmap_t *r = |
649 | 0 | portable ? api::roaring_bitmap_portable_deserialize(buf) |
650 | 0 | : api::roaring_bitmap_deserialize(buf); |
651 | 0 | if (r == NULL) { |
652 | 0 | ROARING_TERMINATE("failed alloc while reading"); |
653 | 0 | } |
654 | 0 | return Roaring(r); |
655 | 0 | } |
656 | | |
657 | | /** |
658 | | * Read a bitmap from a serialized version, reading no more than maxbytes |
659 | | * bytes. This is meant to be compatible with the Java and Go versions. |
660 | | * The function itself is safe in the sense that it will not cause buffer |
661 | | * overflows. However, for correct operations, it is assumed that the bitmap |
662 | | * read was once serialized from a valid bitmap. If you provided an |
663 | | * incorrect input (garbage), then the bitmap read may not be in a valid |
664 | | * state and following operations may not lead to sensible results. It is |
665 | | * your responsability to ensure that the input bytes follow the format |
666 | | * specification if you want a usable bitmap: |
667 | | * https://github.com/RoaringBitmap/RoaringFormatSpec |
668 | | * In particular, the serialized array containers need to be in sorted |
669 | | * order, and the run containers should be in sorted non-overlapping order. |
670 | | * This is is guaranteed to happen when serializing an existing bitmap, but |
671 | | * not for random inputs. Note that this function assumes that your bitmap |
672 | | * was serialized in *portable* mode (which is the default with the 'write' |
673 | | * method). |
674 | | * |
675 | | * The function may throw std::runtime_error if a bitmap could not be read. |
676 | | * Not that even if it does not throw, the bitmap could still be unusable if |
677 | | * the loaded data does not match the portable Roaring specification: you |
678 | | * should ensure that the data you load come from a serialized bitmap. |
679 | | */ |
680 | 6.47k | static Roaring readSafe(const char *buf, size_t maxbytes) { |
681 | 6.47k | roaring_bitmap_t *r = |
682 | 6.47k | api::roaring_bitmap_portable_deserialize_safe(buf, maxbytes); |
683 | 6.47k | if (r == NULL) { |
684 | 3.22k | ROARING_TERMINATE("failed alloc while reading"); |
685 | 3.22k | } |
686 | 3.24k | return Roaring(r); |
687 | 6.47k | } |
688 | | |
689 | | /** |
690 | | * How many bytes are required to serialize this bitmap (meant to be |
691 | | * compatible with Java and Go versions) |
692 | | * |
693 | | * Setting the portable flag to false enable a custom format that |
694 | | * can save space compared to the portable format (e.g., for very |
695 | | * sparse bitmaps). |
696 | | */ |
697 | 6.47k | size_t getSizeInBytes(bool portable = true) const noexcept { |
698 | 6.47k | if (portable) { |
699 | 6.47k | return api::roaring_bitmap_portable_size_in_bytes(&roaring); |
700 | 6.47k | } else { |
701 | 0 | return api::roaring_bitmap_size_in_bytes(&roaring); |
702 | 0 | } |
703 | 6.47k | } |
704 | | |
705 | | /** |
706 | | * For advanced users. |
707 | | * This function may throw std::runtime_error. |
708 | | */ |
709 | 0 | static const Roaring frozenView(const char *buf, size_t length) { |
710 | 0 | const roaring_bitmap_t *s = |
711 | 0 | api::roaring_bitmap_frozen_view(buf, length); |
712 | 0 | if (s == NULL) { |
713 | 0 | ROARING_TERMINATE("failed to read frozen bitmap"); |
714 | 0 | } |
715 | 0 | Roaring r; |
716 | 0 | r.roaring = *s; |
717 | 0 | return r; |
718 | 0 | } |
719 | | |
720 | | /** |
721 | | * For advanced users; see roaring_bitmap_portable_deserialize_frozen. |
722 | | * This function may throw std::runtime_error. |
723 | | */ |
724 | 0 | static const Roaring portableDeserializeFrozen(const char *buf) { |
725 | 0 | const roaring_bitmap_t *s = |
726 | 0 | api::roaring_bitmap_portable_deserialize_frozen(buf); |
727 | 0 | if (s == NULL) { |
728 | 0 | ROARING_TERMINATE("failed to read portable frozen bitmap"); |
729 | 0 | } |
730 | 0 | Roaring r; |
731 | 0 | r.roaring = *s; |
732 | 0 | return r; |
733 | 0 | } |
734 | | |
735 | | /** |
736 | | * For advanced users. |
737 | | */ |
738 | 0 | void writeFrozen(char *buf) const noexcept { |
739 | 0 | roaring_bitmap_frozen_serialize(&roaring, buf); |
740 | 0 | } |
741 | | |
742 | | /** |
743 | | * For advanced users. |
744 | | */ |
745 | 0 | size_t getFrozenSizeInBytes() const noexcept { |
746 | 0 | return roaring_bitmap_frozen_size_in_bytes(&roaring); |
747 | 0 | } |
748 | | |
749 | | /** |
750 | | * Computes the intersection between two bitmaps and returns new bitmap. |
751 | | * The current bitmap and the provided bitmap are unchanged. |
752 | | * |
753 | | * Performance hint: if you are computing the intersection between several |
754 | | * bitmaps, two-by-two, it is best to start with the smallest bitmap. |
755 | | * Consider also using the operator &= to avoid needlessly creating |
756 | | * many temporary bitmaps. |
757 | | * This function may throw std::runtime_error. |
758 | | */ |
759 | 3.23k | Roaring operator&(const Roaring &o) const { |
760 | 3.23k | roaring_bitmap_t *r = api::roaring_bitmap_and(&roaring, &o.roaring); |
761 | 3.23k | if (r == NULL) { |
762 | 0 | ROARING_TERMINATE("failed materalization in and"); |
763 | 0 | } |
764 | 3.23k | return Roaring(r); |
765 | 3.23k | } |
766 | | |
767 | | /** |
768 | | * Computes the difference between two bitmaps and returns new bitmap. |
769 | | * The current bitmap and the provided bitmap are unchanged. |
770 | | * This function may throw std::runtime_error. |
771 | | */ |
772 | 3.23k | Roaring operator-(const Roaring &o) const { |
773 | 3.23k | roaring_bitmap_t *r = api::roaring_bitmap_andnot(&roaring, &o.roaring); |
774 | 3.23k | if (r == NULL) { |
775 | 0 | ROARING_TERMINATE("failed materalization in andnot"); |
776 | 0 | } |
777 | 3.23k | return Roaring(r); |
778 | 3.23k | } |
779 | | |
780 | | /** |
781 | | * Computes the union between two bitmaps and returns new bitmap. |
782 | | * The current bitmap and the provided bitmap are unchanged. |
783 | | * This function may throw std::runtime_error. |
784 | | */ |
785 | 3.23k | Roaring operator|(const Roaring &o) const { |
786 | 3.23k | roaring_bitmap_t *r = api::roaring_bitmap_or(&roaring, &o.roaring); |
787 | 3.23k | if (r == NULL) { |
788 | 0 | ROARING_TERMINATE("failed materalization in or"); |
789 | 0 | } |
790 | 3.23k | return Roaring(r); |
791 | 3.23k | } |
792 | | |
793 | | /** |
794 | | * Computes the symmetric union between two bitmaps and returns new bitmap. |
795 | | * The current bitmap and the provided bitmap are unchanged. |
796 | | * This function may throw std::runtime_error. |
797 | | */ |
798 | 3.23k | Roaring operator^(const Roaring &o) const { |
799 | 3.23k | roaring_bitmap_t *r = api::roaring_bitmap_xor(&roaring, &o.roaring); |
800 | 3.23k | if (r == NULL) { |
801 | 0 | ROARING_TERMINATE("failed materalization in xor"); |
802 | 0 | } |
803 | 3.23k | return Roaring(r); |
804 | 3.23k | } |
805 | | |
806 | | /** |
807 | | * Whether or not we apply copy and write. |
808 | | */ |
809 | 0 | void setCopyOnWrite(bool val) noexcept { |
810 | 0 | api::roaring_bitmap_set_copy_on_write(&roaring, val); |
811 | 0 | } |
812 | | |
813 | | /** |
814 | | * Print the content of the bitmap |
815 | | */ |
816 | 0 | void printf() const noexcept { api::roaring_bitmap_printf(&roaring); } |
817 | | |
818 | | /** |
819 | | * Print the content of the bitmap into a string |
820 | | */ |
821 | 3.23k | std::string toString() const noexcept { |
822 | 3.23k | struct iter_data { |
823 | 3.23k | std::string str{}; // The empty constructor silences warnings from |
824 | | // pedantic static analyzers. |
825 | 3.23k | char first_char = '{'; |
826 | 3.23k | } outer_iter_data; |
827 | 3.23k | if (!isEmpty()) { |
828 | 3.22k | iterate( |
829 | 1.92G | [](uint32_t value, void *inner_iter_data) -> bool { |
830 | 1.92G | ((iter_data *)inner_iter_data)->str += |
831 | 1.92G | ((iter_data *)inner_iter_data)->first_char; |
832 | 1.92G | ((iter_data *)inner_iter_data)->str += |
833 | 1.92G | std::to_string(value); |
834 | 1.92G | ((iter_data *)inner_iter_data)->first_char = ','; |
835 | 1.92G | return true; |
836 | 1.92G | }, |
837 | 3.22k | (void *)&outer_iter_data); |
838 | 3.22k | } else |
839 | 15 | outer_iter_data.str = '{'; |
840 | 3.23k | outer_iter_data.str += '}'; |
841 | 3.23k | return outer_iter_data.str; |
842 | 3.23k | } |
843 | | |
844 | | /** |
845 | | * Whether or not copy and write is active. |
846 | | */ |
847 | 0 | bool getCopyOnWrite() const noexcept { |
848 | 0 | return api::roaring_bitmap_get_copy_on_write(&roaring); |
849 | 0 | } |
850 | | |
851 | | /** |
852 | | * Computes the logical or (union) between "n" bitmaps (referenced by a |
853 | | * pointer). |
854 | | * This function may throw std::runtime_error. |
855 | | */ |
856 | 0 | static Roaring fastunion(size_t n, const Roaring **inputs) { |
857 | 0 | const roaring_bitmap_t **x = (const roaring_bitmap_t **)roaring_malloc( |
858 | 0 | n * sizeof(roaring_bitmap_t *)); |
859 | 0 | if (x == NULL) { |
860 | 0 | ROARING_TERMINATE("failed memory alloc in fastunion"); |
861 | 0 | } |
862 | 0 | for (size_t k = 0; k < n; ++k) x[k] = &inputs[k]->roaring; |
863 | 0 |
|
864 | 0 | roaring_bitmap_t *c_ans = api::roaring_bitmap_or_many(n, x); |
865 | 0 | if (c_ans == NULL) { |
866 | 0 | roaring_free(x); |
867 | 0 | ROARING_TERMINATE("failed memory alloc in fastunion"); |
868 | 0 | } |
869 | 0 | Roaring ans(c_ans); |
870 | 0 | roaring_free(x); |
871 | 0 | return ans; |
872 | 0 | } |
873 | | |
874 | | /** |
875 | | * Destructor. By contract, calling roaring_bitmap_clear() is enough to |
876 | | * release all auxiliary memory used by the structure. |
877 | | */ |
878 | 29.1k | ~Roaring() { |
879 | 29.1k | if (!(roaring.high_low_container.flags & ROARING_FLAG_FROZEN)) { |
880 | 29.1k | api::roaring_bitmap_clear(&roaring); |
881 | 29.1k | } else { |
882 | | // The roaring member variable copies the `roaring_bitmap_t` and |
883 | | // nested `roaring_array_t` structures by value and is freed in the |
884 | | // constructor, however the underlying memory arena used for the |
885 | | // container data is not freed with it. Here we derive the arena |
886 | | // pointer from the second arena allocation in |
887 | | // `roaring_bitmap_frozen_view` and free it as well. |
888 | 0 | roaring_bitmap_free( |
889 | 0 | (roaring_bitmap_t *)((char *) |
890 | 0 | roaring.high_low_container.containers - |
891 | 0 | sizeof(roaring_bitmap_t))); |
892 | 0 | } |
893 | 29.1k | } |
894 | | |
895 | | friend class RoaringSetBitBiDirectionalIterator; |
896 | | typedef RoaringSetBitBiDirectionalIterator const_iterator; |
897 | | typedef RoaringSetBitBiDirectionalIterator const_bidirectional_iterator; |
898 | | |
899 | | /** |
900 | | * Returns an iterator that can be used to access the position of the set |
901 | | * bits. The running time complexity of a full scan is proportional to the |
902 | | * number of set bits: be aware that if you have long strings of 1s, this |
903 | | * can be very inefficient. |
904 | | * |
905 | | * It can be much faster to use the toArray method if you want to retrieve |
906 | | * the set bits. |
907 | | */ |
908 | | const_iterator begin() const; |
909 | | |
910 | | /** |
911 | | * A bogus iterator that can be used together with begin() |
912 | | * for constructions such as for (auto i = b.begin(); * i!=b.end(); ++i) {} |
913 | | */ |
914 | | const_iterator &end() const; |
915 | | |
916 | | roaring_bitmap_t roaring; |
917 | | }; |
918 | | |
919 | | /** |
920 | | * Used to go through the set bits. Not optimally fast, but convenient. |
921 | | */ |
922 | | class RoaringSetBitBiDirectionalIterator final { |
923 | | public: |
924 | | typedef std::bidirectional_iterator_tag iterator_category; |
925 | | typedef uint32_t *pointer; |
926 | | typedef uint32_t &reference_type; |
927 | | typedef uint32_t value_type; |
928 | | typedef int32_t difference_type; |
929 | | typedef RoaringSetBitBiDirectionalIterator type_of_iterator; |
930 | | |
931 | | explicit RoaringSetBitBiDirectionalIterator(const Roaring &parent, |
932 | 6.47k | bool exhausted = false) { |
933 | 6.47k | if (exhausted) { |
934 | 1 | i.parent = &parent.roaring; |
935 | 1 | i.container_index = INT32_MAX; |
936 | 1 | i.has_value = false; |
937 | 1 | i.current_value = UINT32_MAX; |
938 | 6.47k | } else { |
939 | 6.47k | api::roaring_iterator_init(&parent.roaring, &i); |
940 | 6.47k | } |
941 | 6.47k | } |
942 | | |
943 | | /** |
944 | | * Provides the location of the set bit. |
945 | | */ |
946 | 158k | value_type operator*() const { return i.current_value; } |
947 | | |
948 | 0 | bool operator<(const type_of_iterator &o) const { |
949 | 0 | if (!i.has_value) return false; |
950 | 0 | if (!o.i.has_value) return true; |
951 | 0 | return i.current_value < *o; |
952 | 0 | } |
953 | | |
954 | 0 | bool operator<=(const type_of_iterator &o) const { |
955 | 0 | if (!o.i.has_value) return true; |
956 | 0 | if (!i.has_value) return false; |
957 | 0 | return i.current_value <= *o; |
958 | 0 | } |
959 | | |
960 | 0 | bool operator>(const type_of_iterator &o) const { |
961 | 0 | if (!o.i.has_value) return false; |
962 | 0 | if (!i.has_value) return true; |
963 | 0 | return i.current_value > *o; |
964 | 0 | } |
965 | | |
966 | 0 | bool operator>=(const type_of_iterator &o) const { |
967 | 0 | if (!i.has_value) return true; |
968 | 0 | if (!o.i.has_value) return false; |
969 | 0 | return i.current_value >= *o; |
970 | 0 | } |
971 | | |
972 | 0 | type_of_iterator &operator++() { // ++i, must returned inc. value |
973 | 0 | api::roaring_uint32_iterator_advance(&i); |
974 | 0 | return *this; |
975 | 0 | } |
976 | | |
977 | 155k | type_of_iterator operator++(int) { // i++, must return orig. value |
978 | 155k | RoaringSetBitBiDirectionalIterator orig(*this); |
979 | 155k | api::roaring_uint32_iterator_advance(&i); |
980 | 155k | return orig; |
981 | 155k | } |
982 | | |
983 | | /** |
984 | | * Move the iterator to the first value >= val. |
985 | | * Return true if there is such a value. |
986 | | */ |
987 | 0 | bool move_equalorlarger(value_type val) { |
988 | 0 | return api::roaring_uint32_iterator_move_equalorlarger(&i, val); |
989 | 0 | } |
990 | | |
991 | | /** DEPRECATED, use `move_equalorlarger`.*/ |
992 | 3.23k | CROARING_DEPRECATED void equalorlarger(uint32_t val) { |
993 | 3.23k | api::roaring_uint32_iterator_move_equalorlarger(&i, val); |
994 | 3.23k | } |
995 | | |
996 | 0 | type_of_iterator &operator--() { // prefix -- |
997 | 0 | api::roaring_uint32_iterator_previous(&i); |
998 | 0 | return *this; |
999 | 0 | } |
1000 | | |
1001 | 0 | type_of_iterator operator--(int) { // postfix -- |
1002 | 0 | RoaringSetBitBiDirectionalIterator orig(*this); |
1003 | 0 | api::roaring_uint32_iterator_previous(&i); |
1004 | 0 | return orig; |
1005 | 0 | } |
1006 | | |
1007 | 0 | bool operator==(const RoaringSetBitBiDirectionalIterator &o) const { |
1008 | 0 | return i.current_value == *o && i.has_value == o.i.has_value; |
1009 | 0 | } |
1010 | | |
1011 | 158k | bool operator!=(const RoaringSetBitBiDirectionalIterator &o) const { |
1012 | 158k | return i.current_value != *o || i.has_value != o.i.has_value; |
1013 | 158k | } |
1014 | | |
1015 | | api::roaring_uint32_iterator_t |
1016 | | i{}; // The empty constructor silences warnings from pedantic static |
1017 | | // analyzers. |
1018 | | }; |
1019 | | |
1020 | 6.47k | inline RoaringSetBitBiDirectionalIterator Roaring::begin() const { |
1021 | 6.47k | return RoaringSetBitBiDirectionalIterator(*this); |
1022 | 6.47k | } |
1023 | | |
1024 | 158k | inline RoaringSetBitBiDirectionalIterator &Roaring::end() const { |
1025 | 158k | static RoaringSetBitBiDirectionalIterator e(*this, true); |
1026 | 158k | return e; |
1027 | 158k | } |
1028 | | |
1029 | | } // namespace roaring |
1030 | | |
1031 | | #endif /* INCLUDE_ROARING_HH_ */ |