/src/croaring/include/roaring/bitset/bitset.h
Line | Count | Source |
1 | | /* |
2 | | * bitset.h |
3 | | * |
4 | | * This bitset is a general-purpose dynamic bitmap storing bits in a contiguous |
5 | | * array of 64-bit words. The array field points to the word buffer, arraysize |
6 | | * records how many words are currently in use, and capacity records how many |
7 | | * words are allocated. |
8 | | * |
9 | | * Unlike the fixed 16-bit-domain container bitset, this structure can grow to |
10 | | * cover an arbitrary number of bit positions. It is useful when callers need a |
11 | | * resizable bitmap with efficient bitwise operations, scans, and shifts over a |
12 | | * larger or runtime-defined domain. |
13 | | */ |
14 | | #ifndef CROARING_CBITSET_BITSET_H |
15 | | #define CROARING_CBITSET_BITSET_H |
16 | | |
17 | | // For compatibility with MSVC with the use of `restrict` |
18 | | #ifdef __cplusplus |
19 | | #define CROARING_CBITSET_RESTRICT |
20 | | #elif (__STDC_VERSION__ >= 199901L) || \ |
21 | | (defined(__GNUC__) && defined(__STDC_VERSION__)) |
22 | | #define CROARING_CBITSET_RESTRICT restrict |
23 | | #else |
24 | | #define CROARING_CBITSET_RESTRICT |
25 | | #endif |
26 | | |
27 | | #include <stdbool.h> |
28 | | #include <stdint.h> |
29 | | #include <stdio.h> |
30 | | #include <stdlib.h> |
31 | | #include <string.h> |
32 | | |
33 | | #include <roaring/portability.h> |
34 | | |
35 | | #ifdef __cplusplus |
36 | | extern "C" { |
37 | | namespace roaring { |
38 | | namespace api { |
39 | | #endif |
40 | | |
41 | | struct bitset_s { |
42 | | uint64_t *CROARING_CBITSET_RESTRICT array; |
43 | | /* For simplicity and performance, we prefer to have a size and a capacity |
44 | | * that is a multiple of 64 bits. Thus we only track the size and the |
45 | | * capacity in terms of 64-bit words allocated */ |
46 | | size_t arraysize; |
47 | | size_t capacity; |
48 | | }; |
49 | | |
50 | | typedef struct bitset_s bitset_t; |
51 | | |
52 | | /* Create a new bitset. Return NULL in case of failure. */ |
53 | | bitset_t *bitset_create(void); |
54 | | |
55 | | /* Create a new bitset able to contain size bits. Return NULL in case of |
56 | | * failure. */ |
57 | | bitset_t *bitset_create_with_capacity(size_t size); |
58 | | |
59 | | /* Free memory. */ |
60 | | void bitset_free(bitset_t *bitset); |
61 | | |
62 | | /* Set all bits to zero. */ |
63 | | void bitset_clear(bitset_t *bitset); |
64 | | |
65 | | /* Set all bits to one. */ |
66 | | void bitset_fill(bitset_t *bitset); |
67 | | |
68 | | /* Create a copy */ |
69 | | bitset_t *bitset_copy(const bitset_t *bitset); |
70 | | |
71 | | /* For advanced users: Resize the bitset so that it can support newarraysize * |
72 | | * 64 bits. Return true in case of success, false for failure. Pad with zeroes |
73 | | * new buffer areas if requested. */ |
74 | | bool bitset_resize(bitset_t *bitset, size_t newarraysize, bool padwithzeroes); |
75 | | |
76 | | /* returns how many bytes of memory the backend buffer uses */ |
77 | 0 | inline size_t bitset_size_in_bytes(const bitset_t *bitset) { |
78 | 0 | return bitset->arraysize * sizeof(uint64_t); |
79 | 0 | } |
80 | | |
81 | | /* returns how many bits can be accessed */ |
82 | 0 | inline size_t bitset_size_in_bits(const bitset_t *bitset) { |
83 | 0 | return bitset->arraysize * 64; |
84 | 0 | } |
85 | | |
86 | | /* returns how many words (64-bit) of memory the backend buffer uses */ |
87 | 0 | inline size_t bitset_size_in_words(const bitset_t *bitset) { |
88 | 0 | return bitset->arraysize; |
89 | 0 | } |
90 | | |
91 | | /* For advanced users: Grow the bitset so that it can support newarraysize * 64 |
92 | | * bits with padding. Return true in case of success, false for failure. */ |
93 | | bool bitset_grow(bitset_t *bitset, size_t newarraysize); |
94 | | |
95 | | /* attempts to recover unused memory, return false in case of |
96 | | * roaring_reallocation failure */ |
97 | | bool bitset_trim(bitset_t *bitset); |
98 | | |
99 | | /* shifts all bits by 's' positions so that the bitset representing values |
100 | | * 1,2,10 would represent values 1+s, 2+s, 10+s */ |
101 | | void bitset_shift_left(bitset_t *bitset, size_t s); |
102 | | |
103 | | /* shifts all bits by 's' positions so that the bitset representing values |
104 | | * 1,2,10 would represent values 1-s, 2-s, 10-s, negative values are deleted */ |
105 | | void bitset_shift_right(bitset_t *bitset, size_t s); |
106 | | |
107 | | /* Set the ith bit. Attempts to resize the bitset if needed (may silently fail) |
108 | | */ |
109 | 0 | inline void bitset_set(bitset_t *bitset, size_t i) { |
110 | 0 | size_t shiftedi = i / 64; |
111 | 0 | if (shiftedi >= bitset->arraysize) { |
112 | 0 | if (!bitset_grow(bitset, shiftedi + 1)) { |
113 | 0 | return; |
114 | 0 | } |
115 | 0 | } |
116 | 0 | bitset->array[shiftedi] |= ((uint64_t)1) << (i % 64); |
117 | 0 | } |
118 | | |
119 | | /* Set the ith bit to the specified value. Attempts to resize the bitset if |
120 | | * needed (may silently fail) */ |
121 | 0 | inline void bitset_set_to_value(bitset_t *bitset, size_t i, bool flag) { |
122 | 0 | size_t shiftedi = i / 64; |
123 | 0 | uint64_t mask = ((uint64_t)1) << (i % 64); |
124 | 0 | uint64_t dynmask = ((uint64_t)flag) << (i % 64); |
125 | 0 | if (shiftedi >= bitset->arraysize) { |
126 | 0 | if (!bitset_grow(bitset, shiftedi + 1)) { |
127 | 0 | return; |
128 | 0 | } |
129 | 0 | } |
130 | 0 | uint64_t w = bitset->array[shiftedi]; |
131 | 0 | w &= ~mask; |
132 | 0 | w |= dynmask; |
133 | 0 | bitset->array[shiftedi] = w; |
134 | 0 | } |
135 | | |
136 | | /* Get the value of the ith bit. */ |
137 | 0 | inline bool bitset_get(const bitset_t *bitset, size_t i) { |
138 | 0 | size_t shiftedi = i / 64; |
139 | 0 | if (shiftedi >= bitset->arraysize) { |
140 | 0 | return false; |
141 | 0 | } |
142 | 0 | return (bitset->array[shiftedi] & (((uint64_t)1) << (i % 64))) != 0; |
143 | 0 | } |
144 | | |
145 | | /* Count number of bits set. */ |
146 | | size_t bitset_count(const bitset_t *bitset); |
147 | | |
148 | | /* Returns true if no bit is set. */ |
149 | | bool bitset_empty(const bitset_t *bitset); |
150 | | |
151 | | /* Find the index of the first bit set. Or SIZE_MAX if the bitset is empty. */ |
152 | | size_t bitset_minimum(const bitset_t *bitset); |
153 | | |
154 | | /* Find the index of the last bit set. Or zero if the bitset is empty. */ |
155 | | size_t bitset_maximum(const bitset_t *bitset); |
156 | | |
157 | | /* compute the union in-place (to b1), returns true if successful, to generate a |
158 | | * new bitset first call bitset_copy */ |
159 | | bool bitset_inplace_union(bitset_t *CROARING_CBITSET_RESTRICT b1, |
160 | | const bitset_t *CROARING_CBITSET_RESTRICT b2); |
161 | | |
162 | | /* report the size of the union (without materializing it) */ |
163 | | size_t bitset_union_count(const bitset_t *CROARING_CBITSET_RESTRICT b1, |
164 | | const bitset_t *CROARING_CBITSET_RESTRICT b2); |
165 | | |
166 | | /* compute the intersection in-place (to b1), to generate a new bitset first |
167 | | * call bitset_copy */ |
168 | | void bitset_inplace_intersection(bitset_t *CROARING_CBITSET_RESTRICT b1, |
169 | | const bitset_t *CROARING_CBITSET_RESTRICT b2); |
170 | | |
171 | | /* report the size of the intersection (without materializing it) */ |
172 | | size_t bitset_intersection_count(const bitset_t *CROARING_CBITSET_RESTRICT b1, |
173 | | const bitset_t *CROARING_CBITSET_RESTRICT b2); |
174 | | |
175 | | /* returns true if the bitsets contain no common elements */ |
176 | | bool bitsets_disjoint(const bitset_t *CROARING_CBITSET_RESTRICT b1, |
177 | | const bitset_t *CROARING_CBITSET_RESTRICT b2); |
178 | | |
179 | | /* returns true if the bitsets contain any common elements */ |
180 | | bool bitsets_intersect(const bitset_t *CROARING_CBITSET_RESTRICT b1, |
181 | | const bitset_t *CROARING_CBITSET_RESTRICT b2); |
182 | | |
183 | | /* returns true if b1 contains all of the set bits of b2 */ |
184 | | bool bitset_contains_all(const bitset_t *CROARING_CBITSET_RESTRICT b1, |
185 | | const bitset_t *CROARING_CBITSET_RESTRICT b2); |
186 | | |
187 | | /* compute the difference in-place (to b1), to generate a new bitset first call |
188 | | * bitset_copy */ |
189 | | void bitset_inplace_difference(bitset_t *CROARING_CBITSET_RESTRICT b1, |
190 | | const bitset_t *CROARING_CBITSET_RESTRICT b2); |
191 | | |
192 | | /* compute the size of the difference */ |
193 | | size_t bitset_difference_count(const bitset_t *CROARING_CBITSET_RESTRICT b1, |
194 | | const bitset_t *CROARING_CBITSET_RESTRICT b2); |
195 | | |
196 | | /* compute the symmetric difference in-place (to b1), return true if successful, |
197 | | * to generate a new bitset first call bitset_copy */ |
198 | | bool bitset_inplace_symmetric_difference( |
199 | | bitset_t *CROARING_CBITSET_RESTRICT b1, |
200 | | const bitset_t *CROARING_CBITSET_RESTRICT b2); |
201 | | |
202 | | /* compute the size of the symmetric difference */ |
203 | | size_t bitset_symmetric_difference_count( |
204 | | const bitset_t *CROARING_CBITSET_RESTRICT b1, |
205 | | const bitset_t *CROARING_CBITSET_RESTRICT b2); |
206 | | |
207 | | /* iterate over the set bits |
208 | | like so : |
209 | | for(size_t i = 0; bitset_next_set_bit(b,&i) ; i++) { |
210 | | //..... |
211 | | } |
212 | | */ |
213 | 0 | inline bool bitset_next_set_bit(const bitset_t *bitset, size_t *i) { |
214 | 0 | size_t x = *i / 64; |
215 | 0 | if (x >= bitset->arraysize) { |
216 | 0 | return false; |
217 | 0 | } |
218 | 0 | uint64_t w = bitset->array[x]; |
219 | 0 | w >>= (*i & 63); |
220 | 0 | if (w != 0) { |
221 | 0 | *i += roaring_trailing_zeroes(w); |
222 | 0 | return true; |
223 | 0 | } |
224 | 0 | x++; |
225 | 0 | while (x < bitset->arraysize) { |
226 | 0 | w = bitset->array[x]; |
227 | 0 | if (w != 0) { |
228 | 0 | *i = x * 64 + roaring_trailing_zeroes(w); |
229 | 0 | return true; |
230 | 0 | } |
231 | 0 | x++; |
232 | 0 | } |
233 | 0 | return false; |
234 | 0 | } |
235 | | |
236 | | /* iterate over the set bits |
237 | | like so : |
238 | | size_t buffer[256]; |
239 | | size_t howmany = 0; |
240 | | for(size_t startfrom = 0; (howmany = bitset_next_set_bits(b,buffer,256, |
241 | | &startfrom)) > 0 ; startfrom++) { |
242 | | //..... |
243 | | } |
244 | | */ |
245 | | inline size_t bitset_next_set_bits(const bitset_t *bitset, size_t *buffer, |
246 | 0 | size_t capacity, size_t *startfrom) { |
247 | 0 | if (capacity == 0) return 0; // sanity check |
248 | 0 | size_t x = *startfrom / 64; |
249 | 0 | if (x >= bitset->arraysize) { |
250 | 0 | return 0; // nothing more to iterate over |
251 | 0 | } |
252 | 0 | uint64_t w = bitset->array[x]; |
253 | | // unset low bits inside the word less than *startfrom |
254 | 0 | w &= ~((UINT64_C(1) << (*startfrom & 63)) - 1); |
255 | 0 | size_t howmany = 0; |
256 | 0 | size_t base = x << 6; |
257 | 0 | while (howmany < capacity) { |
258 | 0 | while (w != 0) { |
259 | 0 | uint64_t t = w & (~w + 1); |
260 | 0 | int r = roaring_trailing_zeroes(w); |
261 | 0 | buffer[howmany++] = r + base; |
262 | 0 | if (howmany == capacity) goto end; |
263 | 0 | w ^= t; |
264 | 0 | } |
265 | 0 | x += 1; |
266 | 0 | if (x == bitset->arraysize) { |
267 | 0 | break; |
268 | 0 | } |
269 | 0 | base += 64; |
270 | 0 | w = bitset->array[x]; |
271 | 0 | } |
272 | 0 | end: |
273 | 0 | if (howmany > 0) { |
274 | 0 | *startfrom = buffer[howmany - 1]; |
275 | 0 | } |
276 | 0 | return howmany; |
277 | 0 | } |
278 | | |
279 | | typedef bool (*bitset_iterator)(size_t value, void *param); |
280 | | |
281 | | // return true if uninterrupted |
282 | | inline bool bitset_for_each(const bitset_t *b, bitset_iterator iterator, |
283 | 0 | void *ptr) { |
284 | 0 | size_t base = 0; |
285 | 0 | for (size_t i = 0; i < b->arraysize; ++i) { |
286 | 0 | uint64_t w = b->array[i]; |
287 | 0 | while (w != 0) { |
288 | 0 | uint64_t t = w & (~w + 1); |
289 | 0 | int r = roaring_trailing_zeroes(w); |
290 | 0 | if (!iterator(r + base, ptr)) return false; |
291 | 0 | w ^= t; |
292 | 0 | } |
293 | 0 | base += 64; |
294 | 0 | } |
295 | 0 | return true; |
296 | 0 | } |
297 | | |
298 | 0 | inline void bitset_print(const bitset_t *b) { |
299 | 0 | printf("{"); |
300 | 0 | for (size_t i = 0; bitset_next_set_bit(b, &i); i++) { |
301 | 0 | printf("%zu, ", i); |
302 | 0 | } |
303 | 0 | printf("}"); |
304 | 0 | } |
305 | | |
306 | | #ifdef __cplusplus |
307 | | } |
308 | | } |
309 | | } // extern "C" { namespace roaring { namespace api { |
310 | | #endif |
311 | | |
312 | | #endif |