/src/croaring/include/roaring/bitset/bitset.h
Line | Count | Source |
1 | | #ifndef CROARING_CBITSET_BITSET_H |
2 | | #define CROARING_CBITSET_BITSET_H |
3 | | |
4 | | // For compatibility with MSVC with the use of `restrict` |
5 | | #ifdef __cplusplus |
6 | | #define CROARING_CBITSET_RESTRICT |
7 | | #elif (__STDC_VERSION__ >= 199901L) || \ |
8 | | (defined(__GNUC__) && defined(__STDC_VERSION__)) |
9 | | #define CROARING_CBITSET_RESTRICT restrict |
10 | | #else |
11 | | #define CROARING_CBITSET_RESTRICT |
12 | | #endif |
13 | | |
14 | | #include <stdbool.h> |
15 | | #include <stdint.h> |
16 | | #include <stdio.h> |
17 | | #include <stdlib.h> |
18 | | #include <string.h> |
19 | | |
20 | | #include <roaring/portability.h> |
21 | | |
22 | | #ifdef __cplusplus |
23 | | extern "C" { |
24 | | namespace roaring { |
25 | | namespace api { |
26 | | #endif |
27 | | |
28 | | struct bitset_s { |
29 | | uint64_t *CROARING_CBITSET_RESTRICT array; |
30 | | /* For simplicity and performance, we prefer to have a size and a capacity |
31 | | * that is a multiple of 64 bits. Thus we only track the size and the |
32 | | * capacity in terms of 64-bit words allocated */ |
33 | | size_t arraysize; |
34 | | size_t capacity; |
35 | | }; |
36 | | |
37 | | typedef struct bitset_s bitset_t; |
38 | | |
39 | | /* Create a new bitset. Return NULL in case of failure. */ |
40 | | bitset_t *bitset_create(void); |
41 | | |
42 | | /* Create a new bitset able to contain size bits. Return NULL in case of |
43 | | * failure. */ |
44 | | bitset_t *bitset_create_with_capacity(size_t size); |
45 | | |
46 | | /* Free memory. */ |
47 | | void bitset_free(bitset_t *bitset); |
48 | | |
49 | | /* Set all bits to zero. */ |
50 | | void bitset_clear(bitset_t *bitset); |
51 | | |
52 | | /* Set all bits to one. */ |
53 | | void bitset_fill(bitset_t *bitset); |
54 | | |
55 | | /* Create a copy */ |
56 | | bitset_t *bitset_copy(const bitset_t *bitset); |
57 | | |
58 | | /* For advanced users: Resize the bitset so that it can support newarraysize * |
59 | | * 64 bits. Return true in case of success, false for failure. Pad with zeroes |
60 | | * new buffer areas if requested. */ |
61 | | bool bitset_resize(bitset_t *bitset, size_t newarraysize, bool padwithzeroes); |
62 | | |
63 | | /* returns how many bytes of memory the backend buffer uses */ |
64 | 0 | inline size_t bitset_size_in_bytes(const bitset_t *bitset) { |
65 | 0 | return bitset->arraysize * sizeof(uint64_t); |
66 | 0 | } |
67 | | |
68 | | /* returns how many bits can be accessed */ |
69 | 0 | inline size_t bitset_size_in_bits(const bitset_t *bitset) { |
70 | 0 | return bitset->arraysize * 64; |
71 | 0 | } |
72 | | |
73 | | /* returns how many words (64-bit) of memory the backend buffer uses */ |
74 | 0 | inline size_t bitset_size_in_words(const bitset_t *bitset) { |
75 | 0 | return bitset->arraysize; |
76 | 0 | } |
77 | | |
78 | | /* For advanced users: Grow the bitset so that it can support newarraysize * 64 |
79 | | * bits with padding. Return true in case of success, false for failure. */ |
80 | | bool bitset_grow(bitset_t *bitset, size_t newarraysize); |
81 | | |
82 | | /* attempts to recover unused memory, return false in case of |
83 | | * roaring_reallocation failure */ |
84 | | bool bitset_trim(bitset_t *bitset); |
85 | | |
86 | | /* shifts all bits by 's' positions so that the bitset representing values |
87 | | * 1,2,10 would represent values 1+s, 2+s, 10+s */ |
88 | | void bitset_shift_left(bitset_t *bitset, size_t s); |
89 | | |
90 | | /* shifts all bits by 's' positions so that the bitset representing values |
91 | | * 1,2,10 would represent values 1-s, 2-s, 10-s, negative values are deleted */ |
92 | | void bitset_shift_right(bitset_t *bitset, size_t s); |
93 | | |
94 | | /* Set the ith bit. Attempts to resize the bitset if needed (may silently fail) |
95 | | */ |
96 | 0 | inline void bitset_set(bitset_t *bitset, size_t i) { |
97 | 0 | size_t shiftedi = i / 64; |
98 | 0 | if (shiftedi >= bitset->arraysize) { |
99 | 0 | if (!bitset_grow(bitset, shiftedi + 1)) { |
100 | 0 | return; |
101 | 0 | } |
102 | 0 | } |
103 | 0 | bitset->array[shiftedi] |= ((uint64_t)1) << (i % 64); |
104 | 0 | } |
105 | | |
106 | | /* Set the ith bit to the specified value. Attempts to resize the bitset if |
107 | | * needed (may silently fail) */ |
108 | 0 | inline void bitset_set_to_value(bitset_t *bitset, size_t i, bool flag) { |
109 | 0 | size_t shiftedi = i / 64; |
110 | 0 | uint64_t mask = ((uint64_t)1) << (i % 64); |
111 | 0 | uint64_t dynmask = ((uint64_t)flag) << (i % 64); |
112 | 0 | if (shiftedi >= bitset->arraysize) { |
113 | 0 | if (!bitset_grow(bitset, shiftedi + 1)) { |
114 | 0 | return; |
115 | 0 | } |
116 | 0 | } |
117 | 0 | uint64_t w = bitset->array[shiftedi]; |
118 | 0 | w &= ~mask; |
119 | 0 | w |= dynmask; |
120 | 0 | bitset->array[shiftedi] = w; |
121 | 0 | } |
122 | | |
123 | | /* Get the value of the ith bit. */ |
124 | 0 | inline bool bitset_get(const bitset_t *bitset, size_t i) { |
125 | 0 | size_t shiftedi = i / 64; |
126 | 0 | if (shiftedi >= bitset->arraysize) { |
127 | 0 | return false; |
128 | 0 | } |
129 | 0 | return (bitset->array[shiftedi] & (((uint64_t)1) << (i % 64))) != 0; |
130 | 0 | } |
131 | | |
132 | | /* Count number of bits set. */ |
133 | | size_t bitset_count(const bitset_t *bitset); |
134 | | |
135 | | /* Returns true if no bit is set. */ |
136 | | bool bitset_empty(const bitset_t *bitset); |
137 | | |
138 | | /* Find the index of the first bit set. Or SIZE_MAX if the bitset is empty. */ |
139 | | size_t bitset_minimum(const bitset_t *bitset); |
140 | | |
141 | | /* Find the index of the last bit set. Or zero if the bitset is empty. */ |
142 | | size_t bitset_maximum(const bitset_t *bitset); |
143 | | |
144 | | /* compute the union in-place (to b1), returns true if successful, to generate a |
145 | | * new bitset first call bitset_copy */ |
146 | | bool bitset_inplace_union(bitset_t *CROARING_CBITSET_RESTRICT b1, |
147 | | const bitset_t *CROARING_CBITSET_RESTRICT b2); |
148 | | |
149 | | /* report the size of the union (without materializing it) */ |
150 | | size_t bitset_union_count(const bitset_t *CROARING_CBITSET_RESTRICT b1, |
151 | | const bitset_t *CROARING_CBITSET_RESTRICT b2); |
152 | | |
153 | | /* compute the intersection in-place (to b1), to generate a new bitset first |
154 | | * call bitset_copy */ |
155 | | void bitset_inplace_intersection(bitset_t *CROARING_CBITSET_RESTRICT b1, |
156 | | const bitset_t *CROARING_CBITSET_RESTRICT b2); |
157 | | |
158 | | /* report the size of the intersection (without materializing it) */ |
159 | | size_t bitset_intersection_count(const bitset_t *CROARING_CBITSET_RESTRICT b1, |
160 | | const bitset_t *CROARING_CBITSET_RESTRICT b2); |
161 | | |
162 | | /* returns true if the bitsets contain no common elements */ |
163 | | bool bitsets_disjoint(const bitset_t *CROARING_CBITSET_RESTRICT b1, |
164 | | const bitset_t *CROARING_CBITSET_RESTRICT b2); |
165 | | |
166 | | /* returns true if the bitsets contain any common elements */ |
167 | | bool bitsets_intersect(const bitset_t *CROARING_CBITSET_RESTRICT b1, |
168 | | const bitset_t *CROARING_CBITSET_RESTRICT b2); |
169 | | |
170 | | /* returns true if b1 contains all of the set bits of b2 */ |
171 | | bool bitset_contains_all(const bitset_t *CROARING_CBITSET_RESTRICT b1, |
172 | | const bitset_t *CROARING_CBITSET_RESTRICT b2); |
173 | | |
174 | | /* compute the difference in-place (to b1), to generate a new bitset first call |
175 | | * bitset_copy */ |
176 | | void bitset_inplace_difference(bitset_t *CROARING_CBITSET_RESTRICT b1, |
177 | | const bitset_t *CROARING_CBITSET_RESTRICT b2); |
178 | | |
179 | | /* compute the size of the difference */ |
180 | | size_t bitset_difference_count(const bitset_t *CROARING_CBITSET_RESTRICT b1, |
181 | | const bitset_t *CROARING_CBITSET_RESTRICT b2); |
182 | | |
183 | | /* compute the symmetric difference in-place (to b1), return true if successful, |
184 | | * to generate a new bitset first call bitset_copy */ |
185 | | bool bitset_inplace_symmetric_difference( |
186 | | bitset_t *CROARING_CBITSET_RESTRICT b1, |
187 | | const bitset_t *CROARING_CBITSET_RESTRICT b2); |
188 | | |
189 | | /* compute the size of the symmetric difference */ |
190 | | size_t bitset_symmetric_difference_count( |
191 | | const bitset_t *CROARING_CBITSET_RESTRICT b1, |
192 | | const bitset_t *CROARING_CBITSET_RESTRICT b2); |
193 | | |
194 | | /* iterate over the set bits |
195 | | like so : |
196 | | for(size_t i = 0; bitset_next_set_bit(b,&i) ; i++) { |
197 | | //..... |
198 | | } |
199 | | */ |
200 | 0 | inline bool bitset_next_set_bit(const bitset_t *bitset, size_t *i) { |
201 | 0 | size_t x = *i / 64; |
202 | 0 | if (x >= bitset->arraysize) { |
203 | 0 | return false; |
204 | 0 | } |
205 | 0 | uint64_t w = bitset->array[x]; |
206 | 0 | w >>= (*i & 63); |
207 | 0 | if (w != 0) { |
208 | 0 | *i += roaring_trailing_zeroes(w); |
209 | 0 | return true; |
210 | 0 | } |
211 | 0 | x++; |
212 | 0 | while (x < bitset->arraysize) { |
213 | 0 | w = bitset->array[x]; |
214 | 0 | if (w != 0) { |
215 | 0 | *i = x * 64 + roaring_trailing_zeroes(w); |
216 | 0 | return true; |
217 | 0 | } |
218 | 0 | x++; |
219 | 0 | } |
220 | 0 | return false; |
221 | 0 | } |
222 | | |
223 | | /* iterate over the set bits |
224 | | like so : |
225 | | size_t buffer[256]; |
226 | | size_t howmany = 0; |
227 | | for(size_t startfrom = 0; (howmany = bitset_next_set_bits(b,buffer,256, |
228 | | &startfrom)) > 0 ; startfrom++) { |
229 | | //..... |
230 | | } |
231 | | */ |
232 | | inline size_t bitset_next_set_bits(const bitset_t *bitset, size_t *buffer, |
233 | 0 | size_t capacity, size_t *startfrom) { |
234 | 0 | if (capacity == 0) return 0; // sanity check |
235 | 0 | size_t x = *startfrom / 64; |
236 | 0 | if (x >= bitset->arraysize) { |
237 | 0 | return 0; // nothing more to iterate over |
238 | 0 | } |
239 | 0 | uint64_t w = bitset->array[x]; |
240 | | // unset low bits inside the word less than *startfrom |
241 | 0 | w &= ~((UINT64_C(1) << (*startfrom & 63)) - 1); |
242 | 0 | size_t howmany = 0; |
243 | 0 | size_t base = x << 6; |
244 | 0 | while (howmany < capacity) { |
245 | 0 | while (w != 0) { |
246 | 0 | uint64_t t = w & (~w + 1); |
247 | 0 | int r = roaring_trailing_zeroes(w); |
248 | 0 | buffer[howmany++] = r + base; |
249 | 0 | if (howmany == capacity) goto end; |
250 | 0 | w ^= t; |
251 | 0 | } |
252 | 0 | x += 1; |
253 | 0 | if (x == bitset->arraysize) { |
254 | 0 | break; |
255 | 0 | } |
256 | 0 | base += 64; |
257 | 0 | w = bitset->array[x]; |
258 | 0 | } |
259 | 0 | end: |
260 | 0 | if (howmany > 0) { |
261 | 0 | *startfrom = buffer[howmany - 1]; |
262 | 0 | } |
263 | 0 | return howmany; |
264 | 0 | } |
265 | | |
266 | | typedef bool (*bitset_iterator)(size_t value, void *param); |
267 | | |
268 | | // return true if uninterrupted |
269 | | inline bool bitset_for_each(const bitset_t *b, bitset_iterator iterator, |
270 | 0 | void *ptr) { |
271 | 0 | size_t base = 0; |
272 | 0 | for (size_t i = 0; i < b->arraysize; ++i) { |
273 | 0 | uint64_t w = b->array[i]; |
274 | 0 | while (w != 0) { |
275 | 0 | uint64_t t = w & (~w + 1); |
276 | 0 | int r = roaring_trailing_zeroes(w); |
277 | 0 | if (!iterator(r + base, ptr)) return false; |
278 | 0 | w ^= t; |
279 | 0 | } |
280 | 0 | base += 64; |
281 | 0 | } |
282 | 0 | return true; |
283 | 0 | } |
284 | | |
285 | 0 | inline void bitset_print(const bitset_t *b) { |
286 | 0 | printf("{"); |
287 | 0 | for (size_t i = 0; bitset_next_set_bit(b, &i); i++) { |
288 | 0 | printf("%zu, ", i); |
289 | 0 | } |
290 | 0 | printf("}"); |
291 | 0 | } |
292 | | |
293 | | #ifdef __cplusplus |
294 | | } |
295 | | } |
296 | | } // extern "C" { namespace roaring { namespace api { |
297 | | #endif |
298 | | |
299 | | #endif |