/src/croaring/src/containers/mixed_negation.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * mixed_negation.c |
3 | | * |
4 | | */ |
5 | | |
6 | | #include <assert.h> |
7 | | #include <string.h> |
8 | | |
9 | | #include <roaring/array_util.h> |
10 | | #include <roaring/bitset_util.h> |
11 | | #include <roaring/containers/containers.h> |
12 | | #include <roaring/containers/convert.h> |
13 | | #include <roaring/containers/mixed_negation.h> |
14 | | #include <roaring/containers/run.h> |
15 | | |
16 | | #ifdef __cplusplus |
17 | | extern "C" { namespace roaring { namespace internal { |
18 | | #endif |
19 | | |
20 | | // TODO: make simplified and optimized negation code across |
21 | | // the full range. |
22 | | |
23 | | /* Negation across the entire range of the container. |
24 | | * Compute the negation of src and write the result |
25 | | * to *dst. The complement of a |
26 | | * sufficiently sparse set will always be dense and hence a bitmap. |
27 | | * We assume that dst is pre-allocated and a valid bitset container. |
28 | | * There can be no in-place version. |
29 | | */ |
30 | | void array_container_negation(const array_container_t *src, |
31 | 0 | bitset_container_t *dst) { |
32 | 0 | uint64_t card = UINT64_C(1 << 16); |
33 | 0 | bitset_container_set_all(dst); |
34 | | |
35 | 0 | if (src->cardinality == 0) { |
36 | 0 | return; |
37 | 0 | } |
38 | | |
39 | 0 | dst->cardinality = (int32_t)bitset_clear_list(dst->words, card, src->array, |
40 | 0 | (uint64_t)src->cardinality); |
41 | 0 | } |
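The doc comment above promises a dense result written into a pre-allocated bitset; the snippet below is an illustrative caller, not part of the covered file. The helper name example_array_negation is hypothetical, bitset_container_create() is assumed to be available from the included container headers, and the direct writes to the array fields mirror how this file itself manipulates the internal layout.

static void example_array_negation(void) {
    /* Hypothetical input: the sorted array {7, 9}. */
    array_container_t *a = array_container_create_given_capacity(2);
    a->array[0] = 7;
    a->array[1] = 9;
    a->cardinality = 2;

    bitset_container_t *b = bitset_container_create(); /* assumed constructor */
    array_container_negation(a, b);
    /* b now holds every 16-bit value except 7 and 9: cardinality 65534. */

    bitset_container_free(b);
    array_container_free(a);
}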
42 | | |
43 | | /* Negation across the entire range of the container |
44 | | * Compute the negation of src and write the result |
45 | | * to *dst. A true return value indicates a bitset result, |
46 | | * otherwise the result is an array container. |
47 | | * We assume that dst is not pre-allocated. In |
48 | | * case of failure, *dst will be NULL. |
49 | | */ |
50 | | bool bitset_container_negation( |
51 | | const bitset_container_t *src, container_t **dst |
52 | 0 | ){ |
53 | 0 | return bitset_container_negation_range(src, 0, (1 << 16), dst); |
54 | 0 | } |
55 | | |
56 | | /* inplace version */ |
57 | | /* |
58 | | * Same as bitset_container_negation except that if the output is to |
59 | | * be a bitset_container_t, then src is modified and no allocation |
60 | | * is made. |
61 | | * If the output is to be an array_container_t, then the caller is |
62 | | * responsible for freeing the container. |
63 | | * In all cases, the result is in *dst. |
64 | | */ |
65 | | bool bitset_container_negation_inplace( |
66 | | bitset_container_t *src, container_t **dst |
67 | 0 | ){ |
68 | 0 | return bitset_container_negation_range_inplace(src, 0, (1 << 16), dst); |
69 | 0 | } |
70 | | |
71 | | /* Negation across the entire range of the container. |
72 | | * Compute the negation of src and write the result |
73 | | * to *dst. Return values are the *_TYPECODES as defined in containers.h |
74 | | * We assume that dst is not pre-allocated. In |
75 | | * case of failure, *dst will be NULL. |
76 | | */ |
77 | 0 | int run_container_negation(const run_container_t *src, container_t **dst) { |
78 | 0 | return run_container_negation_range(src, 0, (1 << 16), dst); |
79 | 0 | } |
80 | | |
81 | | /* |
82 | | * Same as run_container_negation except that if the output is to |
83 | | * be a run_container_t, and has the capacity to hold the result, |
84 | | * then src is modified and no allocation is made. |
85 | | * |
86 | | * In all cases, the result is in *dst. |
87 | | */ |
88 | 0 | int run_container_negation_inplace(run_container_t *src, container_t **dst) { |
89 | 0 | return run_container_negation_range_inplace(src, 0, (1 << 16), dst); |
90 | 0 | } |
91 | | |
92 | | /* Negation across a range of the container. |
93 | | * Compute the negation of src and write the result |
94 | | * to *dst. Returns true if the result is a bitset container |
95 | | * and false for an array container. *dst is not preallocated. |
96 | | */ |
97 | | bool array_container_negation_range( |
98 | | const array_container_t *src, |
99 | | const int range_start, const int range_end, |
100 | | container_t **dst |
101 | 0 | ){ |
102 | | /* close port of the Java implementation */ |
103 | 0 | if (range_start >= range_end) { |
104 | 0 | *dst = array_container_clone(src); |
105 | 0 | return false; |
106 | 0 | } |
107 | | |
108 | 0 | int32_t start_index = |
109 | 0 | binarySearch(src->array, src->cardinality, (uint16_t)range_start); |
110 | 0 | if (start_index < 0) start_index = -start_index - 1; |
111 | | |
112 | 0 | int32_t last_index = |
113 | 0 | binarySearch(src->array, src->cardinality, (uint16_t)(range_end - 1)); |
114 | 0 | if (last_index < 0) last_index = -last_index - 2; |
115 | | |
116 | 0 | const int32_t current_values_in_range = last_index - start_index + 1; |
117 | 0 | const int32_t span_to_be_flipped = range_end - range_start; |
118 | 0 | const int32_t new_values_in_range = |
119 | 0 | span_to_be_flipped - current_values_in_range; |
120 | 0 | const int32_t cardinality_change = |
121 | 0 | new_values_in_range - current_values_in_range; |
122 | 0 | const int32_t new_cardinality = src->cardinality + cardinality_change; |
123 | | |
124 | 0 | if (new_cardinality > DEFAULT_MAX_SIZE) { |
125 | 0 | bitset_container_t *temp = bitset_container_from_array(src); |
126 | 0 | bitset_flip_range(temp->words, (uint32_t)range_start, |
127 | 0 | (uint32_t)range_end); |
128 | 0 | temp->cardinality = new_cardinality; |
129 | 0 | *dst = temp; |
130 | 0 | return true; |
131 | 0 | } |
132 | | |
133 | 0 | array_container_t *arr = |
134 | 0 | array_container_create_given_capacity(new_cardinality); |
135 | 0 | *dst = (container_t *)arr; |
136 | 0 | if (new_cardinality == 0) { |
137 | 0 | arr->cardinality = new_cardinality; |
138 | 0 | return false; // we are done. |
139 | 0 | } |
140 | | // copy stuff before the active area |
141 | 0 | memcpy(arr->array, src->array, start_index * sizeof(uint16_t)); |
142 | | |
143 | | // work on the range |
144 | 0 | int32_t out_pos = start_index, in_pos = start_index; |
145 | 0 | int32_t val_in_range = range_start; |
146 | 0 | for (; val_in_range < range_end && in_pos <= last_index; ++val_in_range) { |
147 | 0 | if ((uint16_t)val_in_range != src->array[in_pos]) { |
148 | 0 | arr->array[out_pos++] = (uint16_t)val_in_range; |
149 | 0 | } else { |
150 | 0 | ++in_pos; |
151 | 0 | } |
152 | 0 | } |
153 | 0 | for (; val_in_range < range_end; ++val_in_range) |
154 | 0 | arr->array[out_pos++] = (uint16_t)val_in_range; |
155 | | |
156 | | // content after the active range |
157 | 0 | memcpy(arr->array + out_pos, src->array + (last_index + 1), |
158 | 0 | (src->cardinality - (last_index + 1)) * sizeof(uint16_t)); |
159 | 0 | arr->cardinality = new_cardinality; |
160 | 0 | return false; |
161 | 0 | } |
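To make the cardinality bookkeeping above concrete, here is a worked instance on a hypothetical input (not taken from the covered file); example_array_negation_range is an illustrative helper name.

static void example_array_negation_range(void) {
    /* src = {3, 10, 11, 700}, flipped over [10, 20):
     *   current_values_in_range = 2   (10 and 11)
     *   span_to_be_flipped      = 10
     *   new_values_in_range     = 8
     *   cardinality_change      = +6
     *   new_cardinality         = 4 + 6 = 10, well under DEFAULT_MAX_SIZE,
     * so the array-to-array path is taken. */
    array_container_t *a = array_container_create_given_capacity(4);
    a->array[0] = 3;
    a->array[1] = 10;
    a->array[2] = 11;
    a->array[3] = 700;
    a->cardinality = 4;

    container_t *out = NULL;
    bool is_bitset = array_container_negation_range(a, 10, 20, &out);
    /* is_bitset is false; out holds {3, 12, 13, 14, 15, 16, 17, 18, 19, 700}. */
    (void)is_bitset;
    array_container_free((array_container_t *)out);
    array_container_free(a);
}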
162 | | |
163 | | /* Even when the result would fit, it is unclear how to make an |
164 | | * inplace version without inefficient copying. |
165 | | */ |
166 | | |
167 | | bool array_container_negation_range_inplace( |
168 | | array_container_t *src, |
169 | | const int range_start, const int range_end, |
170 | | container_t **dst |
171 | 0 | ){ |
172 | 0 | bool ans = array_container_negation_range(src, range_start, range_end, dst); |
173 | | // TODO : try a real inplace version |
174 | 0 | array_container_free(src); |
175 | 0 | return ans; |
176 | 0 | } |
177 | | |
178 | | /* Negation across a range of the container |
179 | | * Compute the negation of src and write the result |
180 | | * to *dst. A true return value indicates a bitset result, |
181 | | * otherwise the result is an array container. |
182 | | * We assume that dst is not pre-allocated. In |
183 | | * case of failure, *dst will be NULL. |
184 | | */ |
185 | | bool bitset_container_negation_range( |
186 | | const bitset_container_t *src, |
187 | | const int range_start, const int range_end, |
188 | | container_t **dst |
189 | 0 | ){ |
190 | | // TODO maybe consider density-based estimate |
191 | | // and sometimes build result directly as array, with |
192 | | // conversion back to bitset if wrong. Or determine |
193 | | // actual result cardinality, then go directly for the known final container. |
194 | | |
195 | | // keep computation using bitsets as long as possible. |
196 | 0 | bitset_container_t *t = bitset_container_clone(src); |
197 | 0 | bitset_flip_range(t->words, (uint32_t)range_start, (uint32_t)range_end); |
198 | 0 | t->cardinality = bitset_container_compute_cardinality(t); |
199 | | |
200 | 0 | if (t->cardinality > DEFAULT_MAX_SIZE) { |
201 | 0 | *dst = t; |
202 | 0 | return true; |
203 | 0 | } else { |
204 | 0 | *dst = array_container_from_bitset(t); |
205 | 0 | bitset_container_free(t); |
206 | 0 | return false; |
207 | 0 | } |
208 | 0 | } |
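A small sketch of the threshold decision above (not part of the covered file; example_bitset_negation_range and the bitset_container_create() constructor are assumptions): a nearly full flip leaves only a few set bits, so the internal clone is converted to an array and the original bitset is left untouched.

static void example_bitset_negation_range(void) {
    bitset_container_t *full = bitset_container_create(); /* assumed constructor */
    bitset_container_set_all(full); /* cardinality 65536 */

    container_t *out = NULL;
    bool is_bitset = bitset_container_negation_range(full, 0, (1 << 16) - 8, &out);
    /* Only the top 8 values stay set: 8 <= DEFAULT_MAX_SIZE, so is_bitset is
     * false, out is an array container, and the clone used internally has
     * already been freed. full itself is unchanged. */
    (void)is_bitset;
    array_container_free((array_container_t *)out);
    bitset_container_free(full);
}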
209 | | |
210 | | /* inplace version */ |
211 | | /* |
212 | | * Same as bitset_container_negation except that if the output is to |
213 | | * be a bitset_container_t, then src is modified and no allocation |
214 | | * is made. |
215 | | * If the output is to be an array_container_t, then the caller is |
216 | | * responsible for freeing the container. |
217 | | * In all cases, the result is in *dst. |
218 | | */ |
219 | | bool bitset_container_negation_range_inplace( |
220 | | bitset_container_t *src, |
221 | | const int range_start, const int range_end, |
222 | | container_t **dst |
223 | 0 | ){ |
224 | 0 | bitset_flip_range(src->words, (uint32_t)range_start, (uint32_t)range_end); |
225 | 0 | src->cardinality = bitset_container_compute_cardinality(src); |
226 | 0 | if (src->cardinality > DEFAULT_MAX_SIZE) { |
227 | 0 | *dst = src; |
228 | 0 | return true; |
229 | 0 | } |
230 | 0 | *dst = array_container_from_bitset(src); |
231 | 0 | bitset_container_free(src); |
232 | 0 | return false; |
233 | 0 | } |
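The ownership rule of the in-place variant is easy to misuse, so here is a minimal sketch (hypothetical helper, not part of the covered file): after the call only *dst may be touched, because src is either returned as the result or freed during conversion.

static void example_bitset_negation_range_inplace(void) {
    bitset_container_t *b = bitset_container_create(); /* assumed constructor */
    bitset_container_set_all(b);

    container_t *out = NULL;
    bool is_bitset = bitset_container_negation_range_inplace(b, 0, 1 << 16, &out);
    /* Flipping a full bitset over the whole range empties it: cardinality 0
     * is <= DEFAULT_MAX_SIZE, so b was freed internally, is_bitset is false,
     * and out is an (empty) array container. b must not be used here. */
    (void)is_bitset;
    array_container_free((array_container_t *)out);
}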
234 | | |
235 | | /* Negation across a range of the container. |
236 | | * Compute the negation of src and write the result |
237 | | * to *dst. Return values are the *_TYPECODES as defined in containers.h |
238 | | * We assume that dst is not pre-allocated. In |
239 | | * case of failure, *dst will be NULL. |
240 | | */ |
241 | | int run_container_negation_range( |
242 | | const run_container_t *src, |
243 | | const int range_start, const int range_end, |
244 | | container_t **dst |
245 | 0 | ){ |
246 | 0 | uint8_t return_typecode; |
247 | | |
248 | | // follows the Java implementation |
249 | 0 | if (range_end <= range_start) { |
250 | 0 | *dst = run_container_clone(src); |
251 | 0 | return RUN_CONTAINER_TYPE; |
252 | 0 | } |
253 | | |
254 | 0 | run_container_t *ans = run_container_create_given_capacity( |
255 | 0 | src->n_runs + 1); |
256 | 0 | int k = 0; |
257 | 0 | for (; k < src->n_runs && src->runs[k].value < range_start; ++k) { |
258 | 0 | ans->runs[k] = src->runs[k]; |
259 | 0 | ans->n_runs++; |
260 | 0 | } |
261 | | |
262 | 0 | run_container_smart_append_exclusive( |
263 | 0 | ans, (uint16_t)range_start, (uint16_t)(range_end - range_start - 1)); |
264 | | |
265 | 0 | for (; k < src->n_runs; ++k) { |
266 | 0 | run_container_smart_append_exclusive(ans, src->runs[k].value, |
267 | 0 | src->runs[k].length); |
268 | 0 | } |
269 | | |
270 | 0 | *dst = convert_run_to_efficient_container(ans, &return_typecode); |
271 | 0 | if (return_typecode != RUN_CONTAINER_TYPE) run_container_free(ans); |
272 | | |
273 | 0 | return return_typecode; |
274 | 0 | } |
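A sketch of the exclusive-append construction above (hypothetical helper, not part of the covered file): the whole flip range is appended first, and exclusively appending the original runs then carves them out of it. container_free from containers.h is used for cleanup because convert_run_to_efficient_container may hand back a container type other than a run.

static void example_run_negation_range(void) {
    run_container_t *r = run_container_create_given_capacity(1);
    r->runs[0] = MAKE_RLE16(100, 100); /* values 100..200 */
    r->n_runs = 1;

    container_t *out = NULL;
    int type = run_container_negation_range(r, 0, 1 << 16, &out);
    /* The complement is the pair of runs [0,99] and [201,65535]; two runs are
     * far cheaper than the equivalent array or bitset, so type is expected to
     * remain RUN_CONTAINER_TYPE, but dispatch on the returned typecode rather
     * than assuming it. */
    container_free(out, (uint8_t)type);
    run_container_free(r); /* the non-inplace version leaves src alone */
}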
275 | | |
276 | | /* |
277 | | * Same as run_container_negation except that if the output is to |
278 | | * be a run_container_t, and has the capacity to hold the result, |
279 | | * then src is modified and no allocation is made. |
280 | | * |
281 | | * In all cases, the result is in *dst. |
282 | | */ |
283 | | int run_container_negation_range_inplace( |
284 | | run_container_t *src, |
285 | | const int range_start, const int range_end, |
286 | | container_t **dst |
287 | 0 | ){ |
288 | 0 | uint8_t return_typecode; |
289 | | |
290 | 0 | if (range_end <= range_start) { |
291 | 0 | *dst = src; |
292 | 0 | return RUN_CONTAINER_TYPE; |
293 | 0 | } |
294 | | |
295 | | // TODO: efficient special case when range is 0 to 65535 inclusive |
296 | | |
297 | 0 | if (src->capacity == src->n_runs) { |
298 | | // no excess room; check further whether the result can fit in place |
299 | 0 | bool last_val_before_range = false; |
300 | 0 | bool first_val_in_range = false; |
301 | 0 | bool last_val_in_range = false; |
302 | 0 | bool first_val_past_range = false; |
303 | | |
304 | 0 | if (range_start > 0) |
305 | 0 | last_val_before_range = |
306 | 0 | run_container_contains(src, (uint16_t)(range_start - 1)); |
307 | 0 | first_val_in_range = run_container_contains(src, (uint16_t)range_start); |
308 | | |
309 | 0 | if (last_val_before_range == first_val_in_range) { |
310 | 0 | last_val_in_range = |
311 | 0 | run_container_contains(src, (uint16_t)(range_end - 1)); |
312 | 0 | if (range_end != 0x10000) |
313 | 0 | first_val_past_range = |
314 | 0 | run_container_contains(src, (uint16_t)range_end); |
315 | | |
316 | 0 | if (last_val_in_range == |
317 | 0 | first_val_past_range) { // no space for inplace |
318 | 0 | int ans = run_container_negation_range(src, range_start, |
319 | 0 | range_end, dst); |
320 | 0 | run_container_free(src); |
321 | 0 | return ans; |
322 | 0 | } |
323 | 0 | } |
324 | 0 | } |
325 | | // all other cases: result will fit |
326 | | |
327 | 0 | run_container_t *ans = src; |
328 | 0 | int my_nbr_runs = src->n_runs; |
329 | | |
330 | 0 | ans->n_runs = 0; |
331 | 0 | int k = 0; |
332 | 0 | for (; (k < my_nbr_runs) && (src->runs[k].value < range_start); ++k) { |
333 | | // ans->runs[k] = src->runs[k]; (would be self-copy) |
334 | 0 | ans->n_runs++; |
335 | 0 | } |
336 | | |
337 | | // as in the Java implementation, keep a one-run lookahead in locals: ans aliases src, so a smart append may overwrite a source run before it has been read |
338 | 0 | rle16_t buffered = MAKE_RLE16(0, 0); |
339 | 0 | rle16_t next = buffered; |
340 | 0 | if (k < my_nbr_runs) buffered = src->runs[k]; |
341 | | |
342 | 0 | run_container_smart_append_exclusive( |
343 | 0 | ans, (uint16_t)range_start, (uint16_t)(range_end - range_start - 1)); |
344 | | |
345 | 0 | for (; k < my_nbr_runs; ++k) { |
346 | 0 | if (k + 1 < my_nbr_runs) next = src->runs[k + 1]; |
347 | | |
348 | 0 | run_container_smart_append_exclusive(ans, buffered.value, |
349 | 0 | buffered.length); |
350 | 0 | buffered = next; |
351 | 0 | } |
352 | | |
353 | 0 | *dst = convert_run_to_efficient_container(ans, &return_typecode); |
354 | 0 | if (return_typecode != RUN_CONTAINER_TYPE) run_container_free(ans); |
355 | | |
356 | 0 | return return_typecode; |
357 | 0 | } |
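Finally, a sketch of the calling convention for the in-place run variant (hypothetical helper, not part of the covered file): src is consumed either way, so only *dst and the returned typecode remain meaningful afterwards.

static void example_run_negation_range_inplace(void) {
    run_container_t *r = run_container_create_given_capacity(2);
    r->runs[0] = MAKE_RLE16(0, 9); /* values 0..9 */
    r->n_runs = 1;

    container_t *out = NULL;
    int type = run_container_negation_range_inplace(r, 0, 1 << 16, &out);
    /* capacity (2) exceeds n_runs (1), so the result is built inside r itself:
     * the single run [10,65535]. Either way r must not be used again; it is
     * either out or was freed when the result was converted to another type. */
    container_free(out, (uint8_t)type);
}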
358 | | |
359 | | #ifdef __cplusplus |
360 | | } } } // extern "C" { namespace roaring { namespace internal { |
361 | | #endif |