Coverage Report

Created: 2023-01-15 06:02

/src/croaring/src/containers/mixed_negation.c

All executable lines below report a hit count of 0 (uncovered).

/*
 * mixed_negation.c
 *
 */

#include <assert.h>
#include <string.h>

#include <roaring/array_util.h>
#include <roaring/bitset_util.h>
#include <roaring/containers/containers.h>
#include <roaring/containers/convert.h>
#include <roaring/containers/mixed_negation.h>
#include <roaring/containers/run.h>

#ifdef __cplusplus
extern "C" { namespace roaring { namespace internal {
#endif

// TODO: make simplified and optimized negation code across
// the full range.

/* Negation across the entire range of the container.
 * Compute the negation of src and write the result
 * to *dst. The complement of a
 * sufficiently sparse set will always be dense, and hence a bitmap.
 * We assume that dst is pre-allocated and a valid bitset container.
 * There can be no in-place version.
 */
void array_container_negation(const array_container_t *src,
                              bitset_container_t *dst) {
    uint64_t card = UINT64_C(1 << 16);
    bitset_container_set_all(dst);

    if (src->cardinality == 0) {
        return;
    }

    dst->cardinality = (int32_t)bitset_clear_list(dst->words, card, src->array,
                                                  (uint64_t)src->cardinality);
}
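
/* Illustrative usage sketch, not part of the original file: negating a sparse
 * array over the full [0, 65536) range always yields a dense result, so the
 * caller supplies a pre-allocated bitset.  The constructors
 * array_container_create(), array_container_add() and bitset_container_create()
 * are assumed from the internal CRoaring container headers. */
static inline void example_array_container_negation(void) {
    array_container_t *a = array_container_create();
    for (uint16_t v = 0; v < 10; v++) array_container_add(a, v);

    bitset_container_t *b = bitset_container_create();
    array_container_negation(a, b);  // b now holds {10, 11, ..., 65535}
    assert(b->cardinality == (1 << 16) - 10);

    array_container_free(a);
    bitset_container_free(b);
}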

/* Negation across the entire range of the container.
 * Compute the negation of src and write the result
 * to *dst.  A true return value indicates a bitset result,
 * otherwise the result is an array container.
 * We assume that dst is not pre-allocated. In
 * case of failure, *dst will be NULL.
 */
bool bitset_container_negation(
    const bitset_container_t *src, container_t **dst
){
    return bitset_container_negation_range(src, 0, (1 << 16), dst);
}

/* inplace version */
/*
 * Same as bitset_container_negation except that if the output is to be a
 * bitset_container_t, then src is modified and no allocation is made.
 * If the output is to be an array_container_t, then the caller is responsible
 * for freeing the container.
 * In all cases, the result is in *dst.
 */
bool bitset_container_negation_inplace(
    bitset_container_t *src, container_t **dst
){
    return bitset_container_negation_range_inplace(src, 0, (1 << 16), dst);
}
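
/* Illustrative sketch, not part of the original file: negating a full bitset
 * leaves a small (here empty) complement, so bitset_container_negation() hands
 * back an array container and returns false; the caller owns *dst.
 * bitset_container_create() and bitset_container_set_all() are assumed from
 * the internal CRoaring headers. */
static inline void example_bitset_container_negation(void) {
    bitset_container_t *b = bitset_container_create();
    bitset_container_set_all(b);  // all 65536 bits set

    container_t *result = NULL;
    bool is_bitset = bitset_container_negation(b, &result);
    assert(!is_bitset);  // complement is empty, returned as an array container
    assert(((array_container_t *)result)->cardinality == 0);

    array_container_free((array_container_t *)result);
    bitset_container_free(b);
}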

/* Negation across the entire range of the container.
 * Compute the negation of src and write the result
 * to *dst.  Return values are the *_TYPECODES as defined in containers.h.
 * We assume that dst is not pre-allocated. In
 * case of failure, *dst will be NULL.
 */
int run_container_negation(const run_container_t *src, container_t **dst) {
    return run_container_negation_range(src, 0, (1 << 16), dst);
}

/*
 * Same as run_container_negation except that if the output is to be a
 * run_container_t and it has the capacity to hold the result,
 * then src is modified and no allocation is made.
 * In all cases, the result is in *dst.
 */
int run_container_negation_inplace(run_container_t *src, container_t **dst) {
    return run_container_negation_range_inplace(src, 0, (1 << 16), dst);
}
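
/* Illustrative sketch, not part of the original file: the run-container
 * negation routines return one of the *_TYPECODES from containers.h because
 * the complement may be best stored as a run, array, or bitset container.
 * run_container_create() and run_container_add() are assumed from run.h. */
static inline void example_run_container_negation(void) {
    run_container_t *r = run_container_create();
    run_container_add(r, 100);  // single run [100, 100]

    container_t *result = NULL;
    int type = run_container_negation(r, &result);
    // The complement {0..99, 101..65535} is two long runs, so a run container
    // is the expected outcome; free *dst according to the returned typecode.
    if (type == RUN_CONTAINER_TYPE) {
        run_container_free((run_container_t *)result);
    } else if (type == BITSET_CONTAINER_TYPE) {
        bitset_container_free((bitset_container_t *)result);
    } else {
        array_container_free((array_container_t *)result);
    }
    run_container_free(r);
}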

/* Negation across a range of the container.
 * Compute the negation of src and write the result
 * to *dst. Returns true if the result is a bitset container
 * and false for an array container.  *dst is not preallocated.
 */
bool array_container_negation_range(
    const array_container_t *src,
    const int range_start, const int range_end,
    container_t **dst
){
    /* close port of the Java implementation */
    if (range_start >= range_end) {
        *dst = array_container_clone(src);
        return false;
    }

    int32_t start_index =
        binarySearch(src->array, src->cardinality, (uint16_t)range_start);
    if (start_index < 0) start_index = -start_index - 1;

    int32_t last_index =
        binarySearch(src->array, src->cardinality, (uint16_t)(range_end - 1));
    if (last_index < 0) last_index = -last_index - 2;

    const int32_t current_values_in_range = last_index - start_index + 1;
    const int32_t span_to_be_flipped = range_end - range_start;
    const int32_t new_values_in_range =
        span_to_be_flipped - current_values_in_range;
    const int32_t cardinality_change =
        new_values_in_range - current_values_in_range;
    const int32_t new_cardinality = src->cardinality + cardinality_change;

    if (new_cardinality > DEFAULT_MAX_SIZE) {
        bitset_container_t *temp = bitset_container_from_array(src);
        bitset_flip_range(temp->words, (uint32_t)range_start,
                          (uint32_t)range_end);
        temp->cardinality = new_cardinality;
        *dst = temp;
        return true;
    }

    array_container_t *arr =
        array_container_create_given_capacity(new_cardinality);
    *dst = (container_t *)arr;
    if (new_cardinality == 0) {
        arr->cardinality = new_cardinality;
        return false;  // we are done.
    }
    // copy stuff before the active area
    memcpy(arr->array, src->array, start_index * sizeof(uint16_t));

    // work on the range
    int32_t out_pos = start_index, in_pos = start_index;
    int32_t val_in_range = range_start;
    for (; val_in_range < range_end && in_pos <= last_index; ++val_in_range) {
        if ((uint16_t)val_in_range != src->array[in_pos]) {
            arr->array[out_pos++] = (uint16_t)val_in_range;
        } else {
            ++in_pos;
        }
    }
    for (; val_in_range < range_end; ++val_in_range)
        arr->array[out_pos++] = (uint16_t)val_in_range;

    // content after the active range
    memcpy(arr->array + out_pos, src->array + (last_index + 1),
           (src->cardinality - (last_index + 1)) * sizeof(uint16_t));
    arr->cardinality = new_cardinality;
    return false;
}
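
/* Illustrative sketch, not part of the original file: flipping the sub-range
 * [4, 8) of {1, 5, 6}.  The range currently holds 2 of its 4 values, so the
 * new cardinality is 3 + (4 - 2) - 2 = 3 and the small result stays an array
 * container, {1, 4, 7}.  Constructors are assumed from the internal headers. */
static inline void example_array_container_negation_range(void) {
    array_container_t *a = array_container_create();
    array_container_add(a, 1);
    array_container_add(a, 5);
    array_container_add(a, 6);

    container_t *result = NULL;
    bool is_bitset = array_container_negation_range(a, 4, 8, &result);
    assert(!is_bitset);
    assert(((array_container_t *)result)->cardinality == 3);  // {1, 4, 7}

    array_container_free((array_container_t *)result);
    array_container_free(a);
}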

/* Even when the result would fit, it is unclear how to make an
 * inplace version without inefficient copying.
 */
bool array_container_negation_range_inplace(
    array_container_t *src,
    const int range_start, const int range_end,
    container_t **dst
){
    bool ans = array_container_negation_range(src, range_start, range_end, dst);
    // TODO: try a real inplace version
    array_container_free(src);
    return ans;
}

/* Negation across a range of the container.
 * Compute the negation of src and write the result
 * to *dst.  A true return value indicates a bitset result,
 * otherwise the result is an array container.
 * We assume that dst is not pre-allocated. In
 * case of failure, *dst will be NULL.
 */
bool bitset_container_negation_range(
    const bitset_container_t *src,
    const int range_start, const int range_end,
    container_t **dst
){
    // TODO: maybe consider a density-based estimate
    // and sometimes build the result directly as an array, with
    // conversion back to bitset if wrong.  Or determine the
    // actual result cardinality, then go directly for the known final container.

    // keep computation using bitsets as long as possible.
    bitset_container_t *t = bitset_container_clone(src);
    bitset_flip_range(t->words, (uint32_t)range_start, (uint32_t)range_end);
    t->cardinality = bitset_container_compute_cardinality(t);

    if (t->cardinality > DEFAULT_MAX_SIZE) {
        *dst = t;
        return true;
    } else {
        *dst = array_container_from_bitset(t);
        bitset_container_free(t);
        return false;
    }
}

/* inplace version */
/*
 * Same as bitset_container_negation_range except that if the output is to be a
 * bitset_container_t, then src is modified and no allocation is made.
 * If the output is to be an array_container_t, then the caller is responsible
 * for freeing the container.
 * In all cases, the result is in *dst.
 */
bool bitset_container_negation_range_inplace(
    bitset_container_t *src,
    const int range_start, const int range_end,
    container_t **dst
){
    bitset_flip_range(src->words, (uint32_t)range_start, (uint32_t)range_end);
    src->cardinality = bitset_container_compute_cardinality(src);
    if (src->cardinality > DEFAULT_MAX_SIZE) {
        *dst = src;
        return true;
    }
    *dst = array_container_from_bitset(src);
    bitset_container_free(src);
    return false;
}
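
/* Illustrative sketch, not part of the original file: with the in-place
 * variant, ownership of src transfers.  If the flipped bitset stays dense,
 * *dst simply aliases src; if it becomes sparse, src is freed internally and
 * *dst is a newly allocated array container.  bitset_container_create() and
 * bitset_container_set_all() are assumed from the internal headers. */
static inline void example_bitset_container_negation_range_inplace(void) {
    bitset_container_t *b = bitset_container_create();
    bitset_container_set_all(b);  // all 65536 bits set

    container_t *result = NULL;
    // flip only [0, 100): 100 bits drop out, the rest stay set -> still dense
    bool is_bitset = bitset_container_negation_range_inplace(b, 0, 100, &result);
    assert(is_bitset);
    assert(result == (container_t *)b);  // result aliases b; free exactly once
    bitset_container_free(b);
}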

/* Negation across a range of the container.
 * Compute the negation of src and write the result
 * to *dst. Return values are the *_TYPECODES as defined in containers.h.
 * We assume that dst is not pre-allocated. In
 * case of failure, *dst will be NULL.
 */
int run_container_negation_range(
    const run_container_t *src,
    const int range_start, const int range_end,
    container_t **dst
){
    uint8_t return_typecode;

    // follows the Java implementation
    if (range_end <= range_start) {
        *dst = run_container_clone(src);
        return RUN_CONTAINER_TYPE;
    }

    run_container_t *ans = run_container_create_given_capacity(src->n_runs + 1);
    int k = 0;
    for (; k < src->n_runs && src->runs[k].value < range_start; ++k) {
        ans->runs[k] = src->runs[k];
        ans->n_runs++;
    }

    run_container_smart_append_exclusive(
        ans, (uint16_t)range_start, (uint16_t)(range_end - range_start - 1));

    for (; k < src->n_runs; ++k) {
        run_container_smart_append_exclusive(ans, src->runs[k].value,
                                             src->runs[k].length);
    }

    *dst = convert_run_to_efficient_container(ans, &return_typecode);
    if (return_typecode != RUN_CONTAINER_TYPE) run_container_free(ans);

    return return_typecode;
}

/*
 * Same as run_container_negation_range except that if the output is to be a
 * run_container_t and it has the capacity to hold the result,
 * then src is modified and no allocation is made.
 * In all cases, the result is in *dst.
 */
int run_container_negation_range_inplace(
    run_container_t *src,
    const int range_start, const int range_end,
    container_t **dst
){
    uint8_t return_typecode;

    if (range_end <= range_start) {
        *dst = src;
        return RUN_CONTAINER_TYPE;
    }

    // TODO: efficient special case when range is 0 to 65535 inclusive

    if (src->capacity == src->n_runs) {
        // no excess room; more checking to see if the result can fit
        bool last_val_before_range = false;
        bool first_val_in_range = false;
        bool last_val_in_range = false;
        bool first_val_past_range = false;

        if (range_start > 0)
            last_val_before_range =
                run_container_contains(src, (uint16_t)(range_start - 1));
        first_val_in_range = run_container_contains(src, (uint16_t)range_start);

        if (last_val_before_range == first_val_in_range) {
            last_val_in_range =
                run_container_contains(src, (uint16_t)(range_end - 1));
            if (range_end != 0x10000)
                first_val_past_range =
                    run_container_contains(src, (uint16_t)range_end);

            if (last_val_in_range ==
                first_val_past_range) {  // no space for inplace
                int ans = run_container_negation_range(src, range_start,
                                                       range_end, dst);
                run_container_free(src);
                return ans;
            }
        }
    }
    // all other cases: result will fit

    run_container_t *ans = src;
    int my_nbr_runs = src->n_runs;

    ans->n_runs = 0;
    int k = 0;
    for (; (k < my_nbr_runs) && (src->runs[k].value < range_start); ++k) {
        // ans->runs[k] = src->runs[k]; (would be a self-copy)
        ans->n_runs++;
    }

    // as with the Java implementation, use locals to give ourselves a buffer of depth 1
    rle16_t buffered = MAKE_RLE16(0, 0);
    rle16_t next = buffered;
    if (k < my_nbr_runs) buffered = src->runs[k];

    run_container_smart_append_exclusive(
        ans, (uint16_t)range_start, (uint16_t)(range_end - range_start - 1));

    for (; k < my_nbr_runs; ++k) {
        if (k + 1 < my_nbr_runs) next = src->runs[k + 1];

        run_container_smart_append_exclusive(ans, buffered.value,
                                             buffered.length);
        buffered = next;
    }

    *dst = convert_run_to_efficient_container(ans, &return_typecode);
    if (return_typecode != RUN_CONTAINER_TYPE) run_container_free(ans);

    return return_typecode;
}
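
/* Illustrative sketch, not part of the original file: the in-place run
 * negation also takes ownership of src -- it is either reused for the result
 * or freed internally -- and the returned *_TYPECODE tells the caller what
 * *dst now is.  run_container_create() and run_container_add() are assumed
 * from run.h. */
static inline void example_run_container_negation_range_inplace(void) {
    run_container_t *r = run_container_create();
    run_container_add(r, 7);  // {7}

    container_t *result = NULL;
    // flip [0, 16): the result is {0..6, 8..15}
    int type = run_container_negation_range_inplace(r, 0, 16, &result);
    // do not touch r after this call; free *dst according to the typecode
    if (type == RUN_CONTAINER_TYPE) {
        run_container_free((run_container_t *)result);
    } else if (type == BITSET_CONTAINER_TYPE) {
        bitset_container_free((bitset_container_t *)result);
    } else {
        array_container_free((array_container_t *)result);
    }
}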

#ifdef __cplusplus
} } }  // extern "C" { namespace roaring { namespace internal {
#endif