/src/croaring/include/roaring/array_util.h
Line | Count | Source (jump to first uncovered line) |
1 | | #ifndef CROARING_ARRAY_UTIL_H |
2 | | #define CROARING_ARRAY_UTIL_H |
3 | | |
4 | | #include <stddef.h> // for size_t |
5 | | #include <stdint.h> |
6 | | |
7 | | #include <roaring/portability.h> |
8 | | |
9 | | #if CROARING_IS_X64 |
10 | | #ifndef CROARING_COMPILER_SUPPORTS_AVX512 |
11 | | #error "CROARING_COMPILER_SUPPORTS_AVX512 needs to be defined." |
12 | | #endif // CROARING_COMPILER_SUPPORTS_AVX512 |
13 | | #endif |
14 | | #if defined(__GNUC__) && !defined(__clang__) |
15 | | #pragma GCC diagnostic push |
16 | | #pragma GCC diagnostic ignored "-Wuninitialized" |
17 | | #pragma GCC diagnostic ignored "-Wmaybe-uninitialized" |
18 | | #endif |
19 | | #ifdef __cplusplus |
20 | | extern "C" { |
21 | | namespace roaring { |
22 | | namespace internal { |
23 | | #endif |
24 | | |
25 | | /* |
26 | | * Good old binary search. |
27 | | * Assumes that array is sorted, has logarithmic complexity. |
28 | | * if the result is x, then: |
29 | | * if ( x>0 ) you have array[x] = ikey |
30 | | * if ( x<0 ) then inserting ikey at position -x-1 in array (insuring that |
31 | | * array[-x-1]=ikey) keys the array sorted. |
32 | | */ |
33 | | inline int32_t binarySearch(const uint16_t *array, int32_t lenarray, |
34 | 6.19M | uint16_t ikey) { |
35 | 6.19M | int32_t low = 0; |
36 | 6.19M | int32_t high = lenarray - 1; |
37 | 29.0M | while (low <= high) { |
38 | 27.5M | int32_t middleIndex = (low + high) >> 1; |
39 | 27.5M | uint16_t middleValue = array[middleIndex]; |
40 | 27.5M | if (middleValue < ikey) { |
41 | 8.19M | low = middleIndex + 1; |
42 | 19.3M | } else if (middleValue > ikey) { |
43 | 14.6M | high = middleIndex - 1; |
44 | 14.6M | } else { |
45 | 4.69M | return middleIndex; |
46 | 4.69M | } |
47 | 27.5M | } |
48 | 1.49M | return -(low + 1); |
49 | 6.19M | } |
50 | | |
51 | | /** |
52 | | * Galloping search |
53 | | * Assumes that array is sorted, has logarithmic complexity. |
54 | | * if the result is x, then if x = length, you have that all values in array |
55 | | * between pos and length are smaller than min. otherwise returns the first |
56 | | * index x such that array[x] >= min. |
57 | | */ |
58 | | static inline int32_t advanceUntil(const uint16_t *array, int32_t pos, |
59 | 233k | int32_t length, uint16_t min) { |
60 | 233k | int32_t lower = pos + 1; |
61 | | |
62 | 233k | if ((lower >= length) || (array[lower] >= min)) { |
63 | 154k | return lower; |
64 | 154k | } |
65 | | |
66 | 78.6k | int32_t spansize = 1; |
67 | | |
68 | 207k | while ((lower + spansize < length) && (array[lower + spansize] < min)) { |
69 | 128k | spansize <<= 1; |
70 | 128k | } |
71 | 78.6k | int32_t upper = (lower + spansize < length) ? lower + spansize : length - 1; |
72 | | |
73 | 78.6k | if (array[upper] == min) { |
74 | 36.3k | return upper; |
75 | 36.3k | } |
76 | 42.3k | if (array[upper] < min) { |
77 | | // means |
78 | | // array |
79 | | // has no |
80 | | // item |
81 | | // >= min |
82 | | // pos = array.length; |
83 | 1.01k | return length; |
84 | 1.01k | } |
85 | | |
86 | | // we know that the next-smallest span was too small |
87 | 41.3k | lower += (spansize >> 1); |
88 | | |
89 | 41.3k | int32_t mid = 0; |
90 | 84.5k | while (lower + 1 != upper) { |
91 | 62.6k | mid = (lower + upper) >> 1; |
92 | 62.6k | if (array[mid] == min) { |
93 | 19.4k | return mid; |
94 | 43.2k | } else if (array[mid] < min) { |
95 | 18.3k | lower = mid; |
96 | 24.8k | } else { |
97 | 24.8k | upper = mid; |
98 | 24.8k | } |
99 | 62.6k | } |
100 | 21.8k | return upper; |
101 | 41.3k | } Unexecuted instantiation: croaring_fuzzer_cc.cc:roaring::internal::advanceUntil(unsigned short const*, int, int, unsigned short) Line | Count | Source | 59 | 637 | int32_t length, uint16_t min) { | 60 | 637 | int32_t lower = pos + 1; | 61 | | | 62 | 637 | if ((lower >= length) || (array[lower] >= min)) { | 63 | 637 | return lower; | 64 | 637 | } | 65 | | | 66 | 0 | int32_t spansize = 1; | 67 | |
| 68 | 0 | while ((lower + spansize < length) && (array[lower + spansize] < min)) { | 69 | 0 | spansize <<= 1; | 70 | 0 | } | 71 | 0 | int32_t upper = (lower + spansize < length) ? lower + spansize : length - 1; | 72 | |
| 73 | 0 | if (array[upper] == min) { | 74 | 0 | return upper; | 75 | 0 | } | 76 | 0 | if (array[upper] < min) { | 77 | | // means | 78 | | // array | 79 | | // has no | 80 | | // item | 81 | | // >= min | 82 | | // pos = array.length; | 83 | 0 | return length; | 84 | 0 | } | 85 | | | 86 | | // we know that the next-smallest span was too small | 87 | 0 | lower += (spansize >> 1); | 88 | |
| 89 | 0 | int32_t mid = 0; | 90 | 0 | while (lower + 1 != upper) { | 91 | 0 | mid = (lower + upper) >> 1; | 92 | 0 | if (array[mid] == min) { | 93 | 0 | return mid; | 94 | 0 | } else if (array[mid] < min) { | 95 | 0 | lower = mid; | 96 | 0 | } else { | 97 | 0 | upper = mid; | 98 | 0 | } | 99 | 0 | } | 100 | 0 | return upper; | 101 | 0 | } |
Unexecuted instantiation: roaring_array.c:advanceUntil array_util.c:advanceUntil Line | Count | Source | 59 | 17.8k | int32_t length, uint16_t min) { | 60 | 17.8k | int32_t lower = pos + 1; | 61 | | | 62 | 17.8k | if ((lower >= length) || (array[lower] >= min)) { | 63 | 5.85k | return lower; | 64 | 5.85k | } | 65 | | | 66 | 11.9k | int32_t spansize = 1; | 67 | | | 68 | 51.4k | while ((lower + spansize < length) && (array[lower + spansize] < min)) { | 69 | 39.4k | spansize <<= 1; | 70 | 39.4k | } | 71 | 11.9k | int32_t upper = (lower + spansize < length) ? lower + spansize : length - 1; | 72 | | | 73 | 11.9k | if (array[upper] == min) { | 74 | 3.01k | return upper; | 75 | 3.01k | } | 76 | 8.97k | if (array[upper] < min) { | 77 | | // means | 78 | | // array | 79 | | // has no | 80 | | // item | 81 | | // >= min | 82 | | // pos = array.length; | 83 | 180 | return length; | 84 | 180 | } | 85 | | | 86 | | // we know that the next-smallest span was too small | 87 | 8.79k | lower += (spansize >> 1); | 88 | | | 89 | 8.79k | int32_t mid = 0; | 90 | 27.6k | while (lower + 1 != upper) { | 91 | 23.5k | mid = (lower + upper) >> 1; | 92 | 23.5k | if (array[mid] == min) { | 93 | 4.74k | return mid; | 94 | 18.8k | } else if (array[mid] < min) { | 95 | 9.33k | lower = mid; | 96 | 9.48k | } else { | 97 | 9.48k | upper = mid; | 98 | 9.48k | } | 99 | 23.5k | } | 100 | 4.05k | return upper; | 101 | 8.79k | } |
Unexecuted instantiation: array.c:advanceUntil Unexecuted instantiation: bitset.c:advanceUntil Unexecuted instantiation: containers.c:advanceUntil Unexecuted instantiation: convert.c:advanceUntil mixed_intersection.c:advanceUntil Line | Count | Source | 59 | 125k | int32_t length, uint16_t min) { | 60 | 125k | int32_t lower = pos + 1; | 61 | | | 62 | 125k | if ((lower >= length) || (array[lower] >= min)) { | 63 | 71.9k | return lower; | 64 | 71.9k | } | 65 | | | 66 | 53.7k | int32_t spansize = 1; | 67 | | | 68 | 128k | while ((lower + spansize < length) && (array[lower + spansize] < min)) { | 69 | 75.0k | spansize <<= 1; | 70 | 75.0k | } | 71 | 53.7k | int32_t upper = (lower + spansize < length) ? lower + spansize : length - 1; | 72 | | | 73 | 53.7k | if (array[upper] == min) { | 74 | 26.8k | return upper; | 75 | 26.8k | } | 76 | 26.8k | if (array[upper] < min) { | 77 | | // means | 78 | | // array | 79 | | // has no | 80 | | // item | 81 | | // >= min | 82 | | // pos = array.length; | 83 | 400 | return length; | 84 | 400 | } | 85 | | | 86 | | // we know that the next-smallest span was too small | 87 | 26.4k | lower += (spansize >> 1); | 88 | | | 89 | 26.4k | int32_t mid = 0; | 90 | 47.2k | while (lower + 1 != upper) { | 91 | 33.2k | mid = (lower + upper) >> 1; | 92 | 33.2k | if (array[mid] == min) { | 93 | 12.4k | return mid; | 94 | 20.8k | } else if (array[mid] < min) { | 95 | 7.71k | lower = mid; | 96 | 13.1k | } else { | 97 | 13.1k | upper = mid; | 98 | 13.1k | } | 99 | 33.2k | } | 100 | 13.9k | return upper; | 101 | 26.4k | } |
Unexecuted instantiation: mixed_union.c:advanceUntil Unexecuted instantiation: mixed_equal.c:advanceUntil mixed_subset.c:advanceUntil Line | Count | Source | 59 | 324 | int32_t length, uint16_t min) { | 60 | 324 | int32_t lower = pos + 1; | 61 | | | 62 | 324 | if ((lower >= length) || (array[lower] >= min)) { | 63 | 172 | return lower; | 64 | 172 | } | 65 | | | 66 | 152 | int32_t spansize = 1; | 67 | | | 68 | 636 | while ((lower + spansize < length) && (array[lower + spansize] < min)) { | 69 | 484 | spansize <<= 1; | 70 | 484 | } | 71 | 152 | int32_t upper = (lower + spansize < length) ? lower + spansize : length - 1; | 72 | | | 73 | 152 | if (array[upper] == min) { | 74 | 0 | return upper; | 75 | 0 | } | 76 | 152 | if (array[upper] < min) { | 77 | | // means | 78 | | // array | 79 | | // has no | 80 | | // item | 81 | | // >= min | 82 | | // pos = array.length; | 83 | 10 | return length; | 84 | 10 | } | 85 | | | 86 | | // we know that the next-smallest span was too small | 87 | 142 | lower += (spansize >> 1); | 88 | | | 89 | 142 | int32_t mid = 0; | 90 | 452 | while (lower + 1 != upper) { | 91 | 310 | mid = (lower + upper) >> 1; | 92 | 310 | if (array[mid] == min) { | 93 | 0 | return mid; | 94 | 310 | } else if (array[mid] < min) { | 95 | 150 | lower = mid; | 96 | 160 | } else { | 97 | 160 | upper = mid; | 98 | 160 | } | 99 | 310 | } | 100 | 142 | return upper; | 101 | 142 | } |
Unexecuted instantiation: mixed_negation.c:advanceUntil Unexecuted instantiation: mixed_xor.c:advanceUntil mixed_andnot.c:advanceUntil Line | Count | Source | 59 | 88.7k | int32_t length, uint16_t min) { | 60 | 88.7k | int32_t lower = pos + 1; | 61 | | | 62 | 88.7k | if ((lower >= length) || (array[lower] >= min)) { | 63 | 75.9k | return lower; | 64 | 75.9k | } | 65 | | | 66 | 12.8k | int32_t spansize = 1; | 67 | | | 68 | 26.2k | while ((lower + spansize < length) && (array[lower + spansize] < min)) { | 69 | 13.4k | spansize <<= 1; | 70 | 13.4k | } | 71 | 12.8k | int32_t upper = (lower + spansize < length) ? lower + spansize : length - 1; | 72 | | | 73 | 12.8k | if (array[upper] == min) { | 74 | 6.45k | return upper; | 75 | 6.45k | } | 76 | 6.34k | if (array[upper] < min) { | 77 | | // means | 78 | | // array | 79 | | // has no | 80 | | // item | 81 | | // >= min | 82 | | // pos = array.length; | 83 | 425 | return length; | 84 | 425 | } | 85 | | | 86 | | // we know that the next-smallest span was too small | 87 | 5.92k | lower += (spansize >> 1); | 88 | | | 89 | 5.92k | int32_t mid = 0; | 90 | 9.18k | while (lower + 1 != upper) { | 91 | 5.52k | mid = (lower + upper) >> 1; | 92 | 5.52k | if (array[mid] == min) { | 93 | 2.25k | return mid; | 94 | 3.26k | } else if (array[mid] < min) { | 95 | 1.14k | lower = mid; | 96 | 2.12k | } else { | 97 | 2.12k | upper = mid; | 98 | 2.12k | } | 99 | 5.52k | } | 100 | 3.66k | return upper; | 101 | 5.92k | } |
Unexecuted instantiation: run.c:advanceUntil |
102 | | |
103 | | /** |
104 | | * Returns number of elements which are less than ikey. |
105 | | * Array elements must be unique and sorted. |
106 | | */ |
107 | | static inline int32_t count_less(const uint16_t *array, int32_t lenarray, |
108 | 13.5k | uint16_t ikey) { |
109 | 13.5k | if (lenarray == 0) return 0; |
110 | 11.0k | int32_t pos = binarySearch(array, lenarray, ikey); |
111 | 11.0k | return pos >= 0 ? pos : -(pos + 1); |
112 | 13.5k | } Unexecuted instantiation: croaring_fuzzer_cc.cc:roaring::internal::count_less(unsigned short const*, int, unsigned short) Line | Count | Source | 108 | 13.5k | uint16_t ikey) { | 109 | 13.5k | if (lenarray == 0) return 0; | 110 | 11.0k | int32_t pos = binarySearch(array, lenarray, ikey); | 111 | 11.0k | return pos >= 0 ? pos : -(pos + 1); | 112 | 13.5k | } |
Unexecuted instantiation: roaring_array.c:count_less Unexecuted instantiation: array_util.c:count_less Unexecuted instantiation: array.c:count_less Unexecuted instantiation: bitset.c:count_less Unexecuted instantiation: containers.c:count_less Unexecuted instantiation: convert.c:count_less Unexecuted instantiation: mixed_intersection.c:count_less Unexecuted instantiation: mixed_union.c:count_less Unexecuted instantiation: mixed_equal.c:count_less Unexecuted instantiation: mixed_subset.c:count_less Unexecuted instantiation: mixed_negation.c:count_less Unexecuted instantiation: mixed_xor.c:count_less Unexecuted instantiation: mixed_andnot.c:count_less Unexecuted instantiation: run.c:count_less |
113 | | |
114 | | /** |
115 | | * Returns number of elements which are greater than ikey. |
116 | | * Array elements must be unique and sorted. |
117 | | */ |
118 | | static inline int32_t count_greater(const uint16_t *array, int32_t lenarray, |
119 | 7.88k | uint16_t ikey) { |
120 | 7.88k | if (lenarray == 0) return 0; |
121 | 7.88k | int32_t pos = binarySearch(array, lenarray, ikey); |
122 | 7.88k | if (pos >= 0) { |
123 | 1.72k | return lenarray - (pos + 1); |
124 | 6.16k | } else { |
125 | 6.16k | return lenarray - (-pos - 1); |
126 | 6.16k | } |
127 | 7.88k | } Unexecuted instantiation: croaring_fuzzer_cc.cc:roaring::internal::count_greater(unsigned short const*, int, unsigned short) Line | Count | Source | 119 | 7.88k | uint16_t ikey) { | 120 | 7.88k | if (lenarray == 0) return 0; | 121 | 7.88k | int32_t pos = binarySearch(array, lenarray, ikey); | 122 | 7.88k | if (pos >= 0) { | 123 | 1.72k | return lenarray - (pos + 1); | 124 | 6.16k | } else { | 125 | 6.16k | return lenarray - (-pos - 1); | 126 | 6.16k | } | 127 | 7.88k | } |
Unexecuted instantiation: roaring_array.c:count_greater Unexecuted instantiation: array_util.c:count_greater Unexecuted instantiation: array.c:count_greater Unexecuted instantiation: bitset.c:count_greater Unexecuted instantiation: containers.c:count_greater Unexecuted instantiation: convert.c:count_greater Unexecuted instantiation: mixed_intersection.c:count_greater Unexecuted instantiation: mixed_union.c:count_greater Unexecuted instantiation: mixed_equal.c:count_greater Unexecuted instantiation: mixed_subset.c:count_greater Unexecuted instantiation: mixed_negation.c:count_greater Unexecuted instantiation: mixed_xor.c:count_greater Unexecuted instantiation: mixed_andnot.c:count_greater Unexecuted instantiation: run.c:count_greater |
128 | | |
129 | | /** |
130 | | * From Schlegel et al., Fast Sorted-Set Intersection using SIMD Instructions |
131 | | * Optimized by D. Lemire on May 3rd 2013 |
132 | | * |
133 | | * C should have capacity greater than the minimum of s_1 and s_b + 8 |
134 | | * where 8 is sizeof(__m128i)/sizeof(uint16_t). |
135 | | */ |
136 | | int32_t intersect_vector16(const uint16_t *__restrict__ A, size_t s_a, |
137 | | const uint16_t *__restrict__ B, size_t s_b, |
138 | | uint16_t *C); |
139 | | |
140 | | int32_t intersect_vector16_inplace(uint16_t *__restrict__ A, size_t s_a, |
141 | | const uint16_t *__restrict__ B, size_t s_b); |
142 | | |
143 | | /** |
144 | | * Take an array container and write it out to a 32-bit array, using base |
145 | | * as the offset. |
146 | | */ |
147 | | int array_container_to_uint32_array_vector16(void *vout, const uint16_t *array, |
148 | | size_t cardinality, uint32_t base); |
149 | | #if CROARING_COMPILER_SUPPORTS_AVX512 |
150 | | int avx512_array_container_to_uint32_array(void *vout, const uint16_t *array, |
151 | | size_t cardinality, uint32_t base); |
152 | | #endif |
153 | | /** |
154 | | * Compute the cardinality of the intersection using SSE4 instructions |
155 | | */ |
156 | | int32_t intersect_vector16_cardinality(const uint16_t *__restrict__ A, |
157 | | size_t s_a, |
158 | | const uint16_t *__restrict__ B, |
159 | | size_t s_b); |
160 | | |
161 | | /* Computes the intersection between one small and one large set of uint16_t. |
162 | | * Stores the result into buffer and return the number of elements. */ |
163 | | int32_t intersect_skewed_uint16(const uint16_t *smallarray, size_t size_s, |
164 | | const uint16_t *largearray, size_t size_l, |
165 | | uint16_t *buffer); |
166 | | |
167 | | /* Computes the size of the intersection between one small and one large set of |
168 | | * uint16_t. */ |
169 | | int32_t intersect_skewed_uint16_cardinality(const uint16_t *smallarray, |
170 | | size_t size_s, |
171 | | const uint16_t *largearray, |
172 | | size_t size_l); |
173 | | |
174 | | /* Check whether the size of the intersection between one small and one large |
175 | | * set of uint16_t is non-zero. */ |
176 | | bool intersect_skewed_uint16_nonempty(const uint16_t *smallarray, size_t size_s, |
177 | | const uint16_t *largearray, |
178 | | size_t size_l); |
179 | | /** |
180 | | * Generic intersection function. |
181 | | */ |
182 | | int32_t intersect_uint16(const uint16_t *A, const size_t lenA, |
183 | | const uint16_t *B, const size_t lenB, uint16_t *out); |
184 | | /** |
185 | | * Compute the size of the intersection (generic). |
186 | | */ |
187 | | int32_t intersect_uint16_cardinality(const uint16_t *A, const size_t lenA, |
188 | | const uint16_t *B, const size_t lenB); |
189 | | |
190 | | /** |
191 | | * Checking whether the size of the intersection is non-zero. |
192 | | */ |
193 | | bool intersect_uint16_nonempty(const uint16_t *A, const size_t lenA, |
194 | | const uint16_t *B, const size_t lenB); |
195 | | /** |
196 | | * Generic union function. |
197 | | */ |
198 | | size_t union_uint16(const uint16_t *set_1, size_t size_1, const uint16_t *set_2, |
199 | | size_t size_2, uint16_t *buffer); |
200 | | |
201 | | /** |
202 | | * Generic XOR function. |
203 | | */ |
204 | | int32_t xor_uint16(const uint16_t *array_1, int32_t card_1, |
205 | | const uint16_t *array_2, int32_t card_2, uint16_t *out); |
206 | | |
207 | | /** |
208 | | * Generic difference function (ANDNOT). |
209 | | */ |
210 | | int difference_uint16(const uint16_t *a1, int length1, const uint16_t *a2, |
211 | | int length2, uint16_t *a_out); |
212 | | |
213 | | /** |
214 | | * Generic intersection function. |
215 | | */ |
216 | | size_t intersection_uint32(const uint32_t *A, const size_t lenA, |
217 | | const uint32_t *B, const size_t lenB, uint32_t *out); |
218 | | |
219 | | /** |
220 | | * Generic intersection function, returns just the cardinality. |
221 | | */ |
222 | | size_t intersection_uint32_card(const uint32_t *A, const size_t lenA, |
223 | | const uint32_t *B, const size_t lenB); |
224 | | |
225 | | /** |
226 | | * Generic union function. |
227 | | */ |
228 | | size_t union_uint32(const uint32_t *set_1, size_t size_1, const uint32_t *set_2, |
229 | | size_t size_2, uint32_t *buffer); |
230 | | |
231 | | /** |
232 | | * A fast SSE-based union function. |
233 | | */ |
234 | | uint32_t union_vector16(const uint16_t *__restrict__ set_1, uint32_t size_1, |
235 | | const uint16_t *__restrict__ set_2, uint32_t size_2, |
236 | | uint16_t *__restrict__ buffer); |
237 | | /** |
238 | | * A fast SSE-based XOR function. |
239 | | */ |
240 | | uint32_t xor_vector16(const uint16_t *__restrict__ array1, uint32_t length1, |
241 | | const uint16_t *__restrict__ array2, uint32_t length2, |
242 | | uint16_t *__restrict__ output); |
243 | | |
244 | | /** |
245 | | * A fast SSE-based difference function. |
246 | | */ |
247 | | int32_t difference_vector16(const uint16_t *__restrict__ A, size_t s_a, |
248 | | const uint16_t *__restrict__ B, size_t s_b, |
249 | | uint16_t *C); |
250 | | |
251 | | /** |
252 | | * Generic union function, returns just the cardinality. |
253 | | */ |
254 | | size_t union_uint32_card(const uint32_t *set_1, size_t size_1, |
255 | | const uint32_t *set_2, size_t size_2); |
256 | | |
257 | | /** |
258 | | * combines union_uint16 and union_vector16 optimally |
259 | | */ |
260 | | size_t fast_union_uint16(const uint16_t *set_1, size_t size_1, |
261 | | const uint16_t *set_2, size_t size_2, |
262 | | uint16_t *buffer); |
263 | | |
264 | | bool memequals(const void *s1, const void *s2, size_t n); |
265 | | |
266 | | #ifdef __cplusplus |
267 | | } |
268 | | } |
269 | | } // extern "C" { namespace roaring { namespace internal { |
270 | | #endif |
271 | | #if defined(__GNUC__) && !defined(__clang__) |
272 | | #pragma GCC diagnostic pop |
273 | | #endif |
274 | | #endif |