/src/croaring/include/roaring/bitset_util.h
Line | Count | Source |
1 | | #ifndef CROARING_BITSET_UTIL_H |
2 | | #define CROARING_BITSET_UTIL_H |
3 | | |
4 | | #include <stdint.h> |
5 | | |
6 | | #include <roaring/portability.h> |
7 | | #include <roaring/utilasm.h> |
8 | | |
9 | | #if CROARING_IS_X64 |
10 | | #ifndef CROARING_COMPILER_SUPPORTS_AVX512 |
11 | | #error "CROARING_COMPILER_SUPPORTS_AVX512 needs to be defined." |
12 | | #endif // CROARING_COMPILER_SUPPORTS_AVX512 |
13 | | #endif |
14 | | #if defined(__GNUC__) && !defined(__clang__) |
15 | | #pragma GCC diagnostic push |
16 | | #pragma GCC diagnostic ignored "-Wuninitialized" |
17 | | #pragma GCC diagnostic ignored "-Wmaybe-uninitialized" |
18 | | #endif |
19 | | #ifdef __cplusplus |
20 | | extern "C" { |
21 | | namespace roaring { |
22 | | namespace internal { |
23 | | #endif |
24 | | |
/**
 * Set every bit whose index lies in the half-open range [start, end).
 *
 * @param words  bitset storage, 64 bit positions per uint64_t word; for a
 *               non-empty range, words[(end - 1) / 64] is accessed
 * @param start  index of the first bit to set (inclusive)
 * @param end    index one past the last bit to set (exclusive)
 *
 * An empty or inverted range (start >= end) leaves the bitset untouched.
 */
static inline void bitset_set_range(uint64_t *words, uint32_t start,
                                    uint32_t end) {
    /* Inverted ranges must be rejected as well: with start > end the word
     * indexes below would be computed from unrelated positions and bits
     * outside any sensible range would be set. */
    if (start >= end) return;
    const uint32_t firstword = start / 64;
    const uint32_t endword = (end - 1) / 64;
    /* Ones from bit (start % 64) up to bit 63. */
    const uint64_t head_mask = (~UINT64_C(0)) << (start % 64);
    /* Ones strictly below bit (end % 64). (~end + 1) is -end modulo 2^32, so
     * the shift count is (64 - end % 64) % 64, which yields an all-ones mask
     * when end is a multiple of 64. */
    const uint64_t tail_mask = (~UINT64_C(0)) >> ((~end + 1) % 64);
    if (firstword == endword) {
        words[firstword] |= head_mask & tail_mask;
        return;
    }
    words[firstword] |= head_mask;
    for (uint32_t i = firstword + 1; i < endword; i++) {
        words[i] = ~UINT64_C(0);
    }
    words[endword] |= tail_mask;
}
44 | | |
/**
 * Count the set bits whose index lies in the closed range
 * [start, start + lenminusone].
 *
 * @param words        bitset storage, 64 bit positions per uint64_t word
 * @param start        index of the first bit of the range
 * @param lenminusone  length of the range minus one
 * @return sum of roaring_hamming() over the masked words of the range
 */
static inline int bitset_lenrange_cardinality(const uint64_t *words,
                                              uint32_t start,
                                              uint32_t lenminusone) {
    const uint64_t all_ones = ~UINT64_C(0);
    const uint32_t head = start / 64;
    const uint32_t tail = (start + lenminusone) / 64;

    if (head == tail) {
        /* (lenminusone + 1) ones, moved up to the in-word offset of start. */
        const uint64_t window = (all_ones >> ((63 - lenminusone) % 64))
                                << (start % 64);
        return roaring_hamming(words[head] & window);
    }

    /* Partial first word: bits from (start % 64) upward. */
    int count = roaring_hamming(words[head] & (all_ones << (start % 64)));

    /* Whole words strictly between the first and the last. */
    for (uint32_t w = head + 1; w < tail; w++) {
        count += roaring_hamming(words[w]);
    }

    /* Partial last word: the shift count is -(start + lenminusone + 1)
     * modulo 64, which keeps the bits up to the last index of the range. */
    const uint64_t tail_mask =
        all_ones >> (((~start + 1) - lenminusone - 1) % 64);
    count += roaring_hamming(words[tail] & tail_mask);
    return count;
}
68 | | |
/**
 * Tell whether no bit is set in the closed index range
 * [start, start + lenminusone].
 *
 * @param words        bitset storage, 64 bit positions per uint64_t word
 * @param start        index of the first bit of the range
 * @param lenminusone  length of the range minus one
 * @return true when every bit of the range is zero, false otherwise
 */
static inline bool bitset_lenrange_empty(const uint64_t *words, uint32_t start,
                                         uint32_t lenminusone) {
    const uint64_t all_ones = ~UINT64_C(0);
    const uint32_t head = start / 64;
    const uint32_t tail = (start + lenminusone) / 64;

    if (head == tail) {
        /* (lenminusone + 1) ones, moved up to the in-word offset of start. */
        const uint64_t window = (all_ones >> ((63 - lenminusone) % 64))
                                << (start % 64);
        return (words[head] & window) == 0;
    }

    /* Partial first word: bits from (start % 64) upward. */
    if ((words[head] & (all_ones << (start % 64))) != 0) {
        return false;
    }

    /* Whole words strictly between the first and the last. */
    for (uint32_t w = head + 1; w < tail; w++) {
        if (words[w] != 0) {
            return false;
        }
    }

    /* Partial last word: the shift count is -(start + lenminusone + 1)
     * modulo 64, which keeps the bits up to the last index of the range. */
    const uint64_t tail_mask =
        all_ones >> (((~start + 1) - lenminusone - 1) % 64);
    return (words[tail] & tail_mask) == 0;
}
94 | | |
/**
 * Set every bit whose index lies in the closed range
 * [start, start + lenminusone].
 *
 * @param words        bitset storage, 64 bit positions per uint64_t word
 * @param start        index of the first bit to set
 * @param lenminusone  length of the range minus one
 */
static inline void bitset_set_lenrange(uint64_t *words, uint32_t start,
                                       uint32_t lenminusone) {
    const uint64_t all_ones = ~UINT64_C(0);
    const uint32_t head = start / 64;
    const uint32_t tail = (start + lenminusone) / 64;

    if (head == tail) {
        /* (lenminusone + 1) ones, moved up to the in-word offset of start. */
        const uint64_t run = all_ones >> ((63 - lenminusone) % 64);
        words[head] |= run << (start % 64);
        return;
    }

    /* Snapshot the last word before filling: the fill below stores two words
     * per step and may therefore overwrite words[tail]. */
    const uint64_t saved_tail = words[tail];

    words[head] |= all_ones << (start % 64);

    uint32_t w = head + 1;
    while (w < tail) {
        words[w + 1] = all_ones;
        words[w] = all_ones;
        w += 2;
    }

    /* Rebuild the last word from its saved content plus the bits of the
     * range that fall into it. */
    words[tail] =
        saved_tail | (all_ones >> (((~start + 1) - lenminusone - 1) % 64));
}
114 | | |
/**
 * Flip (toggle) every bit whose index lies in the half-open range
 * [start, end).
 *
 * @param words  bitset storage, 64 bit positions per uint64_t word; for a
 *               non-empty range, words[(end - 1) / 64] is accessed
 * @param start  index of the first bit to flip (inclusive)
 * @param end    index one past the last bit to flip (exclusive)
 *
 * An empty or inverted range (start >= end) leaves the bitset untouched.
 */
static inline void bitset_flip_range(uint64_t *words, uint32_t start,
                                     uint32_t end) {
    /* Inverted ranges must be rejected as well: with start > end the code
     * below would toggle bits that belong to no requested range. */
    if (start >= end) return;
    const uint32_t firstword = start / 64;
    const uint32_t endword = (end - 1) / 64;
    /* Pre-flip the bits below start in the first word. They get flipped a
     * second time below (by the whole-word inversion, or by the final XOR
     * when the range fits in one word) and so end up unchanged. */
    words[firstword] ^= ~((~UINT64_C(0)) << (start % 64));
    /* Invert every word before the last one, the first word included. */
    for (uint32_t i = firstword; i < endword; i++) {
        words[i] = ~words[i];
    }
    /* Toggle the bits strictly below (end % 64) in the last word; the mask is
     * all ones when end is a multiple of 64. */
    words[endword] ^= ((~UINT64_C(0)) >> ((~end + 1) % 64));
}
129 | | |
/**
 * Clear every bit whose index lies in the half-open range [start, end).
 *
 * @param words  bitset storage, 64 bit positions per uint64_t word; for a
 *               non-empty range, words[(end - 1) / 64] is accessed
 * @param start  index of the first bit to clear (inclusive)
 * @param end    index one past the last bit to clear (exclusive)
 *
 * An empty or inverted range (start >= end) leaves the bitset untouched.
 */
static inline void bitset_reset_range(uint64_t *words, uint32_t start,
                                      uint32_t end) {
    /* Inverted ranges must be rejected as well: with start > end the word
     * indexes below would be computed from unrelated positions and bits
     * outside any sensible range would be cleared. */
    if (start >= end) return;
    const uint32_t firstword = start / 64;
    const uint32_t endword = (end - 1) / 64;
    /* Ones from bit (start % 64) up to bit 63. */
    const uint64_t head_mask = (~UINT64_C(0)) << (start % 64);
    /* Ones strictly below bit (end % 64); all ones when end is a multiple of
     * 64, because (~end + 1) is -end modulo 2^32. */
    const uint64_t tail_mask = (~UINT64_C(0)) >> ((~end + 1) % 64);
    if (firstword == endword) {
        words[firstword] &= ~(head_mask & tail_mask);
        return;
    }
    words[firstword] &= ~head_mask;
    for (uint32_t i = firstword + 1; i < endword; i++) {
        words[i] = UINT64_C(0);
    }
    words[endword] &= ~tail_mask;
}
149 | | |
150 | | /* |
151 | | * Given a bitset containing "length" 64-bit words, write out the position |
152 | | * of all the set bits to "out", values start at "base". |
153 | | * |
154 | | * The "out" pointer should be sufficient to store the actual number of bits |
155 | | * set. |
156 | | * |
157 | | * Returns how many values were actually decoded. |
158 | | * |
159 | | * This function should only be expected to be faster than |
160 | | * bitset_extract_setbits |
161 | | * when the density of the bitset is high. |
162 | | * |
163 | | * This function uses AVX2 decoding. |
164 | | */ |
165 | | size_t bitset_extract_setbits_avx2(const uint64_t *words, size_t length, |
166 | | uint32_t *out, size_t outcapacity, |
167 | | uint32_t base); |
168 | | |
169 | | size_t bitset_extract_setbits_avx512(const uint64_t *words, size_t length, |
170 | | uint32_t *out, size_t outcapacity, |
171 | | uint32_t base); |
172 | | /* |
173 | | * Given a bitset containing "length" 64-bit words, write out the position |
174 | | * of all the set bits to "out", values start at "base". |
175 | | * |
176 | | * The "out" pointer should be sufficient to store the actual number of bits |
177 | | * set. |
178 | | * |
179 | | * Returns how many values were actually decoded. |
180 | | */ |
181 | | size_t bitset_extract_setbits(const uint64_t *words, size_t length, |
182 | | uint32_t *out, uint32_t base); |
183 | | |
184 | | /* |
185 | | * Given a bitset containing "length" 64-bit words, write out the position |
186 | | * of all the set bits to "out" as 16-bit integers, values start at "base" (can |
187 | | *be set to zero) |
188 | | * |
189 | | * The "out" pointer should be sufficient to store the actual number of bits |
190 | | *set. |
191 | | * |
192 | | * Returns how many values were actually decoded. |
193 | | * |
194 | | * This function should only be expected to be faster than |
195 | | *bitset_extract_setbits_uint16 |
196 | | * when the density of the bitset is high. |
197 | | * |
198 | | * This function uses SSE decoding. |
199 | | */ |
200 | | size_t bitset_extract_setbits_sse_uint16(const uint64_t *words, size_t length, |
201 | | uint16_t *out, size_t outcapacity, |
202 | | uint16_t base); |
203 | | |
204 | | size_t bitset_extract_setbits_avx512_uint16(const uint64_t *words, |
205 | | size_t length, uint16_t *out, |
206 | | size_t outcapacity, uint16_t base); |
207 | | |
208 | | /* |
209 | | * Given a bitset containing "length" 64-bit words, write out the position |
210 | | * of all the set bits to "out", values start at "base" |
211 | | * (can be set to zero) |
212 | | * |
213 | | * The "out" pointer should be sufficient to store the actual number of bits |
214 | | *set. |
215 | | * |
216 | | * Returns how many values were actually decoded. |
217 | | */ |
218 | | size_t bitset_extract_setbits_uint16(const uint64_t *words, size_t length, |
219 | | uint16_t *out, uint16_t base); |
220 | | |
221 | | /* |
222 | | * Given two bitsets containing "length" 64-bit words, write out the position |
223 | | * of all the common set bits to "out", values start at "base" |
224 | | * (can be set to zero) |
225 | | * |
226 | | * The "out" pointer should be sufficient to store the actual number of bits |
227 | | * set. |
228 | | * |
229 | | * Returns how many values were actually decoded. |
230 | | */ |
231 | | size_t bitset_extract_intersection_setbits_uint16( |
232 | | const uint64_t *__restrict__ words1, const uint64_t *__restrict__ words2, |
233 | | size_t length, uint16_t *out, uint16_t base); |
234 | | |
235 | | /* |
236 | | * Given a bitset having cardinality card, set all bit values in the list (there |
237 | | * are length of them) |
238 | | * and return the updated cardinality. This evidently assumes that the bitset |
239 | | * already contained data. |
240 | | */ |
241 | | uint64_t bitset_set_list_withcard(uint64_t *words, uint64_t card, |
242 | | const uint16_t *list, uint64_t length); |
243 | | /* |
244 | | * Given a bitset, set all bit values in the list (there |
245 | | * are length of them). |
246 | | */ |
247 | | void bitset_set_list(uint64_t *words, const uint16_t *list, uint64_t length); |
248 | | |
249 | | /* |
250 | | * Given a bitset having cardinality card, unset all bit values in the list |
251 | | * (there are length of them) |
252 | | * and return the updated cardinality. This evidently assumes that the bitset |
253 | | * already contained data. |
254 | | */ |
255 | | uint64_t bitset_clear_list(uint64_t *words, uint64_t card, const uint16_t *list, |
256 | | uint64_t length); |
257 | | |
258 | | /* |
259 | | * Given a bitset having cardinality card, toggle all bit values in the list |
260 | | * (there are length of them) |
261 | | * and return the updated cardinality. This evidently assumes that the bitset |
262 | | * already contained data. |
263 | | */ |
264 | | |
265 | | uint64_t bitset_flip_list_withcard(uint64_t *words, uint64_t card, |
266 | | const uint16_t *list, uint64_t length); |
267 | | |
268 | | void bitset_flip_list(uint64_t *words, const uint16_t *list, uint64_t length); |
269 | | |
270 | | #if CROARING_IS_X64 |
271 | | /*** |
272 | | * BEGIN Harley-Seal popcount functions. |
273 | | */ |
274 | | CROARING_TARGET_AVX2 |
/**
 * Population count of a 256-bit vector, reported per 64-bit lane.
 *
 * Each byte is split into its two 4-bit halves; the low half is looked up in
 * a table holding 4 + popcount(nibble) and the high half in a table holding
 * 4 - popcount(nibble). The sum of absolute differences of the two lookups
 * then adds popcount(low) + popcount(high) over each group of eight bytes.
 *
 * Not especially fast on its own, but convenient as a building block.
 */
static inline __m256i popcount256(__m256i v) {
    const __m256i nibble_mask = _mm256_set1_epi8(0x0f);

    /* 4 + popcount(n) for n = 0x0..0xf, repeated for both 128-bit halves. */
    const __m256i table_plus = _mm256_setr_epi8(
        4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8,
        4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8);

    /* 4 - popcount(n) for n = 0x0..0xf, repeated for both 128-bit halves. */
    const __m256i table_minus = _mm256_setr_epi8(
        4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0,
        4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0);

    const __m256i low_nibbles = _mm256_and_si256(v, nibble_mask);
    const __m256i high_nibbles =
        _mm256_and_si256(_mm256_srli_epi16(v, 4), nibble_mask);

    const __m256i plus_counts = _mm256_shuffle_epi8(table_plus, low_nibbles);
    const __m256i minus_counts = _mm256_shuffle_epi8(table_minus, high_nibbles);

    /* |(4 + a) - (4 - b)| == a + b, accumulated over eight bytes at a time. */
    return _mm256_sad_epu8(plus_counts, minus_counts);
}
308 | | CROARING_UNTARGET_AVX2 |
309 | | |
310 | | CROARING_TARGET_AVX2 |
/**
 * Bitwise carry-save adder over 256-bit vectors.
 *
 * For every bit position, adds the three input bits a, b and c and stores the
 * low bit of the sum in *l and the carry bit in *h.
 */
static inline void CSA(__m256i *h, __m256i *l, __m256i a, __m256i b,
                       __m256i c) {
    const __m256i a_xor_b = _mm256_xor_si256(a, b);
    const __m256i carry_from_ab = _mm256_and_si256(a, b);
    const __m256i carry_from_c = _mm256_and_si256(a_xor_b, c);
    *h = _mm256_or_si256(carry_from_ab, carry_from_c);
    *l = _mm256_xor_si256(a_xor_b, c);
}
320 | | CROARING_UNTARGET_AVX2 |
321 | | |
322 | | CROARING_TARGET_AVX2 |
323 | | /** |
324 | | * Fast Harley-Seal AVX population count function |
325 | | */ |
326 | | inline static uint64_t avx2_harley_seal_popcount256(const __m256i *data, |
327 | 3.29k | const uint64_t size) { |
328 | 3.29k | __m256i total = _mm256_setzero_si256(); |
329 | 3.29k | __m256i ones = _mm256_setzero_si256(); |
330 | 3.29k | __m256i twos = _mm256_setzero_si256(); |
331 | 3.29k | __m256i fours = _mm256_setzero_si256(); |
332 | 3.29k | __m256i eights = _mm256_setzero_si256(); |
333 | 3.29k | __m256i sixteens = _mm256_setzero_si256(); |
334 | 3.29k | __m256i twosA, twosB, foursA, foursB, eightsA, eightsB; |
335 | | |
336 | 3.29k | const uint64_t limit = size - size % 16; |
337 | 3.29k | uint64_t i = 0; |
338 | | |
339 | 55.9k | for (; i < limit; i += 16) { |
340 | 52.6k | CSA(&twosA, &ones, ones, _mm256_lddqu_si256(data + i), |
341 | 52.6k | _mm256_lddqu_si256(data + i + 1)); |
342 | 52.6k | CSA(&twosB, &ones, ones, _mm256_lddqu_si256(data + i + 2), |
343 | 52.6k | _mm256_lddqu_si256(data + i + 3)); |
344 | 52.6k | CSA(&foursA, &twos, twos, twosA, twosB); |
345 | 52.6k | CSA(&twosA, &ones, ones, _mm256_lddqu_si256(data + i + 4), |
346 | 52.6k | _mm256_lddqu_si256(data + i + 5)); |
347 | 52.6k | CSA(&twosB, &ones, ones, _mm256_lddqu_si256(data + i + 6), |
348 | 52.6k | _mm256_lddqu_si256(data + i + 7)); |
349 | 52.6k | CSA(&foursB, &twos, twos, twosA, twosB); |
350 | 52.6k | CSA(&eightsA, &fours, fours, foursA, foursB); |
351 | 52.6k | CSA(&twosA, &ones, ones, _mm256_lddqu_si256(data + i + 8), |
352 | 52.6k | _mm256_lddqu_si256(data + i + 9)); |
353 | 52.6k | CSA(&twosB, &ones, ones, _mm256_lddqu_si256(data + i + 10), |
354 | 52.6k | _mm256_lddqu_si256(data + i + 11)); |
355 | 52.6k | CSA(&foursA, &twos, twos, twosA, twosB); |
356 | 52.6k | CSA(&twosA, &ones, ones, _mm256_lddqu_si256(data + i + 12), |
357 | 52.6k | _mm256_lddqu_si256(data + i + 13)); |
358 | 52.6k | CSA(&twosB, &ones, ones, _mm256_lddqu_si256(data + i + 14), |
359 | 52.6k | _mm256_lddqu_si256(data + i + 15)); |
360 | 52.6k | CSA(&foursB, &twos, twos, twosA, twosB); |
361 | 52.6k | CSA(&eightsB, &fours, fours, foursA, foursB); |
362 | 52.6k | CSA(&sixteens, &eights, eights, eightsA, eightsB); |
363 | | |
364 | 52.6k | total = _mm256_add_epi64(total, popcount256(sixteens)); |
365 | 52.6k | } |
366 | | |
367 | 3.29k | total = _mm256_slli_epi64(total, 4); // * 16 |
368 | 3.29k | total = _mm256_add_epi64( |
369 | 3.29k | total, _mm256_slli_epi64(popcount256(eights), 3)); // += 8 * ... |
370 | 3.29k | total = _mm256_add_epi64( |
371 | 3.29k | total, _mm256_slli_epi64(popcount256(fours), 2)); // += 4 * ... |
372 | 3.29k | total = _mm256_add_epi64( |
373 | 3.29k | total, _mm256_slli_epi64(popcount256(twos), 1)); // += 2 * ... |
374 | 3.29k | total = _mm256_add_epi64(total, popcount256(ones)); |
375 | 3.29k | for (; i < size; i++) |
376 | 0 | total = |
377 | 0 | _mm256_add_epi64(total, popcount256(_mm256_lddqu_si256(data + i))); |
378 | | |
379 | 3.29k | return (uint64_t)(_mm256_extract_epi64(total, 0)) + |
380 | 3.29k | (uint64_t)(_mm256_extract_epi64(total, 1)) + |
381 | 3.29k | (uint64_t)(_mm256_extract_epi64(total, 2)) + |
382 | 3.29k | (uint64_t)(_mm256_extract_epi64(total, 3)); |
383 | 3.29k | } Unexecuted instantiation: croaring_fuzzer_cc.cc:roaring::internal::avx2_harley_seal_popcount256(long long __vector(4) const*, unsigned long) Unexecuted instantiation: roaring.c:avx2_harley_seal_popcount256 Unexecuted instantiation: roaring_array.c:avx2_harley_seal_popcount256 Unexecuted instantiation: bitset_util.c:avx2_harley_seal_popcount256 bitset.c:avx2_harley_seal_popcount256 Line | Count | Source | 327 | 3.29k | const uint64_t size) { | 328 | 3.29k | __m256i total = _mm256_setzero_si256(); | 329 | 3.29k | __m256i ones = _mm256_setzero_si256(); | 330 | 3.29k | __m256i twos = _mm256_setzero_si256(); | 331 | 3.29k | __m256i fours = _mm256_setzero_si256(); | 332 | 3.29k | __m256i eights = _mm256_setzero_si256(); | 333 | 3.29k | __m256i sixteens = _mm256_setzero_si256(); | 334 | 3.29k | __m256i twosA, twosB, foursA, foursB, eightsA, eightsB; | 335 | | | 336 | 3.29k | const uint64_t limit = size - size % 16; | 337 | 3.29k | uint64_t i = 0; | 338 | | | 339 | 55.9k | for (; i < limit; i += 16) { | 340 | 52.6k | CSA(&twosA, &ones, ones, _mm256_lddqu_si256(data + i), | 341 | 52.6k | _mm256_lddqu_si256(data + i + 1)); | 342 | 52.6k | CSA(&twosB, &ones, ones, _mm256_lddqu_si256(data + i + 2), | 343 | 52.6k | _mm256_lddqu_si256(data + i + 3)); | 344 | 52.6k | CSA(&foursA, &twos, twos, twosA, twosB); | 345 | 52.6k | CSA(&twosA, &ones, ones, _mm256_lddqu_si256(data + i + 4), | 346 | 52.6k | _mm256_lddqu_si256(data + i + 5)); | 347 | 52.6k | CSA(&twosB, &ones, ones, _mm256_lddqu_si256(data + i + 6), | 348 | 52.6k | _mm256_lddqu_si256(data + i + 7)); | 349 | 52.6k | CSA(&foursB, &twos, twos, twosA, twosB); | 350 | 52.6k | CSA(&eightsA, &fours, fours, foursA, foursB); | 351 | 52.6k | CSA(&twosA, &ones, ones, _mm256_lddqu_si256(data + i + 8), | 352 | 52.6k | _mm256_lddqu_si256(data + i + 9)); | 353 | 52.6k | CSA(&twosB, &ones, ones, _mm256_lddqu_si256(data + i + 10), | 354 | 52.6k | _mm256_lddqu_si256(data + i + 11)); | 355 | 52.6k | CSA(&foursA, &twos, twos, 
twosA, twosB); | 356 | 52.6k | CSA(&twosA, &ones, ones, _mm256_lddqu_si256(data + i + 12), | 357 | 52.6k | _mm256_lddqu_si256(data + i + 13)); | 358 | 52.6k | CSA(&twosB, &ones, ones, _mm256_lddqu_si256(data + i + 14), | 359 | 52.6k | _mm256_lddqu_si256(data + i + 15)); | 360 | 52.6k | CSA(&foursB, &twos, twos, twosA, twosB); | 361 | 52.6k | CSA(&eightsB, &fours, fours, foursA, foursB); | 362 | 52.6k | CSA(&sixteens, &eights, eights, eightsA, eightsB); | 363 | | | 364 | 52.6k | total = _mm256_add_epi64(total, popcount256(sixteens)); | 365 | 52.6k | } | 366 | | | 367 | 3.29k | total = _mm256_slli_epi64(total, 4); // * 16 | 368 | 3.29k | total = _mm256_add_epi64( | 369 | 3.29k | total, _mm256_slli_epi64(popcount256(eights), 3)); // += 8 * ... | 370 | 3.29k | total = _mm256_add_epi64( | 371 | 3.29k | total, _mm256_slli_epi64(popcount256(fours), 2)); // += 4 * ... | 372 | 3.29k | total = _mm256_add_epi64( | 373 | 3.29k | total, _mm256_slli_epi64(popcount256(twos), 1)); // += 2 * ... | 374 | 3.29k | total = _mm256_add_epi64(total, popcount256(ones)); | 375 | 3.29k | for (; i < size; i++) | 376 | 0 | total = | 377 | 0 | _mm256_add_epi64(total, popcount256(_mm256_lddqu_si256(data + i))); | 378 | | | 379 | 3.29k | return (uint64_t)(_mm256_extract_epi64(total, 0)) + | 380 | 3.29k | (uint64_t)(_mm256_extract_epi64(total, 1)) + | 381 | 3.29k | (uint64_t)(_mm256_extract_epi64(total, 2)) + | 382 | | (uint64_t)(_mm256_extract_epi64(total, 3)); | 383 | 3.29k | } |
Unexecuted instantiation: containers.c:avx2_harley_seal_popcount256 Unexecuted instantiation: convert.c:avx2_harley_seal_popcount256 Unexecuted instantiation: mixed_intersection.c:avx2_harley_seal_popcount256 Unexecuted instantiation: mixed_union.c:avx2_harley_seal_popcount256 Unexecuted instantiation: mixed_negation.c:avx2_harley_seal_popcount256 Unexecuted instantiation: mixed_xor.c:avx2_harley_seal_popcount256 Unexecuted instantiation: mixed_andnot.c:avx2_harley_seal_popcount256 Unexecuted instantiation: roaring64.c:avx2_harley_seal_popcount256 |
384 | | CROARING_UNTARGET_AVX2 |
385 | | |
/*
 * Building blocks for CROARING_AVXPOPCNTFNC (below).
 *
 * These helpers are not general purpose: they expand inside the bodies that
 * CROARING_AVXPOPCNTFNC generates and refer directly to the locals declared
 * there (data1, data2, out, size, i, limit, A1, A2, total, ones, twos, fours,
 * eights, sixteens and the *A / *B temporaries).
 */

/* Locals shared by both generated functions. `limit` is `size` rounded down
 * to a multiple of 16; the unrolled loop stops there. */
#define CROARING_AVX2_HS_LOCALS                                                \
    __m256i total = _mm256_setzero_si256();                                    \
    __m256i ones = _mm256_setzero_si256();                                     \
    __m256i twos = _mm256_setzero_si256();                                     \
    __m256i fours = _mm256_setzero_si256();                                    \
    __m256i eights = _mm256_setzero_si256();                                   \
    __m256i sixteens = _mm256_setzero_si256();                                 \
    __m256i twosA, twosB, foursA, foursB, eightsA, eightsB;                    \
    __m256i A1, A2;                                                            \
    const uint64_t limit = size - size % 16;                                   \
    uint64_t i = 0;

/* avx_intrinsic(data1[k], data2[k]) using unaligned 256-bit loads. */
#define CROARING_AVX2_BINOP_AT(avx_intrinsic, k)                               \
    avx_intrinsic(_mm256_lddqu_si256(data1 + (k)),                             \
                  _mm256_lddqu_si256(data2 + (k)))

/* Combine words k and k+1 and feed both into the `ones` level through CSA;
 * the second CSA output lands in `carry`. */
#define CROARING_AVX2_CSA_PAIR(avx_intrinsic, carry, k)                        \
    A1 = CROARING_AVX2_BINOP_AT(avx_intrinsic, k);                             \
    A2 = CROARING_AVX2_BINOP_AT(avx_intrinsic, (k) + 1);                       \
    CSA(&carry, &ones, ones, A1, A2);

/* Same as CROARING_AVX2_CSA_PAIR, but each combined word is also written to
 * `out` at the index it was read from (unaligned store). */
#define CROARING_AVX2_CSA_PAIR_STORE(avx_intrinsic, carry, k)                  \
    A1 = CROARING_AVX2_BINOP_AT(avx_intrinsic, k);                             \
    _mm256_storeu_si256(out + (k), A1);                                        \
    A2 = CROARING_AVX2_BINOP_AT(avx_intrinsic, (k) + 1);                       \
    _mm256_storeu_si256(out + (k) + 1, A2);                                    \
    CSA(&carry, &ones, ones, A1, A2);

/*
 * One round over the 16 words starting at index i. PAIR is either
 * CROARING_AVX2_CSA_PAIR or CROARING_AVX2_CSA_PAIR_STORE. Eight pair steps
 * are reduced through the twos / fours / eights levels; only `sixteens` is
 * popcounted per round and accumulated (unscaled) into `total`.
 * The statement order is significant: each CSA consumes the outputs of the
 * ones before it.
 */
#define CROARING_AVX2_HS_ROUND(PAIR, avx_intrinsic)                            \
    PAIR(avx_intrinsic, twosA, i)                                              \
    PAIR(avx_intrinsic, twosB, i + 2)                                          \
    CSA(&foursA, &twos, twos, twosA, twosB);                                   \
    PAIR(avx_intrinsic, twosA, i + 4)                                          \
    PAIR(avx_intrinsic, twosB, i + 6)                                          \
    CSA(&foursB, &twos, twos, twosA, twosB);                                   \
    CSA(&eightsA, &fours, fours, foursA, foursB);                              \
    PAIR(avx_intrinsic, twosA, i + 8)                                          \
    PAIR(avx_intrinsic, twosB, i + 10)                                         \
    CSA(&foursA, &twos, twos, twosA, twosB);                                   \
    PAIR(avx_intrinsic, twosA, i + 12)                                         \
    PAIR(avx_intrinsic, twosB, i + 14)                                         \
    CSA(&foursB, &twos, twos, twosA, twosB);                                   \
    CSA(&eightsB, &fours, fours, foursA, foursB);                              \
    CSA(&sixteens, &eights, eights, eightsA, eightsB);                         \
    total = _mm256_add_epi64(total, popcount256(sixteens));

/* Apply the level weights after the unrolled loop:
 * total = 16 * total + 8 * pc(eights) + 4 * pc(fours) + 2 * pc(twos)
 *         + pc(ones). */
#define CROARING_AVX2_HS_FOLD                                                  \
    total = _mm256_slli_epi64(total, 4);                                       \
    total = _mm256_add_epi64(total,                                            \
                             _mm256_slli_epi64(popcount256(eights), 3));       \
    total = _mm256_add_epi64(total,                                            \
                             _mm256_slli_epi64(popcount256(fours), 2));        \
    total = _mm256_add_epi64(total,                                            \
                             _mm256_slli_epi64(popcount256(twos), 1));         \
    total = _mm256_add_epi64(total, popcount256(ones));

/* Horizontal sum of the four 64-bit lanes of `total`. */
#define CROARING_AVX2_HS_LANE_SUM                                              \
    ((uint64_t)(_mm256_extract_epi64(total, 0)) +                              \
     (uint64_t)(_mm256_extract_epi64(total, 1)) +                              \
     (uint64_t)(_mm256_extract_epi64(total, 2)) +                              \
     (uint64_t)(_mm256_extract_epi64(total, 3)))

/*
 * CROARING_AVXPOPCNTFNC(opname, avx_intrinsic) defines two functions:
 *
 *   uint64_t avx2_harley_seal_popcount256_<opname>(data1, data2, size)
 *       Returns the number of set bits in avx_intrinsic(data1[k], data2[k])
 *       summed over k in [0, size). `size` is a count of 256-bit words.
 *
 *   uint64_t avx2_harley_seal_popcount256andstore_<opname>(data1, data2,
 *                                                          out, size)
 *       Same count, and additionally writes every combined word to out[k].
 *       All three pointers are declared __restrict__.
 *
 * opname        - token pasted onto the generated function names
 * avx_intrinsic - two-operand __m256i intrinsic, always invoked as
 *                 avx_intrinsic(word of data1, word of data2)
 *
 * Both functions process 16 words per iteration (see
 * CROARING_AVX2_HS_ROUND) and handle the remaining size % 16 words one at a
 * time with popcount256. All loads and stores are the unaligned variants.
 * CSA and popcount256 are defined elsewhere in this header; CSA is presumably
 * the carry-save adder of the Harley-Seal scheme - confirm against its
 * definition.
 */
#define CROARING_AVXPOPCNTFNC(opname, avx_intrinsic)                           \
    static inline uint64_t avx2_harley_seal_popcount256_##opname(              \
        const __m256i *data1, const __m256i *data2, const uint64_t size) {     \
        CROARING_AVX2_HS_LOCALS                                                \
        for (; i < limit; i += 16) {                                           \
            CROARING_AVX2_HS_ROUND(CROARING_AVX2_CSA_PAIR, avx_intrinsic)      \
        }                                                                      \
        CROARING_AVX2_HS_FOLD                                                  \
        for (; i < size; i++) {                                                \
            A1 = CROARING_AVX2_BINOP_AT(avx_intrinsic, i);                     \
            total = _mm256_add_epi64(total, popcount256(A1));                  \
        }                                                                      \
        return CROARING_AVX2_HS_LANE_SUM;                                      \
    }                                                                          \
    static inline uint64_t avx2_harley_seal_popcount256andstore_##opname(      \
        const __m256i *__restrict__ data1, const __m256i *__restrict__ data2,  \
        __m256i *__restrict__ out, const uint64_t size) {                      \
        CROARING_AVX2_HS_LOCALS                                                \
        for (; i < limit; i += 16) {                                           \
            CROARING_AVX2_HS_ROUND(CROARING_AVX2_CSA_PAIR_STORE,               \
                                   avx_intrinsic)                              \
        }                                                                      \
        CROARING_AVX2_HS_FOLD                                                  \
        for (; i < size; i++) {                                                \
            A1 = CROARING_AVX2_BINOP_AT(avx_intrinsic, i);                     \
            _mm256_storeu_si256(out + i, A1);                                  \
            total = _mm256_add_epi64(total, popcount256(A1));                  \
        }                                                                      \
        return CROARING_AVX2_HS_LANE_SUM;                                      \
    }
565 | | |
/*
 * Instantiate the AVX2 Harley-Seal helpers for each binary operation.
 * Every CROARING_AVXPOPCNTFNC(op, intrinsic) line emits two functions,
 * avx2_harley_seal_popcount256_<op> and
 * avx2_harley_seal_popcount256andstore_<op>, and each instantiation sits in
 * its own CROARING_TARGET_AVX2 / CROARING_UNTARGET_AVX2 pair.
 *
 * "union" and "intersection" are generated from the same intrinsics as "or"
 * and "and" respectively, i.e. they are the same code under a second name.
 *
 * NOTE(review): per Intel's documentation _mm256_andnot_si256(a, b) computes
 * (~a) & b, so the "andnot" variants would operate on (~data1) & data2 -
 * confirm that call sites pass their operands in the intended order.
 */
566 | | CROARING_TARGET_AVX2 |
567 | 0 | CROARING_AVXPOPCNTFNC(or, _mm256_or_si256) Unexecuted instantiation: bitset.c:avx2_harley_seal_popcount256andstore_or Unexecuted instantiation: bitset.c:avx2_harley_seal_popcount256_or |
568 | | CROARING_UNTARGET_AVX2 |
569 | | |
570 | | CROARING_TARGET_AVX2 |
571 | 0 | CROARING_AVXPOPCNTFNC(union, _mm256_or_si256) Unexecuted instantiation: bitset.c:avx2_harley_seal_popcount256andstore_union Unexecuted instantiation: bitset.c:avx2_harley_seal_popcount256_union |
572 | | CROARING_UNTARGET_AVX2 |
573 | | |
574 | | CROARING_TARGET_AVX2 |
575 | 1.38k | CROARING_AVXPOPCNTFNC(and, _mm256_and_si256) Unexecuted instantiation: bitset.c:avx2_harley_seal_popcount256andstore_and bitset.c:avx2_harley_seal_popcount256_and Line | Count | Source | 575 | | CROARING_AVXPOPCNTFNC(and, _mm256_and_si256) |

576 | | CROARING_UNTARGET_AVX2 |
577 | | |
578 | | CROARING_TARGET_AVX2 |
579 | 0 | CROARING_AVXPOPCNTFNC(intersection, _mm256_and_si256) Unexecuted instantiation: bitset.c:avx2_harley_seal_popcount256andstore_intersection Unexecuted instantiation: bitset.c:avx2_harley_seal_popcount256_intersection |
580 | | CROARING_UNTARGET_AVX2 |
581 | | |
582 | | CROARING_TARGET_AVX2 |
583 | 1.39k | CROARING_AVXPOPCNTFNC(xor, _mm256_xor_si256) bitset.c:avx2_harley_seal_popcount256andstore_xor Line | Count | Source | 583 | | CROARING_AVXPOPCNTFNC(xor, _mm256_xor_si256) |
Unexecuted instantiation: bitset.c:avx2_harley_seal_popcount256_xor |
584 | | CROARING_UNTARGET_AVX2 |
585 | | |
586 | | CROARING_TARGET_AVX2 |
587 | 0 | CROARING_AVXPOPCNTFNC(andnot, _mm256_andnot_si256) Unexecuted instantiation: bitset.c:avx2_harley_seal_popcount256andstore_andnot Unexecuted instantiation: bitset.c:avx2_harley_seal_popcount256_andnot |
588 | | CROARING_UNTARGET_AVX2 |
589 | | |
/*
 * VPOPCNT_AND_ADD(ptr, i, accu)
 *
 * Loads (unaligned) the 512-bit word at index `i` relative to `ptr`, computes
 * its population count with _mm512_popcnt_epi64, and adds the result
 * lane-wise into `accu`.
 *
 *   ptr  - pointer expression designating the first 512-bit word; it is
 *          evaluated as a whole and then cast to `const __m512i *`
 *   i    - index of the word to load; the same token is pasted into the names
 *          of the temporaries v<i> and p<i>, so it must be a token that forms
 *          a valid identifier suffix (e.g. a literal 0..3) and must be unique
 *          within the enclosing scope
 *   accu - modifiable __m512i lvalue that receives the running sum
 *
 * The expansion is two declarations followed by an assignment (each ending in
 * ';'), so it has to appear where declarations are permitted and cannot be
 * used as the sole body of an unbraced if/for.
 *
 * `ptr` and `accu` are parenthesised so that a compound argument such as
 * `base + k` is cast as a unit; without the parentheses the cast would bind
 * to `base` alone and the offset would be scaled by the wrong element size
 * whenever `base` is not already a `const __m512i *`.
 */
#define VPOPCNT_AND_ADD(ptr, i, accu)                                        \
    const __m512i v##i = _mm512_loadu_si512((const __m512i *)(ptr) + (i));   \
    const __m512i p##i = _mm512_popcnt_epi64(v##i);                          \
    (accu) = _mm512_add_epi64((accu), p##i);
594 | | |
595 | | #if CROARING_COMPILER_SUPPORTS_AVX512 |
596 | | CROARING_TARGET_AVX512 |
597 | 0 | static inline uint64_t sum_epu64_256(const __m256i v) { |
598 | 0 | return (uint64_t)(_mm256_extract_epi64(v, 0)) + |
599 | 0 | (uint64_t)(_mm256_extract_epi64(v, 1)) + |
600 | 0 | (uint64_t)(_mm256_extract_epi64(v, 2)) + |
601 | 0 | (uint64_t)(_mm256_extract_epi64(v, 3)); |
602 | 0 | } Unexecuted instantiation: croaring_fuzzer_cc.cc:roaring::internal::sum_epu64_256(long long __vector(4)) Unexecuted instantiation: roaring.c:sum_epu64_256 Unexecuted instantiation: roaring_array.c:sum_epu64_256 Unexecuted instantiation: bitset_util.c:sum_epu64_256 Unexecuted instantiation: bitset.c:sum_epu64_256 Unexecuted instantiation: containers.c:sum_epu64_256 Unexecuted instantiation: convert.c:sum_epu64_256 Unexecuted instantiation: mixed_intersection.c:sum_epu64_256 Unexecuted instantiation: mixed_union.c:sum_epu64_256 Unexecuted instantiation: mixed_negation.c:sum_epu64_256 Unexecuted instantiation: mixed_xor.c:sum_epu64_256 Unexecuted instantiation: mixed_andnot.c:sum_epu64_256 Unexecuted instantiation: roaring64.c:sum_epu64_256 |
603 | | |
604 | 0 | static inline uint64_t simd_sum_epu64(const __m512i v) { |
605 | 0 | __m256i lo = _mm512_extracti64x4_epi64(v, 0); |
606 | 0 | __m256i hi = _mm512_extracti64x4_epi64(v, 1); |
607 | |
|
608 | 0 | return sum_epu64_256(lo) + sum_epu64_256(hi); |
609 | 0 | } Unexecuted instantiation: croaring_fuzzer_cc.cc:roaring::internal::simd_sum_epu64(long long __vector(8)) Unexecuted instantiation: roaring.c:simd_sum_epu64 Unexecuted instantiation: roaring_array.c:simd_sum_epu64 Unexecuted instantiation: bitset_util.c:simd_sum_epu64 Unexecuted instantiation: bitset.c:simd_sum_epu64 Unexecuted instantiation: containers.c:simd_sum_epu64 Unexecuted instantiation: convert.c:simd_sum_epu64 Unexecuted instantiation: mixed_intersection.c:simd_sum_epu64 Unexecuted instantiation: mixed_union.c:simd_sum_epu64 Unexecuted instantiation: mixed_negation.c:simd_sum_epu64 Unexecuted instantiation: mixed_xor.c:simd_sum_epu64 Unexecuted instantiation: mixed_andnot.c:simd_sum_epu64 Unexecuted instantiation: roaring64.c:simd_sum_epu64 |
610 | | |
611 | | static inline uint64_t avx512_vpopcount(const __m512i *data, |
612 | 0 | const uint64_t size) { |
613 | 0 | const uint64_t limit = size - size % 4; |
614 | 0 | __m512i total = _mm512_setzero_si512(); |
615 | 0 | uint64_t i = 0; |
616 | |
|
617 | 0 | for (; i < limit; i += 4) { |
618 | 0 | VPOPCNT_AND_ADD(data + i, 0, total); |
619 | 0 | VPOPCNT_AND_ADD(data + i, 1, total); |
620 | 0 | VPOPCNT_AND_ADD(data + i, 2, total); |
621 | 0 | VPOPCNT_AND_ADD(data + i, 3, total); |
622 | 0 | } |
623 | |
|
624 | 0 | for (; i < size; i++) { |
625 | 0 | total = _mm512_add_epi64( |
626 | 0 | total, _mm512_popcnt_epi64(_mm512_loadu_si512(data + i))); |
627 | 0 | } |
628 | |
|
629 | 0 | return simd_sum_epu64(total); |
630 | 0 | } Unexecuted instantiation: croaring_fuzzer_cc.cc:roaring::internal::avx512_vpopcount(long long __vector(8) const*, unsigned long) Unexecuted instantiation: roaring.c:avx512_vpopcount Unexecuted instantiation: roaring_array.c:avx512_vpopcount Unexecuted instantiation: bitset_util.c:avx512_vpopcount Unexecuted instantiation: bitset.c:avx512_vpopcount Unexecuted instantiation: containers.c:avx512_vpopcount Unexecuted instantiation: convert.c:avx512_vpopcount Unexecuted instantiation: mixed_intersection.c:avx512_vpopcount Unexecuted instantiation: mixed_union.c:avx512_vpopcount Unexecuted instantiation: mixed_negation.c:avx512_vpopcount Unexecuted instantiation: mixed_xor.c:avx512_vpopcount Unexecuted instantiation: mixed_andnot.c:avx512_vpopcount Unexecuted instantiation: roaring64.c:avx512_vpopcount |
631 | | CROARING_UNTARGET_AVX512 |
632 | | #endif |
633 | | |
/**
 * CROARING_AVXPOPCNTFNC512(opname, avx_intrinsic)
 *
 * Expands to two static inline AVX-512 kernels built around the binary
 * vector operation `avx_intrinsic`:
 *
 *   avx512_harley_seal_popcount512_<opname>(data1, data2, size)
 *       For each k in [0, size), combines data1[k] and data2[k] with
 *       avx_intrinsic(data1[k], data2[k]), counts the set bits of the result
 *       per 64-bit lane and accumulates those counts. Nothing is written.
 *
 *   avx512_harley_seal_popcount512andstore_<opname>(data1, data2, out, size)
 *       Same as above, and additionally stores every combined vector to
 *       out[k].
 *
 * `size` is a number of __m512i vectors (not of 64-bit words). All loads and
 * stores are the unaligned variants. The per-lane totals are reduced to the
 * returned uint64_t by simd_sum_epu64(), which is defined outside this
 * macro (presumably a horizontal sum of the eight lanes -- confirm there).
 *
 * Despite the "harley_seal" name, bits are counted directly with
 * _mm512_popcnt_epi64 on every vector; the main loop is merely unrolled
 * four times and a second loop handles the remaining size % 4 vectors.
 *
 * Argument order matters for non-commutative intrinsics: the operation is
 * always avx_intrinsic(data1[k], data2[k]).
 */
#define CROARING_AVXPOPCNTFNC512(opname, avx_intrinsic)                        \
    static inline uint64_t avx512_harley_seal_popcount512_##opname(            \
        const __m512i *data1, const __m512i *data2, const uint64_t size) {     \
        /* First index that the 4-way unrolled loop does not cover. */         \
        const uint64_t unrolled_end = size & ~UINT64_C(3);                     \
        __m512i acc = _mm512_setzero_si512();                                  \
        uint64_t k = 0;                                                        \
        while (k < unrolled_end) {                                             \
            const __m512i v0 = avx_intrinsic(_mm512_loadu_si512(data1 + k),    \
                                             _mm512_loadu_si512(data2 + k));   \
            acc = _mm512_add_epi64(acc, _mm512_popcnt_epi64(v0));              \
            const __m512i v1 =                                                 \
                avx_intrinsic(_mm512_loadu_si512(data1 + k + 1),               \
                              _mm512_loadu_si512(data2 + k + 1));              \
            acc = _mm512_add_epi64(acc, _mm512_popcnt_epi64(v1));              \
            const __m512i v2 =                                                 \
                avx_intrinsic(_mm512_loadu_si512(data1 + k + 2),               \
                              _mm512_loadu_si512(data2 + k + 2));              \
            acc = _mm512_add_epi64(acc, _mm512_popcnt_epi64(v2));              \
            const __m512i v3 =                                                 \
                avx_intrinsic(_mm512_loadu_si512(data1 + k + 3),               \
                              _mm512_loadu_si512(data2 + k + 3));              \
            acc = _mm512_add_epi64(acc, _mm512_popcnt_epi64(v3));              \
            k += 4;                                                            \
        }                                                                      \
        /* Tail: at most three vectors remain. */                              \
        while (k < size) {                                                     \
            const __m512i v = avx_intrinsic(_mm512_loadu_si512(data1 + k),     \
                                            _mm512_loadu_si512(data2 + k));    \
            acc = _mm512_add_epi64(acc, _mm512_popcnt_epi64(v));               \
            k++;                                                               \
        }                                                                      \
        return simd_sum_epu64(acc);                                            \
    }                                                                          \
    static inline uint64_t avx512_harley_seal_popcount512andstore_##opname(    \
        const __m512i *__restrict__ data1, const __m512i *__restrict__ data2,  \
        __m512i *__restrict__ out, const uint64_t size) {                      \
        /* First index that the 4-way unrolled loop does not cover. */         \
        const uint64_t unrolled_end = size & ~UINT64_C(3);                     \
        __m512i acc = _mm512_setzero_si512();                                  \
        uint64_t k = 0;                                                        \
        /* Each element is loaded, stored, then counted, in that order. */     \
        while (k < unrolled_end) {                                             \
            const __m512i v0 = avx_intrinsic(_mm512_loadu_si512(data1 + k),    \
                                             _mm512_loadu_si512(data2 + k));   \
            _mm512_storeu_si512(out + k, v0);                                  \
            acc = _mm512_add_epi64(acc, _mm512_popcnt_epi64(v0));              \
            const __m512i v1 =                                                 \
                avx_intrinsic(_mm512_loadu_si512(data1 + k + 1),               \
                              _mm512_loadu_si512(data2 + k + 1));              \
            _mm512_storeu_si512(out + k + 1, v1);                              \
            acc = _mm512_add_epi64(acc, _mm512_popcnt_epi64(v1));              \
            const __m512i v2 =                                                 \
                avx_intrinsic(_mm512_loadu_si512(data1 + k + 2),               \
                              _mm512_loadu_si512(data2 + k + 2));              \
            _mm512_storeu_si512(out + k + 2, v2);                              \
            acc = _mm512_add_epi64(acc, _mm512_popcnt_epi64(v2));              \
            const __m512i v3 =                                                 \
                avx_intrinsic(_mm512_loadu_si512(data1 + k + 3),               \
                              _mm512_loadu_si512(data2 + k + 3));              \
            _mm512_storeu_si512(out + k + 3, v3);                              \
            acc = _mm512_add_epi64(acc, _mm512_popcnt_epi64(v3));              \
            k += 4;                                                            \
        }                                                                      \
        /* Tail: at most three vectors remain. */                              \
        while (k < size) {                                                     \
            const __m512i v = avx_intrinsic(_mm512_loadu_si512(data1 + k),     \
                                            _mm512_loadu_si512(data2 + k));    \
            _mm512_storeu_si512(out + k, v);                                   \
            acc = _mm512_add_epi64(acc, _mm512_popcnt_epi64(v));               \
            k++;                                                               \
        }                                                                      \
        return simd_sum_epu64(acc);                                            \
    }

#if CROARING_COMPILER_SUPPORTS_AVX512
CROARING_TARGET_AVX512
/* "or"/"union" and "and"/"intersection" are generated from the same
 * intrinsic, so each pair is two names for one operation. */
CROARING_AVXPOPCNTFNC512(or, _mm512_or_si512)
CROARING_AVXPOPCNTFNC512(union, _mm512_or_si512)
CROARING_AVXPOPCNTFNC512(and, _mm512_and_si512)
CROARING_AVXPOPCNTFNC512(intersection, _mm512_and_si512)
CROARING_AVXPOPCNTFNC512(xor, _mm512_xor_si512)
/* _mm512_andnot_si512(a, b) computes (~a) & b, so the andnot kernels operate
 * on (~data1[k]) & data2[k]. */
CROARING_AVXPOPCNTFNC512(andnot, _mm512_andnot_si512)
CROARING_UNTARGET_AVX512
#endif
704 | | /*** |
705 | | * END Harley-Seal popcount functions. |
706 | | */ |
707 | | |
708 | | #endif // CROARING_IS_X64 |
709 | | |
710 | | #ifdef __cplusplus |
711 | | } |
712 | | } |
713 | | } // extern "C" { namespace roaring { namespace internal |
714 | | #endif |
715 | | #if defined(__GNUC__) && !defined(__clang__) |
716 | | #pragma GCC diagnostic pop |
717 | | #endif |
718 | | #endif |