/src/botan/build/include/botan/internal/simd_avx2.h
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * (C) 2018 Jack Lloyd |
3 | | * |
4 | | * Botan is released under the Simplified BSD License (see license.txt) |
5 | | */ |
6 | | |
7 | | #ifndef BOTAN_SIMD_AVX2_H_ |
8 | | #define BOTAN_SIMD_AVX2_H_ |
9 | | |
10 | | #include <botan/types.h> |
11 | | #include <immintrin.h> |
12 | | |
13 | | namespace Botan { |
14 | | |
15 | | class SIMD_8x32 final |
16 | | { |
17 | | public: |
18 | | |
19 | | SIMD_8x32& operator=(const SIMD_8x32& other) = default; |
20 | | SIMD_8x32(const SIMD_8x32& other) = default; |
21 | | |
22 | | SIMD_8x32& operator=(SIMD_8x32&& other) = default; |
23 | | SIMD_8x32(SIMD_8x32&& other) = default; |
24 | | |
25 | | BOTAN_FUNC_ISA("avx2") |
26 | | BOTAN_FORCE_INLINE SIMD_8x32() |
27 | 0 | { |
28 | 0 | m_avx2 = _mm256_setzero_si256(); |
29 | 0 | } |
30 | | |
31 | | BOTAN_FUNC_ISA("avx2") |
32 | | explicit SIMD_8x32(const uint32_t B[8]) |
33 | 0 | { |
34 | 0 | m_avx2 = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(B)); |
35 | 0 | } |
36 | | |
37 | | BOTAN_FUNC_ISA("avx2") |
38 | | explicit SIMD_8x32(uint32_t B0, uint32_t B1, uint32_t B2, uint32_t B3, |
39 | | uint32_t B4, uint32_t B5, uint32_t B6, uint32_t B7) |
40 | 87.6k | { |
41 | 87.6k | m_avx2 = _mm256_set_epi32(B7, B6, B5, B4, B3, B2, B1, B0); |
42 | 87.6k | } |
43 | | |
44 | | BOTAN_FUNC_ISA("avx2") |
45 | | static SIMD_8x32 splat(uint32_t B) |
46 | 1.40M | { |
47 | 1.40M | return SIMD_8x32(_mm256_set1_epi32(B)); |
48 | 1.40M | } |
49 | | |
50 | | BOTAN_FUNC_ISA("avx2") |
51 | | static SIMD_8x32 load_le(const uint8_t* in) |
52 | 0 | { |
53 | 0 | return SIMD_8x32(_mm256_loadu_si256(reinterpret_cast<const __m256i*>(in))); |
54 | 0 | } |
55 | | |
56 | | BOTAN_FUNC_ISA("avx2") |
57 | | static SIMD_8x32 load_be(const uint8_t* in) |
58 | 0 | { |
59 | 0 | return load_le(in).bswap(); |
60 | 0 | } |
61 | | |
62 | | BOTAN_FUNC_ISA("avx2") |
63 | | void store_le(uint8_t out[]) const |
64 | 701k | { |
65 | 701k | _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), m_avx2); |
66 | 701k | } |
67 | | |
68 | | BOTAN_FUNC_ISA("avx2") |
69 | | void store_be(uint8_t out[]) const |
70 | 0 | { |
71 | 0 | bswap().store_le(out); |
72 | 0 | } |
73 | | |
74 | | template<size_t ROT> |
75 | | BOTAN_FUNC_ISA("avx2") |
76 | | SIMD_8x32 rotl() const |
77 | 14.0M | { |
78 | 14.0M | static_assert(ROT > 0 && ROT < 32, "Invalid rotation constant"); |
79 | | |
80 | | #if defined(__AVX512VL__) |
81 | | return SIMD_8x32(_mm256_rol_epi32(m_avx2, ROT)); |
82 | | #else |
83 | 14.0M | if constexpr(ROT == 8) |
84 | 0 | { |
85 | 10.5M | const __m256i shuf_rotl_8 = _mm256_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3, |
86 | 10.5M | 14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3); |
87 | | |
88 | 10.5M | return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_8)); |
89 | 10.5M | } |
90 | 10.5M | else if constexpr(ROT == 16) |
91 | 0 | { |
92 | 7.01M | const __m256i shuf_rotl_16 = _mm256_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2, |
93 | 7.01M | 13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2); |
94 | | |
95 | 7.01M | return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_16)); |
96 | 7.01M | } |
97 | 7.01M | else |
98 | 7.01M | { |
99 | 7.01M | return SIMD_8x32(_mm256_or_si256(_mm256_slli_epi32(m_avx2, static_cast<int>(ROT)), |
100 | 7.01M | _mm256_srli_epi32(m_avx2, static_cast<int>(32-ROT)))); |
101 | 7.01M | } |
102 | 14.0M | #endif |
103 | 14.0M | } Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<19ul>() const Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<10ul>() const Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<13ul>() const Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<3ul>() const Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<1ul>() const Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<7ul>() const Line | Count | Source | 77 | 3.50M | { | 78 | 3.50M | static_assert(ROT > 0 && ROT < 32, "Invalid rotation constant"); | 79 | | | 80 | | #if defined(__AVX512VL__) | 81 | | return SIMD_8x32(_mm256_rol_epi32(m_avx2, ROT)); | 82 | | #else | 83 | 3.50M | if constexpr(ROT == 8) | 84 | 0 | { | 85 | 3.50M | const __m256i shuf_rotl_8 = _mm256_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3, | 86 | 3.50M | 14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3); | 87 | | | 88 | 3.50M | return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_8)); | 89 | 3.50M | } | 90 | 3.50M | else if constexpr(ROT == 16) | 91 | 0 | { | 92 | 3.50M | const __m256i shuf_rotl_16 = _mm256_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2, | 93 | 3.50M | 13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2); | 94 | | | 95 | 3.50M | return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_16)); | 96 | 3.50M | } | 97 | 3.50M | else | 98 | 3.50M | { | 99 | 3.50M | return SIMD_8x32(_mm256_or_si256(_mm256_slli_epi32(m_avx2, static_cast<int>(ROT)), | 100 | 3.50M | _mm256_srli_epi32(m_avx2, static_cast<int>(32-ROT)))); | 101 | 3.50M | } | 102 | 3.50M | #endif | 103 | 3.50M | } |
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<5ul>() const Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<22ul>() const Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<27ul>() const Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<25ul>() const Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<31ul>() const Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<29ul>() const Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<30ul>() const Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<26ul>() const Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<21ul>() const Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<16ul>() const Line | Count | Source | 77 | 3.50M | { | 78 | 3.50M | static_assert(ROT > 0 && ROT < 32, "Invalid rotation constant"); | 79 | | | 80 | | #if defined(__AVX512VL__) | 81 | | return SIMD_8x32(_mm256_rol_epi32(m_avx2, ROT)); | 82 | | #else | 83 | 3.50M | if constexpr(ROT == 8) | 84 | 0 | { | 85 | 3.50M | const __m256i shuf_rotl_8 = _mm256_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3, | 86 | 3.50M | 14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3); | 87 | | | 88 | 3.50M | return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_8)); | 89 | 3.50M | } | 90 | 3.50M | else if constexpr(ROT == 16) | 91 | 3.50M | { | 92 | 3.50M | const __m256i shuf_rotl_16 = _mm256_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2, | 93 | 3.50M | 13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2); | 94 | | | 95 | 3.50M | return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_16)); | 96 | 3.50M | } | 97 | 3.50M | else | 98 | 3.50M | { | 99 | 3.50M | return SIMD_8x32(_mm256_or_si256(_mm256_slli_epi32(m_avx2, static_cast<int>(ROT)), | 100 | 3.50M | _mm256_srli_epi32(m_avx2, static_cast<int>(32-ROT)))); | 101 | 3.50M | } | 102 | 3.50M | #endif | 103 | 3.50M | } |
Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<12ul>() const Line | Count | Source | 77 | 3.50M | { | 78 | 3.50M | static_assert(ROT > 0 && ROT < 32, "Invalid rotation constant"); | 79 | | | 80 | | #if defined(__AVX512VL__) | 81 | | return SIMD_8x32(_mm256_rol_epi32(m_avx2, ROT)); | 82 | | #else | 83 | 3.50M | if constexpr(ROT == 8) | 84 | 0 | { | 85 | 3.50M | const __m256i shuf_rotl_8 = _mm256_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3, | 86 | 3.50M | 14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3); | 87 | | | 88 | 3.50M | return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_8)); | 89 | 3.50M | } | 90 | 3.50M | else if constexpr(ROT == 16) | 91 | 0 | { | 92 | 3.50M | const __m256i shuf_rotl_16 = _mm256_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2, | 93 | 3.50M | 13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2); | 94 | | | 95 | 3.50M | return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_16)); | 96 | 3.50M | } | 97 | 3.50M | else | 98 | 3.50M | { | 99 | 3.50M | return SIMD_8x32(_mm256_or_si256(_mm256_slli_epi32(m_avx2, static_cast<int>(ROT)), | 100 | 3.50M | _mm256_srli_epi32(m_avx2, static_cast<int>(32-ROT)))); | 101 | 3.50M | } | 102 | 3.50M | #endif | 103 | 3.50M | } |
Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<8ul>() const Line | Count | Source | 77 | 3.50M | { | 78 | 3.50M | static_assert(ROT > 0 && ROT < 32, "Invalid rotation constant"); | 79 | | | 80 | | #if defined(__AVX512VL__) | 81 | | return SIMD_8x32(_mm256_rol_epi32(m_avx2, ROT)); | 82 | | #else | 83 | 3.50M | if constexpr(ROT == 8) | 84 | 3.50M | { | 85 | 3.50M | const __m256i shuf_rotl_8 = _mm256_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3, | 86 | 3.50M | 14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3); | 87 | | | 88 | 3.50M | return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_8)); | 89 | 3.50M | } | 90 | 3.50M | else if constexpr(ROT == 16) | 91 | 3.50M | { | 92 | 3.50M | const __m256i shuf_rotl_16 = _mm256_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2, | 93 | 3.50M | 13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2); | 94 | | | 95 | 3.50M | return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_16)); | 96 | 3.50M | } | 97 | 3.50M | else | 98 | 3.50M | { | 99 | 3.50M | return SIMD_8x32(_mm256_or_si256(_mm256_slli_epi32(m_avx2, static_cast<int>(ROT)), | 100 | 3.50M | _mm256_srli_epi32(m_avx2, static_cast<int>(32-ROT)))); | 101 | 3.50M | } | 102 | 3.50M | #endif | 103 | 3.50M | } |
|
104 | | |
105 | | template<size_t ROT> |
106 | | BOTAN_FUNC_ISA("avx2") |
107 | | SIMD_8x32 rotr() const |
108 | 0 | { |
109 | 0 | return this->rotl<32-ROT>(); |
110 | 0 | } Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<13ul>() const Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<22ul>() const Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<5ul>() const Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<7ul>() const Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<1ul>() const Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<3ul>() const Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<2ul>() const Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<6ul>() const Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<11ul>() const Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<25ul>() const |
111 | | |
112 | | SIMD_8x32 BOTAN_FUNC_ISA("avx2") sigma0() const |
113 | 0 | { |
114 | 0 | const SIMD_8x32 rot1 = this->rotr<2>(); |
115 | 0 | const SIMD_8x32 rot2 = this->rotr<13>(); |
116 | 0 | const SIMD_8x32 rot3 = this->rotr<22>(); |
117 | 0 | return rot1 ^ rot2 ^ rot3; |
118 | 0 | } |
119 | | |
120 | | SIMD_8x32 BOTAN_FUNC_ISA("avx2") sigma1() const |
121 | 0 | { |
122 | 0 | const SIMD_8x32 rot1 = this->rotr<6>(); |
123 | 0 | const SIMD_8x32 rot2 = this->rotr<11>(); |
124 | 0 | const SIMD_8x32 rot3 = this->rotr<25>(); |
125 | 0 | return rot1 ^ rot2 ^ rot3; |
126 | 0 | } |
127 | | |
128 | | BOTAN_FUNC_ISA("avx2") |
129 | | SIMD_8x32 operator+(const SIMD_8x32& other) const |
130 | 175k | { |
131 | 175k | SIMD_8x32 retval(*this); |
132 | 175k | retval += other; |
133 | 175k | return retval; |
134 | 175k | } |
135 | | |
136 | | BOTAN_FUNC_ISA("avx2") |
137 | | SIMD_8x32 operator-(const SIMD_8x32& other) const |
138 | 0 | { |
139 | 0 | SIMD_8x32 retval(*this); |
140 | 0 | retval -= other; |
141 | 0 | return retval; |
142 | 0 | } |
143 | | |
144 | | BOTAN_FUNC_ISA("avx2") |
145 | | SIMD_8x32 operator^(const SIMD_8x32& other) const |
146 | 0 | { |
147 | 0 | SIMD_8x32 retval(*this); |
148 | 0 | retval ^= other; |
149 | 0 | return retval; |
150 | 0 | } |
151 | | |
152 | | BOTAN_FUNC_ISA("avx2") |
153 | | SIMD_8x32 operator|(const SIMD_8x32& other) const |
154 | 0 | { |
155 | 0 | SIMD_8x32 retval(*this); |
156 | 0 | retval |= other; |
157 | 0 | return retval; |
158 | 0 | } |
159 | | |
160 | | BOTAN_FUNC_ISA("avx2") |
161 | | SIMD_8x32 operator&(const SIMD_8x32& other) const |
162 | 0 | { |
163 | 0 | SIMD_8x32 retval(*this); |
164 | 0 | retval &= other; |
165 | 0 | return retval; |
166 | 0 | } |
167 | | |
168 | | BOTAN_FUNC_ISA("avx2") |
169 | | void operator+=(const SIMD_8x32& other) |
170 | 14.8M | { |
171 | 14.8M | m_avx2 = _mm256_add_epi32(m_avx2, other.m_avx2); |
172 | 14.8M | } |
173 | | |
174 | | BOTAN_FUNC_ISA("avx2") |
175 | | void operator-=(const SIMD_8x32& other) |
176 | 0 | { |
177 | 0 | m_avx2 = _mm256_sub_epi32(m_avx2, other.m_avx2); |
178 | 0 | } |
179 | | |
180 | | BOTAN_FUNC_ISA("avx2") |
181 | | void operator^=(const SIMD_8x32& other) |
182 | 14.0M | { |
183 | 14.0M | m_avx2 = _mm256_xor_si256(m_avx2, other.m_avx2); |
184 | 14.0M | } |
185 | | |
186 | | BOTAN_FUNC_ISA("avx2") |
187 | | void operator|=(const SIMD_8x32& other) |
188 | 0 | { |
189 | 0 | m_avx2 = _mm256_or_si256(m_avx2, other.m_avx2); |
190 | 0 | } |
191 | | |
192 | | BOTAN_FUNC_ISA("avx2") |
193 | | void operator&=(const SIMD_8x32& other) |
194 | 0 | { |
195 | 0 | m_avx2 = _mm256_and_si256(m_avx2, other.m_avx2); |
196 | 0 | } |
197 | | |
198 | | template<int SHIFT> BOTAN_FUNC_ISA("avx2") SIMD_8x32 shl() const |
199 | 0 | { |
200 | 0 | return SIMD_8x32(_mm256_slli_epi32(m_avx2, SHIFT)); |
201 | 0 | } Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::shl<3>() const Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::shl<7>() const |
202 | | |
203 | | template<int SHIFT> BOTAN_FUNC_ISA("avx2") SIMD_8x32 shr() const |
204 | | { |
205 | | return SIMD_8x32(_mm256_srli_epi32(m_avx2, SHIFT)); |
206 | | } |
207 | | |
208 | | BOTAN_FUNC_ISA("avx2") |
209 | | SIMD_8x32 operator~() const |
210 | 0 | { |
211 | 0 | return SIMD_8x32(_mm256_xor_si256(m_avx2, _mm256_set1_epi32(0xFFFFFFFF))); |
212 | 0 | } |
213 | | |
214 | | // (~reg) & other |
215 | | BOTAN_FUNC_ISA("avx2") |
216 | | SIMD_8x32 andc(const SIMD_8x32& other) const |
217 | 0 | { |
218 | 0 | return SIMD_8x32(_mm256_andnot_si256(m_avx2, other.m_avx2)); |
219 | 0 | } |
220 | | |
221 | | BOTAN_FUNC_ISA("avx2") |
222 | | SIMD_8x32 bswap() const |
223 | 0 | { |
224 | 0 | const uint8_t BSWAP_MASK[32] = { 3, 2, 1, 0, |
225 | 0 | 7, 6, 5, 4, |
226 | 0 | 11, 10, 9, 8, |
227 | 0 | 15, 14, 13, 12, |
228 | 0 | 19, 18, 17, 16, |
229 | 0 | 23, 22, 21, 20, |
230 | 0 | 27, 26, 25, 24, |
231 | 0 | 31, 30, 29, 28 }; |
232 | |
|
233 | 0 | const __m256i bswap = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(BSWAP_MASK)); |
234 | |
|
235 | 0 | const __m256i output = _mm256_shuffle_epi8(m_avx2, bswap); |
236 | |
|
237 | 0 | return SIMD_8x32(output); |
238 | 0 | } |
239 | | |
240 | | BOTAN_FUNC_ISA("avx2") |
241 | | static void transpose(SIMD_8x32& B0, SIMD_8x32& B1, |
242 | | SIMD_8x32& B2, SIMD_8x32& B3) |
243 | 175k | { |
244 | 175k | const __m256i T0 = _mm256_unpacklo_epi32(B0.m_avx2, B1.m_avx2); |
245 | 175k | const __m256i T1 = _mm256_unpacklo_epi32(B2.m_avx2, B3.m_avx2); |
246 | 175k | const __m256i T2 = _mm256_unpackhi_epi32(B0.m_avx2, B1.m_avx2); |
247 | 175k | const __m256i T3 = _mm256_unpackhi_epi32(B2.m_avx2, B3.m_avx2); |
248 | | |
249 | 175k | B0.m_avx2 = _mm256_unpacklo_epi64(T0, T1); |
250 | 175k | B1.m_avx2 = _mm256_unpackhi_epi64(T0, T1); |
251 | 175k | B2.m_avx2 = _mm256_unpacklo_epi64(T2, T3); |
252 | 175k | B3.m_avx2 = _mm256_unpackhi_epi64(T2, T3); |
253 | 175k | } |
254 | | |
255 | | BOTAN_FUNC_ISA("avx2") |
256 | | static void transpose(SIMD_8x32& B0, SIMD_8x32& B1, |
257 | | SIMD_8x32& B2, SIMD_8x32& B3, |
258 | | SIMD_8x32& B4, SIMD_8x32& B5, |
259 | | SIMD_8x32& B6, SIMD_8x32& B7) |
260 | 87.6k | { |
261 | 87.6k | transpose(B0, B1, B2, B3); |
262 | 87.6k | transpose(B4, B5, B6, B7); |
263 | | |
264 | 87.6k | swap_tops(B0, B4); |
265 | 87.6k | swap_tops(B1, B5); |
266 | 87.6k | swap_tops(B2, B6); |
267 | 87.6k | swap_tops(B3, B7); |
268 | 87.6k | } |
269 | | |
270 | | BOTAN_FUNC_ISA("avx2") |
271 | | static SIMD_8x32 choose(const SIMD_8x32& mask, const SIMD_8x32& a, const SIMD_8x32& b) |
272 | 0 | { |
273 | | #if defined(__AVX512VL__) |
274 | | return _mm256_ternarylogic_epi32(mask.handle(), a.handle(), b.handle(), 0xca); |
275 | | #else |
276 | 0 | return (mask & a) ^ mask.andc(b); |
277 | 0 | #endif |
278 | 0 | } |
279 | | |
280 | | BOTAN_FUNC_ISA("avx2") |
281 | | static SIMD_8x32 majority(const SIMD_8x32& x, const SIMD_8x32& y, const SIMD_8x32& z) |
282 | 0 | { |
283 | | #if defined(__AVX512VL__) |
284 | | return _mm256_ternarylogic_epi32(x.handle(), y.handle(), z.handle(), 0xe8); |
285 | | #else |
286 | 0 | return SIMD_8x32::choose(x ^ y, z, y); |
287 | 0 | #endif |
288 | 0 | } |
289 | | |
290 | | BOTAN_FUNC_ISA("avx2") |
291 | | static void reset_registers() |
292 | 43.8k | { |
293 | 43.8k | _mm256_zeroupper(); |
294 | 43.8k | } |
295 | | |
296 | | BOTAN_FUNC_ISA("avx2") |
297 | | static void zero_registers() |
298 | 43.8k | { |
299 | 43.8k | _mm256_zeroall(); |
300 | 43.8k | } |
301 | | |
302 | 1.40M | __m256i BOTAN_FUNC_ISA("avx2") handle() const { return m_avx2; } |
303 | | |
304 | | BOTAN_FUNC_ISA("avx2") |
305 | 16.1M | SIMD_8x32(__m256i x) : m_avx2(x) {} |
306 | | |
307 | | private: |
308 | | |
309 | | BOTAN_FUNC_ISA("avx2") |
310 | | static void swap_tops(SIMD_8x32& A, SIMD_8x32& B) |
311 | 350k | { |
312 | 350k | SIMD_8x32 T0 = _mm256_permute2x128_si256(A.handle(), B.handle(), 0 + (2 << 4)); |
313 | 350k | SIMD_8x32 T1 = _mm256_permute2x128_si256(A.handle(), B.handle(), 1 + (3 << 4)); |
314 | 350k | A = T0; |
315 | 350k | B = T1; |
316 | 350k | } |
317 | | |
318 | | __m256i m_avx2; |
319 | | }; |
320 | | |
321 | | } |
322 | | |
323 | | #endif |