/src/botan/build/include/botan/internal/simd_avx2.h
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * (C) 2018 Jack Lloyd |
3 | | * |
4 | | * Botan is released under the Simplified BSD License (see license.txt) |
5 | | */ |
6 | | |
7 | | #ifndef BOTAN_SIMD_AVX2_H_ |
8 | | #define BOTAN_SIMD_AVX2_H_ |
9 | | |
10 | | #include <botan/types.h> |
11 | | #include <immintrin.h> |
12 | | |
13 | | namespace Botan { |
14 | | |
15 | | class SIMD_8x32 final |
16 | | { |
17 | | public: |
18 | | |
// Copy and move construction/assignment are all explicitly defaulted:
// the class only wraps a single __m256i, so member-wise copy is sufficient.
19 | | SIMD_8x32& operator=(const SIMD_8x32& other) = default; |
20 | | SIMD_8x32(const SIMD_8x32& other) = default; |
21 | | |
22 | | SIMD_8x32& operator=(SIMD_8x32&& other) = default; |
23 | | SIMD_8x32(SIMD_8x32&& other) = default; |
24 | | |
// Default constructor: all 256 bits of the wrapped register are set to zero.
25 | | BOTAN_FUNC_ISA("avx2") |
26 | | BOTAN_FORCE_INLINE SIMD_8x32() |
27 | 0 | { |
28 | 0 | m_avx2 = _mm256_setzero_si256(); |
29 | 0 | } |
30 | | |
// Builds the vector from eight 32-bit words read from B.
// _mm256_loadu_si256 is the unaligned load, so B needs no special alignment;
// 32 bytes (8 x uint32_t) are read starting at B.
31 | | BOTAN_FUNC_ISA("avx2") |
32 | | explicit SIMD_8x32(const uint32_t B[8]) |
33 | 0 | { |
34 | 0 | m_avx2 = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(B)); |
35 | 0 | } |
36 | | |
// Builds the vector from eight individual 32-bit words.
// B0 becomes the lowest 32-bit element and B7 the highest.
37 | | BOTAN_FUNC_ISA("avx2") |
38 | | explicit SIMD_8x32(uint32_t B0, uint32_t B1, uint32_t B2, uint32_t B3, |
39 | | uint32_t B4, uint32_t B5, uint32_t B6, uint32_t B7) |
40 | 108k | { |
// _mm256_set_epi32 takes its arguments from the highest element down to the
// lowest, hence the reversed order B7..B0.
41 | 108k | m_avx2 = _mm256_set_epi32(B7, B6, B5, B4, B3, B2, B1, B0); |
42 | 108k | } |
43 | | |
// Returns a vector in which every one of the eight 32-bit elements equals B.
44 | | BOTAN_FUNC_ISA("avx2") |
45 | | static SIMD_8x32 splat(uint32_t B) |
46 | 1.72M | { |
47 | 1.72M | return SIMD_8x32(_mm256_set1_epi32(B)); |
48 | 1.72M | } |
49 | | |
// Loads 32 bytes starting at `in` into a vector without any byte reordering.
// Uses the unaligned load, so `in` may have any alignment.
50 | | BOTAN_FUNC_ISA("avx2") |
51 | | static SIMD_8x32 load_le(const uint8_t* in) |
52 | 0 | { |
53 | 0 | return SIMD_8x32(_mm256_loadu_si256(reinterpret_cast<const __m256i*>(in))); |
54 | 0 | } |
55 | | |
// Loads 32 bytes starting at `in` exactly as load_le() does, then applies
// bswap() so the bytes of every 32-bit element are reversed.
56 | | BOTAN_FUNC_ISA("avx2") |
57 | | static SIMD_8x32 load_be(const uint8_t* in) |
58 | 0 | { |
59 | 0 | return load_le(in).bswap(); |
60 | 0 | } |
61 | | |
// Writes the 32 bytes of the register to `out` without any byte reordering.
// Uses the unaligned store, so `out` may have any alignment; it must have
// room for 32 bytes.
62 | | BOTAN_FUNC_ISA("avx2") |
63 | | void store_le(uint8_t out[]) const |
64 | 864k | { |
65 | 864k | _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), m_avx2); |
66 | 864k | } |
67 | | |
// Writes the register to `out` after reversing the bytes of every 32-bit
// element: bswap() produces a swapped copy, which is then stored via
// store_le(). The object itself is not modified.
68 | | BOTAN_FUNC_ISA("avx2") |
69 | | void store_be(uint8_t out[]) const |
70 | 0 | { |
71 | 0 | bswap().store_le(out); |
72 | 0 | } |
73 | | |
// Rotates every 32-bit element left by the compile-time constant ROT and
// returns the result; *this is not modified.
// ROT must satisfy 0 < ROT < 32 (checked by the static_assert).
74 | | template<size_t ROT> |
75 | | BOTAN_FUNC_ISA("avx2") |
76 | | SIMD_8x32 rotl() const |
77 | 17.2M | { |
78 | 17.2M | static_assert(ROT > 0 && ROT < 32, "Invalid rotation constant"); |
79 | | |
// When compiled with AVX-512VL enabled, a single per-element rotate
// intrinsic is used.
80 | | #if defined(__AVX512VL__) |
81 | | return SIMD_8x32(_mm256_rol_epi32(m_avx2, ROT)); |
82 | | #else |
// Rotations by 8 or 16 bits move whole bytes, so they are done with one
// byte shuffle. _mm256_set_epi8 lists bytes from highest to lowest, and the
// 16-byte pattern is repeated because _mm256_shuffle_epi8 shuffles within
// each 128-bit lane separately.
83 | 17.2M | if constexpr(ROT == 8) |
84 | 4.32M | { |
85 | 12.9M | const __m256i shuf_rotl_8 = _mm256_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3, |
86 | 12.9M | 14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3); |
87 | | |
88 | 12.9M | return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_8)); |
89 | 12.9M | } |
90 | 12.9M | else if constexpr(ROT == 16) |
91 | 4.32M | { |
92 | 8.64M | const __m256i shuf_rotl_16 = _mm256_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2, |
93 | 8.64M | 13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2); |
94 | | |
95 | 8.64M | return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_16)); |
96 | 8.64M | } |
// General case: (x << ROT) | (x >> (32 - ROT)) on each 32-bit element.
97 | 8.64M | else |
98 | 8.64M | { |
99 | 8.64M | return SIMD_8x32(_mm256_or_si256(_mm256_slli_epi32(m_avx2, static_cast<int>(ROT)), |
100 | 8.64M | _mm256_srli_epi32(m_avx2, static_cast<int>(32-ROT)))); |
101 | 8.64M | } |
102 | 17.2M | #endif |
103 | 17.2M | } Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<19ul>() const Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<10ul>() const Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<13ul>() const Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<3ul>() const Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<1ul>() const Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<7ul>() const Line | Count | Source | 77 | 4.32M | { | 78 | 4.32M | static_assert(ROT > 0 && ROT < 32, "Invalid rotation constant"); | 79 | | | 80 | | #if defined(__AVX512VL__) | 81 | | return SIMD_8x32(_mm256_rol_epi32(m_avx2, ROT)); | 82 | | #else | 83 | 4.32M | if constexpr(ROT == 8) | 84 | 0 | { | 85 | 4.32M | const __m256i shuf_rotl_8 = _mm256_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3, | 86 | 4.32M | 14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3); | 87 | | | 88 | 4.32M | return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_8)); | 89 | 4.32M | } | 90 | 4.32M | else if constexpr(ROT == 16) | 91 | 0 | { | 92 | 4.32M | const __m256i shuf_rotl_16 = _mm256_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2, | 93 | 4.32M | 13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2); | 94 | | | 95 | 4.32M | return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_16)); | 96 | 4.32M | } | 97 | 4.32M | else | 98 | 4.32M | { | 99 | 4.32M | return SIMD_8x32(_mm256_or_si256(_mm256_slli_epi32(m_avx2, static_cast<int>(ROT)), | 100 | 4.32M | _mm256_srli_epi32(m_avx2, static_cast<int>(32-ROT)))); | 101 | 4.32M | } | 102 | 4.32M | #endif | 103 | 4.32M | } |
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<5ul>() const Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<22ul>() const Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<27ul>() const Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<25ul>() const Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<31ul>() const Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<29ul>() const Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<30ul>() const Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<26ul>() const Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<21ul>() const Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<16ul>() const Line | Count | Source | 77 | 4.32M | { | 78 | 4.32M | static_assert(ROT > 0 && ROT < 32, "Invalid rotation constant"); | 79 | | | 80 | | #if defined(__AVX512VL__) | 81 | | return SIMD_8x32(_mm256_rol_epi32(m_avx2, ROT)); | 82 | | #else | 83 | 4.32M | if constexpr(ROT == 8) | 84 | 0 | { | 85 | 4.32M | const __m256i shuf_rotl_8 = _mm256_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3, | 86 | 4.32M | 14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3); | 87 | | | 88 | 4.32M | return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_8)); | 89 | 4.32M | } | 90 | 4.32M | else if constexpr(ROT == 16) | 91 | 4.32M | { | 92 | 4.32M | const __m256i shuf_rotl_16 = _mm256_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2, | 93 | 4.32M | 13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2); | 94 | | | 95 | 4.32M | return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_16)); | 96 | 4.32M | } | 97 | 4.32M | else | 98 | 4.32M | { | 99 | 4.32M | return SIMD_8x32(_mm256_or_si256(_mm256_slli_epi32(m_avx2, static_cast<int>(ROT)), | 100 | 4.32M | _mm256_srli_epi32(m_avx2, static_cast<int>(32-ROT)))); | 101 | 4.32M | } | 102 | 4.32M | #endif | 103 | 4.32M | } |
Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<12ul>() const Line | Count | Source | 77 | 4.32M | { | 78 | 4.32M | static_assert(ROT > 0 && ROT < 32, "Invalid rotation constant"); | 79 | | | 80 | | #if defined(__AVX512VL__) | 81 | | return SIMD_8x32(_mm256_rol_epi32(m_avx2, ROT)); | 82 | | #else | 83 | 4.32M | if constexpr(ROT == 8) | 84 | 0 | { | 85 | 4.32M | const __m256i shuf_rotl_8 = _mm256_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3, | 86 | 4.32M | 14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3); | 87 | | | 88 | 4.32M | return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_8)); | 89 | 4.32M | } | 90 | 4.32M | else if constexpr(ROT == 16) | 91 | 0 | { | 92 | 4.32M | const __m256i shuf_rotl_16 = _mm256_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2, | 93 | 4.32M | 13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2); | 94 | | | 95 | 4.32M | return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_16)); | 96 | 4.32M | } | 97 | 4.32M | else | 98 | 4.32M | { | 99 | 4.32M | return SIMD_8x32(_mm256_or_si256(_mm256_slli_epi32(m_avx2, static_cast<int>(ROT)), | 100 | 4.32M | _mm256_srli_epi32(m_avx2, static_cast<int>(32-ROT)))); | 101 | 4.32M | } | 102 | 4.32M | #endif | 103 | 4.32M | } |
Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<8ul>() const Line | Count | Source | 77 | 4.32M | { | 78 | 4.32M | static_assert(ROT > 0 && ROT < 32, "Invalid rotation constant"); | 79 | | | 80 | | #if defined(__AVX512VL__) | 81 | | return SIMD_8x32(_mm256_rol_epi32(m_avx2, ROT)); | 82 | | #else | 83 | 4.32M | if constexpr(ROT == 8) | 84 | 4.32M | { | 85 | 4.32M | const __m256i shuf_rotl_8 = _mm256_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3, | 86 | 4.32M | 14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3); | 87 | | | 88 | 4.32M | return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_8)); | 89 | 4.32M | } | 90 | 4.32M | else if constexpr(ROT == 16) | 91 | 4.32M | { | 92 | 4.32M | const __m256i shuf_rotl_16 = _mm256_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2, | 93 | 4.32M | 13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2); | 94 | | | 95 | 4.32M | return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_16)); | 96 | 4.32M | } | 97 | 4.32M | else | 98 | 4.32M | { | 99 | 4.32M | return SIMD_8x32(_mm256_or_si256(_mm256_slli_epi32(m_avx2, static_cast<int>(ROT)), | 100 | 4.32M | _mm256_srli_epi32(m_avx2, static_cast<int>(32-ROT)))); | 101 | 4.32M | } | 102 | 4.32M | #endif | 103 | 4.32M | } |
|
104 | | |
// Rotates every 32-bit element right by the compile-time constant ROT.
// Implemented as a left rotation by 32-ROT, so the static_assert inside
// rotl() effectively restricts ROT to 0 < ROT < 32 here as well.
105 | | template<size_t ROT> |
106 | | BOTAN_FUNC_ISA("avx2") |
107 | | SIMD_8x32 rotr() const |
108 | 0 | { |
109 | 0 | return this->rotl<32-ROT>(); |
110 | 0 | } Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<13ul>() const Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<22ul>() const Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<5ul>() const Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<7ul>() const Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<1ul>() const Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<3ul>() const Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<2ul>() const Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<6ul>() const Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<11ul>() const Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<25ul>() const |
111 | | |
// Returns rotr<2>(x) ^ rotr<13>(x) ^ rotr<22>(x) for each 32-bit element x.
// The rotation amounts 2, 13 and 22 are those of the SHA-256 "big sigma 0"
// function.
112 | | SIMD_8x32 BOTAN_FUNC_ISA("avx2") sigma0() const |
113 | 0 | { |
114 | 0 | const SIMD_8x32 rot1 = this->rotr<2>(); |
115 | 0 | const SIMD_8x32 rot2 = this->rotr<13>(); |
116 | 0 | const SIMD_8x32 rot3 = this->rotr<22>(); |
117 | 0 | return rot1 ^ rot2 ^ rot3; |
118 | 0 | } |
119 | | |
// Returns rotr<6>(x) ^ rotr<11>(x) ^ rotr<25>(x) for each 32-bit element x.
// The rotation amounts 6, 11 and 25 are those of the SHA-256 "big sigma 1"
// function.
120 | | SIMD_8x32 BOTAN_FUNC_ISA("avx2") sigma1() const |
121 | 0 | { |
122 | 0 | const SIMD_8x32 rot1 = this->rotr<6>(); |
123 | 0 | const SIMD_8x32 rot2 = this->rotr<11>(); |
124 | 0 | const SIMD_8x32 rot3 = this->rotr<25>(); |
125 | 0 | return rot1 ^ rot2 ^ rot3; |
126 | 0 | } |
127 | | |
// Returns *this + other. Works on a copy and delegates to operator+=,
// so neither operand is modified.
128 | | BOTAN_FUNC_ISA("avx2") |
129 | | SIMD_8x32 operator+(const SIMD_8x32& other) const |
130 | 216k | { |
131 | 216k | SIMD_8x32 retval(*this); |
132 | 216k | retval += other; |
133 | 216k | return retval; |
134 | 216k | } |
135 | | |
// Returns *this - other. Works on a copy and delegates to operator-=,
// so neither operand is modified.
136 | | BOTAN_FUNC_ISA("avx2") |
137 | | SIMD_8x32 operator-(const SIMD_8x32& other) const |
138 | 0 | { |
139 | 0 | SIMD_8x32 retval(*this); |
140 | 0 | retval -= other; |
141 | 0 | return retval; |
142 | 0 | } |
143 | | |
// Returns *this XOR other. Works on a copy and delegates to operator^=,
// so neither operand is modified.
144 | | BOTAN_FUNC_ISA("avx2") |
145 | | SIMD_8x32 operator^(const SIMD_8x32& other) const |
146 | 0 | { |
147 | 0 | SIMD_8x32 retval(*this); |
148 | 0 | retval ^= other; |
149 | 0 | return retval; |
150 | 0 | } |
151 | | |
// Returns *this OR other. Works on a copy and delegates to operator|=,
// so neither operand is modified.
152 | | BOTAN_FUNC_ISA("avx2") |
153 | | SIMD_8x32 operator|(const SIMD_8x32& other) const |
154 | 0 | { |
155 | 0 | SIMD_8x32 retval(*this); |
156 | 0 | retval |= other; |
157 | 0 | return retval; |
158 | 0 | } |
159 | | |
// Returns *this AND other. Works on a copy and delegates to operator&=,
// so neither operand is modified.
160 | | BOTAN_FUNC_ISA("avx2") |
161 | | SIMD_8x32 operator&(const SIMD_8x32& other) const |
162 | 0 | { |
163 | 0 | SIMD_8x32 retval(*this); |
164 | 0 | retval &= other; |
165 | 0 | return retval; |
166 | 0 | } |
167 | | |
// In-place addition of the eight 32-bit elements of `other` to the
// corresponding elements of *this (each element is added independently).
// Returns void, so the result cannot be chained.
168 | | BOTAN_FUNC_ISA("avx2") |
169 | | void operator+=(const SIMD_8x32& other) |
170 | 18.3M | { |
171 | 18.3M | m_avx2 = _mm256_add_epi32(m_avx2, other.m_avx2); |
172 | 18.3M | } |
173 | | |
// In-place subtraction of the eight 32-bit elements of `other` from the
// corresponding elements of *this (each element is handled independently).
// Returns void, so the result cannot be chained.
174 | | BOTAN_FUNC_ISA("avx2") |
175 | | void operator-=(const SIMD_8x32& other) |
176 | 0 | { |
177 | 0 | m_avx2 = _mm256_sub_epi32(m_avx2, other.m_avx2); |
178 | 0 | } |
179 | | |
// In-place bitwise XOR of all 256 bits with `other`. Returns void.
180 | | BOTAN_FUNC_ISA("avx2") |
181 | | void operator^=(const SIMD_8x32& other) |
182 | 17.2M | { |
183 | 17.2M | m_avx2 = _mm256_xor_si256(m_avx2, other.m_avx2); |
184 | 17.2M | } |
185 | | |
// In-place XOR of every 32-bit element with the same scalar: `other` is
// first broadcast with splat() and then XORed via the vector overload.
186 | | BOTAN_FUNC_ISA("avx2") |
187 | | void operator^=(uint32_t other) |
188 | 0 | { |
189 | 0 | *this ^= SIMD_8x32::splat(other); |
190 | 0 | } |
191 | | |
// In-place bitwise OR of all 256 bits with `other`. Returns void.
192 | | BOTAN_FUNC_ISA("avx2") |
193 | | void operator|=(const SIMD_8x32& other) |
194 | 0 | { |
195 | 0 | m_avx2 = _mm256_or_si256(m_avx2, other.m_avx2); |
196 | 0 | } |
197 | | |
// In-place bitwise AND of all 256 bits with `other`. Returns void.
198 | | BOTAN_FUNC_ISA("avx2") |
199 | | void operator&=(const SIMD_8x32& other) |
200 | 0 | { |
201 | 0 | m_avx2 = _mm256_and_si256(m_avx2, other.m_avx2); |
202 | 0 | } |
203 | | |
// Returns a copy with every 32-bit element shifted left by the compile-time
// constant SHIFT; vacated low bits are zero.
// NOTE(review): unlike rotl(), SHIFT is not range-checked by a static_assert.
204 | | template<int SHIFT> BOTAN_FUNC_ISA("avx2") SIMD_8x32 shl() const |
205 | 0 | { |
206 | 0 | return SIMD_8x32(_mm256_slli_epi32(m_avx2, SHIFT)); |
207 | 0 | } Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::shl<3>() const Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::shl<7>() const |
208 | | |
// Returns a copy with every 32-bit element shifted right by the compile-time
// constant SHIFT. The shift is logical: vacated high bits are zero.
// NOTE(review): unlike rotl(), SHIFT is not range-checked by a static_assert.
209 | | template<int SHIFT> BOTAN_FUNC_ISA("avx2") SIMD_8x32 shr() const |
210 | | { |
211 | | return SIMD_8x32(_mm256_srli_epi32(m_avx2, SHIFT)); |
212 | | } |
213 | | |
// Returns the bitwise complement of all 256 bits, computed as an XOR with a
// vector whose 32-bit elements are all 0xFFFFFFFF.
214 | | BOTAN_FUNC_ISA("avx2") |
215 | | SIMD_8x32 operator~() const |
216 | 0 | { |
217 | 0 | return SIMD_8x32(_mm256_xor_si256(m_avx2, _mm256_set1_epi32(0xFFFFFFFF))); |
218 | 0 | } |
219 | | |
// AND-with-complement: returns (~*this) & other.
// _mm256_andnot_si256(a, b) complements its FIRST argument, so it is *this
// that is inverted, not `other`.
220 | | // (~reg) & other |
221 | | BOTAN_FUNC_ISA("avx2") |
222 | | SIMD_8x32 andc(const SIMD_8x32& other) const |
223 | 0 | { |
224 | 0 | return SIMD_8x32(_mm256_andnot_si256(m_avx2, other.m_avx2)); |
225 | 0 | } |
226 | | |
// Returns a copy in which the four bytes of every 32-bit element are
// reversed (endianness swap per element). *this is not modified.
227 | | BOTAN_FUNC_ISA("avx2") |
228 | | SIMD_8x32 bswap() const |
229 | 0 | { |
// Shuffle control, one entry per output byte. _mm256_shuffle_epi8 works
// inside each 128-bit lane and only looks at the low 4 bits of an index
// (bit 7 would zero the byte), so the entries 16..31 in the second half
// select bytes 0..15 of the upper lane.
230 | 0 | const uint8_t BSWAP_MASK[32] = { 3, 2, 1, 0, |
231 | 0 | 7, 6, 5, 4, |
232 | 0 | 11, 10, 9, 8, |
233 | 0 | 15, 14, 13, 12, |
234 | 0 | 19, 18, 17, 16, |
235 | 0 | 23, 22, 21, 20, |
236 | 0 | 27, 26, 25, 24, |
237 | 0 | 31, 30, 29, 28 }; |
238 | |
|
239 | 0 | const __m256i bswap = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(BSWAP_MASK)); |
240 | |
|
241 | 0 | const __m256i output = _mm256_shuffle_epi8(m_avx2, bswap); |
242 | |
|
243 | 0 | return SIMD_8x32(output); |
244 | 0 | } |
245 | | |
// In-place transpose of B0..B3 viewed as rows of 32-bit elements.
// The unpack intrinsics operate within each 128-bit lane, so this performs
// two independent 4x4 transposes: one on the low lanes of B0..B3 and one on
// the high lanes. Elements never cross between the lanes here.
246 | | BOTAN_FUNC_ISA("avx2") |
247 | | static void transpose(SIMD_8x32& B0, SIMD_8x32& B1, |
248 | | SIMD_8x32& B2, SIMD_8x32& B3) |
249 | 216k | { |
// Step 1: interleave 32-bit elements of row pairs (B0,B1) and (B2,B3).
250 | 216k | const __m256i T0 = _mm256_unpacklo_epi32(B0.m_avx2, B1.m_avx2); |
251 | 216k | const __m256i T1 = _mm256_unpacklo_epi32(B2.m_avx2, B3.m_avx2); |
252 | 216k | const __m256i T2 = _mm256_unpackhi_epi32(B0.m_avx2, B1.m_avx2); |
253 | 216k | const __m256i T3 = _mm256_unpackhi_epi32(B2.m_avx2, B3.m_avx2); |
254 | | |
// Step 2: interleave 64-bit pairs so each output row holds one column
// (per lane) of the original four rows.
255 | 216k | B0.m_avx2 = _mm256_unpacklo_epi64(T0, T1); |
256 | 216k | B1.m_avx2 = _mm256_unpackhi_epi64(T0, T1); |
257 | 216k | B2.m_avx2 = _mm256_unpacklo_epi64(T2, T3); |
258 | 216k | B3.m_avx2 = _mm256_unpackhi_epi64(T2, T3); |
259 | 216k | } |
260 | | |
// In-place transpose of the 8x8 matrix of 32-bit elements whose rows are
// B0..B7.
261 | | BOTAN_FUNC_ISA("avx2") |
262 | | static void transpose(SIMD_8x32& B0, SIMD_8x32& B1, |
263 | | SIMD_8x32& B2, SIMD_8x32& B3, |
264 | | SIMD_8x32& B4, SIMD_8x32& B5, |
265 | | SIMD_8x32& B6, SIMD_8x32& B7) |
266 | 108k | { |
// First transpose the 4x4 blocks within each 128-bit lane for rows 0-3 and
// rows 4-7 separately.
267 | 108k | transpose(B0, B1, B2, B3); |
268 | 108k | transpose(B4, B5, B6, B7); |
269 | | |
// Then exchange the high lane of Bi with the low lane of B(i+4), which
// moves the off-diagonal 4x4 blocks to their transposed positions.
270 | 108k | swap_tops(B0, B4); |
271 | 108k | swap_tops(B1, B5); |
272 | 108k | swap_tops(B2, B6); |
273 | 108k | swap_tops(B3, B7); |
274 | 108k | } |
275 | | |
// Bitwise select: for every bit position the result takes the bit from `a`
// where `mask` is 1 and from `b` where `mask` is 0.
276 | | BOTAN_FUNC_ISA("avx2") |
277 | | static SIMD_8x32 choose(const SIMD_8x32& mask, const SIMD_8x32& a, const SIMD_8x32& b) |
278 | 0 | { |
// 0xca is the ternary-logic truth table for (mask ? a : b). The __m256i
// result is converted to SIMD_8x32 through the non-explicit constructor.
279 | | #if defined(__AVX512VL__) |
280 | | return _mm256_ternarylogic_epi32(mask.handle(), a.handle(), b.handle(), 0xca); |
281 | | #else |
// (mask & a) and (~mask & b) never have a bit set in the same position, so
// XOR combines them exactly as OR would.
282 | 0 | return (mask & a) ^ mask.andc(b); |
283 | 0 | #endif |
284 | 0 | } |
285 | | |
// Bitwise majority: each result bit is 1 when at least two of the
// corresponding bits of x, y and z are 1.
286 | | BOTAN_FUNC_ISA("avx2") |
287 | | static SIMD_8x32 majority(const SIMD_8x32& x, const SIMD_8x32& y, const SIMD_8x32& z) |
288 | 0 | { |
// 0xe8 is the ternary-logic truth table for the majority function.
289 | | #if defined(__AVX512VL__) |
290 | | return _mm256_ternarylogic_epi32(x.handle(), y.handle(), z.handle(), 0xe8); |
291 | | #else |
// Where x and y differ, z casts the deciding vote; where they agree, the
// majority is that shared value (taken from y).
292 | 0 | return SIMD_8x32::choose(x ^ y, z, y); |
293 | 0 | #endif |
294 | 0 | } |
295 | | |
// Executes _mm256_zeroupper(), which zeroes the upper 128 bits of all YMM
// registers.
// NOTE(review): presumably called after AVX2 code to avoid AVX/SSE
// transition penalties - confirm against callers.
296 | | BOTAN_FUNC_ISA("avx2") |
297 | | static void reset_registers() |
298 | 54.0k | { |
299 | 54.0k | _mm256_zeroupper(); |
300 | 54.0k | } |
301 | | |
// Executes _mm256_zeroall(), which zeroes the full contents of all YMM
// registers.
// NOTE(review): presumably used to wipe sensitive values left in vector
// registers - confirm against callers.
302 | | BOTAN_FUNC_ISA("avx2") |
303 | | static void zero_registers() |
304 | 54.0k | { |
305 | 54.0k | _mm256_zeroall(); |
306 | 54.0k | } |
307 | | |
// Returns the wrapped __m256i by value, for use with raw intrinsics.
308 | 1.72M | __m256i BOTAN_FUNC_ISA("avx2") handle() const { return m_avx2; } |
309 | | |
// Wraps an existing __m256i value.
// This constructor is not marked explicit, and code in this class depends on
// the implicit conversion: the AVX-512VL branches of choose()/majority()
// return a raw __m256i, and swap_tops() copy-initializes from one.
310 | | BOTAN_FUNC_ISA("avx2") |
311 | 19.8M | SIMD_8x32(__m256i x) : m_avx2(x) {} |
312 | | |
313 | | private: |
314 | | |
// Exchanges the high 128-bit lane of A with the low 128-bit lane of B.
// Afterwards A = [A.low, B.low] and B = [A.high, B.high]
// (written as [low lane, high lane], in terms of the input values).
315 | | BOTAN_FUNC_ISA("avx2") |
316 | | static void swap_tops(SIMD_8x32& A, SIMD_8x32& B) |
317 | 432k | { |
// The immediate's low nibble picks the result's low lane and the high nibble
// picks the result's high lane; selectors 0/1 are A's low/high lane and 2/3
// are B's low/high lane. So 0x20 = (A.low, B.low), 0x31 = (A.high, B.high).
318 | 432k | SIMD_8x32 T0 = _mm256_permute2x128_si256(A.handle(), B.handle(), 0 + (2 << 4)); |
319 | 432k | SIMD_8x32 T1 = _mm256_permute2x128_si256(A.handle(), B.handle(), 1 + (3 << 4)); |
// Both temporaries are computed before either input is overwritten.
320 | 432k | A = T0; |
321 | 432k | B = T1; |
322 | 432k | } |
323 | | |
// The wrapped 256-bit value, treated throughout as eight 32-bit elements.
324 | | __m256i m_avx2; |
325 | | }; |
326 | | |
// Free-function form of SIMD_8x32::rotl<R>(): takes the vector by value and
// returns it with every 32-bit element rotated left by R bits.
// NOTE(review): unlike the member functions, this wrapper carries no
// BOTAN_FUNC_ISA("avx2") attribute - presumably it relies on being inlined
// into an AVX2-enabled caller; confirm.
327 | | template<size_t R> |
328 | | inline SIMD_8x32 rotl(SIMD_8x32 input) |
329 | | { |
330 | | return input.rotl<R>(); |
331 | | } |
332 | | |
// Free-function form of SIMD_8x32::rotr<R>(): takes the vector by value and
// returns it with every 32-bit element rotated right by R bits.
// NOTE(review): unlike the member functions, this wrapper carries no
// BOTAN_FUNC_ISA("avx2") attribute - presumably it relies on being inlined
// into an AVX2-enabled caller; confirm.
333 | | template<size_t R> |
334 | | inline SIMD_8x32 rotr(SIMD_8x32 input) |
335 | | { |
336 | | return input.rotr<R>(); |
337 | | } |
338 | | |
// Free-function form of SIMD_8x32::shl<S>(): takes the vector by value and
// returns it with every 32-bit element shifted left by S bits.
// The template parameter here is size_t while the member template takes int,
// so S is converted when it is forwarded.
339 | | // For Serpent: |
340 | | template<size_t S> |
341 | | inline SIMD_8x32 shl(SIMD_8x32 input) |
342 | | { |
343 | | return input.shl<S>(); |
344 | | } |
345 | | |
346 | | } |
347 | | |
348 | | #endif |