Coverage Report

Created: 2024-11-29 06:10

/src/botan/build/include/internal/botan/internal/simd_avx2.h
Line
Count
Source (jump to first uncovered line)
1
/*
2
* (C) 2018 Jack Lloyd
3
*
4
* Botan is released under the Simplified BSD License (see license.txt)
5
*/
6
7
#ifndef BOTAN_SIMD_AVX2_H_
8
#define BOTAN_SIMD_AVX2_H_
9
10
#include <botan/types.h>
11
#include <immintrin.h>
12
13
namespace Botan {
14
15
#define BOTAN_AVX2_FN BOTAN_FUNC_ISA("avx2")
16
17
class SIMD_8x32 final {
18
   public:
19
      SIMD_8x32& operator=(const SIMD_8x32& other) = default;
20
      SIMD_8x32(const SIMD_8x32& other) = default;
21
22
      SIMD_8x32& operator=(SIMD_8x32&& other) = default;
23
      SIMD_8x32(SIMD_8x32&& other) = default;
24
25
      ~SIMD_8x32() = default;
26
27
      BOTAN_AVX2_FN
28
0
      BOTAN_FORCE_INLINE SIMD_8x32() noexcept { m_avx2 = _mm256_setzero_si256(); }
29
30
      BOTAN_AVX2_FN
31
0
      explicit SIMD_8x32(const uint32_t B[8]) noexcept {
32
0
         m_avx2 = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(B));
33
0
      }
34
35
      BOTAN_AVX2_FN
36
      explicit SIMD_8x32(uint32_t B0,
37
                         uint32_t B1,
38
                         uint32_t B2,
39
                         uint32_t B3,
40
                         uint32_t B4,
41
                         uint32_t B5,
42
                         uint32_t B6,
43
37.8k
                         uint32_t B7) noexcept {
44
37.8k
         m_avx2 = _mm256_set_epi32(B7, B6, B5, B4, B3, B2, B1, B0);
45
37.8k
      }
46
47
      BOTAN_AVX2_FN
48
606k
      static SIMD_8x32 splat(uint32_t B) noexcept { return SIMD_8x32(_mm256_set1_epi32(B)); }
49
50
      BOTAN_AVX2_FN
51
0
      static SIMD_8x32 load_le(const uint8_t* in) noexcept {
52
0
         return SIMD_8x32(_mm256_loadu_si256(reinterpret_cast<const __m256i*>(in)));
53
0
      }
54
55
      BOTAN_AVX2_FN
56
0
      static SIMD_8x32 load_le128(const uint8_t* in) noexcept {
57
0
         return SIMD_8x32(_mm256_broadcastsi128_si256(_mm_loadu_si128(reinterpret_cast<const __m128i*>(in))));
58
0
      }
59
60
      BOTAN_AVX2_FN
61
0
      static SIMD_8x32 load_le128(const uint32_t* in) noexcept {
62
0
         return SIMD_8x32(_mm256_broadcastsi128_si256(_mm_loadu_si128(reinterpret_cast<const __m128i*>(in))));
63
0
      }
64
65
      BOTAN_AVX2_FN
66
0
      static SIMD_8x32 load_be(const uint8_t* in) noexcept { return load_le(in).bswap(); }
67
68
      BOTAN_AVX2_FN
69
303k
      void store_le(uint8_t out[]) const noexcept { _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), m_avx2); }
70
71
      BOTAN_AVX2_FN
72
0
      void store_le128(uint8_t out[]) const noexcept {
73
0
         _mm_storeu_si128(reinterpret_cast<__m128i*>(out), _mm256_extracti128_si256(raw(), 0));
74
0
      }
75
76
      BOTAN_AVX2_FN
77
0
      void store_be(uint8_t out[]) const noexcept { bswap().store_le(out); }
78
79
      template <size_t ROT>
80
      BOTAN_AVX2_FN SIMD_8x32 rotl() const noexcept
81
         requires(ROT > 0 && ROT < 32)
82
6.06M
      {
83
#if defined(__AVX512VL__)
84
         return SIMD_8x32(_mm256_rol_epi32(m_avx2, ROT));
85
#else
86
6.06M
         if constexpr(ROT == 8) {
87
1.51M
            const __m256i shuf_rotl_8 =
88
1.51M
               _mm256_set_epi64x(0x0e0d0c0f'0a09080b, 0x06050407'02010003, 0x0e0d0c0f'0a09080b, 0x06050407'02010003);
89
90
1.51M
            return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_8));
91
1.51M
         } else if constexpr(ROT == 16) {
92
1.51M
            const __m256i shuf_rotl_16 =
93
1.51M
               _mm256_set_epi64x(0x0d0c0f0e'09080b0a, 0x05040706'01000302, 0x0d0c0f0e'09080b0a, 0x05040706'01000302);
94
95
1.51M
            return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_16));
96
1.51M
         } else if constexpr(ROT == 24) {
97
0
            const __m256i shuf_rotl_24 =
98
0
               _mm256_set_epi64x(0x0c0f0e0d'080b0a09, 0x04070605'00030201, 0x0c0f0e0d'080b0a09, 0x04070605'00030201);
99
100
0
            return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_24));
101
3.03M
         } else {
102
3.03M
            return SIMD_8x32(_mm256_or_si256(_mm256_slli_epi32(m_avx2, static_cast<int>(ROT)),
103
3.03M
                                             _mm256_srli_epi32(m_avx2, static_cast<int>(32 - ROT))));
104
3.03M
         }
105
6.06M
#endif
106
6.06M
      }
Unexecuted instantiation: _ZNK5Botan9SIMD_8x324rotlILm13EEES0_vQaagtT_Li0EltT_Li32E
Unexecuted instantiation: _ZNK5Botan9SIMD_8x324rotlILm3EEES0_vQaagtT_Li0EltT_Li32E
Unexecuted instantiation: _ZNK5Botan9SIMD_8x324rotlILm1EEES0_vQaagtT_Li0EltT_Li32E
_ZNK5Botan9SIMD_8x324rotlILm7EEES0_vQaagtT_Li0EltT_Li32E
Line
Count
Source
82
1.51M
      {
83
#if defined(__AVX512VL__)
84
         return SIMD_8x32(_mm256_rol_epi32(m_avx2, ROT));
85
#else
86
         if constexpr(ROT == 8) {
87
            const __m256i shuf_rotl_8 =
88
               _mm256_set_epi64x(0x0e0d0c0f'0a09080b, 0x06050407'02010003, 0x0e0d0c0f'0a09080b, 0x06050407'02010003);
89
90
            return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_8));
91
         } else if constexpr(ROT == 16) {
92
            const __m256i shuf_rotl_16 =
93
               _mm256_set_epi64x(0x0d0c0f0e'09080b0a, 0x05040706'01000302, 0x0d0c0f0e'09080b0a, 0x05040706'01000302);
94
95
            return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_16));
96
         } else if constexpr(ROT == 24) {
97
            const __m256i shuf_rotl_24 =
98
               _mm256_set_epi64x(0x0c0f0e0d'080b0a09, 0x04070605'00030201, 0x0c0f0e0d'080b0a09, 0x04070605'00030201);
99
100
            return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_24));
101
1.51M
         } else {
102
1.51M
            return SIMD_8x32(_mm256_or_si256(_mm256_slli_epi32(m_avx2, static_cast<int>(ROT)),
103
1.51M
                                             _mm256_srli_epi32(m_avx2, static_cast<int>(32 - ROT))));
104
1.51M
         }
105
1.51M
#endif
106
1.51M
      }
Unexecuted instantiation: _ZNK5Botan9SIMD_8x324rotlILm5EEES0_vQaagtT_Li0EltT_Li32E
Unexecuted instantiation: _ZNK5Botan9SIMD_8x324rotlILm22EEES0_vQaagtT_Li0EltT_Li32E
Unexecuted instantiation: _ZNK5Botan9SIMD_8x324rotlILm10EEES0_vQaagtT_Li0EltT_Li32E
Unexecuted instantiation: _ZNK5Botan9SIMD_8x324rotlILm27EEES0_vQaagtT_Li0EltT_Li32E
Unexecuted instantiation: _ZNK5Botan9SIMD_8x324rotlILm25EEES0_vQaagtT_Li0EltT_Li32E
Unexecuted instantiation: _ZNK5Botan9SIMD_8x324rotlILm31EEES0_vQaagtT_Li0EltT_Li32E
Unexecuted instantiation: _ZNK5Botan9SIMD_8x324rotlILm29EEES0_vQaagtT_Li0EltT_Li32E
Unexecuted instantiation: _ZNK5Botan9SIMD_8x324rotlILm19EEES0_vQaagtT_Li0EltT_Li32E
Unexecuted instantiation: _ZNK5Botan9SIMD_8x324rotlILm30EEES0_vQaagtT_Li0EltT_Li32E
Unexecuted instantiation: _ZNK5Botan9SIMD_8x324rotlILm26EEES0_vQaagtT_Li0EltT_Li32E
Unexecuted instantiation: _ZNK5Botan9SIMD_8x324rotlILm21EEES0_vQaagtT_Li0EltT_Li32E
Unexecuted instantiation: _ZNK5Botan9SIMD_8x324rotlILm2EEES0_vQaagtT_Li0EltT_Li32E
Unexecuted instantiation: _ZNK5Botan9SIMD_8x324rotlILm18EEES0_vQaagtT_Li0EltT_Li32E
Unexecuted instantiation: _ZNK5Botan9SIMD_8x324rotlILm24EEES0_vQaagtT_Li0EltT_Li32E
_ZNK5Botan9SIMD_8x324rotlILm16EEES0_vQaagtT_Li0EltT_Li32E
Line
Count
Source
82
1.51M
      {
83
#if defined(__AVX512VL__)
84
         return SIMD_8x32(_mm256_rol_epi32(m_avx2, ROT));
85
#else
86
         if constexpr(ROT == 8) {
87
            const __m256i shuf_rotl_8 =
88
               _mm256_set_epi64x(0x0e0d0c0f'0a09080b, 0x06050407'02010003, 0x0e0d0c0f'0a09080b, 0x06050407'02010003);
89
90
            return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_8));
91
1.51M
         } else if constexpr(ROT == 16) {
92
1.51M
            const __m256i shuf_rotl_16 =
93
1.51M
               _mm256_set_epi64x(0x0d0c0f0e'09080b0a, 0x05040706'01000302, 0x0d0c0f0e'09080b0a, 0x05040706'01000302);
94
95
1.51M
            return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_16));
96
         } else if constexpr(ROT == 24) {
97
            const __m256i shuf_rotl_24 =
98
               _mm256_set_epi64x(0x0c0f0e0d'080b0a09, 0x04070605'00030201, 0x0c0f0e0d'080b0a09, 0x04070605'00030201);
99
100
            return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_24));
101
         } else {
102
            return SIMD_8x32(_mm256_or_si256(_mm256_slli_epi32(m_avx2, static_cast<int>(ROT)),
103
                                             _mm256_srli_epi32(m_avx2, static_cast<int>(32 - ROT))));
104
         }
105
1.51M
#endif
106
1.51M
      }
_ZNK5Botan9SIMD_8x324rotlILm12EEES0_vQaagtT_Li0EltT_Li32E
Line
Count
Source
82
1.51M
      {
83
#if defined(__AVX512VL__)
84
         return SIMD_8x32(_mm256_rol_epi32(m_avx2, ROT));
85
#else
86
         if constexpr(ROT == 8) {
87
            const __m256i shuf_rotl_8 =
88
               _mm256_set_epi64x(0x0e0d0c0f'0a09080b, 0x06050407'02010003, 0x0e0d0c0f'0a09080b, 0x06050407'02010003);
89
90
            return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_8));
91
         } else if constexpr(ROT == 16) {
92
            const __m256i shuf_rotl_16 =
93
               _mm256_set_epi64x(0x0d0c0f0e'09080b0a, 0x05040706'01000302, 0x0d0c0f0e'09080b0a, 0x05040706'01000302);
94
95
            return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_16));
96
         } else if constexpr(ROT == 24) {
97
            const __m256i shuf_rotl_24 =
98
               _mm256_set_epi64x(0x0c0f0e0d'080b0a09, 0x04070605'00030201, 0x0c0f0e0d'080b0a09, 0x04070605'00030201);
99
100
            return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_24));
101
1.51M
         } else {
102
1.51M
            return SIMD_8x32(_mm256_or_si256(_mm256_slli_epi32(m_avx2, static_cast<int>(ROT)),
103
1.51M
                                             _mm256_srli_epi32(m_avx2, static_cast<int>(32 - ROT))));
104
1.51M
         }
105
1.51M
#endif
106
1.51M
      }
_ZNK5Botan9SIMD_8x324rotlILm8EEES0_vQaagtT_Li0EltT_Li32E
Line
Count
Source
82
1.51M
      {
83
#if defined(__AVX512VL__)
84
         return SIMD_8x32(_mm256_rol_epi32(m_avx2, ROT));
85
#else
86
1.51M
         if constexpr(ROT == 8) {
87
1.51M
            const __m256i shuf_rotl_8 =
88
1.51M
               _mm256_set_epi64x(0x0e0d0c0f'0a09080b, 0x06050407'02010003, 0x0e0d0c0f'0a09080b, 0x06050407'02010003);
89
90
1.51M
            return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_8));
91
         } else if constexpr(ROT == 16) {
92
            const __m256i shuf_rotl_16 =
93
               _mm256_set_epi64x(0x0d0c0f0e'09080b0a, 0x05040706'01000302, 0x0d0c0f0e'09080b0a, 0x05040706'01000302);
94
95
            return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_16));
96
         } else if constexpr(ROT == 24) {
97
            const __m256i shuf_rotl_24 =
98
               _mm256_set_epi64x(0x0c0f0e0d'080b0a09, 0x04070605'00030201, 0x0c0f0e0d'080b0a09, 0x04070605'00030201);
99
100
            return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_24));
101
         } else {
102
            return SIMD_8x32(_mm256_or_si256(_mm256_slli_epi32(m_avx2, static_cast<int>(ROT)),
103
                                             _mm256_srli_epi32(m_avx2, static_cast<int>(32 - ROT))));
104
         }
105
1.51M
#endif
106
1.51M
      }
107
108
      template <size_t ROT>
109
0
      BOTAN_AVX2_FN SIMD_8x32 rotr() const noexcept {
110
0
         return this->rotl<32 - ROT>();
111
0
      }
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<22ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<5ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<7ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<1ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<3ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<13ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<2ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<6ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<11ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<25ul>() const
112
113
0
      SIMD_8x32 BOTAN_AVX2_FN sigma0() const noexcept {
114
0
         const SIMD_8x32 rot1 = this->rotr<2>();
115
0
         const SIMD_8x32 rot2 = this->rotr<13>();
116
0
         const SIMD_8x32 rot3 = this->rotr<22>();
117
0
         return rot1 ^ rot2 ^ rot3;
118
0
      }
119
120
0
      SIMD_8x32 BOTAN_AVX2_FN sigma1() const noexcept {
121
0
         const SIMD_8x32 rot1 = this->rotr<6>();
122
0
         const SIMD_8x32 rot2 = this->rotr<11>();
123
0
         const SIMD_8x32 rot3 = this->rotr<25>();
124
0
         return rot1 ^ rot2 ^ rot3;
125
0
      }
126
127
      BOTAN_AVX2_FN
128
75.7k
      SIMD_8x32 operator+(const SIMD_8x32& other) const noexcept {
129
75.7k
         SIMD_8x32 retval(*this);
130
75.7k
         retval += other;
131
75.7k
         return retval;
132
75.7k
      }
133
134
      BOTAN_AVX2_FN
135
0
      SIMD_8x32 operator-(const SIMD_8x32& other) const noexcept {
136
0
         SIMD_8x32 retval(*this);
137
0
         retval -= other;
138
0
         return retval;
139
0
      }
140
141
      BOTAN_AVX2_FN
142
0
      SIMD_8x32 operator^(const SIMD_8x32& other) const noexcept {
143
0
         SIMD_8x32 retval(*this);
144
0
         retval ^= other;
145
0
         return retval;
146
0
      }
147
148
      BOTAN_AVX2_FN
149
0
      SIMD_8x32 operator|(const SIMD_8x32& other) const noexcept {
150
0
         SIMD_8x32 retval(*this);
151
0
         retval |= other;
152
0
         return retval;
153
0
      }
154
155
      BOTAN_AVX2_FN
156
0
      SIMD_8x32 operator&(const SIMD_8x32& other) const noexcept {
157
0
         SIMD_8x32 retval(*this);
158
0
         retval &= other;
159
0
         return retval;
160
0
      }
161
162
      BOTAN_AVX2_FN
163
6.44M
      void operator+=(const SIMD_8x32& other) { m_avx2 = _mm256_add_epi32(m_avx2, other.m_avx2); }
164
165
      BOTAN_AVX2_FN
166
0
      void operator-=(const SIMD_8x32& other) { m_avx2 = _mm256_sub_epi32(m_avx2, other.m_avx2); }
167
168
      BOTAN_AVX2_FN
169
6.06M
      void operator^=(const SIMD_8x32& other) { m_avx2 = _mm256_xor_si256(m_avx2, other.m_avx2); }
170
171
      BOTAN_AVX2_FN
172
0
      void operator^=(uint32_t other) { *this ^= SIMD_8x32::splat(other); }
173
174
      BOTAN_AVX2_FN
175
0
      void operator|=(const SIMD_8x32& other) { m_avx2 = _mm256_or_si256(m_avx2, other.m_avx2); }
176
177
      BOTAN_AVX2_FN
178
0
      void operator&=(const SIMD_8x32& other) { m_avx2 = _mm256_and_si256(m_avx2, other.m_avx2); }
179
180
      template <int SHIFT>
181
0
      BOTAN_AVX2_FN SIMD_8x32 shl() const noexcept {
182
0
         return SIMD_8x32(_mm256_slli_epi32(m_avx2, SHIFT));
183
0
      }
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::shl<3>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::shl<7>() const
184
185
      template <int SHIFT>
186
      BOTAN_AVX2_FN SIMD_8x32 shr() const noexcept {
187
         return SIMD_8x32(_mm256_srli_epi32(m_avx2, SHIFT));
188
      }
189
190
      BOTAN_AVX2_FN
191
0
      SIMD_8x32 operator~() const noexcept {
192
0
         return SIMD_8x32(_mm256_xor_si256(m_avx2, _mm256_set1_epi32(0xFFFFFFFF)));
193
0
      }
194
195
      // (~reg) & other
196
      BOTAN_AVX2_FN
197
0
      SIMD_8x32 andc(const SIMD_8x32& other) const noexcept {
198
0
         return SIMD_8x32(_mm256_andnot_si256(m_avx2, other.m_avx2));
199
0
      }
200
201
      BOTAN_AVX2_FN
202
0
      SIMD_8x32 bswap() const noexcept {
203
0
         const uint8_t BSWAP_MASK[32] = {3,  2,  1,  0,  7,  6,  5,  4,  11, 10, 9,  8,  15, 14, 13, 12,
204
0
                                         19, 18, 17, 16, 23, 22, 21, 20, 27, 26, 25, 24, 31, 30, 29, 28};
205
206
0
         const __m256i bswap = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(BSWAP_MASK));
207
208
0
         const __m256i output = _mm256_shuffle_epi8(m_avx2, bswap);
209
210
0
         return SIMD_8x32(output);
211
0
      }
212
213
      BOTAN_AVX2_FN
214
0
      SIMD_8x32 rev_words() const noexcept { return SIMD_8x32(_mm256_shuffle_epi32(raw(), 0b00011011)); }
215
216
      BOTAN_AVX2_FN
217
75.7k
      static void transpose(SIMD_8x32& B0, SIMD_8x32& B1, SIMD_8x32& B2, SIMD_8x32& B3) noexcept {
218
75.7k
         const __m256i T0 = _mm256_unpacklo_epi32(B0.m_avx2, B1.m_avx2);
219
75.7k
         const __m256i T1 = _mm256_unpacklo_epi32(B2.m_avx2, B3.m_avx2);
220
75.7k
         const __m256i T2 = _mm256_unpackhi_epi32(B0.m_avx2, B1.m_avx2);
221
75.7k
         const __m256i T3 = _mm256_unpackhi_epi32(B2.m_avx2, B3.m_avx2);
222
223
75.7k
         B0.m_avx2 = _mm256_unpacklo_epi64(T0, T1);
224
75.7k
         B1.m_avx2 = _mm256_unpackhi_epi64(T0, T1);
225
75.7k
         B2.m_avx2 = _mm256_unpacklo_epi64(T2, T3);
226
75.7k
         B3.m_avx2 = _mm256_unpackhi_epi64(T2, T3);
227
75.7k
      }
228
229
      BOTAN_AVX2_FN
230
      static void transpose(SIMD_8x32& B0,
231
                            SIMD_8x32& B1,
232
                            SIMD_8x32& B2,
233
                            SIMD_8x32& B3,
234
                            SIMD_8x32& B4,
235
                            SIMD_8x32& B5,
236
                            SIMD_8x32& B6,
237
37.8k
                            SIMD_8x32& B7) noexcept {
238
37.8k
         transpose(B0, B1, B2, B3);
239
37.8k
         transpose(B4, B5, B6, B7);
240
241
37.8k
         swap_tops(B0, B4);
242
37.8k
         swap_tops(B1, B5);
243
37.8k
         swap_tops(B2, B6);
244
37.8k
         swap_tops(B3, B7);
245
37.8k
      }
246
247
      BOTAN_AVX2_FN
248
0
      static SIMD_8x32 choose(const SIMD_8x32& mask, const SIMD_8x32& a, const SIMD_8x32& b) noexcept {
249
#if defined(__AVX512VL__)
250
         return _mm256_ternarylogic_epi32(mask.raw(), a.raw(), b.raw(), 0xca);
251
#else
252
0
         return (mask & a) ^ mask.andc(b);
253
0
#endif
254
0
      }
255
256
      BOTAN_AVX2_FN
257
0
      static SIMD_8x32 majority(const SIMD_8x32& x, const SIMD_8x32& y, const SIMD_8x32& z) noexcept {
258
#if defined(__AVX512VL__)
259
         return _mm256_ternarylogic_epi32(x.raw(), y.raw(), z.raw(), 0xe8);
260
#else
261
0
         return SIMD_8x32::choose(x ^ y, z, y);
262
0
#endif
263
0
      }
264
265
      BOTAN_AVX2_FN
266
18.9k
      static void reset_registers() noexcept { _mm256_zeroupper(); }
267
268
      BOTAN_AVX2_FN
269
18.9k
      static void zero_registers() noexcept { _mm256_zeroall(); }
270
271
606k
      __m256i BOTAN_AVX2_FN raw() const noexcept { return m_avx2; }
272
273
      BOTAN_AVX2_FN
274
6.97M
      SIMD_8x32(__m256i x) noexcept : m_avx2(x) {}
275
276
   private:
277
      BOTAN_AVX2_FN
278
151k
      static void swap_tops(SIMD_8x32& A, SIMD_8x32& B) {
279
151k
         SIMD_8x32 T0 = _mm256_permute2x128_si256(A.raw(), B.raw(), 0 + (2 << 4));
280
151k
         SIMD_8x32 T1 = _mm256_permute2x128_si256(A.raw(), B.raw(), 1 + (3 << 4));
281
151k
         A = T0;
282
151k
         B = T1;
283
151k
      }
284
285
      __m256i m_avx2;
286
};
287
288
template <size_t R>
289
0
inline SIMD_8x32 rotl(SIMD_8x32 input) {
290
0
   return input.rotl<R>();
291
0
}
Unexecuted instantiation: Botan::SIMD_8x32 Botan::rotl<13ul>(Botan::SIMD_8x32)
Unexecuted instantiation: Botan::SIMD_8x32 Botan::rotl<3ul>(Botan::SIMD_8x32)
Unexecuted instantiation: Botan::SIMD_8x32 Botan::rotl<1ul>(Botan::SIMD_8x32)
Unexecuted instantiation: Botan::SIMD_8x32 Botan::rotl<7ul>(Botan::SIMD_8x32)
Unexecuted instantiation: Botan::SIMD_8x32 Botan::rotl<5ul>(Botan::SIMD_8x32)
Unexecuted instantiation: Botan::SIMD_8x32 Botan::rotl<22ul>(Botan::SIMD_8x32)
292
293
template <size_t R>
294
0
inline SIMD_8x32 rotr(SIMD_8x32 input) {
295
0
   return input.rotr<R>();
296
0
}
Unexecuted instantiation: Botan::SIMD_8x32 Botan::rotr<22ul>(Botan::SIMD_8x32)
Unexecuted instantiation: Botan::SIMD_8x32 Botan::rotr<5ul>(Botan::SIMD_8x32)
Unexecuted instantiation: Botan::SIMD_8x32 Botan::rotr<7ul>(Botan::SIMD_8x32)
Unexecuted instantiation: Botan::SIMD_8x32 Botan::rotr<1ul>(Botan::SIMD_8x32)
Unexecuted instantiation: Botan::SIMD_8x32 Botan::rotr<3ul>(Botan::SIMD_8x32)
Unexecuted instantiation: Botan::SIMD_8x32 Botan::rotr<13ul>(Botan::SIMD_8x32)
297
298
// For Serpent:
299
template <size_t S>
300
0
inline SIMD_8x32 shl(SIMD_8x32 input) {
301
0
   return input.shl<S>();
302
0
}
Unexecuted instantiation: Botan::SIMD_8x32 Botan::shl<3ul>(Botan::SIMD_8x32)
Unexecuted instantiation: Botan::SIMD_8x32 Botan::shl<7ul>(Botan::SIMD_8x32)
303
304
}  // namespace Botan
305
306
#endif