Coverage Report

Created: 2022-12-03 06:16

/src/botan/build/include/botan/internal/simd_avx2.h
Line
Count
Source (jump to first uncovered line)
1
/*
2
* (C) 2018 Jack Lloyd
3
*
4
* Botan is released under the Simplified BSD License (see license.txt)
5
*/
6
7
#ifndef BOTAN_SIMD_AVX2_H_
8
#define BOTAN_SIMD_AVX2_H_
9
10
#include <botan/types.h>
11
#include <immintrin.h>
12
13
namespace Botan {
14
15
class SIMD_8x32 final
16
   {
17
   public:
18
19
      // Copy and move construction/assignment are all compiler-generated.
      // The class holds a single __m256i member (m_avx2), so these simply
      // copy that 256-bit value.
      SIMD_8x32& operator=(const SIMD_8x32& other) = default;
20
      SIMD_8x32(const SIMD_8x32& other) = default;
21
22
      SIMD_8x32& operator=(SIMD_8x32&& other) = default;
23
      SIMD_8x32(SIMD_8x32&& other) = default;
24
25
      // Default constructor: initialises the register to all-zero bits.
      BOTAN_FUNC_ISA("avx2")
26
      BOTAN_FORCE_INLINE SIMD_8x32()
27
0
         {
28
0
         m_avx2 = _mm256_setzero_si256();
29
0
         }
30
31
      // Construct from an array of eight 32-bit words.
      // The words are read with an unaligned 256-bit load, so B needs no
      // particular alignment but must reference 32 readable bytes.
      BOTAN_FUNC_ISA("avx2")
32
      explicit SIMD_8x32(const uint32_t B[8])
33
0
         {
34
0
         m_avx2 = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(B));
35
0
         }
36
37
      // Construct from eight individual 32-bit words.
      // _mm256_set_epi32 takes its arguments from the highest element down to
      // the lowest, hence the reversed order below: B0 lands in element 0
      // (the lowest 32 bits) and B7 in element 7.
      BOTAN_FUNC_ISA("avx2")
38
      explicit SIMD_8x32(uint32_t B0, uint32_t B1, uint32_t B2, uint32_t B3,
39
                         uint32_t B4, uint32_t B5, uint32_t B6, uint32_t B7)
40
116k
         {
41
116k
         m_avx2 = _mm256_set_epi32(B7, B6, B5, B4, B3, B2, B1, B0);
42
116k
         }
43
44
      // Return a vector in which all eight 32-bit elements equal B.
      BOTAN_FUNC_ISA("avx2")
45
      static SIMD_8x32 splat(uint32_t B)
46
1.86M
         {
47
1.86M
         return SIMD_8x32(_mm256_set1_epi32(B));
48
1.86M
         }
49
50
      // Load 32 bytes from in with an unaligned load and no byte reordering.
      // NOTE(review): the little-endian naming relies on the target storing
      // words little-endian in memory, which holds for x86 - confirm if this
      // is ever built elsewhere.
      BOTAN_FUNC_ISA("avx2")
51
      static SIMD_8x32 load_le(const uint8_t* in)
52
0
         {
53
0
         return SIMD_8x32(_mm256_loadu_si256(reinterpret_cast<const __m256i*>(in)));
54
0
         }
55
56
      // Load 32 bytes from in as eight big-endian 32-bit words: performs the
      // same load as load_le and then reverses the bytes of every word.
      BOTAN_FUNC_ISA("avx2")
57
      static SIMD_8x32 load_be(const uint8_t* in)
58
0
         {
59
0
         return load_le(in).bswap();
60
0
         }
61
62
      // Write the 32 bytes of the register to out using an unaligned store,
      // with no byte reordering. out must have room for 32 bytes.
      BOTAN_FUNC_ISA("avx2")
63
      void store_le(uint8_t out[]) const
64
933k
         {
65
933k
         _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), m_avx2);
66
933k
         }
67
68
      // Write the register to out as eight big-endian 32-bit words: the bytes
      // of every word are reversed first, then stored exactly as store_le does.
      BOTAN_FUNC_ISA("avx2")
69
      void store_be(uint8_t out[]) const
70
0
         {
71
0
         bswap().store_le(out);
72
0
         }
73
74
      // Rotate each of the eight 32-bit elements left by ROT bits.
      // ROT is a compile-time constant and must satisfy 0 < ROT < 32, which
      // the static_assert in the body enforces.
      template<size_t ROT>
75
      BOTAN_FUNC_ISA("avx2")
76
      SIMD_8x32 rotl() const
77
18.6M
         {
78
18.6M
         static_assert(ROT > 0 && ROT < 32, "Invalid rotation constant");
79
80
         // When the build enables AVX-512VL a single rotate intrinsic is used.
#if defined(__AVX512VL__)
81
         return SIMD_8x32(_mm256_rol_epi32(m_avx2, ROT));
82
#else
83
18.6M
         // Rotating by exactly one byte is done with a single byte shuffle.
         // The 16-entry index table is written twice, once per 128-bit half.
         if constexpr(ROT == 8)
84
4.66M
            {
85
14.0M
            const __m256i shuf_rotl_8 = _mm256_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3,
86
14.0M
                                                        14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3);
87
88
14.0M
            return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_8));
89
14.0M
            }
90
14.0M
         // Rotating by two bytes swaps the 16-bit halves of every word, again
         // with one byte shuffle.
         else if constexpr(ROT == 16)
91
4.66M
            {
92
9.33M
            const __m256i shuf_rotl_16 = _mm256_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2,
93
9.33M
                                                         13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2);
94
95
9.33M
            return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_16));
96
9.33M
            }
97
9.33M
         // General case: (x << ROT) | (x >> (32 - ROT)) on every element.
         else
98
9.33M
            {
99
9.33M
            return SIMD_8x32(_mm256_or_si256(_mm256_slli_epi32(m_avx2, static_cast<int>(ROT)),
100
9.33M
                                             _mm256_srli_epi32(m_avx2, static_cast<int>(32-ROT))));
101
9.33M
            }
102
18.6M
#endif
103
18.6M
         }
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<19ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<10ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<13ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<3ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<1ul>() const
Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<7ul>() const
Line
Count
Source
77
4.66M
         {
78
4.66M
         static_assert(ROT > 0 && ROT < 32, "Invalid rotation constant");
79
80
#if defined(__AVX512VL__)
81
         return SIMD_8x32(_mm256_rol_epi32(m_avx2, ROT));
82
#else
83
4.66M
         if constexpr(ROT == 8)
84
0
            {
85
4.66M
            const __m256i shuf_rotl_8 = _mm256_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3,
86
4.66M
                                                        14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3);
87
88
4.66M
            return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_8));
89
4.66M
            }
90
4.66M
         else if constexpr(ROT == 16)
91
0
            {
92
4.66M
            const __m256i shuf_rotl_16 = _mm256_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2,
93
4.66M
                                                         13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2);
94
95
4.66M
            return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_16));
96
4.66M
            }
97
4.66M
         else
98
4.66M
            {
99
4.66M
            return SIMD_8x32(_mm256_or_si256(_mm256_slli_epi32(m_avx2, static_cast<int>(ROT)),
100
4.66M
                                             _mm256_srli_epi32(m_avx2, static_cast<int>(32-ROT))));
101
4.66M
            }
102
4.66M
#endif
103
4.66M
         }
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<5ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<22ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<27ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<25ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<31ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<29ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<30ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<26ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<21ul>() const
Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<16ul>() const
Line
Count
Source
77
4.66M
         {
78
4.66M
         static_assert(ROT > 0 && ROT < 32, "Invalid rotation constant");
79
80
#if defined(__AVX512VL__)
81
         return SIMD_8x32(_mm256_rol_epi32(m_avx2, ROT));
82
#else
83
4.66M
         if constexpr(ROT == 8)
84
0
            {
85
4.66M
            const __m256i shuf_rotl_8 = _mm256_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3,
86
4.66M
                                                        14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3);
87
88
4.66M
            return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_8));
89
4.66M
            }
90
4.66M
         else if constexpr(ROT == 16)
91
4.66M
            {
92
4.66M
            const __m256i shuf_rotl_16 = _mm256_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2,
93
4.66M
                                                         13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2);
94
95
4.66M
            return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_16));
96
4.66M
            }
97
4.66M
         else
98
4.66M
            {
99
4.66M
            return SIMD_8x32(_mm256_or_si256(_mm256_slli_epi32(m_avx2, static_cast<int>(ROT)),
100
4.66M
                                             _mm256_srli_epi32(m_avx2, static_cast<int>(32-ROT))));
101
4.66M
            }
102
4.66M
#endif
103
4.66M
         }
Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<12ul>() const
Line
Count
Source
77
4.66M
         {
78
4.66M
         static_assert(ROT > 0 && ROT < 32, "Invalid rotation constant");
79
80
#if defined(__AVX512VL__)
81
         return SIMD_8x32(_mm256_rol_epi32(m_avx2, ROT));
82
#else
83
4.66M
         if constexpr(ROT == 8)
84
0
            {
85
4.66M
            const __m256i shuf_rotl_8 = _mm256_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3,
86
4.66M
                                                        14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3);
87
88
4.66M
            return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_8));
89
4.66M
            }
90
4.66M
         else if constexpr(ROT == 16)
91
0
            {
92
4.66M
            const __m256i shuf_rotl_16 = _mm256_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2,
93
4.66M
                                                         13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2);
94
95
4.66M
            return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_16));
96
4.66M
            }
97
4.66M
         else
98
4.66M
            {
99
4.66M
            return SIMD_8x32(_mm256_or_si256(_mm256_slli_epi32(m_avx2, static_cast<int>(ROT)),
100
4.66M
                                             _mm256_srli_epi32(m_avx2, static_cast<int>(32-ROT))));
101
4.66M
            }
102
4.66M
#endif
103
4.66M
         }
Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<8ul>() const
Line
Count
Source
77
4.66M
         {
78
4.66M
         static_assert(ROT > 0 && ROT < 32, "Invalid rotation constant");
79
80
#if defined(__AVX512VL__)
81
         return SIMD_8x32(_mm256_rol_epi32(m_avx2, ROT));
82
#else
83
4.66M
         if constexpr(ROT == 8)
84
4.66M
            {
85
4.66M
            const __m256i shuf_rotl_8 = _mm256_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3,
86
4.66M
                                                        14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3);
87
88
4.66M
            return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_8));
89
4.66M
            }
90
4.66M
         else if constexpr(ROT == 16)
91
4.66M
            {
92
4.66M
            const __m256i shuf_rotl_16 = _mm256_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2,
93
4.66M
                                                         13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2);
94
95
4.66M
            return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_16));
96
4.66M
            }
97
4.66M
         else
98
4.66M
            {
99
4.66M
            return SIMD_8x32(_mm256_or_si256(_mm256_slli_epi32(m_avx2, static_cast<int>(ROT)),
100
4.66M
                                             _mm256_srli_epi32(m_avx2, static_cast<int>(32-ROT))));
101
4.66M
            }
102
4.66M
#endif
103
4.66M
         }
104
105
      // Rotate each 32-bit element right by ROT bits.
      // Implemented as a left rotation by 32-ROT, so the range check in rotl
      // (0 < 32-ROT < 32) also constrains ROT to 1..31.
      template<size_t ROT>
106
      BOTAN_FUNC_ISA("avx2")
107
      SIMD_8x32 rotr() const
108
0
         {
109
0
         return this->rotl<32-ROT>();
110
0
         }
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<13ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<22ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<5ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<7ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<1ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<3ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<2ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<6ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<11ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<25ul>() const
111
112
      // Returns rotr<2>(x) ^ rotr<13>(x) ^ rotr<22>(x) for every element x.
      // NOTE(review): these are the rotation amounts of the SHA-256 big-sigma-0
      // function; presumably this exists for a SHA-256 implementation - confirm
      // against callers.
      SIMD_8x32 BOTAN_FUNC_ISA("avx2") sigma0() const
113
0
         {
114
0
         const SIMD_8x32 rot1 = this->rotr<2>();
115
0
         const SIMD_8x32 rot2 = this->rotr<13>();
116
0
         const SIMD_8x32 rot3 = this->rotr<22>();
117
0
         return rot1 ^ rot2 ^ rot3;
118
0
         }
119
120
      // Returns rotr<6>(x) ^ rotr<11>(x) ^ rotr<25>(x) for every element x.
      // NOTE(review): these are the rotation amounts of the SHA-256 big-sigma-1
      // function; presumably this exists for a SHA-256 implementation - confirm
      // against callers.
      SIMD_8x32 BOTAN_FUNC_ISA("avx2") sigma1() const
121
0
         {
122
0
         const SIMD_8x32 rot1 = this->rotr<6>();
123
0
         const SIMD_8x32 rot2 = this->rotr<11>();
124
0
         const SIMD_8x32 rot3 = this->rotr<25>();
125
0
         return rot1 ^ rot2 ^ rot3;
126
0
         }
127
128
      // Element-wise 32-bit addition; returns a new vector and leaves both
      // operands unchanged. Delegates to operator+= on a copy of *this.
      BOTAN_FUNC_ISA("avx2")
129
      SIMD_8x32 operator+(const SIMD_8x32& other) const
130
233k
         {
131
233k
         SIMD_8x32 retval(*this);
132
233k
         retval += other;
133
233k
         return retval;
134
233k
         }
135
136
      // Element-wise 32-bit subtraction; returns a new vector and leaves both
      // operands unchanged. Delegates to operator-= on a copy of *this.
      BOTAN_FUNC_ISA("avx2")
137
      SIMD_8x32 operator-(const SIMD_8x32& other) const
138
0
         {
139
0
         SIMD_8x32 retval(*this);
140
0
         retval -= other;
141
0
         return retval;
142
0
         }
143
144
      // Bitwise XOR of the two registers; returns a new vector.
      // Delegates to operator^= on a copy of *this.
      BOTAN_FUNC_ISA("avx2")
145
      SIMD_8x32 operator^(const SIMD_8x32& other) const
146
0
         {
147
0
         SIMD_8x32 retval(*this);
148
0
         retval ^= other;
149
0
         return retval;
150
0
         }
151
152
      // Bitwise OR of the two registers; returns a new vector.
      // Delegates to operator|= on a copy of *this.
      BOTAN_FUNC_ISA("avx2")
153
      SIMD_8x32 operator|(const SIMD_8x32& other) const
154
0
         {
155
0
         SIMD_8x32 retval(*this);
156
0
         retval |= other;
157
0
         return retval;
158
0
         }
159
160
      // Bitwise AND of the two registers; returns a new vector.
      // Delegates to operator&= on a copy of *this.
      BOTAN_FUNC_ISA("avx2")
161
      SIMD_8x32 operator&(const SIMD_8x32& other) const
162
0
         {
163
0
         SIMD_8x32 retval(*this);
164
0
         retval &= other;
165
0
         return retval;
166
0
         }
167
168
      // In-place element-wise addition of the eight 32-bit elements.
      // Returns void, so the result cannot be chained.
      BOTAN_FUNC_ISA("avx2")
169
      void operator+=(const SIMD_8x32& other)
170
19.8M
         {
171
19.8M
         m_avx2 = _mm256_add_epi32(m_avx2, other.m_avx2);
172
19.8M
         }
173
174
      // In-place element-wise subtraction of the eight 32-bit elements.
      // Returns void, so the result cannot be chained.
      BOTAN_FUNC_ISA("avx2")
175
      void operator-=(const SIMD_8x32& other)
176
0
         {
177
0
         m_avx2 = _mm256_sub_epi32(m_avx2, other.m_avx2);
178
0
         }
179
180
      // In-place bitwise XOR with another vector across all 256 bits.
      BOTAN_FUNC_ISA("avx2")
181
      void operator^=(const SIMD_8x32& other)
182
18.6M
         {
183
18.6M
         m_avx2 = _mm256_xor_si256(m_avx2, other.m_avx2);
184
18.6M
         }
185
186
      // In-place XOR of every 32-bit element with the same scalar word:
      // the scalar is broadcast with splat and then XORed in.
      BOTAN_FUNC_ISA("avx2")
187
      void operator^=(uint32_t other)
188
0
         {
189
0
         *this ^= SIMD_8x32::splat(other);
190
0
         }
191
192
      // In-place bitwise OR with another vector across all 256 bits.
      BOTAN_FUNC_ISA("avx2")
193
      void operator|=(const SIMD_8x32& other)
194
0
         {
195
0
         m_avx2 = _mm256_or_si256(m_avx2, other.m_avx2);
196
0
         }
197
198
      // In-place bitwise AND with another vector across all 256 bits.
      BOTAN_FUNC_ISA("avx2")
199
      void operator&=(const SIMD_8x32& other)
200
0
         {
201
0
         m_avx2 = _mm256_and_si256(m_avx2, other.m_avx2);
202
0
         }
203
204
      // Shift each 32-bit element left by the compile-time constant SHIFT,
      // filling with zero bits. No range check is applied to SHIFT here.
      template<int SHIFT> BOTAN_FUNC_ISA("avx2") SIMD_8x32 shl() const
205
0
         {
206
0
         return SIMD_8x32(_mm256_slli_epi32(m_avx2, SHIFT));
207
0
         }
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::shl<3>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::shl<7>() const
208
209
      // Logical (zero-filling) right shift of each 32-bit element by the
      // compile-time constant SHIFT. No range check is applied to SHIFT here.
      template<int SHIFT> BOTAN_FUNC_ISA("avx2") SIMD_8x32 shr() const
210
         {
211
         return SIMD_8x32(_mm256_srli_epi32(m_avx2, SHIFT));
212
         }
213
214
      // Bitwise complement: XORs the register with a vector whose elements
      // are all 0xFFFFFFFF, flipping every bit.
      BOTAN_FUNC_ISA("avx2")
215
      SIMD_8x32 operator~() const
216
0
         {
217
0
         return SIMD_8x32(_mm256_xor_si256(m_avx2, _mm256_set1_epi32(0xFFFFFFFF)));
218
0
         }
219
220
      // (~reg) & other
      // That is: complement *this, then AND with other. Note the operand
      // order - it is *this that gets inverted, not the argument.
221
      BOTAN_FUNC_ISA("avx2")
222
      SIMD_8x32 andc(const SIMD_8x32& other) const
223
0
         {
224
0
         return SIMD_8x32(_mm256_andnot_si256(m_avx2, other.m_avx2));
225
0
         }
226
227
      // Reverse the byte order inside each of the eight 32-bit elements
      // (element positions themselves are unchanged).
      BOTAN_FUNC_ISA("avx2")
228
      SIMD_8x32 bswap() const
229
0
         {
230
0
         // Shuffle control: for each group of four bytes, pick them in
         // reverse order. Per the Intel documentation the byte shuffle works
         // inside each 128-bit half using only the low four index bits, so
         // entries 16..31 behave like 0..15 for the upper half.
         const uint8_t BSWAP_MASK[32] = { 3, 2, 1, 0,
231
0
                                          7, 6, 5, 4,
232
0
                                          11, 10, 9, 8,
233
0
                                          15, 14, 13, 12,
234
0
                                          19, 18, 17, 16,
235
0
                                          23, 22, 21, 20,
236
0
                                          27, 26, 25, 24,
237
0
                                          31, 30, 29, 28 };
238
239
0
         // Unaligned load, since the local array has no alignment guarantee.
         const __m256i bswap = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(BSWAP_MASK));
240
241
0
         const __m256i output = _mm256_shuffle_epi8(m_avx2, bswap);
242
243
0
         return SIMD_8x32(output);
244
0
         }
245
246
      // In-place transpose of four registers, viewed as rows of 32-bit words.
      // Per the Intel documentation the unpack intrinsics act separately on
      // each 128-bit half, so this is effectively two independent 4x4
      // transposes: one over the low halves and one over the high halves.
      BOTAN_FUNC_ISA("avx2")
247
      static void transpose(SIMD_8x32& B0, SIMD_8x32& B1,
248
                            SIMD_8x32& B2, SIMD_8x32& B3)
249
233k
         {
250
233k
         // Step 1: interleave 32-bit words of row pairs (B0,B1) and (B2,B3).
         const __m256i T0 = _mm256_unpacklo_epi32(B0.m_avx2, B1.m_avx2);
251
233k
         const __m256i T1 = _mm256_unpacklo_epi32(B2.m_avx2, B3.m_avx2);
252
233k
         const __m256i T2 = _mm256_unpackhi_epi32(B0.m_avx2, B1.m_avx2);
253
233k
         const __m256i T3 = _mm256_unpackhi_epi32(B2.m_avx2, B3.m_avx2);
254
255
233k
         // Step 2: interleave the resulting 64-bit pairs to form the columns.
         B0.m_avx2 = _mm256_unpacklo_epi64(T0, T1);
256
233k
         B1.m_avx2 = _mm256_unpackhi_epi64(T0, T1);
257
233k
         B2.m_avx2 = _mm256_unpacklo_epi64(T2, T3);
258
233k
         B3.m_avx2 = _mm256_unpackhi_epi64(T2, T3);
259
233k
         }
260
261
      // In-place transpose over eight registers.
      // First runs the four-register transpose on (B0..B3) and on (B4..B7),
      // then exchanges 128-bit halves between Bi and Bi+4 via swap_tops.
      BOTAN_FUNC_ISA("avx2")
262
      static void transpose(SIMD_8x32& B0, SIMD_8x32& B1,
263
                            SIMD_8x32& B2, SIMD_8x32& B3,
264
                            SIMD_8x32& B4, SIMD_8x32& B5,
265
                            SIMD_8x32& B6, SIMD_8x32& B7)
266
116k
         {
267
116k
         transpose(B0, B1, B2, B3);
268
116k
         transpose(B4, B5, B6, B7);
269
270
116k
         // Recombine halves across the two groups of four.
         swap_tops(B0, B4);
271
116k
         swap_tops(B1, B5);
272
116k
         swap_tops(B2, B6);
273
116k
         swap_tops(B3, B7);
274
116k
         }
275
276
      // Bitwise select: for every bit position, take the bit from a where
      // mask is 1 and from b where mask is 0, i.e. (mask & a) | (~mask & b).
      BOTAN_FUNC_ISA("avx2")
277
      static SIMD_8x32 choose(const SIMD_8x32& mask, const SIMD_8x32& a, const SIMD_8x32& b)
278
0
         {
279
         // With AVX-512VL a single ternary-logic instruction is used; 0xca is
         // the truth table of the select function. The raw __m256i result is
         // converted through the non-explicit __m256i constructor.
#if defined(__AVX512VL__)
280
         return _mm256_ternarylogic_epi32(mask.handle(), a.handle(), b.handle(), 0xca);
281
#else
282
0
         // The two terms never have a bit set in common, so XOR acts as OR.
         return (mask & a) ^ mask.andc(b);
283
0
#endif
284
0
         }
285
286
      // Bitwise majority: each result bit is 1 when at least two of the
      // corresponding bits of x, y and z are 1.
      BOTAN_FUNC_ISA("avx2")
287
      static SIMD_8x32 majority(const SIMD_8x32& x, const SIMD_8x32& y, const SIMD_8x32& z)
288
0
         {
289
         // With AVX-512VL a single ternary-logic instruction is used; 0xe8 is
         // the truth table of the three-input majority function.
#if defined(__AVX512VL__)
290
         return _mm256_ternarylogic_epi32(x.handle(), y.handle(), z.handle(), 0xe8);
291
#else
292
0
         // Where x and y differ, z casts the deciding vote; where they agree,
         // their common value (y) is already the majority.
         return SIMD_8x32::choose(x ^ y, z, y);
293
0
#endif
294
0
         }
295
296
      // Issues _mm256_zeroupper, which per the Intel documentation clears the
      // upper 128 bits of all YMM registers.
      // NOTE(review): presumably called after AVX2 code to avoid AVX/SSE
      // transition penalties - confirm against callers.
      BOTAN_FUNC_ISA("avx2")
297
      static void reset_registers()
298
58.3k
         {
299
58.3k
         _mm256_zeroupper();
300
58.3k
         }
301
302
      // Issues _mm256_zeroall, which per the Intel documentation clears all
      // YMM registers in full.
      // NOTE(review): presumably used to scrub sensitive values left in
      // vector registers - confirm against callers.
      BOTAN_FUNC_ISA("avx2")
303
      static void zero_registers()
304
58.3k
         {
305
58.3k
         _mm256_zeroall();
306
58.3k
         }
307
308
1.86M
      // Returns the underlying __m256i value by copy.
      __m256i BOTAN_FUNC_ISA("avx2") handle() const { return m_avx2; }
309
310
      // Wrap a raw __m256i value. Deliberately not explicit: code in this
      // class relies on the implicit conversion (swap_tops initialises
      // SIMD_8x32 objects directly from intrinsic results, and the AVX-512VL
      // branches of choose and majority return a raw __m256i).
      BOTAN_FUNC_ISA("avx2")
311
21.4M
      SIMD_8x32(__m256i x) : m_avx2(x) {}
312
313
   private:
314
315
      // Exchange 128-bit halves between A and B (helper for the
      // eight-register transpose). Per the Intel documentation of
      // _mm256_permute2x128_si256, afterwards:
      //   A = [ low half of old A , low half of old B  ]
      //   B = [ high half of old A, high half of old B ]
      // (written low half first).
      BOTAN_FUNC_ISA("avx2")
316
      static void swap_tops(SIMD_8x32& A, SIMD_8x32& B)
317
466k
         {
318
466k
         // Selector 0 + (2 << 4): result low = A low, result high = B low.
         SIMD_8x32 T0 = _mm256_permute2x128_si256(A.handle(), B.handle(), 0 + (2 << 4));
319
466k
         // Selector 1 + (3 << 4): result low = A high, result high = B high.
         SIMD_8x32 T1 = _mm256_permute2x128_si256(A.handle(), B.handle(), 1 + (3 << 4));
320
466k
         // Both results are computed before either input is overwritten.
         A = T0;
321
466k
         B = T1;
322
466k
         }
323
324
      __m256i m_avx2;
325
   };
326
327
// Free-function form of SIMD_8x32::rotl: rotates every 32-bit element of
// input left by the compile-time constant R and returns the result.
template<size_t R>
328
inline SIMD_8x32 rotl(SIMD_8x32 input)
329
   {
330
   return input.rotl<R>();
331
   }
332
333
// Free-function form of SIMD_8x32::rotr: rotates every 32-bit element of
// input right by the compile-time constant R and returns the result.
template<size_t R>
334
inline SIMD_8x32 rotr(SIMD_8x32 input)
335
   {
336
   return input.rotr<R>();
337
   }
338
339
// For Serpent:
// Free-function form of SIMD_8x32::shl: shifts every 32-bit element of input
// left by the compile-time constant S. S is declared size_t here while the
// member template takes an int, so S is converted when forwarded.
340
template<size_t S>
341
inline SIMD_8x32 shl(SIMD_8x32 input)
342
   {
343
   return input.shl<S>();
344
   }
345
346
}
347
348
#endif