Coverage Report

Created: 2022-06-23 06:44

/src/botan/build/include/botan/internal/simd_avx2.h
Line
Count
Source (jump to first uncovered line)
1
/*
2
* (C) 2018 Jack Lloyd
3
*
4
* Botan is released under the Simplified BSD License (see license.txt)
5
*/
6
7
#ifndef BOTAN_SIMD_AVX2_H_
8
#define BOTAN_SIMD_AVX2_H_
9
10
#include <botan/types.h>
11
#include <immintrin.h>
12
13
namespace Botan {
14
15
class SIMD_8x32 final
16
   {
17
   public:
18
19
      SIMD_8x32& operator=(const SIMD_8x32& other) = default;
20
      SIMD_8x32(const SIMD_8x32& other) = default;
21
22
      SIMD_8x32& operator=(SIMD_8x32&& other) = default;
23
      SIMD_8x32(SIMD_8x32&& other) = default;
24
25
      BOTAN_FUNC_ISA("avx2")
26
      BOTAN_FORCE_INLINE SIMD_8x32()
27
0
         {
28
0
         m_avx2 = _mm256_setzero_si256();
29
0
         }
30
31
      BOTAN_FUNC_ISA("avx2")
32
      explicit SIMD_8x32(const uint32_t B[8])
33
0
         {
34
0
         m_avx2 = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(B));
35
0
         }
36
37
      BOTAN_FUNC_ISA("avx2")
38
      explicit SIMD_8x32(uint32_t B0, uint32_t B1, uint32_t B2, uint32_t B3,
39
                         uint32_t B4, uint32_t B5, uint32_t B6, uint32_t B7)
40
108k
         {
41
108k
         m_avx2 = _mm256_set_epi32(B7, B6, B5, B4, B3, B2, B1, B0);
42
108k
         }
43
44
      BOTAN_FUNC_ISA("avx2")
45
      static SIMD_8x32 splat(uint32_t B)
46
1.72M
         {
47
1.72M
         return SIMD_8x32(_mm256_set1_epi32(B));
48
1.72M
         }
49
50
      BOTAN_FUNC_ISA("avx2")
51
      static SIMD_8x32 load_le(const uint8_t* in)
52
0
         {
53
0
         return SIMD_8x32(_mm256_loadu_si256(reinterpret_cast<const __m256i*>(in)));
54
0
         }
55
56
      BOTAN_FUNC_ISA("avx2")
57
      static SIMD_8x32 load_be(const uint8_t* in)
58
0
         {
59
0
         return load_le(in).bswap();
60
0
         }
61
62
      BOTAN_FUNC_ISA("avx2")
63
      void store_le(uint8_t out[]) const
64
864k
         {
65
864k
         _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), m_avx2);
66
864k
         }
67
68
      BOTAN_FUNC_ISA("avx2")
69
      void store_be(uint8_t out[]) const
70
0
         {
71
0
         bswap().store_le(out);
72
0
         }
73
74
      template<size_t ROT>
75
      BOTAN_FUNC_ISA("avx2")
76
      SIMD_8x32 rotl() const
77
17.2M
         {
78
17.2M
         static_assert(ROT > 0 && ROT < 32, "Invalid rotation constant");
79
80
#if defined(__AVX512VL__)
81
         return SIMD_8x32(_mm256_rol_epi32(m_avx2, ROT));
82
#else
83
17.2M
         if constexpr(ROT == 8)
84
4.32M
            {
85
12.9M
            const __m256i shuf_rotl_8 = _mm256_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3,
86
12.9M
                                                        14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3);
87
88
12.9M
            return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_8));
89
12.9M
            }
90
12.9M
         else if constexpr(ROT == 16)
91
4.32M
            {
92
8.64M
            const __m256i shuf_rotl_16 = _mm256_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2,
93
8.64M
                                                         13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2);
94
95
8.64M
            return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_16));
96
8.64M
            }
97
8.64M
         else
98
8.64M
            {
99
8.64M
            return SIMD_8x32(_mm256_or_si256(_mm256_slli_epi32(m_avx2, static_cast<int>(ROT)),
100
8.64M
                                             _mm256_srli_epi32(m_avx2, static_cast<int>(32-ROT))));
101
8.64M
            }
102
17.2M
#endif
103
17.2M
         }
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<19ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<10ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<13ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<3ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<1ul>() const
Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<7ul>() const
Line
Count
Source
77
4.32M
         {
78
4.32M
         static_assert(ROT > 0 && ROT < 32, "Invalid rotation constant");
79
80
#if defined(__AVX512VL__)
81
         return SIMD_8x32(_mm256_rol_epi32(m_avx2, ROT));
82
#else
83
4.32M
         if constexpr(ROT == 8)
84
0
            {
85
4.32M
            const __m256i shuf_rotl_8 = _mm256_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3,
86
4.32M
                                                        14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3);
87
88
4.32M
            return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_8));
89
4.32M
            }
90
4.32M
         else if constexpr(ROT == 16)
91
0
            {
92
4.32M
            const __m256i shuf_rotl_16 = _mm256_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2,
93
4.32M
                                                         13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2);
94
95
4.32M
            return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_16));
96
4.32M
            }
97
4.32M
         else
98
4.32M
            {
99
4.32M
            return SIMD_8x32(_mm256_or_si256(_mm256_slli_epi32(m_avx2, static_cast<int>(ROT)),
100
4.32M
                                             _mm256_srli_epi32(m_avx2, static_cast<int>(32-ROT))));
101
4.32M
            }
102
4.32M
#endif
103
4.32M
         }
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<5ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<22ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<27ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<25ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<31ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<29ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<30ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<26ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<21ul>() const
Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<16ul>() const
Line
Count
Source
77
4.32M
         {
78
4.32M
         static_assert(ROT > 0 && ROT < 32, "Invalid rotation constant");
79
80
#if defined(__AVX512VL__)
81
         return SIMD_8x32(_mm256_rol_epi32(m_avx2, ROT));
82
#else
83
4.32M
         if constexpr(ROT == 8)
84
0
            {
85
4.32M
            const __m256i shuf_rotl_8 = _mm256_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3,
86
4.32M
                                                        14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3);
87
88
4.32M
            return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_8));
89
4.32M
            }
90
4.32M
         else if constexpr(ROT == 16)
91
4.32M
            {
92
4.32M
            const __m256i shuf_rotl_16 = _mm256_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2,
93
4.32M
                                                         13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2);
94
95
4.32M
            return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_16));
96
4.32M
            }
97
4.32M
         else
98
4.32M
            {
99
4.32M
            return SIMD_8x32(_mm256_or_si256(_mm256_slli_epi32(m_avx2, static_cast<int>(ROT)),
100
4.32M
                                             _mm256_srli_epi32(m_avx2, static_cast<int>(32-ROT))));
101
4.32M
            }
102
4.32M
#endif
103
4.32M
         }
Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<12ul>() const
Line
Count
Source
77
4.32M
         {
78
4.32M
         static_assert(ROT > 0 && ROT < 32, "Invalid rotation constant");
79
80
#if defined(__AVX512VL__)
81
         return SIMD_8x32(_mm256_rol_epi32(m_avx2, ROT));
82
#else
83
4.32M
         if constexpr(ROT == 8)
84
0
            {
85
4.32M
            const __m256i shuf_rotl_8 = _mm256_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3,
86
4.32M
                                                        14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3);
87
88
4.32M
            return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_8));
89
4.32M
            }
90
4.32M
         else if constexpr(ROT == 16)
91
0
            {
92
4.32M
            const __m256i shuf_rotl_16 = _mm256_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2,
93
4.32M
                                                         13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2);
94
95
4.32M
            return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_16));
96
4.32M
            }
97
4.32M
         else
98
4.32M
            {
99
4.32M
            return SIMD_8x32(_mm256_or_si256(_mm256_slli_epi32(m_avx2, static_cast<int>(ROT)),
100
4.32M
                                             _mm256_srli_epi32(m_avx2, static_cast<int>(32-ROT))));
101
4.32M
            }
102
4.32M
#endif
103
4.32M
         }
Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<8ul>() const
Line
Count
Source
77
4.32M
         {
78
4.32M
         static_assert(ROT > 0 && ROT < 32, "Invalid rotation constant");
79
80
#if defined(__AVX512VL__)
81
         return SIMD_8x32(_mm256_rol_epi32(m_avx2, ROT));
82
#else
83
4.32M
         if constexpr(ROT == 8)
84
4.32M
            {
85
4.32M
            const __m256i shuf_rotl_8 = _mm256_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3,
86
4.32M
                                                        14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3);
87
88
4.32M
            return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_8));
89
4.32M
            }
90
4.32M
         else if constexpr(ROT == 16)
91
4.32M
            {
92
4.32M
            const __m256i shuf_rotl_16 = _mm256_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2,
93
4.32M
                                                         13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2);
94
95
4.32M
            return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_16));
96
4.32M
            }
97
4.32M
         else
98
4.32M
            {
99
4.32M
            return SIMD_8x32(_mm256_or_si256(_mm256_slli_epi32(m_avx2, static_cast<int>(ROT)),
100
4.32M
                                             _mm256_srli_epi32(m_avx2, static_cast<int>(32-ROT))));
101
4.32M
            }
102
4.32M
#endif
103
4.32M
         }
104
105
      template<size_t ROT>
106
      BOTAN_FUNC_ISA("avx2")
107
      SIMD_8x32 rotr() const
108
0
         {
109
0
         return this->rotl<32-ROT>();
110
0
         }
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<13ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<22ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<5ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<7ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<1ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<3ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<2ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<6ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<11ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<25ul>() const
111
112
      SIMD_8x32 BOTAN_FUNC_ISA("avx2") sigma0() const
113
0
         {
114
0
         const SIMD_8x32 rot1 = this->rotr<2>();
115
0
         const SIMD_8x32 rot2 = this->rotr<13>();
116
0
         const SIMD_8x32 rot3 = this->rotr<22>();
117
0
         return rot1 ^ rot2 ^ rot3;
118
0
         }
119
120
      SIMD_8x32 BOTAN_FUNC_ISA("avx2") sigma1() const
121
0
         {
122
0
         const SIMD_8x32 rot1 = this->rotr<6>();
123
0
         const SIMD_8x32 rot2 = this->rotr<11>();
124
0
         const SIMD_8x32 rot3 = this->rotr<25>();
125
0
         return rot1 ^ rot2 ^ rot3;
126
0
         }
127
128
      BOTAN_FUNC_ISA("avx2")
129
      SIMD_8x32 operator+(const SIMD_8x32& other) const
130
216k
         {
131
216k
         SIMD_8x32 retval(*this);
132
216k
         retval += other;
133
216k
         return retval;
134
216k
         }
135
136
      BOTAN_FUNC_ISA("avx2")
137
      SIMD_8x32 operator-(const SIMD_8x32& other) const
138
0
         {
139
0
         SIMD_8x32 retval(*this);
140
0
         retval -= other;
141
0
         return retval;
142
0
         }
143
144
      BOTAN_FUNC_ISA("avx2")
145
      SIMD_8x32 operator^(const SIMD_8x32& other) const
146
0
         {
147
0
         SIMD_8x32 retval(*this);
148
0
         retval ^= other;
149
0
         return retval;
150
0
         }
151
152
      BOTAN_FUNC_ISA("avx2")
153
      SIMD_8x32 operator|(const SIMD_8x32& other) const
154
0
         {
155
0
         SIMD_8x32 retval(*this);
156
0
         retval |= other;
157
0
         return retval;
158
0
         }
159
160
      BOTAN_FUNC_ISA("avx2")
161
      SIMD_8x32 operator&(const SIMD_8x32& other) const
162
0
         {
163
0
         SIMD_8x32 retval(*this);
164
0
         retval &= other;
165
0
         return retval;
166
0
         }
167
168
      BOTAN_FUNC_ISA("avx2")
169
      void operator+=(const SIMD_8x32& other)
170
18.3M
         {
171
18.3M
         m_avx2 = _mm256_add_epi32(m_avx2, other.m_avx2);
172
18.3M
         }
173
174
      BOTAN_FUNC_ISA("avx2")
175
      void operator-=(const SIMD_8x32& other)
176
0
         {
177
0
         m_avx2 = _mm256_sub_epi32(m_avx2, other.m_avx2);
178
0
         }
179
180
      BOTAN_FUNC_ISA("avx2")
181
      void operator^=(const SIMD_8x32& other)
182
17.2M
         {
183
17.2M
         m_avx2 = _mm256_xor_si256(m_avx2, other.m_avx2);
184
17.2M
         }
185
186
      BOTAN_FUNC_ISA("avx2")
187
      void operator^=(uint32_t other)
188
0
         {
189
0
         *this ^= SIMD_8x32::splat(other);
190
0
         }
191
192
      BOTAN_FUNC_ISA("avx2")
193
      void operator|=(const SIMD_8x32& other)
194
0
         {
195
0
         m_avx2 = _mm256_or_si256(m_avx2, other.m_avx2);
196
0
         }
197
198
      BOTAN_FUNC_ISA("avx2")
199
      void operator&=(const SIMD_8x32& other)
200
0
         {
201
0
         m_avx2 = _mm256_and_si256(m_avx2, other.m_avx2);
202
0
         }
203
204
      template<int SHIFT> BOTAN_FUNC_ISA("avx2") SIMD_8x32 shl() const
205
0
         {
206
0
         return SIMD_8x32(_mm256_slli_epi32(m_avx2, SHIFT));
207
0
         }
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::shl<3>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::shl<7>() const
208
209
      template<int SHIFT> BOTAN_FUNC_ISA("avx2") SIMD_8x32 shr() const
210
         {
211
         return SIMD_8x32(_mm256_srli_epi32(m_avx2, SHIFT));
212
         }
213
214
      BOTAN_FUNC_ISA("avx2")
215
      SIMD_8x32 operator~() const
216
0
         {
217
0
         return SIMD_8x32(_mm256_xor_si256(m_avx2, _mm256_set1_epi32(0xFFFFFFFF)));
218
0
         }
219
220
      // (~reg) & other
221
      BOTAN_FUNC_ISA("avx2")
222
      SIMD_8x32 andc(const SIMD_8x32& other) const
223
0
         {
224
0
         return SIMD_8x32(_mm256_andnot_si256(m_avx2, other.m_avx2));
225
0
         }
226
227
      BOTAN_FUNC_ISA("avx2")
228
      SIMD_8x32 bswap() const
229
0
         {
230
0
         const uint8_t BSWAP_MASK[32] = { 3, 2, 1, 0,
231
0
                                          7, 6, 5, 4,
232
0
                                          11, 10, 9, 8,
233
0
                                          15, 14, 13, 12,
234
0
                                          19, 18, 17, 16,
235
0
                                          23, 22, 21, 20,
236
0
                                          27, 26, 25, 24,
237
0
                                          31, 30, 29, 28 };
238
239
0
         const __m256i bswap = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(BSWAP_MASK));
240
241
0
         const __m256i output = _mm256_shuffle_epi8(m_avx2, bswap);
242
243
0
         return SIMD_8x32(output);
244
0
         }
245
246
      BOTAN_FUNC_ISA("avx2")
247
      static void transpose(SIMD_8x32& B0, SIMD_8x32& B1,
248
                            SIMD_8x32& B2, SIMD_8x32& B3)
249
216k
         {
250
216k
         const __m256i T0 = _mm256_unpacklo_epi32(B0.m_avx2, B1.m_avx2);
251
216k
         const __m256i T1 = _mm256_unpacklo_epi32(B2.m_avx2, B3.m_avx2);
252
216k
         const __m256i T2 = _mm256_unpackhi_epi32(B0.m_avx2, B1.m_avx2);
253
216k
         const __m256i T3 = _mm256_unpackhi_epi32(B2.m_avx2, B3.m_avx2);
254
255
216k
         B0.m_avx2 = _mm256_unpacklo_epi64(T0, T1);
256
216k
         B1.m_avx2 = _mm256_unpackhi_epi64(T0, T1);
257
216k
         B2.m_avx2 = _mm256_unpacklo_epi64(T2, T3);
258
216k
         B3.m_avx2 = _mm256_unpackhi_epi64(T2, T3);
259
216k
         }
260
261
      BOTAN_FUNC_ISA("avx2")
262
      static void transpose(SIMD_8x32& B0, SIMD_8x32& B1,
263
                            SIMD_8x32& B2, SIMD_8x32& B3,
264
                            SIMD_8x32& B4, SIMD_8x32& B5,
265
                            SIMD_8x32& B6, SIMD_8x32& B7)
266
108k
         {
267
108k
         transpose(B0, B1, B2, B3);
268
108k
         transpose(B4, B5, B6, B7);
269
270
108k
         swap_tops(B0, B4);
271
108k
         swap_tops(B1, B5);
272
108k
         swap_tops(B2, B6);
273
108k
         swap_tops(B3, B7);
274
108k
         }
275
276
      BOTAN_FUNC_ISA("avx2")
277
      static SIMD_8x32 choose(const SIMD_8x32& mask, const SIMD_8x32& a, const SIMD_8x32& b)
278
0
         {
279
#if defined(__AVX512VL__)
280
         return _mm256_ternarylogic_epi32(mask.handle(), a.handle(), b.handle(), 0xca);
281
#else
282
0
         return (mask & a) ^ mask.andc(b);
283
0
#endif
284
0
         }
285
286
      BOTAN_FUNC_ISA("avx2")
287
      static SIMD_8x32 majority(const SIMD_8x32& x, const SIMD_8x32& y, const SIMD_8x32& z)
288
0
         {
289
#if defined(__AVX512VL__)
290
         return _mm256_ternarylogic_epi32(x.handle(), y.handle(), z.handle(), 0xe8);
291
#else
292
0
         return SIMD_8x32::choose(x ^ y, z, y);
293
0
#endif
294
0
         }
295
296
      BOTAN_FUNC_ISA("avx2")
297
      static void reset_registers()
298
54.0k
         {
299
54.0k
         _mm256_zeroupper();
300
54.0k
         }
301
302
      BOTAN_FUNC_ISA("avx2")
303
      static void zero_registers()
304
54.0k
         {
305
54.0k
         _mm256_zeroall();
306
54.0k
         }
307
308
1.72M
      __m256i BOTAN_FUNC_ISA("avx2") handle() const { return m_avx2; }
309
310
      BOTAN_FUNC_ISA("avx2")
311
19.8M
      SIMD_8x32(__m256i x) : m_avx2(x) {}
312
313
   private:
314
315
      BOTAN_FUNC_ISA("avx2")
316
      static void swap_tops(SIMD_8x32& A, SIMD_8x32& B)
317
432k
         {
318
432k
         SIMD_8x32 T0 = _mm256_permute2x128_si256(A.handle(), B.handle(), 0 + (2 << 4));
319
432k
         SIMD_8x32 T1 = _mm256_permute2x128_si256(A.handle(), B.handle(), 1 + (3 << 4));
320
432k
         A = T0;
321
432k
         B = T1;
322
432k
         }
323
324
      __m256i m_avx2;
325
   };
326
327
template<size_t R>
328
inline SIMD_8x32 rotl(SIMD_8x32 input)
329
   {
330
   return input.rotl<R>();
331
   }
332
333
template<size_t R>
334
inline SIMD_8x32 rotr(SIMD_8x32 input)
335
   {
336
   return input.rotr<R>();
337
   }
338
339
// For Serpent:
340
template<size_t S>
341
inline SIMD_8x32 shl(SIMD_8x32 input)
342
   {
343
   return input.shl<S>();
344
   }
345
346
}
347
348
#endif