Coverage Report

Created: 2023-01-25 06:35

/src/botan/build/include/botan/internal/simd_avx2.h
Line
Count
Source (jump to first uncovered line)
1
/*
2
* (C) 2018 Jack Lloyd
3
*
4
* Botan is released under the Simplified BSD License (see license.txt)
5
*/
6
7
#ifndef BOTAN_SIMD_AVX2_H_
8
#define BOTAN_SIMD_AVX2_H_
9
10
#include <botan/types.h>
11
#include <immintrin.h>
12
13
namespace Botan {
14
15
#define BOTAN_AVX2_FN BOTAN_FUNC_ISA("avx2")
16
17
class SIMD_8x32 final
18
   {
19
   public:
20
21
      SIMD_8x32& operator=(const SIMD_8x32& other) = default;
22
      SIMD_8x32(const SIMD_8x32& other) = default;
23
24
      SIMD_8x32& operator=(SIMD_8x32&& other) = default;
25
      SIMD_8x32(SIMD_8x32&& other) = default;
26
27
      BOTAN_AVX2_FN
28
      BOTAN_FORCE_INLINE SIMD_8x32()
29
0
         {
30
0
         m_avx2 = _mm256_setzero_si256();
31
0
         }
32
33
      BOTAN_AVX2_FN
34
      explicit SIMD_8x32(const uint32_t B[8])
35
0
         {
36
0
         m_avx2 = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(B));
37
0
         }
38
39
      BOTAN_AVX2_FN
40
      explicit SIMD_8x32(uint32_t B0, uint32_t B1, uint32_t B2, uint32_t B3,
41
                         uint32_t B4, uint32_t B5, uint32_t B6, uint32_t B7)
42
117k
         {
43
117k
         m_avx2 = _mm256_set_epi32(B7, B6, B5, B4, B3, B2, B1, B0);
44
117k
         }
45
46
      BOTAN_AVX2_FN
47
      static SIMD_8x32 splat(uint32_t B)
48
1.87M
         {
49
1.87M
         return SIMD_8x32(_mm256_set1_epi32(B));
50
1.87M
         }
51
52
      BOTAN_AVX2_FN
53
      static SIMD_8x32 load_le(const uint8_t* in)
54
0
         {
55
0
         return SIMD_8x32(_mm256_loadu_si256(reinterpret_cast<const __m256i*>(in)));
56
0
         }
57
58
      BOTAN_AVX2_FN
59
      static SIMD_8x32 load_be(const uint8_t* in)
60
0
         {
61
0
         return load_le(in).bswap();
62
0
         }
63
64
      BOTAN_AVX2_FN
65
      void store_le(uint8_t out[]) const
66
936k
         {
67
936k
         _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), m_avx2);
68
936k
         }
69
70
      BOTAN_AVX2_FN
71
      void store_be(uint8_t out[]) const
72
0
         {
73
0
         bswap().store_le(out);
74
0
         }
75
76
      template<size_t ROT>
77
      BOTAN_AVX2_FN
78
      SIMD_8x32 rotl() const
79
18.7M
         {
80
18.7M
         static_assert(ROT > 0 && ROT < 32, "Invalid rotation constant");
81
82
#if defined(__AVX512VL__)
83
         return SIMD_8x32(_mm256_rol_epi32(m_avx2, ROT));
84
#else
85
18.7M
         if constexpr(ROT == 8)
86
4.68M
            {
87
14.0M
            const __m256i shuf_rotl_8 = _mm256_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3,
88
14.0M
                                                        14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3);
89
90
14.0M
            return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_8));
91
14.0M
            }
92
14.0M
         else if constexpr(ROT == 16)
93
4.68M
            {
94
9.36M
            const __m256i shuf_rotl_16 = _mm256_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2,
95
9.36M
                                                         13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2);
96
97
9.36M
            return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_16));
98
9.36M
            }
99
9.36M
         else
100
9.36M
            {
101
9.36M
            return SIMD_8x32(_mm256_or_si256(_mm256_slli_epi32(m_avx2, static_cast<int>(ROT)),
102
9.36M
                                             _mm256_srli_epi32(m_avx2, static_cast<int>(32-ROT))));
103
9.36M
            }
104
18.7M
#endif
105
18.7M
         }
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<30ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<19ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<10ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<26ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<21ul>() const
Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<7ul>() const
Line
Count
Source
79
4.68M
         {
80
4.68M
         static_assert(ROT > 0 && ROT < 32, "Invalid rotation constant");
81
82
#if defined(__AVX512VL__)
83
         return SIMD_8x32(_mm256_rol_epi32(m_avx2, ROT));
84
#else
85
4.68M
         if constexpr(ROT == 8)
86
0
            {
87
4.68M
            const __m256i shuf_rotl_8 = _mm256_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3,
88
4.68M
                                                        14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3);
89
90
4.68M
            return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_8));
91
4.68M
            }
92
4.68M
         else if constexpr(ROT == 16)
93
0
            {
94
4.68M
            const __m256i shuf_rotl_16 = _mm256_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2,
95
4.68M
                                                         13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2);
96
97
4.68M
            return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_16));
98
4.68M
            }
99
4.68M
         else
100
4.68M
            {
101
4.68M
            return SIMD_8x32(_mm256_or_si256(_mm256_slli_epi32(m_avx2, static_cast<int>(ROT)),
102
4.68M
                                             _mm256_srli_epi32(m_avx2, static_cast<int>(32-ROT))));
103
4.68M
            }
104
4.68M
#endif
105
4.68M
         }
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<13ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<3ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<1ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<5ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<22ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<27ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<25ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<31ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<29ul>() const
Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<16ul>() const
Line
Count
Source
79
4.68M
         {
80
4.68M
         static_assert(ROT > 0 && ROT < 32, "Invalid rotation constant");
81
82
#if defined(__AVX512VL__)
83
         return SIMD_8x32(_mm256_rol_epi32(m_avx2, ROT));
84
#else
85
4.68M
         if constexpr(ROT == 8)
86
0
            {
87
4.68M
            const __m256i shuf_rotl_8 = _mm256_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3,
88
4.68M
                                                        14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3);
89
90
4.68M
            return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_8));
91
4.68M
            }
92
4.68M
         else if constexpr(ROT == 16)
93
4.68M
            {
94
4.68M
            const __m256i shuf_rotl_16 = _mm256_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2,
95
4.68M
                                                         13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2);
96
97
4.68M
            return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_16));
98
4.68M
            }
99
4.68M
         else
100
4.68M
            {
101
4.68M
            return SIMD_8x32(_mm256_or_si256(_mm256_slli_epi32(m_avx2, static_cast<int>(ROT)),
102
4.68M
                                             _mm256_srli_epi32(m_avx2, static_cast<int>(32-ROT))));
103
4.68M
            }
104
4.68M
#endif
105
4.68M
         }
Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<12ul>() const
Line
Count
Source
79
4.68M
         {
80
4.68M
         static_assert(ROT > 0 && ROT < 32, "Invalid rotation constant");
81
82
#if defined(__AVX512VL__)
83
         return SIMD_8x32(_mm256_rol_epi32(m_avx2, ROT));
84
#else
85
4.68M
         if constexpr(ROT == 8)
86
0
            {
87
4.68M
            const __m256i shuf_rotl_8 = _mm256_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3,
88
4.68M
                                                        14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3);
89
90
4.68M
            return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_8));
91
4.68M
            }
92
4.68M
         else if constexpr(ROT == 16)
93
0
            {
94
4.68M
            const __m256i shuf_rotl_16 = _mm256_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2,
95
4.68M
                                                         13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2);
96
97
4.68M
            return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_16));
98
4.68M
            }
99
4.68M
         else
100
4.68M
            {
101
4.68M
            return SIMD_8x32(_mm256_or_si256(_mm256_slli_epi32(m_avx2, static_cast<int>(ROT)),
102
4.68M
                                             _mm256_srli_epi32(m_avx2, static_cast<int>(32-ROT))));
103
4.68M
            }
104
4.68M
#endif
105
4.68M
         }
Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<8ul>() const
Line
Count
Source
79
4.68M
         {
80
4.68M
         static_assert(ROT > 0 && ROT < 32, "Invalid rotation constant");
81
82
#if defined(__AVX512VL__)
83
         return SIMD_8x32(_mm256_rol_epi32(m_avx2, ROT));
84
#else
85
4.68M
         if constexpr(ROT == 8)
86
4.68M
            {
87
4.68M
            const __m256i shuf_rotl_8 = _mm256_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3,
88
4.68M
                                                        14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3);
89
90
4.68M
            return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_8));
91
4.68M
            }
92
4.68M
         else if constexpr(ROT == 16)
93
4.68M
            {
94
4.68M
            const __m256i shuf_rotl_16 = _mm256_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2,
95
4.68M
                                                         13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2);
96
97
4.68M
            return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_16));
98
4.68M
            }
99
4.68M
         else
100
4.68M
            {
101
4.68M
            return SIMD_8x32(_mm256_or_si256(_mm256_slli_epi32(m_avx2, static_cast<int>(ROT)),
102
4.68M
                                             _mm256_srli_epi32(m_avx2, static_cast<int>(32-ROT))));
103
4.68M
            }
104
4.68M
#endif
105
4.68M
         }
106
107
      template<size_t ROT>
108
      BOTAN_AVX2_FN
109
      SIMD_8x32 rotr() const
110
0
         {
111
0
         return this->rotl<32-ROT>();
112
0
         }
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<2ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<13ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<22ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<6ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<11ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<25ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<5ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<7ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<1ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<3ul>() const
113
114
      SIMD_8x32 BOTAN_AVX2_FN sigma0() const
115
0
         {
116
0
         const SIMD_8x32 rot1 = this->rotr<2>();
117
0
         const SIMD_8x32 rot2 = this->rotr<13>();
118
0
         const SIMD_8x32 rot3 = this->rotr<22>();
119
0
         return rot1 ^ rot2 ^ rot3;
120
0
         }
121
122
      SIMD_8x32 BOTAN_AVX2_FN sigma1() const
123
0
         {
124
0
         const SIMD_8x32 rot1 = this->rotr<6>();
125
0
         const SIMD_8x32 rot2 = this->rotr<11>();
126
0
         const SIMD_8x32 rot3 = this->rotr<25>();
127
0
         return rot1 ^ rot2 ^ rot3;
128
0
         }
129
130
      BOTAN_AVX2_FN
131
      SIMD_8x32 operator+(const SIMD_8x32& other) const
132
234k
         {
133
234k
         SIMD_8x32 retval(*this);
134
234k
         retval += other;
135
234k
         return retval;
136
234k
         }
137
138
      BOTAN_AVX2_FN
139
      SIMD_8x32 operator-(const SIMD_8x32& other) const
140
0
         {
141
0
         SIMD_8x32 retval(*this);
142
0
         retval -= other;
143
0
         return retval;
144
0
         }
145
146
      BOTAN_AVX2_FN
147
      SIMD_8x32 operator^(const SIMD_8x32& other) const
148
0
         {
149
0
         SIMD_8x32 retval(*this);
150
0
         retval ^= other;
151
0
         return retval;
152
0
         }
153
154
      BOTAN_AVX2_FN
155
      SIMD_8x32 operator|(const SIMD_8x32& other) const
156
0
         {
157
0
         SIMD_8x32 retval(*this);
158
0
         retval |= other;
159
0
         return retval;
160
0
         }
161
162
      BOTAN_AVX2_FN
163
      SIMD_8x32 operator&(const SIMD_8x32& other) const
164
0
         {
165
0
         SIMD_8x32 retval(*this);
166
0
         retval &= other;
167
0
         return retval;
168
0
         }
169
170
      BOTAN_AVX2_FN
171
      void operator+=(const SIMD_8x32& other)
172
19.9M
         {
173
19.9M
         m_avx2 = _mm256_add_epi32(m_avx2, other.m_avx2);
174
19.9M
         }
175
176
      BOTAN_AVX2_FN
177
      void operator-=(const SIMD_8x32& other)
178
0
         {
179
0
         m_avx2 = _mm256_sub_epi32(m_avx2, other.m_avx2);
180
0
         }
181
182
      BOTAN_AVX2_FN
183
      void operator^=(const SIMD_8x32& other)
184
18.7M
         {
185
18.7M
         m_avx2 = _mm256_xor_si256(m_avx2, other.m_avx2);
186
18.7M
         }
187
188
      BOTAN_AVX2_FN
189
      void operator^=(uint32_t other)
190
0
         {
191
0
         *this ^= SIMD_8x32::splat(other);
192
0
         }
193
194
      BOTAN_AVX2_FN
195
      void operator|=(const SIMD_8x32& other)
196
0
         {
197
0
         m_avx2 = _mm256_or_si256(m_avx2, other.m_avx2);
198
0
         }
199
200
      BOTAN_AVX2_FN
201
      void operator&=(const SIMD_8x32& other)
202
0
         {
203
0
         m_avx2 = _mm256_and_si256(m_avx2, other.m_avx2);
204
0
         }
205
206
      template<int SHIFT> BOTAN_AVX2_FN SIMD_8x32 shl() const
207
0
         {
208
0
         return SIMD_8x32(_mm256_slli_epi32(m_avx2, SHIFT));
209
0
         }
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::shl<3>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::shl<7>() const
210
211
      template<int SHIFT> BOTAN_AVX2_FN SIMD_8x32 shr() const
212
         {
213
         return SIMD_8x32(_mm256_srli_epi32(m_avx2, SHIFT));
214
         }
215
216
      BOTAN_AVX2_FN
217
      SIMD_8x32 operator~() const
218
0
         {
219
0
         return SIMD_8x32(_mm256_xor_si256(m_avx2, _mm256_set1_epi32(0xFFFFFFFF)));
220
0
         }
221
222
      // (~reg) & other
223
      BOTAN_AVX2_FN
224
      SIMD_8x32 andc(const SIMD_8x32& other) const
225
0
         {
226
0
         return SIMD_8x32(_mm256_andnot_si256(m_avx2, other.m_avx2));
227
0
         }
228
229
      BOTAN_AVX2_FN
230
      SIMD_8x32 bswap() const
231
0
         {
232
0
         const uint8_t BSWAP_MASK[32] = { 3, 2, 1, 0,
233
0
                                          7, 6, 5, 4,
234
0
                                          11, 10, 9, 8,
235
0
                                          15, 14, 13, 12,
236
0
                                          19, 18, 17, 16,
237
0
                                          23, 22, 21, 20,
238
0
                                          27, 26, 25, 24,
239
0
                                          31, 30, 29, 28 };
240
241
0
         const __m256i bswap = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(BSWAP_MASK));
242
243
0
         const __m256i output = _mm256_shuffle_epi8(m_avx2, bswap);
244
245
0
         return SIMD_8x32(output);
246
0
         }
247
248
      BOTAN_AVX2_FN
249
      static void transpose(SIMD_8x32& B0, SIMD_8x32& B1,
250
                            SIMD_8x32& B2, SIMD_8x32& B3)
251
234k
         {
252
234k
         const __m256i T0 = _mm256_unpacklo_epi32(B0.m_avx2, B1.m_avx2);
253
234k
         const __m256i T1 = _mm256_unpacklo_epi32(B2.m_avx2, B3.m_avx2);
254
234k
         const __m256i T2 = _mm256_unpackhi_epi32(B0.m_avx2, B1.m_avx2);
255
234k
         const __m256i T3 = _mm256_unpackhi_epi32(B2.m_avx2, B3.m_avx2);
256
257
234k
         B0.m_avx2 = _mm256_unpacklo_epi64(T0, T1);
258
234k
         B1.m_avx2 = _mm256_unpackhi_epi64(T0, T1);
259
234k
         B2.m_avx2 = _mm256_unpacklo_epi64(T2, T3);
260
234k
         B3.m_avx2 = _mm256_unpackhi_epi64(T2, T3);
261
234k
         }
262
263
      BOTAN_AVX2_FN
264
      static void transpose(SIMD_8x32& B0, SIMD_8x32& B1,
265
                            SIMD_8x32& B2, SIMD_8x32& B3,
266
                            SIMD_8x32& B4, SIMD_8x32& B5,
267
                            SIMD_8x32& B6, SIMD_8x32& B7)
268
117k
         {
269
117k
         transpose(B0, B1, B2, B3);
270
117k
         transpose(B4, B5, B6, B7);
271
272
117k
         swap_tops(B0, B4);
273
117k
         swap_tops(B1, B5);
274
117k
         swap_tops(B2, B6);
275
117k
         swap_tops(B3, B7);
276
117k
         }
277
278
      BOTAN_AVX2_FN
279
      static SIMD_8x32 choose(const SIMD_8x32& mask, const SIMD_8x32& a, const SIMD_8x32& b)
280
0
         {
281
#if defined(__AVX512VL__)
282
         return _mm256_ternarylogic_epi32(mask.handle(), a.handle(), b.handle(), 0xca);
283
#else
284
0
         return (mask & a) ^ mask.andc(b);
285
0
#endif
286
0
         }
287
288
      BOTAN_AVX2_FN
289
      static SIMD_8x32 majority(const SIMD_8x32& x, const SIMD_8x32& y, const SIMD_8x32& z)
290
0
         {
291
#if defined(__AVX512VL__)
292
         return _mm256_ternarylogic_epi32(x.handle(), y.handle(), z.handle(), 0xe8);
293
#else
294
0
         return SIMD_8x32::choose(x ^ y, z, y);
295
0
#endif
296
0
         }
297
298
      BOTAN_AVX2_FN
299
      static void reset_registers()
300
58.5k
         {
301
58.5k
         _mm256_zeroupper();
302
58.5k
         }
303
304
      BOTAN_AVX2_FN
305
      static void zero_registers()
306
58.5k
         {
307
58.5k
         _mm256_zeroall();
308
58.5k
         }
309
310
1.87M
      __m256i BOTAN_AVX2_FN handle() const { return m_avx2; }
311
312
      BOTAN_AVX2_FN
313
21.5M
      SIMD_8x32(__m256i x) : m_avx2(x) {}
314
315
   private:
316
317
      BOTAN_AVX2_FN
318
      static void swap_tops(SIMD_8x32& A, SIMD_8x32& B)
319
468k
         {
320
468k
         SIMD_8x32 T0 = _mm256_permute2x128_si256(A.handle(), B.handle(), 0 + (2 << 4));
321
468k
         SIMD_8x32 T1 = _mm256_permute2x128_si256(A.handle(), B.handle(), 1 + (3 << 4));
322
468k
         A = T0;
323
468k
         B = T1;
324
468k
         }
325
326
      __m256i m_avx2;
327
   };
328
329
template<size_t R>
330
inline SIMD_8x32 rotl(SIMD_8x32 input)
331
   {
332
   return input.rotl<R>();
333
   }
334
335
template<size_t R>
336
inline SIMD_8x32 rotr(SIMD_8x32 input)
337
   {
338
   return input.rotr<R>();
339
   }
340
341
// For Serpent:
342
template<size_t S>
343
inline SIMD_8x32 shl(SIMD_8x32 input)
344
   {
345
   return input.shl<S>();
346
   }
347
348
}
349
350
#endif