Coverage Report

Created: 2020-09-16 07:52

/src/botan/build/include/botan/internal/simd_avx2.h
Line
Count
Source (jump to first uncovered line)
1
/*
2
* (C) 2018 Jack Lloyd
3
*
4
* Botan is released under the Simplified BSD License (see license.txt)
5
*/
6
7
#ifndef BOTAN_SIMD_AVX2_H_
8
#define BOTAN_SIMD_AVX2_H_
9
10
#include <botan/types.h>
11
#include <immintrin.h>
12
13
namespace Botan {
14
15
class SIMD_8x32 final
16
   {
17
   public:
18
19
      SIMD_8x32& operator=(const SIMD_8x32& other) = default;
20
      SIMD_8x32(const SIMD_8x32& other) = default;
21
22
      SIMD_8x32& operator=(SIMD_8x32&& other) = default;
23
      SIMD_8x32(SIMD_8x32&& other) = default;
24
25
      BOTAN_FUNC_ISA("avx2")
26
      BOTAN_FORCE_INLINE SIMD_8x32()
27
0
         {
28
0
         m_avx2 = _mm256_setzero_si256();
29
0
         }
30
31
      BOTAN_FUNC_ISA("avx2")
32
      explicit SIMD_8x32(const uint32_t B[8])
33
0
         {
34
0
         m_avx2 = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(B));
35
0
         }
36
37
      BOTAN_FUNC_ISA("avx2")
38
      explicit SIMD_8x32(uint32_t B0, uint32_t B1, uint32_t B2, uint32_t B3,
39
                         uint32_t B4, uint32_t B5, uint32_t B6, uint32_t B7)
40
104k
         {
41
104k
         m_avx2 = _mm256_set_epi32(B7, B6, B5, B4, B3, B2, B1, B0);
42
104k
         }
43
44
      BOTAN_FUNC_ISA("avx2")
45
      static SIMD_8x32 splat(uint32_t B)
46
1.67M
         {
47
1.67M
         return SIMD_8x32(_mm256_set1_epi32(B));
48
1.67M
         }
49
50
      BOTAN_FUNC_ISA("avx2")
51
      static SIMD_8x32 load_le(const uint8_t* in)
52
0
         {
53
0
         return SIMD_8x32(_mm256_loadu_si256(reinterpret_cast<const __m256i*>(in)));
54
0
         }
55
56
      BOTAN_FUNC_ISA("avx2")
57
      static SIMD_8x32 load_be(const uint8_t* in)
58
0
         {
59
0
         return load_le(in).bswap();
60
0
         }
61
62
      BOTAN_FUNC_ISA("avx2")
63
      void store_le(uint8_t out[]) const
64
838k
         {
65
838k
         _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), m_avx2);
66
838k
         }
67
68
      BOTAN_FUNC_ISA("avx2")
69
      void store_be(uint8_t out[]) const
70
0
         {
71
0
         bswap().store_le(out);
72
0
         }
73
74
      template<size_t ROT>
75
      BOTAN_FUNC_ISA("avx2")
76
      SIMD_8x32 rotl() const
77
16.7M
         {
78
16.7M
         static_assert(ROT > 0 && ROT < 32, "Invalid rotation constant");
79
16.7M
80
#if defined(__AVX512VL__)
81
         return SIMD_8x32(_mm256_rol_epi32(m_avx2, ROT));
82
#else
83
16.7M
         BOTAN_IF_CONSTEXPR(ROT == 8)
84
4.19M
            {
85
4.19M
            const __m256i shuf_rotl_8 = _mm256_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3,
86
4.19M
                                                        14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3);
87
4.19M
88
4.19M
            return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_8));
89
4.19M
            }
90
16.7M
         else BOTAN_IF_CONSTEXPR(ROT == 16)
91
4.19M
            {
92
4.19M
            const __m256i shuf_rotl_16 = _mm256_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2,
93
4.19M
                                                         13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2);
94
4.19M
95
4.19M
            return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_16));
96
4.19M
            }
97
8.38M
         else
98
8.38M
            {
99
8.38M
            return SIMD_8x32(_mm256_or_si256(_mm256_slli_epi32(m_avx2, static_cast<int>(ROT)),
100
8.38M
                                             _mm256_srli_epi32(m_avx2, static_cast<int>(32-ROT))));
101
8.38M
            }
102
16.7M
#endif
103
16.7M
         }
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<13ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<3ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<1ul>() const
Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<7ul>() const
Line
Count
Source
77
4.19M
         {
78
4.19M
         static_assert(ROT > 0 && ROT < 32, "Invalid rotation constant");
79
4.19M
80
#if defined(__AVX512VL__)
81
         return SIMD_8x32(_mm256_rol_epi32(m_avx2, ROT));
82
#else
83
4.19M
         BOTAN_IF_CONSTEXPR(ROT == 8)
84
0
            {
85
0
            const __m256i shuf_rotl_8 = _mm256_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3,
86
0
                                                        14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3);
87
0
88
0
            return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_8));
89
0
            }
90
4.19M
         else BOTAN_IF_CONSTEXPR(ROT == 16)
91
0
            {
92
0
            const __m256i shuf_rotl_16 = _mm256_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2,
93
0
                                                         13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2);
94
0
95
0
            return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_16));
96
0
            }
97
4.19M
         else
98
4.19M
            {
99
4.19M
            return SIMD_8x32(_mm256_or_si256(_mm256_slli_epi32(m_avx2, static_cast<int>(ROT)),
100
4.19M
                                             _mm256_srli_epi32(m_avx2, static_cast<int>(32-ROT))));
101
4.19M
            }
102
4.19M
#endif
103
4.19M
         }
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<5ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<22ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<10ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<27ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<25ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<31ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<29ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<19ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<26ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<21ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<30ul>() const
Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<16ul>() const
Line
Count
Source
77
4.19M
         {
78
4.19M
         static_assert(ROT > 0 && ROT < 32, "Invalid rotation constant");
79
4.19M
80
#if defined(__AVX512VL__)
81
         return SIMD_8x32(_mm256_rol_epi32(m_avx2, ROT));
82
#else
83
4.19M
         BOTAN_IF_CONSTEXPR(ROT == 8)
84
0
            {
85
0
            const __m256i shuf_rotl_8 = _mm256_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3,
86
0
                                                        14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3);
87
0
88
0
            return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_8));
89
0
            }
90
4.19M
         else BOTAN_IF_CONSTEXPR(ROT == 16)
91
4.19M
            {
92
4.19M
            const __m256i shuf_rotl_16 = _mm256_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2,
93
4.19M
                                                         13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2);
94
4.19M
95
4.19M
            return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_16));
96
4.19M
            }
97
0
         else
98
0
            {
99
0
            return SIMD_8x32(_mm256_or_si256(_mm256_slli_epi32(m_avx2, static_cast<int>(ROT)),
100
0
                                             _mm256_srli_epi32(m_avx2, static_cast<int>(32-ROT))));
101
0
            }
102
4.19M
#endif
103
4.19M
         }
Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<12ul>() const
Line
Count
Source
77
4.19M
         {
78
4.19M
         static_assert(ROT > 0 && ROT < 32, "Invalid rotation constant");
79
4.19M
80
#if defined(__AVX512VL__)
81
         return SIMD_8x32(_mm256_rol_epi32(m_avx2, ROT));
82
#else
83
4.19M
         BOTAN_IF_CONSTEXPR(ROT == 8)
84
0
            {
85
0
            const __m256i shuf_rotl_8 = _mm256_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3,
86
0
                                                        14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3);
87
0
88
0
            return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_8));
89
0
            }
90
4.19M
         else BOTAN_IF_CONSTEXPR(ROT == 16)
91
0
            {
92
0
            const __m256i shuf_rotl_16 = _mm256_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2,
93
0
                                                         13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2);
94
0
95
0
            return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_16));
96
0
            }
97
4.19M
         else
98
4.19M
            {
99
4.19M
            return SIMD_8x32(_mm256_or_si256(_mm256_slli_epi32(m_avx2, static_cast<int>(ROT)),
100
4.19M
                                             _mm256_srli_epi32(m_avx2, static_cast<int>(32-ROT))));
101
4.19M
            }
102
4.19M
#endif
103
4.19M
         }
Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<8ul>() const
Line
Count
Source
77
4.19M
         {
78
4.19M
         static_assert(ROT > 0 && ROT < 32, "Invalid rotation constant");
79
4.19M
80
#if defined(__AVX512VL__)
81
         return SIMD_8x32(_mm256_rol_epi32(m_avx2, ROT));
82
#else
83
4.19M
         BOTAN_IF_CONSTEXPR(ROT == 8)
84
4.19M
            {
85
4.19M
            const __m256i shuf_rotl_8 = _mm256_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3,
86
4.19M
                                                        14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3);
87
4.19M
88
4.19M
            return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_8));
89
4.19M
            }
90
4.19M
         else BOTAN_IF_CONSTEXPR(ROT == 16)
91
0
            {
92
0
            const __m256i shuf_rotl_16 = _mm256_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2,
93
0
                                                         13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2);
94
0
95
0
            return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_16));
96
0
            }
97
0
         else
98
0
            {
99
0
            return SIMD_8x32(_mm256_or_si256(_mm256_slli_epi32(m_avx2, static_cast<int>(ROT)),
100
0
                                             _mm256_srli_epi32(m_avx2, static_cast<int>(32-ROT))));
101
0
            }
102
4.19M
#endif
103
4.19M
         }
104
105
      template<size_t ROT>
106
      BOTAN_FUNC_ISA("avx2")
107
      SIMD_8x32 rotr() const
108
0
         {
109
0
         return this->rotl<32-ROT>();
110
0
         }
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<22ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<5ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<7ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<1ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<3ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<13ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<6ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<11ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<25ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<2ul>() const
111
112
      template<size_t ROT1, size_t ROT2, size_t ROT3>
113
      SIMD_8x32 BOTAN_FUNC_ISA("avx2") rho() const
114
0
         {
115
0
         SIMD_8x32 res;
116
0
117
0
         const SIMD_8x32 rot1 = this->rotr<ROT1>();
118
0
         const SIMD_8x32 rot2 = this->rotr<ROT2>();
119
0
         const SIMD_8x32 rot3 = this->rotr<ROT3>();
120
0
121
0
         return rot1 ^ rot2 ^ rot3;
122
0
         }
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rho<6ul, 11ul, 25ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rho<2ul, 13ul, 22ul>() const
123
124
      BOTAN_FUNC_ISA("avx2")
125
      SIMD_8x32 operator+(const SIMD_8x32& other) const
126
209k
         {
127
209k
         SIMD_8x32 retval(*this);
128
209k
         retval += other;
129
209k
         return retval;
130
209k
         }
131
132
      BOTAN_FUNC_ISA("avx2")
133
      SIMD_8x32 operator-(const SIMD_8x32& other) const
134
0
         {
135
0
         SIMD_8x32 retval(*this);
136
0
         retval -= other;
137
0
         return retval;
138
0
         }
139
140
      BOTAN_FUNC_ISA("avx2")
141
      SIMD_8x32 operator^(const SIMD_8x32& other) const
142
0
         {
143
0
         SIMD_8x32 retval(*this);
144
0
         retval ^= other;
145
0
         return retval;
146
0
         }
147
148
      BOTAN_FUNC_ISA("avx2")
149
      SIMD_8x32 operator|(const SIMD_8x32& other) const
150
0
         {
151
0
         SIMD_8x32 retval(*this);
152
0
         retval |= other;
153
0
         return retval;
154
0
         }
155
156
      BOTAN_FUNC_ISA("avx2")
157
      SIMD_8x32 operator&(const SIMD_8x32& other) const
158
0
         {
159
0
         SIMD_8x32 retval(*this);
160
0
         retval &= other;
161
0
         return retval;
162
0
         }
163
164
      BOTAN_FUNC_ISA("avx2")
165
      void operator+=(const SIMD_8x32& other)
166
17.8M
         {
167
17.8M
         m_avx2 = _mm256_add_epi32(m_avx2, other.m_avx2);
168
17.8M
         }
169
170
      BOTAN_FUNC_ISA("avx2")
171
      void operator-=(const SIMD_8x32& other)
172
0
         {
173
0
         m_avx2 = _mm256_sub_epi32(m_avx2, other.m_avx2);
174
0
         }
175
176
      BOTAN_FUNC_ISA("avx2")
177
      void operator^=(const SIMD_8x32& other)
178
16.7M
         {
179
16.7M
         m_avx2 = _mm256_xor_si256(m_avx2, other.m_avx2);
180
16.7M
         }
181
182
      BOTAN_FUNC_ISA("avx2")
183
      void operator|=(const SIMD_8x32& other)
184
0
         {
185
0
         m_avx2 = _mm256_or_si256(m_avx2, other.m_avx2);
186
0
         }
187
188
      BOTAN_FUNC_ISA("avx2")
189
      void operator&=(const SIMD_8x32& other)
190
0
         {
191
0
         m_avx2 = _mm256_and_si256(m_avx2, other.m_avx2);
192
0
         }
193
194
      template<int SHIFT> BOTAN_FUNC_ISA("avx2") SIMD_8x32 shl() const
195
0
         {
196
0
         return SIMD_8x32(_mm256_slli_epi32(m_avx2, SHIFT));
197
0
         }
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::shl<3>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::shl<7>() const
198
199
      template<int SHIFT> BOTAN_FUNC_ISA("avx2") SIMD_8x32 shr() const
200
         {
201
         return SIMD_8x32(_mm256_srli_epi32(m_avx2, SHIFT));
202
         }
203
204
      BOTAN_FUNC_ISA("avx2")
205
      SIMD_8x32 operator~() const
206
0
         {
207
0
         return SIMD_8x32(_mm256_xor_si256(m_avx2, _mm256_set1_epi32(0xFFFFFFFF)));
208
0
         }
209
210
      // (~reg) & other
211
      BOTAN_FUNC_ISA("avx2")
212
      SIMD_8x32 andc(const SIMD_8x32& other) const
213
0
         {
214
0
         return SIMD_8x32(_mm256_andnot_si256(m_avx2, other.m_avx2));
215
0
         }
216
217
      BOTAN_FUNC_ISA("avx2")
218
      SIMD_8x32 bswap() const
219
0
         {
220
0
         const uint8_t BSWAP_MASK[32] = { 3, 2, 1, 0,
221
0
                                          7, 6, 5, 4,
222
0
                                          11, 10, 9, 8,
223
0
                                          15, 14, 13, 12,
224
0
                                          19, 18, 17, 16,
225
0
                                          23, 22, 21, 20,
226
0
                                          27, 26, 25, 24,
227
0
                                          31, 30, 29, 28 };
228
0
229
0
         const __m256i bswap = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(BSWAP_MASK));
230
0
231
0
         const __m256i output = _mm256_shuffle_epi8(m_avx2, bswap);
232
0
233
0
         return SIMD_8x32(output);
234
0
         }
235
236
      BOTAN_FUNC_ISA("avx2")
237
      static void transpose(SIMD_8x32& B0, SIMD_8x32& B1,
238
                            SIMD_8x32& B2, SIMD_8x32& B3)
239
209k
         {
240
209k
         const __m256i T0 = _mm256_unpacklo_epi32(B0.m_avx2, B1.m_avx2);
241
209k
         const __m256i T1 = _mm256_unpacklo_epi32(B2.m_avx2, B3.m_avx2);
242
209k
         const __m256i T2 = _mm256_unpackhi_epi32(B0.m_avx2, B1.m_avx2);
243
209k
         const __m256i T3 = _mm256_unpackhi_epi32(B2.m_avx2, B3.m_avx2);
244
209k
245
209k
         B0.m_avx2 = _mm256_unpacklo_epi64(T0, T1);
246
209k
         B1.m_avx2 = _mm256_unpackhi_epi64(T0, T1);
247
209k
         B2.m_avx2 = _mm256_unpacklo_epi64(T2, T3);
248
209k
         B3.m_avx2 = _mm256_unpackhi_epi64(T2, T3);
249
209k
         }
250
251
      BOTAN_FUNC_ISA("avx2")
252
      static void transpose(SIMD_8x32& B0, SIMD_8x32& B1,
253
                            SIMD_8x32& B2, SIMD_8x32& B3,
254
                            SIMD_8x32& B4, SIMD_8x32& B5,
255
                            SIMD_8x32& B6, SIMD_8x32& B7)
256
104k
         {
257
104k
         transpose(B0, B1, B2, B3);
258
104k
         transpose(B4, B5, B6, B7);
259
104k
260
104k
         swap_tops(B0, B4);
261
104k
         swap_tops(B1, B5);
262
104k
         swap_tops(B2, B6);
263
104k
         swap_tops(B3, B7);
264
104k
         }
265
266
      BOTAN_FUNC_ISA("avx2")
267
      static void reset_registers()
268
52.4k
         {
269
52.4k
         _mm256_zeroupper();
270
52.4k
         }
271
272
      BOTAN_FUNC_ISA("avx2")
273
      static void zero_registers()
274
52.4k
         {
275
52.4k
         _mm256_zeroall();
276
52.4k
         }
277
278
1.67M
      __m256i BOTAN_FUNC_ISA("avx2") handle() const { return m_avx2; }
279
280
      BOTAN_FUNC_ISA("avx2")
281
19.2M
      SIMD_8x32(__m256i x) : m_avx2(x) {}
282
283
   private:
284
285
      BOTAN_FUNC_ISA("avx2")
286
      static void swap_tops(SIMD_8x32& A, SIMD_8x32& B)
287
419k
         {
288
419k
         SIMD_8x32 T0 = _mm256_permute2x128_si256(A.handle(), B.handle(), 0 + (2 << 4));
289
419k
         SIMD_8x32 T1 = _mm256_permute2x128_si256(A.handle(), B.handle(), 1 + (3 << 4));
290
419k
         A = T0;
291
419k
         B = T1;
292
419k
         }
293
294
      __m256i m_avx2;
295
   };
296
297
}
298
299
#endif