Coverage Report

Created: 2020-02-14 15:38

/src/botan/build/include/botan/internal/simd_avx2.h
Line
Count
Source (jump to first uncovered line)
1
/*
2
* (C) 2018 Jack Lloyd
3
*
4
* Botan is released under the Simplified BSD License (see license.txt)
5
*/
6
7
#ifndef BOTAN_SIMD_AVX2_H_
8
#define BOTAN_SIMD_AVX2_H_
9
10
#include <botan/types.h>
11
#include <immintrin.h>
12
13
namespace Botan {
14
15
class SIMD_8x32 final
16
   {
17
   public:
18
19
      SIMD_8x32& operator=(const SIMD_8x32& other) = default;
20
      SIMD_8x32(const SIMD_8x32& other) = default;
21
22
      SIMD_8x32& operator=(SIMD_8x32&& other) = default;
23
      SIMD_8x32(SIMD_8x32&& other) = default;
24
25
      BOTAN_FUNC_ISA("avx2")
26
      SIMD_8x32()
27
0
         {
28
0
         m_avx2 = _mm256_setzero_si256();
29
0
         }
30
31
      BOTAN_FUNC_ISA("avx2")
32
      explicit SIMD_8x32(const uint32_t B[8])
33
0
         {
34
0
         m_avx2 = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(B));
35
0
         }
36
37
      BOTAN_FUNC_ISA("avx2")
38
      explicit SIMD_8x32(uint32_t B0, uint32_t B1, uint32_t B2, uint32_t B3,
39
                         uint32_t B4, uint32_t B5, uint32_t B6, uint32_t B7)
40
97.7k
         {
41
97.7k
         m_avx2 = _mm256_set_epi32(B7, B6, B5, B4, B3, B2, B1, B0);
42
97.7k
         }
43
44
      BOTAN_FUNC_ISA("avx2")
45
      static SIMD_8x32 splat(uint32_t B)
46
1.56M
         {
47
1.56M
         return SIMD_8x32(_mm256_set1_epi32(B));
48
1.56M
         }
49
50
      BOTAN_FUNC_ISA("avx2")
51
      static SIMD_8x32 load_le(const uint8_t* in)
52
0
         {
53
0
         return SIMD_8x32(_mm256_loadu_si256(reinterpret_cast<const __m256i*>(in)));
54
0
         }
55
56
      BOTAN_FUNC_ISA("avx2")
57
      static SIMD_8x32 load_be(const uint8_t* in)
58
0
         {
59
0
         return load_le(in).bswap();
60
0
         }
61
62
      BOTAN_FUNC_ISA("avx2")
63
      void store_le(uint8_t out[]) const
64
781k
         {
65
781k
         _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), m_avx2);
66
781k
         }
67
68
      BOTAN_FUNC_ISA("avx2")
69
      void store_be(uint8_t out[]) const
70
0
         {
71
0
         bswap().store_le(out);
72
0
         }
73
74
      template<size_t ROT>
75
      BOTAN_FUNC_ISA("avx2")
76
      SIMD_8x32 rotl() const
77
15.6M
         {
78
15.6M
         static_assert(ROT > 0 && ROT < 32, "Invalid rotation constant");
79
15.6M
80
15.6M
         BOTAN_IF_CONSTEXPR(ROT == 8)
81
3.90M
            {
82
3.90M
            const __m256i shuf_rotl_8 = _mm256_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3,
83
3.90M
                                                        14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3);
84
3.90M
85
3.90M
            return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_8));
86
3.90M
            }
87
15.6M
         else BOTAN_IF_CONSTEXPR(ROT == 16)
88
3.90M
            {
89
3.90M
            const __m256i shuf_rotl_16 = _mm256_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2,
90
3.90M
                                                         13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2);
91
3.90M
92
3.90M
            return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_16));
93
3.90M
            }
94
7.81M
         else
95
7.81M
            {
96
7.81M
            return SIMD_8x32(_mm256_or_si256(_mm256_slli_epi32(m_avx2, static_cast<int>(ROT)),
97
7.81M
                                             _mm256_srli_epi32(m_avx2, static_cast<int>(32-ROT))));
98
7.81M
            }
99
15.6M
         }
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<13ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<3ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<1ul>() const
Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<7ul>() const
Line
Count
Source
77
3.90M
         {
78
3.90M
         static_assert(ROT > 0 && ROT < 32, "Invalid rotation constant");
79
3.90M
80
3.90M
         BOTAN_IF_CONSTEXPR(ROT == 8)
81
0
            {
82
0
            const __m256i shuf_rotl_8 = _mm256_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3,
83
0
                                                        14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3);
84
0
85
0
            return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_8));
86
0
            }
87
3.90M
         else BOTAN_IF_CONSTEXPR(ROT == 16)
88
0
            {
89
0
            const __m256i shuf_rotl_16 = _mm256_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2,
90
0
                                                         13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2);
91
0
92
0
            return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_16));
93
0
            }
94
3.90M
         else
95
3.90M
            {
96
3.90M
            return SIMD_8x32(_mm256_or_si256(_mm256_slli_epi32(m_avx2, static_cast<int>(ROT)),
97
3.90M
                                             _mm256_srli_epi32(m_avx2, static_cast<int>(32-ROT))));
98
3.90M
            }
99
3.90M
         }
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<5ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<22ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<10ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<27ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<25ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<31ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<29ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<19ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<26ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<21ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<30ul>() const
Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<16ul>() const
Line
Count
Source
77
3.90M
         {
78
3.90M
         static_assert(ROT > 0 && ROT < 32, "Invalid rotation constant");
79
3.90M
80
3.90M
         BOTAN_IF_CONSTEXPR(ROT == 8)
81
0
            {
82
0
            const __m256i shuf_rotl_8 = _mm256_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3,
83
0
                                                        14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3);
84
0
85
0
            return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_8));
86
0
            }
87
3.90M
         else BOTAN_IF_CONSTEXPR(ROT == 16)
88
3.90M
            {
89
3.90M
            const __m256i shuf_rotl_16 = _mm256_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2,
90
3.90M
                                                         13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2);
91
3.90M
92
3.90M
            return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_16));
93
3.90M
            }
94
0
         else
95
0
            {
96
0
            return SIMD_8x32(_mm256_or_si256(_mm256_slli_epi32(m_avx2, static_cast<int>(ROT)),
97
0
                                             _mm256_srli_epi32(m_avx2, static_cast<int>(32-ROT))));
98
0
            }
99
3.90M
         }
Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<12ul>() const
Line
Count
Source
77
3.90M
         {
78
3.90M
         static_assert(ROT > 0 && ROT < 32, "Invalid rotation constant");
79
3.90M
80
3.90M
         BOTAN_IF_CONSTEXPR(ROT == 8)
81
0
            {
82
0
            const __m256i shuf_rotl_8 = _mm256_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3,
83
0
                                                        14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3);
84
0
85
0
            return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_8));
86
0
            }
87
3.90M
         else BOTAN_IF_CONSTEXPR(ROT == 16)
88
0
            {
89
0
            const __m256i shuf_rotl_16 = _mm256_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2,
90
0
                                                         13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2);
91
0
92
0
            return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_16));
93
0
            }
94
3.90M
         else
95
3.90M
            {
96
3.90M
            return SIMD_8x32(_mm256_or_si256(_mm256_slli_epi32(m_avx2, static_cast<int>(ROT)),
97
3.90M
                                             _mm256_srli_epi32(m_avx2, static_cast<int>(32-ROT))));
98
3.90M
            }
99
3.90M
         }
Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<8ul>() const
Line
Count
Source
77
3.90M
         {
78
3.90M
         static_assert(ROT > 0 && ROT < 32, "Invalid rotation constant");
79
3.90M
80
3.90M
         BOTAN_IF_CONSTEXPR(ROT == 8)
81
3.90M
            {
82
3.90M
            const __m256i shuf_rotl_8 = _mm256_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3,
83
3.90M
                                                        14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3);
84
3.90M
85
3.90M
            return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_8));
86
3.90M
            }
87
3.90M
         else BOTAN_IF_CONSTEXPR(ROT == 16)
88
0
            {
89
0
            const __m256i shuf_rotl_16 = _mm256_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2,
90
0
                                                         13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2);
91
0
92
0
            return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_16));
93
0
            }
94
0
         else
95
0
            {
96
0
            return SIMD_8x32(_mm256_or_si256(_mm256_slli_epi32(m_avx2, static_cast<int>(ROT)),
97
0
                                             _mm256_srli_epi32(m_avx2, static_cast<int>(32-ROT))));
98
0
            }
99
3.90M
         }
100
101
      template<size_t ROT>
102
      BOTAN_FUNC_ISA("avx2")
103
      SIMD_8x32 rotr() const
104
0
         {
105
0
         return this->rotl<32-ROT>();
106
0
         }
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<22ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<5ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<7ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<1ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<3ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<13ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<6ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<11ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<25ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<2ul>() const
107
108
      template<size_t ROT1, size_t ROT2, size_t ROT3>
109
      SIMD_8x32 rho() const
110
0
         {
111
0
         SIMD_8x32 res;
112
0
113
0
         const SIMD_8x32 rot1 = this->rotr<ROT1>();
114
0
         const SIMD_8x32 rot2 = this->rotr<ROT2>();
115
0
         const SIMD_8x32 rot3 = this->rotr<ROT3>();
116
0
117
0
         return rot1 ^ rot2 ^ rot3;
118
0
         }
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rho<6ul, 11ul, 25ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rho<2ul, 13ul, 22ul>() const
119
120
      SIMD_8x32 operator+(const SIMD_8x32& other) const
121
195k
         {
122
195k
         SIMD_8x32 retval(*this);
123
195k
         retval += other;
124
195k
         return retval;
125
195k
         }
126
127
      SIMD_8x32 operator-(const SIMD_8x32& other) const
128
0
         {
129
0
         SIMD_8x32 retval(*this);
130
0
         retval -= other;
131
0
         return retval;
132
0
         }
133
134
      SIMD_8x32 operator^(const SIMD_8x32& other) const
135
0
         {
136
0
         SIMD_8x32 retval(*this);
137
0
         retval ^= other;
138
0
         return retval;
139
0
         }
140
141
      SIMD_8x32 operator|(const SIMD_8x32& other) const
142
0
         {
143
0
         SIMD_8x32 retval(*this);
144
0
         retval |= other;
145
0
         return retval;
146
0
         }
147
148
      SIMD_8x32 operator&(const SIMD_8x32& other) const
149
0
         {
150
0
         SIMD_8x32 retval(*this);
151
0
         retval &= other;
152
0
         return retval;
153
0
         }
154
155
      BOTAN_FUNC_ISA("avx2")
156
      void operator+=(const SIMD_8x32& other)
157
16.6M
         {
158
16.6M
         m_avx2 = _mm256_add_epi32(m_avx2, other.m_avx2);
159
16.6M
         }
160
161
      BOTAN_FUNC_ISA("avx2")
162
      void operator-=(const SIMD_8x32& other)
163
0
         {
164
0
         m_avx2 = _mm256_sub_epi32(m_avx2, other.m_avx2);
165
0
         }
166
167
      BOTAN_FUNC_ISA("avx2")
168
      void operator^=(const SIMD_8x32& other)
169
15.6M
         {
170
15.6M
         m_avx2 = _mm256_xor_si256(m_avx2, other.m_avx2);
171
15.6M
         }
172
173
      BOTAN_FUNC_ISA("avx2")
174
      void operator|=(const SIMD_8x32& other)
175
0
         {
176
0
         m_avx2 = _mm256_or_si256(m_avx2, other.m_avx2);
177
0
         }
178
179
      BOTAN_FUNC_ISA("avx2")
180
      void operator&=(const SIMD_8x32& other)
181
0
         {
182
0
         m_avx2 = _mm256_and_si256(m_avx2, other.m_avx2);
183
0
         }
184
185
      template<int SHIFT> BOTAN_FUNC_ISA("avx2") SIMD_8x32 shl() const
186
0
         {
187
0
         return SIMD_8x32(_mm256_slli_epi32(m_avx2, SHIFT));
188
0
         }
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::shl<3>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::shl<7>() const
189
190
      template<int SHIFT> BOTAN_FUNC_ISA("avx2")SIMD_8x32 shr() const
191
         {
192
         return SIMD_8x32(_mm256_srli_epi32(m_avx2, SHIFT));
193
         }
194
195
      BOTAN_FUNC_ISA("avx2")
196
      SIMD_8x32 operator~() const
197
0
         {
198
0
         return SIMD_8x32(_mm256_xor_si256(m_avx2, _mm256_set1_epi32(0xFFFFFFFF)));
199
0
         }
200
201
      // (~reg) & other
202
      BOTAN_FUNC_ISA("avx2")
203
      SIMD_8x32 andc(const SIMD_8x32& other) const
204
0
         {
205
0
         return SIMD_8x32(_mm256_andnot_si256(m_avx2, other.m_avx2));
206
0
         }
207
208
      BOTAN_FUNC_ISA("avx2")
209
      SIMD_8x32 bswap() const
210
0
         {
211
0
         const uint8_t BSWAP_MASK[32] = { 3, 2, 1, 0,
212
0
                                          7, 6, 5, 4,
213
0
                                          11, 10, 9, 8,
214
0
                                          15, 14, 13, 12,
215
0
                                          19, 18, 17, 16,
216
0
                                          23, 22, 21, 20,
217
0
                                          27, 26, 25, 24,
218
0
                                          31, 30, 29, 28 };
219
0
220
0
         const __m256i bswap = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(BSWAP_MASK));
221
0
222
0
         const __m256i output = _mm256_shuffle_epi8(m_avx2, bswap);
223
0
224
0
         return SIMD_8x32(output);
225
0
         }
226
227
      BOTAN_FUNC_ISA("avx2")
228
      static void transpose(SIMD_8x32& B0, SIMD_8x32& B1,
229
                            SIMD_8x32& B2, SIMD_8x32& B3)
230
195k
         {
231
195k
         const __m256i T0 = _mm256_unpacklo_epi32(B0.m_avx2, B1.m_avx2);
232
195k
         const __m256i T1 = _mm256_unpacklo_epi32(B2.m_avx2, B3.m_avx2);
233
195k
         const __m256i T2 = _mm256_unpackhi_epi32(B0.m_avx2, B1.m_avx2);
234
195k
         const __m256i T3 = _mm256_unpackhi_epi32(B2.m_avx2, B3.m_avx2);
235
195k
236
195k
         B0.m_avx2 = _mm256_unpacklo_epi64(T0, T1);
237
195k
         B1.m_avx2 = _mm256_unpackhi_epi64(T0, T1);
238
195k
         B2.m_avx2 = _mm256_unpacklo_epi64(T2, T3);
239
195k
         B3.m_avx2 = _mm256_unpackhi_epi64(T2, T3);
240
195k
         }
241
242
      BOTAN_FUNC_ISA("avx2")
243
      static void transpose(SIMD_8x32& B0, SIMD_8x32& B1,
244
                            SIMD_8x32& B2, SIMD_8x32& B3,
245
                            SIMD_8x32& B4, SIMD_8x32& B5,
246
                            SIMD_8x32& B6, SIMD_8x32& B7)
247
97.7k
         {
248
97.7k
         transpose(B0, B1, B2, B3);
249
97.7k
         transpose(B4, B5, B6, B7);
250
97.7k
251
97.7k
         swap_tops(B0, B4);
252
97.7k
         swap_tops(B1, B5);
253
97.7k
         swap_tops(B2, B6);
254
97.7k
         swap_tops(B3, B7);
255
97.7k
         }
256
257
      BOTAN_FUNC_ISA("avx2")
258
      static void reset_registers()
259
48.8k
         {
260
48.8k
         _mm256_zeroupper();
261
48.8k
         }
262
263
      BOTAN_FUNC_ISA("avx2")
264
      static void zero_registers()
265
48.8k
         {
266
48.8k
         _mm256_zeroall();
267
48.8k
         }
268
269
1.56M
      __m256i BOTAN_FUNC_ISA("avx2") handle() const { return m_avx2; }
270
271
      BOTAN_FUNC_ISA("avx2")
272
17.9M
      SIMD_8x32(__m256i x) : m_avx2(x) {}
273
274
   private:
275
276
      BOTAN_FUNC_ISA("avx2")
277
      static void swap_tops(SIMD_8x32& A, SIMD_8x32& B)
278
390k
         {
279
390k
         SIMD_8x32 T0 = _mm256_permute2x128_si256(A.handle(), B.handle(), 0 + (2 << 4));
280
390k
         SIMD_8x32 T1 = _mm256_permute2x128_si256(A.handle(), B.handle(), 1 + (3 << 4));
281
390k
         A = T0;
282
390k
         B = T1;
283
390k
         }
284
285
      __m256i m_avx2;
286
   };
287
288
}
289
290
#endif