Coverage Report

Created: 2021-02-21 07:20

/src/botan/build/include/botan/internal/simd_avx2.h
Line
Count
Source (jump to first uncovered line)
1
/*
2
* (C) 2018 Jack Lloyd
3
*
4
* Botan is released under the Simplified BSD License (see license.txt)
5
*/
6
7
#ifndef BOTAN_SIMD_AVX2_H_
8
#define BOTAN_SIMD_AVX2_H_
9
10
#include <botan/types.h>
11
#include <immintrin.h>
12
13
namespace Botan {
14
15
class SIMD_8x32 final
16
   {
17
   public:
18
19
      SIMD_8x32& operator=(const SIMD_8x32& other) = default;
20
      SIMD_8x32(const SIMD_8x32& other) = default;
21
22
      SIMD_8x32& operator=(SIMD_8x32&& other) = default;
23
      SIMD_8x32(SIMD_8x32&& other) = default;
24
25
      BOTAN_FUNC_ISA("avx2")
26
      BOTAN_FORCE_INLINE SIMD_8x32()
27
0
         {
28
0
         m_avx2 = _mm256_setzero_si256();
29
0
         }
30
31
      BOTAN_FUNC_ISA("avx2")
32
      explicit SIMD_8x32(const uint32_t B[8])
33
0
         {
34
0
         m_avx2 = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(B));
35
0
         }
36
37
      BOTAN_FUNC_ISA("avx2")
38
      explicit SIMD_8x32(uint32_t B0, uint32_t B1, uint32_t B2, uint32_t B3,
39
                         uint32_t B4, uint32_t B5, uint32_t B6, uint32_t B7)
40
87.6k
         {
41
87.6k
         m_avx2 = _mm256_set_epi32(B7, B6, B5, B4, B3, B2, B1, B0);
42
87.6k
         }
43
44
      BOTAN_FUNC_ISA("avx2")
45
      static SIMD_8x32 splat(uint32_t B)
46
1.40M
         {
47
1.40M
         return SIMD_8x32(_mm256_set1_epi32(B));
48
1.40M
         }
49
50
      BOTAN_FUNC_ISA("avx2")
51
      static SIMD_8x32 load_le(const uint8_t* in)
52
0
         {
53
0
         return SIMD_8x32(_mm256_loadu_si256(reinterpret_cast<const __m256i*>(in)));
54
0
         }
55
56
      BOTAN_FUNC_ISA("avx2")
57
      static SIMD_8x32 load_be(const uint8_t* in)
58
0
         {
59
0
         return load_le(in).bswap();
60
0
         }
61
62
      BOTAN_FUNC_ISA("avx2")
63
      void store_le(uint8_t out[]) const
64
701k
         {
65
701k
         _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), m_avx2);
66
701k
         }
67
68
      BOTAN_FUNC_ISA("avx2")
69
      void store_be(uint8_t out[]) const
70
0
         {
71
0
         bswap().store_le(out);
72
0
         }
73
74
      template<size_t ROT>
75
      BOTAN_FUNC_ISA("avx2")
76
      SIMD_8x32 rotl() const
77
14.0M
         {
78
14.0M
         static_assert(ROT > 0 && ROT < 32, "Invalid rotation constant");
79
80
#if defined(__AVX512VL__)
81
         return SIMD_8x32(_mm256_rol_epi32(m_avx2, ROT));
82
#else
83
14.0M
         if constexpr(ROT == 8)
84
0
            {
85
10.5M
            const __m256i shuf_rotl_8 = _mm256_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3,
86
10.5M
                                                        14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3);
87
88
10.5M
            return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_8));
89
10.5M
            }
90
10.5M
         else if constexpr(ROT == 16)
91
0
            {
92
7.01M
            const __m256i shuf_rotl_16 = _mm256_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2,
93
7.01M
                                                         13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2);
94
95
7.01M
            return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_16));
96
7.01M
            }
97
7.01M
         else
98
7.01M
            {
99
7.01M
            return SIMD_8x32(_mm256_or_si256(_mm256_slli_epi32(m_avx2, static_cast<int>(ROT)),
100
7.01M
                                             _mm256_srli_epi32(m_avx2, static_cast<int>(32-ROT))));
101
7.01M
            }
102
14.0M
#endif
103
14.0M
         }
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<19ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<10ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<13ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<3ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<1ul>() const
Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<7ul>() const
Line
Count
Source
77
3.50M
         {
78
3.50M
         static_assert(ROT > 0 && ROT < 32, "Invalid rotation constant");
79
80
#if defined(__AVX512VL__)
81
         return SIMD_8x32(_mm256_rol_epi32(m_avx2, ROT));
82
#else
83
3.50M
         if constexpr(ROT == 8)
84
0
            {
85
3.50M
            const __m256i shuf_rotl_8 = _mm256_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3,
86
3.50M
                                                        14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3);
87
88
3.50M
            return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_8));
89
3.50M
            }
90
3.50M
         else if constexpr(ROT == 16)
91
0
            {
92
3.50M
            const __m256i shuf_rotl_16 = _mm256_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2,
93
3.50M
                                                         13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2);
94
95
3.50M
            return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_16));
96
3.50M
            }
97
3.50M
         else
98
3.50M
            {
99
3.50M
            return SIMD_8x32(_mm256_or_si256(_mm256_slli_epi32(m_avx2, static_cast<int>(ROT)),
100
3.50M
                                             _mm256_srli_epi32(m_avx2, static_cast<int>(32-ROT))));
101
3.50M
            }
102
3.50M
#endif
103
3.50M
         }
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<5ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<22ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<27ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<25ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<31ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<29ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<30ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<26ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<21ul>() const
Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<16ul>() const
Line
Count
Source
77
3.50M
         {
78
3.50M
         static_assert(ROT > 0 && ROT < 32, "Invalid rotation constant");
79
80
#if defined(__AVX512VL__)
81
         return SIMD_8x32(_mm256_rol_epi32(m_avx2, ROT));
82
#else
83
3.50M
         if constexpr(ROT == 8)
84
0
            {
85
3.50M
            const __m256i shuf_rotl_8 = _mm256_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3,
86
3.50M
                                                        14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3);
87
88
3.50M
            return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_8));
89
3.50M
            }
90
3.50M
         else if constexpr(ROT == 16)
91
3.50M
            {
92
3.50M
            const __m256i shuf_rotl_16 = _mm256_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2,
93
3.50M
                                                         13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2);
94
95
3.50M
            return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_16));
96
3.50M
            }
97
3.50M
         else
98
3.50M
            {
99
3.50M
            return SIMD_8x32(_mm256_or_si256(_mm256_slli_epi32(m_avx2, static_cast<int>(ROT)),
100
3.50M
                                             _mm256_srli_epi32(m_avx2, static_cast<int>(32-ROT))));
101
3.50M
            }
102
3.50M
#endif
103
3.50M
         }
Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<12ul>() const
Line
Count
Source
77
3.50M
         {
78
3.50M
         static_assert(ROT > 0 && ROT < 32, "Invalid rotation constant");
79
80
#if defined(__AVX512VL__)
81
         return SIMD_8x32(_mm256_rol_epi32(m_avx2, ROT));
82
#else
83
3.50M
         if constexpr(ROT == 8)
84
0
            {
85
3.50M
            const __m256i shuf_rotl_8 = _mm256_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3,
86
3.50M
                                                        14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3);
87
88
3.50M
            return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_8));
89
3.50M
            }
90
3.50M
         else if constexpr(ROT == 16)
91
0
            {
92
3.50M
            const __m256i shuf_rotl_16 = _mm256_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2,
93
3.50M
                                                         13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2);
94
95
3.50M
            return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_16));
96
3.50M
            }
97
3.50M
         else
98
3.50M
            {
99
3.50M
            return SIMD_8x32(_mm256_or_si256(_mm256_slli_epi32(m_avx2, static_cast<int>(ROT)),
100
3.50M
                                             _mm256_srli_epi32(m_avx2, static_cast<int>(32-ROT))));
101
3.50M
            }
102
3.50M
#endif
103
3.50M
         }
Botan::SIMD_8x32 Botan::SIMD_8x32::rotl<8ul>() const
Line
Count
Source
77
3.50M
         {
78
3.50M
         static_assert(ROT > 0 && ROT < 32, "Invalid rotation constant");
79
80
#if defined(__AVX512VL__)
81
         return SIMD_8x32(_mm256_rol_epi32(m_avx2, ROT));
82
#else
83
3.50M
         if constexpr(ROT == 8)
84
3.50M
            {
85
3.50M
            const __m256i shuf_rotl_8 = _mm256_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3,
86
3.50M
                                                        14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3);
87
88
3.50M
            return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_8));
89
3.50M
            }
90
3.50M
         else if constexpr(ROT == 16)
91
3.50M
            {
92
3.50M
            const __m256i shuf_rotl_16 = _mm256_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2,
93
3.50M
                                                         13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2);
94
95
3.50M
            return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_16));
96
3.50M
            }
97
3.50M
         else
98
3.50M
            {
99
3.50M
            return SIMD_8x32(_mm256_or_si256(_mm256_slli_epi32(m_avx2, static_cast<int>(ROT)),
100
3.50M
                                             _mm256_srli_epi32(m_avx2, static_cast<int>(32-ROT))));
101
3.50M
            }
102
3.50M
#endif
103
3.50M
         }
104
105
      template<size_t ROT>
106
      BOTAN_FUNC_ISA("avx2")
107
      SIMD_8x32 rotr() const
108
0
         {
109
0
         return this->rotl<32-ROT>();
110
0
         }
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<13ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<22ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<5ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<7ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<1ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<3ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<2ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<6ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<11ul>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::rotr<25ul>() const
111
112
      SIMD_8x32 BOTAN_FUNC_ISA("avx2") sigma0() const
113
0
         {
114
0
         const SIMD_8x32 rot1 = this->rotr<2>();
115
0
         const SIMD_8x32 rot2 = this->rotr<13>();
116
0
         const SIMD_8x32 rot3 = this->rotr<22>();
117
0
         return rot1 ^ rot2 ^ rot3;
118
0
         }
119
120
      SIMD_8x32 BOTAN_FUNC_ISA("avx2") sigma1() const
121
0
         {
122
0
         const SIMD_8x32 rot1 = this->rotr<6>();
123
0
         const SIMD_8x32 rot2 = this->rotr<11>();
124
0
         const SIMD_8x32 rot3 = this->rotr<25>();
125
0
         return rot1 ^ rot2 ^ rot3;
126
0
         }
127
128
      BOTAN_FUNC_ISA("avx2")
129
      SIMD_8x32 operator+(const SIMD_8x32& other) const
130
175k
         {
131
175k
         SIMD_8x32 retval(*this);
132
175k
         retval += other;
133
175k
         return retval;
134
175k
         }
135
136
      BOTAN_FUNC_ISA("avx2")
137
      SIMD_8x32 operator-(const SIMD_8x32& other) const
138
0
         {
139
0
         SIMD_8x32 retval(*this);
140
0
         retval -= other;
141
0
         return retval;
142
0
         }
143
144
      BOTAN_FUNC_ISA("avx2")
145
      SIMD_8x32 operator^(const SIMD_8x32& other) const
146
0
         {
147
0
         SIMD_8x32 retval(*this);
148
0
         retval ^= other;
149
0
         return retval;
150
0
         }
151
152
      BOTAN_FUNC_ISA("avx2")
153
      SIMD_8x32 operator|(const SIMD_8x32& other) const
154
0
         {
155
0
         SIMD_8x32 retval(*this);
156
0
         retval |= other;
157
0
         return retval;
158
0
         }
159
160
      BOTAN_FUNC_ISA("avx2")
161
      SIMD_8x32 operator&(const SIMD_8x32& other) const
162
0
         {
163
0
         SIMD_8x32 retval(*this);
164
0
         retval &= other;
165
0
         return retval;
166
0
         }
167
168
      BOTAN_FUNC_ISA("avx2")
169
      void operator+=(const SIMD_8x32& other)
170
14.8M
         {
171
14.8M
         m_avx2 = _mm256_add_epi32(m_avx2, other.m_avx2);
172
14.8M
         }
173
174
      BOTAN_FUNC_ISA("avx2")
175
      void operator-=(const SIMD_8x32& other)
176
0
         {
177
0
         m_avx2 = _mm256_sub_epi32(m_avx2, other.m_avx2);
178
0
         }
179
180
      BOTAN_FUNC_ISA("avx2")
181
      void operator^=(const SIMD_8x32& other)
182
14.0M
         {
183
14.0M
         m_avx2 = _mm256_xor_si256(m_avx2, other.m_avx2);
184
14.0M
         }
185
186
      BOTAN_FUNC_ISA("avx2")
187
      void operator|=(const SIMD_8x32& other)
188
0
         {
189
0
         m_avx2 = _mm256_or_si256(m_avx2, other.m_avx2);
190
0
         }
191
192
      BOTAN_FUNC_ISA("avx2")
193
      void operator&=(const SIMD_8x32& other)
194
0
         {
195
0
         m_avx2 = _mm256_and_si256(m_avx2, other.m_avx2);
196
0
         }
197
198
      template<int SHIFT> BOTAN_FUNC_ISA("avx2") SIMD_8x32 shl() const
199
0
         {
200
0
         return SIMD_8x32(_mm256_slli_epi32(m_avx2, SHIFT));
201
0
         }
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::shl<3>() const
Unexecuted instantiation: Botan::SIMD_8x32 Botan::SIMD_8x32::shl<7>() const
202
203
      template<int SHIFT> BOTAN_FUNC_ISA("avx2") SIMD_8x32 shr() const
204
         {
205
         return SIMD_8x32(_mm256_srli_epi32(m_avx2, SHIFT));
206
         }
207
208
      BOTAN_FUNC_ISA("avx2")
209
      SIMD_8x32 operator~() const
210
0
         {
211
0
         return SIMD_8x32(_mm256_xor_si256(m_avx2, _mm256_set1_epi32(0xFFFFFFFF)));
212
0
         }
213
214
      // (~reg) & other
215
      BOTAN_FUNC_ISA("avx2")
216
      SIMD_8x32 andc(const SIMD_8x32& other) const
217
0
         {
218
0
         return SIMD_8x32(_mm256_andnot_si256(m_avx2, other.m_avx2));
219
0
         }
220
221
      BOTAN_FUNC_ISA("avx2")
222
      SIMD_8x32 bswap() const
223
0
         {
224
0
         const uint8_t BSWAP_MASK[32] = { 3, 2, 1, 0,
225
0
                                          7, 6, 5, 4,
226
0
                                          11, 10, 9, 8,
227
0
                                          15, 14, 13, 12,
228
0
                                          19, 18, 17, 16,
229
0
                                          23, 22, 21, 20,
230
0
                                          27, 26, 25, 24,
231
0
                                          31, 30, 29, 28 };
232
233
0
         const __m256i bswap = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(BSWAP_MASK));
234
235
0
         const __m256i output = _mm256_shuffle_epi8(m_avx2, bswap);
236
237
0
         return SIMD_8x32(output);
238
0
         }
239
240
      BOTAN_FUNC_ISA("avx2")
241
      static void transpose(SIMD_8x32& B0, SIMD_8x32& B1,
242
                            SIMD_8x32& B2, SIMD_8x32& B3)
243
175k
         {
244
175k
         const __m256i T0 = _mm256_unpacklo_epi32(B0.m_avx2, B1.m_avx2);
245
175k
         const __m256i T1 = _mm256_unpacklo_epi32(B2.m_avx2, B3.m_avx2);
246
175k
         const __m256i T2 = _mm256_unpackhi_epi32(B0.m_avx2, B1.m_avx2);
247
175k
         const __m256i T3 = _mm256_unpackhi_epi32(B2.m_avx2, B3.m_avx2);
248
249
175k
         B0.m_avx2 = _mm256_unpacklo_epi64(T0, T1);
250
175k
         B1.m_avx2 = _mm256_unpackhi_epi64(T0, T1);
251
175k
         B2.m_avx2 = _mm256_unpacklo_epi64(T2, T3);
252
175k
         B3.m_avx2 = _mm256_unpackhi_epi64(T2, T3);
253
175k
         }
254
255
      BOTAN_FUNC_ISA("avx2")
256
      static void transpose(SIMD_8x32& B0, SIMD_8x32& B1,
257
                            SIMD_8x32& B2, SIMD_8x32& B3,
258
                            SIMD_8x32& B4, SIMD_8x32& B5,
259
                            SIMD_8x32& B6, SIMD_8x32& B7)
260
87.6k
         {
261
87.6k
         transpose(B0, B1, B2, B3);
262
87.6k
         transpose(B4, B5, B6, B7);
263
264
87.6k
         swap_tops(B0, B4);
265
87.6k
         swap_tops(B1, B5);
266
87.6k
         swap_tops(B2, B6);
267
87.6k
         swap_tops(B3, B7);
268
87.6k
         }
269
270
      BOTAN_FUNC_ISA("avx2")
271
      static SIMD_8x32 choose(const SIMD_8x32& mask, const SIMD_8x32& a, const SIMD_8x32& b)
272
0
         {
273
#if defined(__AVX512VL__)
274
         return _mm256_ternarylogic_epi32(mask.handle(), a.handle(), b.handle(), 0xca);
275
#else
276
0
         return (mask & a) ^ mask.andc(b);
277
0
#endif
278
0
         }
279
280
      BOTAN_FUNC_ISA("avx2")
281
      static SIMD_8x32 majority(const SIMD_8x32& x, const SIMD_8x32& y, const SIMD_8x32& z)
282
0
         {
283
#if defined(__AVX512VL__)
284
         return _mm256_ternarylogic_epi32(x.handle(), y.handle(), z.handle(), 0xe8);
285
#else
286
0
         return SIMD_8x32::choose(x ^ y, z, y);
287
0
#endif
288
0
         }
289
290
      BOTAN_FUNC_ISA("avx2")
291
      static void reset_registers()
292
43.8k
         {
293
43.8k
         _mm256_zeroupper();
294
43.8k
         }
295
296
      BOTAN_FUNC_ISA("avx2")
297
      static void zero_registers()
298
43.8k
         {
299
43.8k
         _mm256_zeroall();
300
43.8k
         }
301
302
1.40M
      __m256i BOTAN_FUNC_ISA("avx2") handle() const { return m_avx2; }
303
304
      BOTAN_FUNC_ISA("avx2")
305
16.1M
      SIMD_8x32(__m256i x) : m_avx2(x) {}
306
307
   private:
308
309
      BOTAN_FUNC_ISA("avx2")
310
      static void swap_tops(SIMD_8x32& A, SIMD_8x32& B)
311
350k
         {
312
350k
         SIMD_8x32 T0 = _mm256_permute2x128_si256(A.handle(), B.handle(), 0 + (2 << 4));
313
350k
         SIMD_8x32 T1 = _mm256_permute2x128_si256(A.handle(), B.handle(), 1 + (3 << 4));
314
350k
         A = T0;
315
350k
         B = T1;
316
350k
         }
317
318
      __m256i m_avx2;
319
   };
320
321
}
322
323
#endif