/src/botan/build/include/botan/internal/simd_32.h
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | /*  | 
2  |  | * Lightweight wrappers for SIMD operations  | 
3  |  | * (C) 2009,2011,2016,2017,2019 Jack Lloyd  | 
4  |  | *  | 
5  |  | * Botan is released under the Simplified BSD License (see license.txt)  | 
6  |  | */  | 
7  |  |  | 
8  |  | #ifndef BOTAN_SIMD_32_H_  | 
9  |  | #define BOTAN_SIMD_32_H_  | 
10  |  |  | 
11  |  | #include <botan/types.h>  | 
12  |  |  | 
13  |  | #if defined(BOTAN_TARGET_SUPPORTS_SSE2)  | 
14  |  |    #include <emmintrin.h>  | 
15  |  |    #define BOTAN_SIMD_USE_SSE2  | 
16  |  |  | 
17  |  | #elif defined(BOTAN_TARGET_SUPPORTS_ALTIVEC)  | 
18  |  |    #include <botan/internal/bswap.h>  | 
19  |  |    #include <botan/internal/loadstor.h>  | 
20  |  |    #include <altivec.h>  | 
21  |  |    #undef vector  | 
22  |  |    #undef bool  | 
23  |  |    #define BOTAN_SIMD_USE_ALTIVEC  | 
24  |  |  | 
25  |  | #elif defined(BOTAN_TARGET_SUPPORTS_NEON)  | 
26  |  |    #include <botan/internal/cpuid.h>  | 
27  |  |    #include <arm_neon.h>  | 
28  |  |    #define BOTAN_SIMD_USE_NEON  | 
29  |  |  | 
30  |  | #else  | 
31  |  |    #error "No SIMD instruction set enabled"  | 
32  |  | #endif  | 
33  |  |  | 
34  |  | #if defined(BOTAN_SIMD_USE_SSE2)  | 
35  |  |    #define BOTAN_SIMD_ISA "sse2"  | 
36  |  |    #define BOTAN_VPERM_ISA "ssse3"  | 
37  |  |    #define BOTAN_CLMUL_ISA "pclmul"  | 
38  |  | #elif defined(BOTAN_SIMD_USE_NEON)  | 
39  |  |    #if defined(BOTAN_TARGET_ARCH_IS_ARM64)  | 
40  |  |       #define BOTAN_SIMD_ISA "+simd"  | 
41  |  |       #define BOTAN_CLMUL_ISA "+crypto"  | 
42  |  |    #else  | 
43  |  |       #define BOTAN_SIMD_ISA "fpu=neon"  | 
44  |  |    #endif  | 
45  |  |    #define BOTAN_VPERM_ISA BOTAN_SIMD_ISA  | 
46  |  | #elif defined(BOTAN_SIMD_USE_ALTIVEC)  | 
47  |  |    #define BOTAN_SIMD_ISA "altivec"  | 
48  |  |    #define BOTAN_VPERM_ISA "altivec"  | 
49  |  |    #define BOTAN_CLMUL_ISA "crypto"  | 
50  |  | #endif  | 
51  |  |  | 
52  |  | namespace Botan { | 
53  |  |  | 
54  |  | #if defined(BOTAN_SIMD_USE_SSE2)  | 
55  |  | using native_simd_type = __m128i;  | 
56  |  | #elif defined(BOTAN_SIMD_USE_ALTIVEC)  | 
57  |  | using native_simd_type = __vector unsigned int;  | 
58  |  | #elif defined(BOTAN_SIMD_USE_NEON)  | 
59  |  | using native_simd_type = uint32x4_t;  | 
60  |  | #endif  | 
61  |  |  | 
62  |  | /**  | 
63  |  | * 4x32 bit SIMD register  | 
64  |  | *  | 
65  |  | * This class is not a general purpose SIMD type, and only offers  | 
66  |  | * instructions needed for evaluation of specific crypto primitives.  | 
67  |  | * For example, it does not currently have equality operators of any  | 
68  |  | * kind.  | 
69  |  | *  | 
70  |  | * Implemented for SSE2, VMX (Altivec), and NEON.  | 
71  |  | */  | 
72  |  | class SIMD_4x32 final { | 
73  |  |    public:  | 
74  |  |       SIMD_4x32& operator=(const SIMD_4x32& other) = default;  | 
75  |  |       SIMD_4x32(const SIMD_4x32& other) = default;  | 
76  |  |  | 
77  |  |       SIMD_4x32& operator=(SIMD_4x32&& other) = default;  | 
78  |  |       SIMD_4x32(SIMD_4x32&& other) = default;  | 
79  |  |  | 
80  |  |       ~SIMD_4x32() = default;  | 
81  |  |  | 
82  |  |       /**  | 
83  |  |       * Zero initialize SIMD register with 4 32-bit elements  | 
84  |  |       */  | 
85  |  |       SIMD_4x32() noexcept  // zero initialized  | 
86  | 0  |       { | 
87  | 0  | #if defined(BOTAN_SIMD_USE_SSE2)  | 
88  | 0  |          m_simd = _mm_setzero_si128();  | 
89  |  | #elif defined(BOTAN_SIMD_USE_ALTIVEC)  | 
90  |  |          m_simd = vec_splat_u32(0);  | 
91  |  | #elif defined(BOTAN_SIMD_USE_NEON)  | 
92  |  |          m_simd = vdupq_n_u32(0);  | 
93  |  | #endif  | 
94  | 0  |       }  | 
95  |  |  | 
96  |  |       /**  | 
97  |  |       * Load SIMD register with 4 32-bit elements  | 
98  |  |       */  | 
99  | 0  |       explicit SIMD_4x32(const uint32_t B[4]) noexcept { | 
100  | 0  | #if defined(BOTAN_SIMD_USE_SSE2)  | 
101  | 0  |          m_simd = _mm_loadu_si128(reinterpret_cast<const __m128i*>(B));  | 
102  |  | #elif defined(BOTAN_SIMD_USE_ALTIVEC)  | 
103  |  |          __vector unsigned int val = {B[0], B[1], B[2], B[3]}; | 
104  |  |          m_simd = val;  | 
105  |  | #elif defined(BOTAN_SIMD_USE_NEON)  | 
106  |  |          m_simd = vld1q_u32(B);  | 
107  |  | #endif  | 
108  | 0  |       }  | 
109  |  |  | 
110  |  |       /**  | 
111  |  |       * Load SIMD register with 4 32-bit elements  | 
112  |  |       */  | 
113  | 192  |       SIMD_4x32(uint32_t B0, uint32_t B1, uint32_t B2, uint32_t B3) noexcept { | 
114  | 192  | #if defined(BOTAN_SIMD_USE_SSE2)  | 
115  | 192  |          m_simd = _mm_set_epi32(B3, B2, B1, B0);  | 
116  |  | #elif defined(BOTAN_SIMD_USE_ALTIVEC)  | 
117  |  |          __vector unsigned int val = {B0, B1, B2, B3}; | 
118  |  |          m_simd = val;  | 
119  |  | #elif defined(BOTAN_SIMD_USE_NEON)  | 
120  |  |          // Better way to do this?  | 
121  |  |          const uint32_t B[4] = {B0, B1, B2, B3}; | 
122  |  |          m_simd = vld1q_u32(B);  | 
123  |  | #endif  | 
124  | 192  |       }  | 
125  |  |  | 
126  |  |       /**  | 
127  |  |       * Load SIMD register with one 32-bit element repeated  | 
128  |  |       */  | 
129  | 0  |       static SIMD_4x32 splat(uint32_t B) noexcept { | 
130  | 0  | #if defined(BOTAN_SIMD_USE_SSE2)  | 
131  | 0  |          return SIMD_4x32(_mm_set1_epi32(B));  | 
132  |  | #elif defined(BOTAN_SIMD_USE_NEON)  | 
133  |  |          return SIMD_4x32(vdupq_n_u32(B));  | 
134  |  | #else  | 
135  |  |          return SIMD_4x32(B, B, B, B);  | 
136  |  | #endif  | 
137  | 0  |       }  | 
138  |  |  | 
139  |  |       /**  | 
140  |  |       * Load SIMD register with one 8-bit element repeated  | 
141  |  |       */  | 
142  | 4  |       static SIMD_4x32 splat_u8(uint8_t B) noexcept { | 
143  | 4  | #if defined(BOTAN_SIMD_USE_SSE2)  | 
144  | 4  |          return SIMD_4x32(_mm_set1_epi8(B));  | 
145  |  | #elif defined(BOTAN_SIMD_USE_NEON)  | 
146  |  |          return SIMD_4x32(vreinterpretq_u32_u8(vdupq_n_u8(B)));  | 
147  |  | #else  | 
148  |  |          const uint32_t B4 = make_uint32(B, B, B, B);  | 
149  |  |          return SIMD_4x32(B4, B4, B4, B4);  | 
150  |  | #endif  | 
151  | 4  |       }  | 
152  |  |  | 
153  |  |       /**  | 
154  |  |       * Load a SIMD register with little-endian convention  | 
155  |  |       */  | 
156  | 0  |       static SIMD_4x32 load_le(const void* in) noexcept { | 
157  | 0  | #if defined(BOTAN_SIMD_USE_SSE2)  | 
158  | 0  |          return SIMD_4x32(_mm_loadu_si128(reinterpret_cast<const __m128i*>(in)));  | 
159  |  | #elif defined(BOTAN_SIMD_USE_ALTIVEC)  | 
160  |  |          uint32_t R[4];  | 
161  |  |          Botan::load_le(R, static_cast<const uint8_t*>(in), 4);  | 
162  |  |          return SIMD_4x32(R);  | 
163  |  | #elif defined(BOTAN_SIMD_USE_NEON)  | 
164  |  |          SIMD_4x32 l(vld1q_u32(static_cast<const uint32_t*>(in)));  | 
165  |  |          return CPUID::is_big_endian() ? l.bswap() : l;  | 
166  |  | #endif  | 
167  | 0  |       }  | 
168  |  |  | 
169  |  |       /**  | 
170  |  |       * Load a SIMD register with big-endian convention  | 
171  |  |       */  | 
172  | 0  |       static SIMD_4x32 load_be(const void* in) noexcept { | 
173  | 0  | #if defined(BOTAN_SIMD_USE_SSE2)  | 
174  | 0  |          return load_le(in).bswap();  | 
175  |  | 
  | 
176  |  | #elif defined(BOTAN_SIMD_USE_ALTIVEC)  | 
177  |  |          uint32_t R[4];  | 
178  |  |          Botan::load_be(R, static_cast<const uint8_t*>(in), 4);  | 
179  |  |          return SIMD_4x32(R);  | 
180  |  |  | 
181  |  | #elif defined(BOTAN_SIMD_USE_NEON)  | 
182  |  |          SIMD_4x32 l(vld1q_u32(static_cast<const uint32_t*>(in)));  | 
183  |  |          return CPUID::is_little_endian() ? l.bswap() : l;  | 
184  |  | #endif  | 
185  | 0  |       }  | 
186  |  |  | 
187  | 0  |       void store_le(uint32_t out[4]) const noexcept { this->store_le(reinterpret_cast<uint8_t*>(out)); } | 
188  |  |  | 
189  | 0  |       void store_be(uint32_t out[4]) const noexcept { this->store_be(reinterpret_cast<uint8_t*>(out)); } | 
190  |  |  | 
191  | 0  |       void store_le(uint64_t out[2]) const noexcept { this->store_le(reinterpret_cast<uint8_t*>(out)); } | 
192  |  |  | 
193  |  |       /**  | 
194  |  |       * Store a SIMD register with little-endian convention  | 
195  |  |       */  | 
196  | 0  |       void store_le(uint8_t out[]) const noexcept { | 
197  | 0  | #if defined(BOTAN_SIMD_USE_SSE2)  | 
198  |  | 
  | 
199  | 0  |          _mm_storeu_si128(reinterpret_cast<__m128i*>(out), raw());  | 
200  |  | 
  | 
201  |  | #elif defined(BOTAN_SIMD_USE_ALTIVEC)  | 
202  |  |  | 
203  |  |          union { | 
204  |  |                __vector unsigned int V;  | 
205  |  |                uint32_t R[4];  | 
206  |  |          } vec;  | 
207  |  |  | 
208  |  |          vec.V = raw();  | 
209  |  |          Botan::store_le(out, vec.R[0], vec.R[1], vec.R[2], vec.R[3]);  | 
210  |  |  | 
211  |  | #elif defined(BOTAN_SIMD_USE_NEON)  | 
212  |  |          if(CPUID::is_little_endian()) { | 
213  |  |             vst1q_u8(out, vreinterpretq_u8_u32(m_simd));  | 
214  |  |          } else { | 
215  |  |             vst1q_u8(out, vreinterpretq_u8_u32(bswap().m_simd));  | 
216  |  |          }  | 
217  |  | #endif  | 
218  | 0  |       }  | 
219  |  |  | 
220  |  |       /**  | 
221  |  |       * Store a SIMD register with big-endian convention  | 
222  |  |       */  | 
223  | 0  |       void store_be(uint8_t out[]) const noexcept { | 
224  | 0  | #if defined(BOTAN_SIMD_USE_SSE2)  | 
225  |  | 
  | 
226  | 0  |          bswap().store_le(out);  | 
227  |  | 
  | 
228  |  | #elif defined(BOTAN_SIMD_USE_ALTIVEC)  | 
229  |  |  | 
230  |  |          union { | 
231  |  |                __vector unsigned int V;  | 
232  |  |                uint32_t R[4];  | 
233  |  |          } vec;  | 
234  |  |  | 
235  |  |          vec.V = m_simd;  | 
236  |  |          Botan::store_be(out, vec.R[0], vec.R[1], vec.R[2], vec.R[3]);  | 
237  |  |  | 
238  |  | #elif defined(BOTAN_SIMD_USE_NEON)  | 
239  |  |          if(CPUID::is_little_endian()) { | 
240  |  |             vst1q_u8(out, vreinterpretq_u8_u32(bswap().m_simd));  | 
241  |  |          } else { | 
242  |  |             vst1q_u8(out, vreinterpretq_u8_u32(m_simd));  | 
243  |  |          }  | 
244  |  | #endif  | 
245  | 0  |       }  | 
246  |  |  | 
247  |  |       /*  | 
248  |  |       * rotr<2> ^ rotr<13> ^ rotr<22> of each element (used for SHA-2/SHACAL2)  | 
249  |  |       */  | 
250  | 0  |       SIMD_4x32 sigma0() const noexcept { | 
251  |  | #if BOTAN_COMPILER_HAS_BUILTIN(__builtin_crypto_vshasigmaw) && defined(_ARCH_PWR8)  | 
252  |  |          return SIMD_4x32(__builtin_crypto_vshasigmaw(raw(), 1, 0));  | 
253  |  | #else  | 
254  | 0  |          const SIMD_4x32 rot1 = this->rotr<2>();  | 
255  | 0  |          const SIMD_4x32 rot2 = this->rotr<13>();  | 
256  | 0  |          const SIMD_4x32 rot3 = this->rotr<22>();  | 
257  | 0  |          return (rot1 ^ rot2 ^ rot3);  | 
258  | 0  | #endif  | 
259  | 0  |       }  | 
260  |  |  | 
261  |  |       /*  | 
262  |  |       * rotr<6> ^ rotr<11> ^ rotr<25> of each element (used for SHA-2/SHACAL2)  | 
263  |  |       */  | 
264  | 0  |       SIMD_4x32 sigma1() const noexcept { | 
265  |  | #if BOTAN_COMPILER_HAS_BUILTIN(__builtin_crypto_vshasigmaw) && defined(_ARCH_PWR8)  | 
266  |  |          return SIMD_4x32(__builtin_crypto_vshasigmaw(raw(), 1, 0xF));  | 
267  |  | #else  | 
268  | 0  |          const SIMD_4x32 rot1 = this->rotr<6>();  | 
269  | 0  |          const SIMD_4x32 rot2 = this->rotr<11>();  | 
270  | 0  |          const SIMD_4x32 rot3 = this->rotr<25>();  | 
271  | 0  |          return (rot1 ^ rot2 ^ rot3);  | 
272  | 0  | #endif  | 
273  | 0  |       }  | 
274  |  |  | 
275  |  |       /**  | 
276  |  |       * Left rotation by a compile time constant  | 
277  |  |       */  | 
278  |  |       template <size_t ROT>  | 
279  |  |       SIMD_4x32 rotl() const noexcept  | 
280  |  |          requires(ROT > 0 && ROT < 32)  | 
281  | 0  |       { | 
282  | 0  | #if defined(BOTAN_SIMD_USE_SSE2)  | 
283  |  | 
  | 
284  | 0  |          return SIMD_4x32(_mm_or_si128(_mm_slli_epi32(m_simd, static_cast<int>(ROT)),  | 
285  | 0  |                                        _mm_srli_epi32(m_simd, static_cast<int>(32 - ROT))));  | 
286  |  | 
  | 
287  |  | #elif defined(BOTAN_SIMD_USE_ALTIVEC)  | 
288  |  |  | 
289  |  |          const unsigned int r = static_cast<unsigned int>(ROT);  | 
290  |  |          __vector unsigned int rot = {r, r, r, r}; | 
291  |  |          return SIMD_4x32(vec_rl(m_simd, rot));  | 
292  |  |  | 
293  |  | #elif defined(BOTAN_SIMD_USE_NEON)  | 
294  |  |  | 
295  |  |    #if defined(BOTAN_TARGET_ARCH_IS_ARM64)  | 
296  |  |  | 
297  |  |          if constexpr(ROT == 8) { | 
298  |  |             const uint8_t maskb[16] = {3, 0, 1, 2, 7, 4, 5, 6, 11, 8, 9, 10, 15, 12, 13, 14}; | 
299  |  |             const uint8x16_t mask = vld1q_u8(maskb);  | 
300  |  |             return SIMD_4x32(vreinterpretq_u32_u8(vqtbl1q_u8(vreinterpretq_u8_u32(m_simd), mask)));  | 
301  |  |          } else if constexpr(ROT == 16) { | 
302  |  |             return SIMD_4x32(vreinterpretq_u32_u16(vrev32q_u16(vreinterpretq_u16_u32(m_simd))));  | 
303  |  |          }  | 
304  |  |    #endif  | 
305  |  |          return SIMD_4x32(  | 
306  |  |             vorrq_u32(vshlq_n_u32(m_simd, static_cast<int>(ROT)), vshrq_n_u32(m_simd, static_cast<int>(32 - ROT))));  | 
307  |  | #endif  | 
308  | 0  |       } Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<1ul>() const Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<2ul>() const Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<30ul>() const Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<19ul>() const Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<10ul>() const Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<26ul>() const Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<21ul>() const Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<7ul>() const Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<8ul>() const Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<24ul>() const Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<5ul>() const Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<31ul>() const Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<27ul>() const Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<13ul>() const Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<3ul>() const Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<22ul>() const Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<25ul>() const Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<29ul>() const Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<16ul>() const Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<12ul>() const  | 
309  |  |  | 
310  |  |       /**  | 
311  |  |       * Right rotation by a compile time constant  | 
312  |  |       */  | 
313  |  |       template <size_t ROT>  | 
314  | 0  |       SIMD_4x32 rotr() const noexcept { | 
315  | 0  |          return this->rotl<32 - ROT>();  | 
316  | 0  |       } Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotr<2ul>() const Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotr<13ul>() const Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotr<22ul>() const Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotr<6ul>() const Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotr<11ul>() const Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotr<25ul>() const Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotr<8ul>() const Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotr<1ul>() const Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotr<5ul>() const Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotr<7ul>() const Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotr<3ul>() const  | 
317  |  |  | 
318  |  |       /**  | 
319  |  |       * Add elements of a SIMD vector  | 
320  |  |       */  | 
321  | 0  |       SIMD_4x32 operator+(const SIMD_4x32& other) const noexcept { | 
322  | 0  |          SIMD_4x32 retval(*this);  | 
323  | 0  |          retval += other;  | 
324  | 0  |          return retval;  | 
325  | 0  |       }  | 
326  |  |  | 
327  |  |       /**  | 
328  |  |       * Subtract elements of a SIMD vector  | 
329  |  |       */  | 
330  | 0  |       SIMD_4x32 operator-(const SIMD_4x32& other) const noexcept { | 
331  | 0  |          SIMD_4x32 retval(*this);  | 
332  | 0  |          retval -= other;  | 
333  | 0  |          return retval;  | 
334  | 0  |       }  | 
335  |  |  | 
336  |  |       /**  | 
337  |  |       * XOR elements of a SIMD vector  | 
338  |  |       */  | 
339  | 0  |       SIMD_4x32 operator^(const SIMD_4x32& other) const noexcept { | 
340  | 0  |          SIMD_4x32 retval(*this);  | 
341  | 0  |          retval ^= other;  | 
342  | 0  |          return retval;  | 
343  | 0  |       }  | 
344  |  |  | 
345  |  |       /**  | 
346  |  |       * Binary OR elements of a SIMD vector  | 
347  |  |       */  | 
348  | 0  |       SIMD_4x32 operator|(const SIMD_4x32& other) const noexcept { | 
349  | 0  |          SIMD_4x32 retval(*this);  | 
350  | 0  |          retval |= other;  | 
351  | 0  |          return retval;  | 
352  | 0  |       }  | 
353  |  |  | 
354  |  |       /**  | 
355  |  |       * Binary AND elements of a SIMD vector  | 
356  |  |       */  | 
357  | 0  |       SIMD_4x32 operator&(const SIMD_4x32& other) const noexcept { | 
358  | 0  |          SIMD_4x32 retval(*this);  | 
359  | 0  |          retval &= other;  | 
360  | 0  |          return retval;  | 
361  | 0  |       }  | 
362  |  |  | 
363  | 0  |       void operator+=(const SIMD_4x32& other) noexcept { | 
364  | 0  | #if defined(BOTAN_SIMD_USE_SSE2)  | 
365  | 0  |          m_simd = _mm_add_epi32(m_simd, other.m_simd);  | 
366  |  | #elif defined(BOTAN_SIMD_USE_ALTIVEC)  | 
367  |  |          m_simd = vec_add(m_simd, other.m_simd);  | 
368  |  | #elif defined(BOTAN_SIMD_USE_NEON)  | 
369  |  |          m_simd = vaddq_u32(m_simd, other.m_simd);  | 
370  |  | #endif  | 
371  | 0  |       }  | 
372  |  |  | 
373  | 0  |       void operator-=(const SIMD_4x32& other) noexcept { | 
374  | 0  | #if defined(BOTAN_SIMD_USE_SSE2)  | 
375  | 0  |          m_simd = _mm_sub_epi32(m_simd, other.m_simd);  | 
376  |  | #elif defined(BOTAN_SIMD_USE_ALTIVEC)  | 
377  |  |          m_simd = vec_sub(m_simd, other.m_simd);  | 
378  |  | #elif defined(BOTAN_SIMD_USE_NEON)  | 
379  |  |          m_simd = vsubq_u32(m_simd, other.m_simd);  | 
380  |  | #endif  | 
381  | 0  |       }  | 
382  |  |  | 
383  | 0  |       void operator^=(const SIMD_4x32& other) noexcept { | 
384  | 0  | #if defined(BOTAN_SIMD_USE_SSE2)  | 
385  | 0  |          m_simd = _mm_xor_si128(m_simd, other.m_simd);  | 
386  |  | #elif defined(BOTAN_SIMD_USE_ALTIVEC)  | 
387  |  |          m_simd = vec_xor(m_simd, other.m_simd);  | 
388  |  | #elif defined(BOTAN_SIMD_USE_NEON)  | 
389  |  |          m_simd = veorq_u32(m_simd, other.m_simd);  | 
390  |  | #endif  | 
391  | 0  |       }  | 
392  |  |  | 
393  | 0  |       void operator^=(uint32_t other) noexcept { *this ^= SIMD_4x32::splat(other); } | 
394  |  |  | 
395  | 0  |       void operator|=(const SIMD_4x32& other) noexcept { | 
396  | 0  | #if defined(BOTAN_SIMD_USE_SSE2)  | 
397  | 0  |          m_simd = _mm_or_si128(m_simd, other.m_simd);  | 
398  |  | #elif defined(BOTAN_SIMD_USE_ALTIVEC)  | 
399  |  |          m_simd = vec_or(m_simd, other.m_simd);  | 
400  |  | #elif defined(BOTAN_SIMD_USE_NEON)  | 
401  |  |          m_simd = vorrq_u32(m_simd, other.m_simd);  | 
402  |  | #endif  | 
403  | 0  |       }  | 
404  |  |  | 
405  | 0  |       void operator&=(const SIMD_4x32& other) noexcept { | 
406  | 0  | #if defined(BOTAN_SIMD_USE_SSE2)  | 
407  | 0  |          m_simd = _mm_and_si128(m_simd, other.m_simd);  | 
408  |  | #elif defined(BOTAN_SIMD_USE_ALTIVEC)  | 
409  |  |          m_simd = vec_and(m_simd, other.m_simd);  | 
410  |  | #elif defined(BOTAN_SIMD_USE_NEON)  | 
411  |  |          m_simd = vandq_u32(m_simd, other.m_simd);  | 
412  |  | #endif  | 
413  | 0  |       }  | 
414  |  |  | 
415  |  |       template <int SHIFT>  | 
416  |  |       SIMD_4x32 shl() const noexcept  | 
417  |  |          requires(SHIFT > 0 && SHIFT < 32)  | 
418  | 0  |       { | 
419  | 0  | #if defined(BOTAN_SIMD_USE_SSE2)  | 
420  | 0  |          return SIMD_4x32(_mm_slli_epi32(m_simd, SHIFT));  | 
421  |  | 
  | 
422  |  | #elif defined(BOTAN_SIMD_USE_ALTIVEC)  | 
423  |  |          const unsigned int s = static_cast<unsigned int>(SHIFT);  | 
424  |  |          const __vector unsigned int shifts = {s, s, s, s}; | 
425  |  |          return SIMD_4x32(vec_sl(m_simd, shifts));  | 
426  |  | #elif defined(BOTAN_SIMD_USE_NEON)  | 
427  |  |          return SIMD_4x32(vshlq_n_u32(m_simd, SHIFT));  | 
428  |  | #endif  | 
429  | 0  |       } Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::shl<1>() const Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::shl<31>() const Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::shl<30>() const Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::shl<25>() const Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::shl<3>() const Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::shl<7>() const  | 
430  |  |  | 
431  |  |       template <int SHIFT>  | 
432  | 0  |       SIMD_4x32 shr() const noexcept { | 
433  | 0  | #if defined(BOTAN_SIMD_USE_SSE2)  | 
434  | 0  |          return SIMD_4x32(_mm_srli_epi32(m_simd, SHIFT));  | 
435  |  | 
  | 
436  |  | #elif defined(BOTAN_SIMD_USE_ALTIVEC)  | 
437  |  |          const unsigned int s = static_cast<unsigned int>(SHIFT);  | 
438  |  |          const __vector unsigned int shifts = {s, s, s, s}; | 
439  |  |          return SIMD_4x32(vec_sr(m_simd, shifts));  | 
440  |  | #elif defined(BOTAN_SIMD_USE_NEON)  | 
441  |  |          return SIMD_4x32(vshrq_n_u32(m_simd, SHIFT));  | 
442  |  | #endif  | 
443  | 0  |       } Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::shr<31>() const Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::shr<7>() const Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::shr<2>() const Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::shr<1>() const Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::shr<4>() const  | 
444  |  |  | 
445  | 0  |       SIMD_4x32 operator~() const noexcept { | 
446  | 0  | #if defined(BOTAN_SIMD_USE_SSE2)  | 
447  | 0  |          return SIMD_4x32(_mm_xor_si128(m_simd, _mm_set1_epi32(0xFFFFFFFF)));  | 
448  |  | #elif defined(BOTAN_SIMD_USE_ALTIVEC)  | 
449  |  |          return SIMD_4x32(vec_nor(m_simd, m_simd));  | 
450  |  | #elif defined(BOTAN_SIMD_USE_NEON)  | 
451  |  |          return SIMD_4x32(vmvnq_u32(m_simd));  | 
452  |  | #endif  | 
453  | 0  |       }  | 
454  |  |  | 
455  |  |       // (~reg) & other  | 
456  | 0  |       SIMD_4x32 andc(const SIMD_4x32& other) const noexcept { | 
457  | 0  | #if defined(BOTAN_SIMD_USE_SSE2)  | 
458  | 0  |          return SIMD_4x32(_mm_andnot_si128(m_simd, other.m_simd));  | 
459  |  | #elif defined(BOTAN_SIMD_USE_ALTIVEC)  | 
460  |  |          /*  | 
461  |  |          AltiVec does arg1 & ~arg2 rather than SSE's ~arg1 & arg2  | 
462  |  |          so swap the arguments  | 
463  |  |          */  | 
464  |  |          return SIMD_4x32(vec_andc(other.m_simd, m_simd));  | 
465  |  | #elif defined(BOTAN_SIMD_USE_NEON)  | 
466  |  |          // NEON is also a & ~b  | 
467  |  |          return SIMD_4x32(vbicq_u32(other.m_simd, m_simd));  | 
468  |  | #endif  | 
469  | 0  |       }  | 
470  |  |  | 
471  |  |       /**  | 
472  |  |       * Return a copy of *this with each 32-bit word byte swapped  | 
473  |  |       */  | 
474  | 0  |       SIMD_4x32 bswap() const noexcept { | 
475  | 0  | #if defined(BOTAN_SIMD_USE_SSE2)  | 
476  |  | 
  | 
477  | 0  |          __m128i T = m_simd;  | 
478  | 0  |          T = _mm_shufflehi_epi16(T, _MM_SHUFFLE(2, 3, 0, 1));  | 
479  | 0  |          T = _mm_shufflelo_epi16(T, _MM_SHUFFLE(2, 3, 0, 1));  | 
480  | 0  |          return SIMD_4x32(_mm_or_si128(_mm_srli_epi16(T, 8), _mm_slli_epi16(T, 8)));  | 
481  |  | 
  | 
482  |  | #elif defined(BOTAN_SIMD_USE_ALTIVEC)  | 
483  |  |          return SIMD_4x32(vec_revb(m_simd));  | 
484  |  |  | 
485  |  | #elif defined(BOTAN_SIMD_USE_NEON)  | 
486  |  |          return SIMD_4x32(vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(m_simd))));  | 
487  |  | #endif  | 
488  | 0  |       }  | 
489  |  |  | 
490  |  |       template <size_t I>  | 
491  |  |       SIMD_4x32 shift_elems_left() const noexcept  | 
492  |  |          requires(I <= 3)  | 
493  | 0  |       { | 
494  | 0  | #if defined(BOTAN_SIMD_USE_SSE2)  | 
495  | 0  |          return SIMD_4x32(_mm_slli_si128(raw(), 4 * I));  | 
496  |  | #elif defined(BOTAN_SIMD_USE_NEON)  | 
497  |  |          return SIMD_4x32(vextq_u32(vdupq_n_u32(0), raw(), 4 - I));  | 
498  |  | #elif defined(BOTAN_SIMD_USE_ALTIVEC)  | 
499  |  |          const __vector unsigned int zero = vec_splat_u32(0);  | 
500  |  |  | 
501  |  |          const __vector unsigned char shuf[3] = { | 
502  |  |             {16, 17, 18, 19, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}, | 
503  |  |             {16, 17, 18, 19, 20, 21, 22, 23, 0, 1, 2, 3, 4, 5, 6, 7}, | 
504  |  |             {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 0, 1, 2, 3}, | 
505  |  |          };  | 
506  |  |  | 
507  |  |          return SIMD_4x32(vec_perm(raw(), zero, shuf[I - 1]));  | 
508  |  | #endif  | 
509  | 0  |       } Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::shift_elems_left<3ul>() const Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::shift_elems_left<2ul>() const Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::shift_elems_left<1ul>() const  | 
510  |  |  | 
511  |  |       template <size_t I>  | 
512  |  |       SIMD_4x32 shift_elems_right() const noexcept  | 
513  |  |          requires(I <= 3)  | 
514  | 0  |       { | 
515  | 0  | #if defined(BOTAN_SIMD_USE_SSE2)  | 
516  | 0  |          return SIMD_4x32(_mm_srli_si128(raw(), 4 * I));  | 
517  |  | #elif defined(BOTAN_SIMD_USE_NEON)  | 
518  |  |          return SIMD_4x32(vextq_u32(raw(), vdupq_n_u32(0), I));  | 
519  |  | #elif defined(BOTAN_SIMD_USE_ALTIVEC)  | 
520  |  |          const __vector unsigned int zero = vec_splat_u32(0);  | 
521  |  |  | 
522  |  |          const __vector unsigned char shuf[3] = { | 
523  |  |             {4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19}, | 
524  |  |             {8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}, | 
525  |  |             {12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27}, | 
526  |  |          };  | 
527  |  |  | 
528  |  |          return SIMD_4x32(vec_perm(raw(), zero, shuf[I - 1]));  | 
529  |  | #endif  | 
530  | 0  |       } Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::shift_elems_right<1ul>() const Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::shift_elems_right<2ul>() const Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::shift_elems_right<3ul>() const  | 
531  |  |  | 
532  |  |       /**  | 
533  |  |       * 4x4 Transposition on SIMD registers  | 
534  |  |       */  | 
535  | 0  |       static void transpose(SIMD_4x32& B0, SIMD_4x32& B1, SIMD_4x32& B2, SIMD_4x32& B3) noexcept { | 
536  | 0  | #if defined(BOTAN_SIMD_USE_SSE2)  | 
537  | 0  |          const __m128i T0 = _mm_unpacklo_epi32(B0.m_simd, B1.m_simd);  | 
538  | 0  |          const __m128i T1 = _mm_unpacklo_epi32(B2.m_simd, B3.m_simd);  | 
539  | 0  |          const __m128i T2 = _mm_unpackhi_epi32(B0.m_simd, B1.m_simd);  | 
540  | 0  |          const __m128i T3 = _mm_unpackhi_epi32(B2.m_simd, B3.m_simd);  | 
541  |  | 
  | 
542  | 0  |          B0.m_simd = _mm_unpacklo_epi64(T0, T1);  | 
543  | 0  |          B1.m_simd = _mm_unpackhi_epi64(T0, T1);  | 
544  | 0  |          B2.m_simd = _mm_unpacklo_epi64(T2, T3);  | 
545  | 0  |          B3.m_simd = _mm_unpackhi_epi64(T2, T3);  | 
546  |  | #elif defined(BOTAN_SIMD_USE_ALTIVEC)  | 
547  |  |          const __vector unsigned int T0 = vec_mergeh(B0.m_simd, B2.m_simd);  | 
548  |  |          const __vector unsigned int T1 = vec_mergeh(B1.m_simd, B3.m_simd);  | 
549  |  |          const __vector unsigned int T2 = vec_mergel(B0.m_simd, B2.m_simd);  | 
550  |  |          const __vector unsigned int T3 = vec_mergel(B1.m_simd, B3.m_simd);  | 
551  |  |  | 
552  |  |          B0.m_simd = vec_mergeh(T0, T1);  | 
553  |  |          B1.m_simd = vec_mergel(T0, T1);  | 
554  |  |          B2.m_simd = vec_mergeh(T2, T3);  | 
555  |  |          B3.m_simd = vec_mergel(T2, T3);  | 
556  |  |  | 
557  |  | #elif defined(BOTAN_SIMD_USE_NEON) && defined(BOTAN_TARGET_ARCH_IS_ARM32)  | 
558  |  |          const uint32x4x2_t T0 = vzipq_u32(B0.m_simd, B2.m_simd);  | 
559  |  |          const uint32x4x2_t T1 = vzipq_u32(B1.m_simd, B3.m_simd);  | 
560  |  |          const uint32x4x2_t O0 = vzipq_u32(T0.val[0], T1.val[0]);  | 
561  |  |          const uint32x4x2_t O1 = vzipq_u32(T0.val[1], T1.val[1]);  | 
562  |  |  | 
563  |  |          B0.m_simd = O0.val[0];  | 
564  |  |          B1.m_simd = O0.val[1];  | 
565  |  |          B2.m_simd = O1.val[0];  | 
566  |  |          B3.m_simd = O1.val[1];  | 
567  |  |  | 
568  |  | #elif defined(BOTAN_SIMD_USE_NEON) && defined(BOTAN_TARGET_ARCH_IS_ARM64)  | 
569  |  |          const uint32x4_t T0 = vzip1q_u32(B0.m_simd, B2.m_simd);  | 
570  |  |          const uint32x4_t T2 = vzip2q_u32(B0.m_simd, B2.m_simd);  | 
571  |  |          const uint32x4_t T1 = vzip1q_u32(B1.m_simd, B3.m_simd);  | 
572  |  |          const uint32x4_t T3 = vzip2q_u32(B1.m_simd, B3.m_simd);  | 
573  |  |  | 
574  |  |          B0.m_simd = vzip1q_u32(T0, T1);  | 
575  |  |          B1.m_simd = vzip2q_u32(T0, T1);  | 
576  |  |          B2.m_simd = vzip1q_u32(T2, T3);  | 
577  |  |          B3.m_simd = vzip2q_u32(T2, T3);  | 
578  |  | #endif  | 
579  | 0  |       }  | 
580  |  |  | 
581  | 0  |       static inline SIMD_4x32 choose(const SIMD_4x32& mask, const SIMD_4x32& a, const SIMD_4x32& b) noexcept { | 
582  |  | #if defined(BOTAN_SIMD_USE_ALTIVEC)  | 
583  |  |          return SIMD_4x32(vec_sel(b.raw(), a.raw(), mask.raw()));  | 
584  |  | #elif defined(BOTAN_SIMD_USE_NEON)  | 
585  |  |          return SIMD_4x32(vbslq_u32(mask.raw(), a.raw(), b.raw()));  | 
586  |  | #else  | 
587  | 0  |          return (mask & a) ^ mask.andc(b);  | 
588  | 0  | #endif  | 
589  | 0  |       }  | 
590  |  |  | 
591  | 0  |       static inline SIMD_4x32 majority(const SIMD_4x32& x, const SIMD_4x32& y, const SIMD_4x32& z) noexcept { | 
592  | 0  |          return SIMD_4x32::choose(x ^ y, z, y);  | 
593  | 0  |       }  | 
594  |  |  | 
595  | 0  |       native_simd_type raw() const noexcept { return m_simd; } | 
596  |  |  | 
597  | 4  |       explicit SIMD_4x32(native_simd_type x) noexcept : m_simd(x) {} | 
598  |  |  | 
599  |  |    private:  | 
600  |  |       native_simd_type m_simd;  | 
601  |  | };  | 
602  |  |  | 
603  |  | template <size_t R>  | 
604  | 0  | inline SIMD_4x32 rotl(SIMD_4x32 input) { | 
605  | 0  |    return input.rotl<R>();  | 
606  | 0  | } Unexecuted instantiation: Botan::SIMD_4x32 Botan::rotl<13ul>(Botan::SIMD_4x32) Unexecuted instantiation: Botan::SIMD_4x32 Botan::rotl<3ul>(Botan::SIMD_4x32) Unexecuted instantiation: Botan::SIMD_4x32 Botan::rotl<1ul>(Botan::SIMD_4x32) Unexecuted instantiation: Botan::SIMD_4x32 Botan::rotl<7ul>(Botan::SIMD_4x32) Unexecuted instantiation: Botan::SIMD_4x32 Botan::rotl<5ul>(Botan::SIMD_4x32) Unexecuted instantiation: Botan::SIMD_4x32 Botan::rotl<22ul>(Botan::SIMD_4x32)  | 
607  |  |  | 
608  |  | template <size_t R>  | 
609  | 0  | inline SIMD_4x32 rotr(SIMD_4x32 input) { | 
610  | 0  |    return input.rotr<R>();  | 
611  | 0  | } Unexecuted instantiation: Botan::SIMD_4x32 Botan::rotr<22ul>(Botan::SIMD_4x32) Unexecuted instantiation: Botan::SIMD_4x32 Botan::rotr<5ul>(Botan::SIMD_4x32) Unexecuted instantiation: Botan::SIMD_4x32 Botan::rotr<7ul>(Botan::SIMD_4x32) Unexecuted instantiation: Botan::SIMD_4x32 Botan::rotr<1ul>(Botan::SIMD_4x32) Unexecuted instantiation: Botan::SIMD_4x32 Botan::rotr<3ul>(Botan::SIMD_4x32) Unexecuted instantiation: Botan::SIMD_4x32 Botan::rotr<13ul>(Botan::SIMD_4x32)  | 
612  |  |  | 
613  |  | // For Serpent:  | 
614  |  | template <size_t S>  | 
615  | 0  | inline SIMD_4x32 shl(SIMD_4x32 input) { | 
616  | 0  |    return input.shl<S>();  | 
617  | 0  | } Unexecuted instantiation: Botan::SIMD_4x32 Botan::shl<3ul>(Botan::SIMD_4x32) Unexecuted instantiation: Botan::SIMD_4x32 Botan::shl<7ul>(Botan::SIMD_4x32)  | 
618  |  |  | 
619  |  | }  // namespace Botan  | 
620  |  |  | 
621  |  | #endif  |