Coverage Report

Created: 2023-06-07 07:00

/src/botan/build/include/botan/internal/simd_32.h
Line
Count
Source (jump to first uncovered line)
1
/*
2
* Lightweight wrappers for SIMD operations
3
* (C) 2009,2011,2016,2017,2019 Jack Lloyd
4
*
5
* Botan is released under the Simplified BSD License (see license.txt)
6
*/
7
8
#ifndef BOTAN_SIMD_32_H_
9
#define BOTAN_SIMD_32_H_
10
11
#include <botan/types.h>
12
13
#if defined(BOTAN_TARGET_SUPPORTS_SSE2)
14
   #include <emmintrin.h>
15
   #define BOTAN_SIMD_USE_SSE2
16
17
#elif defined(BOTAN_TARGET_SUPPORTS_ALTIVEC)
18
   #include <botan/internal/bswap.h>
19
   #include <botan/internal/loadstor.h>
20
   #include <altivec.h>
21
   #undef vector
22
   #undef bool
23
   #define BOTAN_SIMD_USE_ALTIVEC
24
25
#elif defined(BOTAN_TARGET_SUPPORTS_NEON)
26
   #include <botan/internal/cpuid.h>
27
   #include <arm_neon.h>
28
   #define BOTAN_SIMD_USE_NEON
29
30
#else
31
   #error "No SIMD instruction set enabled"
32
#endif
33
34
#if defined(BOTAN_SIMD_USE_SSE2)
35
   #define BOTAN_SIMD_ISA "sse2"
36
   #define BOTAN_VPERM_ISA "ssse3"
37
   #define BOTAN_CLMUL_ISA "pclmul"
38
#elif defined(BOTAN_SIMD_USE_NEON)
39
   #if defined(BOTAN_TARGET_ARCH_IS_ARM64)
40
      #define BOTAN_SIMD_ISA "+simd"
41
      #define BOTAN_CLMUL_ISA "+crypto"
42
   #else
43
      #define BOTAN_SIMD_ISA "fpu=neon"
44
   #endif
45
   #define BOTAN_VPERM_ISA BOTAN_SIMD_ISA
46
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
47
   #define BOTAN_SIMD_ISA "altivec"
48
   #define BOTAN_VPERM_ISA "altivec"
49
   #define BOTAN_CLMUL_ISA "crypto"
50
#endif
51
52
namespace Botan {
53
54
#if defined(BOTAN_SIMD_USE_SSE2)
55
using native_simd_type = __m128i;
56
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
57
using native_simd_type = __vector unsigned int;
58
#elif defined(BOTAN_SIMD_USE_NEON)
59
using native_simd_type = uint32x4_t;
60
#endif
61
62
/**
63
* 4x32 bit SIMD register
64
*
65
* This class is not a general purpose SIMD type, and only offers
66
* instructions needed for evaluation of specific crypto primitives.
67
* For example it does not currently have equality operators of any
68
* kind.
69
*
70
* Implemented for SSE2, VMX (Altivec), and NEON.
71
*/
72
class SIMD_4x32 final {
73
   public:
74
      SIMD_4x32& operator=(const SIMD_4x32& other) = default;
75
      SIMD_4x32(const SIMD_4x32& other) = default;
76
77
      SIMD_4x32& operator=(SIMD_4x32&& other) = default;
78
      SIMD_4x32(SIMD_4x32&& other) = default;
79
80
      ~SIMD_4x32() = default;
81
82
      /**
83
      * Zero initialize SIMD register with 4 32-bit elements
84
      */
85
      SIMD_4x32() noexcept  // zero initialized
86
0
      {
87
0
#if defined(BOTAN_SIMD_USE_SSE2)
88
0
         m_simd = _mm_setzero_si128();
89
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
90
         m_simd = vec_splat_u32(0);
91
#elif defined(BOTAN_SIMD_USE_NEON)
92
         m_simd = vdupq_n_u32(0);
93
#endif
94
0
      }
95
96
      /**
97
      * Load SIMD register with 4 32-bit elements
98
      */
99
0
      explicit SIMD_4x32(const uint32_t B[4]) noexcept {
100
0
#if defined(BOTAN_SIMD_USE_SSE2)
101
0
         m_simd = _mm_loadu_si128(reinterpret_cast<const __m128i*>(B));
102
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
103
         __vector unsigned int val = {B[0], B[1], B[2], B[3]};
104
         m_simd = val;
105
#elif defined(BOTAN_SIMD_USE_NEON)
106
         m_simd = vld1q_u32(B);
107
#endif
108
0
      }
109
110
      /**
111
      * Load SIMD register with 4 32-bit elements
112
      */
113
96
      SIMD_4x32(uint32_t B0, uint32_t B1, uint32_t B2, uint32_t B3) noexcept {
114
96
#if defined(BOTAN_SIMD_USE_SSE2)
115
96
         m_simd = _mm_set_epi32(B3, B2, B1, B0);
116
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
117
         __vector unsigned int val = {B0, B1, B2, B3};
118
         m_simd = val;
119
#elif defined(BOTAN_SIMD_USE_NEON)
120
         // Better way to do this?
121
         const uint32_t B[4] = {B0, B1, B2, B3};
122
         m_simd = vld1q_u32(B);
123
#endif
124
96
      }
125
126
      /**
127
      * Load SIMD register with one 32-bit element repeated
128
      */
129
0
      static SIMD_4x32 splat(uint32_t B) noexcept {
130
0
#if defined(BOTAN_SIMD_USE_SSE2)
131
0
         return SIMD_4x32(_mm_set1_epi32(B));
132
#elif defined(BOTAN_SIMD_USE_NEON)
133
         return SIMD_4x32(vdupq_n_u32(B));
134
#else
135
         return SIMD_4x32(B, B, B, B);
136
#endif
137
0
      }
138
139
      /**
140
      * Load SIMD register with one 8-bit element repeated
141
      */
142
2
      static SIMD_4x32 splat_u8(uint8_t B) noexcept {
143
2
#if defined(BOTAN_SIMD_USE_SSE2)
144
2
         return SIMD_4x32(_mm_set1_epi8(B));
145
#elif defined(BOTAN_SIMD_USE_NEON)
146
         return SIMD_4x32(vreinterpretq_u32_u8(vdupq_n_u8(B)));
147
#else
148
         const uint32_t B4 = make_uint32(B, B, B, B);
149
         return SIMD_4x32(B4, B4, B4, B4);
150
#endif
151
2
      }
152
153
      /**
154
      * Load a SIMD register with little-endian convention
155
      */
156
0
      static SIMD_4x32 load_le(const void* in) noexcept {
157
0
#if defined(BOTAN_SIMD_USE_SSE2)
158
0
         return SIMD_4x32(_mm_loadu_si128(reinterpret_cast<const __m128i*>(in)));
159
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
160
         uint32_t R[4];
161
         Botan::load_le(R, static_cast<const uint8_t*>(in), 4);
162
         return SIMD_4x32(R);
163
#elif defined(BOTAN_SIMD_USE_NEON)
164
         SIMD_4x32 l(vld1q_u32(static_cast<const uint32_t*>(in)));
165
         return CPUID::is_big_endian() ? l.bswap() : l;
166
#endif
167
0
      }
168
169
      /**
170
      * Load a SIMD register with big-endian convention
171
      */
172
0
      static SIMD_4x32 load_be(const void* in) noexcept {
173
0
#if defined(BOTAN_SIMD_USE_SSE2)
174
0
         return load_le(in).bswap();
175
176
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
177
         uint32_t R[4];
178
         Botan::load_be(R, static_cast<const uint8_t*>(in), 4);
179
         return SIMD_4x32(R);
180
181
#elif defined(BOTAN_SIMD_USE_NEON)
182
         SIMD_4x32 l(vld1q_u32(static_cast<const uint32_t*>(in)));
183
         return CPUID::is_little_endian() ? l.bswap() : l;
184
#endif
185
0
      }
186
187
0
      void store_le(uint32_t out[4]) const noexcept { this->store_le(reinterpret_cast<uint8_t*>(out)); }
188
189
0
      void store_be(uint32_t out[4]) const noexcept { this->store_be(reinterpret_cast<uint8_t*>(out)); }
190
191
0
      void store_le(uint64_t out[2]) const noexcept { this->store_le(reinterpret_cast<uint8_t*>(out)); }
192
193
      /**
194
      * Load a SIMD register with little-endian convention
195
      */
196
0
      void store_le(uint8_t out[]) const noexcept {
197
0
#if defined(BOTAN_SIMD_USE_SSE2)
198
199
0
         _mm_storeu_si128(reinterpret_cast<__m128i*>(out), raw());
200
201
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
202
203
         union {
204
               __vector unsigned int V;
205
               uint32_t R[4];
206
         } vec;
207
208
         vec.V = raw();
209
         Botan::store_le(out, vec.R[0], vec.R[1], vec.R[2], vec.R[3]);
210
211
#elif defined(BOTAN_SIMD_USE_NEON)
212
         if(CPUID::is_little_endian()) {
213
            vst1q_u8(out, vreinterpretq_u8_u32(m_simd));
214
         } else {
215
            vst1q_u8(out, vreinterpretq_u8_u32(bswap().m_simd));
216
         }
217
#endif
218
0
      }
219
220
      /**
221
      * Load a SIMD register with big-endian convention
222
      */
223
0
      void store_be(uint8_t out[]) const noexcept {
224
0
#if defined(BOTAN_SIMD_USE_SSE2)
225
226
0
         bswap().store_le(out);
227
228
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
229
230
         union {
231
               __vector unsigned int V;
232
               uint32_t R[4];
233
         } vec;
234
235
         vec.V = m_simd;
236
         Botan::store_be(out, vec.R[0], vec.R[1], vec.R[2], vec.R[3]);
237
238
#elif defined(BOTAN_SIMD_USE_NEON)
239
         if(CPUID::is_little_endian()) {
240
            vst1q_u8(out, vreinterpretq_u8_u32(bswap().m_simd));
241
         } else {
242
            vst1q_u8(out, vreinterpretq_u8_u32(m_simd));
243
         }
244
#endif
245
0
      }
246
247
      /*
248
      * This is used for SHA-2/SHACAL2
249
      */
250
0
      SIMD_4x32 sigma0() const noexcept {
251
#if BOTAN_COMPILER_HAS_BUILTIN(__builtin_crypto_vshasigmaw) && defined(_ARCH_PWR8)
252
         return SIMD_4x32(__builtin_crypto_vshasigmaw(raw(), 1, 0));
253
#else
254
0
         const SIMD_4x32 rot1 = this->rotr<2>();
255
0
         const SIMD_4x32 rot2 = this->rotr<13>();
256
0
         const SIMD_4x32 rot3 = this->rotr<22>();
257
0
         return (rot1 ^ rot2 ^ rot3);
258
0
#endif
259
0
      }
260
261
      /*
262
      * This is used for SHA-2/SHACAL2
263
      */
264
0
      SIMD_4x32 sigma1() const noexcept {
265
#if BOTAN_COMPILER_HAS_BUILTIN(__builtin_crypto_vshasigmaw) && defined(_ARCH_PWR8)
266
         return SIMD_4x32(__builtin_crypto_vshasigmaw(raw(), 1, 0xF));
267
#else
268
0
         const SIMD_4x32 rot1 = this->rotr<6>();
269
0
         const SIMD_4x32 rot2 = this->rotr<11>();
270
0
         const SIMD_4x32 rot3 = this->rotr<25>();
271
0
         return (rot1 ^ rot2 ^ rot3);
272
0
#endif
273
0
      }
274
275
      /**
276
      * Left rotation by a compile time constant
277
      */
278
      template <size_t ROT>
279
      SIMD_4x32 rotl() const noexcept
280
         requires(ROT > 0 && ROT < 32)
281
0
      {
282
0
#if defined(BOTAN_SIMD_USE_SSE2)
283
284
0
         return SIMD_4x32(_mm_or_si128(_mm_slli_epi32(m_simd, static_cast<int>(ROT)),
285
0
                                       _mm_srli_epi32(m_simd, static_cast<int>(32 - ROT))));
286
287
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
288
289
         const unsigned int r = static_cast<unsigned int>(ROT);
290
         __vector unsigned int rot = {r, r, r, r};
291
         return SIMD_4x32(vec_rl(m_simd, rot));
292
293
#elif defined(BOTAN_SIMD_USE_NEON)
294
295
   #if defined(BOTAN_TARGET_ARCH_IS_ARM64)
296
297
         if constexpr(ROT == 8) {
298
            const uint8_t maskb[16] = {3, 0, 1, 2, 7, 4, 5, 6, 11, 8, 9, 10, 15, 12, 13, 14};
299
            const uint8x16_t mask = vld1q_u8(maskb);
300
            return SIMD_4x32(vreinterpretq_u32_u8(vqtbl1q_u8(vreinterpretq_u8_u32(m_simd), mask)));
301
         } else if constexpr(ROT == 16) {
302
            return SIMD_4x32(vreinterpretq_u32_u16(vrev32q_u16(vreinterpretq_u16_u32(m_simd))));
303
         }
304
   #endif
305
         return SIMD_4x32(
306
            vorrq_u32(vshlq_n_u32(m_simd, static_cast<int>(ROT)), vshrq_n_u32(m_simd, static_cast<int>(32 - ROT))));
307
#endif
308
0
      }
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<8ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<24ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<30ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<1ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<5ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<2ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<31ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<27ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<19ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<10ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<26ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<21ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<7ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<13ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<3ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<22ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<25ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<29ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<16ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<12ul>() const
309
310
      /**
311
      * Right rotation by a compile time constant
312
      */
313
      template <size_t ROT>
314
0
      SIMD_4x32 rotr() const noexcept {
315
0
         return this->rotl<32 - ROT>();
316
0
      }
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotr<8ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotr<2ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotr<1ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotr<5ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotr<13ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotr<22ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotr<6ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotr<11ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotr<25ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotr<7ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotr<3ul>() const
317
318
      /**
319
      * Add elements of a SIMD vector
320
      */
321
0
      SIMD_4x32 operator+(const SIMD_4x32& other) const noexcept {
322
0
         SIMD_4x32 retval(*this);
323
0
         retval += other;
324
0
         return retval;
325
0
      }
326
327
      /**
328
      * Subtract elements of a SIMD vector
329
      */
330
0
      SIMD_4x32 operator-(const SIMD_4x32& other) const noexcept {
331
0
         SIMD_4x32 retval(*this);
332
0
         retval -= other;
333
0
         return retval;
334
0
      }
335
336
      /**
337
      * XOR elements of a SIMD vector
338
      */
339
0
      SIMD_4x32 operator^(const SIMD_4x32& other) const noexcept {
340
0
         SIMD_4x32 retval(*this);
341
0
         retval ^= other;
342
0
         return retval;
343
0
      }
344
345
      /**
346
      * Binary OR elements of a SIMD vector
347
      */
348
0
      SIMD_4x32 operator|(const SIMD_4x32& other) const noexcept {
349
0
         SIMD_4x32 retval(*this);
350
0
         retval |= other;
351
0
         return retval;
352
0
      }
353
354
      /**
355
      * Binary AND elements of a SIMD vector
356
      */
357
0
      SIMD_4x32 operator&(const SIMD_4x32& other) const noexcept {
358
0
         SIMD_4x32 retval(*this);
359
0
         retval &= other;
360
0
         return retval;
361
0
      }
362
363
0
      /**
      * Add the elements of other into this register, in place
      */
      void operator+=(const SIMD_4x32& other) noexcept {
#if defined(BOTAN_SIMD_USE_SSE2)
         m_simd = _mm_add_epi32(raw(), other.raw());
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         m_simd = vec_add(raw(), other.raw());
#elif defined(BOTAN_SIMD_USE_NEON)
         m_simd = vaddq_u32(raw(), other.raw());
#endif
      }
372
373
0
      /**
      * Subtract the elements of other from this register, in place
      */
      void operator-=(const SIMD_4x32& other) noexcept {
#if defined(BOTAN_SIMD_USE_SSE2)
         m_simd = _mm_sub_epi32(raw(), other.raw());
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         m_simd = vec_sub(raw(), other.raw());
#elif defined(BOTAN_SIMD_USE_NEON)
         m_simd = vsubq_u32(raw(), other.raw());
#endif
      }
382
383
0
      /**
      * XOR other into this register, in place
      */
      void operator^=(const SIMD_4x32& other) noexcept {
#if defined(BOTAN_SIMD_USE_SSE2)
         m_simd = _mm_xor_si128(raw(), other.raw());
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         m_simd = vec_xor(raw(), other.raw());
#elif defined(BOTAN_SIMD_USE_NEON)
         m_simd = veorq_u32(raw(), other.raw());
#endif
      }
392
393
0
      /**
      * XOR the same 32-bit value into every element, in place
      */
      void operator^=(uint32_t other) noexcept {
         const SIMD_4x32 repeated = SIMD_4x32::splat(other);
         *this ^= repeated;
      }
394
395
0
      /**
      * OR other into this register, in place
      */
      void operator|=(const SIMD_4x32& other) noexcept {
#if defined(BOTAN_SIMD_USE_SSE2)
         m_simd = _mm_or_si128(raw(), other.raw());
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         m_simd = vec_or(raw(), other.raw());
#elif defined(BOTAN_SIMD_USE_NEON)
         m_simd = vorrq_u32(raw(), other.raw());
#endif
      }
404
405
0
      /**
      * AND other into this register, in place
      */
      void operator&=(const SIMD_4x32& other) noexcept {
#if defined(BOTAN_SIMD_USE_SSE2)
         m_simd = _mm_and_si128(raw(), other.raw());
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         m_simd = vec_and(raw(), other.raw());
#elif defined(BOTAN_SIMD_USE_NEON)
         m_simd = vandq_u32(raw(), other.raw());
#endif
      }
414
415
      template <int SHIFT>
416
      SIMD_4x32 shl() const noexcept
417
         requires(SHIFT > 0 && SHIFT < 32)
418
0
      {
419
0
#if defined(BOTAN_SIMD_USE_SSE2)
420
0
         return SIMD_4x32(_mm_slli_epi32(m_simd, SHIFT));
421
422
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
423
         const unsigned int s = static_cast<unsigned int>(SHIFT);
424
         const __vector unsigned int shifts = {s, s, s, s};
425
         return SIMD_4x32(vec_sl(m_simd, shifts));
426
#elif defined(BOTAN_SIMD_USE_NEON)
427
         return SIMD_4x32(vshlq_n_u32(m_simd, SHIFT));
428
#endif
429
0
      }
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::shl<3>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::shl<7>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::shl<1>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::shl<31>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::shl<30>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::shl<25>() const
430
431
      template <int SHIFT>
432
0
      SIMD_4x32 shr() const noexcept {
433
0
#if defined(BOTAN_SIMD_USE_SSE2)
434
0
         return SIMD_4x32(_mm_srli_epi32(m_simd, SHIFT));
435
436
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
437
         const unsigned int s = static_cast<unsigned int>(SHIFT);
438
         const __vector unsigned int shifts = {s, s, s, s};
439
         return SIMD_4x32(vec_sr(m_simd, shifts));
440
#elif defined(BOTAN_SIMD_USE_NEON)
441
         return SIMD_4x32(vshrq_n_u32(m_simd, SHIFT));
442
#endif
443
0
      }
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::shr<4>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::shr<31>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::shr<7>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::shr<2>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::shr<1>() const
444
445
0
      /**
      * Bitwise complement; returns the result as a new register
      */
      SIMD_4x32 operator~() const noexcept {
#if defined(BOTAN_SIMD_USE_SSE2)
         // XOR against an all-ones register
         const __m128i all_ones = _mm_set1_epi32(0xFFFFFFFF);
         return SIMD_4x32(_mm_xor_si128(m_simd, all_ones));
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         return SIMD_4x32(vec_nor(raw(), raw()));
#elif defined(BOTAN_SIMD_USE_NEON)
         return SIMD_4x32(vmvnq_u32(raw()));
#endif
      }
454
455
      // (~reg) & other
456
0
      SIMD_4x32 andc(const SIMD_4x32& other) const noexcept {
457
0
#if defined(BOTAN_SIMD_USE_SSE2)
458
0
         return SIMD_4x32(_mm_andnot_si128(m_simd, other.m_simd));
459
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
460
         /*
461
         AltiVec does arg1 & ~arg2 rather than SSE's ~arg1 & arg2
462
         so swap the arguments
463
         */
464
         return SIMD_4x32(vec_andc(other.m_simd, m_simd));
465
#elif defined(BOTAN_SIMD_USE_NEON)
466
         // NEON is also a & ~b
467
         return SIMD_4x32(vbicq_u32(other.m_simd, m_simd));
468
#endif
469
0
      }
470
471
      /**
472
      * Return copy *this with each word byte swapped
473
      */
474
0
      SIMD_4x32 bswap() const noexcept {
475
0
#if defined(BOTAN_SIMD_USE_SSE2)
476
477
0
         __m128i T = m_simd;
478
0
         T = _mm_shufflehi_epi16(T, _MM_SHUFFLE(2, 3, 0, 1));
479
0
         T = _mm_shufflelo_epi16(T, _MM_SHUFFLE(2, 3, 0, 1));
480
0
         return SIMD_4x32(_mm_or_si128(_mm_srli_epi16(T, 8), _mm_slli_epi16(T, 8)));
481
482
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
483
484
         union {
485
               __vector unsigned int V;
486
               uint32_t R[4];
487
         } vec;
488
489
         vec.V = m_simd;
490
         bswap_4(vec.R);
491
         return SIMD_4x32(vec.R[0], vec.R[1], vec.R[2], vec.R[3]);
492
493
#elif defined(BOTAN_SIMD_USE_NEON)
494
         return SIMD_4x32(vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(m_simd))));
495
#endif
496
0
      }
497
498
      template <size_t I>
499
      SIMD_4x32 shift_elems_left() const noexcept
500
         requires(I <= 3)
501
0
      {
502
0
#if defined(BOTAN_SIMD_USE_SSE2)
503
0
         return SIMD_4x32(_mm_slli_si128(raw(), 4 * I));
504
#elif defined(BOTAN_SIMD_USE_NEON)
505
         return SIMD_4x32(vextq_u32(vdupq_n_u32(0), raw(), 4 - I));
506
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
507
         const __vector unsigned int zero = vec_splat_u32(0);
508
509
         const __vector unsigned char shuf[3] = {
510
            {16, 17, 18, 19, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11},
511
            {16, 17, 18, 19, 20, 21, 22, 23, 0, 1, 2, 3, 4, 5, 6, 7},
512
            {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 0, 1, 2, 3},
513
         };
514
515
         return SIMD_4x32(vec_perm(raw(), zero, shuf[I - 1]));
516
#endif
517
0
      }
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::shift_elems_left<3ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::shift_elems_left<1ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::shift_elems_left<2ul>() const
518
519
      template <size_t I>
520
      SIMD_4x32 shift_elems_right() const noexcept
521
         requires(I <= 3)
522
0
      {
523
0
#if defined(BOTAN_SIMD_USE_SSE2)
524
0
         return SIMD_4x32(_mm_srli_si128(raw(), 4 * I));
525
#elif defined(BOTAN_SIMD_USE_NEON)
526
         return SIMD_4x32(vextq_u32(raw(), vdupq_n_u32(0), I));
527
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
528
         const __vector unsigned int zero = vec_splat_u32(0);
529
530
         const __vector unsigned char shuf[3] = {
531
            {4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19},
532
            {8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23},
533
            {12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27},
534
         };
535
536
         return SIMD_4x32(vec_perm(raw(), zero, shuf[I - 1]));
537
#endif
538
0
      }
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::shift_elems_right<1ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::shift_elems_right<2ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::shift_elems_right<3ul>() const
539
540
      /**
541
      * 4x4 Transposition on SIMD registers
542
      */
543
0
      static void transpose(SIMD_4x32& B0, SIMD_4x32& B1, SIMD_4x32& B2, SIMD_4x32& B3) noexcept {
544
0
#if defined(BOTAN_SIMD_USE_SSE2)
545
0
         const __m128i T0 = _mm_unpacklo_epi32(B0.m_simd, B1.m_simd);
546
0
         const __m128i T1 = _mm_unpacklo_epi32(B2.m_simd, B3.m_simd);
547
0
         const __m128i T2 = _mm_unpackhi_epi32(B0.m_simd, B1.m_simd);
548
0
         const __m128i T3 = _mm_unpackhi_epi32(B2.m_simd, B3.m_simd);
549
550
0
         B0.m_simd = _mm_unpacklo_epi64(T0, T1);
551
0
         B1.m_simd = _mm_unpackhi_epi64(T0, T1);
552
0
         B2.m_simd = _mm_unpacklo_epi64(T2, T3);
553
0
         B3.m_simd = _mm_unpackhi_epi64(T2, T3);
554
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
555
         const __vector unsigned int T0 = vec_mergeh(B0.m_simd, B2.m_simd);
556
         const __vector unsigned int T1 = vec_mergeh(B1.m_simd, B3.m_simd);
557
         const __vector unsigned int T2 = vec_mergel(B0.m_simd, B2.m_simd);
558
         const __vector unsigned int T3 = vec_mergel(B1.m_simd, B3.m_simd);
559
560
         B0.m_simd = vec_mergeh(T0, T1);
561
         B1.m_simd = vec_mergel(T0, T1);
562
         B2.m_simd = vec_mergeh(T2, T3);
563
         B3.m_simd = vec_mergel(T2, T3);
564
565
#elif defined(BOTAN_SIMD_USE_NEON) && defined(BOTAN_TARGET_ARCH_IS_ARM32)
566
         const uint32x4x2_t T0 = vzipq_u32(B0.m_simd, B2.m_simd);
567
         const uint32x4x2_t T1 = vzipq_u32(B1.m_simd, B3.m_simd);
568
         const uint32x4x2_t O0 = vzipq_u32(T0.val[0], T1.val[0]);
569
         const uint32x4x2_t O1 = vzipq_u32(T0.val[1], T1.val[1]);
570
571
         B0.m_simd = O0.val[0];
572
         B1.m_simd = O0.val[1];
573
         B2.m_simd = O1.val[0];
574
         B3.m_simd = O1.val[1];
575
576
#elif defined(BOTAN_SIMD_USE_NEON) && defined(BOTAN_TARGET_ARCH_IS_ARM64)
577
         const uint32x4_t T0 = vzip1q_u32(B0.m_simd, B2.m_simd);
578
         const uint32x4_t T2 = vzip2q_u32(B0.m_simd, B2.m_simd);
579
         const uint32x4_t T1 = vzip1q_u32(B1.m_simd, B3.m_simd);
580
         const uint32x4_t T3 = vzip2q_u32(B1.m_simd, B3.m_simd);
581
582
         B0.m_simd = vzip1q_u32(T0, T1);
583
         B1.m_simd = vzip2q_u32(T0, T1);
584
         B2.m_simd = vzip1q_u32(T2, T3);
585
         B3.m_simd = vzip2q_u32(T2, T3);
586
#endif
587
0
      }
588
589
0
      /**
      * Bitwise select: the generic path computes (mask & a) ^ (~mask & b),
      * taking bits of a where mask is set and bits of b where it is clear
      */
      static inline SIMD_4x32 choose(const SIMD_4x32& mask, const SIMD_4x32& a, const SIMD_4x32& b) noexcept {
#if defined(BOTAN_SIMD_USE_ALTIVEC)
         return SIMD_4x32(vec_sel(b.m_simd, a.m_simd, mask.m_simd));
#elif defined(BOTAN_SIMD_USE_NEON)
         return SIMD_4x32(vbslq_u32(mask.m_simd, a.m_simd, b.m_simd));
#else
         const SIMD_4x32 from_a = mask & a;
         const SIMD_4x32 from_b = mask.andc(b);
         return from_a ^ from_b;
#endif
      }
598
599
0
      /**
      * Computed as choose(x ^ y, z, y): where x and y differ the result
      * takes the bit of z, otherwise the bit of y
      */
      static inline SIMD_4x32 majority(const SIMD_4x32& x, const SIMD_4x32& y, const SIMD_4x32& z) noexcept {
         const SIMD_4x32 differing = x ^ y;
         return SIMD_4x32::choose(differing, z, y);
      }
602
603
0
      /**
      * Return a copy of the underlying native SIMD value
      */
      native_simd_type raw() const noexcept {
         return m_simd;
      }
604
605
2
      /**
      * Wrap an existing native SIMD value
      */
      explicit SIMD_4x32(native_simd_type x) noexcept :
            m_simd(x) {
      }
606
607
   private:
608
      native_simd_type m_simd;
609
};
610
611
template <size_t R>
612
0
inline SIMD_4x32 rotl(SIMD_4x32 input) {
613
0
   return input.rotl<R>();
614
0
}
Unexecuted instantiation: Botan::SIMD_4x32 Botan::rotl<13ul>(Botan::SIMD_4x32)
Unexecuted instantiation: Botan::SIMD_4x32 Botan::rotl<3ul>(Botan::SIMD_4x32)
Unexecuted instantiation: Botan::SIMD_4x32 Botan::rotl<1ul>(Botan::SIMD_4x32)
Unexecuted instantiation: Botan::SIMD_4x32 Botan::rotl<7ul>(Botan::SIMD_4x32)
Unexecuted instantiation: Botan::SIMD_4x32 Botan::rotl<5ul>(Botan::SIMD_4x32)
Unexecuted instantiation: Botan::SIMD_4x32 Botan::rotl<22ul>(Botan::SIMD_4x32)
615
616
template <size_t R>
617
0
inline SIMD_4x32 rotr(SIMD_4x32 input) {
618
0
   return input.rotr<R>();
619
0
}
Unexecuted instantiation: Botan::SIMD_4x32 Botan::rotr<22ul>(Botan::SIMD_4x32)
Unexecuted instantiation: Botan::SIMD_4x32 Botan::rotr<5ul>(Botan::SIMD_4x32)
Unexecuted instantiation: Botan::SIMD_4x32 Botan::rotr<7ul>(Botan::SIMD_4x32)
Unexecuted instantiation: Botan::SIMD_4x32 Botan::rotr<1ul>(Botan::SIMD_4x32)
Unexecuted instantiation: Botan::SIMD_4x32 Botan::rotr<3ul>(Botan::SIMD_4x32)
Unexecuted instantiation: Botan::SIMD_4x32 Botan::rotr<13ul>(Botan::SIMD_4x32)
620
621
// For Serpent:
622
template <size_t S>
623
0
inline SIMD_4x32 shl(SIMD_4x32 input) {
624
0
   return input.shl<S>();
625
0
}
Unexecuted instantiation: Botan::SIMD_4x32 Botan::shl<3ul>(Botan::SIMD_4x32)
Unexecuted instantiation: Botan::SIMD_4x32 Botan::shl<7ul>(Botan::SIMD_4x32)
626
627
}  // namespace Botan
628
629
#endif