Coverage Report

Created: 2021-01-13 07:05

/src/botan/build/include/botan/internal/simd_32.h
Line
Count
Source
1
/*
2
* Lightweight wrappers for SIMD operations
3
* (C) 2009,2011,2016,2017,2019 Jack Lloyd
4
*
5
* Botan is released under the Simplified BSD License (see license.txt)
6
*/
7
8
#ifndef BOTAN_SIMD_32_H_
9
#define BOTAN_SIMD_32_H_
10
11
#include <botan/types.h>
12
13
#if defined(BOTAN_TARGET_SUPPORTS_SSE2)
14
  #include <emmintrin.h>
15
  #define BOTAN_SIMD_USE_SSE2
16
17
#elif defined(BOTAN_TARGET_SUPPORTS_ALTIVEC)
18
  #include <botan/internal/bswap.h>
19
  #include <botan/internal/loadstor.h>
20
  #include <altivec.h>
21
  #undef vector
22
  #undef bool
23
  #define BOTAN_SIMD_USE_ALTIVEC
24
25
#elif defined(BOTAN_TARGET_SUPPORTS_NEON)
26
  #include <botan/internal/cpuid.h>
27
  #include <arm_neon.h>
28
  #define BOTAN_SIMD_USE_NEON
29
30
#else
31
  #error "No SIMD instruction set enabled"
32
#endif
33
34
#if defined(BOTAN_SIMD_USE_SSE2)
35
  #define BOTAN_SIMD_ISA "sse2"
36
  #define BOTAN_VPERM_ISA "ssse3"
37
  #define BOTAN_CLMUL_ISA "pclmul"
38
#elif defined(BOTAN_SIMD_USE_NEON)
39
  #if defined(BOTAN_TARGET_ARCH_IS_ARM64)
40
    #define BOTAN_SIMD_ISA "+simd"
41
    #define BOTAN_CLMUL_ISA "+crypto"
42
  #else
43
    #define BOTAN_SIMD_ISA "fpu=neon"
44
  #endif
45
  #define BOTAN_VPERM_ISA BOTAN_SIMD_ISA
46
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
47
  #define BOTAN_SIMD_ISA "altivec"
48
  #define BOTAN_VPERM_ISA "altivec"
49
  #define BOTAN_CLMUL_ISA "crypto"
50
#endif
51
52
namespace Botan {
53
54
#if defined(BOTAN_SIMD_USE_SSE2)
55
   typedef __m128i native_simd_type;
56
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
57
   typedef __vector unsigned int native_simd_type;
58
#elif defined(BOTAN_SIMD_USE_NEON)
59
   typedef uint32x4_t native_simd_type;
60
#endif
61
62
/**
63
* 4x32 bit SIMD register
64
*
65
* This class is not a general purpose SIMD type, and only offers
66
* instructions needed for evaluation of specific crypto primitives.
67
* For example it does not currently have equality operators of any
68
* kind.
69
*
70
* Implemented for SSE2, VMX (Altivec), and NEON.
71
*/
72
class SIMD_4x32 final
73
   {
74
   public:
75
76
      SIMD_4x32& operator=(const SIMD_4x32& other) = default;
77
      SIMD_4x32(const SIMD_4x32& other) = default;
78
79
      SIMD_4x32& operator=(SIMD_4x32&& other) = default;
80
      SIMD_4x32(SIMD_4x32&& other) = default;
81
82
      /**
83
      * Zero initialize SIMD register with 4 32-bit elements
84
      */
85
      SIMD_4x32() // zero initialized
86
5.70k
         {
87
5.70k
#if defined(BOTAN_SIMD_USE_SSE2)
88
5.70k
         m_simd = _mm_setzero_si128();
89
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
90
         m_simd = vec_splat_u32(0);
91
#elif defined(BOTAN_SIMD_USE_NEON)
92
         m_simd = vdupq_n_u32(0);
93
#endif
94
5.70k
         }
95
96
      /**
97
      * Load SIMD register with 4 32-bit elements
98
      */
99
      explicit SIMD_4x32(const uint32_t B[4])
100
0
         {
101
0
#if defined(BOTAN_SIMD_USE_SSE2)
102
0
         m_simd = _mm_loadu_si128(reinterpret_cast<const __m128i*>(B));
103
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
104
         __vector unsigned int val = { B[0], B[1], B[2], B[3]};
105
         m_simd = val;
106
#elif defined(BOTAN_SIMD_USE_NEON)
107
         m_simd = vld1q_u32(B);
108
#endif
109
0
         }
110
111
      /**
112
      * Load SIMD register with 4 32-bit elements
113
      */
114
      SIMD_4x32(uint32_t B0, uint32_t B1, uint32_t B2, uint32_t B3)
115
1.15k
         {
116
1.15k
#if defined(BOTAN_SIMD_USE_SSE2)
117
1.15k
         m_simd = _mm_set_epi32(B3, B2, B1, B0);
118
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
119
         __vector unsigned int val = {B0, B1, B2, B3};
120
         m_simd = val;
121
#elif defined(BOTAN_SIMD_USE_NEON)
122
         // Better way to do this?
123
         const uint32_t B[4] = { B0, B1, B2, B3 };
124
         m_simd = vld1q_u32(B);
125
#endif
126
1.15k
         }
127
128
      /**
129
      * Load SIMD register with one 32-bit element repeated
130
      */
131
      static SIMD_4x32 splat(uint32_t B)
132
0
         {
133
0
#if defined(BOTAN_SIMD_USE_SSE2)
134
0
         return SIMD_4x32(_mm_set1_epi32(B));
135
#elif defined(BOTAN_SIMD_USE_NEON)
136
         return SIMD_4x32(vdupq_n_u32(B));
137
#else
138
         return SIMD_4x32(B, B, B, B);
139
#endif
140
0
         }
141
142
      /**
143
      * Load SIMD register with one 8-bit element repeated
144
      */
145
      static SIMD_4x32 splat_u8(uint8_t B)
146
24
         {
147
24
#if defined(BOTAN_SIMD_USE_SSE2)
148
24
         return SIMD_4x32(_mm_set1_epi8(B));
149
#elif defined(BOTAN_SIMD_USE_NEON)
150
         return SIMD_4x32(vreinterpretq_u32_u8(vdupq_n_u8(B)));
151
#else
152
         const uint32_t B4 = make_uint32(B, B, B, B);
153
         return SIMD_4x32(B4, B4, B4, B4);
154
#endif
155
24
         }
156
157
      /**
158
      * Load a SIMD register with little-endian convention
159
      */
160
      static SIMD_4x32 load_le(const void* in)
161
32.4k
         {
162
32.4k
#if defined(BOTAN_SIMD_USE_SSE2)
163
32.4k
         return SIMD_4x32(_mm_loadu_si128(reinterpret_cast<const __m128i*>(in)));
164
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
165
         uint32_t R[4];
166
         Botan::load_le(R, static_cast<const uint8_t*>(in), 4);
167
         return SIMD_4x32(R);
168
#elif defined(BOTAN_SIMD_USE_NEON)
169
         SIMD_4x32 l(vld1q_u32(static_cast<const uint32_t*>(in)));
170
         return CPUID::is_big_endian() ? l.bswap() : l;
171
#endif
172
32.4k
         }
173
174
      /**
175
      * Load a SIMD register with big-endian convention
176
      */
177
      static SIMD_4x32 load_be(const void* in)
178
0
         {
179
0
#if defined(BOTAN_SIMD_USE_SSE2)
180
0
         return load_le(in).bswap();
181
182
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
183
         uint32_t R[4];
184
         Botan::load_be(R, static_cast<const uint8_t*>(in), 4);
185
         return SIMD_4x32(R);
186
187
#elif defined(BOTAN_SIMD_USE_NEON)
188
         SIMD_4x32 l(vld1q_u32(static_cast<const uint32_t*>(in)));
189
         return CPUID::is_little_endian() ? l.bswap() : l;
190
#endif
191
0
         }
192
193
      void store_le(uint32_t out[4]) const
194
0
         {
195
0
         this->store_le(reinterpret_cast<uint8_t*>(out));
196
0
         }
197
198
      void store_le(uint64_t out[2]) const
199
2.62k
         {
200
2.62k
         this->store_le(reinterpret_cast<uint8_t*>(out));
201
2.62k
         }
202
203
      /**
204
      * Store a SIMD register in little-endian convention
205
      */
206
      void store_le(uint8_t out[]) const
207
5.28k
         {
208
5.28k
#if defined(BOTAN_SIMD_USE_SSE2)
209
210
5.28k
         _mm_storeu_si128(reinterpret_cast<__m128i*>(out), raw());
211
212
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
213
214
         union {
215
            __vector unsigned int V;
216
            uint32_t R[4];
217
            } vec;
218
         vec.V = raw();
219
         Botan::store_le(out, vec.R[0], vec.R[1], vec.R[2], vec.R[3]);
220
221
#elif defined(BOTAN_SIMD_USE_NEON)
222
         if(CPUID::is_little_endian())
223
            {
224
            vst1q_u8(out, vreinterpretq_u8_u32(m_simd));
225
            }
226
         else
227
            {
228
            vst1q_u8(out, vreinterpretq_u8_u32(bswap().m_simd));
229
            }
230
#endif
231
5.28k
         }
232
233
      /**
234
      * Store a SIMD register in big-endian convention
235
      */
236
      void store_be(uint8_t out[]) const
237
0
         {
238
0
#if defined(BOTAN_SIMD_USE_SSE2)
239
240
0
         bswap().store_le(out);
241
242
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
243
244
         union {
245
            __vector unsigned int V;
246
            uint32_t R[4];
247
            } vec;
248
         vec.V = m_simd;
249
         Botan::store_be(out, vec.R[0], vec.R[1], vec.R[2], vec.R[3]);
250
251
#elif defined(BOTAN_SIMD_USE_NEON)
252
         if(CPUID::is_little_endian())
253
            {
254
            vst1q_u8(out, vreinterpretq_u8_u32(bswap().m_simd));
255
            }
256
         else
257
            {
258
            vst1q_u8(out, vreinterpretq_u8_u32(m_simd));
259
            }
260
#endif
261
0
         }
262
263
      /*
264
      * This is used for SHA-2/SHACAL2
265
      */
266
      SIMD_4x32 sigma0() const
267
0
         {
268
#if defined(__GNUC__) && defined(_ARCH_PWR8)
269
         return SIMD_4x32(__builtin_crypto_vshasigmaw(raw(), 1, 0));
270
#else
271
0
         const SIMD_4x32 rot1 = this->rotr<2>();
272
0
         const SIMD_4x32 rot2 = this->rotr<13>();
273
0
         const SIMD_4x32 rot3 = this->rotr<22>();
274
0
         return (rot1 ^ rot2 ^ rot3);
275
0
#endif
276
0
         }
277
278
      /*
279
      * This is used for SHA-2/SHACAL2
280
      */
281
      SIMD_4x32 sigma1() const
282
0
         {
283
#if defined(__GNUC__) && defined(_ARCH_PWR8)
284
         return SIMD_4x32(__builtin_crypto_vshasigmaw(raw(), 1, 0xF));
285
#else
286
0
         const SIMD_4x32 rot1 = this->rotr<6>();
287
0
         const SIMD_4x32 rot2 = this->rotr<11>();
288
0
         const SIMD_4x32 rot3 = this->rotr<25>();
289
0
         return (rot1 ^ rot2 ^ rot3);
290
0
#endif
291
0
         }
292
293
      /**
294
      * Left rotation by a compile time constant
295
      */
296
      template<size_t ROT>
297
      SIMD_4x32 rotl() const
298
0
         {
299
0
         static_assert(ROT > 0 && ROT < 32, "Invalid rotation constant");
300
301
0
#if defined(BOTAN_SIMD_USE_SSE2)
302
303
0
         return SIMD_4x32(_mm_or_si128(_mm_slli_epi32(m_simd, static_cast<int>(ROT)),
304
0
                                       _mm_srli_epi32(m_simd, static_cast<int>(32-ROT))));
305
306
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
307
308
         const unsigned int r = static_cast<unsigned int>(ROT);
309
         __vector unsigned int rot = {r, r, r, r};
310
         return SIMD_4x32(vec_rl(m_simd, rot));
311
312
#elif defined(BOTAN_SIMD_USE_NEON)
313
314
#if defined(BOTAN_TARGET_ARCH_IS_ARM64)
315
316
         if constexpr(ROT == 8)
317
            {
318
            const uint8_t maskb[16] = { 3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14 };
319
            const uint8x16_t mask = vld1q_u8(maskb);
320
            return SIMD_4x32(vreinterpretq_u32_u8(vqtbl1q_u8(vreinterpretq_u8_u32(m_simd), mask)));
321
            }
322
         else if constexpr(ROT == 16)
323
            {
324
            return SIMD_4x32(vreinterpretq_u32_u16(vrev32q_u16(vreinterpretq_u16_u32(m_simd))));
325
            }
326
#endif
327
         return SIMD_4x32(vorrq_u32(vshlq_n_u32(m_simd, static_cast<int>(ROT)),
328
                                    vshrq_n_u32(m_simd, static_cast<int>(32-ROT))));
329
#endif
330
0
         }
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<30ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<8ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<24ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<1ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<5ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<2ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<31ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<27ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<19ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<10ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<26ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<21ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<7ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<13ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<3ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<22ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<25ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<29ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<16ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<12ul>() const
331
332
      /**
333
      * Right rotation by a compile time constant
334
      */
335
      template<size_t ROT>
336
      SIMD_4x32 rotr() const
337
0
         {
338
0
         return this->rotl<32-ROT>();
339
0
         }
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotr<2ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotr<8ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotr<1ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotr<5ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotr<13ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotr<22ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotr<6ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotr<11ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotr<25ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotr<7ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotr<3ul>() const
340
341
      /**
342
      * Add elements of a SIMD vector
343
      */
344
      SIMD_4x32 operator+(const SIMD_4x32& other) const
345
0
         {
346
0
         SIMD_4x32 retval(*this);
347
0
         retval += other;
348
0
         return retval;
349
0
         }
350
351
      /**
352
      * Subtract elements of a SIMD vector
353
      */
354
      SIMD_4x32 operator-(const SIMD_4x32& other) const
355
0
         {
356
0
         SIMD_4x32 retval(*this);
357
0
         retval -= other;
358
0
         return retval;
359
0
         }
360
361
      /**
362
      * XOR elements of a SIMD vector
363
      */
364
      SIMD_4x32 operator^(const SIMD_4x32& other) const
365
154k
         {
366
154k
         SIMD_4x32 retval(*this);
367
154k
         retval ^= other;
368
154k
         return retval;
369
154k
         }
370
371
      /**
372
      * Binary OR elements of a SIMD vector
373
      */
374
      SIMD_4x32 operator|(const SIMD_4x32& other) const
375
0
         {
376
0
         SIMD_4x32 retval(*this);
377
0
         retval |= other;
378
0
         return retval;
379
0
         }
380
381
      /**
382
      * Binary AND elements of a SIMD vector
383
      */
384
      SIMD_4x32 operator&(const SIMD_4x32& other) const
385
0
         {
386
0
         SIMD_4x32 retval(*this);
387
0
         retval &= other;
388
0
         return retval;
389
0
         }
390
391
      void operator+=(const SIMD_4x32& other)
392
0
         {
393
0
#if defined(BOTAN_SIMD_USE_SSE2)
394
0
         m_simd = _mm_add_epi32(m_simd, other.m_simd);
395
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
396
         m_simd = vec_add(m_simd, other.m_simd);
397
#elif defined(BOTAN_SIMD_USE_NEON)
398
         m_simd = vaddq_u32(m_simd, other.m_simd);
399
#endif
400
0
         }
401
402
      void operator-=(const SIMD_4x32& other)
403
0
         {
404
0
#if defined(BOTAN_SIMD_USE_SSE2)
405
0
         m_simd = _mm_sub_epi32(m_simd, other.m_simd);
406
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
407
         m_simd = vec_sub(m_simd, other.m_simd);
408
#elif defined(BOTAN_SIMD_USE_NEON)
409
         m_simd = vsubq_u32(m_simd, other.m_simd);
410
#endif
411
0
         }
412
413
      void operator^=(const SIMD_4x32& other)
414
232k
         {
415
232k
#if defined(BOTAN_SIMD_USE_SSE2)
416
232k
         m_simd = _mm_xor_si128(m_simd, other.m_simd);
417
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
418
         m_simd = vec_xor(m_simd, other.m_simd);
419
#elif defined(BOTAN_SIMD_USE_NEON)
420
         m_simd = veorq_u32(m_simd, other.m_simd);
421
#endif
422
232k
         }
423
424
      void operator|=(const SIMD_4x32& other)
425
31.6k
         {
426
31.6k
#if defined(BOTAN_SIMD_USE_SSE2)
427
31.6k
         m_simd = _mm_or_si128(m_simd, other.m_simd);
428
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
429
         m_simd = vec_or(m_simd, other.m_simd);
430
#elif defined(BOTAN_SIMD_USE_NEON)
431
         m_simd = vorrq_u32(m_simd, other.m_simd);
432
#endif
433
31.6k
         }
434
435
      void operator&=(const SIMD_4x32& other)
436
0
         {
437
0
#if defined(BOTAN_SIMD_USE_SSE2)
438
0
         m_simd = _mm_and_si128(m_simd, other.m_simd);
439
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
440
         m_simd = vec_and(m_simd, other.m_simd);
441
#elif defined(BOTAN_SIMD_USE_NEON)
442
         m_simd = vandq_u32(m_simd, other.m_simd);
443
#endif
444
0
         }
445
446
447
      template<int SHIFT> SIMD_4x32 shl() const
448
52.6k
         {
449
52.6k
         static_assert(SHIFT > 0 && SHIFT <= 31, "Invalid shift count");
450
451
52.6k
#if defined(BOTAN_SIMD_USE_SSE2)
452
52.6k
         return SIMD_4x32(_mm_slli_epi32(m_simd, SHIFT));
453
454
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
455
         const unsigned int s = static_cast<unsigned int>(SHIFT);
456
         const __vector unsigned int shifts = {s, s, s, s};
457
         return SIMD_4x32(vec_sl(m_simd, shifts));
458
#elif defined(BOTAN_SIMD_USE_NEON)
459
         return SIMD_4x32(vshlq_n_u32(m_simd, SHIFT));
460
#endif
461
52.6k
         }
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::shl<3>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::shl<7>() const
Botan::SIMD_4x32 Botan::SIMD_4x32::shl<1>() const
Line
Count
Source
448
21.0k
         {
449
21.0k
         static_assert(SHIFT > 0 && SHIFT <= 31, "Invalid shift count");
450
451
21.0k
#if defined(BOTAN_SIMD_USE_SSE2)
452
21.0k
         return SIMD_4x32(_mm_slli_epi32(m_simd, SHIFT));
453
454
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
455
         const unsigned int s = static_cast<unsigned int>(SHIFT);
456
         const __vector unsigned int shifts = {s, s, s, s};
457
         return SIMD_4x32(vec_sl(m_simd, shifts));
458
#elif defined(BOTAN_SIMD_USE_NEON)
459
         return SIMD_4x32(vshlq_n_u32(m_simd, SHIFT));
460
#endif
461
21.0k
         }
Botan::SIMD_4x32 Botan::SIMD_4x32::shl<31>() const
Line
Count
Source
448
10.5k
         {
449
10.5k
         static_assert(SHIFT > 0 && SHIFT <= 31, "Invalid shift count");
450
451
10.5k
#if defined(BOTAN_SIMD_USE_SSE2)
452
10.5k
         return SIMD_4x32(_mm_slli_epi32(m_simd, SHIFT));
453
454
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
455
         const unsigned int s = static_cast<unsigned int>(SHIFT);
456
         const __vector unsigned int shifts = {s, s, s, s};
457
         return SIMD_4x32(vec_sl(m_simd, shifts));
458
#elif defined(BOTAN_SIMD_USE_NEON)
459
         return SIMD_4x32(vshlq_n_u32(m_simd, SHIFT));
460
#endif
461
10.5k
         }
Botan::SIMD_4x32 Botan::SIMD_4x32::shl<30>() const
Line
Count
Source
448
10.5k
         {
449
10.5k
         static_assert(SHIFT > 0 && SHIFT <= 31, "Invalid shift count");
450
451
10.5k
#if defined(BOTAN_SIMD_USE_SSE2)
452
10.5k
         return SIMD_4x32(_mm_slli_epi32(m_simd, SHIFT));
453
454
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
455
         const unsigned int s = static_cast<unsigned int>(SHIFT);
456
         const __vector unsigned int shifts = {s, s, s, s};
457
         return SIMD_4x32(vec_sl(m_simd, shifts));
458
#elif defined(BOTAN_SIMD_USE_NEON)
459
         return SIMD_4x32(vshlq_n_u32(m_simd, SHIFT));
460
#endif
461
10.5k
         }
Botan::SIMD_4x32 Botan::SIMD_4x32::shl<25>() const
Line
Count
Source
448
10.5k
         {
449
10.5k
         static_assert(SHIFT > 0 && SHIFT <= 31, "Invalid shift count");
450
451
10.5k
#if defined(BOTAN_SIMD_USE_SSE2)
452
10.5k
         return SIMD_4x32(_mm_slli_epi32(m_simd, SHIFT));
453
454
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
455
         const unsigned int s = static_cast<unsigned int>(SHIFT);
456
         const __vector unsigned int shifts = {s, s, s, s};
457
         return SIMD_4x32(vec_sl(m_simd, shifts));
458
#elif defined(BOTAN_SIMD_USE_NEON)
459
         return SIMD_4x32(vshlq_n_u32(m_simd, SHIFT));
460
#endif
461
10.5k
         }
462
463
      template<int SHIFT> SIMD_4x32 shr() const
464
52.6k
         {
465
52.6k
#if defined(BOTAN_SIMD_USE_SSE2)
466
52.6k
         return SIMD_4x32(_mm_srli_epi32(m_simd, SHIFT));
467
468
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
469
         const unsigned int s = static_cast<unsigned int>(SHIFT);
470
         const __vector unsigned int shifts = {s, s, s, s};
471
         return SIMD_4x32(vec_sr(m_simd, shifts));
472
#elif defined(BOTAN_SIMD_USE_NEON)
473
         return SIMD_4x32(vshrq_n_u32(m_simd, SHIFT));
474
#endif
475
52.6k
         }
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::shr<4>() const
Botan::SIMD_4x32 Botan::SIMD_4x32::shr<31>() const
Line
Count
Source
464
21.0k
         {
465
21.0k
#if defined(BOTAN_SIMD_USE_SSE2)
466
21.0k
         return SIMD_4x32(_mm_srli_epi32(m_simd, SHIFT));
467
468
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
469
         const unsigned int s = static_cast<unsigned int>(SHIFT);
470
         const __vector unsigned int shifts = {s, s, s, s};
471
         return SIMD_4x32(vec_sr(m_simd, shifts));
472
#elif defined(BOTAN_SIMD_USE_NEON)
473
         return SIMD_4x32(vshrq_n_u32(m_simd, SHIFT));
474
#endif
475
21.0k
         }
Botan::SIMD_4x32 Botan::SIMD_4x32::shr<7>() const
Line
Count
Source
464
10.5k
         {
465
10.5k
#if defined(BOTAN_SIMD_USE_SSE2)
466
10.5k
         return SIMD_4x32(_mm_srli_epi32(m_simd, SHIFT));
467
468
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
469
         const unsigned int s = static_cast<unsigned int>(SHIFT);
470
         const __vector unsigned int shifts = {s, s, s, s};
471
         return SIMD_4x32(vec_sr(m_simd, shifts));
472
#elif defined(BOTAN_SIMD_USE_NEON)
473
         return SIMD_4x32(vshrq_n_u32(m_simd, SHIFT));
474
#endif
475
10.5k
         }
Botan::SIMD_4x32 Botan::SIMD_4x32::shr<2>() const
Line
Count
Source
464
10.5k
         {
465
10.5k
#if defined(BOTAN_SIMD_USE_SSE2)
466
10.5k
         return SIMD_4x32(_mm_srli_epi32(m_simd, SHIFT));
467
468
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
469
         const unsigned int s = static_cast<unsigned int>(SHIFT);
470
         const __vector unsigned int shifts = {s, s, s, s};
471
         return SIMD_4x32(vec_sr(m_simd, shifts));
472
#elif defined(BOTAN_SIMD_USE_NEON)
473
         return SIMD_4x32(vshrq_n_u32(m_simd, SHIFT));
474
#endif
475
10.5k
         }
Botan::SIMD_4x32 Botan::SIMD_4x32::shr<1>() const
Line
Count
Source
464
10.5k
         {
465
10.5k
#if defined(BOTAN_SIMD_USE_SSE2)
466
10.5k
         return SIMD_4x32(_mm_srli_epi32(m_simd, SHIFT));
467
468
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
469
         const unsigned int s = static_cast<unsigned int>(SHIFT);
470
         const __vector unsigned int shifts = {s, s, s, s};
471
         return SIMD_4x32(vec_sr(m_simd, shifts));
472
#elif defined(BOTAN_SIMD_USE_NEON)
473
         return SIMD_4x32(vshrq_n_u32(m_simd, SHIFT));
474
#endif
475
10.5k
         }
476
477
      SIMD_4x32 operator~() const
478
0
         {
479
0
#if defined(BOTAN_SIMD_USE_SSE2)
480
0
         return SIMD_4x32(_mm_xor_si128(m_simd, _mm_set1_epi32(0xFFFFFFFF)));
481
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
482
         return SIMD_4x32(vec_nor(m_simd, m_simd));
483
#elif defined(BOTAN_SIMD_USE_NEON)
484
         return SIMD_4x32(vmvnq_u32(m_simd));
485
#endif
486
0
         }
487
488
      // (~reg) & other
489
      SIMD_4x32 andc(const SIMD_4x32& other) const
490
0
         {
491
0
#if defined(BOTAN_SIMD_USE_SSE2)
492
0
         return SIMD_4x32(_mm_andnot_si128(m_simd, other.m_simd));
493
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
494
         /*
495
         AltiVec does arg1 & ~arg2 rather than SSE's ~arg1 & arg2
496
         so swap the arguments
497
         */
498
         return SIMD_4x32(vec_andc(other.m_simd, m_simd));
499
#elif defined(BOTAN_SIMD_USE_NEON)
500
         // NEON is also a & ~b
501
         return SIMD_4x32(vbicq_u32(other.m_simd, m_simd));
502
#endif
503
0
         }
504
505
      /**
506
      * Return a copy of *this with each word byte swapped
507
      */
508
      SIMD_4x32 bswap() const
509
0
         {
510
0
#if defined(BOTAN_SIMD_USE_SSE2)
511
512
0
         __m128i T = m_simd;
513
0
         T = _mm_shufflehi_epi16(T, _MM_SHUFFLE(2, 3, 0, 1));
514
0
         T = _mm_shufflelo_epi16(T, _MM_SHUFFLE(2, 3, 0, 1));
515
0
         return SIMD_4x32(_mm_or_si128(_mm_srli_epi16(T, 8), _mm_slli_epi16(T, 8)));
516
517
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
518
519
         union {
520
            __vector unsigned int V;
521
            uint32_t R[4];
522
            } vec;
523
524
         vec.V = m_simd;
525
         bswap_4(vec.R);
526
         return SIMD_4x32(vec.R[0], vec.R[1], vec.R[2], vec.R[3]);
527
528
#elif defined(BOTAN_SIMD_USE_NEON)
529
         return SIMD_4x32(vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(m_simd))));
530
#endif
531
0
         }
532
533
      template<size_t I>
534
      SIMD_4x32 shift_elems_left() const
535
42.1k
         {
536
42.1k
         static_assert(I <= 3, "Invalid shift count");
537
538
42.1k
#if defined(BOTAN_SIMD_USE_SSE2)
539
42.1k
         return SIMD_4x32(_mm_slli_si128(raw(), 4*I));
540
#elif defined(BOTAN_SIMD_USE_NEON)
541
         return SIMD_4x32(vextq_u32(vdupq_n_u32(0), raw(), 4-I));
542
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
543
         const __vector unsigned int zero = vec_splat_u32(0);
544
545
         const __vector unsigned char shuf[3] = {
546
            { 16, 17, 18, 19, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 },
547
            { 16, 17, 18, 19, 20, 21, 22, 23, 0, 1, 2, 3, 4, 5, 6, 7 },
548
            { 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 0, 1, 2, 3 },
549
         };
550
551
         return SIMD_4x32(vec_perm(raw(), zero, shuf[I-1]));
552
#endif
553
42.1k
         }
Botan::SIMD_4x32 Botan::SIMD_4x32::shift_elems_left<1ul>() const
Line
Count
Source
535
21.0k
         {
536
21.0k
         static_assert(I <= 3, "Invalid shift count");
537
538
21.0k
#if defined(BOTAN_SIMD_USE_SSE2)
539
21.0k
         return SIMD_4x32(_mm_slli_si128(raw(), 4*I));
540
#elif defined(BOTAN_SIMD_USE_NEON)
541
         return SIMD_4x32(vextq_u32(vdupq_n_u32(0), raw(), 4-I));
542
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
543
         const __vector unsigned int zero = vec_splat_u32(0);
544
545
         const __vector unsigned char shuf[3] = {
546
            { 16, 17, 18, 19, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 },
547
            { 16, 17, 18, 19, 20, 21, 22, 23, 0, 1, 2, 3, 4, 5, 6, 7 },
548
            { 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 0, 1, 2, 3 },
549
         };
550
551
         return SIMD_4x32(vec_perm(raw(), zero, shuf[I-1]));
552
#endif
553
21.0k
         }
Botan::SIMD_4x32 Botan::SIMD_4x32::shift_elems_left<2ul>() const
Line
Count
Source
535
10.5k
         {
536
10.5k
         static_assert(I <= 3, "Invalid shift count");
537
538
10.5k
#if defined(BOTAN_SIMD_USE_SSE2)
539
10.5k
         return SIMD_4x32(_mm_slli_si128(raw(), 4*I));
540
#elif defined(BOTAN_SIMD_USE_NEON)
541
         return SIMD_4x32(vextq_u32(vdupq_n_u32(0), raw(), 4-I));
542
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
543
         const __vector unsigned int zero = vec_splat_u32(0);
544
545
         const __vector unsigned char shuf[3] = {
546
            { 16, 17, 18, 19, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 },
547
            { 16, 17, 18, 19, 20, 21, 22, 23, 0, 1, 2, 3, 4, 5, 6, 7 },
548
            { 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 0, 1, 2, 3 },
549
         };
550
551
         return SIMD_4x32(vec_perm(raw(), zero, shuf[I-1]));
552
#endif
553
10.5k
         }
Botan::SIMD_4x32 Botan::SIMD_4x32::shift_elems_left<3ul>() const
Line
Count
Source
535
10.5k
         {
536
10.5k
         static_assert(I <= 3, "Invalid shift count");
537
538
10.5k
#if defined(BOTAN_SIMD_USE_SSE2)
539
10.5k
         return SIMD_4x32(_mm_slli_si128(raw(), 4*I));
540
#elif defined(BOTAN_SIMD_USE_NEON)
541
         return SIMD_4x32(vextq_u32(vdupq_n_u32(0), raw(), 4-I));
542
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
543
         const __vector unsigned int zero = vec_splat_u32(0);
544
545
         const __vector unsigned char shuf[3] = {
546
            { 16, 17, 18, 19, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 },
547
            { 16, 17, 18, 19, 20, 21, 22, 23, 0, 1, 2, 3, 4, 5, 6, 7 },
548
            { 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 0, 1, 2, 3 },
549
         };
550
551
         return SIMD_4x32(vec_perm(raw(), zero, shuf[I-1]));
552
#endif
553
10.5k
         }
554
555
      template<size_t I>
556
      SIMD_4x32 shift_elems_right() const
557
77.2k
         {
558
77.2k
         static_assert(I <= 3, "Invalid shift count");
559
560
77.2k
#if defined(BOTAN_SIMD_USE_SSE2)
561
77.2k
         return SIMD_4x32(_mm_srli_si128(raw(), 4*I));
562
#elif defined(BOTAN_SIMD_USE_NEON)
563
         return SIMD_4x32(vextq_u32(raw(), vdupq_n_u32(0), I));
564
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
565
         const __vector unsigned int zero = vec_splat_u32(0);
566
567
         const __vector unsigned char shuf[3] = {
568
            { 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19 },
569
            { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 },
570
            { 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27 },
571
         };
572
573
         return SIMD_4x32(vec_perm(raw(), zero, shuf[I-1]));
574
#endif
575
77.2k
         }
Botan::SIMD_4x32 Botan::SIMD_4x32::shift_elems_right<2ul>() const
Line
Count
Source
557
56.1k
         {
558
56.1k
         static_assert(I <= 3, "Invalid shift count");
559
560
56.1k
#if defined(BOTAN_SIMD_USE_SSE2)
561
56.1k
         return SIMD_4x32(_mm_srli_si128(raw(), 4*I));
562
#elif defined(BOTAN_SIMD_USE_NEON)
563
         return SIMD_4x32(vextq_u32(raw(), vdupq_n_u32(0), I));
564
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
565
         const __vector unsigned int zero = vec_splat_u32(0);
566
567
         const __vector unsigned char shuf[3] = {
568
            { 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19 },
569
            { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 },
570
            { 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27 },
571
         };
572
573
         return SIMD_4x32(vec_perm(raw(), zero, shuf[I-1]));
574
#endif
575
56.1k
         }
Botan::SIMD_4x32 Botan::SIMD_4x32::shift_elems_right<3ul>() const
Line
Count
Source
557
10.5k
         {
558
10.5k
         static_assert(I <= 3, "Invalid shift count");
559
560
10.5k
#if defined(BOTAN_SIMD_USE_SSE2)
561
10.5k
         return SIMD_4x32(_mm_srli_si128(raw(), 4*I));
562
#elif defined(BOTAN_SIMD_USE_NEON)
563
         return SIMD_4x32(vextq_u32(raw(), vdupq_n_u32(0), I));
564
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
565
         const __vector unsigned int zero = vec_splat_u32(0);
566
567
         const __vector unsigned char shuf[3] = {
568
            { 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19 },
569
            { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 },
570
            { 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27 },
571
         };
572
573
         return SIMD_4x32(vec_perm(raw(), zero, shuf[I-1]));
574
#endif
575
10.5k
         }
Botan::SIMD_4x32 Botan::SIMD_4x32::shift_elems_right<1ul>() const
Line
Count
Source
557
10.5k
         {
558
10.5k
         static_assert(I <= 3, "Invalid shift count");
559
560
10.5k
#if defined(BOTAN_SIMD_USE_SSE2)
561
10.5k
         return SIMD_4x32(_mm_srli_si128(raw(), 4*I));
562
#elif defined(BOTAN_SIMD_USE_NEON)
563
         return SIMD_4x32(vextq_u32(raw(), vdupq_n_u32(0), I));
564
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
565
         const __vector unsigned int zero = vec_splat_u32(0);
566
567
         const __vector unsigned char shuf[3] = {
568
            { 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19 },
569
            { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 },
570
            { 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27 },
571
         };
572
573
         return SIMD_4x32(vec_perm(raw(), zero, shuf[I-1]));
574
#endif
575
10.5k
         }
576
577
      /**
578
      * 4x4 Transposition on SIMD registers
579
      */
580
      static void transpose(SIMD_4x32& B0, SIMD_4x32& B1,
581
                            SIMD_4x32& B2, SIMD_4x32& B3)
582
0
         {
583
0
#if defined(BOTAN_SIMD_USE_SSE2)
584
0
         const __m128i T0 = _mm_unpacklo_epi32(B0.m_simd, B1.m_simd);
585
0
         const __m128i T1 = _mm_unpacklo_epi32(B2.m_simd, B3.m_simd);
586
0
         const __m128i T2 = _mm_unpackhi_epi32(B0.m_simd, B1.m_simd);
587
0
         const __m128i T3 = _mm_unpackhi_epi32(B2.m_simd, B3.m_simd);
588
589
0
         B0.m_simd = _mm_unpacklo_epi64(T0, T1);
590
0
         B1.m_simd = _mm_unpackhi_epi64(T0, T1);
591
0
         B2.m_simd = _mm_unpacklo_epi64(T2, T3);
592
0
         B3.m_simd = _mm_unpackhi_epi64(T2, T3);
593
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
594
         const __vector unsigned int T0 = vec_mergeh(B0.m_simd, B2.m_simd);
595
         const __vector unsigned int T1 = vec_mergeh(B1.m_simd, B3.m_simd);
596
         const __vector unsigned int T2 = vec_mergel(B0.m_simd, B2.m_simd);
597
         const __vector unsigned int T3 = vec_mergel(B1.m_simd, B3.m_simd);
598
599
         B0.m_simd = vec_mergeh(T0, T1);
600
         B1.m_simd = vec_mergel(T0, T1);
601
         B2.m_simd = vec_mergeh(T2, T3);
602
         B3.m_simd = vec_mergel(T2, T3);
603
604
#elif defined(BOTAN_SIMD_USE_NEON) && defined(BOTAN_TARGET_ARCH_IS_ARM32)
605
         const uint32x4x2_t T0 = vzipq_u32(B0.m_simd, B2.m_simd);
606
         const uint32x4x2_t T1 = vzipq_u32(B1.m_simd, B3.m_simd);
607
         const uint32x4x2_t O0 = vzipq_u32(T0.val[0], T1.val[0]);
608
         const uint32x4x2_t O1 = vzipq_u32(T0.val[1], T1.val[1]);
609
610
         B0.m_simd = O0.val[0];
611
         B1.m_simd = O0.val[1];
612
         B2.m_simd = O1.val[0];
613
         B3.m_simd = O1.val[1];
614
615
#elif defined(BOTAN_SIMD_USE_NEON) && defined(BOTAN_TARGET_ARCH_IS_ARM64)
616
         const uint32x4_t T0 = vzip1q_u32(B0.m_simd, B2.m_simd);
617
         const uint32x4_t T2 = vzip2q_u32(B0.m_simd, B2.m_simd);
618
         const uint32x4_t T1 = vzip1q_u32(B1.m_simd, B3.m_simd);
619
         const uint32x4_t T3 = vzip2q_u32(B1.m_simd, B3.m_simd);
620
621
         B0.m_simd = vzip1q_u32(T0, T1);
622
         B1.m_simd = vzip2q_u32(T0, T1);
623
         B2.m_simd = vzip1q_u32(T2, T3);
624
         B3.m_simd = vzip2q_u32(T2, T3);
625
#endif
626
0
         }
627
628
      static inline SIMD_4x32 choose(const SIMD_4x32& mask, const SIMD_4x32& a, const SIMD_4x32& b)
629
0
         {
630
#if defined(BOTAN_SIMD_USE_ALTIVEC)
631
         return SIMD_4x32(vec_sel(b.raw(), a.raw(), mask.raw()));
632
#elif defined(BOTAN_SIMD_USE_NEON)
633
         return SIMD_4x32(vbslq_u32(mask.raw(), a.raw(), b.raw()));
634
#else
635
0
         return (mask & a) ^ mask.andc(b);
636
0
#endif
637
0
         }
638
639
      static inline SIMD_4x32 majority(const SIMD_4x32& x, const SIMD_4x32& y, const SIMD_4x32& z)
640
0
         {
641
0
         return SIMD_4x32::choose(x ^ y, z, y);
642
0
         }
643
644
331k
      native_simd_type raw() const { return m_simd; }
645
646
376k
      explicit SIMD_4x32(native_simd_type x) : m_simd(x) {}
647
   private:
648
      native_simd_type m_simd;
649
   };
650
651
}
652
653
#endif
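
For reference, the hot paths in this report (the SIMD_4x32 constructors, load_le, operator^=, operator|=, shl/shr, and store_le) are typically reached through code like the minimal sketch below. It assumes an internal Botan build where botan/internal/simd_32.h is on the include path and one of the SSE2, AltiVec, or NEON targets is enabled; the driver program itself is hypothetical and is not part of the library or of this coverage run.

// Hypothetical driver, not part of the Botan source or this report.
// Exercises the SIMD_4x32 operations that show non-zero counts above.
#include <botan/internal/simd_32.h>

#include <cstddef>
#include <cstdint>
#include <cstdio>

int main()
   {
   const uint8_t block[16] = { 0, 1, 2, 3, 4, 5, 6, 7,
                               8, 9, 10, 11, 12, 13, 14, 15 };

   // load_le and the four-word constructor correspond to the hot loads above
   Botan::SIMD_4x32 x = Botan::SIMD_4x32::load_le(block);
   const Botan::SIMD_4x32 k(0x01234567, 0x89ABCDEF, 0xFEDCBA98, 0x76543210);

   // operator^=, operator|=, shl and shr are the most frequently hit operations
   x ^= k;
   x |= x.shl<1>();
   x ^= x.shr<2>();

   // store_le writes the four 32-bit words back out in little-endian order
   uint8_t out[16];
   x.store_le(out);

   for(std::size_t i = 0; i != 16; ++i)
      std::printf("%02X", out[i]);
   std::printf("\n");

   return 0;
   }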