Coverage Report

Created: 2020-02-14 15:38

/src/botan/build/include/botan/internal/simd_32.h
Line
Count
Source (jump to first uncovered line)
1
/*
2
* Lightweight wrappers for SIMD operations
3
* (C) 2009,2011,2016,2017,2019 Jack Lloyd
4
*
5
* Botan is released under the Simplified BSD License (see license.txt)
6
*/
7
8
#ifndef BOTAN_SIMD_32_H_
9
#define BOTAN_SIMD_32_H_
10
11
#include <botan/types.h>
12
13
#if defined(BOTAN_TARGET_SUPPORTS_SSE2)
14
  #include <emmintrin.h>
15
  #define BOTAN_SIMD_USE_SSE2
16
17
#elif defined(BOTAN_TARGET_SUPPORTS_ALTIVEC)
18
  #include <botan/bswap.h>
19
  #include <botan/loadstor.h>
20
  #include <altivec.h>
21
  #undef vector
22
  #undef bool
23
  #define BOTAN_SIMD_USE_ALTIVEC
24
25
#elif defined(BOTAN_TARGET_SUPPORTS_NEON)
26
  #include <botan/cpuid.h>
27
  #include <arm_neon.h>
28
  #define BOTAN_SIMD_USE_NEON
29
30
#else
31
  #error "No SIMD instruction set enabled"
32
#endif
33
34
/*
* Select the ISA name strings that belong to the SIMD backend chosen above.
*
* BOTAN_SIMD_ISA is the string handed to BOTAN_FUNC_ISA on SIMD_4x32::raw().
* BOTAN_VPERM_ISA and BOTAN_CLMUL_ISA are only defined here; nothing else in
* this header reads them.
*
* Note that the NEON branch defines BOTAN_CLMUL_ISA only when
* BOTAN_TARGET_ARCH_IS_ARM64 is set; the other NEON case leaves it undefined.
*/
#if defined(BOTAN_SIMD_USE_SSE2)
35
  #define BOTAN_SIMD_ISA "sse2"
36
  #define BOTAN_VPERM_ISA "ssse3"
37
  #define BOTAN_CLMUL_ISA "pclmul"
38
#elif defined(BOTAN_SIMD_USE_NEON)
39
  #if defined(BOTAN_TARGET_ARCH_IS_ARM64)
40
    #define BOTAN_SIMD_ISA "+simd"
41
    #define BOTAN_CLMUL_ISA "+crypto"
42
  #else
43
    #define BOTAN_SIMD_ISA "fpu=neon"
44
  #endif
45
  #define BOTAN_VPERM_ISA BOTAN_SIMD_ISA
46
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
47
  #define BOTAN_SIMD_ISA "altivec"
48
  #define BOTAN_VPERM_ISA "altivec"
49
  #define BOTAN_CLMUL_ISA "crypto"
50
#endif
51
52
namespace Botan {
53
54
/*
* native_simd_type is the backend's own 128-bit vector type. It is the type
* of SIMD_4x32::m_simd and the type returned by SIMD_4x32::raw().
*/
#if defined(BOTAN_SIMD_USE_SSE2)
55
   typedef __m128i native_simd_type;
56
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
57
   typedef __vector unsigned int native_simd_type;
58
#elif defined(BOTAN_SIMD_USE_NEON)
59
   typedef uint32x4_t native_simd_type;
60
#endif
61
62
/**
63
* 4x32 bit SIMD register
64
*
65
* This class is not a general purpose SIMD type, and only offers
66
* instructions needed for evaluation of specific crypto primitives.
67
* For example it does not currently have equality operators of any
68
* kind.
69
*
70
* Implemented for SSE2, VMX (Altivec), and NEON.
71
*/
72
class SIMD_4x32 final
73
   {
74
   public:
75
76
      SIMD_4x32& operator=(const SIMD_4x32& other) = default;
77
      SIMD_4x32(const SIMD_4x32& other) = default;
78
79
      SIMD_4x32& operator=(SIMD_4x32&& other) = default;
80
      SIMD_4x32(SIMD_4x32&& other) = default;
81
82
      /**
83
      * Zero initialize SIMD register with 4 32-bit elements
84
      */
85
      SIMD_4x32() // zero initialized
86
6.77k
         {
87
6.77k
#if defined(BOTAN_SIMD_USE_SSE2)
88
6.77k
         m_simd = _mm_setzero_si128();
89
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
90
         m_simd = vec_splat_u32(0);
91
#elif defined(BOTAN_SIMD_USE_NEON)
92
         m_simd = vdupq_n_u32(0);
93
#endif
94
         }
95
96
      /**
97
      * Load SIMD register with 4 32-bit elements
98
      */
99
      explicit SIMD_4x32(const uint32_t B[4])
100
0
         {
101
0
#if defined(BOTAN_SIMD_USE_SSE2)
102
0
         m_simd = _mm_loadu_si128(reinterpret_cast<const __m128i*>(B));
103
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
104
         __vector unsigned int val = { B[0], B[1], B[2], B[3]};
105
         m_simd = val;
106
#elif defined(BOTAN_SIMD_USE_NEON)
107
         m_simd = vld1q_u32(B);
108
#endif
109
         }
110
111
      /**
112
      * Load SIMD register with 4 32-bit elements
113
      */
114
      SIMD_4x32(uint32_t B0, uint32_t B1, uint32_t B2, uint32_t B3)
115
1.15k
         {
116
1.15k
#if defined(BOTAN_SIMD_USE_SSE2)
117
1.15k
         m_simd = _mm_set_epi32(B3, B2, B1, B0);
118
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
119
         __vector unsigned int val = {B0, B1, B2, B3};
120
         m_simd = val;
121
#elif defined(BOTAN_SIMD_USE_NEON)
122
         // Better way to do this?
123
         const uint32_t B[4] = { B0, B1, B2, B3 };
124
         m_simd = vld1q_u32(B);
125
#endif
126
         }
127
128
      /**
129
      * Load SIMD register with one 32-bit element repeated
130
      */
131
      static SIMD_4x32 splat(uint32_t B)
132
0
         {
133
0
#if defined(BOTAN_SIMD_USE_SSE2)
134
0
         return SIMD_4x32(_mm_set1_epi32(B));
135
#elif defined(BOTAN_SIMD_USE_NEON)
136
         return SIMD_4x32(vdupq_n_u32(B));
137
#else
138
         return SIMD_4x32(B, B, B, B);
139
#endif
140
         }
141
142
      /**
143
      * Load SIMD register with one 8-bit element repeated
144
      */
145
      static SIMD_4x32 splat_u8(uint8_t B)
146
24
         {
147
24
#if defined(BOTAN_SIMD_USE_SSE2)
148
24
         return SIMD_4x32(_mm_set1_epi8(B));
149
#elif defined(BOTAN_SIMD_USE_NEON)
150
         return SIMD_4x32(vreinterpretq_u32_u8(vdupq_n_u8(B)));
151
#else
152
         const uint32_t B4 = make_uint32(B, B, B, B);
153
         return SIMD_4x32(B4, B4, B4, B4);
154
#endif
155
         }
156
157
      /**
158
      * Load a SIMD register with little-endian convention
159
      */
160
      static SIMD_4x32 load_le(const void* in)
161
42.1k
         {
162
42.1k
#if defined(BOTAN_SIMD_USE_SSE2)
163
42.1k
         return SIMD_4x32(_mm_loadu_si128(reinterpret_cast<const __m128i*>(in)));
164
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
165
         uint32_t R[4];
166
         Botan::load_le(R, static_cast<const uint8_t*>(in), 4);
167
         return SIMD_4x32(R);
168
#elif defined(BOTAN_SIMD_USE_NEON)
169
         SIMD_4x32 l(vld1q_u32(static_cast<const uint32_t*>(in)));
170
         return CPUID::is_big_endian() ? l.bswap() : l;
171
#endif
172
         }
173
174
      /**
175
      * Load a SIMD register with big-endian convention
176
      */
177
      static SIMD_4x32 load_be(const void* in)
178
0
         {
179
0
#if defined(BOTAN_SIMD_USE_SSE2)
180
0
         return load_le(in).bswap();
181
0
182
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
183
         uint32_t R[4];
184
         Botan::load_be(R, static_cast<const uint8_t*>(in), 4);
185
         return SIMD_4x32(R);
186
187
#elif defined(BOTAN_SIMD_USE_NEON)
188
         SIMD_4x32 l(vld1q_u32(static_cast<const uint32_t*>(in)));
189
         return CPUID::is_little_endian() ? l.bswap() : l;
190
#endif
191
         }
192
193
      void store_le(uint32_t out[4]) const
194
0
         {
195
0
         this->store_le(reinterpret_cast<uint8_t*>(out));
196
0
         }
197
198
      void store_le(uint64_t out[2]) const
199
3.80k
         {
200
3.80k
         this->store_le(reinterpret_cast<uint8_t*>(out));
201
3.80k
         }
202
203
      /**
204
      * Load a SIMD register with little-endian convention
205
      */
206
      void store_le(uint8_t out[]) const
207
7.94k
         {
208
7.94k
#if defined(BOTAN_SIMD_USE_SSE2)
209
7.94k
210
7.94k
         _mm_storeu_si128(reinterpret_cast<__m128i*>(out), raw());
211
7.94k
212
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
213
214
         union {
215
            __vector unsigned int V;
216
            uint32_t R[4];
217
            } vec;
218
         vec.V = raw();
219
         Botan::store_le(out, vec.R[0], vec.R[1], vec.R[2], vec.R[3]);
220
221
#elif defined(BOTAN_SIMD_USE_NEON)
222
         if(CPUID::is_little_endian())
223
            {
224
            vst1q_u8(out, vreinterpretq_u8_u32(m_simd));
225
            }
226
         else
227
            {
228
            vst1q_u8(out, vreinterpretq_u8_u32(bswap().m_simd));
229
            }
230
#endif
231
         }
232
233
      /**
234
      * Load a SIMD register with big-endian convention
235
      */
236
      void store_be(uint8_t out[]) const
237
0
         {
238
0
#if defined(BOTAN_SIMD_USE_SSE2)
239
0
240
0
         bswap().store_le(out);
241
0
242
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
243
244
         union {
245
            __vector unsigned int V;
246
            uint32_t R[4];
247
            } vec;
248
         vec.V = m_simd;
249
         Botan::store_be(out, vec.R[0], vec.R[1], vec.R[2], vec.R[3]);
250
251
#elif defined(BOTAN_SIMD_USE_NEON)
252
         if(CPUID::is_little_endian())
253
            {
254
            vst1q_u8(out, vreinterpretq_u8_u32(bswap().m_simd));
255
            }
256
         else
257
            {
258
            vst1q_u8(out, vreinterpretq_u8_u32(m_simd));
259
            }
260
#endif
261
         }
262
263
      /*
264
      * This is used for SHA-2/SHACAL2
265
      * Return rotr(ROT1) ^ rotr(ROT2) ^ rotr(ROT3)
266
      */
267
      template<size_t ROT1, size_t ROT2, size_t ROT3>
268
      SIMD_4x32 rho() const
269
0
         {
270
0
         const SIMD_4x32 rot1 = this->rotr<ROT1>();
271
0
         const SIMD_4x32 rot2 = this->rotr<ROT2>();
272
0
         const SIMD_4x32 rot3 = this->rotr<ROT3>();
273
0
         return (rot1 ^ rot2 ^ rot3);
274
0
         }
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rho<6ul, 11ul, 25ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rho<2ul, 13ul, 22ul>() const
275
276
      /**
277
      * Left rotation by a compile time constant
278
      */
279
      template<size_t ROT>
280
      SIMD_4x32 rotl() const
281
0
         {
282
0
         static_assert(ROT > 0 && ROT < 32, "Invalid rotation constant");
283
0
284
0
#if defined(BOTAN_SIMD_USE_SSE2)
285
0
286
0
         return SIMD_4x32(_mm_or_si128(_mm_slli_epi32(m_simd, static_cast<int>(ROT)),
287
0
                                       _mm_srli_epi32(m_simd, static_cast<int>(32-ROT))));
288
0
289
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
290
291
         const unsigned int r = static_cast<unsigned int>(ROT);
292
         __vector unsigned int rot = {r, r, r, r};
293
         return SIMD_4x32(vec_rl(m_simd, rot));
294
295
#elif defined(BOTAN_SIMD_USE_NEON)
296
297
#if defined(BOTAN_TARGET_ARCH_IS_ARM64)
298
299
         BOTAN_IF_CONSTEXPR(ROT == 8)
300
            {
301
            const uint8_t maskb[16] = { 3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14 };
302
            const uint8x16_t mask = vld1q_u8(maskb);
303
            return SIMD_4x32(vreinterpretq_u32_u8(vqtbl1q_u8(vreinterpretq_u8_u32(m_simd), mask)));
304
            }
305
         else BOTAN_IF_CONSTEXPR(ROT == 16)
306
            {
307
            return SIMD_4x32(vreinterpretq_u32_u16(vrev32q_u16(vreinterpretq_u16_u32(m_simd))));
308
            }
309
#endif
310
         return SIMD_4x32(vorrq_u32(vshlq_n_u32(m_simd, static_cast<int>(ROT)),
311
                                    vshrq_n_u32(m_simd, static_cast<int>(32-ROT))));
312
#endif
313
         }
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<8ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<24ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<1ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<5ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<2ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<31ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<27ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<30ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<13ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<3ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<7ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<22ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<10ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<25ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<29ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<19ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<26ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<21ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<16ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotl<12ul>() const
314
315
      /**
316
      * Right rotation by a compile time constant
317
      */
318
      template<size_t ROT>
319
      SIMD_4x32 rotr() const
320
0
         {
321
0
         return this->rotl<32-ROT>();
322
0
         }
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotr<8ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotr<1ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotr<5ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotr<2ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotr<22ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotr<7ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotr<3ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotr<13ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotr<6ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotr<11ul>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::rotr<25ul>() const
323
324
      /**
325
      * Add elements of a SIMD vector
326
      */
327
      SIMD_4x32 operator+(const SIMD_4x32& other) const
328
0
         {
329
0
         SIMD_4x32 retval(*this);
330
0
         retval += other;
331
0
         return retval;
332
0
         }
333
334
      /**
335
      * Subtract elements of a SIMD vector
336
      */
337
      SIMD_4x32 operator-(const SIMD_4x32& other) const
338
0
         {
339
0
         SIMD_4x32 retval(*this);
340
0
         retval -= other;
341
0
         return retval;
342
0
         }
343
344
      /**
345
      * XOR elements of a SIMD vector
346
      */
347
      SIMD_4x32 operator^(const SIMD_4x32& other) const
348
192k
         {
349
192k
         SIMD_4x32 retval(*this);
350
192k
         retval ^= other;
351
192k
         return retval;
352
192k
         }
353
354
      /**
355
      * Binary OR elements of a SIMD vector
356
      */
357
      SIMD_4x32 operator|(const SIMD_4x32& other) const
358
0
         {
359
0
         SIMD_4x32 retval(*this);
360
0
         retval |= other;
361
0
         return retval;
362
0
         }
363
364
      /**
365
      * Binary AND elements of a SIMD vector
366
      */
367
      SIMD_4x32 operator&(const SIMD_4x32& other) const
368
0
         {
369
0
         SIMD_4x32 retval(*this);
370
0
         retval &= other;
371
0
         return retval;
372
0
         }
373
374
      void operator+=(const SIMD_4x32& other)
375
0
         {
376
0
#if defined(BOTAN_SIMD_USE_SSE2)
377
0
         m_simd = _mm_add_epi32(m_simd, other.m_simd);
378
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
379
         m_simd = vec_add(m_simd, other.m_simd);
380
#elif defined(BOTAN_SIMD_USE_NEON)
381
         m_simd = vaddq_u32(m_simd, other.m_simd);
382
#endif
383
         }
384
385
      void operator-=(const SIMD_4x32& other)
386
0
         {
387
0
#if defined(BOTAN_SIMD_USE_SSE2)
388
0
         m_simd = _mm_sub_epi32(m_simd, other.m_simd);
389
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
390
         m_simd = vec_sub(m_simd, other.m_simd);
391
#elif defined(BOTAN_SIMD_USE_NEON)
392
         m_simd = vsubq_u32(m_simd, other.m_simd);
393
#endif
394
         }
395
396
      void operator^=(const SIMD_4x32& other)
397
293k
         {
398
293k
#if defined(BOTAN_SIMD_USE_SSE2)
399
293k
         m_simd = _mm_xor_si128(m_simd, other.m_simd);
400
293k
401
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
402
         m_simd = vec_xor(m_simd, other.m_simd);
403
#elif defined(BOTAN_SIMD_USE_NEON)
404
         m_simd = veorq_u32(m_simd, other.m_simd);
405
#endif
406
         }
407
408
      void operator|=(const SIMD_4x32& other)
409
41.9k
         {
410
41.9k
#if defined(BOTAN_SIMD_USE_SSE2)
411
41.9k
         m_simd = _mm_or_si128(m_simd, other.m_simd);
412
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
413
         m_simd = vec_or(m_simd, other.m_simd);
414
#elif defined(BOTAN_SIMD_USE_NEON)
415
         m_simd = vorrq_u32(m_simd, other.m_simd);
416
#endif
417
         }
418
419
      void operator&=(const SIMD_4x32& other)
420
0
         {
421
0
#if defined(BOTAN_SIMD_USE_SSE2)
422
0
         m_simd = _mm_and_si128(m_simd, other.m_simd);
423
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
424
         m_simd = vec_and(m_simd, other.m_simd);
425
#elif defined(BOTAN_SIMD_USE_NEON)
426
         m_simd = vandq_u32(m_simd, other.m_simd);
427
#endif
428
         }
429
430
431
      template<int SHIFT> SIMD_4x32 shl() const
432
69.8k
         {
433
69.8k
         static_assert(SHIFT > 0 && SHIFT <= 31, "Invalid shift count");
434
69.8k
435
69.8k
#if defined(BOTAN_SIMD_USE_SSE2)
436
69.8k
         return SIMD_4x32(_mm_slli_epi32(m_simd, SHIFT));
437
69.8k
438
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
439
         const unsigned int s = static_cast<unsigned int>(SHIFT);
440
         const __vector unsigned int shifts = {s, s, s, s};
441
         return SIMD_4x32(vec_sl(m_simd, shifts));
442
#elif defined(BOTAN_SIMD_USE_NEON)
443
         return SIMD_4x32(vshlq_n_u32(m_simd, SHIFT));
444
#endif
445
         }
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::shl<3>() const
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::shl<7>() const
Botan::SIMD_4x32 Botan::SIMD_4x32::shl<1>() const
Line
Count
Source
432
27.9k
         {
433
27.9k
         static_assert(SHIFT > 0 && SHIFT <= 31, "Invalid shift count");
434
27.9k
435
27.9k
#if defined(BOTAN_SIMD_USE_SSE2)
436
27.9k
         return SIMD_4x32(_mm_slli_epi32(m_simd, SHIFT));
437
27.9k
438
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
439
         const unsigned int s = static_cast<unsigned int>(SHIFT);
440
         const __vector unsigned int shifts = {s, s, s, s};
441
         return SIMD_4x32(vec_sl(m_simd, shifts));
442
#elif defined(BOTAN_SIMD_USE_NEON)
443
         return SIMD_4x32(vshlq_n_u32(m_simd, SHIFT));
444
#endif
445
         }
Botan::SIMD_4x32 Botan::SIMD_4x32::shl<31>() const
Line
Count
Source
432
13.9k
         {
433
13.9k
         static_assert(SHIFT > 0 && SHIFT <= 31, "Invalid shift count");
434
13.9k
435
13.9k
#if defined(BOTAN_SIMD_USE_SSE2)
436
13.9k
         return SIMD_4x32(_mm_slli_epi32(m_simd, SHIFT));
437
13.9k
438
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
439
         const unsigned int s = static_cast<unsigned int>(SHIFT);
440
         const __vector unsigned int shifts = {s, s, s, s};
441
         return SIMD_4x32(vec_sl(m_simd, shifts));
442
#elif defined(BOTAN_SIMD_USE_NEON)
443
         return SIMD_4x32(vshlq_n_u32(m_simd, SHIFT));
444
#endif
445
         }
Botan::SIMD_4x32 Botan::SIMD_4x32::shl<30>() const
Line
Count
Source
432
13.9k
         {
433
13.9k
         static_assert(SHIFT > 0 && SHIFT <= 31, "Invalid shift count");
434
13.9k
435
13.9k
#if defined(BOTAN_SIMD_USE_SSE2)
436
13.9k
         return SIMD_4x32(_mm_slli_epi32(m_simd, SHIFT));
437
13.9k
438
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
439
         const unsigned int s = static_cast<unsigned int>(SHIFT);
440
         const __vector unsigned int shifts = {s, s, s, s};
441
         return SIMD_4x32(vec_sl(m_simd, shifts));
442
#elif defined(BOTAN_SIMD_USE_NEON)
443
         return SIMD_4x32(vshlq_n_u32(m_simd, SHIFT));
444
#endif
445
         }
Botan::SIMD_4x32 Botan::SIMD_4x32::shl<25>() const
Line
Count
Source
432
13.9k
         {
433
13.9k
         static_assert(SHIFT > 0 && SHIFT <= 31, "Invalid shift count");
434
13.9k
435
13.9k
#if defined(BOTAN_SIMD_USE_SSE2)
436
13.9k
         return SIMD_4x32(_mm_slli_epi32(m_simd, SHIFT));
437
13.9k
438
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
439
         const unsigned int s = static_cast<unsigned int>(SHIFT);
440
         const __vector unsigned int shifts = {s, s, s, s};
441
         return SIMD_4x32(vec_sl(m_simd, shifts));
442
#elif defined(BOTAN_SIMD_USE_NEON)
443
         return SIMD_4x32(vshlq_n_u32(m_simd, SHIFT));
444
#endif
445
         }
446
447
      template<int SHIFT> SIMD_4x32 shr() const
448
69.8k
         {
449
69.8k
#if defined(BOTAN_SIMD_USE_SSE2)
450
69.8k
         return SIMD_4x32(_mm_srli_epi32(m_simd, SHIFT));
451
69.8k
452
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
453
         const unsigned int s = static_cast<unsigned int>(SHIFT);
454
         const __vector unsigned int shifts = {s, s, s, s};
455
         return SIMD_4x32(vec_sr(m_simd, shifts));
456
#elif defined(BOTAN_SIMD_USE_NEON)
457
         return SIMD_4x32(vshrq_n_u32(m_simd, SHIFT));
458
#endif
459
         }
Unexecuted instantiation: Botan::SIMD_4x32 Botan::SIMD_4x32::shr<4>() const
Botan::SIMD_4x32 Botan::SIMD_4x32::shr<31>() const
Line
Count
Source
448
27.9k
         {
449
27.9k
#if defined(BOTAN_SIMD_USE_SSE2)
450
27.9k
         return SIMD_4x32(_mm_srli_epi32(m_simd, SHIFT));
451
27.9k
452
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
453
         const unsigned int s = static_cast<unsigned int>(SHIFT);
454
         const __vector unsigned int shifts = {s, s, s, s};
455
         return SIMD_4x32(vec_sr(m_simd, shifts));
456
#elif defined(BOTAN_SIMD_USE_NEON)
457
         return SIMD_4x32(vshrq_n_u32(m_simd, SHIFT));
458
#endif
459
         }
Botan::SIMD_4x32 Botan::SIMD_4x32::shr<7>() const
Line
Count
Source
448
13.9k
         {
449
13.9k
#if defined(BOTAN_SIMD_USE_SSE2)
450
13.9k
         return SIMD_4x32(_mm_srli_epi32(m_simd, SHIFT));
451
13.9k
452
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
453
         const unsigned int s = static_cast<unsigned int>(SHIFT);
454
         const __vector unsigned int shifts = {s, s, s, s};
455
         return SIMD_4x32(vec_sr(m_simd, shifts));
456
#elif defined(BOTAN_SIMD_USE_NEON)
457
         return SIMD_4x32(vshrq_n_u32(m_simd, SHIFT));
458
#endif
459
         }
Botan::SIMD_4x32 Botan::SIMD_4x32::shr<2>() const
Line
Count
Source
448
13.9k
         {
449
13.9k
#if defined(BOTAN_SIMD_USE_SSE2)
450
13.9k
         return SIMD_4x32(_mm_srli_epi32(m_simd, SHIFT));
451
13.9k
452
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
453
         const unsigned int s = static_cast<unsigned int>(SHIFT);
454
         const __vector unsigned int shifts = {s, s, s, s};
455
         return SIMD_4x32(vec_sr(m_simd, shifts));
456
#elif defined(BOTAN_SIMD_USE_NEON)
457
         return SIMD_4x32(vshrq_n_u32(m_simd, SHIFT));
458
#endif
459
         }
Botan::SIMD_4x32 Botan::SIMD_4x32::shr<1>() const
Line
Count
Source
448
13.9k
         {
449
13.9k
#if defined(BOTAN_SIMD_USE_SSE2)
450
13.9k
         return SIMD_4x32(_mm_srli_epi32(m_simd, SHIFT));
451
13.9k
452
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
453
         const unsigned int s = static_cast<unsigned int>(SHIFT);
454
         const __vector unsigned int shifts = {s, s, s, s};
455
         return SIMD_4x32(vec_sr(m_simd, shifts));
456
#elif defined(BOTAN_SIMD_USE_NEON)
457
         return SIMD_4x32(vshrq_n_u32(m_simd, SHIFT));
458
#endif
459
         }
460
461
      SIMD_4x32 operator~() const
462
0
         {
463
0
#if defined(BOTAN_SIMD_USE_SSE2)
464
0
         return SIMD_4x32(_mm_xor_si128(m_simd, _mm_set1_epi32(0xFFFFFFFF)));
465
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
466
         return SIMD_4x32(vec_nor(m_simd, m_simd));
467
#elif defined(BOTAN_SIMD_USE_NEON)
468
         return SIMD_4x32(vmvnq_u32(m_simd));
469
#endif
470
         }
471
472
      // (~reg) & other
473
      SIMD_4x32 andc(const SIMD_4x32& other) const
474
0
         {
475
0
#if defined(BOTAN_SIMD_USE_SSE2)
476
0
         return SIMD_4x32(_mm_andnot_si128(m_simd, other.m_simd));
477
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
478
         /*
479
         AltiVec does arg1 & ~arg2 rather than SSE's ~arg1 & arg2
480
         so swap the arguments
481
         */
482
         return SIMD_4x32(vec_andc(other.m_simd, m_simd));
483
#elif defined(BOTAN_SIMD_USE_NEON)
484
         // NEON is also a & ~b
485
         return SIMD_4x32(vbicq_u32(other.m_simd, m_simd));
486
#endif
487
         }
488
489
      /**
490
      * Return copy *this with each word byte swapped
491
      */
492
      SIMD_4x32 bswap() const
493
0
         {
494
0
#if defined(BOTAN_SIMD_USE_SSE2)
495
0
496
0
         __m128i T = m_simd;
497
0
         T = _mm_shufflehi_epi16(T, _MM_SHUFFLE(2, 3, 0, 1));
498
0
         T = _mm_shufflelo_epi16(T, _MM_SHUFFLE(2, 3, 0, 1));
499
0
         return SIMD_4x32(_mm_or_si128(_mm_srli_epi16(T, 8), _mm_slli_epi16(T, 8)));
500
0
501
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
502
503
         union {
504
            __vector unsigned int V;
505
            uint32_t R[4];
506
            } vec;
507
508
         vec.V = m_simd;
509
         bswap_4(vec.R);
510
         return SIMD_4x32(vec.R[0], vec.R[1], vec.R[2], vec.R[3]);
511
512
#elif defined(BOTAN_SIMD_USE_NEON)
513
         return SIMD_4x32(vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(m_simd))));
514
#endif
515
         }
516
517
      template<size_t I>
518
      SIMD_4x32 shift_elems_left() const
519
55.8k
         {
520
55.8k
         static_assert(I <= 3, "Invalid shift count");
521
55.8k
522
55.8k
#if defined(BOTAN_SIMD_USE_SSE2)
523
55.8k
         return SIMD_4x32(_mm_slli_si128(raw(), 4*I));
524
#elif defined(BOTAN_SIMD_USE_NEON)
525
         return SIMD_4x32(vextq_u32(vdupq_n_u32(0), raw(), 4-I));
526
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
527
         const __vector unsigned int zero = vec_splat_u32(0);
528
529
         const __vector unsigned char shuf[3] = {
530
            { 16, 17, 18, 19, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 },
531
            { 16, 17, 18, 19, 20, 21, 22, 23, 0, 1, 2, 3, 4, 5, 6, 7 },
532
            { 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 0, 1, 2, 3 },
533
         };
534
535
         return SIMD_4x32(vec_perm(raw(), zero, shuf[I-1]));
536
#endif
537
         }
Botan::SIMD_4x32 Botan::SIMD_4x32::shift_elems_left<1ul>() const
Line
Count
Source
519
27.9k
         {
520
27.9k
         static_assert(I <= 3, "Invalid shift count");
521
27.9k
522
27.9k
#if defined(BOTAN_SIMD_USE_SSE2)
523
27.9k
         return SIMD_4x32(_mm_slli_si128(raw(), 4*I));
524
#elif defined(BOTAN_SIMD_USE_NEON)
525
         return SIMD_4x32(vextq_u32(vdupq_n_u32(0), raw(), 4-I));
526
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
527
         const __vector unsigned int zero = vec_splat_u32(0);
528
529
         const __vector unsigned char shuf[3] = {
530
            { 16, 17, 18, 19, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 },
531
            { 16, 17, 18, 19, 20, 21, 22, 23, 0, 1, 2, 3, 4, 5, 6, 7 },
532
            { 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 0, 1, 2, 3 },
533
         };
534
535
         return SIMD_4x32(vec_perm(raw(), zero, shuf[I-1]));
536
#endif
537
         }
Botan::SIMD_4x32 Botan::SIMD_4x32::shift_elems_left<2ul>() const
Line
Count
Source
519
13.9k
         {
520
13.9k
         static_assert(I <= 3, "Invalid shift count");
521
13.9k
522
13.9k
#if defined(BOTAN_SIMD_USE_SSE2)
523
13.9k
         return SIMD_4x32(_mm_slli_si128(raw(), 4*I));
524
#elif defined(BOTAN_SIMD_USE_NEON)
525
         return SIMD_4x32(vextq_u32(vdupq_n_u32(0), raw(), 4-I));
526
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
527
         const __vector unsigned int zero = vec_splat_u32(0);
528
529
         const __vector unsigned char shuf[3] = {
530
            { 16, 17, 18, 19, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 },
531
            { 16, 17, 18, 19, 20, 21, 22, 23, 0, 1, 2, 3, 4, 5, 6, 7 },
532
            { 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 0, 1, 2, 3 },
533
         };
534
535
         return SIMD_4x32(vec_perm(raw(), zero, shuf[I-1]));
536
#endif
537
         }
Botan::SIMD_4x32 Botan::SIMD_4x32::shift_elems_left<3ul>() const
Line
Count
Source
519
13.9k
         {
520
13.9k
         static_assert(I <= 3, "Invalid shift count");
521
13.9k
522
13.9k
#if defined(BOTAN_SIMD_USE_SSE2)
523
13.9k
         return SIMD_4x32(_mm_slli_si128(raw(), 4*I));
524
#elif defined(BOTAN_SIMD_USE_NEON)
525
         return SIMD_4x32(vextq_u32(vdupq_n_u32(0), raw(), 4-I));
526
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
527
         const __vector unsigned int zero = vec_splat_u32(0);
528
529
         const __vector unsigned char shuf[3] = {
530
            { 16, 17, 18, 19, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 },
531
            { 16, 17, 18, 19, 20, 21, 22, 23, 0, 1, 2, 3, 4, 5, 6, 7 },
532
            { 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 0, 1, 2, 3 },
533
         };
534
535
         return SIMD_4x32(vec_perm(raw(), zero, shuf[I-1]));
536
#endif
537
         }
538
539
      template<size_t I>
540
      SIMD_4x32 shift_elems_right() const
541
96.0k
         {
542
96.0k
         static_assert(I <= 3, "Invalid shift count");
543
96.0k
544
96.0k
#if defined(BOTAN_SIMD_USE_SSE2)
545
96.0k
         return SIMD_4x32(_mm_srli_si128(raw(), 4*I));
546
#elif defined(BOTAN_SIMD_USE_NEON)
547
         return SIMD_4x32(vextq_u32(raw(), vdupq_n_u32(0), I));
548
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
549
         const __vector unsigned int zero = vec_splat_u32(0);
550
551
         const __vector unsigned char shuf[3] = {
552
            { 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19 },
553
            { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 },
554
            { 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27 },
555
         };
556
557
         return SIMD_4x32(vec_perm(raw(), zero, shuf[I-1]));
558
#endif
559
         }
Botan::SIMD_4x32 Botan::SIMD_4x32::shift_elems_right<2ul>() const
Line
Count
Source
541
68.1k
         {
542
68.1k
         static_assert(I <= 3, "Invalid shift count");
543
68.1k
544
68.1k
#if defined(BOTAN_SIMD_USE_SSE2)
545
68.1k
         return SIMD_4x32(_mm_srli_si128(raw(), 4*I));
546
#elif defined(BOTAN_SIMD_USE_NEON)
547
         return SIMD_4x32(vextq_u32(raw(), vdupq_n_u32(0), I));
548
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
549
         const __vector unsigned int zero = vec_splat_u32(0);
550
551
         const __vector unsigned char shuf[3] = {
552
            { 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19 },
553
            { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 },
554
            { 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27 },
555
         };
556
557
         return SIMD_4x32(vec_perm(raw(), zero, shuf[I-1]));
558
#endif
559
         }
Botan::SIMD_4x32 Botan::SIMD_4x32::shift_elems_right<3ul>() const
Line
Count
Source
541
13.9k
         {
542
13.9k
         static_assert(I <= 3, "Invalid shift count");
543
13.9k
544
13.9k
#if defined(BOTAN_SIMD_USE_SSE2)
545
13.9k
         return SIMD_4x32(_mm_srli_si128(raw(), 4*I));
546
#elif defined(BOTAN_SIMD_USE_NEON)
547
         return SIMD_4x32(vextq_u32(raw(), vdupq_n_u32(0), I));
548
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
549
         const __vector unsigned int zero = vec_splat_u32(0);
550
551
         const __vector unsigned char shuf[3] = {
552
            { 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19 },
553
            { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 },
554
            { 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27 },
555
         };
556
557
         return SIMD_4x32(vec_perm(raw(), zero, shuf[I-1]));
558
#endif
559
         }
Botan::SIMD_4x32 Botan::SIMD_4x32::shift_elems_right<1ul>() const
Line
Count
Source
541
13.9k
         {
542
13.9k
         static_assert(I <= 3, "Invalid shift count");
543
13.9k
544
13.9k
#if defined(BOTAN_SIMD_USE_SSE2)
545
13.9k
         return SIMD_4x32(_mm_srli_si128(raw(), 4*I));
546
#elif defined(BOTAN_SIMD_USE_NEON)
547
         return SIMD_4x32(vextq_u32(raw(), vdupq_n_u32(0), I));
548
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
549
         const __vector unsigned int zero = vec_splat_u32(0);
550
551
         const __vector unsigned char shuf[3] = {
552
            { 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19 },
553
            { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 },
554
            { 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27 },
555
         };
556
557
         return SIMD_4x32(vec_perm(raw(), zero, shuf[I-1]));
558
#endif
559
         }
560
561
      /**
562
      * 4x4 Transposition on SIMD registers
563
      */
564
      static void transpose(SIMD_4x32& B0, SIMD_4x32& B1,
565
                            SIMD_4x32& B2, SIMD_4x32& B3)
566
0
         {
567
0
#if defined(BOTAN_SIMD_USE_SSE2)
568
0
         const __m128i T0 = _mm_unpacklo_epi32(B0.m_simd, B1.m_simd);
569
0
         const __m128i T1 = _mm_unpacklo_epi32(B2.m_simd, B3.m_simd);
570
0
         const __m128i T2 = _mm_unpackhi_epi32(B0.m_simd, B1.m_simd);
571
0
         const __m128i T3 = _mm_unpackhi_epi32(B2.m_simd, B3.m_simd);
572
0
573
0
         B0.m_simd = _mm_unpacklo_epi64(T0, T1);
574
0
         B1.m_simd = _mm_unpackhi_epi64(T0, T1);
575
0
         B2.m_simd = _mm_unpacklo_epi64(T2, T3);
576
0
         B3.m_simd = _mm_unpackhi_epi64(T2, T3);
577
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
578
         const __vector unsigned int T0 = vec_mergeh(B0.m_simd, B2.m_simd);
579
         const __vector unsigned int T1 = vec_mergeh(B1.m_simd, B3.m_simd);
580
         const __vector unsigned int T2 = vec_mergel(B0.m_simd, B2.m_simd);
581
         const __vector unsigned int T3 = vec_mergel(B1.m_simd, B3.m_simd);
582
583
         B0.m_simd = vec_mergeh(T0, T1);
584
         B1.m_simd = vec_mergel(T0, T1);
585
         B2.m_simd = vec_mergeh(T2, T3);
586
         B3.m_simd = vec_mergel(T2, T3);
587
588
#elif defined(BOTAN_SIMD_USE_NEON) && defined(BOTAN_TARGET_ARCH_IS_ARM32)
589
         const uint32x4x2_t T0 = vzipq_u32(B0.m_simd, B2.m_simd);
590
         const uint32x4x2_t T1 = vzipq_u32(B1.m_simd, B3.m_simd);
591
         const uint32x4x2_t O0 = vzipq_u32(T0.val[0], T1.val[0]);
592
         const uint32x4x2_t O1 = vzipq_u32(T0.val[1], T1.val[1]);
593
594
         B0.m_simd = O0.val[0];
595
         B1.m_simd = O0.val[1];
596
         B2.m_simd = O1.val[0];
597
         B3.m_simd = O1.val[1];
598
599
#elif defined(BOTAN_SIMD_USE_NEON) && defined(BOTAN_TARGET_ARCH_IS_ARM64)
600
         const uint32x4_t T0 = vzip1q_u32(B0.m_simd, B2.m_simd);
601
         const uint32x4_t T2 = vzip2q_u32(B0.m_simd, B2.m_simd);
602
         const uint32x4_t T1 = vzip1q_u32(B1.m_simd, B3.m_simd);
603
         const uint32x4_t T3 = vzip2q_u32(B1.m_simd, B3.m_simd);
604
605
         B0.m_simd = vzip1q_u32(T0, T1);
606
         B1.m_simd = vzip2q_u32(T0, T1);
607
         B2.m_simd = vzip1q_u32(T2, T3);
608
         B3.m_simd = vzip2q_u32(T2, T3);
609
#endif
610
         }
611
612
420k
      native_simd_type raw() const BOTAN_FUNC_ISA(BOTAN_SIMD_ISA) { return m_simd; }
613
614
484k
      explicit SIMD_4x32(native_simd_type x) : m_simd(x) {}
615
   private:
616
      native_simd_type m_simd;
617
   };
618
619
}
620
621
#endif