Coverage Report

Created: 2025-04-11 06:34

/src/botan/build/include/internal/botan/internal/simd_2x64.h
Line
Count
Source (jump to first uncovered line)
1
/*
2
* (C) 2022,2025 Jack Lloyd
3
*
4
* Botan is released under the Simplified BSD License (see license.txt)
5
*/
6
7
#ifndef BOTAN_SIMD_2X64_H_
8
#define BOTAN_SIMD_2X64_H_
9
10
#include <botan/compiler.h>
11
#include <botan/types.h>
12
#include <botan/internal/isa_extn.h>
13
#include <botan/internal/target_info.h>
14
15
#if defined(BOTAN_TARGET_CPU_SUPPORTS_SSSE3)
16
   #include <emmintrin.h>
17
   #include <tmmintrin.h>
18
   #define BOTAN_SIMD_USE_SSSE3
19
#endif
20
21
namespace Botan {
22
23
class SIMD_2x64 final {
24
   public:
25
      // Copy/move assignment, copy/move construction and destruction are all
      // explicitly defaulted; the class adds no custom behavior to any of them.
      SIMD_2x64& operator=(const SIMD_2x64& other) = default;
26
      SIMD_2x64(const SIMD_2x64& other) = default;
27
28
      SIMD_2x64& operator=(SIMD_2x64&& other) = default;
29
      SIMD_2x64(SIMD_2x64&& other) = default;
30
31
      ~SIMD_2x64() = default;
32
33
      // Default constructor: the whole 128-bit register (both 64-bit lanes) is set to zero
34
636k
      SIMD_2x64() { m_simd = _mm_setzero_si128(); }
35
36
3.82M
      // Load 16 bytes starting at `in` into a new vector, in memory order.
      // Uses the unaligned load intrinsic, so `in` needs no particular alignment,
      // but it must point to at least 16 readable bytes.
      static SIMD_2x64 load_le(const void* in) {
37
3.82M
         return SIMD_2x64(_mm_loadu_si128(reinterpret_cast<const __m128i*>(in)));
38
3.82M
      }
39
40
636k
      // Load 16 bytes exactly as load_le() does, then reverse the byte order inside each 64-bit lane via bswap()
      static SIMD_2x64 load_be(const void* in) { return SIMD_2x64::load_le(in).bswap(); }
41
42
636k
      // Return a copy with the 8 bytes of each 64-bit lane reversed (lanes stay in place).
      // NOTE(review): BOTAN_FN_ISA_SIMD_2X64 is not defined in this file; presumably it
      // enables the ISA extension needed by _mm_shuffle_epi8 for this function - confirm in isa_extn.h
      SIMD_2x64 BOTAN_FN_ISA_SIMD_2X64 bswap() const {
43
636k
         const auto idx = _mm_set_epi8(8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7);  // _mm_set_epi8 lists byte 15 first: output byte 0 <- input byte 7, ..., byte 8 <- byte 15
44
636k
         return SIMD_2x64(_mm_shuffle_epi8(m_simd, idx));
45
636k
      }
46
47
3.18M
      // Store both lanes into out[0..1]; simply forwards to the byte-pointer overload
      void store_le(uint64_t out[2]) const { this->store_le(reinterpret_cast<uint8_t*>(out)); }
48
49
3.18M
      // Write the 16 bytes of the vector to `out` with an unaligned store; `out` must have room for 16 bytes
      void store_le(uint8_t out[]) const { _mm_storeu_si128(reinterpret_cast<__m128i*>(out), m_simd); }
50
51
10.8M
      // Lane-wise 64-bit addition returning a new vector; implemented as copy + operator+=
      SIMD_2x64 operator+(const SIMD_2x64& other) const {
52
10.8M
         SIMD_2x64 retval(*this);
53
10.8M
         retval += other;
54
10.8M
         return retval;
55
10.8M
      }
56
57
10.1M
      // Bitwise XOR returning a new vector; implemented as copy + operator^=
      SIMD_2x64 operator^(const SIMD_2x64& other) const {
58
10.1M
         SIMD_2x64 retval(*this);
59
10.1M
         retval ^= other;
60
10.1M
         return retval;
61
10.1M
      }
62
63
10.8M
      // In-place addition of the two 64-bit lanes independently (no carry between lanes). Returns void, so it cannot be chained.
      void operator+=(const SIMD_2x64& other) { m_simd = _mm_add_epi64(m_simd, other.m_simd); }
64
65
10.1M
      // In-place bitwise XOR of all 128 bits. Returns void, so it cannot be chained.
      void operator^=(const SIMD_2x64& other) { m_simd = _mm_xor_si128(m_simd, other.m_simd); }
66
67
      // Rotate each 64-bit lane right by ROT bits and return the result.
      // ROT is a compile-time constant constrained to 1..63 by the requires clause.
      // Rotations by a whole number of bytes (8, 16, 24, 32) are done with a single
      // byte shuffle; every other amount uses the generic shift-right | shift-left form.
      template <size_t ROT>
68
      BOTAN_FN_ISA_SIMD_2X64 SIMD_2x64 rotr() const
69
         requires(ROT > 0 && ROT < 64)
70
10.1M
      {
71
10.1M
         if constexpr(ROT == 8) {
72
2.54M
            auto tab = _mm_setr_epi8(1, 2, 3, 4, 5, 6, 7, 0, 9, 10, 11, 12, 13, 14, 15, 8);  // output byte i of each lane <- input byte (i + 1) mod 8
73
2.54M
            return SIMD_2x64(_mm_shuffle_epi8(m_simd, tab));
74
2.54M
         } else if constexpr(ROT == 16) {
75
0
            auto tab = _mm_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9);  // byte i <- byte (i + 2) mod 8
76
0
            return SIMD_2x64(_mm_shuffle_epi8(m_simd, tab));
77
0
         } else if constexpr(ROT == 24) {
78
0
            auto tab = _mm_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10);  // byte i <- byte (i + 3) mod 8
79
0
            return SIMD_2x64(_mm_shuffle_epi8(m_simd, tab));
80
0
         } else if constexpr(ROT == 32) {
81
0
            auto tab = _mm_setr_epi8(4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11);  // byte i <- byte (i + 4) mod 8, i.e. swap the 32-bit halves
82
0
            return SIMD_2x64(_mm_shuffle_epi8(m_simd, tab));
83
7.64M
         } else {
84
7.64M
            return SIMD_2x64(_mm_or_si128(_mm_srli_epi64(m_simd, static_cast<int>(ROT)),
85
7.64M
                                          _mm_slli_epi64(m_simd, static_cast<int>(64 - ROT))));
86
7.64M
         }
87
10.1M
      }
_ZNK5Botan9SIMD_2x644rotrILm1EEES0_vQaagtT_Li0EltT_Li64E
Line
Count
Source
70
2.54M
      {
71
         if constexpr(ROT == 8) {
72
            auto tab = _mm_setr_epi8(1, 2, 3, 4, 5, 6, 7, 0, 9, 10, 11, 12, 13, 14, 15, 8);
73
            return SIMD_2x64(_mm_shuffle_epi8(m_simd, tab));
74
         } else if constexpr(ROT == 16) {
75
            auto tab = _mm_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9);
76
            return SIMD_2x64(_mm_shuffle_epi8(m_simd, tab));
77
         } else if constexpr(ROT == 24) {
78
            auto tab = _mm_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10);
79
            return SIMD_2x64(_mm_shuffle_epi8(m_simd, tab));
80
         } else if constexpr(ROT == 32) {
81
            auto tab = _mm_setr_epi8(4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11);
82
            return SIMD_2x64(_mm_shuffle_epi8(m_simd, tab));
83
2.54M
         } else {
84
2.54M
            return SIMD_2x64(_mm_or_si128(_mm_srli_epi64(m_simd, static_cast<int>(ROT)),
85
2.54M
                                          _mm_slli_epi64(m_simd, static_cast<int>(64 - ROT))));
86
2.54M
         }
87
2.54M
      }
_ZNK5Botan9SIMD_2x644rotrILm8EEES0_vQaagtT_Li0EltT_Li64E
Line
Count
Source
70
2.54M
      {
71
2.54M
         if constexpr(ROT == 8) {
72
2.54M
            auto tab = _mm_setr_epi8(1, 2, 3, 4, 5, 6, 7, 0, 9, 10, 11, 12, 13, 14, 15, 8);
73
2.54M
            return SIMD_2x64(_mm_shuffle_epi8(m_simd, tab));
74
         } else if constexpr(ROT == 16) {
75
            auto tab = _mm_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9);
76
            return SIMD_2x64(_mm_shuffle_epi8(m_simd, tab));
77
         } else if constexpr(ROT == 24) {
78
            auto tab = _mm_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10);
79
            return SIMD_2x64(_mm_shuffle_epi8(m_simd, tab));
80
         } else if constexpr(ROT == 32) {
81
            auto tab = _mm_setr_epi8(4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11);
82
            return SIMD_2x64(_mm_shuffle_epi8(m_simd, tab));
83
         } else {
84
            return SIMD_2x64(_mm_or_si128(_mm_srli_epi64(m_simd, static_cast<int>(ROT)),
85
                                          _mm_slli_epi64(m_simd, static_cast<int>(64 - ROT))));
86
         }
87
2.54M
      }
_ZNK5Botan9SIMD_2x644rotrILm19EEES0_vQaagtT_Li0EltT_Li64E
Line
Count
Source
70
2.54M
      {
71
         if constexpr(ROT == 8) {
72
            auto tab = _mm_setr_epi8(1, 2, 3, 4, 5, 6, 7, 0, 9, 10, 11, 12, 13, 14, 15, 8);
73
            return SIMD_2x64(_mm_shuffle_epi8(m_simd, tab));
74
         } else if constexpr(ROT == 16) {
75
            auto tab = _mm_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9);
76
            return SIMD_2x64(_mm_shuffle_epi8(m_simd, tab));
77
         } else if constexpr(ROT == 24) {
78
            auto tab = _mm_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10);
79
            return SIMD_2x64(_mm_shuffle_epi8(m_simd, tab));
80
         } else if constexpr(ROT == 32) {
81
            auto tab = _mm_setr_epi8(4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11);
82
            return SIMD_2x64(_mm_shuffle_epi8(m_simd, tab));
83
2.54M
         } else {
84
2.54M
            return SIMD_2x64(_mm_or_si128(_mm_srli_epi64(m_simd, static_cast<int>(ROT)),
85
2.54M
                                          _mm_slli_epi64(m_simd, static_cast<int>(64 - ROT))));
86
2.54M
         }
87
2.54M
      }
_ZNK5Botan9SIMD_2x644rotrILm61EEES0_vQaagtT_Li0EltT_Li64E
Line
Count
Source
70
2.54M
      {
71
         if constexpr(ROT == 8) {
72
            auto tab = _mm_setr_epi8(1, 2, 3, 4, 5, 6, 7, 0, 9, 10, 11, 12, 13, 14, 15, 8);
73
            return SIMD_2x64(_mm_shuffle_epi8(m_simd, tab));
74
         } else if constexpr(ROT == 16) {
75
            auto tab = _mm_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9);
76
            return SIMD_2x64(_mm_shuffle_epi8(m_simd, tab));
77
         } else if constexpr(ROT == 24) {
78
            auto tab = _mm_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10);
79
            return SIMD_2x64(_mm_shuffle_epi8(m_simd, tab));
80
         } else if constexpr(ROT == 32) {
81
            auto tab = _mm_setr_epi8(4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11);
82
            return SIMD_2x64(_mm_shuffle_epi8(m_simd, tab));
83
2.54M
         } else {
84
2.54M
            return SIMD_2x64(_mm_or_si128(_mm_srli_epi64(m_simd, static_cast<int>(ROT)),
85
2.54M
                                          _mm_slli_epi64(m_simd, static_cast<int>(64 - ROT))));
86
2.54M
         }
87
2.54M
      }
Unexecuted instantiation: _ZNK5Botan9SIMD_2x644rotrILm32EEES0_vQaagtT_Li0EltT_Li64E
Unexecuted instantiation: _ZNK5Botan9SIMD_2x644rotrILm24EEES0_vQaagtT_Li0EltT_Li64E
Unexecuted instantiation: _ZNK5Botan9SIMD_2x644rotrILm16EEES0_vQaagtT_Li0EltT_Li64E
Unexecuted instantiation: _ZNK5Botan9SIMD_2x644rotrILm63EEES0_vQaagtT_Li0EltT_Li64E
88
89
      // Rotate each 64-bit lane left by ROT bits, expressed as a right rotation by 64 - ROT.
      // There is no constraint on ROT here; out-of-range values are rejected indirectly
      // because rotr<64 - ROT> requires its argument to lie in 1..63.
      template <size_t ROT>
90
      SIMD_2x64 rotl() const {
91
         return this->rotr<64 - ROT>();
92
      }
93
94
      // Logical right shift of each 64-bit lane by the compile-time constant SHIFT
      // (zeros are shifted in from the top). SHIFT is not range-checked here.
      template <int SHIFT>
95
5.09M
      SIMD_2x64 shr() const noexcept {
96
5.09M
         return SIMD_2x64(_mm_srli_epi64(m_simd, SHIFT));
97
5.09M
      }
Botan::SIMD_2x64 Botan::SIMD_2x64::shr<7>() const
Line
Count
Source
95
2.54M
      SIMD_2x64 shr() const noexcept {
96
2.54M
         return SIMD_2x64(_mm_srli_epi64(m_simd, SHIFT));
97
2.54M
      }
Botan::SIMD_2x64 Botan::SIMD_2x64::shr<6>() const
Line
Count
Source
95
2.54M
      SIMD_2x64 shr() const noexcept {
96
2.54M
         return SIMD_2x64(_mm_srli_epi64(m_simd, SHIFT));
97
2.54M
      }
98
99
5.09M
      // Concatenate a (upper 128 bits) with b (lower 128 bits), shift the 256-bit value
      // right by 8 bytes and keep the low 128 bits. In lane terms the result is
      // (low lane = high lane of b, high lane = low lane of a).
      static SIMD_2x64 BOTAN_FN_ISA_SIMD_2X64 alignr8(const SIMD_2x64& a, const SIMD_2x64& b) {
100
5.09M
         return SIMD_2x64(_mm_alignr_epi8(a.m_simd, b.m_simd, 8));
101
5.09M
      }
102
103
      // Argon2 specific operation
      //
      // Treats each register pair (X0, X1) as four 64-bit lanes (X0.lo, X0.hi, X1.lo, X1.hi)
      // and rotates them in place: B by one lane, C by two lanes (a plain swap of C0 and C1)
      // and D by three lanes. untwist() applies the inverse rotations.
      // All six arguments are modified through their references.
104
0
      static void twist(SIMD_2x64& B0, SIMD_2x64& B1, SIMD_2x64& C0, SIMD_2x64& C1, SIMD_2x64& D0, SIMD_2x64& D1) {
105
0
         SIMD_2x64 T0, T1;
106
0
107
0
         T0 = SIMD_2x64::alignr8(B1, B0);  // (B0.hi, B1.lo)
108
0
         T1 = SIMD_2x64::alignr8(B0, B1);  // (B1.hi, B0.lo)
109
0
         B0 = T0;
110
0
         B1 = T1;
111
0
112
0
         T0 = C0;  // swap C0 <-> C1 through T0
113
0
         C0 = C1;
114
0
         C1 = T0;
115
0
116
0
         T0 = SIMD_2x64::alignr8(D0, D1);  // (D1.hi, D0.lo)
117
0
         T1 = SIMD_2x64::alignr8(D1, D0);  // (D0.hi, D1.lo)
118
0
         D0 = T0;
119
0
         D1 = T1;
120
0
      }
121
122
      // Argon2 specific operation
      //
      // Inverse of twist(): with each pair (X0, X1) viewed as four 64-bit lanes
      // (X0.lo, X0.hi, X1.lo, X1.hi), B is rotated back by one lane, C0 and C1 are swapped
      // again, and D is rotated back by three lanes.
      // All six arguments are modified through their references.
123
0
      static void untwist(SIMD_2x64& B0, SIMD_2x64& B1, SIMD_2x64& C0, SIMD_2x64& C1, SIMD_2x64& D0, SIMD_2x64& D1) {
124
0
         SIMD_2x64 T0, T1;
125
0
126
0
         T0 = SIMD_2x64::alignr8(B0, B1);  // (B1.hi, B0.lo)
127
0
         T1 = SIMD_2x64::alignr8(B1, B0);  // (B0.hi, B1.lo)
128
0
         B0 = T0;
129
0
         B1 = T1;
130
0
131
0
         T0 = C0;  // swap C0 <-> C1 through T0
132
0
         C0 = C1;
133
0
         C1 = T0;
134
0
135
0
         T0 = SIMD_2x64::alignr8(D1, D0);  // (D0.hi, D1.lo)
136
0
         T1 = SIMD_2x64::alignr8(D0, D1);  // (D1.hi, D0.lo)
137
0
         D0 = T0;
138
0
         D1 = T1;
139
0
      }
140
141
      // Argon2 specific operation
      //
      // For each 64-bit lane: multiply the low 32 bits of x by the low 32 bits of y
      // (unsigned, full 64-bit product) and return twice that product, modulo 2^64.
142
0
      static SIMD_2x64 mul2_32(SIMD_2x64 x, SIMD_2x64 y) {
143
0
         const __m128i m = _mm_mul_epu32(x.m_simd, y.m_simd);
144
0
         return SIMD_2x64(_mm_add_epi64(m, m));  // m + m == 2 * m
145
0
      }
146
147
24.8M
      // Wrap a raw __m128i value; explicit so that a bare register never converts implicitly
      explicit SIMD_2x64(__m128i x) : m_simd(x) {}
148
149
   private:
150
      __m128i m_simd;
151
};
152
153
}  // namespace Botan
154
155
#endif