/src/botan/src/lib/pbkdf/argon2/argon2_ssse3/argon2_ssse3.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /** |
2 | | * (C) 2022 Jack Lloyd |
3 | | * |
4 | | * Botan is released under the Simplified BSD License (see license.txt) |
5 | | */ |
6 | | |
7 | | #include <botan/internal/argon2_ssse3.h> |
8 | | #include <tmmintrin.h> |
9 | | |
10 | | namespace Botan { |
11 | | |
12 | | namespace { |
13 | | |
14 | | class SIMD_2x64 final |
15 | | { |
16 | | public: |
17 | | SIMD_2x64& operator=(const SIMD_2x64& other) = default; |
18 | | SIMD_2x64(const SIMD_2x64& other) = default; |
19 | | |
20 | | SIMD_2x64& operator=(SIMD_2x64&& other) = default; |
21 | | SIMD_2x64(SIMD_2x64&& other) = default; |
22 | | |
23 | | SIMD_2x64() // zero initialized |
24 | 0 | { |
25 | 0 | m_simd = _mm_setzero_si128(); |
26 | 0 | } |
27 | | |
28 | | static SIMD_2x64 load_le(const void* in) |
29 | 0 | { |
30 | 0 | return SIMD_2x64(_mm_loadu_si128(reinterpret_cast<const __m128i*>(in))); |
31 | 0 | } |
32 | | |
33 | | void store_le(uint64_t out[2]) const |
34 | 0 | { |
35 | 0 | this->store_le(reinterpret_cast<uint8_t*>(out)); |
36 | 0 | } |
37 | | |
38 | | void store_le(uint8_t out[]) const |
39 | 0 | { |
40 | 0 | _mm_storeu_si128(reinterpret_cast<__m128i*>(out), m_simd); |
41 | 0 | } |
42 | | |
43 | | SIMD_2x64 operator+(const SIMD_2x64& other) const |
44 | 0 | { |
45 | 0 | SIMD_2x64 retval(*this); |
46 | 0 | retval += other; |
47 | 0 | return retval; |
48 | 0 | } |
49 | | |
50 | | SIMD_2x64 operator^(const SIMD_2x64& other) const |
51 | 0 | { |
52 | 0 | SIMD_2x64 retval(*this); |
53 | 0 | retval ^= other; |
54 | 0 | return retval; |
55 | 0 | } |
56 | | |
57 | | void operator+=(const SIMD_2x64& other) |
58 | 0 | { |
59 | 0 | m_simd = _mm_add_epi64(m_simd, other.m_simd); |
60 | 0 | } |
61 | | |
62 | | void operator^=(const SIMD_2x64& other) |
63 | 0 | { |
64 | 0 | m_simd = _mm_xor_si128(m_simd, other.m_simd); |
65 | 0 | } |
66 | | |
67 | | template<size_t ROT> |
68 | | BOTAN_FUNC_ISA("ssse3") |
69 | | SIMD_2x64 rotr() const |
70 | 0 | { |
71 | 0 | static_assert(ROT > 0 && ROT < 64, "Invalid rotation constant"); |
72 | |
|
73 | 0 | if constexpr(ROT == 16) |
74 | 0 | { |
75 | 0 | auto tab = _mm_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9); |
76 | 0 | return SIMD_2x64(_mm_shuffle_epi8(m_simd, tab)); |
77 | 0 | } |
78 | 0 | else if constexpr(ROT == 24) |
79 | 0 | { |
80 | 0 | auto tab = _mm_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10); |
81 | 0 | return SIMD_2x64(_mm_shuffle_epi8(m_simd, tab)); |
82 | 0 | } |
83 | 0 | else if constexpr(ROT == 32) |
84 | 0 | { |
85 | 0 | auto tab = _mm_setr_epi8(4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11); |
86 | 0 | return SIMD_2x64(_mm_shuffle_epi8(m_simd, tab)); |
87 | 0 | } |
88 | 0 | else |
89 | 0 | { |
90 | 0 | return SIMD_2x64(_mm_or_si128(_mm_srli_epi64(m_simd, static_cast<int>(ROT)), |
91 | 0 | _mm_slli_epi64(m_simd, static_cast<int>(64-ROT)))); |
92 | 0 | } |
93 | 0 | } Unexecuted instantiation: argon2_ssse3.cpp:Botan::(anonymous namespace)::SIMD_2x64 Botan::(anonymous namespace)::SIMD_2x64::rotr<32ul>() const Unexecuted instantiation: argon2_ssse3.cpp:Botan::(anonymous namespace)::SIMD_2x64 Botan::(anonymous namespace)::SIMD_2x64::rotr<24ul>() const Unexecuted instantiation: argon2_ssse3.cpp:Botan::(anonymous namespace)::SIMD_2x64 Botan::(anonymous namespace)::SIMD_2x64::rotr<16ul>() const Unexecuted instantiation: argon2_ssse3.cpp:Botan::(anonymous namespace)::SIMD_2x64 Botan::(anonymous namespace)::SIMD_2x64::rotr<63ul>() const |
94 | | |
95 | | template<size_t ROT> |
96 | | SIMD_2x64 rotl() const |
97 | | { |
98 | | return this->rotr<64-ROT>(); |
99 | | } |
100 | | |
101 | | // Argon2 specific operation |
102 | | static SIMD_2x64 mul2_32(SIMD_2x64 x, SIMD_2x64 y) |
103 | 0 | { |
104 | 0 | const __m128i m = _mm_mul_epu32(x.m_simd, y.m_simd); |
105 | 0 | return SIMD_2x64(_mm_add_epi64(m, m)); |
106 | 0 | } |
107 | | |
108 | | template<size_t T> |
109 | | BOTAN_FUNC_ISA("ssse3") |
110 | | static SIMD_2x64 alignr(SIMD_2x64 a, SIMD_2x64 b) |
111 | 0 | { |
112 | 0 | static_assert(T > 0 && T < 16, "Invalid alignr constant"); |
113 | 0 | return SIMD_2x64(_mm_alignr_epi8(a.m_simd, b.m_simd, T)); |
114 | 0 | } |
115 | | |
116 | | // Argon2 specific |
117 | | static void twist( |
118 | | SIMD_2x64& B0, |
119 | | SIMD_2x64& B1, |
120 | | SIMD_2x64& C0, |
121 | | SIMD_2x64& C1, |
122 | | SIMD_2x64& D0, |
123 | | SIMD_2x64& D1) |
124 | 0 | { |
125 | 0 | SIMD_2x64 T0, T1; |
126 | |
|
127 | 0 | T0 = SIMD_2x64::alignr<8>(B1, B0); |
128 | 0 | T1 = SIMD_2x64::alignr<8>(B0, B1); |
129 | 0 | B0 = T0; |
130 | 0 | B1 = T1; |
131 | |
|
132 | 0 | T0 = C0; |
133 | 0 | C0 = C1; |
134 | 0 | C1 = T0; |
135 | |
|
136 | 0 | T0 = SIMD_2x64::alignr<8>(D0, D1); |
137 | 0 | T1 = SIMD_2x64::alignr<8>(D1, D0); |
138 | 0 | D0 = T0; |
139 | 0 | D1 = T1; |
140 | 0 | } |
141 | | |
142 | | // Argon2 specific |
143 | | static void untwist( |
144 | | SIMD_2x64& B0, |
145 | | SIMD_2x64& B1, |
146 | | SIMD_2x64& C0, |
147 | | SIMD_2x64& C1, |
148 | | SIMD_2x64& D0, |
149 | | SIMD_2x64& D1) |
150 | 0 | { |
151 | 0 | SIMD_2x64 T0, T1; |
152 | |
|
153 | 0 | T0 = SIMD_2x64::alignr<8>(B0, B1); |
154 | 0 | T1 = SIMD_2x64::alignr<8>(B1, B0); |
155 | 0 | B0 = T0; |
156 | 0 | B1 = T1; |
157 | |
|
158 | 0 | T0 = C0; |
159 | 0 | C0 = C1; |
160 | 0 | C1 = T0; |
161 | |
|
162 | 0 | T0 = SIMD_2x64::alignr<8>(D1, D0); |
163 | 0 | T1 = SIMD_2x64::alignr<8>(D0, D1); |
164 | 0 | D0 = T0; |
165 | 0 | D1 = T1; |
166 | 0 | } |
167 | | |
168 | 0 | explicit SIMD_2x64(__m128i x) : m_simd(x) {} |
169 | | private: |
170 | | __m128i m_simd; |
171 | | }; |
172 | | |
173 | | BOTAN_FORCE_INLINE void blamka_G( |
174 | | SIMD_2x64& A0, |
175 | | SIMD_2x64& A1, |
176 | | SIMD_2x64& B0, |
177 | | SIMD_2x64& B1, |
178 | | SIMD_2x64& C0, |
179 | | SIMD_2x64& C1, |
180 | | SIMD_2x64& D0, |
181 | | SIMD_2x64& D1) |
182 | 0 | { |
183 | 0 | A0 += B0 + SIMD_2x64::mul2_32(A0, B0); |
184 | 0 | A1 += B1 + SIMD_2x64::mul2_32(A1, B1); |
185 | 0 | D0 ^= A0; |
186 | 0 | D1 ^= A1; |
187 | 0 | D0 = D0.rotr<32>(); |
188 | 0 | D1 = D1.rotr<32>(); |
189 | |
|
190 | 0 | C0 += D0 + SIMD_2x64::mul2_32(C0, D0); |
191 | 0 | C1 += D1 + SIMD_2x64::mul2_32(C1, D1); |
192 | 0 | B0 ^= C0; |
193 | 0 | B1 ^= C1; |
194 | 0 | B0 = B0.rotr<24>(); |
195 | 0 | B1 = B1.rotr<24>(); |
196 | |
|
197 | 0 | A0 += B0 + SIMD_2x64::mul2_32(A0, B0); |
198 | 0 | A1 += B1 + SIMD_2x64::mul2_32(A1, B1); |
199 | 0 | D0 ^= A0; |
200 | 0 | D1 ^= A1; |
201 | 0 | D0 = D0.rotr<16>(); |
202 | 0 | D1 = D1.rotr<16>(); |
203 | |
|
204 | 0 | C0 += D0 + SIMD_2x64::mul2_32(C0, D0); |
205 | 0 | C1 += D1 + SIMD_2x64::mul2_32(C1, D1); |
206 | 0 | B0 ^= C0; |
207 | 0 | B1 ^= C1; |
208 | 0 | B0 = B0.rotr<63>(); |
209 | 0 | B1 = B1.rotr<63>(); |
210 | 0 | } |
211 | | |
212 | | BOTAN_FORCE_INLINE void blamka_R( |
213 | | SIMD_2x64& A0, |
214 | | SIMD_2x64& A1, |
215 | | SIMD_2x64& B0, |
216 | | SIMD_2x64& B1, |
217 | | SIMD_2x64& C0, |
218 | | SIMD_2x64& C1, |
219 | | SIMD_2x64& D0, |
220 | | SIMD_2x64& D1) |
221 | 0 | { |
222 | 0 | blamka_G(A0, A1, B0, B1, C0, C1, D0, D1); |
223 | |
|
224 | 0 | SIMD_2x64::twist(B0, B1, C0, C1, D0, D1); |
225 | 0 | blamka_G(A0, A1, B0, B1, C0, C1, D0, D1); |
226 | 0 | SIMD_2x64::untwist(B0, B1, C0, C1, D0, D1); |
227 | 0 | } |
228 | | |
229 | | } |
230 | | |
231 | | void blamka_ssse3(uint64_t T[128]) |
232 | 0 | { |
233 | 0 | for(size_t i = 0; i != 8; ++i) |
234 | 0 | { |
235 | 0 | SIMD_2x64 Tv[8]; |
236 | 0 | for(size_t j = 0; j != 8; ++j) |
237 | 0 | Tv[j] = SIMD_2x64::load_le(&T[2*(8*i+j)]); |
238 | |
|
239 | 0 | blamka_R(Tv[0], Tv[1], Tv[2], Tv[3], |
240 | 0 | Tv[4], Tv[5], Tv[6], Tv[7]); |
241 | |
|
242 | 0 | for(size_t j = 0; j != 8; ++j) |
243 | 0 | Tv[j].store_le(&T[2*(8*i+j)]); |
244 | 0 | } |
245 | |
|
246 | 0 | for(size_t i = 0; i != 8; ++i) |
247 | 0 | { |
248 | 0 | SIMD_2x64 Tv[8]; |
249 | 0 | for(size_t j = 0; j != 8; ++j) |
250 | 0 | Tv[j] = SIMD_2x64::load_le(&T[2*(i+8*j)]); |
251 | |
|
252 | 0 | blamka_R(Tv[0], Tv[1], Tv[2], Tv[3], |
253 | 0 | Tv[4], Tv[5], Tv[6], Tv[7]); |
254 | |
|
255 | 0 | for(size_t j = 0; j != 8; ++j) |
256 | 0 | Tv[j].store_le(&T[2*(i+8*j)]); |
257 | 0 | } |
258 | 0 | } |
259 | | |
260 | | } |