/src/botan/src/lib/pbkdf/argon2/argon2.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /** |
2 | | * (C) 2018,2019,2022 Jack Lloyd |
3 | | * |
4 | | * Botan is released under the Simplified BSD License (see license.txt) |
5 | | */ |
6 | | |
7 | | #include <botan/argon2.h> |
8 | | |
9 | | #include <botan/exceptn.h> |
10 | | #include <botan/hash.h> |
11 | | #include <botan/mem_ops.h> |
12 | | #include <botan/internal/fmt.h> |
13 | | #include <botan/internal/loadstor.h> |
14 | | #include <botan/internal/rotate.h> |
15 | | #include <limits> |
16 | | |
17 | | #if defined(BOTAN_HAS_THREAD_UTILS) |
18 | | #include <botan/internal/thread_pool.h> |
19 | | #endif |
20 | | |
21 | | #if defined(BOTAN_HAS_ARGON2_AVX2) || defined(BOTAN_HAS_ARGON2_SSSE3) |
22 | | #include <botan/internal/cpuid.h> |
23 | | #endif |
24 | | |
25 | | namespace Botan { |
26 | | |
27 | | namespace { |
28 | | |
29 | | const size_t SYNC_POINTS = 4; |
30 | | |
31 | | void argon2_H0(uint8_t H0[64], |
32 | | HashFunction& blake2b, |
33 | | size_t output_len, |
34 | | const char* password, |
35 | | size_t password_len, |
36 | | const uint8_t salt[], |
37 | | size_t salt_len, |
38 | | const uint8_t key[], |
39 | | size_t key_len, |
40 | | const uint8_t ad[], |
41 | | size_t ad_len, |
42 | | size_t y, |
43 | | size_t p, |
44 | | size_t M, |
45 | 268 | size_t t) { |
46 | 268 | const uint8_t v = 19; // Argon2 version code |
47 | | |
48 | 268 | blake2b.update_le(static_cast<uint32_t>(p)); |
49 | 268 | blake2b.update_le(static_cast<uint32_t>(output_len)); |
50 | 268 | blake2b.update_le(static_cast<uint32_t>(M)); |
51 | 268 | blake2b.update_le(static_cast<uint32_t>(t)); |
52 | 268 | blake2b.update_le(static_cast<uint32_t>(v)); |
53 | 268 | blake2b.update_le(static_cast<uint32_t>(y)); |
54 | | |
55 | 268 | blake2b.update_le(static_cast<uint32_t>(password_len)); |
56 | 268 | blake2b.update(cast_char_ptr_to_uint8(password), password_len); |
57 | | |
58 | 268 | blake2b.update_le(static_cast<uint32_t>(salt_len)); |
59 | 268 | blake2b.update(salt, salt_len); |
60 | | |
61 | 268 | blake2b.update_le(static_cast<uint32_t>(key_len)); |
62 | 268 | blake2b.update(key, key_len); |
63 | | |
64 | 268 | blake2b.update_le(static_cast<uint32_t>(ad_len)); |
65 | 268 | blake2b.update(ad, ad_len); |
66 | | |
67 | 268 | blake2b.final(H0); |
68 | 268 | } |
69 | | |
70 | 268 | void extract_key(uint8_t output[], size_t output_len, const secure_vector<uint64_t>& B, size_t memory, size_t threads) { |
71 | 268 | const size_t lanes = memory / threads; |
72 | | |
73 | 268 | uint64_t sum[128] = {0}; |
74 | | |
75 | 21.7k | for(size_t lane = 0; lane != threads; ++lane) { |
76 | 21.4k | const size_t start = 128 * (lane * lanes + lanes - 1); |
77 | 21.4k | const size_t end = 128 * (lane * lanes + lanes); |
78 | | |
79 | 2.76M | for(size_t j = start; j != end; ++j) { |
80 | 2.74M | sum[j % 128] ^= B[j]; |
81 | 2.74M | } |
82 | 21.4k | } |
83 | | |
84 | 268 | if(output_len <= 64) { |
85 | 50 | auto blake2b = HashFunction::create_or_throw(fmt("BLAKE2b({})", output_len * 8)); |
86 | 50 | blake2b->update_le(static_cast<uint32_t>(output_len)); |
87 | 6.45k | for(size_t i = 0; i != 128; ++i) { |
88 | 6.40k | blake2b->update_le(sum[i]); |
89 | 6.40k | } |
90 | 50 | blake2b->final(output); |
91 | 218 | } else { |
92 | 218 | secure_vector<uint8_t> T(64); |
93 | | |
94 | 218 | auto blake2b = HashFunction::create_or_throw("BLAKE2b(512)"); |
95 | 218 | blake2b->update_le(static_cast<uint32_t>(output_len)); |
96 | 28.1k | for(size_t i = 0; i != 128; ++i) { |
97 | 27.9k | blake2b->update_le(sum[i]); |
98 | 27.9k | } |
99 | 218 | blake2b->final(&T[0]); |
100 | | |
101 | 3.42k | while(output_len > 64) { |
102 | 3.20k | copy_mem(output, &T[0], 32); |
103 | 3.20k | output_len -= 32; |
104 | 3.20k | output += 32; |
105 | | |
106 | 3.20k | if(output_len > 64) { |
107 | 2.98k | blake2b->update(T); |
108 | 2.98k | blake2b->final(&T[0]); |
109 | 2.98k | } |
110 | 3.20k | } |
111 | | |
112 | 218 | if(output_len == 64) { |
113 | 19 | blake2b->update(T); |
114 | 19 | blake2b->final(output); |
115 | 199 | } else { |
116 | 199 | auto blake2b_f = HashFunction::create_or_throw(fmt("BLAKE2b({})", output_len * 8)); |
117 | 199 | blake2b_f->update(T); |
118 | 199 | blake2b_f->final(output); |
119 | 199 | } |
120 | 218 | } |
121 | 268 | } |
122 | | |
123 | | void init_blocks( |
124 | 268 | secure_vector<uint64_t>& B, HashFunction& blake2b, const uint8_t H0[64], size_t memory, size_t threads) { |
125 | 268 | BOTAN_ASSERT_NOMSG(B.size() >= threads * 256); |
126 | | |
127 | 21.7k | for(size_t i = 0; i != threads; ++i) { |
128 | 21.4k | const size_t B_off = i * (memory / threads); |
129 | | |
130 | 21.4k | BOTAN_ASSERT_NOMSG(B.size() >= 128 * (B_off + 2)); |
131 | | |
132 | 64.3k | for(size_t j = 0; j != 2; ++j) { |
133 | 42.9k | uint8_t T[64] = {0}; |
134 | | |
135 | 42.9k | blake2b.update_le(static_cast<uint32_t>(1024)); |
136 | 42.9k | blake2b.update(H0, 64); |
137 | 42.9k | blake2b.update_le(static_cast<uint32_t>(j)); |
138 | 42.9k | blake2b.update_le(static_cast<uint32_t>(i)); |
139 | 42.9k | blake2b.final(T); |
140 | | |
141 | 1.32M | for(size_t k = 0; k != 30; ++k) { |
142 | 1.28M | load_le(&B[128 * (B_off + j) + 4 * k], T, 32 / 8); |
143 | 1.28M | blake2b.update(T, 64); |
144 | 1.28M | blake2b.final(T); |
145 | 1.28M | } |
146 | | |
147 | 42.9k | load_le(&B[128 * (B_off + j) + 4 * 30], T, 64 / 8); |
148 | 42.9k | } |
149 | 21.4k | } |
150 | 268 | } |
151 | | |
152 | 0 | BOTAN_FORCE_INLINE void blamka_G(uint64_t& A, uint64_t& B, uint64_t& C, uint64_t& D) { |
153 | 0 | A += B + (static_cast<uint64_t>(2) * static_cast<uint32_t>(A)) * static_cast<uint32_t>(B); |
154 | 0 | D = rotr<32>(A ^ D); |
155 | |
|
156 | 0 | C += D + (static_cast<uint64_t>(2) * static_cast<uint32_t>(C)) * static_cast<uint32_t>(D); |
157 | 0 | B = rotr<24>(B ^ C); |
158 | |
|
159 | 0 | A += B + (static_cast<uint64_t>(2) * static_cast<uint32_t>(A)) * static_cast<uint32_t>(B); |
160 | 0 | D = rotr<16>(A ^ D); |
161 | |
|
162 | 0 | C += D + (static_cast<uint64_t>(2) * static_cast<uint32_t>(C)) * static_cast<uint32_t>(D); |
163 | 0 | B = rotr<63>(B ^ C); |
164 | 0 | } |
165 | | |
166 | | } // namespace |
167 | | |
168 | 8.22M | void Argon2::blamka(uint64_t N[128], uint64_t T[128]) { |
169 | 8.22M | #if defined(BOTAN_HAS_ARGON2_AVX2) |
170 | 8.22M | if(CPUID::has_avx2()) { |
171 | 8.22M | return Argon2::blamka_avx2(N, T); |
172 | 8.22M | } |
173 | 683 | #endif |
174 | | |
175 | 683 | #if defined(BOTAN_HAS_ARGON2_SSSE3) |
176 | 683 | if(CPUID::has_ssse3()) { |
177 | 0 | return Argon2::blamka_ssse3(N, T); |
178 | 0 | } |
179 | 683 | #endif |
180 | | |
181 | 683 | copy_mem(T, N, 128); |
182 | | |
183 | 683 | for(size_t i = 0; i != 128; i += 16) { |
184 | 0 | blamka_G(T[i + 0], T[i + 4], T[i + 8], T[i + 12]); |
185 | 0 | blamka_G(T[i + 1], T[i + 5], T[i + 9], T[i + 13]); |
186 | 0 | blamka_G(T[i + 2], T[i + 6], T[i + 10], T[i + 14]); |
187 | 0 | blamka_G(T[i + 3], T[i + 7], T[i + 11], T[i + 15]); |
188 | |
|
189 | 0 | blamka_G(T[i + 0], T[i + 5], T[i + 10], T[i + 15]); |
190 | 0 | blamka_G(T[i + 1], T[i + 6], T[i + 11], T[i + 12]); |
191 | 0 | blamka_G(T[i + 2], T[i + 7], T[i + 8], T[i + 13]); |
192 | 0 | blamka_G(T[i + 3], T[i + 4], T[i + 9], T[i + 14]); |
193 | 0 | } |
194 | | |
195 | 683 | for(size_t i = 0; i != 128 / 8; i += 2) { |
196 | 0 | blamka_G(T[i + 0], T[i + 32], T[i + 64], T[i + 96]); |
197 | 0 | blamka_G(T[i + 1], T[i + 33], T[i + 65], T[i + 97]); |
198 | 0 | blamka_G(T[i + 16], T[i + 48], T[i + 80], T[i + 112]); |
199 | 0 | blamka_G(T[i + 17], T[i + 49], T[i + 81], T[i + 113]); |
200 | |
|
201 | 0 | blamka_G(T[i + 0], T[i + 33], T[i + 80], T[i + 113]); |
202 | 0 | blamka_G(T[i + 1], T[i + 48], T[i + 81], T[i + 96]); |
203 | 0 | blamka_G(T[i + 16], T[i + 49], T[i + 64], T[i + 97]); |
204 | 0 | blamka_G(T[i + 17], T[i + 32], T[i + 65], T[i + 112]); |
205 | 0 | } |
206 | | |
207 | 683 | for(size_t i = 0; i != 128; ++i) { |
208 | 0 | N[i] ^= T[i]; |
209 | 0 | } |
210 | 683 | } |
211 | | |
212 | | namespace { |
213 | | |
214 | | void gen_2i_addresses(uint64_t T[128], |
215 | | uint64_t B[128], |
216 | | size_t n, |
217 | | size_t lane, |
218 | | size_t slice, |
219 | | size_t memory, |
220 | | size_t time, |
221 | | size_t mode, |
222 | 41.3k | size_t cnt) { |
223 | 41.3k | clear_mem(B, 128); |
224 | | |
225 | 41.3k | B[0] = n; |
226 | 41.3k | B[1] = lane; |
227 | 41.3k | B[2] = slice; |
228 | 41.3k | B[3] = memory; |
229 | 41.3k | B[4] = time; |
230 | 41.3k | B[5] = mode; |
231 | 41.3k | B[6] = cnt; |
232 | | |
233 | 123k | for(size_t r = 0; r != 2; ++r) { |
234 | 82.2k | Argon2::blamka(B, T); |
235 | 82.2k | } |
236 | 41.3k | } |
237 | | |
238 | | uint32_t index_alpha( |
239 | 8.29M | uint64_t random, size_t lanes, size_t segments, size_t threads, size_t n, size_t slice, size_t lane, size_t index) { |
240 | 8.29M | size_t ref_lane = static_cast<uint32_t>(random >> 32) % threads; |
241 | | |
242 | 8.29M | if(n == 0 && slice == 0) { |
243 | 1.30M | ref_lane = lane; |
244 | 1.30M | } |
245 | | |
246 | 8.29M | size_t m = 3 * segments; |
247 | 8.29M | size_t s = ((slice + 1) % 4) * segments; |
248 | | |
249 | 8.29M | if(lane == ref_lane) { |
250 | 1.70M | m += index; |
251 | 1.70M | } |
252 | | |
253 | 8.29M | if(n == 0) { |
254 | 5.35M | m = slice * segments; |
255 | 5.35M | s = 0; |
256 | 5.35M | if(slice == 0 || lane == ref_lane) { |
257 | 1.51M | m += index; |
258 | 1.51M | } |
259 | 5.35M | } |
260 | | |
261 | 8.29M | if(index == 0 || lane == ref_lane) { |
262 | 1.79M | m -= 1; |
263 | 1.79M | } |
264 | | |
265 | 8.29M | uint64_t p = static_cast<uint32_t>(random); |
266 | 8.29M | p = (p * p) >> 32; |
267 | 8.29M | p = (p * m) >> 32; |
268 | | |
269 | 8.29M | return static_cast<uint32_t>(ref_lane * lanes + (s + m - (p + 1)) % lanes); |
270 | 8.29M | } |
271 | | |
272 | | void process_block(secure_vector<uint64_t>& B, |
273 | | size_t n, |
274 | | size_t slice, |
275 | | size_t lane, |
276 | | size_t lanes, |
277 | | size_t segments, |
278 | | size_t threads, |
279 | | uint8_t mode, |
280 | | size_t memory, |
281 | 118k | size_t time) { |
282 | 118k | uint64_t T[128]; |
283 | 118k | size_t index = 0; |
284 | 118k | if(n == 0 && slice == 0) { |
285 | 21.4k | index = 2; |
286 | 21.4k | } |
287 | | |
288 | 118k | const bool use_2i = mode == 1 || (mode == 2 && n == 0 && slice < SYNC_POINTS / 2); |
289 | | |
290 | 118k | uint64_t addresses[128]; |
291 | 118k | size_t address_counter = 1; |
292 | | |
293 | 118k | if(use_2i) { |
294 | 30.6k | gen_2i_addresses(T, addresses, n, lane, slice, memory, time, mode, address_counter); |
295 | 30.6k | } |
296 | | |
297 | 8.41M | while(index < segments) { |
298 | 8.29M | const size_t offset = lane * lanes + slice * segments + index; |
299 | | |
300 | 8.29M | size_t prev = offset - 1; |
301 | 8.29M | if(index == 0 && slice == 0) { |
302 | 8.21k | prev += lanes; |
303 | 8.21k | } |
304 | | |
305 | 8.29M | if(use_2i && index > 0 && index % 128 == 0) { |
306 | 10.6k | address_counter += 1; |
307 | 10.6k | gen_2i_addresses(T, addresses, n, lane, slice, memory, time, mode, address_counter); |
308 | 10.6k | } |
309 | | |
310 | 8.29M | const uint64_t random = use_2i ? addresses[index % 128] : B.at(128 * prev); |
311 | 8.29M | const size_t new_offset = index_alpha(random, lanes, segments, threads, n, slice, lane, index); |
312 | | |
313 | 8.29M | uint64_t N[128]; |
314 | 232M | for(size_t i = 0; i != 128; ++i) { |
315 | 223M | N[i] = B[128 * prev + i] ^ B[128 * new_offset + i]; |
316 | 223M | } |
317 | | |
318 | 8.29M | Argon2::blamka(N, T); |
319 | | |
320 | 278M | for(size_t i = 0; i != 128; ++i) { |
321 | 270M | B[128 * offset + i] ^= N[i]; |
322 | 270M | } |
323 | | |
324 | 8.29M | index += 1; |
325 | 8.29M | } |
326 | 118k | } |
327 | | |
328 | 268 | void process_blocks(secure_vector<uint64_t>& B, size_t t, size_t memory, size_t threads, uint8_t mode) { |
329 | 268 | const size_t lanes = memory / threads; |
330 | 268 | const size_t segments = lanes / SYNC_POINTS; |
331 | | |
332 | 268 | #if defined(BOTAN_HAS_THREAD_UTILS) |
333 | 268 | auto& thread_pool = Thread_Pool::global_instance(); |
334 | 268 | #endif |
335 | | |
336 | 640 | for(size_t n = 0; n != t; ++n) { |
337 | 1.86k | for(size_t slice = 0; slice != SYNC_POINTS; ++slice) { |
338 | 1.48k | #if defined(BOTAN_HAS_THREAD_UTILS) |
339 | 1.48k | if(threads > 1) { |
340 | 1.45k | std::vector<std::future<void>> fut_results; |
341 | 1.45k | fut_results.reserve(threads); |
342 | | |
343 | 120k | for(size_t lane = 0; lane != threads; ++lane) { |
344 | 118k | fut_results.push_back(thread_pool.run( |
345 | 118k | process_block, std::ref(B), n, slice, lane, lanes, segments, threads, mode, memory, t)); |
346 | 118k | } |
347 | | |
348 | 118k | for(auto& fut : fut_results) { |
349 | 118k | fut.get(); |
350 | 118k | } |
351 | | |
352 | 1.45k | continue; |
353 | 1.45k | } |
354 | 32 | #endif |
355 | | |
356 | 64 | for(size_t lane = 0; lane != threads; ++lane) { |
357 | 32 | process_block(B, n, slice, lane, lanes, segments, threads, mode, memory, t); |
358 | 32 | } |
359 | 32 | } |
360 | 372 | } |
361 | 268 | } |
362 | | |
363 | | } // namespace |
364 | | |
365 | | void Argon2::argon2(uint8_t output[], |
366 | | size_t output_len, |
367 | | const char* password, |
368 | | size_t password_len, |
369 | | const uint8_t salt[], |
370 | | size_t salt_len, |
371 | | const uint8_t key[], |
372 | | size_t key_len, |
373 | | const uint8_t ad[], |
374 | 275 | size_t ad_len) const { |
375 | 275 | BOTAN_ARG_CHECK(output_len >= 4 && output_len <= std::numeric_limits<uint32_t>::max(), |
376 | 275 | "Invalid Argon2 output length"); |
377 | 275 | BOTAN_ARG_CHECK(password_len <= std::numeric_limits<uint32_t>::max(), "Invalid Argon2 password length"); |
378 | 275 | BOTAN_ARG_CHECK(salt_len <= std::numeric_limits<uint32_t>::max(), "Invalid Argon2 salt length"); |
379 | 275 | BOTAN_ARG_CHECK(key_len <= std::numeric_limits<uint32_t>::max(), "Invalid Argon2 key length"); |
380 | 275 | BOTAN_ARG_CHECK(ad_len <= std::numeric_limits<uint32_t>::max(), "Invalid Argon2 ad length"); |
381 | | |
382 | 275 | auto blake2 = HashFunction::create_or_throw("BLAKE2b"); |
383 | | |
384 | 275 | uint8_t H0[64] = {0}; |
385 | 275 | argon2_H0(H0, |
386 | 275 | *blake2, |
387 | 275 | output_len, |
388 | 275 | password, |
389 | 275 | password_len, |
390 | 275 | salt, |
391 | 275 | salt_len, |
392 | 275 | key, |
393 | 275 | key_len, |
394 | 275 | ad, |
395 | 275 | ad_len, |
396 | 275 | m_family, |
397 | 275 | m_p, |
398 | 275 | m_M, |
399 | 275 | m_t); |
400 | | |
401 | 275 | const size_t memory = (m_M / (SYNC_POINTS * m_p)) * (SYNC_POINTS * m_p); |
402 | | |
403 | 275 | secure_vector<uint64_t> B(memory * 1024 / 8); |
404 | | |
405 | 275 | init_blocks(B, *blake2, H0, memory, m_p); |
406 | 275 | process_blocks(B, m_t, memory, m_p, m_family); |
407 | | |
408 | 275 | clear_mem(output, output_len); |
409 | 275 | extract_key(output, output_len, B, memory, m_p); |
410 | 275 | } |
411 | | |
412 | | } // namespace Botan |