/src/botan/src/lib/pbkdf/argon2/argon2.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /** |
2 | | * (C) 2018,2019,2022 Jack Lloyd |
3 | | * |
4 | | * Botan is released under the Simplified BSD License (see license.txt) |
5 | | */ |
6 | | |
7 | | #include <botan/argon2.h> |
8 | | |
9 | | #include <botan/exceptn.h> |
10 | | #include <botan/hash.h> |
11 | | #include <botan/mem_ops.h> |
12 | | #include <botan/internal/fmt.h> |
13 | | #include <botan/internal/loadstor.h> |
14 | | #include <botan/internal/rotate.h> |
15 | | #include <limits> |
16 | | |
17 | | #if defined(BOTAN_HAS_THREAD_UTILS) |
18 | | #include <botan/internal/thread_pool.h> |
19 | | #endif |
20 | | |
21 | | #if defined(BOTAN_HAS_ARGON2_AVX2) || defined(BOTAN_HAS_ARGON2_SSSE3) |
22 | | #include <botan/internal/cpuid.h> |
23 | | #endif |
24 | | |
25 | | namespace Botan { |
26 | | |
27 | | namespace { |
28 | | |
29 | | const size_t SYNC_POINTS = 4; |
30 | | |
31 | | void argon2_H0(uint8_t H0[64], |
32 | | HashFunction& blake2b, |
33 | | size_t output_len, |
34 | | const char* password, |
35 | | size_t password_len, |
36 | | const uint8_t salt[], |
37 | | size_t salt_len, |
38 | | const uint8_t key[], |
39 | | size_t key_len, |
40 | | const uint8_t ad[], |
41 | | size_t ad_len, |
42 | | size_t y, |
43 | | size_t p, |
44 | | size_t M, |
45 | 0 | size_t t) { |
46 | 0 | const uint8_t v = 19; // Argon2 version code |
47 | |
|
48 | 0 | blake2b.update_le(static_cast<uint32_t>(p)); |
49 | 0 | blake2b.update_le(static_cast<uint32_t>(output_len)); |
50 | 0 | blake2b.update_le(static_cast<uint32_t>(M)); |
51 | 0 | blake2b.update_le(static_cast<uint32_t>(t)); |
52 | 0 | blake2b.update_le(static_cast<uint32_t>(v)); |
53 | 0 | blake2b.update_le(static_cast<uint32_t>(y)); |
54 | |
|
55 | 0 | blake2b.update_le(static_cast<uint32_t>(password_len)); |
56 | 0 | blake2b.update(cast_char_ptr_to_uint8(password), password_len); |
57 | |
|
58 | 0 | blake2b.update_le(static_cast<uint32_t>(salt_len)); |
59 | 0 | blake2b.update(salt, salt_len); |
60 | |
|
61 | 0 | blake2b.update_le(static_cast<uint32_t>(key_len)); |
62 | 0 | blake2b.update(key, key_len); |
63 | |
|
64 | 0 | blake2b.update_le(static_cast<uint32_t>(ad_len)); |
65 | 0 | blake2b.update(ad, ad_len); |
66 | |
|
67 | 0 | blake2b.final(H0); |
68 | 0 | } |
69 | | |
70 | 0 | void extract_key(uint8_t output[], size_t output_len, const secure_vector<uint64_t>& B, size_t memory, size_t threads) { |
71 | 0 | const size_t lanes = memory / threads; |
72 | |
|
73 | 0 | uint64_t sum[128] = {0}; |
74 | |
|
75 | 0 | for(size_t lane = 0; lane != threads; ++lane) { |
76 | 0 | const size_t start = 128 * (lane * lanes + lanes - 1); |
77 | 0 | const size_t end = 128 * (lane * lanes + lanes); |
78 | |
|
79 | 0 | for(size_t j = start; j != end; ++j) { |
80 | 0 | sum[j % 128] ^= B[j]; |
81 | 0 | } |
82 | 0 | } |
83 | |
|
84 | 0 | if(output_len <= 64) { |
85 | 0 | auto blake2b = HashFunction::create_or_throw(fmt("BLAKE2b({})", output_len * 8)); |
86 | 0 | blake2b->update_le(static_cast<uint32_t>(output_len)); |
87 | 0 | for(size_t i = 0; i != 128; ++i) { |
88 | 0 | blake2b->update_le(sum[i]); |
89 | 0 | } |
90 | 0 | blake2b->final(output); |
91 | 0 | } else { |
92 | 0 | secure_vector<uint8_t> T(64); |
93 | |
|
94 | 0 | auto blake2b = HashFunction::create_or_throw("BLAKE2b(512)"); |
95 | 0 | blake2b->update_le(static_cast<uint32_t>(output_len)); |
96 | 0 | for(size_t i = 0; i != 128; ++i) { |
97 | 0 | blake2b->update_le(sum[i]); |
98 | 0 | } |
99 | 0 | blake2b->final(&T[0]); |
100 | |
|
101 | 0 | while(output_len > 64) { |
102 | 0 | copy_mem(output, &T[0], 32); |
103 | 0 | output_len -= 32; |
104 | 0 | output += 32; |
105 | |
|
106 | 0 | if(output_len > 64) { |
107 | 0 | blake2b->update(T); |
108 | 0 | blake2b->final(&T[0]); |
109 | 0 | } |
110 | 0 | } |
111 | |
|
112 | 0 | if(output_len == 64) { |
113 | 0 | blake2b->update(T); |
114 | 0 | blake2b->final(output); |
115 | 0 | } else { |
116 | 0 | auto blake2b_f = HashFunction::create_or_throw(fmt("BLAKE2b({})", output_len * 8)); |
117 | 0 | blake2b_f->update(T); |
118 | 0 | blake2b_f->final(output); |
119 | 0 | } |
120 | 0 | } |
121 | 0 | } |
122 | | |
123 | | void init_blocks( |
124 | 0 | secure_vector<uint64_t>& B, HashFunction& blake2b, const uint8_t H0[64], size_t memory, size_t threads) { |
125 | 0 | BOTAN_ASSERT_NOMSG(B.size() >= threads * 256); |
126 | |
|
127 | 0 | for(size_t i = 0; i != threads; ++i) { |
128 | 0 | const size_t B_off = i * (memory / threads); |
129 | |
|
130 | 0 | BOTAN_ASSERT_NOMSG(B.size() >= 128 * (B_off + 2)); |
131 | |
|
132 | 0 | for(size_t j = 0; j != 2; ++j) { |
133 | 0 | uint8_t T[64] = {0}; |
134 | |
|
135 | 0 | blake2b.update_le(static_cast<uint32_t>(1024)); |
136 | 0 | blake2b.update(H0, 64); |
137 | 0 | blake2b.update_le(static_cast<uint32_t>(j)); |
138 | 0 | blake2b.update_le(static_cast<uint32_t>(i)); |
139 | 0 | blake2b.final(T); |
140 | |
|
141 | 0 | for(size_t k = 0; k != 30; ++k) { |
142 | 0 | load_le(&B[128 * (B_off + j) + 4 * k], T, 32 / 8); |
143 | 0 | blake2b.update(T, 64); |
144 | 0 | blake2b.final(T); |
145 | 0 | } |
146 | |
|
147 | 0 | load_le(&B[128 * (B_off + j) + 4 * 30], T, 64 / 8); |
148 | 0 | } |
149 | 0 | } |
150 | 0 | } |
151 | | |
152 | 0 | BOTAN_FORCE_INLINE void blamka_G(uint64_t& A, uint64_t& B, uint64_t& C, uint64_t& D) { |
153 | 0 | A += B + (static_cast<uint64_t>(2) * static_cast<uint32_t>(A)) * static_cast<uint32_t>(B); |
154 | 0 | D = rotr<32>(A ^ D); |
155 | |
|
156 | 0 | C += D + (static_cast<uint64_t>(2) * static_cast<uint32_t>(C)) * static_cast<uint32_t>(D); |
157 | 0 | B = rotr<24>(B ^ C); |
158 | |
|
159 | 0 | A += B + (static_cast<uint64_t>(2) * static_cast<uint32_t>(A)) * static_cast<uint32_t>(B); |
160 | 0 | D = rotr<16>(A ^ D); |
161 | |
|
162 | 0 | C += D + (static_cast<uint64_t>(2) * static_cast<uint32_t>(C)) * static_cast<uint32_t>(D); |
163 | 0 | B = rotr<63>(B ^ C); |
164 | 0 | } |
165 | | |
166 | | } // namespace |
167 | | |
168 | 0 | void Argon2::blamka(uint64_t N[128], uint64_t T[128]) { |
169 | 0 | #if defined(BOTAN_HAS_ARGON2_AVX2) |
170 | 0 | if(CPUID::has_avx2()) { |
171 | 0 | return Argon2::blamka_avx2(N, T); |
172 | 0 | } |
173 | 0 | #endif |
174 | | |
175 | 0 | #if defined(BOTAN_HAS_ARGON2_SSSE3) |
176 | 0 | if(CPUID::has_ssse3()) { |
177 | 0 | return Argon2::blamka_ssse3(N, T); |
178 | 0 | } |
179 | 0 | #endif |
180 | | |
181 | 0 | copy_mem(T, N, 128); |
182 | |
|
183 | 0 | for(size_t i = 0; i != 128; i += 16) { |
184 | 0 | blamka_G(T[i + 0], T[i + 4], T[i + 8], T[i + 12]); |
185 | 0 | blamka_G(T[i + 1], T[i + 5], T[i + 9], T[i + 13]); |
186 | 0 | blamka_G(T[i + 2], T[i + 6], T[i + 10], T[i + 14]); |
187 | 0 | blamka_G(T[i + 3], T[i + 7], T[i + 11], T[i + 15]); |
188 | |
|
189 | 0 | blamka_G(T[i + 0], T[i + 5], T[i + 10], T[i + 15]); |
190 | 0 | blamka_G(T[i + 1], T[i + 6], T[i + 11], T[i + 12]); |
191 | 0 | blamka_G(T[i + 2], T[i + 7], T[i + 8], T[i + 13]); |
192 | 0 | blamka_G(T[i + 3], T[i + 4], T[i + 9], T[i + 14]); |
193 | 0 | } |
194 | |
|
195 | 0 | for(size_t i = 0; i != 128 / 8; i += 2) { |
196 | 0 | blamka_G(T[i + 0], T[i + 32], T[i + 64], T[i + 96]); |
197 | 0 | blamka_G(T[i + 1], T[i + 33], T[i + 65], T[i + 97]); |
198 | 0 | blamka_G(T[i + 16], T[i + 48], T[i + 80], T[i + 112]); |
199 | 0 | blamka_G(T[i + 17], T[i + 49], T[i + 81], T[i + 113]); |
200 | |
|
201 | 0 | blamka_G(T[i + 0], T[i + 33], T[i + 80], T[i + 113]); |
202 | 0 | blamka_G(T[i + 1], T[i + 48], T[i + 81], T[i + 96]); |
203 | 0 | blamka_G(T[i + 16], T[i + 49], T[i + 64], T[i + 97]); |
204 | 0 | blamka_G(T[i + 17], T[i + 32], T[i + 65], T[i + 112]); |
205 | 0 | } |
206 | |
|
207 | 0 | for(size_t i = 0; i != 128; ++i) { |
208 | 0 | N[i] ^= T[i]; |
209 | 0 | } |
210 | 0 | } |
211 | | |
212 | | namespace { |
213 | | |
214 | | void gen_2i_addresses(uint64_t T[128], |
215 | | uint64_t B[128], |
216 | | size_t n, |
217 | | size_t lane, |
218 | | size_t slice, |
219 | | size_t memory, |
220 | | size_t time, |
221 | | size_t mode, |
222 | 0 | size_t cnt) { |
223 | 0 | clear_mem(B, 128); |
224 | |
|
225 | 0 | B[0] = n; |
226 | 0 | B[1] = lane; |
227 | 0 | B[2] = slice; |
228 | 0 | B[3] = memory; |
229 | 0 | B[4] = time; |
230 | 0 | B[5] = mode; |
231 | 0 | B[6] = cnt; |
232 | |
|
233 | 0 | for(size_t r = 0; r != 2; ++r) { |
234 | 0 | Argon2::blamka(B, T); |
235 | 0 | } |
236 | 0 | } |
237 | | |
238 | | uint32_t index_alpha( |
239 | 0 | uint64_t random, size_t lanes, size_t segments, size_t threads, size_t n, size_t slice, size_t lane, size_t index) { |
240 | 0 | size_t ref_lane = static_cast<uint32_t>(random >> 32) % threads; |
241 | |
|
242 | 0 | if(n == 0 && slice == 0) { |
243 | 0 | ref_lane = lane; |
244 | 0 | } |
245 | |
|
246 | 0 | size_t m = 3 * segments; |
247 | 0 | size_t s = ((slice + 1) % 4) * segments; |
248 | |
|
249 | 0 | if(lane == ref_lane) { |
250 | 0 | m += index; |
251 | 0 | } |
252 | |
|
253 | 0 | if(n == 0) { |
254 | 0 | m = slice * segments; |
255 | 0 | s = 0; |
256 | 0 | if(slice == 0 || lane == ref_lane) { |
257 | 0 | m += index; |
258 | 0 | } |
259 | 0 | } |
260 | |
|
261 | 0 | if(index == 0 || lane == ref_lane) { |
262 | 0 | m -= 1; |
263 | 0 | } |
264 | |
|
265 | 0 | uint64_t p = static_cast<uint32_t>(random); |
266 | 0 | p = (p * p) >> 32; |
267 | 0 | p = (p * m) >> 32; |
268 | |
|
269 | 0 | return static_cast<uint32_t>(ref_lane * lanes + (s + m - (p + 1)) % lanes); |
270 | 0 | } |
271 | | |
272 | | void process_block(secure_vector<uint64_t>& B, |
273 | | size_t n, |
274 | | size_t slice, |
275 | | size_t lane, |
276 | | size_t lanes, |
277 | | size_t segments, |
278 | | size_t threads, |
279 | | uint8_t mode, |
280 | | size_t memory, |
281 | 0 | size_t time) { |
282 | 0 | uint64_t T[128]; |
283 | 0 | size_t index = 0; |
284 | 0 | if(n == 0 && slice == 0) { |
285 | 0 | index = 2; |
286 | 0 | } |
287 | |
|
288 | 0 | const bool use_2i = mode == 1 || (mode == 2 && n == 0 && slice < SYNC_POINTS / 2); |
289 | |
|
290 | 0 | uint64_t addresses[128]; |
291 | 0 | size_t address_counter = 1; |
292 | |
|
293 | 0 | if(use_2i) { |
294 | 0 | gen_2i_addresses(T, addresses, n, lane, slice, memory, time, mode, address_counter); |
295 | 0 | } |
296 | |
|
297 | 0 | while(index < segments) { |
298 | 0 | const size_t offset = lane * lanes + slice * segments + index; |
299 | |
|
300 | 0 | size_t prev = offset - 1; |
301 | 0 | if(index == 0 && slice == 0) { |
302 | 0 | prev += lanes; |
303 | 0 | } |
304 | |
|
305 | 0 | if(use_2i && index > 0 && index % 128 == 0) { |
306 | 0 | address_counter += 1; |
307 | 0 | gen_2i_addresses(T, addresses, n, lane, slice, memory, time, mode, address_counter); |
308 | 0 | } |
309 | |
|
310 | 0 | const uint64_t random = use_2i ? addresses[index % 128] : B.at(128 * prev); |
311 | 0 | const size_t new_offset = index_alpha(random, lanes, segments, threads, n, slice, lane, index); |
312 | |
|
313 | 0 | uint64_t N[128]; |
314 | 0 | for(size_t i = 0; i != 128; ++i) { |
315 | 0 | N[i] = B[128 * prev + i] ^ B[128 * new_offset + i]; |
316 | 0 | } |
317 | |
|
318 | 0 | Argon2::blamka(N, T); |
319 | |
|
320 | 0 | for(size_t i = 0; i != 128; ++i) { |
321 | 0 | B[128 * offset + i] ^= N[i]; |
322 | 0 | } |
323 | |
|
324 | 0 | index += 1; |
325 | 0 | } |
326 | 0 | } |
327 | | |
328 | 0 | void process_blocks(secure_vector<uint64_t>& B, size_t t, size_t memory, size_t threads, uint8_t mode) { |
329 | 0 | const size_t lanes = memory / threads; |
330 | 0 | const size_t segments = lanes / SYNC_POINTS; |
331 | |
|
332 | 0 | #if defined(BOTAN_HAS_THREAD_UTILS) |
333 | 0 | if(threads > 1) { |
334 | 0 | auto& thread_pool = Thread_Pool::global_instance(); |
335 | |
|
336 | 0 | for(size_t n = 0; n != t; ++n) { |
337 | 0 | for(size_t slice = 0; slice != SYNC_POINTS; ++slice) { |
338 | 0 | std::vector<std::future<void>> fut_results; |
339 | 0 | fut_results.reserve(threads); |
340 | |
|
341 | 0 | for(size_t lane = 0; lane != threads; ++lane) { |
342 | 0 | fut_results.push_back(thread_pool.run( |
343 | 0 | process_block, std::ref(B), n, slice, lane, lanes, segments, threads, mode, memory, t)); |
344 | 0 | } |
345 | |
|
346 | 0 | for(auto& fut : fut_results) { |
347 | 0 | fut.get(); |
348 | 0 | } |
349 | 0 | } |
350 | 0 | } |
351 | |
|
352 | 0 | return; |
353 | 0 | } |
354 | 0 | #endif |
355 | | |
356 | 0 | for(size_t n = 0; n != t; ++n) { |
357 | 0 | for(size_t slice = 0; slice != SYNC_POINTS; ++slice) { |
358 | 0 | for(size_t lane = 0; lane != threads; ++lane) { |
359 | 0 | process_block(B, n, slice, lane, lanes, segments, threads, mode, memory, t); |
360 | 0 | } |
361 | 0 | } |
362 | 0 | } |
363 | 0 | } |
364 | | |
365 | | } // namespace |
366 | | |
367 | | void Argon2::argon2(uint8_t output[], |
368 | | size_t output_len, |
369 | | const char* password, |
370 | | size_t password_len, |
371 | | const uint8_t salt[], |
372 | | size_t salt_len, |
373 | | const uint8_t key[], |
374 | | size_t key_len, |
375 | | const uint8_t ad[], |
376 | 0 | size_t ad_len) const { |
377 | 0 | BOTAN_ARG_CHECK(output_len >= 4 && output_len <= std::numeric_limits<uint32_t>::max(), |
378 | 0 | "Invalid Argon2 output length"); |
379 | 0 | BOTAN_ARG_CHECK(password_len <= std::numeric_limits<uint32_t>::max(), "Invalid Argon2 password length"); |
380 | 0 | BOTAN_ARG_CHECK(salt_len <= std::numeric_limits<uint32_t>::max(), "Invalid Argon2 salt length"); |
381 | 0 | BOTAN_ARG_CHECK(key_len <= std::numeric_limits<uint32_t>::max(), "Invalid Argon2 key length"); |
382 | 0 | BOTAN_ARG_CHECK(ad_len <= std::numeric_limits<uint32_t>::max(), "Invalid Argon2 ad length"); |
383 | |
|
384 | 0 | auto blake2 = HashFunction::create_or_throw("BLAKE2b"); |
385 | |
|
386 | 0 | uint8_t H0[64] = {0}; |
387 | 0 | argon2_H0(H0, |
388 | 0 | *blake2, |
389 | 0 | output_len, |
390 | 0 | password, |
391 | 0 | password_len, |
392 | 0 | salt, |
393 | 0 | salt_len, |
394 | 0 | key, |
395 | 0 | key_len, |
396 | 0 | ad, |
397 | 0 | ad_len, |
398 | 0 | m_family, |
399 | 0 | m_p, |
400 | 0 | m_M, |
401 | 0 | m_t); |
402 | |
|
403 | 0 | const size_t memory = (m_M / (SYNC_POINTS * m_p)) * (SYNC_POINTS * m_p); |
404 | |
|
405 | 0 | secure_vector<uint64_t> B(memory * 1024 / 8); |
406 | |
|
407 | 0 | init_blocks(B, *blake2, H0, memory, m_p); |
408 | 0 | process_blocks(B, m_t, memory, m_p, m_family); |
409 | |
|
410 | 0 | clear_mem(output, output_len); |
411 | 0 | extract_key(output, output_len, B, memory, m_p); |
412 | 0 | } |
413 | | |
414 | | } // namespace Botan |