/src/botan/src/lib/pbkdf/argon2/argon2.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /** |
2 | | * (C) 2018,2019,2022 Jack Lloyd |
3 | | * |
4 | | * Botan is released under the Simplified BSD License (see license.txt) |
5 | | */ |
6 | | |
7 | | #include <botan/argon2.h> |
8 | | #include <botan/internal/loadstor.h> |
9 | | #include <botan/hash.h> |
10 | | #include <botan/mem_ops.h> |
11 | | #include <botan/internal/rotate.h> |
12 | | #include <botan/exceptn.h> |
13 | | |
14 | | #if defined(BOTAN_HAS_THREAD_UTILS) |
15 | | #include <botan/internal/thread_pool.h> |
16 | | #endif |
17 | | |
18 | | #if defined(BOTAN_HAS_ARGON2_SSSE3) |
19 | | #include <botan/internal/argon2_ssse3.h> |
20 | | #include <botan/internal/cpuid.h> |
21 | | #endif |
22 | | |
23 | | namespace Botan { |
24 | | |
25 | | namespace { |
26 | | |
27 | | const size_t SYNC_POINTS = 4; |
28 | | |
29 | | void argon2_H0(uint8_t H0[64], |
30 | | HashFunction& blake2b, |
31 | | size_t output_len, |
32 | | const char* password, size_t password_len, |
33 | | const uint8_t salt[], size_t salt_len, |
34 | | const uint8_t key[], size_t key_len, |
35 | | const uint8_t ad[], size_t ad_len, |
36 | | size_t y, size_t p, size_t M, size_t t) |
37 | 0 | { |
38 | 0 | const uint8_t v = 19; // Argon2 version code |
39 | |
|
40 | 0 | blake2b.update_le(static_cast<uint32_t>(p)); |
41 | 0 | blake2b.update_le(static_cast<uint32_t>(output_len)); |
42 | 0 | blake2b.update_le(static_cast<uint32_t>(M)); |
43 | 0 | blake2b.update_le(static_cast<uint32_t>(t)); |
44 | 0 | blake2b.update_le(static_cast<uint32_t>(v)); |
45 | 0 | blake2b.update_le(static_cast<uint32_t>(y)); |
46 | |
|
47 | 0 | blake2b.update_le(static_cast<uint32_t>(password_len)); |
48 | 0 | blake2b.update(cast_char_ptr_to_uint8(password), password_len); |
49 | |
|
50 | 0 | blake2b.update_le(static_cast<uint32_t>(salt_len)); |
51 | 0 | blake2b.update(salt, salt_len); |
52 | |
|
53 | 0 | blake2b.update_le(static_cast<uint32_t>(key_len)); |
54 | 0 | blake2b.update(key, key_len); |
55 | |
|
56 | 0 | blake2b.update_le(static_cast<uint32_t>(ad_len)); |
57 | 0 | blake2b.update(ad, ad_len); |
58 | |
|
59 | 0 | blake2b.final(H0); |
60 | 0 | } |
61 | | |
62 | | void extract_key(uint8_t output[], size_t output_len, |
63 | | const secure_vector<uint64_t>& B, |
64 | | size_t memory, size_t threads) |
65 | 0 | { |
66 | 0 | const size_t lanes = memory / threads; |
67 | |
|
68 | 0 | uint64_t sum[128] = { 0 }; |
69 | |
|
70 | 0 | for(size_t lane = 0; lane != threads; ++lane) |
71 | 0 | { |
72 | 0 | const size_t start = 128*(lane * lanes + lanes - 1); |
73 | 0 | const size_t end = 128*(lane * lanes + lanes); |
74 | |
|
75 | 0 | for(size_t j = start; j != end; ++j) |
76 | 0 | { |
77 | 0 | sum[j % 128] ^= B[j]; |
78 | 0 | } |
79 | 0 | } |
80 | |
|
81 | 0 | if(output_len <= 64) |
82 | 0 | { |
83 | 0 | std::unique_ptr<HashFunction> blake2b = HashFunction::create_or_throw("BLAKE2b(" + std::to_string(output_len*8) + ")"); |
84 | 0 | blake2b->update_le(static_cast<uint32_t>(output_len)); |
85 | 0 | for(size_t i = 0; i != 128; ++i) |
86 | 0 | blake2b->update_le(sum[i]); |
87 | 0 | blake2b->final(output); |
88 | 0 | } |
89 | 0 | else |
90 | 0 | { |
91 | 0 | secure_vector<uint8_t> T(64); |
92 | |
|
93 | 0 | std::unique_ptr<HashFunction> blake2b = HashFunction::create_or_throw("BLAKE2b(512)"); |
94 | 0 | blake2b->update_le(static_cast<uint32_t>(output_len)); |
95 | 0 | for(size_t i = 0; i != 128; ++i) |
96 | 0 | blake2b->update_le(sum[i]); |
97 | 0 | blake2b->final(&T[0]); |
98 | |
|
99 | 0 | while(output_len > 64) |
100 | 0 | { |
101 | 0 | copy_mem(output, &T[0], 32); |
102 | 0 | output_len -= 32; |
103 | 0 | output += 32; |
104 | |
|
105 | 0 | if(output_len > 64) |
106 | 0 | { |
107 | 0 | blake2b->update(T); |
108 | 0 | blake2b->final(&T[0]); |
109 | 0 | } |
110 | 0 | } |
111 | |
|
112 | 0 | if(output_len == 64) |
113 | 0 | { |
114 | 0 | blake2b->update(T); |
115 | 0 | blake2b->final(output); |
116 | 0 | } |
117 | 0 | else |
118 | 0 | { |
119 | 0 | std::unique_ptr<HashFunction> blake2b_f = HashFunction::create_or_throw("BLAKE2b(" + std::to_string(output_len*8) + ")"); |
120 | 0 | blake2b_f->update(T); |
121 | 0 | blake2b_f->final(output); |
122 | 0 | } |
123 | 0 | } |
124 | 0 | } |
125 | | |
126 | | void init_blocks(secure_vector<uint64_t>& B, |
127 | | HashFunction& blake2b, |
128 | | const uint8_t H0[64], |
129 | | size_t memory, |
130 | | size_t threads) |
131 | 0 | { |
132 | 0 | BOTAN_ASSERT_NOMSG(B.size() >= threads*256); |
133 | |
|
134 | 0 | for(size_t i = 0; i != threads; ++i) |
135 | 0 | { |
136 | 0 | const size_t B_off = i * (memory / threads); |
137 | |
|
138 | 0 | BOTAN_ASSERT_NOMSG(B.size() >= 128*(B_off+2)); |
139 | |
|
140 | 0 | for(size_t j = 0; j != 2; ++j) |
141 | 0 | { |
142 | 0 | uint8_t T[64] = { 0 }; |
143 | |
|
144 | 0 | blake2b.update_le(static_cast<uint32_t>(1024)); |
145 | 0 | blake2b.update(H0, 64); |
146 | 0 | blake2b.update_le(static_cast<uint32_t>(j)); |
147 | 0 | blake2b.update_le(static_cast<uint32_t>(i)); |
148 | 0 | blake2b.final(T); |
149 | |
|
150 | 0 | for(size_t k = 0; k != 30; ++k) |
151 | 0 | { |
152 | 0 | load_le(&B[128*(B_off+j)+4*k], T, 32 / 8); |
153 | 0 | blake2b.update(T, 64); |
154 | 0 | blake2b.final(T); |
155 | 0 | } |
156 | |
|
157 | 0 | load_le(&B[128*(B_off+j)+4*30], T, 64 / 8); |
158 | 0 | } |
159 | 0 | } |
160 | 0 | } |
161 | | |
162 | | BOTAN_FORCE_INLINE void blamka_G(uint64_t& A, uint64_t& B, uint64_t& C, uint64_t& D) |
163 | 0 | { |
164 | 0 | A += B + (static_cast<uint64_t>(2) * static_cast<uint32_t>(A)) * static_cast<uint32_t>(B); |
165 | 0 | D = rotr<32>(A ^ D); |
166 | |
|
167 | 0 | C += D + (static_cast<uint64_t>(2) * static_cast<uint32_t>(C)) * static_cast<uint32_t>(D); |
168 | 0 | B = rotr<24>(B ^ C); |
169 | |
|
170 | 0 | A += B + (static_cast<uint64_t>(2) * static_cast<uint32_t>(A)) * static_cast<uint32_t>(B); |
171 | 0 | D = rotr<16>(A ^ D); |
172 | |
|
173 | 0 | C += D + (static_cast<uint64_t>(2) * static_cast<uint32_t>(C)) * static_cast<uint32_t>(D); |
174 | 0 | B = rotr<63>(B ^ C); |
175 | 0 | } |
176 | | |
177 | | void blamka(uint64_t T[128]) |
178 | 0 | { |
179 | 0 | #if defined(BOTAN_HAS_ARGON2_SSSE3) |
180 | 0 | if(CPUID::has_ssse3()) |
181 | 0 | return blamka_ssse3(T); |
182 | 0 | #endif |
183 | | |
184 | 0 | for(size_t i = 0; i != 128; i += 16) |
185 | 0 | { |
186 | 0 | blamka_G(T[i+ 0], T[i+ 4], T[i+ 8], T[i+ 12]); |
187 | 0 | blamka_G(T[i+ 1], T[i+ 5], T[i+ 9], T[i+ 13]); |
188 | 0 | blamka_G(T[i+ 2], T[i+ 6], T[i+ 10], T[i+ 14]); |
189 | 0 | blamka_G(T[i+ 3], T[i+ 7], T[i+ 11], T[i+ 15]); |
190 | |
|
191 | 0 | blamka_G(T[i+ 0], T[i+ 5], T[i+ 10], T[i+ 15]); |
192 | 0 | blamka_G(T[i+ 1], T[i+ 6], T[i+ 11], T[i+ 12]); |
193 | 0 | blamka_G(T[i+ 2], T[i+ 7], T[i+ 8], T[i+ 13]); |
194 | 0 | blamka_G(T[i+ 3], T[i+ 4], T[i+ 9], T[i+ 14]); |
195 | 0 | } |
196 | |
|
197 | 0 | for(size_t i = 0; i != 128 / 8; i += 2) |
198 | 0 | { |
199 | 0 | blamka_G(T[i+ 0], T[i+ 32], T[i+ 64], T[i+ 96]); |
200 | 0 | blamka_G(T[i+ 1], T[i+ 33], T[i+ 65], T[i+ 97]); |
201 | 0 | blamka_G(T[i+ 16], T[i+ 48], T[i+ 80], T[i+112]); |
202 | 0 | blamka_G(T[i+ 17], T[i+ 49], T[i+ 81], T[i+113]); |
203 | |
|
204 | 0 | blamka_G(T[i+ 0], T[i+ 33], T[i+ 80], T[i+113]); |
205 | 0 | blamka_G(T[i+ 1], T[i+ 48], T[i+ 81], T[i+ 96]); |
206 | 0 | blamka_G(T[i+ 16], T[i+ 49], T[i+ 64], T[i+ 97]); |
207 | 0 | blamka_G(T[i+ 17], T[i+ 32], T[i+ 65], T[i+112]); |
208 | 0 | } |
209 | 0 | } |
210 | | |
211 | | void gen_2i_addresses(uint64_t T[128], uint64_t B[128], |
212 | | size_t n, size_t lane, size_t slice, size_t memory, |
213 | | size_t time, size_t mode, size_t cnt) |
214 | 0 | { |
215 | 0 | clear_mem(B, 128); |
216 | |
|
217 | 0 | B[0] = n; |
218 | 0 | B[1] = lane; |
219 | 0 | B[2] = slice; |
220 | 0 | B[3] = memory; |
221 | 0 | B[4] = time; |
222 | 0 | B[5] = mode; |
223 | 0 | B[6] = cnt; |
224 | |
|
225 | 0 | for(size_t r = 0; r != 2; ++r) |
226 | 0 | { |
227 | 0 | copy_mem(T, B, 128); |
228 | |
|
229 | 0 | blamka(T); |
230 | |
|
231 | 0 | for(size_t i = 0; i != 128; ++i) |
232 | 0 | B[i] ^= T[i]; |
233 | 0 | } |
234 | 0 | } |
235 | | |
236 | | uint32_t index_alpha(uint64_t random, |
237 | | size_t lanes, |
238 | | size_t segments, |
239 | | size_t threads, |
240 | | size_t n, |
241 | | size_t slice, |
242 | | size_t lane, |
243 | | size_t index) |
244 | 0 | { |
245 | 0 | size_t ref_lane = static_cast<uint32_t>(random >> 32) % threads; |
246 | |
|
247 | 0 | if(n == 0 && slice == 0) |
248 | 0 | ref_lane = lane; |
249 | |
|
250 | 0 | size_t m = 3*segments; |
251 | 0 | size_t s = ((slice+1) % 4)*segments; |
252 | |
|
253 | 0 | if(lane == ref_lane) |
254 | 0 | m += index; |
255 | |
|
256 | 0 | if(n == 0) |
257 | 0 | { |
258 | 0 | m = slice*segments; |
259 | 0 | s = 0; |
260 | 0 | if(slice == 0 || lane == ref_lane) |
261 | 0 | m += index; |
262 | 0 | } |
263 | |
|
264 | 0 | if(index == 0 || lane == ref_lane) |
265 | 0 | m -= 1; |
266 | |
|
267 | 0 | uint64_t p = static_cast<uint32_t>(random); |
268 | 0 | p = (p * p) >> 32; |
269 | 0 | p = (p * m) >> 32; |
270 | |
|
271 | 0 | return static_cast<uint32_t>(ref_lane*lanes + (s + m - (p+1)) % lanes); |
272 | 0 | } |
273 | | |
274 | | void process_block(secure_vector<uint64_t>& B, |
275 | | size_t n, size_t slice, size_t lane, |
276 | | size_t lanes, size_t segments, size_t threads, uint8_t mode, |
277 | | size_t memory, size_t time) |
278 | 0 | { |
279 | 0 | uint64_t T[128]; |
280 | 0 | size_t index = 0; |
281 | 0 | if(n == 0 && slice == 0) |
282 | 0 | index = 2; |
283 | |
|
284 | 0 | const bool use_2i = mode == 1 || (mode == 2 && n == 0 && slice < SYNC_POINTS/2); |
285 | |
|
286 | 0 | uint64_t addresses[128]; |
287 | 0 | size_t address_counter = 1; |
288 | |
|
289 | 0 | if(use_2i) |
290 | 0 | { |
291 | 0 | gen_2i_addresses(T, addresses, n, lane, slice, memory, time, mode, address_counter); |
292 | 0 | } |
293 | |
|
294 | 0 | while(index < segments) |
295 | 0 | { |
296 | 0 | const size_t offset = lane*lanes + slice*segments + index; |
297 | |
|
298 | 0 | size_t prev = offset - 1; |
299 | 0 | if(index == 0 && slice == 0) |
300 | 0 | prev += lanes; |
301 | |
|
302 | 0 | if(use_2i && index > 0 && index % 128 == 0) |
303 | 0 | { |
304 | 0 | address_counter += 1; |
305 | 0 | gen_2i_addresses(T, addresses, n, lane, slice, memory, time, mode, address_counter); |
306 | 0 | } |
307 | |
|
308 | 0 | const uint64_t random = use_2i ? addresses[index % 128] : B.at(128*prev); |
309 | 0 | const size_t new_offset = index_alpha(random, lanes, segments, threads, n, slice, lane, index); |
310 | |
|
311 | 0 | for(size_t i = 0; i != 128; ++i) |
312 | 0 | T[i] = B[128*prev+i] ^ B[128*new_offset+i]; |
313 | |
|
314 | 0 | blamka(T); |
315 | |
|
316 | 0 | for(size_t i = 0; i != 128; ++i) |
317 | 0 | B[128*offset + i] ^= T[i] ^ B[128*prev+i] ^ B[128*new_offset+i]; |
318 | |
|
319 | 0 | index += 1; |
320 | 0 | } |
321 | 0 | } |
322 | | |
323 | | void process_blocks(secure_vector<uint64_t>& B, |
324 | | size_t t, |
325 | | size_t memory, |
326 | | size_t threads, |
327 | | uint8_t mode) |
328 | 0 | { |
329 | 0 | const size_t lanes = memory / threads; |
330 | 0 | const size_t segments = lanes / SYNC_POINTS; |
331 | |
|
332 | 0 | #if defined(BOTAN_HAS_THREAD_UTILS) |
333 | 0 | auto& thread_pool = Thread_Pool::global_instance(); |
334 | 0 | #endif |
335 | |
|
336 | 0 | for(size_t n = 0; n != t; ++n) |
337 | 0 | { |
338 | 0 | for(size_t slice = 0; slice != SYNC_POINTS; ++slice) |
339 | 0 | { |
340 | 0 | #if defined(BOTAN_HAS_THREAD_UTILS) |
341 | 0 | if(threads > 1) |
342 | 0 | { |
343 | 0 | std::vector<std::future<void>> fut_results; |
344 | 0 | fut_results.reserve(threads); |
345 | |
|
346 | 0 | for(size_t lane = 0; lane != threads; ++lane) |
347 | 0 | { |
348 | 0 | fut_results.push_back(thread_pool.run( |
349 | 0 | process_block, |
350 | 0 | std::ref(B), n, slice, lane, lanes, segments, threads, mode, memory, t)); |
351 | 0 | } |
352 | |
|
353 | 0 | for(auto& fut : fut_results) |
354 | 0 | fut.get(); |
355 | |
|
356 | 0 | continue; |
357 | 0 | } |
358 | 0 | #endif |
359 | | |
360 | 0 | for(size_t lane = 0; lane != threads; ++lane) |
361 | 0 | { |
362 | 0 | process_block(B, n, slice, lane, lanes, segments, threads, mode, memory, t); |
363 | 0 | } |
364 | 0 | } |
365 | 0 | } |
366 | 0 | } |
367 | | |
368 | | } |
369 | | |
370 | | void Argon2::argon2(uint8_t output[], size_t output_len, |
371 | | const char* password, size_t password_len, |
372 | | const uint8_t salt[], size_t salt_len, |
373 | | const uint8_t key[], size_t key_len, |
374 | | const uint8_t ad[], size_t ad_len) const |
375 | 0 | { |
376 | 0 | BOTAN_ARG_CHECK(output_len >= 4, "Invalid Argon2 output length"); |
377 | |
|
378 | 0 | auto blake2 = HashFunction::create_or_throw("BLAKE2b"); |
379 | |
|
380 | 0 | uint8_t H0[64] = { 0 }; |
381 | 0 | argon2_H0(H0, *blake2, output_len, |
382 | 0 | password, password_len, |
383 | 0 | salt, salt_len, |
384 | 0 | key, key_len, |
385 | 0 | ad, ad_len, |
386 | 0 | m_family, m_p, m_M, m_t); |
387 | |
|
388 | 0 | const size_t memory = (m_M / (SYNC_POINTS*m_p)) * (SYNC_POINTS*m_p); |
389 | |
|
390 | 0 | secure_vector<uint64_t> B(memory * 1024/8); |
391 | |
|
392 | 0 | init_blocks(B, *blake2, H0, memory, m_p); |
393 | 0 | process_blocks(B, m_t, memory, m_p, m_family); |
394 | |
|
395 | 0 | clear_mem(output, output_len); |
396 | 0 | extract_key(output, output_len, B, memory, m_p); |
397 | 0 | } |
398 | | |
399 | | } |