/src/botan/src/lib/pbkdf/argon2/argon2.cpp
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | /**  | 
2  |  | * (C) 2018,2019,2022 Jack Lloyd  | 
3  |  | *  | 
4  |  | * Botan is released under the Simplified BSD License (see license.txt)  | 
5  |  | */  | 
6  |  |  | 
7  |  | #include <botan/argon2.h>  | 
8  |  |  | 
9  |  | #include <botan/exceptn.h>  | 
10  |  | #include <botan/hash.h>  | 
11  |  | #include <botan/mem_ops.h>  | 
12  |  | #include <botan/internal/fmt.h>  | 
13  |  | #include <botan/internal/loadstor.h>  | 
14  |  | #include <botan/internal/rotate.h>  | 
15  |  | #include <limits>  | 
16  |  |  | 
17  |  | #if defined(BOTAN_HAS_THREAD_UTILS)  | 
18  |  |    #include <botan/internal/thread_pool.h>  | 
19  |  | #endif  | 
20  |  |  | 
21  |  | #if defined(BOTAN_HAS_ARGON2_AVX2) || defined(BOTAN_HAS_ARGON2_SSSE3)  | 
22  |  |    #include <botan/internal/cpuid.h>  | 
23  |  | #endif  | 
24  |  |  | 
25  |  | namespace Botan { | 
26  |  |  | 
27  |  | namespace { | 
28  |  |  | 
29  |  | const size_t SYNC_POINTS = 4;  | 
30  |  |  | 
31  |  | void argon2_H0(uint8_t H0[64],  | 
32  |  |                HashFunction& blake2b,  | 
33  |  |                size_t output_len,  | 
34  |  |                const char* password,  | 
35  |  |                size_t password_len,  | 
36  |  |                const uint8_t salt[],  | 
37  |  |                size_t salt_len,  | 
38  |  |                const uint8_t key[],  | 
39  |  |                size_t key_len,  | 
40  |  |                const uint8_t ad[],  | 
41  |  |                size_t ad_len,  | 
42  |  |                size_t y,  | 
43  |  |                size_t p,  | 
44  |  |                size_t M,  | 
45  | 0  |                size_t t) { | 
46  | 0  |    const uint8_t v = 19;  // Argon2 version code  | 
47  |  | 
  | 
48  | 0  |    blake2b.update_le(static_cast<uint32_t>(p));  | 
49  | 0  |    blake2b.update_le(static_cast<uint32_t>(output_len));  | 
50  | 0  |    blake2b.update_le(static_cast<uint32_t>(M));  | 
51  | 0  |    blake2b.update_le(static_cast<uint32_t>(t));  | 
52  | 0  |    blake2b.update_le(static_cast<uint32_t>(v));  | 
53  | 0  |    blake2b.update_le(static_cast<uint32_t>(y));  | 
54  |  | 
  | 
55  | 0  |    blake2b.update_le(static_cast<uint32_t>(password_len));  | 
56  | 0  |    blake2b.update(cast_char_ptr_to_uint8(password), password_len);  | 
57  |  | 
  | 
58  | 0  |    blake2b.update_le(static_cast<uint32_t>(salt_len));  | 
59  | 0  |    blake2b.update(salt, salt_len);  | 
60  |  | 
  | 
61  | 0  |    blake2b.update_le(static_cast<uint32_t>(key_len));  | 
62  | 0  |    blake2b.update(key, key_len);  | 
63  |  | 
  | 
64  | 0  |    blake2b.update_le(static_cast<uint32_t>(ad_len));  | 
65  | 0  |    blake2b.update(ad, ad_len);  | 
66  |  | 
  | 
67  | 0  |    blake2b.final(H0);  | 
68  | 0  | }  | 
69  |  |  | 
70  | 0  | void extract_key(uint8_t output[], size_t output_len, const secure_vector<uint64_t>& B, size_t memory, size_t threads) { | 
71  | 0  |    const size_t lanes = memory / threads;  | 
72  |  | 
  | 
73  | 0  |    uint64_t sum[128] = {0}; | 
74  |  | 
  | 
75  | 0  |    for(size_t lane = 0; lane != threads; ++lane) { | 
76  | 0  |       const size_t start = 128 * (lane * lanes + lanes - 1);  | 
77  | 0  |       const size_t end = 128 * (lane * lanes + lanes);  | 
78  |  | 
  | 
79  | 0  |       for(size_t j = start; j != end; ++j) { | 
80  | 0  |          sum[j % 128] ^= B[j];  | 
81  | 0  |       }  | 
82  | 0  |    }  | 
83  |  | 
  | 
84  | 0  |    if(output_len <= 64) { | 
85  | 0  |       auto blake2b = HashFunction::create_or_throw(fmt("BLAKE2b({})", output_len * 8)); | 
86  | 0  |       blake2b->update_le(static_cast<uint32_t>(output_len));  | 
87  | 0  |       for(size_t i = 0; i != 128; ++i) { | 
88  | 0  |          blake2b->update_le(sum[i]);  | 
89  | 0  |       }  | 
90  | 0  |       blake2b->final(output);  | 
91  | 0  |    } else { | 
92  | 0  |       secure_vector<uint8_t> T(64);  | 
93  |  | 
  | 
94  | 0  |       auto blake2b = HashFunction::create_or_throw("BLAKE2b(512)"); | 
95  | 0  |       blake2b->update_le(static_cast<uint32_t>(output_len));  | 
96  | 0  |       for(size_t i = 0; i != 128; ++i) { | 
97  | 0  |          blake2b->update_le(sum[i]);  | 
98  | 0  |       }  | 
99  | 0  |       blake2b->final(&T[0]);  | 
100  |  | 
  | 
101  | 0  |       while(output_len > 64) { | 
102  | 0  |          copy_mem(output, &T[0], 32);  | 
103  | 0  |          output_len -= 32;  | 
104  | 0  |          output += 32;  | 
105  |  | 
  | 
106  | 0  |          if(output_len > 64) { | 
107  | 0  |             blake2b->update(T);  | 
108  | 0  |             blake2b->final(&T[0]);  | 
109  | 0  |          }  | 
110  | 0  |       }  | 
111  |  | 
  | 
112  | 0  |       if(output_len == 64) { | 
113  | 0  |          blake2b->update(T);  | 
114  | 0  |          blake2b->final(output);  | 
115  | 0  |       } else { | 
116  | 0  |          auto blake2b_f = HashFunction::create_or_throw(fmt("BLAKE2b({})", output_len * 8)); | 
117  | 0  |          blake2b_f->update(T);  | 
118  | 0  |          blake2b_f->final(output);  | 
119  | 0  |       }  | 
120  | 0  |    }  | 
121  | 0  | }  | 
122  |  |  | 
123  |  | void init_blocks(  | 
124  | 0  |    secure_vector<uint64_t>& B, HashFunction& blake2b, const uint8_t H0[64], size_t memory, size_t threads) { | 
125  | 0  |    BOTAN_ASSERT_NOMSG(B.size() >= threads * 256);  | 
126  |  | 
  | 
127  | 0  |    for(size_t i = 0; i != threads; ++i) { | 
128  | 0  |       const size_t B_off = i * (memory / threads);  | 
129  |  | 
  | 
130  | 0  |       BOTAN_ASSERT_NOMSG(B.size() >= 128 * (B_off + 2));  | 
131  |  | 
  | 
132  | 0  |       for(size_t j = 0; j != 2; ++j) { | 
133  | 0  |          uint8_t T[64] = {0}; | 
134  |  | 
  | 
135  | 0  |          blake2b.update_le(static_cast<uint32_t>(1024));  | 
136  | 0  |          blake2b.update(H0, 64);  | 
137  | 0  |          blake2b.update_le(static_cast<uint32_t>(j));  | 
138  | 0  |          blake2b.update_le(static_cast<uint32_t>(i));  | 
139  | 0  |          blake2b.final(T);  | 
140  |  | 
  | 
141  | 0  |          for(size_t k = 0; k != 30; ++k) { | 
142  | 0  |             load_le(&B[128 * (B_off + j) + 4 * k], T, 32 / 8);  | 
143  | 0  |             blake2b.update(T, 64);  | 
144  | 0  |             blake2b.final(T);  | 
145  | 0  |          }  | 
146  |  | 
  | 
147  | 0  |          load_le(&B[128 * (B_off + j) + 4 * 30], T, 64 / 8);  | 
148  | 0  |       }  | 
149  | 0  |    }  | 
150  | 0  | }  | 
151  |  |  | 
152  | 0  | BOTAN_FORCE_INLINE void blamka_G(uint64_t& A, uint64_t& B, uint64_t& C, uint64_t& D) { | 
153  | 0  |    A += B + (static_cast<uint64_t>(2) * static_cast<uint32_t>(A)) * static_cast<uint32_t>(B);  | 
154  | 0  |    D = rotr<32>(A ^ D);  | 
155  |  | 
  | 
156  | 0  |    C += D + (static_cast<uint64_t>(2) * static_cast<uint32_t>(C)) * static_cast<uint32_t>(D);  | 
157  | 0  |    B = rotr<24>(B ^ C);  | 
158  |  | 
  | 
159  | 0  |    A += B + (static_cast<uint64_t>(2) * static_cast<uint32_t>(A)) * static_cast<uint32_t>(B);  | 
160  | 0  |    D = rotr<16>(A ^ D);  | 
161  |  | 
  | 
162  | 0  |    C += D + (static_cast<uint64_t>(2) * static_cast<uint32_t>(C)) * static_cast<uint32_t>(D);  | 
163  | 0  |    B = rotr<63>(B ^ C);  | 
164  | 0  | }  | 
165  |  |  | 
166  |  | }  // namespace  | 
167  |  |  | 
168  | 0  | void Argon2::blamka(uint64_t N[128], uint64_t T[128]) { | 
169  | 0  | #if defined(BOTAN_HAS_ARGON2_AVX2)  | 
170  | 0  |    if(CPUID::has_avx2()) { | 
171  | 0  |       return Argon2::blamka_avx2(N, T);  | 
172  | 0  |    }  | 
173  | 0  | #endif  | 
174  |  |  | 
175  | 0  | #if defined(BOTAN_HAS_ARGON2_SSSE3)  | 
176  | 0  |    if(CPUID::has_ssse3()) { | 
177  | 0  |       return Argon2::blamka_ssse3(N, T);  | 
178  | 0  |    }  | 
179  | 0  | #endif  | 
180  |  |  | 
181  | 0  |    copy_mem(T, N, 128);  | 
182  |  | 
  | 
183  | 0  |    for(size_t i = 0; i != 128; i += 16) { | 
184  | 0  |       blamka_G(T[i + 0], T[i + 4], T[i + 8], T[i + 12]);  | 
185  | 0  |       blamka_G(T[i + 1], T[i + 5], T[i + 9], T[i + 13]);  | 
186  | 0  |       blamka_G(T[i + 2], T[i + 6], T[i + 10], T[i + 14]);  | 
187  | 0  |       blamka_G(T[i + 3], T[i + 7], T[i + 11], T[i + 15]);  | 
188  |  | 
  | 
189  | 0  |       blamka_G(T[i + 0], T[i + 5], T[i + 10], T[i + 15]);  | 
190  | 0  |       blamka_G(T[i + 1], T[i + 6], T[i + 11], T[i + 12]);  | 
191  | 0  |       blamka_G(T[i + 2], T[i + 7], T[i + 8], T[i + 13]);  | 
192  | 0  |       blamka_G(T[i + 3], T[i + 4], T[i + 9], T[i + 14]);  | 
193  | 0  |    }  | 
194  |  | 
  | 
195  | 0  |    for(size_t i = 0; i != 128 / 8; i += 2) { | 
196  | 0  |       blamka_G(T[i + 0], T[i + 32], T[i + 64], T[i + 96]);  | 
197  | 0  |       blamka_G(T[i + 1], T[i + 33], T[i + 65], T[i + 97]);  | 
198  | 0  |       blamka_G(T[i + 16], T[i + 48], T[i + 80], T[i + 112]);  | 
199  | 0  |       blamka_G(T[i + 17], T[i + 49], T[i + 81], T[i + 113]);  | 
200  |  | 
  | 
201  | 0  |       blamka_G(T[i + 0], T[i + 33], T[i + 80], T[i + 113]);  | 
202  | 0  |       blamka_G(T[i + 1], T[i + 48], T[i + 81], T[i + 96]);  | 
203  | 0  |       blamka_G(T[i + 16], T[i + 49], T[i + 64], T[i + 97]);  | 
204  | 0  |       blamka_G(T[i + 17], T[i + 32], T[i + 65], T[i + 112]);  | 
205  | 0  |    }  | 
206  |  | 
  | 
207  | 0  |    for(size_t i = 0; i != 128; ++i) { | 
208  | 0  |       N[i] ^= T[i];  | 
209  | 0  |    }  | 
210  | 0  | }  | 
211  |  |  | 
212  |  | namespace { | 
213  |  |  | 
214  |  | void gen_2i_addresses(uint64_t T[128],  | 
215  |  |                       uint64_t B[128],  | 
216  |  |                       size_t n,  | 
217  |  |                       size_t lane,  | 
218  |  |                       size_t slice,  | 
219  |  |                       size_t memory,  | 
220  |  |                       size_t time,  | 
221  |  |                       size_t mode,  | 
222  | 0  |                       size_t cnt) { | 
223  | 0  |    clear_mem(B, 128);  | 
224  |  | 
  | 
225  | 0  |    B[0] = n;  | 
226  | 0  |    B[1] = lane;  | 
227  | 0  |    B[2] = slice;  | 
228  | 0  |    B[3] = memory;  | 
229  | 0  |    B[4] = time;  | 
230  | 0  |    B[5] = mode;  | 
231  | 0  |    B[6] = cnt;  | 
232  |  | 
  | 
233  | 0  |    for(size_t r = 0; r != 2; ++r) { | 
234  | 0  |       Argon2::blamka(B, T);  | 
235  | 0  |    }  | 
236  | 0  | }  | 
237  |  |  | 
238  |  | uint32_t index_alpha(  | 
239  | 0  |    uint64_t random, size_t lanes, size_t segments, size_t threads, size_t n, size_t slice, size_t lane, size_t index) { | 
240  | 0  |    size_t ref_lane = static_cast<uint32_t>(random >> 32) % threads;  | 
241  |  | 
  | 
242  | 0  |    if(n == 0 && slice == 0) { | 
243  | 0  |       ref_lane = lane;  | 
244  | 0  |    }  | 
245  |  | 
  | 
246  | 0  |    size_t m = 3 * segments;  | 
247  | 0  |    size_t s = ((slice + 1) % 4) * segments;  | 
248  |  | 
  | 
249  | 0  |    if(lane == ref_lane) { | 
250  | 0  |       m += index;  | 
251  | 0  |    }  | 
252  |  | 
  | 
253  | 0  |    if(n == 0) { | 
254  | 0  |       m = slice * segments;  | 
255  | 0  |       s = 0;  | 
256  | 0  |       if(slice == 0 || lane == ref_lane) { | 
257  | 0  |          m += index;  | 
258  | 0  |       }  | 
259  | 0  |    }  | 
260  |  | 
  | 
261  | 0  |    if(index == 0 || lane == ref_lane) { | 
262  | 0  |       m -= 1;  | 
263  | 0  |    }  | 
264  |  | 
  | 
265  | 0  |    uint64_t p = static_cast<uint32_t>(random);  | 
266  | 0  |    p = (p * p) >> 32;  | 
267  | 0  |    p = (p * m) >> 32;  | 
268  |  | 
  | 
269  | 0  |    return static_cast<uint32_t>(ref_lane * lanes + (s + m - (p + 1)) % lanes);  | 
270  | 0  | }  | 
271  |  |  | 
272  |  | void process_block(secure_vector<uint64_t>& B,  | 
273  |  |                    size_t n,  | 
274  |  |                    size_t slice,  | 
275  |  |                    size_t lane,  | 
276  |  |                    size_t lanes,  | 
277  |  |                    size_t segments,  | 
278  |  |                    size_t threads,  | 
279  |  |                    uint8_t mode,  | 
280  |  |                    size_t memory,  | 
281  | 0  |                    size_t time) { | 
282  | 0  |    uint64_t T[128];  | 
283  | 0  |    size_t index = 0;  | 
284  | 0  |    if(n == 0 && slice == 0) { | 
285  | 0  |       index = 2;  | 
286  | 0  |    }  | 
287  |  | 
  | 
288  | 0  |    const bool use_2i = mode == 1 || (mode == 2 && n == 0 && slice < SYNC_POINTS / 2);  | 
289  |  | 
  | 
290  | 0  |    uint64_t addresses[128];  | 
291  | 0  |    size_t address_counter = 1;  | 
292  |  | 
  | 
293  | 0  |    if(use_2i) { | 
294  | 0  |       gen_2i_addresses(T, addresses, n, lane, slice, memory, time, mode, address_counter);  | 
295  | 0  |    }  | 
296  |  | 
  | 
297  | 0  |    while(index < segments) { | 
298  | 0  |       const size_t offset = lane * lanes + slice * segments + index;  | 
299  |  | 
  | 
300  | 0  |       size_t prev = offset - 1;  | 
301  | 0  |       if(index == 0 && slice == 0) { | 
302  | 0  |          prev += lanes;  | 
303  | 0  |       }  | 
304  |  | 
  | 
305  | 0  |       if(use_2i && index > 0 && index % 128 == 0) { | 
306  | 0  |          address_counter += 1;  | 
307  | 0  |          gen_2i_addresses(T, addresses, n, lane, slice, memory, time, mode, address_counter);  | 
308  | 0  |       }  | 
309  |  | 
  | 
310  | 0  |       const uint64_t random = use_2i ? addresses[index % 128] : B.at(128 * prev);  | 
311  | 0  |       const size_t new_offset = index_alpha(random, lanes, segments, threads, n, slice, lane, index);  | 
312  |  | 
  | 
313  | 0  |       uint64_t N[128];  | 
314  | 0  |       for(size_t i = 0; i != 128; ++i) { | 
315  | 0  |          N[i] = B[128 * prev + i] ^ B[128 * new_offset + i];  | 
316  | 0  |       }  | 
317  |  | 
  | 
318  | 0  |       Argon2::blamka(N, T);  | 
319  |  | 
  | 
320  | 0  |       for(size_t i = 0; i != 128; ++i) { | 
321  | 0  |          B[128 * offset + i] ^= N[i];  | 
322  | 0  |       }  | 
323  |  | 
  | 
324  | 0  |       index += 1;  | 
325  | 0  |    }  | 
326  | 0  | }  | 
327  |  |  | 
328  | 0  | void process_blocks(secure_vector<uint64_t>& B, size_t t, size_t memory, size_t threads, uint8_t mode) { | 
329  | 0  |    const size_t lanes = memory / threads;  | 
330  | 0  |    const size_t segments = lanes / SYNC_POINTS;  | 
331  |  | 
  | 
332  | 0  | #if defined(BOTAN_HAS_THREAD_UTILS)  | 
333  | 0  |    auto& thread_pool = Thread_Pool::global_instance();  | 
334  | 0  | #endif  | 
335  |  | 
  | 
336  | 0  |    for(size_t n = 0; n != t; ++n) { | 
337  | 0  |       for(size_t slice = 0; slice != SYNC_POINTS; ++slice) { | 
338  | 0  | #if defined(BOTAN_HAS_THREAD_UTILS)  | 
339  | 0  |          if(threads > 1) { | 
340  | 0  |             std::vector<std::future<void>> fut_results;  | 
341  | 0  |             fut_results.reserve(threads);  | 
342  |  | 
  | 
343  | 0  |             for(size_t lane = 0; lane != threads; ++lane) { | 
344  | 0  |                fut_results.push_back(thread_pool.run(  | 
345  | 0  |                   process_block, std::ref(B), n, slice, lane, lanes, segments, threads, mode, memory, t));  | 
346  | 0  |             }  | 
347  |  | 
  | 
348  | 0  |             for(auto& fut : fut_results) { | 
349  | 0  |                fut.get();  | 
350  | 0  |             }  | 
351  |  | 
  | 
352  | 0  |             continue;  | 
353  | 0  |          }  | 
354  | 0  | #endif  | 
355  |  |  | 
356  | 0  |          for(size_t lane = 0; lane != threads; ++lane) { | 
357  | 0  |             process_block(B, n, slice, lane, lanes, segments, threads, mode, memory, t);  | 
358  | 0  |          }  | 
359  | 0  |       }  | 
360  | 0  |    }  | 
361  | 0  | }  | 
362  |  |  | 
363  |  | }  // namespace  | 
364  |  |  | 
365  |  | void Argon2::argon2(uint8_t output[],  | 
366  |  |                     size_t output_len,  | 
367  |  |                     const char* password,  | 
368  |  |                     size_t password_len,  | 
369  |  |                     const uint8_t salt[],  | 
370  |  |                     size_t salt_len,  | 
371  |  |                     const uint8_t key[],  | 
372  |  |                     size_t key_len,  | 
373  |  |                     const uint8_t ad[],  | 
374  | 0  |                     size_t ad_len) const { | 
375  | 0  |    BOTAN_ARG_CHECK(output_len >= 4 && output_len <= std::numeric_limits<uint32_t>::max(),  | 
376  | 0  |                    "Invalid Argon2 output length");  | 
377  | 0  |    BOTAN_ARG_CHECK(password_len <= std::numeric_limits<uint32_t>::max(), "Invalid Argon2 password length");  | 
378  | 0  |    BOTAN_ARG_CHECK(salt_len <= std::numeric_limits<uint32_t>::max(), "Invalid Argon2 salt length");  | 
379  | 0  |    BOTAN_ARG_CHECK(key_len <= std::numeric_limits<uint32_t>::max(), "Invalid Argon2 key length");  | 
380  | 0  |    BOTAN_ARG_CHECK(ad_len <= std::numeric_limits<uint32_t>::max(), "Invalid Argon2 ad length");  | 
381  |  | 
  | 
382  | 0  |    auto blake2 = HashFunction::create_or_throw("BLAKE2b"); | 
383  |  | 
  | 
384  | 0  |    uint8_t H0[64] = {0}; | 
385  | 0  |    argon2_H0(H0,  | 
386  | 0  |              *blake2,  | 
387  | 0  |              output_len,  | 
388  | 0  |              password,  | 
389  | 0  |              password_len,  | 
390  | 0  |              salt,  | 
391  | 0  |              salt_len,  | 
392  | 0  |              key,  | 
393  | 0  |              key_len,  | 
394  | 0  |              ad,  | 
395  | 0  |              ad_len,  | 
396  | 0  |              m_family,  | 
397  | 0  |              m_p,  | 
398  | 0  |              m_M,  | 
399  | 0  |              m_t);  | 
400  |  | 
  | 
401  | 0  |    const size_t memory = (m_M / (SYNC_POINTS * m_p)) * (SYNC_POINTS * m_p);  | 
402  |  | 
  | 
403  | 0  |    secure_vector<uint64_t> B(memory * 1024 / 8);  | 
404  |  | 
  | 
405  | 0  |    init_blocks(B, *blake2, H0, memory, m_p);  | 
406  | 0  |    process_blocks(B, m_t, memory, m_p, m_family);  | 
407  |  | 
  | 
408  | 0  |    clear_mem(output, output_len);  | 
409  | 0  |    extract_key(output, output_len, B, memory, m_p);  | 
410  | 0  | }  | 
411  |  |  | 
412  |  | }  // namespace Botan  |