Coverage Report

Created: 2026-04-28 06:58

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/botan/src/lib/pbkdf/argon2/argon2.cpp
Line
Count
Source
1
/**
2
* (C) 2018,2019,2022 Jack Lloyd
3
*
4
* Botan is released under the Simplified BSD License (see license.txt)
5
*/
6
7
#include <botan/argon2.h>
8
9
#include <botan/hash.h>
10
#include <botan/mem_ops.h>
11
#include <botan/internal/bit_ops.h>
12
#include <botan/internal/fmt.h>
13
#include <botan/internal/loadstor.h>
14
#include <botan/internal/mem_utils.h>
15
#include <botan/internal/rotate.h>
16
#include <limits>
17
18
#if defined(BOTAN_HAS_THREAD_UTILS)
19
   #include <botan/internal/thread_pool.h>
20
#endif
21
22
#if defined(BOTAN_HAS_CPUID)
23
   #include <botan/internal/cpuid.h>
24
#endif
25
26
namespace Botan {
27
28
namespace {
29
30
const size_t SYNC_POINTS = 4;
31
32
void argon2_H0(uint8_t H0[64],
33
               HashFunction& blake2b,
34
               size_t output_len,
35
               const char* password,
36
               size_t password_len,
37
               const uint8_t salt[],
38
               size_t salt_len,
39
               const uint8_t key[],
40
               size_t key_len,
41
               const uint8_t ad[],
42
               size_t ad_len,
43
               size_t y,
44
               size_t p,
45
               size_t M,
46
0
               size_t t) {
47
0
   const uint8_t v = 19;  // Argon2 version code
48
49
0
   blake2b.update_le(static_cast<uint32_t>(p));
50
0
   blake2b.update_le(static_cast<uint32_t>(output_len));
51
0
   blake2b.update_le(static_cast<uint32_t>(M));
52
0
   blake2b.update_le(static_cast<uint32_t>(t));
53
0
   blake2b.update_le(static_cast<uint32_t>(v));
54
0
   blake2b.update_le(static_cast<uint32_t>(y));
55
56
0
   blake2b.update_le(static_cast<uint32_t>(password_len));
57
0
   blake2b.update(as_span_of_bytes(password, password_len));
58
59
0
   blake2b.update_le(static_cast<uint32_t>(salt_len));
60
0
   blake2b.update(salt, salt_len);
61
62
0
   blake2b.update_le(static_cast<uint32_t>(key_len));
63
0
   blake2b.update(key, key_len);
64
65
0
   blake2b.update_le(static_cast<uint32_t>(ad_len));
66
0
   blake2b.update(ad, ad_len);
67
68
0
   blake2b.final(H0);
69
0
}
70
71
0
void extract_key(uint8_t output[], size_t output_len, const secure_vector<uint64_t>& B, size_t memory, size_t threads) {
72
0
   const size_t lanes = memory / threads;
73
74
0
   uint64_t sum[128] = {0};
75
76
0
   for(size_t lane = 0; lane != threads; ++lane) {
77
0
      const size_t start = 128 * (lane * lanes + lanes - 1);
78
0
      const size_t end = 128 * (lane * lanes + lanes);
79
80
0
      for(size_t j = start; j != end; ++j) {
81
0
         sum[j % 128] ^= B[j];
82
0
      }
83
0
   }
84
85
0
   if(output_len <= 64) {
86
0
      auto blake2b = HashFunction::create_or_throw(fmt("BLAKE2b({})", output_len * 8));
87
0
      blake2b->update_le(static_cast<uint32_t>(output_len));
88
0
      for(size_t i = 0; i != 128; ++i) {  // NOLINT(modernize-loop-convert)
89
0
         blake2b->update_le(sum[i]);
90
0
      }
91
0
      blake2b->final(output);
92
0
   } else {
93
0
      secure_vector<uint8_t> T(64);
94
95
0
      auto blake2b = HashFunction::create_or_throw("BLAKE2b(512)");
96
0
      blake2b->update_le(static_cast<uint32_t>(output_len));
97
0
      for(size_t i = 0; i != 128; ++i) {  // NOLINT(modernize-loop-convert)
98
0
         blake2b->update_le(sum[i]);
99
0
      }
100
0
      blake2b->final(std::span{T});
101
102
0
      while(output_len > 64) {
103
0
         copy_mem(output, T.data(), 32);
104
0
         output_len -= 32;
105
0
         output += 32;
106
107
0
         if(output_len > 64) {
108
0
            blake2b->update(T);
109
0
            blake2b->final(std::span{T});
110
0
         }
111
0
      }
112
113
0
      if(output_len == 64) {
114
0
         blake2b->update(T);
115
0
         blake2b->final(output);
116
0
      } else {
117
0
         auto blake2b_f = HashFunction::create_or_throw(fmt("BLAKE2b({})", output_len * 8));
118
0
         blake2b_f->update(T);
119
0
         blake2b_f->final(output);
120
0
      }
121
0
   }
122
0
}
123
124
void init_blocks(
125
0
   secure_vector<uint64_t>& B, HashFunction& blake2b, const uint8_t H0[64], size_t memory, size_t threads) {
126
0
   BOTAN_ASSERT_NOMSG(B.size() >= threads * 256);
127
128
0
   for(size_t i = 0; i != threads; ++i) {
129
0
      const size_t B_off = i * (memory / threads);
130
131
0
      BOTAN_ASSERT_NOMSG(B.size() >= 128 * (B_off + 2));
132
133
0
      for(size_t j = 0; j != 2; ++j) {
134
0
         uint8_t T[64] = {0};
135
136
0
         blake2b.update_le(static_cast<uint32_t>(1024));
137
0
         blake2b.update(H0, 64);
138
0
         blake2b.update_le(static_cast<uint32_t>(j));
139
0
         blake2b.update_le(static_cast<uint32_t>(i));
140
0
         blake2b.final(T);
141
142
0
         for(size_t k = 0; k != 30; ++k) {
143
0
            load_le(&B[128 * (B_off + j) + 4 * k], T, 32 / 8);
144
0
            blake2b.update(T, 64);
145
0
            blake2b.final(T);
146
0
         }
147
148
0
         load_le(&B[128 * (B_off + j) + 4 * 30], T, 64 / 8);
149
0
      }
150
0
   }
151
0
}
152
153
0
/**
* Multiply-add step used by blamka_G: x + y + 2 * lo32(x) * lo32(y),
* computed modulo 2^64.
*/
BOTAN_FORCE_INLINE uint64_t blamka_mul_add(uint64_t x, uint64_t y) {
   const uint64_t x_lo = static_cast<uint32_t>(x);
   const uint64_t y_lo = static_cast<uint32_t>(y);
   return x + y + 2 * x_lo * y_lo;
}

/**
* Quarter-round on four 64-bit words, updated in place. Each addition is
* the multiply-add above, followed by an XOR and a right rotation by
* 32, 24, 16 and 63 bits in turn.
*/
BOTAN_FORCE_INLINE void blamka_G(uint64_t& A, uint64_t& B, uint64_t& C, uint64_t& D) {
   A = blamka_mul_add(A, B);
   D = rotr<32>(A ^ D);

   C = blamka_mul_add(C, D);
   B = rotr<24>(B ^ C);

   A = blamka_mul_add(A, B);
   D = rotr<16>(A ^ D);

   C = blamka_mul_add(C, D);
   B = rotr<63>(B ^ C);
}
166
167
}  // namespace
168
169
0
void Argon2::blamka(uint64_t N[128], uint64_t T[128]) {
170
0
#if defined(BOTAN_HAS_ARGON2_AVX512)
171
0
   if(CPUID::has(CPUID::Feature::AVX512)) {
172
0
      return Argon2::blamka_avx512(N, T);
173
0
   }
174
0
#endif
175
176
0
#if defined(BOTAN_HAS_ARGON2_AVX2)
177
0
   if(CPUID::has(CPUID::Feature::AVX2)) {
178
0
      return Argon2::blamka_avx2(N, T);
179
0
   }
180
0
#endif
181
182
0
#if defined(BOTAN_HAS_ARGON2_SIMD64)
183
0
   if(CPUID::has(CPUID::Feature::SIMD_2X64)) {
184
0
      return Argon2::blamka_simd64(N, T);
185
0
   }
186
0
#endif
187
188
0
   copy_mem(T, N, 128);
189
190
0
   for(size_t i = 0; i != 128; i += 16) {
191
0
      blamka_G(T[i + 0], T[i + 4], T[i + 8], T[i + 12]);
192
0
      blamka_G(T[i + 1], T[i + 5], T[i + 9], T[i + 13]);
193
0
      blamka_G(T[i + 2], T[i + 6], T[i + 10], T[i + 14]);
194
0
      blamka_G(T[i + 3], T[i + 7], T[i + 11], T[i + 15]);
195
196
0
      blamka_G(T[i + 0], T[i + 5], T[i + 10], T[i + 15]);
197
0
      blamka_G(T[i + 1], T[i + 6], T[i + 11], T[i + 12]);
198
0
      blamka_G(T[i + 2], T[i + 7], T[i + 8], T[i + 13]);
199
0
      blamka_G(T[i + 3], T[i + 4], T[i + 9], T[i + 14]);
200
0
   }
201
202
0
   for(size_t i = 0; i != 128 / 8; i += 2) {
203
0
      blamka_G(T[i + 0], T[i + 32], T[i + 64], T[i + 96]);
204
0
      blamka_G(T[i + 1], T[i + 33], T[i + 65], T[i + 97]);
205
0
      blamka_G(T[i + 16], T[i + 48], T[i + 80], T[i + 112]);
206
0
      blamka_G(T[i + 17], T[i + 49], T[i + 81], T[i + 113]);
207
208
0
      blamka_G(T[i + 0], T[i + 33], T[i + 80], T[i + 113]);
209
0
      blamka_G(T[i + 1], T[i + 48], T[i + 81], T[i + 96]);
210
0
      blamka_G(T[i + 16], T[i + 49], T[i + 64], T[i + 97]);
211
0
      blamka_G(T[i + 17], T[i + 32], T[i + 65], T[i + 112]);
212
0
   }
213
214
0
   for(size_t i = 0; i != 128; ++i) {
215
0
      N[i] ^= T[i];
216
0
   }
217
0
}
218
219
namespace {
220
221
void gen_2i_addresses(uint64_t T[128],
222
                      uint64_t B[128],
223
                      size_t n,
224
                      size_t lane,
225
                      size_t slice,
226
                      size_t memory,
227
                      size_t time,
228
                      size_t mode,
229
0
                      size_t cnt) {
230
0
   clear_mem(B, 128);
231
232
0
   B[0] = n;
233
0
   B[1] = lane;
234
0
   B[2] = slice;
235
0
   B[3] = memory;
236
0
   B[4] = time;
237
0
   B[5] = mode;
238
0
   B[6] = cnt;
239
240
0
   for(size_t r = 0; r != 2; ++r) {
241
0
      Argon2::blamka(B, T);
242
0
   }
243
0
}
244
245
// Reduce random modulo Argon2 thread count (normally a power of 2)
246
0
inline size_t mod_threads(uint32_t random, size_t threads) {
247
0
   if(is_power_of_2(threads)) {
248
0
      return random & static_cast<uint32_t>(threads - 1);
249
0
   } else {
250
0
      return random % threads;
251
0
   }
252
0
}
253
254
// Reduce alpha modulo the lane length; always a multiple of 4 and commonly a power of 2
255
0
inline size_t mod_lanes(uint64_t alpha, size_t lanes) {
256
0
   if(is_power_of_2(lanes)) {
257
0
      return static_cast<size_t>(alpha & static_cast<uint64_t>(lanes - 1));
258
0
   } else {
259
0
      return alpha % lanes;
260
0
   }
261
0
}
262
263
uint32_t index_alpha(
264
0
   uint64_t random, size_t lanes, size_t segments, size_t threads, size_t n, size_t slice, size_t lane, size_t index) {
265
0
   size_t ref_lane = mod_threads(static_cast<uint32_t>(random >> 32), threads);
266
267
0
   if(n == 0 && slice == 0) {
268
0
      ref_lane = lane;
269
0
   }
270
271
0
   size_t m = 3 * segments;
272
0
   size_t s = ((slice + 1) % 4) * segments;
273
274
0
   if(lane == ref_lane) {
275
0
      m += index;
276
0
   }
277
278
0
   if(n == 0) {
279
0
      m = slice * segments;
280
0
      s = 0;
281
0
      if(slice == 0 || lane == ref_lane) {
282
0
         m += index;
283
0
      }
284
0
   }
285
286
0
   if(index == 0 || lane == ref_lane) {
287
0
      m -= 1;
288
0
   }
289
290
0
   uint64_t p = static_cast<uint32_t>(random);
291
0
   p = (p * p) >> 32;
292
0
   p = (p * m) >> 32;
293
294
0
   return static_cast<uint32_t>(ref_lane * lanes + mod_lanes(s + m - (p + 1), lanes));
295
0
}
296
297
void process_block(secure_vector<uint64_t>& B,
298
                   size_t n,
299
                   size_t slice,
300
                   size_t lane,
301
                   size_t lanes,
302
                   size_t segments,
303
                   size_t threads,
304
                   uint8_t mode,
305
                   size_t memory,
306
0
                   size_t time) {
307
0
   uint64_t T[128];
308
0
   size_t index = 0;
309
0
   if(n == 0 && slice == 0) {
310
0
      index = 2;
311
0
   }
312
313
0
   const bool use_2i = mode == 1 || (mode == 2 && n == 0 && slice < SYNC_POINTS / 2);
314
315
0
   uint64_t addresses[128];
316
0
   size_t address_counter = 1;
317
318
0
   if(use_2i) {
319
0
      gen_2i_addresses(T, addresses, n, lane, slice, memory, time, mode, address_counter);
320
0
   }
321
322
0
   while(index < segments) {
323
0
      const size_t offset = lane * lanes + slice * segments + index;
324
325
0
      size_t prev = offset - 1;
326
0
      if(index == 0 && slice == 0) {
327
0
         prev += lanes;
328
0
      }
329
330
0
      if(use_2i && index > 0 && index % 128 == 0) {
331
0
         address_counter += 1;
332
0
         gen_2i_addresses(T, addresses, n, lane, slice, memory, time, mode, address_counter);
333
0
      }
334
335
0
      const uint64_t random = use_2i ? addresses[index % 128] : B.at(128 * prev);
336
0
      const size_t new_offset = index_alpha(random, lanes, segments, threads, n, slice, lane, index);
337
338
0
      uint64_t N[128];
339
0
      for(size_t i = 0; i != 128; ++i) {
340
0
         N[i] = B[128 * prev + i] ^ B[128 * new_offset + i];
341
0
      }
342
343
0
      Argon2::blamka(N, T);
344
345
0
      for(size_t i = 0; i != 128; ++i) {
346
0
         B[128 * offset + i] ^= N[i];
347
0
      }
348
349
0
      index += 1;
350
0
   }
351
0
}
352
353
0
/**
* Run all passes over the memory.
*
* Each pass is split into SYNC_POINTS slices. Within a slice every lane is
* processed by process_block; when thread support is compiled in and more
* than one lane is used, the lanes of a slice are submitted to the global
* thread pool and all of them are waited on before the next slice starts.
*
* @param B the memory blocks
* @param t number of passes
* @param memory total number of blocks
* @param threads number of lanes
* @param mode variant code forwarded to process_block
*/
void process_blocks(secure_vector<uint64_t>& B, size_t t, size_t memory, size_t threads, uint8_t mode) {
   const size_t lanes = memory / threads;
   const size_t segments = lanes / SYNC_POINTS;

#if defined(BOTAN_HAS_THREAD_UTILS)
   if(threads > 1) {
      auto& pool = Thread_Pool::global_instance();

      for(size_t pass = 0; pass != t; ++pass) {
         for(size_t slice = 0; slice != SYNC_POINTS; ++slice) {
            std::vector<std::future<void>> pending;
            pending.reserve(threads);

            // One task per lane for this slice
            for(size_t lane = 0; lane != threads; ++lane) {
               pending.push_back(
                  pool.run(process_block, std::ref(B), pass, slice, lane, lanes, segments, threads, mode, memory, t));
            }

            // Wait for every lane before moving to the next slice
            for(auto& task : pending) {
               task.get();
            }
         }
      }

      return;
   }
#endif

   // Sequential path: same iteration order, one lane at a time
   for(size_t pass = 0; pass != t; ++pass) {
      for(size_t slice = 0; slice != SYNC_POINTS; ++slice) {
         for(size_t lane = 0; lane != threads; ++lane) {
            process_block(B, pass, slice, lane, lanes, segments, threads, mode, memory, t);
         }
      }
   }
}
389
390
}  // namespace
391
392
void Argon2::argon2(uint8_t output[],
393
                    size_t output_len,
394
                    const char* password,
395
                    size_t password_len,
396
                    const uint8_t salt[],
397
                    size_t salt_len,
398
                    const uint8_t key[],
399
                    size_t key_len,
400
                    const uint8_t ad[],
401
0
                    size_t ad_len) const {
402
0
   BOTAN_ARG_CHECK(output_len >= 4 && output_len <= std::numeric_limits<uint32_t>::max(),
403
0
                   "Invalid Argon2 output length");
404
0
   BOTAN_ARG_CHECK(password_len <= std::numeric_limits<uint32_t>::max(), "Invalid Argon2 password length");
405
0
   BOTAN_ARG_CHECK(salt_len <= std::numeric_limits<uint32_t>::max(), "Invalid Argon2 salt length");
406
0
   BOTAN_ARG_CHECK(key_len <= std::numeric_limits<uint32_t>::max(), "Invalid Argon2 key length");
407
0
   BOTAN_ARG_CHECK(ad_len <= std::numeric_limits<uint32_t>::max(), "Invalid Argon2 ad length");
408
409
0
   auto blake2 = HashFunction::create_or_throw("BLAKE2b");
410
411
0
   uint8_t H0[64] = {0};
412
0
   argon2_H0(H0,
413
0
             *blake2,
414
0
             output_len,
415
0
             password,
416
0
             password_len,
417
0
             salt,
418
0
             salt_len,
419
0
             key,
420
0
             key_len,
421
0
             ad,
422
0
             ad_len,
423
0
             m_family,
424
0
             m_p,
425
0
             m_M,
426
0
             m_t);
427
428
0
   const size_t memory = (m_M / (SYNC_POINTS * m_p)) * (SYNC_POINTS * m_p);
429
430
0
   secure_vector<uint64_t> B(memory * 1024 / 8);
431
432
0
   init_blocks(B, *blake2, H0, memory, m_p);
433
0
   process_blocks(B, m_t, memory, m_p, m_family);
434
435
0
   clear_mem(output, output_len);
436
0
   extract_key(output, output_len, B, memory, m_p);
437
0
}
438
439
}  // namespace Botan