Coverage Report

Created: 2024-02-25 06:16

/src/botan/src/lib/pbkdf/argon2/argon2.cpp
Line
Count
Source (jump to first uncovered line)
1
/**
2
* (C) 2018,2019,2022 Jack Lloyd
3
*
4
* Botan is released under the Simplified BSD License (see license.txt)
5
*/
6
7
#include <botan/argon2.h>
8
9
#include <botan/exceptn.h>
10
#include <botan/hash.h>
11
#include <botan/mem_ops.h>
12
#include <botan/internal/fmt.h>
13
#include <botan/internal/loadstor.h>
14
#include <botan/internal/rotate.h>
15
#include <limits>
16
17
#if defined(BOTAN_HAS_THREAD_UTILS)
18
   #include <botan/internal/thread_pool.h>
19
#endif
20
21
#if defined(BOTAN_HAS_ARGON2_AVX2) || defined(BOTAN_HAS_ARGON2_SSSE3)
22
   #include <botan/internal/cpuid.h>
23
#endif
24
25
namespace Botan {
26
27
namespace {
28
29
const size_t SYNC_POINTS = 4;
30
31
void argon2_H0(uint8_t H0[64],
32
               HashFunction& blake2b,
33
               size_t output_len,
34
               const char* password,
35
               size_t password_len,
36
               const uint8_t salt[],
37
               size_t salt_len,
38
               const uint8_t key[],
39
               size_t key_len,
40
               const uint8_t ad[],
41
               size_t ad_len,
42
               size_t y,
43
               size_t p,
44
               size_t M,
45
216
               size_t t) {
46
216
   const uint8_t v = 19;  // Argon2 version code
47
48
216
   blake2b.update_le(static_cast<uint32_t>(p));
49
216
   blake2b.update_le(static_cast<uint32_t>(output_len));
50
216
   blake2b.update_le(static_cast<uint32_t>(M));
51
216
   blake2b.update_le(static_cast<uint32_t>(t));
52
216
   blake2b.update_le(static_cast<uint32_t>(v));
53
216
   blake2b.update_le(static_cast<uint32_t>(y));
54
55
216
   blake2b.update_le(static_cast<uint32_t>(password_len));
56
216
   blake2b.update(cast_char_ptr_to_uint8(password), password_len);
57
58
216
   blake2b.update_le(static_cast<uint32_t>(salt_len));
59
216
   blake2b.update(salt, salt_len);
60
61
216
   blake2b.update_le(static_cast<uint32_t>(key_len));
62
216
   blake2b.update(key, key_len);
63
64
216
   blake2b.update_le(static_cast<uint32_t>(ad_len));
65
216
   blake2b.update(ad, ad_len);
66
67
216
   blake2b.final(H0);
68
216
}
69
70
216
void extract_key(uint8_t output[], size_t output_len, const secure_vector<uint64_t>& B, size_t memory, size_t threads) {
71
216
   const size_t lanes = memory / threads;
72
73
216
   uint64_t sum[128] = {0};
74
75
16.8k
   for(size_t lane = 0; lane != threads; ++lane) {
76
16.6k
      const size_t start = 128 * (lane * lanes + lanes - 1);
77
16.6k
      const size_t end = 128 * (lane * lanes + lanes);
78
79
2.14M
      for(size_t j = start; j != end; ++j) {
80
2.13M
         sum[j % 128] ^= B[j];
81
2.13M
      }
82
16.6k
   }
83
84
216
   if(output_len <= 64) {
85
50
      auto blake2b = HashFunction::create_or_throw(fmt("BLAKE2b({})", output_len * 8));
86
50
      blake2b->update_le(static_cast<uint32_t>(output_len));
87
6.45k
      for(size_t i = 0; i != 128; ++i) {
88
6.40k
         blake2b->update_le(sum[i]);
89
6.40k
      }
90
50
      blake2b->final(output);
91
166
   } else {
92
166
      secure_vector<uint8_t> T(64);
93
94
166
      auto blake2b = HashFunction::create_or_throw("BLAKE2b(512)");
95
166
      blake2b->update_le(static_cast<uint32_t>(output_len));
96
21.4k
      for(size_t i = 0; i != 128; ++i) {
97
21.2k
         blake2b->update_le(sum[i]);
98
21.2k
      }
99
166
      blake2b->final(&T[0]);
100
101
2.56k
      while(output_len > 64) {
102
2.39k
         copy_mem(output, &T[0], 32);
103
2.39k
         output_len -= 32;
104
2.39k
         output += 32;
105
106
2.39k
         if(output_len > 64) {
107
2.23k
            blake2b->update(T);
108
2.23k
            blake2b->final(&T[0]);
109
2.23k
         }
110
2.39k
      }
111
112
166
      if(output_len == 64) {
113
22
         blake2b->update(T);
114
22
         blake2b->final(output);
115
144
      } else {
116
144
         auto blake2b_f = HashFunction::create_or_throw(fmt("BLAKE2b({})", output_len * 8));
117
144
         blake2b_f->update(T);
118
144
         blake2b_f->final(output);
119
144
      }
120
166
   }
121
216
}
122
123
void init_blocks(
124
216
   secure_vector<uint64_t>& B, HashFunction& blake2b, const uint8_t H0[64], size_t memory, size_t threads) {
125
216
   BOTAN_ASSERT_NOMSG(B.size() >= threads * 256);
126
127
16.8k
   for(size_t i = 0; i != threads; ++i) {
128
16.6k
      const size_t B_off = i * (memory / threads);
129
130
16.6k
      BOTAN_ASSERT_NOMSG(B.size() >= 128 * (B_off + 2));
131
132
49.9k
      for(size_t j = 0; j != 2; ++j) {
133
33.3k
         uint8_t T[64] = {0};
134
135
33.3k
         blake2b.update_le(static_cast<uint32_t>(1024));
136
33.3k
         blake2b.update(H0, 64);
137
33.3k
         blake2b.update_le(static_cast<uint32_t>(j));
138
33.3k
         blake2b.update_le(static_cast<uint32_t>(i));
139
33.3k
         blake2b.final(T);
140
141
1.03M
         for(size_t k = 0; k != 30; ++k) {
142
999k
            load_le(&B[128 * (B_off + j) + 4 * k], T, 32 / 8);
143
999k
            blake2b.update(T, 64);
144
999k
            blake2b.final(T);
145
999k
         }
146
147
33.3k
         load_le(&B[128 * (B_off + j) + 4 * 30], T, 64 / 8);
148
33.3k
      }
149
16.6k
   }
150
216
}
151
152
0
BOTAN_FORCE_INLINE void blamka_G(uint64_t& A, uint64_t& B, uint64_t& C, uint64_t& D) {
153
0
   A += B + (static_cast<uint64_t>(2) * static_cast<uint32_t>(A)) * static_cast<uint32_t>(B);
154
0
   D = rotr<32>(A ^ D);
155
156
0
   C += D + (static_cast<uint64_t>(2) * static_cast<uint32_t>(C)) * static_cast<uint32_t>(D);
157
0
   B = rotr<24>(B ^ C);
158
159
0
   A += B + (static_cast<uint64_t>(2) * static_cast<uint32_t>(A)) * static_cast<uint32_t>(B);
160
0
   D = rotr<16>(A ^ D);
161
162
0
   C += D + (static_cast<uint64_t>(2) * static_cast<uint32_t>(C)) * static_cast<uint32_t>(D);
163
0
   B = rotr<63>(B ^ C);
164
0
}
165
166
}  // namespace
167
168
5.63M
void Argon2::blamka(uint64_t N[128], uint64_t T[128]) {
169
5.63M
#if defined(BOTAN_HAS_ARGON2_AVX2)
170
5.85M
   if(CPUID::has_avx2()) {
171
5.85M
      return Argon2::blamka_avx2(N, T);
172
5.85M
   }
173
18.4E
#endif
174
175
18.4E
#if defined(BOTAN_HAS_ARGON2_SSSE3)
176
18.4E
   if(CPUID::has_ssse3()) {
177
0
      return Argon2::blamka_ssse3(N, T);
178
0
   }
179
18.4E
#endif
180
181
18.4E
   copy_mem(T, N, 128);
182
183
18.4E
   for(size_t i = 0; i != 128; i += 16) {
184
0
      blamka_G(T[i + 0], T[i + 4], T[i + 8], T[i + 12]);
185
0
      blamka_G(T[i + 1], T[i + 5], T[i + 9], T[i + 13]);
186
0
      blamka_G(T[i + 2], T[i + 6], T[i + 10], T[i + 14]);
187
0
      blamka_G(T[i + 3], T[i + 7], T[i + 11], T[i + 15]);
188
189
0
      blamka_G(T[i + 0], T[i + 5], T[i + 10], T[i + 15]);
190
0
      blamka_G(T[i + 1], T[i + 6], T[i + 11], T[i + 12]);
191
0
      blamka_G(T[i + 2], T[i + 7], T[i + 8], T[i + 13]);
192
0
      blamka_G(T[i + 3], T[i + 4], T[i + 9], T[i + 14]);
193
0
   }
194
195
18.4E
   for(size_t i = 0; i != 128 / 8; i += 2) {
196
0
      blamka_G(T[i + 0], T[i + 32], T[i + 64], T[i + 96]);
197
0
      blamka_G(T[i + 1], T[i + 33], T[i + 65], T[i + 97]);
198
0
      blamka_G(T[i + 16], T[i + 48], T[i + 80], T[i + 112]);
199
0
      blamka_G(T[i + 17], T[i + 49], T[i + 81], T[i + 113]);
200
201
0
      blamka_G(T[i + 0], T[i + 33], T[i + 80], T[i + 113]);
202
0
      blamka_G(T[i + 1], T[i + 48], T[i + 81], T[i + 96]);
203
0
      blamka_G(T[i + 16], T[i + 49], T[i + 64], T[i + 97]);
204
0
      blamka_G(T[i + 17], T[i + 32], T[i + 65], T[i + 112]);
205
0
   }
206
207
18.4E
   for(size_t i = 0; i != 128; ++i) {
208
0
      N[i] ^= T[i];
209
0
   }
210
18.4E
}
211
212
namespace {
213
214
void gen_2i_addresses(uint64_t T[128],
215
                      uint64_t B[128],
216
                      size_t n,
217
                      size_t lane,
218
                      size_t slice,
219
                      size_t memory,
220
                      size_t time,
221
                      size_t mode,
222
29.6k
                      size_t cnt) {
223
29.6k
   clear_mem(B, 128);
224
225
29.6k
   B[0] = n;
226
29.6k
   B[1] = lane;
227
29.6k
   B[2] = slice;
228
29.6k
   B[3] = memory;
229
29.6k
   B[4] = time;
230
29.6k
   B[5] = mode;
231
29.6k
   B[6] = cnt;
232
233
88.7k
   for(size_t r = 0; r != 2; ++r) {
234
59.1k
      Argon2::blamka(B, T);
235
59.1k
   }
236
29.6k
}
237
238
uint32_t index_alpha(
239
5.13M
   uint64_t random, size_t lanes, size_t segments, size_t threads, size_t n, size_t slice, size_t lane, size_t index) {
240
5.13M
   size_t ref_lane = static_cast<uint32_t>(random >> 32) % threads;
241
242
5.13M
   if(n == 0 && slice == 0) {
243
835k
      ref_lane = lane;
244
835k
   }
245
246
5.13M
   size_t m = 3 * segments;
247
5.13M
   size_t s = ((slice + 1) % 4) * segments;
248
249
5.13M
   if(lane == ref_lane) {
250
1.20M
      m += index;
251
1.20M
   }
252
253
5.13M
   if(n == 0) {
254
3.35M
      m = slice * segments;
255
3.35M
      s = 0;
256
3.35M
      if(slice == 0 || lane == ref_lane) {
257
1.03M
         m += index;
258
1.03M
      }
259
3.35M
   }
260
261
5.23M
   if(index == 0 || lane == ref_lane) {
262
1.31M
      m -= 1;
263
1.31M
   }
264
265
5.13M
   uint64_t p = static_cast<uint32_t>(random);
266
5.13M
   p = (p * p) >> 32;
267
5.13M
   p = (p * m) >> 32;
268
269
5.13M
   return static_cast<uint32_t>(ref_lane * lanes + (s + m - (p + 1)) % lanes);
270
5.13M
}
271
272
void process_block(secure_vector<uint64_t>& B,
273
                   size_t n,
274
                   size_t slice,
275
                   size_t lane,
276
                   size_t lanes,
277
                   size_t segments,
278
                   size_t threads,
279
                   uint8_t mode,
280
                   size_t memory,
281
91.4k
                   size_t time) {
282
91.4k
   uint64_t T[128];
283
91.4k
   size_t index = 0;
284
91.4k
   if(n == 0 && slice == 0) {
285
16.6k
      index = 2;
286
16.6k
   }
287
288
91.4k
   const bool use_2i = mode == 1 || (mode == 2 && n == 0 && slice < SYNC_POINTS / 2);
289
290
91.4k
   uint64_t addresses[128];
291
91.4k
   size_t address_counter = 1;
292
293
91.4k
   if(use_2i) {
294
24.0k
      gen_2i_addresses(T, addresses, n, lane, slice, memory, time, mode, address_counter);
295
24.0k
   }
296
297
5.11M
   while(index < segments) {
298
5.01M
      const size_t offset = lane * lanes + slice * segments + index;
299
300
5.01M
      size_t prev = offset - 1;
301
5.01M
      if(index == 0 && slice == 0) {
302
6.26k
         prev += lanes;
303
6.26k
      }
304
305
5.01M
      if(use_2i && index > 0 && index % 128 == 0) {
306
5.52k
         address_counter += 1;
307
5.52k
         gen_2i_addresses(T, addresses, n, lane, slice, memory, time, mode, address_counter);
308
5.52k
      }
309
310
5.01M
      const uint64_t random = use_2i ? addresses[index % 128] : B.at(128 * prev);
311
5.01M
      const size_t new_offset = index_alpha(random, lanes, segments, threads, n, slice, lane, index);
312
313
5.01M
      uint64_t N[128];
314
198M
      for(size_t i = 0; i != 128; ++i) {
315
193M
         N[i] = B[128 * prev + i] ^ B[128 * new_offset + i];
316
193M
      }
317
318
5.01M
      Argon2::blamka(N, T);
319
320
207M
      for(size_t i = 0; i != 128; ++i) {
321
202M
         B[128 * offset + i] ^= N[i];
322
202M
      }
323
324
5.01M
      index += 1;
325
5.01M
   }
326
91.4k
}
327
328
216
void process_blocks(secure_vector<uint64_t>& B, size_t t, size_t memory, size_t threads, uint8_t mode) {
329
216
   const size_t lanes = memory / threads;
330
216
   const size_t segments = lanes / SYNC_POINTS;
331
332
216
#if defined(BOTAN_HAS_THREAD_UTILS)
333
216
   auto& thread_pool = Thread_Pool::global_instance();
334
216
#endif
335
336
512
   for(size_t n = 0; n != t; ++n) {
337
1.48k
      for(size_t slice = 0; slice != SYNC_POINTS; ++slice) {
338
1.18k
#if defined(BOTAN_HAS_THREAD_UTILS)
339
1.18k
         if(threads > 1) {
340
1.14k
            std::vector<std::future<void>> fut_results;
341
1.14k
            fut_results.reserve(threads);
342
343
92.8k
            for(size_t lane = 0; lane != threads; ++lane) {
344
91.7k
               fut_results.push_back(thread_pool.run(
345
91.7k
                  process_block, std::ref(B), n, slice, lane, lanes, segments, threads, mode, memory, t));
346
91.7k
            }
347
348
91.7k
            for(auto& fut : fut_results) {
349
91.7k
               fut.get();
350
91.7k
            }
351
352
1.14k
            continue;
353
1.14k
         }
354
44
#endif
355
356
88
         for(size_t lane = 0; lane != threads; ++lane) {
357
44
            process_block(B, n, slice, lane, lanes, segments, threads, mode, memory, t);
358
44
         }
359
44
      }
360
296
   }
361
216
}
362
363
}  // namespace
364
365
void Argon2::argon2(uint8_t output[],
366
                    size_t output_len,
367
                    const char* password,
368
                    size_t password_len,
369
                    const uint8_t salt[],
370
                    size_t salt_len,
371
                    const uint8_t key[],
372
                    size_t key_len,
373
                    const uint8_t ad[],
374
222
                    size_t ad_len) const {
375
222
   BOTAN_ARG_CHECK(output_len >= 4 && output_len <= std::numeric_limits<uint32_t>::max(),
376
222
                   "Invalid Argon2 output length");
377
222
   BOTAN_ARG_CHECK(password_len <= std::numeric_limits<uint32_t>::max(), "Invalid Argon2 password length");
378
222
   BOTAN_ARG_CHECK(salt_len <= std::numeric_limits<uint32_t>::max(), "Invalid Argon2 salt length");
379
222
   BOTAN_ARG_CHECK(key_len <= std::numeric_limits<uint32_t>::max(), "Invalid Argon2 key length");
380
222
   BOTAN_ARG_CHECK(ad_len <= std::numeric_limits<uint32_t>::max(), "Invalid Argon2 ad length");
381
382
222
   auto blake2 = HashFunction::create_or_throw("BLAKE2b");
383
384
222
   uint8_t H0[64] = {0};
385
222
   argon2_H0(H0,
386
222
             *blake2,
387
222
             output_len,
388
222
             password,
389
222
             password_len,
390
222
             salt,
391
222
             salt_len,
392
222
             key,
393
222
             key_len,
394
222
             ad,
395
222
             ad_len,
396
222
             m_family,
397
222
             m_p,
398
222
             m_M,
399
222
             m_t);
400
401
222
   const size_t memory = (m_M / (SYNC_POINTS * m_p)) * (SYNC_POINTS * m_p);
402
403
222
   secure_vector<uint64_t> B(memory * 1024 / 8);
404
405
222
   init_blocks(B, *blake2, H0, memory, m_p);
406
222
   process_blocks(B, m_t, memory, m_p, m_family);
407
408
222
   clear_mem(output, output_len);
409
222
   extract_key(output, output_len, B, memory, m_p);
410
222
}
411
412
}  // namespace Botan