Coverage Report

Created: 2022-06-23 06:44

/src/botan/src/lib/pbkdf/argon2/argon2.cpp
Line
Count
Source (jump to first uncovered line)
1
/**
2
* (C) 2018,2019,2022 Jack Lloyd
3
*
4
* Botan is released under the Simplified BSD License (see license.txt)
5
*/
6
7
#include <botan/argon2.h>
8
#include <botan/internal/loadstor.h>
9
#include <botan/hash.h>
10
#include <botan/mem_ops.h>
11
#include <botan/internal/rotate.h>
12
#include <botan/exceptn.h>
13
14
#if defined(BOTAN_HAS_THREAD_UTILS)
15
   #include <botan/internal/thread_pool.h>
16
#endif
17
18
#if defined(BOTAN_HAS_ARGON2_SSSE3)
19
   #include <botan/internal/argon2_ssse3.h>
20
   #include <botan/internal/cpuid.h>
21
#endif
22
23
namespace Botan {
24
25
namespace {
26
27
const size_t SYNC_POINTS = 4;
28
29
void argon2_H0(uint8_t H0[64],
30
               HashFunction& blake2b,
31
               size_t output_len,
32
               const char* password, size_t password_len,
33
               const uint8_t salt[], size_t salt_len,
34
               const uint8_t key[], size_t key_len,
35
               const uint8_t ad[], size_t ad_len,
36
               size_t y, size_t p, size_t M, size_t t)
37
0
   {
38
0
   const uint8_t v = 19; // Argon2 version code
39
40
0
   blake2b.update_le(static_cast<uint32_t>(p));
41
0
   blake2b.update_le(static_cast<uint32_t>(output_len));
42
0
   blake2b.update_le(static_cast<uint32_t>(M));
43
0
   blake2b.update_le(static_cast<uint32_t>(t));
44
0
   blake2b.update_le(static_cast<uint32_t>(v));
45
0
   blake2b.update_le(static_cast<uint32_t>(y));
46
47
0
   blake2b.update_le(static_cast<uint32_t>(password_len));
48
0
   blake2b.update(cast_char_ptr_to_uint8(password), password_len);
49
50
0
   blake2b.update_le(static_cast<uint32_t>(salt_len));
51
0
   blake2b.update(salt, salt_len);
52
53
0
   blake2b.update_le(static_cast<uint32_t>(key_len));
54
0
   blake2b.update(key, key_len);
55
56
0
   blake2b.update_le(static_cast<uint32_t>(ad_len));
57
0
   blake2b.update(ad, ad_len);
58
59
0
   blake2b.final(H0);
60
0
   }
61
62
void extract_key(uint8_t output[], size_t output_len,
63
                 const secure_vector<uint64_t>& B,
64
                 size_t memory, size_t threads)
65
0
   {
66
0
   const size_t lanes = memory / threads;
67
68
0
   uint64_t sum[128] = { 0 };
69
70
0
   for(size_t lane = 0; lane != threads; ++lane)
71
0
      {
72
0
      const size_t start = 128*(lane * lanes + lanes - 1);
73
0
      const size_t end = 128*(lane * lanes + lanes);
74
75
0
      for(size_t j = start; j != end; ++j)
76
0
         {
77
0
         sum[j % 128] ^= B[j];
78
0
         }
79
0
      }
80
81
0
   if(output_len <= 64)
82
0
      {
83
0
      std::unique_ptr<HashFunction> blake2b = HashFunction::create_or_throw("BLAKE2b(" + std::to_string(output_len*8) + ")");
84
0
      blake2b->update_le(static_cast<uint32_t>(output_len));
85
0
      for(size_t i = 0; i != 128; ++i)
86
0
         blake2b->update_le(sum[i]);
87
0
      blake2b->final(output);
88
0
      }
89
0
   else
90
0
      {
91
0
      secure_vector<uint8_t> T(64);
92
93
0
      std::unique_ptr<HashFunction> blake2b = HashFunction::create_or_throw("BLAKE2b(512)");
94
0
      blake2b->update_le(static_cast<uint32_t>(output_len));
95
0
      for(size_t i = 0; i != 128; ++i)
96
0
         blake2b->update_le(sum[i]);
97
0
      blake2b->final(&T[0]);
98
99
0
      while(output_len > 64)
100
0
         {
101
0
         copy_mem(output, &T[0], 32);
102
0
         output_len -= 32;
103
0
         output += 32;
104
105
0
         if(output_len > 64)
106
0
            {
107
0
            blake2b->update(T);
108
0
            blake2b->final(&T[0]);
109
0
            }
110
0
         }
111
112
0
      if(output_len == 64)
113
0
         {
114
0
         blake2b->update(T);
115
0
         blake2b->final(output);
116
0
         }
117
0
      else
118
0
         {
119
0
         std::unique_ptr<HashFunction> blake2b_f = HashFunction::create_or_throw("BLAKE2b(" + std::to_string(output_len*8) + ")");
120
0
         blake2b_f->update(T);
121
0
         blake2b_f->final(output);
122
0
         }
123
0
      }
124
0
   }
125
126
void init_blocks(secure_vector<uint64_t>& B,
127
                 HashFunction& blake2b,
128
                 const uint8_t H0[64],
129
                 size_t memory,
130
                 size_t threads)
131
0
   {
132
0
   BOTAN_ASSERT_NOMSG(B.size() >= threads*256);
133
134
0
   for(size_t i = 0; i != threads; ++i)
135
0
      {
136
0
      const size_t B_off = i * (memory / threads);
137
138
0
      BOTAN_ASSERT_NOMSG(B.size() >= 128*(B_off+2));
139
140
0
      for(size_t j = 0; j != 2; ++j)
141
0
         {
142
0
         uint8_t T[64] = { 0 };
143
144
0
         blake2b.update_le(static_cast<uint32_t>(1024));
145
0
         blake2b.update(H0, 64);
146
0
         blake2b.update_le(static_cast<uint32_t>(j));
147
0
         blake2b.update_le(static_cast<uint32_t>(i));
148
0
         blake2b.final(T);
149
150
0
         for(size_t k = 0; k != 30; ++k)
151
0
            {
152
0
            load_le(&B[128*(B_off+j)+4*k], T, 32 / 8);
153
0
            blake2b.update(T, 64);
154
0
            blake2b.final(T);
155
0
            }
156
157
0
         load_le(&B[128*(B_off+j)+4*30], T, 64 / 8);
158
0
         }
159
0
      }
160
0
   }
161
162
BOTAN_FORCE_INLINE void blamka_G(uint64_t& A, uint64_t& B, uint64_t& C, uint64_t& D)
163
0
   {
164
0
   A += B + (static_cast<uint64_t>(2) * static_cast<uint32_t>(A)) * static_cast<uint32_t>(B);
165
0
   D = rotr<32>(A ^ D);
166
167
0
   C += D + (static_cast<uint64_t>(2) * static_cast<uint32_t>(C)) * static_cast<uint32_t>(D);
168
0
   B = rotr<24>(B ^ C);
169
170
0
   A += B + (static_cast<uint64_t>(2) * static_cast<uint32_t>(A)) * static_cast<uint32_t>(B);
171
0
   D = rotr<16>(A ^ D);
172
173
0
   C += D + (static_cast<uint64_t>(2) * static_cast<uint32_t>(C)) * static_cast<uint32_t>(D);
174
0
   B = rotr<63>(B ^ C);
175
0
   }
176
177
void blamka(uint64_t T[128])
178
0
   {
179
0
#if defined(BOTAN_HAS_ARGON2_SSSE3)
180
0
   if(CPUID::has_ssse3())
181
0
      return blamka_ssse3(T);
182
0
#endif
183
184
0
   for(size_t i = 0; i != 128; i += 16)
185
0
      {
186
0
      blamka_G(T[i+  0], T[i+  4], T[i+  8], T[i+ 12]);
187
0
      blamka_G(T[i+  1], T[i+  5], T[i+  9], T[i+ 13]);
188
0
      blamka_G(T[i+  2], T[i+  6], T[i+ 10], T[i+ 14]);
189
0
      blamka_G(T[i+  3], T[i+  7], T[i+ 11], T[i+ 15]);
190
191
0
      blamka_G(T[i+  0], T[i+  5], T[i+ 10], T[i+ 15]);
192
0
      blamka_G(T[i+  1], T[i+  6], T[i+ 11], T[i+ 12]);
193
0
      blamka_G(T[i+  2], T[i+  7], T[i+  8], T[i+ 13]);
194
0
      blamka_G(T[i+  3], T[i+  4], T[i+  9], T[i+ 14]);
195
0
      }
196
197
0
   for(size_t i = 0; i != 128 / 8; i += 2)
198
0
      {
199
0
      blamka_G(T[i+  0], T[i+ 32], T[i+ 64], T[i+ 96]);
200
0
      blamka_G(T[i+  1], T[i+ 33], T[i+ 65], T[i+ 97]);
201
0
      blamka_G(T[i+ 16], T[i+ 48], T[i+ 80], T[i+112]);
202
0
      blamka_G(T[i+ 17], T[i+ 49], T[i+ 81], T[i+113]);
203
204
0
      blamka_G(T[i+  0], T[i+ 33], T[i+ 80], T[i+113]);
205
0
      blamka_G(T[i+  1], T[i+ 48], T[i+ 81], T[i+ 96]);
206
0
      blamka_G(T[i+ 16], T[i+ 49], T[i+ 64], T[i+ 97]);
207
0
      blamka_G(T[i+ 17], T[i+ 32], T[i+ 65], T[i+112]);
208
0
      }
209
0
   }
210
211
void gen_2i_addresses(uint64_t T[128], uint64_t B[128],
212
                      size_t n, size_t lane, size_t slice, size_t memory,
213
                      size_t time, size_t mode, size_t cnt)
214
0
   {
215
0
   clear_mem(B, 128);
216
217
0
   B[0] = n;
218
0
   B[1] = lane;
219
0
   B[2] = slice;
220
0
   B[3] = memory;
221
0
   B[4] = time;
222
0
   B[5] = mode;
223
0
   B[6] = cnt;
224
225
0
   for(size_t r = 0; r != 2; ++r)
226
0
      {
227
0
      copy_mem(T, B, 128);
228
229
0
      blamka(T);
230
231
0
      for(size_t i = 0; i != 128; ++i)
232
0
         B[i] ^= T[i];
233
0
      }
234
0
   }
235
236
uint32_t index_alpha(uint64_t random,
237
                     size_t lanes,
238
                     size_t segments,
239
                     size_t threads,
240
                     size_t n,
241
                     size_t slice,
242
                     size_t lane,
243
                     size_t index)
244
0
   {
245
0
   size_t ref_lane = static_cast<uint32_t>(random >> 32) % threads;
246
247
0
   if(n == 0 && slice == 0)
248
0
      ref_lane = lane;
249
250
0
   size_t m = 3*segments;
251
0
   size_t s = ((slice+1) % 4)*segments;
252
253
0
   if(lane == ref_lane)
254
0
      m += index;
255
256
0
   if(n == 0)
257
0
      {
258
0
      m = slice*segments;
259
0
      s = 0;
260
0
      if(slice == 0 || lane == ref_lane)
261
0
         m += index;
262
0
      }
263
264
0
   if(index == 0 || lane == ref_lane)
265
0
      m -= 1;
266
267
0
   uint64_t p = static_cast<uint32_t>(random);
268
0
   p = (p * p) >> 32;
269
0
   p = (p * m) >> 32;
270
271
0
   return static_cast<uint32_t>(ref_lane*lanes + (s + m - (p+1)) % lanes);
272
0
   }
273
274
void process_block(secure_vector<uint64_t>& B,
275
                   size_t n, size_t slice, size_t lane,
276
                   size_t lanes, size_t segments, size_t threads, uint8_t mode,
277
                   size_t memory, size_t time)
278
0
   {
279
0
   uint64_t T[128];
280
0
   size_t index = 0;
281
0
   if(n == 0 && slice == 0)
282
0
      index = 2;
283
284
0
   const bool use_2i = mode == 1 || (mode == 2 && n == 0 && slice < SYNC_POINTS/2);
285
286
0
   uint64_t addresses[128];
287
0
   size_t address_counter = 1;
288
289
0
   if(use_2i)
290
0
      {
291
0
      gen_2i_addresses(T, addresses, n, lane, slice, memory, time, mode, address_counter);
292
0
      }
293
294
0
   while(index < segments)
295
0
      {
296
0
      const size_t offset = lane*lanes + slice*segments + index;
297
298
0
      size_t prev = offset - 1;
299
0
      if(index == 0 && slice == 0)
300
0
         prev += lanes;
301
302
0
      if(use_2i && index > 0 && index % 128 == 0)
303
0
         {
304
0
         address_counter += 1;
305
0
         gen_2i_addresses(T, addresses, n, lane, slice, memory, time, mode, address_counter);
306
0
         }
307
308
0
      const uint64_t random = use_2i ? addresses[index % 128] : B.at(128*prev);
309
0
      const size_t new_offset = index_alpha(random, lanes, segments, threads, n, slice, lane, index);
310
311
0
      for(size_t i = 0; i != 128; ++i)
312
0
         T[i] = B[128*prev+i] ^ B[128*new_offset+i];
313
314
0
      blamka(T);
315
316
0
      for(size_t i = 0; i != 128; ++i)
317
0
         B[128*offset + i] ^= T[i] ^ B[128*prev+i] ^ B[128*new_offset+i];
318
319
0
      index += 1;
320
0
      }
321
0
   }
322
323
void process_blocks(secure_vector<uint64_t>& B,
324
                    size_t t,
325
                    size_t memory,
326
                    size_t threads,
327
                    uint8_t mode)
328
0
   {
329
0
   const size_t lanes = memory / threads;
330
0
   const size_t segments = lanes / SYNC_POINTS;
331
332
0
#if defined(BOTAN_HAS_THREAD_UTILS)
333
0
   auto& thread_pool = Thread_Pool::global_instance();
334
0
#endif
335
336
0
   for(size_t n = 0; n != t; ++n)
337
0
      {
338
0
      for(size_t slice = 0; slice != SYNC_POINTS; ++slice)
339
0
         {
340
0
#if defined(BOTAN_HAS_THREAD_UTILS)
341
0
         if(threads > 1)
342
0
            {
343
0
            std::vector<std::future<void>> fut_results;
344
0
            fut_results.reserve(threads);
345
346
0
            for(size_t lane = 0; lane != threads; ++lane)
347
0
               {
348
0
               fut_results.push_back(thread_pool.run(
349
0
                  process_block,
350
0
                  std::ref(B), n, slice, lane, lanes, segments, threads, mode, memory, t));
351
0
               }
352
353
0
            for(auto& fut : fut_results)
354
0
               fut.get();
355
356
0
            continue;
357
0
            }
358
0
#endif
359
360
0
         for(size_t lane = 0; lane != threads; ++lane)
361
0
            {
362
0
            process_block(B, n, slice, lane, lanes, segments, threads, mode, memory, t);
363
0
            }
364
0
         }
365
0
      }
366
0
   }
367
368
}
369
370
void Argon2::argon2(uint8_t output[], size_t output_len,
371
                    const char* password, size_t password_len,
372
                    const uint8_t salt[], size_t salt_len,
373
                    const uint8_t key[], size_t key_len,
374
                    const uint8_t ad[], size_t ad_len) const
375
0
   {
376
0
   BOTAN_ARG_CHECK(output_len >= 4, "Invalid Argon2 output length");
377
378
0
   auto blake2 = HashFunction::create_or_throw("BLAKE2b");
379
380
0
   uint8_t H0[64] = { 0 };
381
0
   argon2_H0(H0, *blake2, output_len,
382
0
             password, password_len,
383
0
             salt, salt_len,
384
0
             key, key_len,
385
0
             ad, ad_len,
386
0
             m_family, m_p, m_M, m_t);
387
388
0
   const size_t memory = (m_M / (SYNC_POINTS*m_p)) * (SYNC_POINTS*m_p);
389
390
0
   secure_vector<uint64_t> B(memory * 1024/8);
391
392
0
   init_blocks(B, *blake2, H0, memory, m_p);
393
0
   process_blocks(B, m_t, memory, m_p, m_family);
394
395
0
   clear_mem(output, output_len);
396
0
   extract_key(output, output_len, B, memory, m_p);
397
0
   }
398
399
}