Coverage Report

Created: 2021-02-21 07:20

/src/botan/src/lib/pbkdf/argon2/argon2.cpp
Line
Count
Source (jump to first uncovered line)
1
/**
2
* (C) 2018,2019 Jack Lloyd
3
*
4
* Botan is released under the Simplified BSD License (see license.txt)
5
*/
6
7
#include <botan/argon2.h>
8
#include <botan/internal/loadstor.h>
9
#include <botan/hash.h>
10
#include <botan/mem_ops.h>
11
#include <botan/internal/rotate.h>
12
#include <botan/exceptn.h>
13
14
namespace Botan {
15
16
namespace {
17
18
static const size_t SYNC_POINTS = 4;
19
20
secure_vector<uint8_t> argon2_H0(HashFunction& blake2b,
21
                                 size_t output_len,
22
                                 const char* password, size_t password_len,
23
                                 const uint8_t salt[], size_t salt_len,
24
                                 const uint8_t key[], size_t key_len,
25
                                 const uint8_t ad[], size_t ad_len,
26
                                 size_t y, size_t p, size_t M, size_t t)
27
0
   {
28
0
   const uint8_t v = 19; // Argon2 version code
29
30
0
   blake2b.update_le(static_cast<uint32_t>(p));
31
0
   blake2b.update_le(static_cast<uint32_t>(output_len));
32
0
   blake2b.update_le(static_cast<uint32_t>(M));
33
0
   blake2b.update_le(static_cast<uint32_t>(t));
34
0
   blake2b.update_le(static_cast<uint32_t>(v));
35
0
   blake2b.update_le(static_cast<uint32_t>(y));
36
37
0
   blake2b.update_le(static_cast<uint32_t>(password_len));
38
0
   blake2b.update(cast_char_ptr_to_uint8(password), password_len);
39
40
0
   blake2b.update_le(static_cast<uint32_t>(salt_len));
41
0
   blake2b.update(salt, salt_len);
42
43
0
   blake2b.update_le(static_cast<uint32_t>(key_len));
44
0
   blake2b.update(key, key_len);
45
46
0
   blake2b.update_le(static_cast<uint32_t>(ad_len));
47
0
   blake2b.update(ad, ad_len);
48
49
0
   return blake2b.final();
50
0
   }
51
52
void Htick(secure_vector<uint8_t>& T,
53
           uint8_t output[],
54
           size_t output_len,
55
           HashFunction& blake2b,
56
           const secure_vector<uint8_t>& H0,
57
           size_t p0, size_t p1)
58
0
   {
59
0
   BOTAN_ASSERT_NOMSG(output_len % 64 == 0);
60
61
0
   blake2b.update_le(static_cast<uint32_t>(output_len));
62
0
   blake2b.update(H0);
63
0
   blake2b.update_le(static_cast<uint32_t>(p0));
64
0
   blake2b.update_le(static_cast<uint32_t>(p1));
65
66
0
   blake2b.final(&T[0]);
67
68
0
   while(output_len > 64)
69
0
      {
70
0
      copy_mem(output, &T[0], 32);
71
0
      output_len -= 32;
72
0
      output += 32;
73
74
0
      blake2b.update(T);
75
0
      blake2b.final(&T[0]);
76
0
      }
77
78
0
   if(output_len > 0)
79
0
      copy_mem(output, &T[0], output_len);
80
0
   }
81
82
void extract_key(uint8_t output[], size_t output_len,
83
                 const secure_vector<uint64_t>& B,
84
                 size_t memory, size_t threads)
85
0
   {
86
0
   const size_t lanes = memory / threads;
87
88
0
   secure_vector<uint64_t> sum(128);
89
90
0
   for(size_t lane = 0; lane != threads; ++lane)
91
0
      {
92
0
      size_t start = 128*(lane * lanes + lanes - 1);
93
0
      size_t end = 128*(lane * lanes + lanes);
94
95
0
      for(size_t j = start; j != end; ++j)
96
0
         {
97
0
         sum[j % 128] ^= B[j];
98
0
         }
99
0
      }
100
101
0
   secure_vector<uint8_t> sum8(1024);
102
0
   copy_out_le(sum8.data(), 1024, sum.data());
103
104
0
   if(output_len <= 64)
105
0
      {
106
0
      std::unique_ptr<HashFunction> blake2b = HashFunction::create_or_throw("BLAKE2b(" + std::to_string(output_len*8) + ")");
107
0
      blake2b->update_le(static_cast<uint32_t>(output_len));
108
0
      blake2b->update(sum8.data(), sum8.size());
109
0
      blake2b->final(output);
110
0
      }
111
0
   else
112
0
      {
113
0
      secure_vector<uint8_t> T(64);
114
115
0
      std::unique_ptr<HashFunction> blake2b = HashFunction::create_or_throw("BLAKE2b(512)");
116
0
      blake2b->update_le(static_cast<uint32_t>(output_len));
117
0
      blake2b->update(sum8.data(), sum8.size());
118
0
      blake2b->final(&T[0]);
119
120
0
      while(output_len > 64)
121
0
         {
122
0
         copy_mem(output, &T[0], 32);
123
0
         output_len -= 32;
124
0
         output += 32;
125
126
0
         if(output_len > 64)
127
0
            {
128
0
            blake2b->update(T);
129
0
            blake2b->final(&T[0]);
130
0
            }
131
0
         }
132
133
0
      if(output_len == 64)
134
0
         {
135
0
         blake2b->update(T);
136
0
         blake2b->final(output);
137
0
         }
138
0
      else
139
0
         {
140
0
         std::unique_ptr<HashFunction> blake2b_f = HashFunction::create_or_throw("BLAKE2b(" + std::to_string(output_len*8) + ")");
141
0
         blake2b_f->update(T);
142
0
         blake2b_f->final(output);
143
0
         }
144
0
      }
145
0
   }
146
147
void init_blocks(secure_vector<uint64_t>& B,
148
                 HashFunction& blake2b,
149
                 const secure_vector<uint8_t>& H0,
150
                 size_t memory,
151
                 size_t threads)
152
0
   {
153
0
   BOTAN_ASSERT_NOMSG(B.size() >= threads*256);
154
155
0
   secure_vector<uint8_t> H(1024);
156
0
   secure_vector<uint8_t> T(blake2b.output_length());
157
158
0
   for(size_t i = 0; i != threads; ++i)
159
0
      {
160
0
      const size_t B_off = i * (memory / threads);
161
162
0
      BOTAN_ASSERT_NOMSG(B.size() >= 128*(B_off+2));
163
164
0
      Htick(T, &H[0], H.size(), blake2b, H0, 0, i);
165
166
0
      for(size_t j = 0; j != 128; ++j)
167
0
         {
168
0
         B[128*B_off+j] = load_le<uint64_t>(H.data(), j);
169
0
         }
170
171
0
      Htick(T, &H[0], H.size(), blake2b, H0, 1, i);
172
173
0
      for(size_t j = 0; j != 128; ++j)
174
0
         {
175
0
         B[128*(B_off+1)+j] = load_le<uint64_t>(H.data(), j);
176
0
         }
177
0
      }
178
0
   }
179
180
inline void blamka_G(uint64_t& A, uint64_t& B, uint64_t& C, uint64_t& D)
181
0
   {
182
0
   A += B + (static_cast<uint64_t>(2) * static_cast<uint32_t>(A)) * static_cast<uint32_t>(B);
183
0
   D = rotr<32>(A ^ D);
184
185
0
   C += D + (static_cast<uint64_t>(2) * static_cast<uint32_t>(C)) * static_cast<uint32_t>(D);
186
0
   B = rotr<24>(B ^ C);
187
188
0
   A += B + (static_cast<uint64_t>(2) * static_cast<uint32_t>(A)) * static_cast<uint32_t>(B);
189
0
   D = rotr<16>(A ^ D);
190
191
0
   C += D + (static_cast<uint64_t>(2) * static_cast<uint32_t>(C)) * static_cast<uint32_t>(D);
192
0
   B = rotr<63>(B ^ C);
193
0
   }
194
195
inline void blamka(uint64_t& V0, uint64_t& V1, uint64_t& V2, uint64_t& V3,
196
                   uint64_t& V4, uint64_t& V5, uint64_t& V6, uint64_t& V7,
197
                   uint64_t& V8, uint64_t& V9, uint64_t& VA, uint64_t& VB,
198
                   uint64_t& VC, uint64_t& VD, uint64_t& VE, uint64_t& VF)
199
0
   {
200
0
   blamka_G(V0, V4, V8, VC);
201
0
   blamka_G(V1, V5, V9, VD);
202
0
   blamka_G(V2, V6, VA, VE);
203
0
   blamka_G(V3, V7, VB, VF);
204
205
0
   blamka_G(V0, V5, VA, VF);
206
0
   blamka_G(V1, V6, VB, VC);
207
0
   blamka_G(V2, V7, V8, VD);
208
0
   blamka_G(V3, V4, V9, VE);
209
0
   }
210
211
void process_block_xor(secure_vector<uint64_t>& T,
212
                       secure_vector<uint64_t>& B,
213
                       size_t offset,
214
                       size_t prev,
215
                       size_t new_offset)
216
0
   {
217
0
   for(size_t i = 0; i != 128; ++i)
218
0
      T[i] = B[128*prev+i] ^ B[128*new_offset+i];
219
220
0
   for(size_t i = 0; i != 128; i += 16)
221
0
      {
222
0
      blamka(T[i+ 0], T[i+ 1], T[i+ 2], T[i+ 3],
223
0
             T[i+ 4], T[i+ 5], T[i+ 6], T[i+ 7],
224
0
             T[i+ 8], T[i+ 9], T[i+10], T[i+11],
225
0
             T[i+12], T[i+13], T[i+14], T[i+15]);
226
0
      }
227
228
0
   for(size_t i = 0; i != 128 / 8; i += 2)
229
0
      {
230
0
      blamka(T[    i], T[    i+1], T[ 16+i], T[ 16+i+1],
231
0
             T[ 32+i], T[ 32+i+1], T[ 48+i], T[ 48+i+1],
232
0
             T[ 64+i], T[ 64+i+1], T[ 80+i], T[ 80+i+1],
233
0
             T[ 96+i], T[ 96+i+1], T[112+i], T[112+i+1]);
234
0
      }
235
236
0
   for(size_t i = 0; i != 128; ++i)
237
0
      B[128*offset + i] ^= T[i] ^ B[128*prev+i] ^ B[128*new_offset+i];
238
0
   }
239
240
void gen_2i_addresses(secure_vector<uint64_t>& T, secure_vector<uint64_t>& B,
241
                      size_t n, size_t lane, size_t slice, size_t memory,
242
                      size_t time, size_t mode, size_t cnt)
243
0
   {
244
0
   BOTAN_ASSERT_NOMSG(B.size() == 128);
245
0
   BOTAN_ASSERT_NOMSG(T.size() == 128);
246
247
0
   clear_mem(B.data(), B.size());
248
0
   B[0] = n;
249
0
   B[1] = lane;
250
0
   B[2] = slice;
251
0
   B[3] = memory;
252
0
   B[4] = time;
253
0
   B[5] = mode;
254
0
   B[6] = cnt;
255
256
0
   for(size_t r = 0; r != 2; ++r)
257
0
      {
258
0
      copy_mem(T.data(), B.data(), B.size());
259
260
0
      for(size_t i = 0; i != 128; i += 16)
261
0
         {
262
0
         blamka(T[i+ 0], T[i+ 1], T[i+ 2], T[i+ 3],
263
0
                T[i+ 4], T[i+ 5], T[i+ 6], T[i+ 7],
264
0
                T[i+ 8], T[i+ 9], T[i+10], T[i+11],
265
0
                T[i+12], T[i+13], T[i+14], T[i+15]);
266
0
         }
267
0
      for(size_t i = 0; i != 128 / 8; i += 2)
268
0
         {
269
0
         blamka(T[    i], T[    i+1], T[ 16+i], T[ 16+i+1],
270
0
                T[ 32+i], T[ 32+i+1], T[ 48+i], T[ 48+i+1],
271
0
                T[ 64+i], T[ 64+i+1], T[ 80+i], T[ 80+i+1],
272
0
                T[ 96+i], T[ 96+i+1], T[112+i], T[112+i+1]);
273
0
         }
274
275
0
      for(size_t i = 0; i != 128; ++i)
276
0
         B[i] ^= T[i];
277
0
      }
278
0
   }
279
280
uint32_t index_alpha(uint64_t random,
281
                     size_t lanes,
282
                     size_t segments,
283
                     size_t threads,
284
                     size_t n,
285
                     size_t slice,
286
                     size_t lane,
287
                     size_t index)
288
0
   {
289
0
   size_t ref_lane = static_cast<uint32_t>(random >> 32) % threads;
290
291
0
   if(n == 0 && slice == 0)
292
0
      ref_lane = lane;
293
294
0
   size_t m = 3*segments;
295
0
   size_t s = ((slice+1) % 4)*segments;
296
297
0
   if(lane == ref_lane)
298
0
      m += index;
299
300
0
   if(n == 0) {
301
0
         m = slice*segments;
302
0
         s = 0;
303
0
         if(slice == 0 || lane == ref_lane)
304
0
            m += index;
305
0
   }
306
307
0
   if(index == 0 || lane == ref_lane)
308
0
      m -= 1;
309
310
0
   uint64_t p = static_cast<uint32_t>(random);
311
0
   p = (p * p) >> 32;
312
0
   p = (p * m) >> 32;
313
314
0
   return static_cast<uint32_t>(ref_lane*lanes + (s + m - (p+1)) % lanes);
315
0
   }
316
317
void process_block_argon2d(secure_vector<uint64_t>& T,
318
                           secure_vector<uint64_t>& B,
319
                           size_t n, size_t slice, size_t lane,
320
                           size_t lanes, size_t segments, size_t threads)
321
0
   {
322
0
   size_t index = 0;
323
0
   if(n == 0 && slice == 0)
324
0
      index = 2;
325
326
0
   while(index < segments)
327
0
      {
328
0
      const size_t offset = lane*lanes + slice*segments + index;
329
330
0
      size_t prev = offset - 1;
331
0
      if(index == 0 && slice == 0)
332
0
         prev += lanes;
333
334
0
      const uint64_t random = B.at(128*prev);
335
0
      const size_t new_offset = index_alpha(random, lanes, segments, threads, n, slice, lane, index);
336
337
0
      process_block_xor(T, B, offset, prev, new_offset);
338
339
0
      index += 1;
340
0
      }
341
0
   }
342
343
void process_block_argon2i(secure_vector<uint64_t>& T,
344
                           secure_vector<uint64_t>& B,
345
                           size_t n, size_t slice, size_t lane,
346
                           size_t lanes, size_t segments, size_t threads, uint8_t mode,
347
                           size_t memory, size_t time)
348
0
   {
349
0
   size_t index = 0;
350
0
   if(n == 0 && slice == 0)
351
0
      index = 2;
352
353
0
   secure_vector<uint64_t> addresses(128);
354
0
   size_t address_counter = 1;
355
356
0
   gen_2i_addresses(T, addresses, n, lane, slice, memory, time, mode, address_counter);
357
358
0
   while(index < segments)
359
0
      {
360
0
      const size_t offset = lane*lanes + slice*segments + index;
361
362
0
      size_t prev = offset - 1;
363
0
      if(index == 0 && slice == 0)
364
0
         prev += lanes;
365
366
0
      if(index > 0 && index % 128 == 0)
367
0
         {
368
0
         address_counter += 1;
369
0
         gen_2i_addresses(T, addresses, n, lane, slice, memory, time, mode, address_counter);
370
0
         }
371
372
0
      const uint64_t random = addresses[index % 128];
373
0
      const size_t new_offset = index_alpha(random, lanes, segments, threads, n, slice, lane, index);
374
375
0
      process_block_xor(T, B, offset, prev, new_offset);
376
377
0
      index += 1;
378
0
      }
379
0
   }
380
381
void process_blocks(secure_vector<uint64_t>& B,
382
                    size_t t,
383
                    size_t memory,
384
                    size_t threads,
385
                    uint8_t mode)
386
0
   {
387
0
   const size_t lanes = memory / threads;
388
0
   const size_t segments = lanes / SYNC_POINTS;
389
390
0
   secure_vector<uint64_t> T(128);
391
0
   for(size_t n = 0; n != t; ++n)
392
0
      {
393
0
      for(size_t slice = 0; slice != SYNC_POINTS; ++slice)
394
0
         {
395
         // TODO can run this in Thread_Pool
396
0
         for(size_t lane = 0; lane != threads; ++lane)
397
0
            {
398
0
            if(mode == 1 || (mode == 2 && n == 0 && slice < SYNC_POINTS/2))
399
0
               process_block_argon2i(T, B, n, slice, lane, lanes, segments, threads, mode, memory, t);
400
0
            else
401
0
               process_block_argon2d(T, B, n, slice, lane, lanes, segments, threads);
402
0
            }
403
0
         }
404
0
      }
405
406
0
   }
407
408
}
409
410
void argon2(uint8_t output[], size_t output_len,
411
            const char* password, size_t password_len,
412
            const uint8_t salt[], size_t salt_len,
413
            const uint8_t key[], size_t key_len,
414
            const uint8_t ad[], size_t ad_len,
415
            uint8_t mode, size_t threads, size_t M, size_t t)
416
0
   {
417
0
   BOTAN_ARG_CHECK(mode == 0 || mode == 1 || mode == 2, "Unknown Argon2 mode parameter");
418
0
   BOTAN_ARG_CHECK(output_len >= 4, "Invalid Argon2 output length");
419
0
   BOTAN_ARG_CHECK(threads >= 1 && threads <= 128, "Invalid Argon2 threads parameter");
420
0
   BOTAN_ARG_CHECK(M >= 8*threads && M <= 8192*1024, "Invalid Argon2 M parameter");
421
0
   BOTAN_ARG_CHECK(t >= 1, "Invalid Argon2 t parameter");
422
423
0
   std::unique_ptr<HashFunction> blake2 = HashFunction::create_or_throw("BLAKE2b");
424
425
0
   const auto H0 = argon2_H0(*blake2, output_len,
426
0
                             password, password_len,
427
0
                             salt, salt_len,
428
0
                             key, key_len,
429
0
                             ad, ad_len,
430
0
                             mode, threads, M, t);
431
432
0
   const size_t memory = (M / (SYNC_POINTS*threads)) * (SYNC_POINTS*threads);
433
434
0
   secure_vector<uint64_t> B(memory * 1024/8);
435
436
0
   init_blocks(B, *blake2, H0, memory, threads);
437
0
   process_blocks(B, t, memory, threads, mode);
438
439
0
   clear_mem(output, output_len);
440
0
   extract_key(output, output_len, B, memory, threads);
441
0
   }
442
443
}