Coverage Report

Created: 2024-11-29 06:10

/src/botan/src/lib/pubkey/curve448/curve448_gf.cpp
Line
Count
Source (jump to first uncovered line)
1
/*
2
* X448 Gf Modulo 2^448 - 2^224 - 1
3
* (C) 2024 Jack Lloyd
4
*     2024 Fabian Albert - Rohde & Schwarz Cybersecurity
5
*
6
* Botan is released under the Simplified BSD License (see license.txt)
7
*/
8
#include <botan/internal/curve448_gf.h>
9
10
#include <botan/internal/ct_utils.h>
11
#include <botan/internal/loadstor.h>
12
#include <botan/internal/mp_asmi.h>
13
#include <botan/internal/mp_core.h>
14
15
#include <algorithm>
16
17
namespace Botan {
18
19
namespace {
20
21
/**
22
 * @brief Compute (a + b). The carry is returned in the carry parameter.
23
 *  The carry is not included for the addition.
24
 */
25
212M
inline uint64_t u64_add(uint64_t a, uint64_t b, bool* carry) {
26
   // Let the compiler optimize this into fancy instructions
27
212M
   const uint64_t sum = a + b;
28
212M
   *carry = sum < a;
29
212M
   return sum;
30
212M
}
31
32
/**
33
 * @brief Compute (a + b + carry), where carry is in {0, 1}. The carry of this computation
34
 * is store in the in/out @p carry parameter.
35
 */
36
264M
inline uint64_t u64_add_with_carry(uint64_t a, uint64_t b, bool* carry) {
37
   // Let the compiler optimize this into fancy instructions
38
264M
   uint64_t sum = a + b;
39
264M
   const bool carry_a_plus_b = (sum < a);
40
264M
   sum += static_cast<uint64_t>(*carry);
41
264M
   *carry = static_cast<uint64_t>(carry_a_plus_b) | static_cast<uint64_t>(sum < static_cast<uint64_t>(*carry));
42
264M
   return sum;
43
264M
}
44
45
/**
46
 * @brief Compute (a - (b + borrow)). The borrow is returned in the carry parameter.
47
 *
48
 * I.e. borrow = 1 if a < b + borrow, else 0.
49
 */
50
85.8M
inline uint64_t u64_sub_with_borrow(uint64_t a, uint64_t b, bool* borrow) {
51
   // Let the compiler optimize this into fancy instructions
52
85.8M
   const uint64_t diff = a - b;
53
85.8M
   const bool borrow_a_min_b = diff > a;
54
85.8M
   const uint64_t z = diff - static_cast<uint64_t>(*borrow);
55
85.8M
   *borrow = static_cast<uint64_t>(borrow_a_min_b) | static_cast<uint64_t>(z > diff);
56
85.8M
   return z;
57
85.8M
}
58
59
/**
60
 * @brief Reduce the result of a addition modulo 2^448 - 2^224 - 1.
61
 *
62
 * Algorithm 1 of paper "Reduction Modulo 2^448 - 2^224 - 1", from line 27.
63
 *
64
 * @param h_3 Output
65
 * @param h_1 Input
66
 */
67
18.8M
void reduce_after_add(std::span<uint64_t, WORDS_448> h_3, std::span<const uint64_t, 8> h_1) {
68
18.8M
   std::array<uint64_t, 8> h_2;
69
18.8M
   bool carry;
70
71
   // Line 27+ (of the paper's algorithm 1)
72
18.8M
   h_2[0] = u64_add(h_1[0], h_1[7], &carry);
73
74
18.8M
   h_2[1] = u64_add(h_1[1], carry, &carry);
75
18.8M
   h_2[2] = u64_add(h_1[2], carry, &carry);
76
77
   // Line 30
78
18.8M
   h_2[3] = u64_add_with_carry(h_1[3], h_1[7] << 32, &carry);
79
80
   // Line 31+
81
18.8M
   h_2[4] = u64_add(h_1[4], carry, &carry);
82
18.8M
   h_2[5] = u64_add(h_1[5], carry, &carry);
83
18.8M
   h_2[6] = u64_add(h_1[6], carry, &carry);
84
85
18.8M
   h_2[7] = carry;
86
87
18.8M
   h_3[0] = u64_add(h_2[0], h_2[7], &carry);
88
18.8M
   h_3[1] = u64_add(h_2[1], carry, &carry);
89
18.8M
   h_3[2] = u64_add(h_2[2], carry, &carry);
90
   // Line 37
91
18.8M
   h_3[3] = h_2[3] + (h_2[7] << 32) + carry;
92
93
   // Line 38
94
18.8M
   h_3[4] = h_2[4];
95
18.8M
   h_3[5] = h_2[5];
96
18.8M
   h_3[6] = h_2[6];
97
18.8M
}
98
99
/**
100
 * @brief Reduce the result of a addition modulo 2^448 - 2^224 - 1.
101
 *
102
 * Algorithm 1 of paper "Reduction Modulo 2^448 - 2^224 - 1".
103
 */
104
14.2M
void reduce_after_mul(std::span<uint64_t, WORDS_448> out, std::span<const uint64_t, 14> in) {
105
14.2M
   std::array<uint64_t, 8> r;
106
14.2M
   std::array<uint64_t, 8> s;
107
14.2M
   std::array<uint64_t, 8> t_0;
108
14.2M
   std::array<uint64_t, 8> h_1;
109
110
14.2M
   bool carry;
111
112
   // Line 4 (of the paper's algorithm 1)
113
14.2M
   r[0] = u64_add(in[0], in[7], &carry);
114
115
   // Line 5-7
116
99.6M
   for(size_t i = 1; i < 7; ++i) {
117
85.3M
      r[i] = u64_add_with_carry(in[i], in[i + 7], &carry);
118
85.3M
   }
119
14.2M
   r[7] = carry;
120
14.2M
   s[0] = r[0];
121
14.2M
   s[1] = r[1];
122
14.2M
   s[2] = r[2];
123
   // Line 10
124
14.2M
   s[3] = u64_add(r[3], in[10] & 0xFFFFFFFF00000000, &carry);
125
   // Line 11-13
126
56.9M
   for(size_t i = 4; i < 7; ++i) {
127
42.6M
      s[i] = u64_add_with_carry(r[i], in[i + 7], &carry);
128
42.6M
   }
129
14.2M
   s[7] = r[7] + carry;
130
131
   // Line 15-17
132
56.9M
   for(size_t i = 0; i < 3; ++i) {
133
42.6M
      t_0[i] = (in[i + 11] << 32) | (in[i + 10] >> 32);
134
42.6M
   }
135
   // Line 18
136
14.2M
   t_0[3] = (in[7] << 32) | (in[13] >> 32);
137
   // Line 19-21
138
56.9M
   for(size_t i = 4; i < 7; ++i) {
139
42.6M
      t_0[i] = (in[i + 4] << 32) | (in[i + 3] >> 32);
140
42.6M
   }
141
14.2M
   h_1[0] = u64_add(s[0], t_0[0], &carry);
142
   // Line 23-25
143
99.6M
   for(size_t i = 1; i < 7; ++i) {
144
85.3M
      h_1[i] = u64_add_with_carry(s[i], t_0[i], &carry);
145
85.3M
   }
146
14.2M
   h_1[7] = s[7] + carry;
147
148
14.2M
   reduce_after_add(out, h_1);
149
14.2M
}
150
151
void gf_mul(std::span<uint64_t, WORDS_448> out,
152
            std::span<const uint64_t, WORDS_448> a,
153
8.48M
            std::span<const uint64_t, WORDS_448> b) {
154
8.48M
   std::array<uint64_t, 14> ws;
155
8.48M
   comba_mul<7>(ws.data(), a.data(), b.data());
156
8.48M
   reduce_after_mul(out, ws);
157
8.48M
}
158
159
5.74M
void gf_square(std::span<uint64_t, WORDS_448> out, std::span<const uint64_t, WORDS_448> a) {
160
5.74M
   std::array<uint64_t, 14> ws;
161
5.74M
   comba_sqr<7>(ws.data(), a.data());
162
5.74M
   reduce_after_mul(out, ws);
163
5.74M
}
164
165
void gf_add(std::span<uint64_t, WORDS_448> out,
166
            std::span<const uint64_t, WORDS_448> a,
167
4.62M
            std::span<const uint64_t, WORDS_448> b) {
168
4.62M
   std::array<uint64_t, WORDS_448 + 1> ws;
169
4.62M
   copy_mem(std::span(ws).first<WORDS_448>(), a);
170
4.62M
   ws[WORDS_448] = 0;
171
172
4.62M
   bool carry = false;
173
36.9M
   for(size_t i = 0; i < WORDS_448; ++i) {
174
32.3M
      ws[i] = u64_add_with_carry(a[i], b[i], &carry);
175
32.3M
   }
176
4.62M
   ws[WORDS_448] = carry;
177
178
4.62M
   reduce_after_add(out, ws);
179
4.62M
}
180
181
/**
182
 * @brief Subtract two elements in GF(P). out = a - b
183
 *
184
 * Algorithm 2 of paper: "Reduction Modulo 2^448 - 2^224 - 1"
185
 */
186
void gf_sub(std::span<uint64_t, WORDS_448> out,
187
            std::span<const uint64_t, WORDS_448> a,
188
4.76M
            std::span<const uint64_t, WORDS_448> b) {
189
4.76M
   std::array<uint64_t, WORDS_448> h_0;
190
4.76M
   std::array<uint64_t, WORDS_448> h_1;
191
192
4.76M
   bool borrow = false;
193
38.1M
   for(size_t i = 0; i < WORDS_448; ++i) {
194
33.3M
      h_0[i] = u64_sub_with_borrow(a[i], b[i], &borrow);
195
33.3M
   }
196
4.76M
   uint64_t delta = borrow;
197
4.76M
   uint64_t delta_p = delta << 32;
198
4.76M
   borrow = false;
199
200
4.76M
   h_1[0] = u64_sub_with_borrow(h_0[0], delta, &borrow);
201
4.76M
   h_1[1] = u64_sub_with_borrow(h_0[1], 0, &borrow);
202
4.76M
   h_1[2] = u64_sub_with_borrow(h_0[2], 0, &borrow);
203
4.76M
   h_1[3] = u64_sub_with_borrow(h_0[3], delta_p, &borrow);
204
4.76M
   h_1[4] = u64_sub_with_borrow(h_0[4], 0, &borrow);
205
4.76M
   h_1[5] = u64_sub_with_borrow(h_0[5], 0, &borrow);
206
4.76M
   h_1[6] = u64_sub_with_borrow(h_0[6], 0, &borrow);
207
208
4.76M
   delta = borrow;
209
4.76M
   delta_p = delta << 32;
210
4.76M
   borrow = false;
211
212
4.76M
   out[0] = u64_sub_with_borrow(h_1[0], delta, &borrow);
213
4.76M
   out[1] = u64_sub_with_borrow(h_1[1], 0, &borrow);
214
4.76M
   out[2] = u64_sub_with_borrow(h_1[2], 0, &borrow);
215
4.76M
   out[3] = u64_sub_with_borrow(h_1[3], delta_p, &borrow);
216
4.76M
   out[4] = h_1[4];
217
4.76M
   out[5] = h_1[5];
218
4.76M
   out[6] = h_1[6];
219
4.76M
}
220
221
/**
222
 * @brief Inversion in GF(P) using Fermat's little theorem:
223
 * x^-1 = x^(P-2) mod P
224
 */
225
2.66k
void gf_inv(std::span<uint64_t, WORDS_448> out, std::span<const uint64_t, WORDS_448> a) {
226
2.66k
   clear_mem(out);
227
2.66k
   out[0] = 1;
228
   // Square and multiply
229
1.19M
   for(int16_t t = 448; t >= 0; --t) {
230
1.19M
      gf_square(out, out);
231
      // (P-2) has zero bits at indices 1, 224, 448. All others are one.
232
1.19M
      if(t != 448 && t != 224 && t != 1) {
233
1.18M
         gf_mul(out, out, a);
234
1.18M
      }
235
1.19M
   }
236
2.66k
}
237
238
/**
239
 * @brief Convert a number to its canonical representation.
240
 *
241
 * I.e. if the number is greater than P, subtract P. The number cannot be >= 2P
242
 * since 2*P > 2^(7*64).
243
 */
244
2.66k
std::array<uint64_t, WORDS_448> to_canonical(std::span<const uint64_t, WORDS_448> in) {
245
2.66k
   const std::array<uint64_t, WORDS_448> p = {0xffffffffffffffff,
246
2.66k
                                              0xffffffffffffffff,
247
2.66k
                                              0xffffffffffffffff,
248
2.66k
                                              0xfffffffeffffffff,
249
2.66k
                                              0xffffffffffffffff,
250
2.66k
                                              0xffffffffffffffff,
251
2.66k
                                              0xffffffffffffffff};
252
253
2.66k
   std::array<uint64_t, WORDS_448> in_minus_p;
254
2.66k
   bool borrow = false;
255
21.2k
   for(size_t i = 0; i < WORDS_448; ++i) {
256
18.6k
      in_minus_p[i] = u64_sub_with_borrow(in[i], p[i], &borrow);
257
18.6k
   }
258
2.66k
   std::array<uint64_t, WORDS_448> out;
259
2.66k
   CT::Mask<uint64_t>::expand(borrow).select_n(out.data(), in.data(), in_minus_p.data(), WORDS_448);
260
2.66k
   return out;
261
2.66k
}
262
263
}  // namespace
264
265
4.66k
Gf448Elem::Gf448Elem(std::span<const uint8_t, BYTES_448> x) {
266
4.66k
   load_le(m_x, x);
267
4.66k
}
268
269
21.3M
Gf448Elem::Gf448Elem(uint64_t least_sig_word) {
270
21.3M
   clear_mem(m_x);
271
21.3M
   m_x[0] = least_sig_word;
272
21.3M
}
273
274
2.49k
void Gf448Elem::to_bytes(std::span<uint8_t, BYTES_448> out) const {
275
2.49k
   store_le(out, to_canonical(m_x));
276
2.49k
}
277
278
2.33k
std::array<uint8_t, BYTES_448> Gf448Elem::to_bytes() const {
279
2.33k
   std::array<uint8_t, BYTES_448> bytes;
280
2.33k
   to_bytes(bytes);
281
2.33k
   return bytes;
282
2.33k
}
283
284
2.09M
void Gf448Elem::ct_cond_swap(bool b, Gf448Elem& other) {
285
16.7M
   for(size_t i = 0; i < WORDS_448; ++i) {
286
14.6M
      CT::conditional_swap(b, m_x[i], other.m_x[i]);
287
14.6M
   }
288
2.09M
}
289
290
219k
void Gf448Elem::ct_cond_assign(bool b, const Gf448Elem& other) {
291
219k
   CT::conditional_assign_mem(static_cast<uint64_t>(b), m_x.data(), other.m_x.data(), WORDS_448);
292
219k
}
293
294
4.62M
Gf448Elem Gf448Elem::operator+(const Gf448Elem& other) const {
295
4.62M
   Gf448Elem res(0);
296
4.62M
   gf_add(res.m_x, m_x, other.m_x);
297
4.62M
   return res;
298
4.62M
}
299
300
4.69M
Gf448Elem Gf448Elem::operator-(const Gf448Elem& other) const {
301
4.69M
   Gf448Elem res(0);
302
4.69M
   gf_sub(res.m_x, m_x, other.m_x);
303
4.69M
   return res;
304
4.69M
}
305
306
73.1k
Gf448Elem Gf448Elem::operator-() const {
307
73.1k
   Gf448Elem res(0);
308
73.1k
   gf_sub(res.m_x, res.m_x, m_x);
309
73.1k
   return res;
310
73.1k
}
311
312
7.29M
Gf448Elem Gf448Elem::operator*(const Gf448Elem& other) const {
313
7.29M
   Gf448Elem res(0);
314
7.29M
   gf_mul(res.m_x, m_x, other.m_x);
315
7.29M
   return res;
316
7.29M
}
317
318
2.66k
Gf448Elem Gf448Elem::operator/(const Gf448Elem& other) const {
319
2.66k
   Gf448Elem res(0);
320
2.66k
   gf_inv(res.m_x, other.m_x);
321
2.66k
   gf_mul(res.m_x, m_x, res.m_x);
322
2.66k
   return res;
323
2.66k
}
324
325
0
bool Gf448Elem::operator==(const Gf448Elem& other) const {
326
0
   const auto canonical_form_this = to_canonical(m_x);
327
0
   const auto canonical_form_other = to_canonical(other.m_x);
328
0
   return CT::is_equal(canonical_form_this.data(), canonical_form_other.data(), WORDS_448).as_bool();
329
0
}
330
331
0
bool Gf448Elem::is_zero() const {
332
0
   const auto canonical_form = to_canonical(m_x);
333
334
0
   return CT::all_zeros(canonical_form.data(), WORDS_448).as_bool();
335
0
}
336
337
164
bool Gf448Elem::is_odd() const {
338
164
   const auto canonical_form = to_canonical(m_x);
339
164
   return (canonical_form[0] & 1) == 1;
340
164
}
341
342
0
bool Gf448Elem::bytes_are_canonical_representation(std::span<const uint8_t, BYTES_448> x) {
343
0
   const auto x_words = load_le<std::array<uint64_t, WORDS_448>>(x);
344
0
   const auto x_words_canonical = to_canonical(x_words);
345
0
   return CT::is_equal(x_words.data(), x_words_canonical.data(), WORDS_448).as_bool();
346
0
}
347
348
4.54M
Gf448Elem square(const Gf448Elem& elem) {
349
4.54M
   Gf448Elem res(0);
350
4.54M
   gf_square(res.words(), elem.words());
351
4.54M
   return res;
352
4.54M
}
353
354
0
Gf448Elem root(const Gf448Elem& elem) {
355
0
   Gf448Elem res(1);
356
357
   // (P-3)/4 is an 445 bit integer with one zero bits at 222. All others are one.
358
0
   for(int16_t t = 445; t >= 0; --t) {
359
0
      gf_square(res.words(), res.words());
360
0
      if(t != 222) {
361
0
         gf_mul(res.words(), res.words(), elem.words());
362
0
      }
363
0
   }
364
365
0
   return res;
366
0
}
367
368
}  // namespace Botan