/src/botan/src/lib/pubkey/curve448/curve448_gf.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * X448 Gf Modulo 2^448 - 2^224 - 1 |
3 | | * (C) 2024 Jack Lloyd |
4 | | * 2024 Fabian Albert - Rohde & Schwarz Cybersecurity |
5 | | * |
6 | | * Botan is released under the Simplified BSD License (see license.txt) |
7 | | */ |
8 | | #include <botan/internal/curve448_gf.h> |
9 | | |
10 | | #include <botan/internal/ct_utils.h> |
11 | | #include <botan/internal/loadstor.h> |
12 | | #include <botan/internal/mp_asmi.h> |
13 | | #include <botan/internal/mp_core.h> |
14 | | |
15 | | #include <algorithm> |
16 | | |
17 | | namespace Botan { |
18 | | |
19 | | namespace { |
20 | | |
21 | | /** |
22 | | * @brief Compute (a + b). The carry is returned in the carry parameter. |
23 | | * The carry is not included for the addition. |
24 | | */ |
25 | 212M | inline uint64_t u64_add(uint64_t a, uint64_t b, bool* carry) { |
26 | | // Let the compiler optimize this into fancy instructions |
27 | 212M | const uint64_t sum = a + b; |
28 | 212M | *carry = sum < a; |
29 | 212M | return sum; |
30 | 212M | } |
31 | | |
32 | | /** |
33 | | * @brief Compute (a + b + carry), where carry is in {0, 1}. The carry of this computation |
34 | | * is store in the in/out @p carry parameter. |
35 | | */ |
36 | 264M | inline uint64_t u64_add_with_carry(uint64_t a, uint64_t b, bool* carry) { |
37 | | // Let the compiler optimize this into fancy instructions |
38 | 264M | uint64_t sum = a + b; |
39 | 264M | const bool carry_a_plus_b = (sum < a); |
40 | 264M | sum += static_cast<uint64_t>(*carry); |
41 | 264M | *carry = static_cast<uint64_t>(carry_a_plus_b) | static_cast<uint64_t>(sum < static_cast<uint64_t>(*carry)); |
42 | 264M | return sum; |
43 | 264M | } |
44 | | |
45 | | /** |
46 | | * @brief Compute (a - (b + borrow)). The borrow is returned in the carry parameter. |
47 | | * |
48 | | * I.e. borrow = 1 if a < b + borrow, else 0. |
49 | | */ |
50 | 85.8M | inline uint64_t u64_sub_with_borrow(uint64_t a, uint64_t b, bool* borrow) { |
51 | | // Let the compiler optimize this into fancy instructions |
52 | 85.8M | const uint64_t diff = a - b; |
53 | 85.8M | const bool borrow_a_min_b = diff > a; |
54 | 85.8M | const uint64_t z = diff - static_cast<uint64_t>(*borrow); |
55 | 85.8M | *borrow = static_cast<uint64_t>(borrow_a_min_b) | static_cast<uint64_t>(z > diff); |
56 | 85.8M | return z; |
57 | 85.8M | } |
58 | | |
59 | | /** |
60 | | * @brief Reduce the result of a addition modulo 2^448 - 2^224 - 1. |
61 | | * |
62 | | * Algorithm 1 of paper "Reduction Modulo 2^448 - 2^224 - 1", from line 27. |
63 | | * |
64 | | * @param h_3 Output |
65 | | * @param h_1 Input |
66 | | */ |
67 | 18.8M | void reduce_after_add(std::span<uint64_t, WORDS_448> h_3, std::span<const uint64_t, 8> h_1) { |
68 | 18.8M | std::array<uint64_t, 8> h_2; |
69 | 18.8M | bool carry; |
70 | | |
71 | | // Line 27+ (of the paper's algorithm 1) |
72 | 18.8M | h_2[0] = u64_add(h_1[0], h_1[7], &carry); |
73 | | |
74 | 18.8M | h_2[1] = u64_add(h_1[1], carry, &carry); |
75 | 18.8M | h_2[2] = u64_add(h_1[2], carry, &carry); |
76 | | |
77 | | // Line 30 |
78 | 18.8M | h_2[3] = u64_add_with_carry(h_1[3], h_1[7] << 32, &carry); |
79 | | |
80 | | // Line 31+ |
81 | 18.8M | h_2[4] = u64_add(h_1[4], carry, &carry); |
82 | 18.8M | h_2[5] = u64_add(h_1[5], carry, &carry); |
83 | 18.8M | h_2[6] = u64_add(h_1[6], carry, &carry); |
84 | | |
85 | 18.8M | h_2[7] = carry; |
86 | | |
87 | 18.8M | h_3[0] = u64_add(h_2[0], h_2[7], &carry); |
88 | 18.8M | h_3[1] = u64_add(h_2[1], carry, &carry); |
89 | 18.8M | h_3[2] = u64_add(h_2[2], carry, &carry); |
90 | | // Line 37 |
91 | 18.8M | h_3[3] = h_2[3] + (h_2[7] << 32) + carry; |
92 | | |
93 | | // Line 38 |
94 | 18.8M | h_3[4] = h_2[4]; |
95 | 18.8M | h_3[5] = h_2[5]; |
96 | 18.8M | h_3[6] = h_2[6]; |
97 | 18.8M | } |
98 | | |
99 | | /** |
100 | | * @brief Reduce the result of a addition modulo 2^448 - 2^224 - 1. |
101 | | * |
102 | | * Algorithm 1 of paper "Reduction Modulo 2^448 - 2^224 - 1". |
103 | | */ |
104 | 14.2M | void reduce_after_mul(std::span<uint64_t, WORDS_448> out, std::span<const uint64_t, 14> in) { |
105 | 14.2M | std::array<uint64_t, 8> r; |
106 | 14.2M | std::array<uint64_t, 8> s; |
107 | 14.2M | std::array<uint64_t, 8> t_0; |
108 | 14.2M | std::array<uint64_t, 8> h_1; |
109 | | |
110 | 14.2M | bool carry; |
111 | | |
112 | | // Line 4 (of the paper's algorithm 1) |
113 | 14.2M | r[0] = u64_add(in[0], in[7], &carry); |
114 | | |
115 | | // Line 5-7 |
116 | 99.6M | for(size_t i = 1; i < 7; ++i) { |
117 | 85.3M | r[i] = u64_add_with_carry(in[i], in[i + 7], &carry); |
118 | 85.3M | } |
119 | 14.2M | r[7] = carry; |
120 | 14.2M | s[0] = r[0]; |
121 | 14.2M | s[1] = r[1]; |
122 | 14.2M | s[2] = r[2]; |
123 | | // Line 10 |
124 | 14.2M | s[3] = u64_add(r[3], in[10] & 0xFFFFFFFF00000000, &carry); |
125 | | // Line 11-13 |
126 | 56.9M | for(size_t i = 4; i < 7; ++i) { |
127 | 42.6M | s[i] = u64_add_with_carry(r[i], in[i + 7], &carry); |
128 | 42.6M | } |
129 | 14.2M | s[7] = r[7] + carry; |
130 | | |
131 | | // Line 15-17 |
132 | 56.9M | for(size_t i = 0; i < 3; ++i) { |
133 | 42.6M | t_0[i] = (in[i + 11] << 32) | (in[i + 10] >> 32); |
134 | 42.6M | } |
135 | | // Line 18 |
136 | 14.2M | t_0[3] = (in[7] << 32) | (in[13] >> 32); |
137 | | // Line 19-21 |
138 | 56.9M | for(size_t i = 4; i < 7; ++i) { |
139 | 42.6M | t_0[i] = (in[i + 4] << 32) | (in[i + 3] >> 32); |
140 | 42.6M | } |
141 | 14.2M | h_1[0] = u64_add(s[0], t_0[0], &carry); |
142 | | // Line 23-25 |
143 | 99.6M | for(size_t i = 1; i < 7; ++i) { |
144 | 85.3M | h_1[i] = u64_add_with_carry(s[i], t_0[i], &carry); |
145 | 85.3M | } |
146 | 14.2M | h_1[7] = s[7] + carry; |
147 | | |
148 | 14.2M | reduce_after_add(out, h_1); |
149 | 14.2M | } |
150 | | |
151 | | void gf_mul(std::span<uint64_t, WORDS_448> out, |
152 | | std::span<const uint64_t, WORDS_448> a, |
153 | 8.48M | std::span<const uint64_t, WORDS_448> b) { |
154 | 8.48M | std::array<uint64_t, 14> ws; |
155 | 8.48M | comba_mul<7>(ws.data(), a.data(), b.data()); |
156 | 8.48M | reduce_after_mul(out, ws); |
157 | 8.48M | } |
158 | | |
159 | 5.74M | void gf_square(std::span<uint64_t, WORDS_448> out, std::span<const uint64_t, WORDS_448> a) { |
160 | 5.74M | std::array<uint64_t, 14> ws; |
161 | 5.74M | comba_sqr<7>(ws.data(), a.data()); |
162 | 5.74M | reduce_after_mul(out, ws); |
163 | 5.74M | } |
164 | | |
165 | | void gf_add(std::span<uint64_t, WORDS_448> out, |
166 | | std::span<const uint64_t, WORDS_448> a, |
167 | 4.62M | std::span<const uint64_t, WORDS_448> b) { |
168 | 4.62M | std::array<uint64_t, WORDS_448 + 1> ws; |
169 | 4.62M | copy_mem(std::span(ws).first<WORDS_448>(), a); |
170 | 4.62M | ws[WORDS_448] = 0; |
171 | | |
172 | 4.62M | bool carry = false; |
173 | 36.9M | for(size_t i = 0; i < WORDS_448; ++i) { |
174 | 32.3M | ws[i] = u64_add_with_carry(a[i], b[i], &carry); |
175 | 32.3M | } |
176 | 4.62M | ws[WORDS_448] = carry; |
177 | | |
178 | 4.62M | reduce_after_add(out, ws); |
179 | 4.62M | } |
180 | | |
181 | | /** |
182 | | * @brief Subtract two elements in GF(P). out = a - b |
183 | | * |
184 | | * Algorithm 2 of paper: "Reduction Modulo 2^448 - 2^224 - 1" |
185 | | */ |
186 | | void gf_sub(std::span<uint64_t, WORDS_448> out, |
187 | | std::span<const uint64_t, WORDS_448> a, |
188 | 4.76M | std::span<const uint64_t, WORDS_448> b) { |
189 | 4.76M | std::array<uint64_t, WORDS_448> h_0; |
190 | 4.76M | std::array<uint64_t, WORDS_448> h_1; |
191 | | |
192 | 4.76M | bool borrow = false; |
193 | 38.1M | for(size_t i = 0; i < WORDS_448; ++i) { |
194 | 33.3M | h_0[i] = u64_sub_with_borrow(a[i], b[i], &borrow); |
195 | 33.3M | } |
196 | 4.76M | uint64_t delta = borrow; |
197 | 4.76M | uint64_t delta_p = delta << 32; |
198 | 4.76M | borrow = false; |
199 | | |
200 | 4.76M | h_1[0] = u64_sub_with_borrow(h_0[0], delta, &borrow); |
201 | 4.76M | h_1[1] = u64_sub_with_borrow(h_0[1], 0, &borrow); |
202 | 4.76M | h_1[2] = u64_sub_with_borrow(h_0[2], 0, &borrow); |
203 | 4.76M | h_1[3] = u64_sub_with_borrow(h_0[3], delta_p, &borrow); |
204 | 4.76M | h_1[4] = u64_sub_with_borrow(h_0[4], 0, &borrow); |
205 | 4.76M | h_1[5] = u64_sub_with_borrow(h_0[5], 0, &borrow); |
206 | 4.76M | h_1[6] = u64_sub_with_borrow(h_0[6], 0, &borrow); |
207 | | |
208 | 4.76M | delta = borrow; |
209 | 4.76M | delta_p = delta << 32; |
210 | 4.76M | borrow = false; |
211 | | |
212 | 4.76M | out[0] = u64_sub_with_borrow(h_1[0], delta, &borrow); |
213 | 4.76M | out[1] = u64_sub_with_borrow(h_1[1], 0, &borrow); |
214 | 4.76M | out[2] = u64_sub_with_borrow(h_1[2], 0, &borrow); |
215 | 4.76M | out[3] = u64_sub_with_borrow(h_1[3], delta_p, &borrow); |
216 | 4.76M | out[4] = h_1[4]; |
217 | 4.76M | out[5] = h_1[5]; |
218 | 4.76M | out[6] = h_1[6]; |
219 | 4.76M | } |
220 | | |
221 | | /** |
222 | | * @brief Inversion in GF(P) using Fermat's little theorem: |
223 | | * x^-1 = x^(P-2) mod P |
224 | | */ |
225 | 2.66k | void gf_inv(std::span<uint64_t, WORDS_448> out, std::span<const uint64_t, WORDS_448> a) { |
226 | 2.66k | clear_mem(out); |
227 | 2.66k | out[0] = 1; |
228 | | // Square and multiply |
229 | 1.19M | for(int16_t t = 448; t >= 0; --t) { |
230 | 1.19M | gf_square(out, out); |
231 | | // (P-2) has zero bits at indices 1, 224, 448. All others are one. |
232 | 1.19M | if(t != 448 && t != 224 && t != 1) { |
233 | 1.18M | gf_mul(out, out, a); |
234 | 1.18M | } |
235 | 1.19M | } |
236 | 2.66k | } |
237 | | |
238 | | /** |
239 | | * @brief Convert a number to its canonical representation. |
240 | | * |
241 | | * I.e. if the number is greater than P, subtract P. The number cannot be >= 2P |
242 | | * since 2*P > 2^(7*64). |
243 | | */ |
244 | 2.66k | std::array<uint64_t, WORDS_448> to_canonical(std::span<const uint64_t, WORDS_448> in) { |
245 | 2.66k | const std::array<uint64_t, WORDS_448> p = {0xffffffffffffffff, |
246 | 2.66k | 0xffffffffffffffff, |
247 | 2.66k | 0xffffffffffffffff, |
248 | 2.66k | 0xfffffffeffffffff, |
249 | 2.66k | 0xffffffffffffffff, |
250 | 2.66k | 0xffffffffffffffff, |
251 | 2.66k | 0xffffffffffffffff}; |
252 | | |
253 | 2.66k | std::array<uint64_t, WORDS_448> in_minus_p; |
254 | 2.66k | bool borrow = false; |
255 | 21.2k | for(size_t i = 0; i < WORDS_448; ++i) { |
256 | 18.6k | in_minus_p[i] = u64_sub_with_borrow(in[i], p[i], &borrow); |
257 | 18.6k | } |
258 | 2.66k | std::array<uint64_t, WORDS_448> out; |
259 | 2.66k | CT::Mask<uint64_t>::expand(borrow).select_n(out.data(), in.data(), in_minus_p.data(), WORDS_448); |
260 | 2.66k | return out; |
261 | 2.66k | } |
262 | | |
263 | | } // namespace |
264 | | |
265 | 4.66k | Gf448Elem::Gf448Elem(std::span<const uint8_t, BYTES_448> x) { |
266 | 4.66k | load_le(m_x, x); |
267 | 4.66k | } |
268 | | |
269 | 21.3M | Gf448Elem::Gf448Elem(uint64_t least_sig_word) { |
270 | 21.3M | clear_mem(m_x); |
271 | 21.3M | m_x[0] = least_sig_word; |
272 | 21.3M | } |
273 | | |
274 | 2.49k | void Gf448Elem::to_bytes(std::span<uint8_t, BYTES_448> out) const { |
275 | 2.49k | store_le(out, to_canonical(m_x)); |
276 | 2.49k | } |
277 | | |
278 | 2.33k | std::array<uint8_t, BYTES_448> Gf448Elem::to_bytes() const { |
279 | 2.33k | std::array<uint8_t, BYTES_448> bytes; |
280 | 2.33k | to_bytes(bytes); |
281 | 2.33k | return bytes; |
282 | 2.33k | } |
283 | | |
284 | 2.09M | void Gf448Elem::ct_cond_swap(bool b, Gf448Elem& other) { |
285 | 16.7M | for(size_t i = 0; i < WORDS_448; ++i) { |
286 | 14.6M | CT::conditional_swap(b, m_x[i], other.m_x[i]); |
287 | 14.6M | } |
288 | 2.09M | } |
289 | | |
290 | 219k | void Gf448Elem::ct_cond_assign(bool b, const Gf448Elem& other) { |
291 | 219k | CT::conditional_assign_mem(static_cast<uint64_t>(b), m_x.data(), other.m_x.data(), WORDS_448); |
292 | 219k | } |
293 | | |
294 | 4.62M | Gf448Elem Gf448Elem::operator+(const Gf448Elem& other) const { |
295 | 4.62M | Gf448Elem res(0); |
296 | 4.62M | gf_add(res.m_x, m_x, other.m_x); |
297 | 4.62M | return res; |
298 | 4.62M | } |
299 | | |
300 | 4.69M | Gf448Elem Gf448Elem::operator-(const Gf448Elem& other) const { |
301 | 4.69M | Gf448Elem res(0); |
302 | 4.69M | gf_sub(res.m_x, m_x, other.m_x); |
303 | 4.69M | return res; |
304 | 4.69M | } |
305 | | |
306 | 73.1k | Gf448Elem Gf448Elem::operator-() const { |
307 | 73.1k | Gf448Elem res(0); |
308 | 73.1k | gf_sub(res.m_x, res.m_x, m_x); |
309 | 73.1k | return res; |
310 | 73.1k | } |
311 | | |
312 | 7.29M | Gf448Elem Gf448Elem::operator*(const Gf448Elem& other) const { |
313 | 7.29M | Gf448Elem res(0); |
314 | 7.29M | gf_mul(res.m_x, m_x, other.m_x); |
315 | 7.29M | return res; |
316 | 7.29M | } |
317 | | |
318 | 2.66k | Gf448Elem Gf448Elem::operator/(const Gf448Elem& other) const { |
319 | 2.66k | Gf448Elem res(0); |
320 | 2.66k | gf_inv(res.m_x, other.m_x); |
321 | 2.66k | gf_mul(res.m_x, m_x, res.m_x); |
322 | 2.66k | return res; |
323 | 2.66k | } |
324 | | |
325 | 0 | bool Gf448Elem::operator==(const Gf448Elem& other) const { |
326 | 0 | const auto canonical_form_this = to_canonical(m_x); |
327 | 0 | const auto canonical_form_other = to_canonical(other.m_x); |
328 | 0 | return CT::is_equal(canonical_form_this.data(), canonical_form_other.data(), WORDS_448).as_bool(); |
329 | 0 | } |
330 | | |
331 | 0 | bool Gf448Elem::is_zero() const { |
332 | 0 | const auto canonical_form = to_canonical(m_x); |
333 | |
|
334 | 0 | return CT::all_zeros(canonical_form.data(), WORDS_448).as_bool(); |
335 | 0 | } |
336 | | |
337 | 164 | bool Gf448Elem::is_odd() const { |
338 | 164 | const auto canonical_form = to_canonical(m_x); |
339 | 164 | return (canonical_form[0] & 1) == 1; |
340 | 164 | } |
341 | | |
342 | 0 | bool Gf448Elem::bytes_are_canonical_representation(std::span<const uint8_t, BYTES_448> x) { |
343 | 0 | const auto x_words = load_le<std::array<uint64_t, WORDS_448>>(x); |
344 | 0 | const auto x_words_canonical = to_canonical(x_words); |
345 | 0 | return CT::is_equal(x_words.data(), x_words_canonical.data(), WORDS_448).as_bool(); |
346 | 0 | } |
347 | | |
348 | 4.54M | Gf448Elem square(const Gf448Elem& elem) { |
349 | 4.54M | Gf448Elem res(0); |
350 | 4.54M | gf_square(res.words(), elem.words()); |
351 | 4.54M | return res; |
352 | 4.54M | } |
353 | | |
354 | 0 | Gf448Elem root(const Gf448Elem& elem) { |
355 | 0 | Gf448Elem res(1); |
356 | | |
357 | | // (P-3)/4 is an 445 bit integer with one zero bits at 222. All others are one. |
358 | 0 | for(int16_t t = 445; t >= 0; --t) { |
359 | 0 | gf_square(res.words(), res.words()); |
360 | 0 | if(t != 222) { |
361 | 0 | gf_mul(res.words(), res.words(), elem.words()); |
362 | 0 | } |
363 | 0 | } |
364 | |
|
365 | 0 | return res; |
366 | 0 | } |
367 | | |
368 | | } // namespace Botan |