/src/trezor-firmware/crypto/bignum.c
Line | Count | Source (jump to first uncovered line) |
1 | | /** |
2 | | * Copyright (c) 2013-2014 Tomas Dzetkulic |
3 | | * Copyright (c) 2013-2014 Pavol Rusnak |
4 | | * Copyright (c) 2015 Jochen Hoenicke |
5 | | * Copyright (c) 2016 Alex Beregszaszi |
6 | | * |
7 | | * Permission is hereby granted, free of charge, to any person obtaining |
8 | | * a copy of this software and associated documentation files (the "Software"), |
9 | | * to deal in the Software without restriction, including without limitation |
10 | | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
11 | | * and/or sell copies of the Software, and to permit persons to whom the |
12 | | * Software is furnished to do so, subject to the following conditions: |
13 | | * |
14 | | * The above copyright notice and this permission notice shall be included |
15 | | * in all copies or substantial portions of the Software. |
16 | | * |
17 | | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
18 | | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
19 | | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
20 | | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES |
21 | | * OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
22 | | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
23 | | * OTHER DEALINGS IN THE SOFTWARE. |
24 | | */ |
25 | | |
26 | | #include "bignum.h" |
27 | | |
28 | | #include <assert.h> |
29 | | #include <stdint.h> |
30 | | #include <stdio.h> |
31 | | #include <string.h> |
32 | | |
33 | | #include "memzero.h" |
34 | | #include "script.h" |
35 | | |
36 | | /* |
37 | | This library implements 256-bit numbers arithmetic. |
38 | | |
39 | | An unsigned 256-bit number is represented by a bignum256 structure, that is an |
40 | | array of nine 32-bit values called limbs. Limbs are digits of the number in |
41 | | the base 2**29 representation in the little endian order. This means that |
42 | | bignum256 x; |
43 | | represents the value |
44 | | sum([x[i] * 2**(29*i) for i in range(9)). |
45 | | |
46 | | A limb of a bignum256 is *normalized* iff it's less than 2**29. |
47 | | A bignum256 is *normalized* iff every its limb is normalized. |
48 | | A number is *fully reduced modulo p* iff it is less than p. |
49 | | A number is *partly reduced modulo p* iff is is less than 2*p. |
50 | | The number p is usually a prime number such that 2^256 - 2^224 <= p <= 2^256. |
51 | | |
52 | | All functions except bn_fast_mod expect that all their bignum256 inputs are |
53 | | normalized. (The function bn_fast_mod allows the input number to have the |
54 | | most significant limb unnormalized). All bignum256 outputs of all functions |
55 | | are guaranteed to be normalized. |
56 | | |
57 | | A number can be partly reduced with bn_fast_mod, a partly reduced number can |
58 | | be fully reduced with bn_mod. |
59 | | |
60 | | A function has *constant control flow with regard to its argument* iff the |
61 | | order in which instructions of the function are executed doesn't depend on the |
62 | | value of the argument. |
63 | | A function has *constant memory access flow with regard to its argument* iff |
64 | | the memory addresses that are acessed and the order in which they are accessed |
65 | | don't depend on the value of the argument. |
66 | | A function *has contant control (memory access) flow* iff it has constant |
67 | | control (memory access) flow with regard to all its arguments. |
68 | | |
69 | | The following function has contant control flow with regard to its arugment |
70 | | n, however is doesn't have constant memory access flow with regard to it: |
71 | | void (int n, int *a) } |
72 | | a[0] = 0; |
73 | | a[n] = 0; // memory address reveals the value of n |
74 | | } |
75 | | |
76 | | Unless stated otherwise all functions are supposed to have both constant |
77 | | control flow and constant memory access flow. |
78 | | */ |
79 | | |
80 | | #define BN_MAX_DECIMAL_DIGITS \ |
81 | 0 | 79 // floor(log(2**(LIMBS * BITS_PER_LIMB), 10)) + 1 |
82 | | |
83 | | // y = (bignum256) x |
84 | | // Assumes x is normalized and x < 2**261 == 2**(BITS_PER_LIMB * LIMBS) |
85 | | // Guarantees y is normalized |
86 | 26.9M | void bn_copy_lower(const bignum512 *x, bignum256 *y) { |
87 | 269M | for (int i = 0; i < BN_LIMBS; i++) { |
88 | 242M | y->val[i] = x->val[i]; |
89 | 242M | } |
90 | 26.9M | } |
91 | | |
92 | | // out_number = (bignum256) in_number |
93 | | // Assumes in_number is a raw bigendian 256-bit number |
94 | | // Guarantees out_number is normalized |
95 | 25.2k | void bn_read_be(const uint8_t *in_number, bignum256 *out_number) { |
96 | 25.2k | uint32_t temp = 0; |
97 | | |
98 | 227k | for (int i = 0; i < BN_LIMBS - 1; i++) { |
99 | 201k | uint32_t limb = read_be(in_number + (BN_LIMBS - 2 - i) * 4); |
100 | | |
101 | 201k | temp |= limb << (BN_EXTRA_BITS * i); |
102 | 201k | out_number->val[i] = temp & BN_LIMB_MASK; |
103 | | |
104 | 201k | temp = limb >> (32 - BN_EXTRA_BITS * (i + 1)); |
105 | 201k | } |
106 | | |
107 | 25.2k | out_number->val[BN_LIMBS - 1] = temp; |
108 | 25.2k | } |
109 | | |
110 | | // out_number = (bignum512) in_number |
111 | | // Assumes in_number is a raw bigendian 512-bit number |
112 | | // Guarantees out_number is normalized |
113 | 0 | void bn_read_be_512(const uint8_t *in_number, bignum512 *out_number) { |
114 | 0 | bignum256 lower = {0}, upper = {0}; |
115 | |
|
116 | 0 | bn_read_be(in_number + 32, &lower); |
117 | 0 | bn_read_be(in_number, &upper); |
118 | |
|
119 | 0 | const int shift_length = BN_BITS_PER_LIMB * BN_LIMBS - 256; |
120 | 0 | uint32_t shift = upper.val[0] & ((1 << shift_length) - 1); |
121 | 0 | for (int i = 0; i < shift_length; i++) { |
122 | 0 | bn_rshift(&upper); |
123 | 0 | } |
124 | 0 | lower.val[BN_LIMBS - 1] |= shift << (BN_BITS_PER_LIMB - shift_length); |
125 | |
|
126 | 0 | for (int i = 0; i < BN_LIMBS; i++) { |
127 | 0 | out_number->val[i] = lower.val[i]; |
128 | 0 | } |
129 | 0 | for (int i = 0; i < BN_LIMBS; i++) { |
130 | 0 | out_number->val[i + BN_LIMBS] = upper.val[i]; |
131 | 0 | } |
132 | 0 | } |
133 | | |
134 | | // out_number = (256BE) in_number |
135 | | // Assumes in_number < 2**256 |
136 | | // Guarantess out_number is a raw bigendian 256-bit number |
137 | 13.7k | void bn_write_be(const bignum256 *in_number, uint8_t *out_number) { |
138 | 13.7k | uint32_t temp = in_number->val[BN_LIMBS - 1]; |
139 | 123k | for (int i = BN_LIMBS - 2; i >= 0; i--) { |
140 | 110k | uint32_t limb = in_number->val[i]; |
141 | | |
142 | 110k | temp = (temp << (BN_BITS_PER_LIMB - BN_EXTRA_BITS * i)) | |
143 | 110k | (limb >> (BN_EXTRA_BITS * i)); |
144 | 110k | write_be(out_number + (BN_LIMBS - 2 - i) * 4, temp); |
145 | | |
146 | 110k | temp = limb; |
147 | 110k | } |
148 | 13.7k | } |
149 | | |
150 | | // out_number = (bignum256) in_number |
151 | | // Assumes in_number is a raw little endian 256-bit number |
152 | | // Guarantees out_number is normalized |
153 | 0 | void bn_read_le(const uint8_t *in_number, bignum256 *out_number) { |
154 | 0 | uint32_t temp = 0; |
155 | 0 | for (int i = 0; i < BN_LIMBS - 1; i++) { |
156 | 0 | uint32_t limb = read_le(in_number + i * 4); |
157 | |
|
158 | 0 | temp |= limb << (BN_EXTRA_BITS * i); |
159 | 0 | out_number->val[i] = temp & BN_LIMB_MASK; |
160 | 0 | temp = limb >> (32 - BN_EXTRA_BITS * (i + 1)); |
161 | 0 | } |
162 | |
|
163 | 0 | out_number->val[BN_LIMBS - 1] = temp; |
164 | 0 | } |
165 | | |
166 | | // out_number = (256LE) in_number |
167 | | // Assumes in_number < 2**256 |
168 | | // Guarantess out_number is a raw little endian 256-bit number |
169 | 0 | void bn_write_le(const bignum256 *in_number, uint8_t *out_number) { |
170 | 0 | uint32_t temp = in_number->val[BN_LIMBS - 1]; |
171 | |
|
172 | 0 | for (int i = BN_LIMBS - 2; i >= 0; i--) { |
173 | 0 | uint32_t limb = in_number->val[i]; |
174 | 0 | temp = (temp << (BN_BITS_PER_LIMB - BN_EXTRA_BITS * i)) | |
175 | 0 | (limb >> (BN_EXTRA_BITS * i)); |
176 | 0 | write_le(out_number + i * 4, temp); |
177 | 0 | temp = limb; |
178 | 0 | } |
179 | 0 | } |
180 | | |
181 | | // out_number = (bignum256) in_number |
182 | | // Guarantees out_number is normalized |
183 | 0 | void bn_read_uint32(uint32_t in_number, bignum256 *out_number) { |
184 | 0 | out_number->val[0] = in_number & BN_LIMB_MASK; |
185 | 0 | out_number->val[1] = in_number >> BN_BITS_PER_LIMB; |
186 | 0 | for (uint32_t i = 2; i < BN_LIMBS; i++) out_number->val[i] = 0; |
187 | 0 | } |
188 | | |
189 | | // out_number = (bignum256) in_number |
190 | | // Guarantees out_number is normalized |
191 | 0 | void bn_read_uint64(uint64_t in_number, bignum256 *out_number) { |
192 | 0 | out_number->val[0] = in_number & BN_LIMB_MASK; |
193 | 0 | out_number->val[1] = (in_number >>= BN_BITS_PER_LIMB) & BN_LIMB_MASK; |
194 | 0 | out_number->val[2] = in_number >> BN_BITS_PER_LIMB; |
195 | 0 | for (uint32_t i = 3; i < BN_LIMBS; i++) out_number->val[i] = 0; |
196 | 0 | } |
197 | | |
198 | | // Returns the bitsize of x |
199 | | // Assumes x is normalized |
200 | | // The function doesn't have neither constant control flow nor constant memory |
201 | | // access flow |
202 | 1.21k | int bn_bitcount(const bignum256 *x) { |
203 | 1.21k | for (int i = BN_LIMBS - 1; i >= 0; i--) { |
204 | 1.21k | uint32_t limb = x->val[i]; |
205 | 1.21k | if (limb != 0) { |
206 | | // __builtin_clz returns the number of leading zero bits starting at the |
207 | | // most significant bit position |
208 | 1.21k | return i * BN_BITS_PER_LIMB + (32 - __builtin_clz(limb)); |
209 | 1.21k | } |
210 | 1.21k | } |
211 | 0 | return 0; |
212 | 1.21k | } |
213 | | |
214 | | // Returns the number of decimal digits of x; if x is 0, returns 1 |
215 | | // Assumes x is normalized |
216 | | // The function doesn't have neither constant control flow nor constant memory |
217 | | // access flow |
218 | 0 | unsigned int bn_digitcount(const bignum256 *x) { |
219 | 0 | bignum256 val = {0}; |
220 | 0 | bn_copy(x, &val); |
221 | |
|
222 | 0 | unsigned int digits = 1; |
223 | 0 | for (unsigned int i = 0; i < BN_MAX_DECIMAL_DIGITS; i += 3) { |
224 | 0 | uint32_t limb = 0; |
225 | |
|
226 | 0 | bn_divmod1000(&val, &limb); |
227 | |
|
228 | 0 | if (limb >= 100) { |
229 | 0 | digits = i + 3; |
230 | 0 | } else if (limb >= 10) { |
231 | 0 | digits = i + 2; |
232 | 0 | } else if (limb >= 1) { |
233 | 0 | digits = i + 1; |
234 | 0 | } |
235 | 0 | } |
236 | |
|
237 | 0 | memzero(&val, sizeof(val)); |
238 | |
|
239 | 0 | return digits; |
240 | 0 | } |
241 | | |
242 | | // x = 0 |
243 | | // Guarantees x is normalized |
244 | 168k | void bn_zero(bignum256 *x) { |
245 | 1.68M | for (int i = 0; i < BN_LIMBS; i++) { |
246 | 1.51M | x->val[i] = 0; |
247 | 1.51M | } |
248 | 168k | } |
249 | | |
250 | | // x = 1 |
251 | | // Guarantees x is normalized |
252 | 84.0k | void bn_one(bignum256 *x) { |
253 | 84.0k | x->val[0] = 1; |
254 | 756k | for (int i = 1; i < BN_LIMBS; i++) { |
255 | 672k | x->val[i] = 0; |
256 | 672k | } |
257 | 84.0k | } |
258 | | |
259 | | // Returns x == 0 |
260 | | // Assumes x is normalized |
261 | 257k | int bn_is_zero(const bignum256 *x) { |
262 | 257k | uint32_t result = 0; |
263 | 2.57M | for (int i = 0; i < BN_LIMBS; i++) { |
264 | 2.31M | result |= x->val[i]; |
265 | 2.31M | } |
266 | 257k | return !result; |
267 | 257k | } |
268 | | |
269 | | // Returns x == 1 |
270 | | // Assumes x is normalized |
271 | 43.7M | int bn_is_one(const bignum256 *x) { |
272 | 43.7M | uint32_t result = x->val[0] ^ 1; |
273 | 393M | for (int i = 1; i < BN_LIMBS; i++) { |
274 | 350M | result |= x->val[i]; |
275 | 350M | } |
276 | 43.7M | return !result; |
277 | 43.7M | } |
278 | | |
279 | | // Returns x < y |
280 | | // Assumes x, y are normalized |
281 | 44.6M | int bn_is_less(const bignum256 *x, const bignum256 *y) { |
282 | 44.6M | uint32_t res1 = 0; |
283 | 44.6M | uint32_t res2 = 0; |
284 | 446M | for (int i = BN_LIMBS - 1; i >= 0; i--) { |
285 | 401M | res1 = (res1 << 1) | (x->val[i] < y->val[i]); |
286 | 401M | res2 = (res2 << 1) | (x->val[i] > y->val[i]); |
287 | 401M | } |
288 | 44.6M | return res1 > res2; |
289 | 44.6M | } |
290 | | |
291 | | // Returns x == y |
292 | | // Assumes x, y are normalized |
293 | 684k | int bn_is_equal(const bignum256 *x, const bignum256 *y) { |
294 | 684k | uint32_t result = 0; |
295 | 6.84M | for (int i = 0; i < BN_LIMBS; i++) { |
296 | 6.15M | result |= x->val[i] ^ y->val[i]; |
297 | 6.15M | } |
298 | 684k | return !result; |
299 | 684k | } |
300 | | |
301 | | // res = cond if truecase else falsecase |
302 | | // Assumes cond is either 0 or 1 |
303 | | // Works properly even if &res == &truecase or &res == &falsecase or |
304 | | // &truecase == &falsecase or &res == &truecase == &falsecase |
305 | | void bn_cmov(bignum256 *res, volatile uint32_t cond, const bignum256 *truecase, |
306 | 5.96M | const bignum256 *falsecase) { |
307 | | // Intentional use of bitwise OR operator to ensure constant-time |
308 | 5.96M | assert((int)(cond == 1) | (int)(cond == 0)); |
309 | | |
310 | 5.96M | uint32_t tmask = -cond; // tmask = 0xFFFFFFFF if cond else 0x00000000 |
311 | 5.96M | uint32_t fmask = ~tmask; // fmask = 0x00000000 if cond else 0xFFFFFFFF |
312 | | |
313 | 59.6M | for (int i = 0; i < BN_LIMBS; i++) { |
314 | 53.6M | res->val[i] = (truecase->val[i] & tmask) | (falsecase->val[i] & fmask); |
315 | 53.6M | } |
316 | 5.96M | } |
317 | | |
318 | | // x = -x % prime if cond else x, |
319 | | // Explicitly x = (3 * prime - x if x > prime else 2 * prime - x) if cond else |
320 | | // else (x if x > prime else x + prime) |
321 | | // Assumes x is normalized and partly reduced |
322 | | // Assumes cond is either 1 or 0 |
323 | | // Guarantees x is normalized |
324 | | // Assumes prime is normalized and |
325 | | // 0 < prime < 2**260 == 2**(BITS_PER_LIMB * LIMBS - 1) |
326 | 562k | void bn_cnegate(volatile uint32_t cond, bignum256 *x, const bignum256 *prime) { |
327 | | // Intentional use of bitwise OR operator to ensure constant time |
328 | 562k | assert((int)(cond == 1) | (int)(cond == 0)); |
329 | | |
330 | 562k | uint32_t tmask = -cond; // tmask = 0xFFFFFFFF if cond else 0x00000000 |
331 | 562k | uint32_t fmask = ~tmask; // fmask = 0x00000000 if cond else 0xFFFFFFFF |
332 | | |
333 | 562k | bn_mod(x, prime); |
334 | | // x < prime |
335 | | |
336 | 562k | uint32_t acc1 = 1; |
337 | 562k | uint32_t acc2 = 0; |
338 | | |
339 | 5.62M | for (int i = 0; i < BN_LIMBS; i++) { |
340 | 5.06M | acc1 += (BN_BASE - 1) + 2 * prime->val[i] - x->val[i]; |
341 | | // acc1 neither overflows 32 bits nor underflows 0 |
342 | | // Proof: |
343 | | // acc1 + (BASE - 1) + 2 * prime[i] - x[i] |
344 | | // >= (BASE - 1) - x >= (2**BITS_PER_LIMB - 1) - (2**BITS_PER_LIMB - 1) |
345 | | // == 0 |
346 | | // acc1 + (BASE - 1) + 2 * prime[i] - x[i] |
347 | | // <= acc1 + (BASE - 1) + 2 * prime[i] |
348 | | // <= (2**(32 - BITS_PER_LIMB) - 1) + 2 * (2**BITS_PER_LIMB - 1) + |
349 | | // (2**BITS_PER_LIMB - 1) |
350 | | // == 7 + 3 * 2**29 < 2**32 |
351 | | |
352 | 5.06M | acc2 += prime->val[i] + x->val[i]; |
353 | | // acc2 doesn't overflow 32 bits |
354 | | // Proof: |
355 | | // acc2 + prime[i] + x[i] |
356 | | // <= 2**(32 - BITS_PER_LIMB) - 1 + 2 * (2**BITS_PER_LIMB - 1) |
357 | | // == 2**(32 - BITS_PER_LIMB) + 2**(BITS_PER_LIMB + 1) - 2 |
358 | | // == 2**30 + 5 < 2**32 |
359 | | |
360 | | // x = acc1 & LIMB_MASK if cond else acc2 & LIMB_MASK |
361 | 5.06M | x->val[i] = ((acc1 & tmask) | (acc2 & fmask)) & BN_LIMB_MASK; |
362 | | |
363 | 5.06M | acc1 >>= BN_BITS_PER_LIMB; |
364 | | // acc1 <= 7 == 2**(32 - BITS_PER_LIMB) - 1 |
365 | | // acc1 == 2**(BITS_PER_LIMB * (i + 1)) + 2 * prime[:i + 1] - x[:i + 1] |
366 | | // >> BITS_PER_LIMB * (i + 1) |
367 | | |
368 | 5.06M | acc2 >>= BN_BITS_PER_LIMB; |
369 | | // acc2 <= 7 == 2**(32 - BITS_PER_LIMB) - 1 |
370 | | // acc2 == prime[:i + 1] + x[:i + 1] >> BITS_PER_LIMB * (i + 1) |
371 | 5.06M | } |
372 | | |
373 | | // assert(acc1 == 1); // assert prime <= 2**260 |
374 | | // assert(acc2 == 0); |
375 | | |
376 | | // clang-format off |
377 | | // acc1 == 1 |
378 | | // Proof: |
379 | | // acc1 == 2**(BITS_PER_LIMB * LIMBS) + 2 * prime[:LIMBS] - x[:LIMBS] >> BITS_PER_LIMB * LIMBS |
380 | | // == 2**(BITS_PER_LIMB * LIMBS) + 2 * prime - x >> BITS_PER_LIMB * LIMBS |
381 | | // <= 2**(BITS_PER_LIMB * LIMBS) + 2 * prime >> BITS_PER_LIMB * LIMBS |
382 | | // <= 2**(BITS_PER_LIMB * LIMBS) + 2 * (2**(BITS_PER_LIMB * LIMBS - 1) - 1) >> BITS_PER_LIMB * LIMBS |
383 | | // <= 2**(BITS_PER_LIMB * LIMBS) + 2**(BITS_PER_LIMB * LIMBS) - 2 >> BITS_PER_LIMB * LIMBS |
384 | | // == 1 |
385 | | |
386 | | // acc1 == 2**(BITS_PER_LIMB * LIMBS) + 2 * prime[:LIMBS] - x[:LIMBS] >> BITS_PER_LIMB * LIMBS |
387 | | // == 2**(BITS_PER_LIMB * LIMBS) + 2 * prime - x >> BITS_PER_LIMB * LIMBS |
388 | | // >= 2**(BITS_PER_LIMB * LIMBS) + 0 >> BITS_PER_LIMB * LIMBS |
389 | | // == 1 |
390 | | |
391 | | // acc2 == 0 |
392 | | // Proof: |
393 | | // acc2 == prime[:LIMBS] + x[:LIMBS] >> BITS_PER_LIMB * LIMBS |
394 | | // == prime + x >> BITS_PER_LIMB * LIMBS |
395 | | // <= 2 * prime - 1 >> BITS_PER_LIMB * LIMBS |
396 | | // <= 2 * (2**(BITS_PER_LIMB * LIMBS - 1) - 1) - 1 >> 261 |
397 | | // == 2**(BITS_PER_LIMB * LIMBS) - 3 >> BITS_PER_LIMB * LIMBS |
398 | | // == 0 |
399 | | // clang-format on |
400 | 562k | } |
401 | | |
402 | | // x <<= 1 |
403 | | // Assumes x is normalized, x < 2**260 == 2**(LIMBS*BITS_PER_LIMB - 1) |
404 | | // Guarantees x is normalized |
405 | 45.9M | void bn_lshift(bignum256 *x) { |
406 | 413M | for (int i = BN_LIMBS - 1; i > 0; i--) { |
407 | 367M | x->val[i] = ((x->val[i] << 1) & BN_LIMB_MASK) | |
408 | 367M | (x->val[i - 1] >> (BN_BITS_PER_LIMB - 1)); |
409 | 367M | } |
410 | 45.9M | x->val[0] = (x->val[0] << 1) & BN_LIMB_MASK; |
411 | 45.9M | } |
412 | | |
413 | | // x >>= 1, i.e. x = floor(x/2) |
414 | | // Assumes x is normalized |
415 | | // Guarantees x is normalized |
416 | | // If x is partly reduced (fully reduced) modulo prime, |
417 | | // guarantess x will be partly reduced (fully reduced) modulo prime |
418 | 43.7M | void bn_rshift(bignum256 *x) { |
419 | 393M | for (int i = 0; i < BN_LIMBS - 1; i++) { |
420 | 350M | x->val[i] = |
421 | 350M | (x->val[i] >> 1) | ((x->val[i + 1] & 1) << (BN_BITS_PER_LIMB - 1)); |
422 | 350M | } |
423 | 43.7M | x->val[BN_LIMBS - 1] >>= 1; |
424 | 43.7M | } |
425 | | |
426 | | // Sets i-th least significant bit (counting from zero) |
427 | | // Assumes x is normalized and 0 <= i < 261 == LIMBS*BITS_PER_LIMB |
428 | | // Guarantees x is normalized |
429 | | // The function has constant control flow but not constant memory access flow |
430 | | // with regard to i |
431 | 0 | void bn_setbit(bignum256 *x, uint16_t i) { |
432 | 0 | assert(i < BN_LIMBS * BN_BITS_PER_LIMB); |
433 | 0 | x->val[i / BN_BITS_PER_LIMB] |= (1u << (i % BN_BITS_PER_LIMB)); |
434 | 0 | } |
435 | | |
436 | | // clears i-th least significant bit (counting from zero) |
437 | | // Assumes x is normalized and 0 <= i < 261 == LIMBS*BITS_PER_LIMB |
438 | | // Guarantees x is normalized |
439 | | // The function has constant control flow but not constant memory access flow |
440 | | // with regard to i |
441 | 0 | void bn_clearbit(bignum256 *x, uint16_t i) { |
442 | 0 | assert(i < BN_LIMBS * BN_BITS_PER_LIMB); |
443 | 0 | x->val[i / BN_BITS_PER_LIMB] &= ~(1u << (i % BN_BITS_PER_LIMB)); |
444 | 0 | } |
445 | | |
446 | | // returns i-th least significant bit (counting from zero) |
447 | | // Assumes x is normalized and 0 <= i < 261 == LIMBS*BITS_PER_LIMB |
448 | | // The function has constant control flow but not constant memory access flow |
449 | | // with regard to i |
450 | 0 | uint32_t bn_testbit(const bignum256 *x, uint16_t i) { |
451 | 0 | assert(i < BN_LIMBS * BN_BITS_PER_LIMB); |
452 | 0 | return (x->val[i / BN_BITS_PER_LIMB] >> (i % BN_BITS_PER_LIMB)) & 1; |
453 | 0 | } |
454 | | |
455 | | // res = x ^ y |
456 | | // Assumes x, y are normalized |
457 | | // Guarantees res is normalized |
458 | | // Works properly even if &res == &x or &res == &y or &res == &x == &y |
459 | 0 | void bn_xor(bignum256 *res, const bignum256 *x, const bignum256 *y) { |
460 | 0 | for (int i = 0; i < BN_LIMBS; i++) { |
461 | 0 | res->val[i] = x->val[i] ^ y->val[i]; |
462 | 0 | } |
463 | 0 | } |
464 | | |
465 | | // x = x / 2 % prime |
466 | | // Explicitly x = x / 2 if is_even(x) else (x + prime) / 2 |
467 | | // Assumes x is normalized, x + prime < 261 == LIMBS * BITS_PER_LIMB |
468 | | // Guarantees x is normalized |
469 | | // If x is partly reduced (fully reduced) modulo prime, |
470 | | // guarantess x will be partly reduced (fully reduced) modulo prime |
471 | | // Assumes prime is an odd number and normalized |
472 | 5.19M | void bn_mult_half(bignum256 *x, const bignum256 *prime) { |
473 | | // x = x / 2 if is_even(x) else (x + prime) / 2 |
474 | | |
475 | 5.19M | uint32_t x_is_odd_mask = |
476 | 5.19M | -(x->val[0] & 1); // x_is_odd_mask = 0xFFFFFFFF if is_odd(x) else 0 |
477 | | |
478 | 5.19M | uint32_t acc = (x->val[0] + (prime->val[0] & x_is_odd_mask)) >> 1; |
479 | | // acc < 2**BITS_PER_LIMB |
480 | | // Proof: |
481 | | // acc == x[0] + prime[0] & x_is_odd_mask >> 1 |
482 | | // <= (2**(BITS_PER_LIMB) - 1) + (2**(BITS_PER_LIMB) - 1) >> 1 |
483 | | // == 2**(BITS_PER_LIMB + 1) - 2 >> 1 |
484 | | // < 2**(BITS_PER_LIMB) |
485 | | |
486 | 46.7M | for (int i = 0; i < BN_LIMBS - 1; i++) { |
487 | 41.5M | uint32_t temp = (x->val[i + 1] + (prime->val[i + 1] & x_is_odd_mask)); |
488 | | // temp < 2**(BITS_PER_LIMB + 1) |
489 | | // Proof: |
490 | | // temp == x[i + 1] + val[i + 1] & x_is_odd_mask |
491 | | // <= (2**(BITS_PER_LIMB) - 1) + (2**(BITS_PER_LIMB) - 1) |
492 | | // < 2**(BITS_PER_LIMB + 1) |
493 | | |
494 | 41.5M | acc += (temp & 1) << (BN_BITS_PER_LIMB - 1); |
495 | | // acc doesn't overflow 32 bits |
496 | | // Proof: |
497 | | // acc + (temp & 1 << BITS_PER_LIMB - 1) |
498 | | // <= 2**(BITS_PER_LIMB + 1) + 2**(BITS_PER_LIMB - 1) |
499 | | // <= 2**30 + 2**28 < 2**32 |
500 | | |
501 | 41.5M | x->val[i] = acc & BN_LIMB_MASK; |
502 | 41.5M | acc >>= BN_BITS_PER_LIMB; |
503 | 41.5M | acc += temp >> 1; |
504 | | // acc < 2**(BITS_PER_LIMB + 1) |
505 | | // Proof: |
506 | | // acc + (temp >> 1) |
507 | | // <= (2**(32 - BITS_PER_LIMB) - 1) + (2**(BITS_PER_LIMB + 1) - 1 >> 1) |
508 | | // == 7 + 2**(BITS_PER_LIMB) - 1 < 2**(BITS_PER_LIMB + 1) |
509 | | |
510 | | // acc == x[:i+2]+(prime[:i+2] & x_is_odd_mask) >> BITS_PER_LIMB * (i+1) |
511 | 41.5M | } |
512 | 5.19M | x->val[BN_LIMBS - 1] = acc; |
513 | | |
514 | | // assert(acc >> BITS_PER_LIMB == 0); |
515 | | // acc >> BITS_PER_LIMB == 0 |
516 | | // Proof: |
517 | | // acc |
518 | | // == x[:LIMBS] + (prime[:LIMBS] & x_is_odd_mask) >> BITS_PER_LIMB*LIMBS |
519 | | // == x + (prime & x_is_odd_mask) >> BITS_PER_LIMB * LIMBS |
520 | | // <= x + prime >> BITS_PER_LIMB * LIMBS |
521 | | // <= 2**(BITS_PER_LIMB * LIMBS) - 1 >> BITS_PER_LIMB * LIMBS |
522 | | // == 0 |
523 | 5.19M | } |
524 | | |
525 | | // x = x * k % prime |
526 | | // Assumes x is normalized, 0 <= k <= 8 = 2**(32 - BITS_PER_LIMB) |
527 | | // Assumes prime is normalized and 2^256 - 2^224 <= prime <= 2^256 |
528 | | // Guarantees x is normalized and partly reduced modulo prime |
529 | 5.00M | void bn_mult_k(bignum256 *x, uint8_t k, const bignum256 *prime) { |
530 | 5.00M | assert(k <= 8); |
531 | | |
532 | 50.0M | for (int i = 0; i < BN_LIMBS; i++) { |
533 | 45.0M | x->val[i] = k * x->val[i]; |
534 | | // x[i] doesn't overflow 32 bits |
535 | | // k * x[i] <= 2**(32 - BITS_PER_LIMB) * (2**BITS_PER_LIMB - 1) |
536 | | // < 2**(32 - BITS_PER_LIMB) * 2**BITS_PER_LIMB == 2**32 |
537 | 45.0M | } |
538 | | |
539 | 5.00M | bn_fast_mod(x, prime); |
540 | 5.00M | } |
541 | | |
542 | | // Reduces partly reduced x modulo prime |
543 | | // Explicitly x = x if x < prime else x - prime |
544 | | // Assumes x is partly reduced modulo prime |
545 | | // Guarantees x is fully reduced modulo prime |
546 | | // Assumes prime is nonzero and normalized |
547 | 830k | void bn_mod(bignum256 *x, const bignum256 *prime) { |
548 | 830k | uint32_t x_less_prime = bn_is_less(x, prime); |
549 | | |
550 | 830k | bignum256 temp = {0}; |
551 | 830k | bn_subtract(x, prime, &temp); |
552 | 830k | bn_cmov(x, x_less_prime, x, &temp); |
553 | | |
554 | 830k | memzero(&temp, sizeof(temp)); |
555 | 830k | } |
556 | | |
557 | | // Auxiliary function for bn_multiply |
558 | | // res = k * x |
559 | | // Assumes k and x are normalized |
560 | | // Guarantees res is normalized 18 digit little endian number in base 2**29 |
561 | 26.9M | void bn_multiply_long(const bignum256 *k, const bignum256 *x, bignum512 *res) { |
562 | | // Uses long multiplication in base 2**29, see |
563 | | // https://en.wikipedia.org/wiki/Multiplication_algorithm#Long_multiplication |
564 | | |
565 | 26.9M | uint64_t acc = 0; |
566 | | |
567 | | // compute lower half |
568 | 269M | for (int i = 0; i < BN_LIMBS; i++) { |
569 | 1.45G | for (int j = 0; j <= i; j++) { |
570 | 1.21G | acc += k->val[j] * (uint64_t)x->val[i - j]; |
571 | | // acc doesn't overflow 64 bits |
572 | | // Proof: |
573 | | // acc <= acc + sum([k[j] * x[i-j] for j in range(i)]) |
574 | | // <= (2**(64 - BITS_PER_LIMB) - 1) + |
575 | | // LIMBS * (2**BITS_PER_LIMB - 1) * (2**BITS_PER_LIMB - 1) |
576 | | // == (2**35 - 1) + 9 * (2**29 - 1) * (2**29 - 1) |
577 | | // <= 2**35 + 9 * 2**58 < 2**64 |
578 | 1.21G | } |
579 | | |
580 | 242M | res->val[i] = acc & BN_LIMB_MASK; |
581 | 242M | acc >>= BN_BITS_PER_LIMB; |
582 | | // acc <= 2**35 - 1 == 2**(64 - BITS_PER_LIMB) - 1 |
583 | 242M | } |
584 | | |
585 | | // compute upper half |
586 | 242M | for (int i = BN_LIMBS; i < 2 * BN_LIMBS - 1; i++) { |
587 | 1.18G | for (int j = i - BN_LIMBS + 1; j < BN_LIMBS; j++) { |
588 | 971M | acc += k->val[j] * (uint64_t)x->val[i - j]; |
589 | | // acc doesn't overflow 64 bits |
590 | | // Proof: |
591 | | // acc <= acc + sum([k[j] * x[i-j] for j in range(i)]) |
592 | | // <= (2**(64 - BITS_PER_LIMB) - 1) |
593 | | // LIMBS * (2**BITS_PER_LIMB - 1) * (2**BITS_PER_LIMB - 1) |
594 | | // == (2**35 - 1) + 9 * (2**29 - 1) * (2**29 - 1) |
595 | | // <= 2**35 + 9 * 2**58 < 2**64 |
596 | 971M | } |
597 | | |
598 | 215M | res->val[i] = acc & (BN_BASE - 1); |
599 | 215M | acc >>= BN_BITS_PER_LIMB; |
600 | | // acc < 2**35 == 2**(64 - BITS_PER_LIMB) |
601 | 215M | } |
602 | | |
603 | 26.9M | res->val[2 * BN_LIMBS - 1] = acc; |
604 | 26.9M | } |
605 | | |
606 | | // Auxiliary function for bn_multiply |
607 | | // Assumes 0 <= d <= 8 == LIMBS - 1 |
608 | | // Assumes res is normalized and res < 2**(256 + 29*d + 31) |
609 | | // Guarantess res in normalized and res < 2 * prime * 2**(29*d) |
610 | | // Assumes prime is normalized, 2**256 - 2**224 <= prime <= 2**256 |
611 | | void bn_multiply_reduce_step(bignum512 *res, const bignum256 *prime, |
612 | 242M | uint32_t d) { |
613 | | // clang-format off |
614 | | // Computes res = res - (res // 2**(256 + BITS_PER_LIMB * d)) * prime * 2**(BITS_PER_LIMB * d) |
615 | | |
616 | | // res - (res // 2**(256 + BITS_PER_LIMB * d)) * prime * 2**(BITS_PER_LIMB * d) < 2 * prime * 2**(BITS_PER_LIMB * d) |
617 | | // Proof: |
618 | | // res - res // (2**(256 + BITS_PER_LIMB * d)) * 2**(BITS_PER_LIMB * d) * prime |
619 | | // == res - res // (2**(256 + BITS_PER_LIMB * d)) * 2**(BITS_PER_LIMB * d) * (2**256 - (2**256 - prime)) |
620 | | // == res - res // (2**(256 + BITS_PER_LIMB * d)) * 2**(BITS_PER_LIMB * d) * 2**256 + res // (2**(256 + BITS_PER_LIMB * d)) * 2**(BITS_PER_LIMB * d) * (2**256 - prime) |
621 | | // == (res % 2**(256 + BITS_PER_LIMB * d)) + res // (2**256 + BITS_PER_LIMB * d) * 2**(BITS_PER_LIMB * d) * (2**256 - prime) |
622 | | // <= (2**(256 + 29*d + 31) % 2**(256 + 29*d)) + (2**(256 + 29*d + 31) - 1) / (2**256 + 29*d) * 2**(29*d) * (2**256 - prime) |
623 | | // <= 2**(256 + 29*d) + 2**(256 + 29*d + 31) / (2**256 + 29*d) * 2**(29*d) * (2**256 - prime) |
624 | | // == 2**(256 + 29*d) + 2**31 * 2**(29*d) * (2**256 - prime) |
625 | | // == 2**(29*d) * (2**256 + 2**31 * (2*256 - prime)) |
626 | | // <= 2**(29*d) * (2**256 + 2**31 * 2*224) |
627 | | // <= 2**(29*d) * (2**256 + 2**255) |
628 | | // <= 2**(29*d) * 2 * (2**256 - 2**224) |
629 | | // <= 2 * prime * 2**(29*d) |
630 | | // clang-format on |
631 | | |
632 | 242M | uint32_t coef = |
633 | 242M | (res->val[d + BN_LIMBS - 1] >> |
634 | 242M | (256 - (BN_LIMBS - 1) * BN_BITS_PER_LIMB)) + |
635 | 242M | (res->val[d + BN_LIMBS] << ((BN_LIMBS * BN_BITS_PER_LIMB) - 256)); |
636 | | |
637 | | // coef == res // 2**(256 + BITS_PER_LIMB * d) |
638 | | |
639 | | // coef < 2**31 |
640 | | // Proof: |
641 | | // coef == res // 2**(256 + BITS_PER_LIMB * d) |
642 | | // < 2**(256 + 29 * d + 31) // 2**(256 + 29 * d) |
643 | | // == 2**31 |
644 | | |
645 | 242M | const int shift = 31; |
646 | 242M | uint64_t acc = 1ull << shift; |
647 | | |
648 | 2.42G | for (int i = 0; i < BN_LIMBS; i++) { |
649 | 2.18G | acc += (((uint64_t)(BN_BASE - 1)) << shift) + res->val[d + i] - |
650 | 2.18G | prime->val[i] * (uint64_t)coef; |
651 | | // acc neither overflow 64 bits nor underflow zero |
652 | | // Proof: |
653 | | // acc + ((BASE - 1) << shift) + res[d + i] - prime[i] * coef |
654 | | // >= ((BASE - 1) << shift) - prime[i] * coef |
655 | | // == 2**shift * (2**BITS_PER_LIMB - 1) - (2**BITS_PER_LIMB - 1) * |
656 | | // (2**31 - 1) |
657 | | // == (2**shift - 2**31 + 1) * (2**BITS_PER_LIMB - 1) |
658 | | // == (2**31 - 2**31 + 1) * (2**29 - 1) |
659 | | // == 2**29 - 1 > 0 |
660 | | // acc + ((BASE - 1) << shift) + res[d + i] - prime[i] * coef |
661 | | // <= acc + ((BASE - 1) << shift) + res[d+i] |
662 | | // <= (2**(64 - BITS_PER_LIMB) - 1) + 2**shift * (2**BITS_PER_LIMB - 1) |
663 | | // + (2*BITS_PER_LIMB - 1) |
664 | | // == (2**(64 - BITS_PER_LIMB) - 1) + (2**shift + 1) * |
665 | | // (2**BITS_PER_LIMB - 1) |
666 | | // == (2**35 - 1) + (2**31 + 1) * (2**29 - 1) |
667 | | // <= 2**35 + 2**60 + 2**29 < 2**64 |
668 | | |
669 | 2.18G | res->val[d + i] = acc & BN_LIMB_MASK; |
670 | 2.18G | acc >>= BN_BITS_PER_LIMB; |
671 | | // acc <= 2**(64 - BITS_PER_LIMB) - 1 == 2**35 - 1 |
672 | | |
673 | | // acc == (1 << BITS_PER_LIMB * (i + 1) + shift) + res[d : d + i + 1] |
674 | | // - coef * prime[:i + 1] >> BITS_PER_LIMB * (i + 1) |
675 | 2.18G | } |
676 | | |
677 | | // acc += (((uint64_t)(BASE - 1)) << shift) + res[d + LIMBS]; |
678 | | // acc >>= BITS_PER_LIMB; |
679 | | // assert(acc <= 1ul << shift); |
680 | | |
681 | | // clang-format off |
682 | | // acc == 1 << shift |
683 | | // Proof: |
684 | | // acc |
685 | | // == (1 << BITS_PER_LIMB * (LIMBS + 1) + shift) + res[d : d + LIMBS + 1] - coef * prime[:LIMBS] >> BITS_PER_LIMB * (LIMBS + 1) |
686 | | // == (1 << BITS_PER_LIMB * (LIMBS + 1) + shift) + res[d : d + LIMBS + 1] - coef * prime >> BITS_PER_LIMB * (LIMBS + 1) |
687 | | |
688 | | // == (1 << BITS_PER_LIMB * (LIMBS + 1) + shift) + (res[d : d + LIMBS + 1] - coef * prime) >> BITS_PER_LIMB * (LIMBS + 1) |
689 | | // <= (1 << BITS_PER_LIMB * (LIMBS + 1) + shift) + (res[:d] + BASE**d * res[d : d + LIMBS + 1] - BASE**d * coef * prime)//BASE**d >> BITS_PER_LIMB * (LIMBS + 1) |
690 | | // <= (1 << BITS_PER_LIMB * (LIMBS + 1) + shift) + (res - BASE**d * coef * prime) // BASE**d >> BITS_PER_LIMB * (LIMBS + 1) |
691 | | // == (1 << BITS_PER_LIMB * (LIMBS + 1) + shift) + (2 * prime * BASE**d) // BASE**d >> BITS_PER_LIMB * (LIMBS + 1) |
692 | | // <= (1 << 321) + 2 * 2**256 >> 290 |
693 | | // == 1 << 31 == 1 << shift |
694 | | |
695 | | // == (1 << BITS_PER_LIMB * (LIMBS + 1) + shift) + res[d : d + LIMBS + 1] - coef * prime[:LIMBS + 1] >> BITS_PER_LIMB * (LIMBS + 1) |
696 | | // >= (1 << BITS_PER_LIMB * (LIMBS + 1) + shift) + 0 >> BITS_PER_LIMB * (LIMBS + 1) |
697 | | // == 1 << shift |
698 | | // clang-format on |
699 | | |
700 | 242M | res->val[d + BN_LIMBS] = 0; |
701 | 242M | } |
702 | | |
703 | | // Partly reduces x |
704 | | // Assumes x in normalized and res < 2**519 |
705 | | // Guarantees x is normalized and partly reduced modulo prime |
706 | | // Assumes prime is normalized, 2**256 - 2**224 <= prime <= 2**256 |
707 | 26.9M | void bn_reduce(bignum512 *x, const bignum256 *prime) { |
708 | 269M | for (int i = BN_LIMBS - 1; i >= 0; i--) { |
709 | | // res < 2**(256 + 29*i + 31) |
710 | | // Proof: |
711 | | // if i == LIMBS - 1: |
712 | | // res < 2**519 |
713 | | // == 2**(256 + 29 * 8 + 31) |
714 | | // == 2**(256 + 29 * (LIMBS - 1) + 31) |
715 | | // else: |
716 | | // res < 2 * prime * 2**(29 * (i + 1)) |
717 | | // <= 2**256 * 2**(29*i + 29) < 2**(256 + 29*i + 31) |
718 | 242M | bn_multiply_reduce_step(x, prime, i); |
719 | 242M | } |
720 | 26.9M | } |
721 | | |
722 | | // x = k * x % prime |
723 | | // Assumes k, x are normalized, k * x < 2**519 |
724 | | // Guarantees x is normalized and partly reduced modulo prime |
725 | | // Assumes prime is normalized, 2**256 - 2**224 <= prime <= 2**256 |
726 | 26.9M | void bn_multiply(const bignum256 *k, bignum256 *x, const bignum256 *prime) { |
727 | 26.9M | bignum512 res = {0}; |
728 | | |
729 | 26.9M | bn_multiply_long(k, x, &res); |
730 | 26.9M | bn_reduce(&res, prime); |
731 | 26.9M | bn_copy_lower(&res, x); |
732 | | |
733 | 26.9M | memzero(&res, sizeof(res)); |
734 | 26.9M | } |
735 | | |
736 | | // Partly reduces x modulo prime |
737 | | // Assumes limbs of x except the last (the most significant) one are normalized |
738 | | // Assumes prime is normalized and 2^256 - 2^224 <= prime <= 2^256 |
739 | | // Guarantees x is normalized and partly reduced modulo prime |
740 | 13.6M | void bn_fast_mod(bignum256 *x, const bignum256 *prime) { |
741 | | // Computes x = x - (x // 2**256) * prime |
742 | | |
743 | | // x < 2**((LIMBS - 1) * BITS_PER_LIMB + 32) == 2**264 |
744 | | |
745 | | // x - (x // 2**256) * prime < 2 * prime |
746 | | // Proof: |
747 | | // x - (x // 2**256) * prime |
748 | | // == x - (x // 2**256) * (2**256 - (2**256 - prime)) |
749 | | // == x - ((x // 2**256) * 2**256) + (x // 2**256) * (2**256 - prime) |
750 | | // == (x % prime) + (x // 2**256) * (2**256 - prime) |
751 | | // <= prime - 1 + (2**264 // 2**256) * (2**256 - prime) |
752 | | // <= 2**256 + 2**8 * 2**224 == 2**256 + 2**232 |
753 | | // < 2 * (2**256 - 2**224) |
754 | | // <= 2 * prime |
755 | | |
756 | | // x - (x // 2**256 - 1) * prime < 2 * prime |
757 | | // Proof: |
758 | | // x - (x // 2**256) * prime + prime |
759 | | // == x - (x // 2**256) * (2**256 - (2**256 - prime)) + prime |
760 | | // == x - ((x//2**256) * 2**256) + (x//2**256) * (2**256 - prime) + prime |
761 | | // == (x % prime) + (x // 2**256) * (2**256 - prime) + prime |
762 | | // <= 2 * prime - 1 + (2**264 // 2**256) * (2**256 - prime) |
763 | | // <= 2 * prime + 2**8 * 2**224 == 2**256 + 2**232 + 2**256 - 2**224 |
764 | | // < 2 * (2**256 - 2**224) |
765 | | // <= 2 * prime |
766 | | |
767 | 13.6M | uint32_t coef = |
768 | 13.6M | x->val[BN_LIMBS - 1] >> (256 - ((BN_LIMBS - 1) * BN_BITS_PER_LIMB)); |
769 | | |
770 | | // clang-format off |
771 | | // coef == x // 2**256 |
772 | | // 0 <= coef < 2**((LIMBS - 1) * BITS_PER_LIMB + 32 - 256) == 256 |
773 | | // Proof: |
774 | | //* Let x[[a : b] be the number consisting of a-th to (b-1)-th bit of the number x. |
775 | | // x[LIMBS - 1] >> (256 - ((LIMBS - 1) * BITS_PER_LIMB)) |
776 | | // == x[[(LIMBS - 1) * BITS_PER_LIMB : (LIMBS - 1) * BITS_PER_LIMB + 32]] >> (256 - ((LIMBS - 1) * BITS_PER_LIMB)) |
777 | | // == x[[256 - ((LIMBS - 1) * BITS_PER_LIMB) + (LIMBS - 1) * BITS_PER_LIMB : (LIMBS - 1) * BITS_PER_LIMB + 32]] |
778 | | // == x[[256 : (LIMBS - 1) * BITS_PER_LIMB + 32]] |
779 | | // == x[[256 : 264]] == x // 2**256 |
780 | | // clang-format on |
781 | | |
782 | 13.6M | const int shift = 8; |
783 | 13.6M | uint64_t acc = 1ull << shift; |
784 | | |
785 | 136M | for (int i = 0; i < BN_LIMBS; i++) { |
786 | 122M | acc += (((uint64_t)(BN_BASE - 1)) << shift) + x->val[i] - |
787 | 122M | prime->val[i] * (uint64_t)coef; |
788 | | // acc neither overflows 64 bits nor underflows 0 |
789 | | // Proof: |
790 | | // acc + (BASE - 1 << shift) + x[i] - prime[i] * coef |
791 | | // >= (BASE - 1 << shift) - prime[i] * coef |
792 | | // >= 2**shift * (2**BITS_PER_LIMB - 1) - (2**BITS_PER_LIMB - 1) * 255 |
793 | | // == (2**shift - 255) * (2**BITS_PER_LIMB - 1) |
794 | | // == (2**8 - 255) * (2**29 - 1) == 2**29 - 1 >= 0 |
795 | | // acc + (BASE - 1 << shift) + x[i] - prime[i] * coef |
796 | | // <= acc + ((BASE - 1) << shift) + x[i] |
797 | | // <= (2**(64 - BITS_PER_LIMB) - 1) + 2**shift * (2**BITS_PER_LIMB - 1) |
798 | | // + (2**32 - 1) |
799 | | // == (2**35 - 1) + 2**8 * (2**29 - 1) + 2**32 |
800 | | // < 2**35 + 2**37 + 2**32 < 2**64 |
801 | | |
802 | 122M | x->val[i] = acc & BN_LIMB_MASK; |
803 | 122M | acc >>= BN_BITS_PER_LIMB; |
804 | | // acc <= 2**(64 - BITS_PER_LIMB) - 1 == 2**35 - 1 |
805 | | |
806 | | // acc == (1 << BITS_PER_LIMB * (i + 1) + shift) + x[:i + 1] |
807 | | // - coef * prime[:i + 1] >> BITS_PER_LIMB * (i + 1) |
808 | 122M | } |
809 | | |
810 | | // assert(acc == 1 << shift); |
811 | | |
812 | | // clang-format off |
813 | | // acc == 1 << shift |
814 | | // Proof: |
815 | | // acc |
816 | | // == (1 << BITS_PER_LIMB * LIMBS + shift) + x[:LIMBS] - coef * prime[:LIMBS] >> BITS_PER_LIMB * LIMBS |
817 | | // == (1 << BITS_PER_LIMB * LIMBS + shift) + (x - coef * prime) >> BITS_PER_LIMB * LIMBS |
818 | | // <= (1 << BITS_PER_LIMB * LIMBS + shift) + (2 * prime) >> BITS_PER_LIMB * LIMBS |
819 | | // <= (1 << BITS_PER_LIMB * LIMBS + shift) + 2 * 2**256 >> BITS_PER_LIMB * LIMBS |
820 | | // <= 2**269 + 2**257 >> 2**261 |
821 | | // <= 1 << 8 == 1 << shift |
822 | | |
823 | | // acc |
824 | | // == (1 << BITS_PER_LIMB * LIMBS + shift) + x[:LIMBS] - coef * prime[:LIMBS] >> BITS_PER_LIMB * LIMBS |
825 | | // >= (1 << BITS_PER_LIMB * LIMBS + shift) + 0 >> BITS_PER_LIMB * LIMBS |
826 | | // == (1 << BITS_PER_LIMB * LIMBS + shift) + 0 >> BITS_PER_LIMB * LIMBS |
827 | | // <= 1 << 8 == 1 << shift |
828 | | // clang-format on |
829 | 13.6M | } |
830 | | |
831 | | // res = x**e % prime |
832 | | // Assumes both x and e are normalized, x < 2**259 |
833 | | // Guarantees res is normalized and partly reduced modulo prime |
834 | | // Works properly even if &x == &res |
835 | | // Assumes prime is normalized, 2**256 - 2**224 <= prime <= 2**256 |
836 | | // The function doesn't have neither constant control flow nor constant memory |
837 | | // access flow with regard to e |
838 | | void bn_power_mod(const bignum256 *x, const bignum256 *e, |
839 | 186 | const bignum256 *prime, bignum256 *res) { |
840 | | // Uses iterative right-to-left exponentiation by squaring, see |
841 | | // https://en.wikipedia.org/wiki/Modular_exponentiation#Right-to-left_binary_method |
842 | | |
843 | 186 | bignum256 acc = {0}; |
844 | 186 | bn_copy(x, &acc); |
845 | | |
846 | 186 | bn_one(res); |
847 | 1.86k | for (int i = 0; i < BN_LIMBS; i++) { |
848 | 1.67k | uint32_t limb = e->val[i]; |
849 | | |
850 | 48.9k | for (int j = 0; j < BN_BITS_PER_LIMB; j++) { |
851 | | // Break if the following bits of the last limb are zero |
852 | 47.4k | if (i == BN_LIMBS - 1 && limb == 0) break; |
853 | | |
854 | 47.2k | if (limb & 1) |
855 | | // acc * res < 2**519 |
856 | | // Proof: |
857 | | // acc * res <= max(2**259 - 1, 2 * prime) * (2 * prime) |
858 | | // == max(2**259 - 1, 2**257) * 2**257 < 2**259 * 2**257 |
859 | | // == 2**516 < 2**519 |
860 | 45.9k | bn_multiply(&acc, res, prime); |
861 | | |
862 | 47.2k | limb >>= 1; |
863 | | // acc * acc < 2**519 |
864 | | // Proof: |
865 | | // acc * acc <= max(2**259 - 1, 2 * prime)**2 |
866 | | // <= (2**259)**2 == 2**518 < 2**519 |
867 | 47.2k | bn_multiply(&acc, &acc, prime); |
868 | 47.2k | } |
869 | | // acc == x**(e[:i + 1]) % prime |
870 | 1.67k | } |
871 | | |
872 | 186 | memzero(&acc, sizeof(acc)); |
873 | 186 | } |
874 | | |
875 | | // x = sqrt(x) % prime |
876 | | // Explicitly x = x**((prime+1)/4) % prime |
877 | | // The other root is -sqrt(x) |
878 | | // Assumes x is normalized, x < 2**259 and quadratic residuum mod prime |
879 | | // Assumes prime is a prime number, prime % 4 == 3, it is normalized and |
880 | | // 2**256 - 2**224 <= prime <= 2**256 |
881 | | // Guarantees x is normalized and fully reduced modulo prime |
882 | | // The function doesn't have neither constant control flow nor constant memory |
883 | | // access flow with regard to prime |
884 | 186 | void bn_sqrt(bignum256 *x, const bignum256 *prime) { |
885 | | // Uses the Lagrange formula for the primes of the special form, see |
886 | | // http://en.wikipedia.org/wiki/Quadratic_residue#Prime_or_prime_power_modulus |
887 | | // If prime % 4 == 3, then sqrt(x) % prime == x**((prime+1)//4) % prime |
888 | | |
889 | 186 | assert(prime->val[0] % 4 == 3); |
890 | | |
891 | | // e = (prime + 1) // 4 |
892 | 186 | bignum256 e = {0}; |
893 | 186 | bn_copy(prime, &e); |
894 | 186 | bn_addi(&e, 1); |
895 | 186 | bn_rshift(&e); |
896 | 186 | bn_rshift(&e); |
897 | | |
898 | 186 | bn_power_mod(x, &e, prime, x); |
899 | 186 | bn_mod(x, prime); |
900 | | |
901 | 186 | memzero(&e, sizeof(e)); |
902 | 186 | } |
903 | | |
904 | | // a = 1/a % 2**n |
905 | | // Assumes a is odd, 1 <= n <= 32 |
906 | | // The function doesn't have neither constant control flow nor constant memory |
907 | | // access flow with regard to n |
908 | 1.50M | uint32_t inverse_mod_power_two(uint32_t a, uint32_t n) { |
909 | | // Uses "Explicit Quadratic Modular inverse modulo 2" from section 3.3 of "On |
910 | | // Newton-Raphson iteration for multiplicative inverses modulo prime powers" |
911 | | // by Jean-Guillaume Dumas, see |
912 | | // https://arxiv.org/pdf/1209.6626.pdf |
913 | | |
914 | | // 1/a % 2**n |
915 | | // = (2-a) * product([1 + (a-1)**(2**i) for i in range(1, floor(log2(n)))]) |
916 | | |
917 | 1.50M | uint32_t acc = 2 - a; |
918 | 1.50M | uint32_t f = a - 1; |
919 | | |
920 | | // mask = (1 << n) - 1 |
921 | 1.50M | uint32_t mask = n == 32 ? 0xFFFFFFFF : (1u << n) - 1; |
922 | | |
923 | 9.05M | for (uint32_t i = 1; i < n; i <<= 1) { |
924 | 7.54M | f = (f * f) & mask; |
925 | 7.54M | acc = (acc * (1 + f)) & mask; |
926 | 7.54M | } |
927 | | |
928 | 1.50M | return acc; |
929 | 1.50M | } |
930 | | |
931 | | // x = (x / 2**BITS_PER_LIMB) % prime |
932 | | // Assumes both x and prime are normalized |
933 | | // Assumes prime is an odd number and normalized |
934 | | // Guarantees x is normalized |
935 | | // If x is partly reduced (fully reduced) modulo prime, |
936 | | // guarantess x will be partly reduced (fully reduced) modulo prime |
937 | 1.50M | void bn_divide_base(bignum256 *x, const bignum256 *prime) { |
938 | | // Uses an explicit formula for the modular inverse of power of two |
939 | | // (x / 2**n) % prime == (x + ((-x / prime) % 2**n) * prime) // 2**n |
940 | | // Proof: |
941 | | // (x + ((-x / prime) % 2**n) * prime) % 2**n |
942 | | // == (x - x / prime * prime) % 2**n |
943 | | // == 0 |
944 | | // (x + ((-1 / prime) % 2**n) * prime) % prime |
945 | | // == x |
946 | | // if x < prime: |
947 | | // (x + ((-x / prime) % 2**n) * prime) // 2**n |
948 | | // <= ((prime - 1) + (2**n - 1) * prime) / 2**n |
949 | | // == (2**n * prime - 1) / 2**n == prime - 1 / 2**n < prime |
950 | | // if x < 2 * prime: |
951 | | // (x + ((-x / prime) % 2**n) * prime) // 2**n |
952 | | // <= ((2 * prime - 1) + (2**n - 1) * prime) / 2**n |
953 | | // == (2**n * prime + prime - 1) / 2**n |
954 | | // == prime + (prime - 1) / 2**n < 2 * prime |
955 | | |
956 | | // m = (-x / prime) % 2**BITS_PER_LIMB |
957 | 1.50M | uint32_t m = (x->val[0] * (BN_BASE - inverse_mod_power_two( |
958 | 1.50M | prime->val[0], BN_BITS_PER_LIMB))) & |
959 | 1.50M | BN_LIMB_MASK; |
960 | | // m < 2**BITS_PER_LIMB |
961 | | |
962 | 1.50M | uint64_t acc = x->val[0] + (uint64_t)m * prime->val[0]; |
963 | 1.50M | acc >>= BN_BITS_PER_LIMB; |
964 | | |
965 | 13.5M | for (int i = 1; i < BN_LIMBS; i++) { |
966 | 12.0M | acc = acc + x->val[i] + (uint64_t)m * prime->val[i]; |
967 | | // acc does not overflow 64 bits |
968 | | // acc == acc + x + m * prime |
969 | | // <= 2**(64 - BITS_PER_LIMB) + 2**(BITS_PER_LIMB) |
970 | | // 2**(BITS_PER_LIMB) * 2**(BITS_PER_LIMB) |
971 | | // <= 2**(2 * BITS_PER_LIMB) + 2**(64 - BITS_PER_LIMB) + |
972 | | // 2**(BITS_PER_LIMB) |
973 | | // <= 2**58 + 2**35 + 2**29 < 2**64 |
974 | | |
975 | 12.0M | x->val[i - 1] = acc & BN_LIMB_MASK; |
976 | 12.0M | acc >>= BN_BITS_PER_LIMB; |
977 | | // acc < 2**35 == 2**(64 - BITS_PER_LIMB) |
978 | | |
979 | | // acc == x[:i + 1] + m * prime[:i + 1] >> BITS_PER_LIMB * (i + 1) |
980 | 12.0M | } |
981 | | |
982 | 1.50M | x->val[BN_LIMBS - 1] = acc; |
983 | | |
984 | 1.50M | assert(acc >> BN_BITS_PER_LIMB == 0); |
985 | | |
986 | | // clang-format off |
987 | | // acc >> BITS_PER_LIMB == 0 |
988 | | // Proof: |
989 | | // acc >> BITS_PER_LIMB |
990 | | // == (x[:LIMB] + m * prime[:LIMB] >> BITS_PER_LIMB * LIMBS) >> BITS_PER_LIMB * (LIMBS + 1) |
991 | | // == x + m * prime >> BITS_PER_LIMB * (LIMBS + 1) |
992 | | // <= (2**(BITS_PER_LIMB * LIMBS) - 1) + (2**BITS_PER_LIMB - 1) * (2**(BITS_PER_LIMB * LIMBS) - 1) >> BITS_PER_LIMB * (LIMBS + 1) |
993 | | // == 2**(BITS_PER_LIMB * LIMBS) - 1 + 2**(BITS_PER_LIMB * (LIMBS + 1)) - 2**(BITS_PER_LIMB * LIMBS) - 2**BITS_PER_LIMB + 1 >> BITS_PER_LIMB * (LIMBS + 1) |
994 | | // == 2**(BITS_PER_LIMB * (LIMBS + 1)) - 2**BITS_PER_LIMB >> BITS_PER_LIMB * (LIMBS + 1) |
995 | | // == 0 |
996 | | // clang-format on |
997 | 1.50M | } |
998 | | |
999 | | #if !USE_INVERSE_FAST |
1000 | | // x = 1/x % prime if x != 0 else 0 |
1001 | | // Assumes x is normalized |
1002 | | // Assumes prime is a prime number |
1003 | | // Guarantees x is normalized and fully reduced modulo prime |
1004 | | // Assumes prime is normalized, 2**256 - 2**224 <= prime <= 2**256 |
1005 | | // The function doesn't have neither constant control flow nor constant memory |
1006 | | // access flow with regard to prime |
1007 | | static void bn_inverse_slow(bignum256 *x, const bignum256 *prime) { |
1008 | | // Uses formula 1/x % prime == x**(prime - 2) % prime |
1009 | | // See https://en.wikipedia.org/wiki/Fermat%27s_little_theorem |
1010 | | |
1011 | | bn_fast_mod(x, prime); |
1012 | | |
1013 | | // e = prime - 2 |
1014 | | bignum256 e = {0}; |
1015 | | bn_read_uint32(2, &e); |
1016 | | bn_subtract(prime, &e, &e); |
1017 | | |
1018 | | bn_power_mod(x, &e, prime, x); |
1019 | | bn_mod(x, prime); |
1020 | | |
1021 | | memzero(&e, sizeof(e)); |
1022 | | } |
1023 | | #endif |
1024 | | |
1025 | | #if false |
1026 | | // x = 1/x % prime if x != 0 else 0 |
1027 | | // Assumes x is is_normalized |
1028 | | // Assumes GCD(x, prime) = 1 |
1029 | | // Guarantees x is normalized and fully reduced modulo prime |
1030 | | // Assumes prime is odd, normalized, 2**256 - 2**224 <= prime <= 2**256 |
1031 | | // The function doesn't have neither constant control flow nor constant memory |
1032 | | // access flow with regard to prime and x |
1033 | | static void bn_inverse_fast(bignum256 *x, const bignum256 *prime) { |
1034 | | // "The Almost Montgomery Inverse" from the section 3 of "Constant Time |
1035 | | // Modular Inversion" by Joppe W. Bos |
1036 | | // See http://www.joppebos.com/files/CTInversion.pdf |
1037 | | |
1038 | | /* |
1039 | | u = prime |
1040 | | v = x & prime |
1041 | | s = 1 |
1042 | | r = 0 |
1043 | | |
1044 | | k = 0 |
1045 | | while v != 1: |
1046 | | k += 1 |
1047 | | if is_even(u): |
1048 | | u = u // 2 |
1049 | | s = 2 * s |
1050 | | elif is_even(v): |
1051 | | v = v // 2 |
1052 | | r = 2 * r |
1053 | | elif v < u: |
1054 | | u = (u - v) // 2 |
1055 | | r = r + s |
1056 | | s = 2 * s |
1057 | | else: |
1058 | | v = (v - u) // 2 |
1059 | | s = r + s |
1060 | | r = 2 * r |
1061 | | |
1062 | | s = (s / 2**k) % prime |
1063 | | return s |
1064 | | */ |
1065 | | |
1066 | | if (bn_is_zero(x)) return; |
1067 | | |
1068 | | bn_fast_mod(x, prime); |
1069 | | bn_mod(x, prime); |
1070 | | |
1071 | | bignum256 u = {0}, v = {0}, r = {0}, s = {0}; |
1072 | | bn_copy(prime, &u); |
1073 | | bn_copy(x, &v); |
1074 | | bn_one(&s); |
1075 | | bn_zero(&r); |
1076 | | |
1077 | | int k = 0; |
1078 | | while (!bn_is_one(&v)) { |
1079 | | if ((u.val[0] & 1) == 0) { |
1080 | | bn_rshift(&u); |
1081 | | bn_lshift(&s); |
1082 | | } else if ((v.val[0] & 1) == 0) { |
1083 | | bn_rshift(&v); |
1084 | | bn_lshift(&r); |
1085 | | } else if (bn_is_less(&v, &u)) { |
1086 | | bn_subtract(&u, &v, &u); |
1087 | | bn_rshift(&u); |
1088 | | bn_add(&r, &s); |
1089 | | bn_lshift(&s); |
1090 | | } else { |
1091 | | bn_subtract(&v, &u, &v); |
1092 | | bn_rshift(&v); |
1093 | | bn_add(&s, &r); |
1094 | | bn_lshift(&r); |
1095 | | } |
1096 | | k += 1; |
1097 | | assert(!bn_is_zero(&v)); // assert GCD(x, prime) == 1 |
1098 | | } |
1099 | | |
1100 | | // s = s / 2**(k // BITS_PER_LIMB * BITS_PER_LIMB) |
1101 | | for (int i = 0; i < k / BITS_PER_LIMB; i++) { |
1102 | | bn_divide_base(&s, prime); |
1103 | | } |
1104 | | |
1105 | | // s = s / 2**(k % BITS_PER_LIMB) |
1106 | | for (int i = 0; i < k % BN_BITS_PER_LIMB; i++) { |
1107 | | bn_mult_half(&s, prime); |
1108 | | } |
1109 | | |
1110 | | bn_copy(&s, x); |
1111 | | |
1112 | | memzero(&u, sizeof(u)); |
1113 | | memzero(&v, sizeof(v)); |
1114 | | memzero(&r, sizeof(r)); |
1115 | | memzero(&s, sizeof(s)); |
1116 | | } |
1117 | | #endif |
1118 | | |
1119 | | #if USE_INVERSE_FAST |
1120 | | // x = 1/x % prime if x != 0 else 0 |
1121 | | // Assumes x is is_normalized |
1122 | | // Assumes GCD(x, prime) = 1 |
1123 | | // Guarantees x is normalized and fully reduced modulo prime |
1124 | | // Assumes prime is odd, normalized, 2**256 - 2**224 <= prime <= 2**256 |
1125 | | // The function has constant control flow but not constant memory access flow |
1126 | | // with regard to prime and x |
1127 | 83.8k | static void bn_inverse_fast(bignum256 *x, const bignum256 *prime) { |
1128 | | // Custom constant time version of "The Almost Montgomery Inverse" from the |
1129 | | // section 3 of "Constant Time Modular Inversion" by Joppe W. Bos |
1130 | | // See http://www.joppebos.com/files/CTInversion.pdf |
1131 | | |
1132 | | /* |
1133 | | u = prime |
1134 | | v = x % prime |
1135 | | s = 1 |
1136 | | r = 0 |
1137 | | |
1138 | | k = 0 |
1139 | | while v != 1: |
1140 | | k += 1 |
1141 | | if is_even(u): # b1 |
1142 | | u = u // 2 |
1143 | | s = 2 * s |
1144 | | elif is_even(v): # b2 |
1145 | | v = v // 2 |
1146 | | r = 2 * r |
1147 | | elif v < u: # b3 |
1148 | | u = (u - v) // 2 |
1149 | | r = r + s |
1150 | | s = 2 * s |
1151 | | else: # b4 |
1152 | | v = (v - u) // 2 |
1153 | | s = r + s |
1154 | | r = 2 * r |
1155 | | |
1156 | | s = (s / 2**k) % prime |
1157 | | return s |
1158 | | */ |
1159 | | |
1160 | 83.8k | bn_fast_mod(x, prime); |
1161 | 83.8k | bn_mod(x, prime); |
1162 | | |
1163 | 83.8k | bignum256 u = {0}, v = {0}, r = {0}, s = {0}; |
1164 | 83.8k | bn_copy(prime, &u); |
1165 | 83.8k | bn_copy(x, &v); |
1166 | 83.8k | bn_one(&s); |
1167 | 83.8k | bn_zero(&r); |
1168 | | |
1169 | 83.8k | bignum256 zero = {0}; |
1170 | 83.8k | bn_zero(&zero); |
1171 | | |
1172 | 83.8k | int k = 0; |
1173 | | |
1174 | 83.8k | int finished = 0, u_even = 0, v_even = 0, v_less_u = 0, b1 = 0, b2 = 0, |
1175 | 83.8k | b3 = 0, b4 = 0; |
1176 | 83.8k | finished = 0; |
1177 | | |
1178 | 43.8M | for (int i = 0; i < 2 * BN_LIMBS * BN_BITS_PER_LIMB; i++) { |
1179 | 43.7M | finished = finished | -bn_is_one(&v); |
1180 | 43.7M | u_even = -bn_is_even(&u); |
1181 | 43.7M | v_even = -bn_is_even(&v); |
1182 | 43.7M | v_less_u = -bn_is_less(&v, &u); |
1183 | | |
1184 | 43.7M | b1 = ~finished & u_even; |
1185 | 43.7M | b2 = ~finished & ~b1 & v_even; |
1186 | 43.7M | b3 = ~finished & ~b1 & ~b2 & v_less_u; |
1187 | 43.7M | b4 = ~finished & ~b1 & ~b2 & ~b3; |
1188 | | |
1189 | | // The ternary operator for pointers with constant control flow |
1190 | | // BN_INVERSE_FAST_TERNARY(c, t, f) = t if c else f |
1191 | | // Very nasty hack, sorry for that |
1192 | 43.7M | #define BN_INVERSE_FAST_TERNARY(c, t, f) \ |
1193 | 306M | ((void *)(((c) & (uintptr_t)(t)) | (~(c) & (uintptr_t)(f)))) |
1194 | | |
1195 | 43.7M | bn_subtract(BN_INVERSE_FAST_TERNARY(b3, &u, &v), |
1196 | 43.7M | BN_INVERSE_FAST_TERNARY( |
1197 | 43.7M | b3 | b4, BN_INVERSE_FAST_TERNARY(b3, &v, &u), &zero), |
1198 | 43.7M | BN_INVERSE_FAST_TERNARY(b3, &u, &v)); |
1199 | | |
1200 | 43.7M | bn_add(BN_INVERSE_FAST_TERNARY(b3, &r, &s), |
1201 | 43.7M | BN_INVERSE_FAST_TERNARY(b3 | b4, BN_INVERSE_FAST_TERNARY(b3, &s, &r), |
1202 | 43.7M | &zero)); |
1203 | 43.7M | bn_rshift(BN_INVERSE_FAST_TERNARY(b1 | b3, &u, &v)); |
1204 | 43.7M | bn_lshift(BN_INVERSE_FAST_TERNARY(b1 | b3, &s, &r)); |
1205 | | |
1206 | 43.7M | k = k - ~finished; |
1207 | 43.7M | } |
1208 | | |
1209 | | // s = s / 2**(k // BITS_PER_LIMB * BITS_PER_LIMB) |
1210 | 1.59M | for (int i = 0; i < 2 * BN_LIMBS; i++) { |
1211 | | // s = s / 2**BITS_PER_LIMB % prime if i < k // BITS_PER_LIMB else s |
1212 | 1.50M | bn_copy(&s, &r); |
1213 | 1.50M | bn_divide_base(&r, prime); |
1214 | 1.50M | bn_cmov(&s, i < k / BN_BITS_PER_LIMB, &r, &s); |
1215 | 1.50M | } |
1216 | | |
1217 | | // s = s / 2**(k % BITS_PER_LIMB) |
1218 | 2.51M | for (int i = 0; i < BN_BITS_PER_LIMB; i++) { |
1219 | | // s = s / 2 % prime if i < k % BITS_PER_LIMB else s |
1220 | 2.43M | bn_copy(&s, &r); |
1221 | 2.43M | bn_mult_half(&r, prime); |
1222 | 2.43M | bn_cmov(&s, i < k % BN_BITS_PER_LIMB, &r, &s); |
1223 | 2.43M | } |
1224 | | |
1225 | 83.8k | bn_cmov(x, bn_is_zero(x), x, &s); |
1226 | | |
1227 | 83.8k | memzero(&u, sizeof(u)); |
1228 | 83.8k | memzero(&v, sizeof(v)); |
1229 | 83.8k | memzero(&r, sizeof(s)); |
1230 | 83.8k | memzero(&s, sizeof(s)); |
1231 | 83.8k | } |
1232 | | #endif |
1233 | | |
1234 | | #if false |
1235 | | // x = 1/x % prime if x != 0 else 0 |
1236 | | // Assumes x is is_normalized |
1237 | | // Assumes GCD(x, prime) = 1 |
1238 | | // Guarantees x is normalized and fully reduced modulo prime |
1239 | | // Assumes prime is odd, normalized, 2**256 - 2**224 <= prime <= 2**256 |
1240 | | static void bn_inverse_fast(bignum256 *x, const bignum256 *prime) { |
1241 | | // Custom constant time version of "The Almost Montgomery Inverse" from the |
1242 | | // section 3 of "Constant Time Modular Inversion" by Joppe W. Bos |
1243 | | // See http://www.joppebos.com/files/CTInversion.pdf |
1244 | | |
1245 | | /* |
1246 | | u = prime |
1247 | | v = x % prime |
1248 | | s = 1 |
1249 | | r = 0 |
1250 | | |
1251 | | k = 0 |
1252 | | while v != 1: |
1253 | | k += 1 |
1254 | | if is_even(u): # b1 |
1255 | | u = u // 2 |
1256 | | s = 2 * s |
1257 | | elif is_even(v): # b2 |
1258 | | v = v // 2 |
1259 | | r = 2 * r |
1260 | | elif v < u: # b3 |
1261 | | u = (u - v) // 2 |
1262 | | r = r + s |
1263 | | s = 2 * s |
1264 | | else: # b4 |
1265 | | v = (v - u) // 2 |
1266 | | s = r + s |
1267 | | r = 2 * r |
1268 | | |
1269 | | s = (s / 2**k) % prime |
1270 | | return s |
1271 | | */ |
1272 | | |
1273 | | bn_fast_mod(x, prime); |
1274 | | bn_mod(x, prime); |
1275 | | |
1276 | | bignum256 u = {0}, v = {0}, r = {0}, s = {0}; |
1277 | | bn_copy(prime, &u); |
1278 | | bn_copy(x, &v); |
1279 | | bn_one(&s); |
1280 | | bn_zero(&r); |
1281 | | |
1282 | | bignum256 zero = {0}; |
1283 | | bn_zero(&zero); |
1284 | | |
1285 | | int k = 0; |
1286 | | |
1287 | | uint32_t finished = 0, u_even = 0, v_even = 0, v_less_u = 0, b1 = 0, b2 = 0, |
1288 | | b3 = 0, b4 = 0; |
1289 | | finished = 0; |
1290 | | |
1291 | | bignum256 u_half = {0}, v_half = {0}, u_minus_v_half = {0}, v_minus_u_half = {0}, r_plus_s = {0}, r_twice = {0}, s_twice = {0}; |
1292 | | for (int i = 0; i < 2 * BN_LIMBS * BN_BITS_PER_LIMB; i++) { |
1293 | | finished = finished | bn_is_one(&v); |
1294 | | u_even = bn_is_even(&u); |
1295 | | v_even = bn_is_even(&v); |
1296 | | v_less_u = bn_is_less(&v, &u); |
1297 | | |
1298 | | b1 = (finished ^ 1) & u_even; |
1299 | | b2 = (finished ^ 1) & (b1 ^ 1) & v_even; |
1300 | | b3 = (finished ^ 1) & (b1 ^ 1) & (b2 ^ 1) & v_less_u; |
1301 | | b4 = (finished ^ 1) & (b1 ^ 1) & (b2 ^ 1) & (b3 ^ 1); |
1302 | | |
1303 | | // u_half = u // 2 |
1304 | | bn_copy(&u, &u_half); |
1305 | | bn_rshift(&u_half); |
1306 | | |
1307 | | // v_half = v // 2 |
1308 | | bn_copy(&v, &v_half); |
1309 | | bn_rshift(&v_half); |
1310 | | |
1311 | | // u_minus_v_half = (u - v) // 2 |
1312 | | bn_subtract(&u, &v, &u_minus_v_half); |
1313 | | bn_rshift(&u_minus_v_half); |
1314 | | |
1315 | | // v_minus_u_half = (v - u) // 2 |
1316 | | bn_subtract(&v, &u, &v_minus_u_half); |
1317 | | bn_rshift(&v_minus_u_half); |
1318 | | |
1319 | | // r_plus_s = r + s |
1320 | | bn_copy(&r, &r_plus_s); |
1321 | | bn_add(&r_plus_s, &s); |
1322 | | |
1323 | | // r_twice = 2 * r |
1324 | | bn_copy(&r, &r_twice); |
1325 | | bn_lshift(&r_twice); |
1326 | | |
1327 | | // s_twice = 2 * s |
1328 | | bn_copy(&s, &s_twice); |
1329 | | bn_lshift(&s_twice); |
1330 | | |
1331 | | bn_cmov(&u, b1, &u_half, &u); |
1332 | | bn_cmov(&u, b3, &u_minus_v_half, &u); |
1333 | | |
1334 | | bn_cmov(&v, b2, &v_half, &v); |
1335 | | bn_cmov(&v, b4, &v_minus_u_half, &v); |
1336 | | |
1337 | | bn_cmov(&r, b2 | b4, &r_twice, &r); |
1338 | | bn_cmov(&r, b3, &r_plus_s, &r); |
1339 | | |
1340 | | bn_cmov(&s, b1 | b3, &s_twice, &s); |
1341 | | bn_cmov(&s, b4, &r_plus_s, &s); |
1342 | | |
1343 | | k = k + (finished ^ 1); |
1344 | | } |
1345 | | |
1346 | | // s = s / 2**(k // BITS_PER_LIMB * BITS_PER_LIMB) |
1347 | | for (int i = 0; i < 2 * BN_LIMBS; i++) { |
1348 | | // s = s / 2**BITS_PER_LIMB % prime if i < k // BITS_PER_LIMB else s |
1349 | | bn_copy(&s, &r); |
1350 | | bn_divide_base(&r, prime); |
1351 | | bn_cmov(&s, i < k / BITS_PER_LIMB, &r, &s); |
1352 | | } |
1353 | | |
1354 | | // s = s / 2**(k % BITS_PER_LIMB) |
1355 | | for (int i = 0; i < BN_BITS_PER_LIMB; i++) { |
1356 | | // s = s / 2 % prime if i < k % BITS_PER_LIMB else s |
1357 | | bn_copy(&s, &r); |
1358 | | bn_mult_half(&r, prime); |
1359 | | bn_cmov(&s, i < k % BN_BITS_PER_LIMB, &r, &s); |
1360 | | } |
1361 | | |
1362 | | bn_cmov(x, bn_is_zero(x), x, &s); |
1363 | | |
1364 | | memzero(&u, sizeof(u)); |
1365 | | memzero(&v, sizeof(v)); |
1366 | | memzero(&r, sizeof(r)); |
1367 | | memzero(&s, sizeof(s)); |
1368 | | memzero(&u_half, sizeof(u_half)); |
1369 | | memzero(&v_half, sizeof(v_half)); |
1370 | | memzero(&u_minus_v_half, sizeof(u_minus_v_half)); |
1371 | | memzero(&v_minus_u_half, sizeof(v_minus_u_half)); |
1372 | | memzero(&r_twice, sizeof(r_twice)); |
1373 | | memzero(&s_twice, sizeof(s_twice)); |
1374 | | memzero(&r_plus_s, sizeof(r_plus_s)); |
1375 | | } |
1376 | | #endif |
1377 | | |
1378 | | // Normalizes x |
1379 | | // Assumes x < 2**261 == 2**(LIMBS * BITS_PER_LIMB) |
1380 | | // Guarantees x is normalized |
1381 | 0 | void bn_normalize(bignum256 *x) { |
1382 | 0 | uint32_t acc = 0; |
1383 | |
|
1384 | 0 | for (int i = 0; i < BN_LIMBS; i++) { |
1385 | 0 | acc += x->val[i]; |
1386 | | // acc doesn't overflow 32 bits |
1387 | | // Proof: |
1388 | | // acc + x[i] |
1389 | | // <= (2**(32 - BITS_PER_LIMB) - 1) + (2**BITS_PER_LIMB - 1) |
1390 | | // == 7 + 2**29 - 1 < 2**32 |
1391 | |
|
1392 | 0 | x->val[i] = acc & BN_LIMB_MASK; |
1393 | 0 | acc >>= (BN_BITS_PER_LIMB); |
1394 | | // acc <= 7 == 2**(32 - BITS_PER_LIMB) - 1 |
1395 | 0 | } |
1396 | 0 | } |
1397 | | |
1398 | | // x = x + y |
1399 | | // Assumes x, y are normalized, x + y < 2**(LIMBS*BITS_PER_LIMB) == 2**261 |
1400 | | // Guarantees x is normalized |
1401 | | // Works properly even if &x == &y |
1402 | 44.8M | void bn_add(bignum256 *x, const bignum256 *y) { |
1403 | 44.8M | uint32_t acc = 0; |
1404 | 448M | for (int i = 0; i < BN_LIMBS; i++) { |
1405 | 403M | acc += x->val[i] + y->val[i]; |
1406 | | // acc doesn't overflow 32 bits |
1407 | | // Proof: |
1408 | | // acc + x[i] + y[i] |
1409 | | // <= (2**(32 - BITS_PER_LIMB) - 1) + 2 * (2**BITS_PER_LIMB - 1) |
1410 | | // == (2**(32 - BITS_PER_LIMB) - 1) + 2**(BITS_PER_LIMB + 1) - 2 |
1411 | | // == 7 + 2**30 - 2 < 2**32 |
1412 | | |
1413 | 403M | x->val[i] = acc & BN_LIMB_MASK; |
1414 | 403M | acc >>= BN_BITS_PER_LIMB; |
1415 | | // acc <= 7 == 2**(32 - BITS_PER_LIMB) - 1 |
1416 | | |
1417 | | // acc == x[:i + 1] + y[:i + 1] >> BITS_PER_LIMB * (i + 1) |
1418 | 403M | } |
1419 | | |
1420 | | // assert(acc == 0); // assert x + y < 2**261 |
1421 | | // acc == 0 |
1422 | | // Proof: |
1423 | | // acc == x[:LIMBS] + y[:LIMBS] >> LIMBS * BITS_PER_LIMB |
1424 | | // == x + y >> LIMBS * BITS_PER_LIMB |
1425 | | // <= 2**(LIMBS * BITS_PER_LIMB) - 1 >> LIMBS * BITS_PER_LIMB == 0 |
1426 | 44.8M | } |
1427 | | |
1428 | | // x = x + y % prime |
1429 | | // Assumes x, y are normalized |
1430 | | // Guarantees x is normalized and partly reduced modulo prime |
1431 | | // Assumes prime is normalized and 2^256 - 2^224 <= prime <= 2^256 |
1432 | 65.7k | void bn_addmod(bignum256 *x, const bignum256 *y, const bignum256 *prime) { |
1433 | 657k | for (int i = 0; i < BN_LIMBS; i++) { |
1434 | 591k | x->val[i] += y->val[i]; |
1435 | | // x[i] doesn't overflow 32 bits |
1436 | | // Proof: |
1437 | | // x[i] + y[i] |
1438 | | // <= 2 * (2**BITS_PER_LIMB - 1) |
1439 | | // == 2**30 - 2 < 2**32 |
1440 | 591k | } |
1441 | | |
1442 | 65.7k | bn_fast_mod(x, prime); |
1443 | 65.7k | } |
1444 | | |
1445 | | // x = x + y |
1446 | | // Assumes x is normalized |
1447 | | // Assumes y <= 2**32 - 2**29 == 2**32 - 2**BITS_PER_LIMB and |
1448 | | // x + y < 2**261 == 2**(LIMBS * BITS_PER_LIMB) |
1449 | | // Guarantees x is normalized |
1450 | 186 | void bn_addi(bignum256 *x, uint32_t y) { |
1451 | | // assert(y <= 3758096384); // assert y <= 2**32 - 2**29 |
1452 | 186 | uint32_t acc = y; |
1453 | | |
1454 | 1.86k | for (int i = 0; i < BN_LIMBS; i++) { |
1455 | 1.67k | acc += x->val[i]; |
1456 | | // acc doesn't overflow 32 bits |
1457 | | // Proof: |
1458 | | // if i == 0: |
1459 | | // acc + x[i] == y + x[0] |
1460 | | // <= (2**32 - 2**BITS_PER_LIMB) + (2**BITS_PER_LIMB - 1) |
1461 | | // == 2**32 - 1 < 2**32 |
1462 | | // else: |
1463 | | // acc + x[i] |
1464 | | // <= (2**(32 - BITS_PER_LIMB) - 1) + (2**BITS_PER_LIMB - 1) |
1465 | | // == 7 + 2**29 - 1 < 2**32 |
1466 | | |
1467 | 1.67k | x->val[i] = acc & BN_LIMB_MASK; |
1468 | 1.67k | acc >>= (BN_BITS_PER_LIMB); |
1469 | | // acc <= 7 == 2**(32 - BITS_PER_LIMB) - 1 |
1470 | | |
1471 | | // acc == x[:i + 1] + y >> BITS_PER_LIMB * (i + 1) |
1472 | 1.67k | } |
1473 | | |
1474 | | // assert(acc == 0); // assert x + y < 2**261 |
1475 | | // acc == 0 |
1476 | | // Proof: |
1477 | | // acc == x[:LIMBS] + y << LIMBS * BITS_PER_LIMB |
1478 | | // == x + y << LIMBS * BITS_PER_LIMB |
1479 | | // <= 2**(LIMBS + BITS_PER_LIMB) - 1 << LIMBS * BITS_PER_LIMB |
1480 | | // == 0 |
1481 | 186 | } |
1482 | | |
1483 | | // x = x - y % prime |
1484 | | // Explicitly x = x + prime - y |
1485 | | // Assumes x, y are normalized |
1486 | | // Assumes y < prime[0], x + prime - y < 2**261 == 2**(LIMBS * BITS_PER_LIMB) |
1487 | | // Guarantees x is normalized |
1488 | | // If x is fully reduced modulo prime, |
1489 | | // guarantess x will be partly reduced modulo prime |
1490 | | // Assumes prime is nonzero and normalized |
1491 | 12.0k | void bn_subi(bignum256 *x, uint32_t y, const bignum256 *prime) { |
1492 | 12.0k | assert(y < prime->val[0]); |
1493 | | |
1494 | | // x = x + prime - y |
1495 | | |
1496 | 12.0k | uint32_t acc = -y; |
1497 | 120k | for (int i = 0; i < BN_LIMBS; i++) { |
1498 | 108k | acc += x->val[i] + prime->val[i]; |
1499 | | // acc neither overflows 32 bits nor underflows 0 |
1500 | | // Proof: |
1501 | | // acc + x[i] + prime[i] |
1502 | | // <= (2**(32 - BITS_PER_LIMB) - 1) + 2 * (2**BITS_PER_LIMB - 1) |
1503 | | // <= 7 + 2**30 - 2 < 2**32 |
1504 | | // acc + x[i] + prime[i] |
1505 | | // >= -y + prime[0] >= 0 |
1506 | | |
1507 | 108k | x->val[i] = acc & BN_LIMB_MASK; |
1508 | 108k | acc >>= BN_BITS_PER_LIMB; |
1509 | | // acc <= 7 == 2**(32 - BITS_PER_LIMB) - 1 |
1510 | | |
1511 | | // acc == x[:i + 1] + prime[:i + 1] - y >> BITS_PER_LIMB * (i + 1) |
1512 | 108k | } |
1513 | | |
1514 | | // assert(acc == 0); // assert x + prime - y < 2**261 |
1515 | | // acc == 0 |
1516 | | // Proof: |
1517 | | // acc == x[:LIMBS] + prime[:LIMBS] - y >> BITS_PER_LIMB * LIMBS |
1518 | | // == x + prime - y >> BITS_PER_LIMB * LIMBS |
1519 | | // <= 2**(LIMBS * BITS_PER_LIMB) - 1 >> BITS_PER_LIMB * LIMBS == 0 |
1520 | 12.0k | } |
1521 | | |
1522 | | // res = x - y % prime |
1523 | | // Explicitly res = x + (2 * prime - y) |
1524 | | // Assumes x, y are normalized, y is partly reduced |
1525 | | // Assumes x + 2 * prime - y < 2**261 == 2**(BITS_PER_LIMB * LIMBS) |
1526 | | // Guarantees res is normalized |
1527 | | // Assumes prime is nonzero and normalized |
1528 | | void bn_subtractmod(const bignum256 *x, const bignum256 *y, bignum256 *res, |
1529 | 12.5M | const bignum256 *prime) { |
1530 | | // res = x + (2 * prime - y) |
1531 | | |
1532 | 12.5M | uint32_t acc = 1; |
1533 | | |
1534 | 125M | for (int i = 0; i < BN_LIMBS; i++) { |
1535 | 112M | acc += (BN_BASE - 1) + x->val[i] + 2 * prime->val[i] - y->val[i]; |
1536 | | // acc neither overflows 32 bits nor underflows 0 |
1537 | | // Proof: |
1538 | | // acc + (BASE - 1) + x[i] + 2 * prime[i] - y[i] |
1539 | | // >= (BASE - 1) - y[i] |
1540 | | // == (2**BITS_PER_LIMB - 1) - (2**BITS_PER_LIMB - 1) == 0 |
1541 | | // acc + (BASE - 1) + x[i] + 2 * prime[i] - y[i] |
1542 | | // <= acc + (BASE - 1) + x[i] + 2 * prime[i] |
1543 | | // <= (2**(32 - BITS_PER_LIMB) - 1) + (2**BITS_PER_LIMB - 1) + |
1544 | | // (2**BITS_PER_LIMB - 1) + 2 * (2**BITS_PER_LIMB - 1) |
1545 | | // <= (2**(32 - BITS_PER_LIMB) - 1) + 4 * (2**BITS_PER_LIMB - 1) |
1546 | | // == 7 + 4 * 2**29 - 4 == 2**31 + 3 < 2**32 |
1547 | | |
1548 | 112M | res->val[i] = acc & (BN_BASE - 1); |
1549 | 112M | acc >>= BN_BITS_PER_LIMB; |
1550 | | // acc <= 7 == 2**(32 - BITS_PER_LIMB) - 1 |
1551 | | |
1552 | | // acc == 2**(BITS_PER_LIMB * (i + 1)) + x[:i+1] - y[:i+1] + 2*prime[:i+1] |
1553 | | // >> BITS_PER_LIMB * (i+1) |
1554 | 112M | } |
1555 | | |
1556 | | // assert(acc == 1); // assert x + 2 * prime - y < 2**261 |
1557 | | |
1558 | | // clang-format off |
1559 | | // acc == 1 |
1560 | | // Proof: |
1561 | | // acc == 2**(BITS_PER_LIMB * LIMBS) + x[:LIMBS] - y[:LIMBS] + 2 * prime[:LIMBS] >> BITS_PER_LIMB * LIMBS |
1562 | | // == 2**(BITS_PER_LIMB * LIMBS) + x - y + 2 * prime >> BITS_PER_LIMB * LIMBS |
1563 | | // == 2**(BITS_PER_LIMB * LIMBS) + x + (2 * prime - y) >> BITS_PER_LIMB * LIMBS |
1564 | | // <= 2**(BITS_PER_LIMB * LIMBS) + 2**(BITS_PER_LIMB * LIMBS) - 1 >> BITS_PER_LIMB * LIMBS |
1565 | | // <= 2 * 2**(BITS_PER_LIMB * LIMBS) - 1 >> BITS_PER_LIMB * LIMBS |
1566 | | // == 1 |
1567 | | |
1568 | | // acc == 2**(BITS_PER_LIMB * LIMBS) + x[:LIMBS] - y[:LIMBS] + 2 * prime[:LIMBS] >> BITS_PER_LIMB * LIMBS |
1569 | | // == 2**(BITS_PER_LIMB * LIMBS) + x - y + 2 * prime >> BITS_PER_LIMB * LIMBS |
1570 | | // == 2**(BITS_PER_LIMB * LIMBS) + x + (2 * prime - y) >> BITS_PER_LIMB * LIMBS |
1571 | | // >= 2**(BITS_PER_LIMB * LIMBS) + 0 + 1 >> BITS_PER_LIMB * LIMBS |
1572 | | // == 1 |
1573 | | // clang-format on |
1574 | 12.5M | } |
1575 | | |
1576 | | // res = x - y |
1577 | | // Assumes x, y are normalized and x >= y |
1578 | | // Guarantees res is normalized |
1579 | | // Works properly even if &x == &y or &x == &res or &y == &res or |
1580 | | // &x == &y == &res |
1581 | 44.6M | void bn_subtract(const bignum256 *x, const bignum256 *y, bignum256 *res) { |
1582 | 44.6M | uint32_t acc = 1; |
1583 | 446M | for (int i = 0; i < BN_LIMBS; i++) { |
1584 | 401M | acc += (BN_BASE - 1) + x->val[i] - y->val[i]; |
1585 | | // acc neither overflows 32 bits nor underflows 0 |
1586 | | // Proof: |
1587 | | // acc + (BASE - 1) + x[i] - y[i] |
1588 | | // >= (BASE - 1) - y == (2**BITS_PER_LIMB - 1) - (2**BITS_PER_LIMB - 1) |
1589 | | // == 0 |
1590 | | // acc + (BASE - 1) + x[i] - y[i] |
1591 | | // <= acc + (BASE - 1) + x[i] |
1592 | | // <= (2**(32 - BITS_PER_LIMB) - 1) + (2**BITS_PER_LIMB - 1) + |
1593 | | // (2**BITS_PER_LIMB - 1) |
1594 | | // == 7 + 2 * 2**29 < 2 **32 |
1595 | | |
1596 | 401M | res->val[i] = acc & BN_LIMB_MASK; |
1597 | 401M | acc >>= BN_BITS_PER_LIMB; |
1598 | | // acc <= 7 == 2**(32 - BITS_PER_LIMB) - 1 |
1599 | | |
1600 | | // acc == 2**(BITS_PER_LIMB * (i + 1)) + x[:i + 1] - y[:i + 1] |
1601 | | // >> BITS_PER_LIMB * (i + 1) |
1602 | 401M | } |
1603 | | |
1604 | | // assert(acc == 1); // assert x >= y |
1605 | | |
1606 | | // clang-format off |
1607 | | // acc == 1 |
1608 | | // Proof: |
1609 | | // acc == 2**(BITS_PER_LIMB * LIMBS) + x[:LIMBS] - y[:LIMBS] >> BITS_PER_LIMB * LIMBS |
1610 | | // == 2**(BITS_PER_LIMB * LIMBS) + x - y >> BITS_PER_LIMB * LIMBS |
1611 | | // == 2**(BITS_PER_LIMB * LIMBS) + x >> BITS_PER_LIMB * LIMBS |
1612 | | // <= 2**(BITS_PER_LIMB * LIMBS) + 2**(BITS_PER_LIMB * LIMBS) - 1 >> BITS_PER_LIMB * LIMBS |
1613 | | // <= 2 * 2**(BITS_PER_LIMB * LIMBS) - 1 >> BITS_PER_LIMB * LIMBS |
1614 | | // == 1 |
1615 | | |
1616 | | // acc == 2**(BITS_PER_LIMB * LIMBS) + x[:LIMBS] - y[:LIMBS] >> BITS_PER_LIMB * LIMBS |
1617 | | // == 2**(BITS_PER_LIMB * LIMBS) + x - y >> BITS_PER_LIMB * LIMBS |
1618 | | // >= 2**(BITS_PER_LIMB * LIMBS) >> BITS_PER_LIMB * LIMBS |
1619 | | // == 1 |
1620 | 44.6M | } |
1621 | | |
1622 | | // Returns 0 if x is zero |
1623 | | // Returns 1 if x is a square modulo prime |
1624 | | // Returns -1 if x is not a square modulo prime |
1625 | | // Assumes x is normalized, x < 2**259 |
1626 | | // Assumes prime is normalized, 2**256 - 2**224 <= prime <= 2**256 |
1627 | | // Assumes prime is a prime |
1628 | | // The function doesn't have neither constant control flow nor constant memory |
1629 | | // access flow with regard to prime |
1630 | 0 | int bn_legendre(const bignum256 *x, const bignum256 *prime) { |
1631 | | // This is a naive implementation |
1632 | | // A better implementation would be to use the Euclidean algorithm together with the quadratic reciprocity law |
1633 | | |
1634 | | // e = (prime - 1) / 2 |
1635 | 0 | bignum256 e = {0}; |
1636 | 0 | bn_copy(prime, &e); |
1637 | 0 | bn_rshift(&e); |
1638 | | |
1639 | | // res = x**e % prime |
1640 | 0 | bignum256 res = {0}; |
1641 | 0 | bn_power_mod(x, &e, prime, &res); |
1642 | 0 | bn_mod(&res, prime); |
1643 | |
|
1644 | 0 | if (bn_is_one(&res)) { |
1645 | 0 | return 1; |
1646 | 0 | } |
1647 | | |
1648 | 0 | if (bn_is_zero(&res)) { |
1649 | 0 | return 0; |
1650 | 0 | } |
1651 | | |
1652 | 0 | return -1; |
1653 | 0 | } |
1654 | | |
1655 | | // q = x // d, r = x % d |
1656 | | // Assumes x is normalized, 1 <= d <= 61304 |
1657 | | // Guarantees q is normalized |
1658 | 0 | void bn_long_division(bignum256 *x, uint32_t d, bignum256 *q, uint32_t *r) { |
1659 | 0 | assert(1 <= d && d < 61304); |
1660 | | |
1661 | 0 | uint32_t acc = 0; |
1662 | |
|
1663 | 0 | *r = x->val[BN_LIMBS - 1] % d; |
1664 | 0 | q->val[BN_LIMBS - 1] = x->val[BN_LIMBS - 1] / d; |
1665 | |
|
1666 | 0 | for (int i = BN_LIMBS - 2; i >= 0; i--) { |
1667 | 0 | acc = *r * (BN_BASE % d) + x->val[i]; |
1668 | | // acc doesn't overflow 32 bits |
1669 | | // Proof: |
1670 | | // r * (BASE % d) + x[i] |
1671 | | // <= (d - 1) * (d - 1) + (2**BITS_PER_LIMB - 1) |
1672 | | // == d**2 - 2*d + 2**BITS_PER_LIMB |
1673 | | // == 61304**2 - 2 * 61304 + 2**29 |
1674 | | // == 3758057808 + 2**29 < 2**32 |
1675 | |
|
1676 | 0 | q->val[i] = *r * (BN_BASE / d) + (acc / d); |
1677 | | // q[i] doesn't overflow 32 bits |
1678 | | // Proof: |
1679 | | // r * (BASE // d) + (acc // d) |
1680 | | // <= (d - 1) * (2**BITS_PER_LIMB / d) + |
1681 | | // ((d**2 - 2*d + 2**BITS_PER_LIMB) / d) |
1682 | | // <= (d - 1) * (2**BITS_PER_LIMB / d) + (d - 2 + 2**BITS_PER_LIMB / d) |
1683 | | // == (d - 1 + 1) * (2**BITS_PER_LIMB / d) + d - 2 |
1684 | | // == 2**BITS_PER_LIMB + d - 2 <= 2**29 + 61304 < 2**32 |
1685 | | |
1686 | | // q[i] == (r * BASE + x[i]) // d |
1687 | | // Proof: |
1688 | | // q[i] == r * (BASE // d) + (acc // d) |
1689 | | // == r * (BASE // d) + (r * (BASE % d) + x[i]) // d |
1690 | | // == (r * d * (BASE // d) + r * (BASE % d) + x[i]) // d |
1691 | | // == (r * (d * (BASE // d) + (BASE % d)) + x[i]) // d |
1692 | | // == (r * BASE + x[i]) // d |
1693 | | |
1694 | | // q[i] < 2**BITS_PER_LIMB |
1695 | | // Proof: |
1696 | | // q[i] == (r * BASE + x[i]) // d |
1697 | | // <= ((d - 1) * 2**BITS_PER_LIMB + (2**BITS_PER_LIMB - 1)) / d |
1698 | | // == (d * 2**BITS_PER_LIMB - 1) / d == 2**BITS_PER_LIMB - 1 / d |
1699 | | // < 2**BITS_PER_LIMB |
1700 | |
|
1701 | 0 | *r = acc % d; |
1702 | | // r == (r * BASE + x[i]) % d |
1703 | | // Proof: |
1704 | | // r == acc % d == (r * (BASE % d) + x[i]) % d |
1705 | | // == (r * BASE + x[i]) % d |
1706 | | |
1707 | | // x[:i] == q[:i] * d + r |
1708 | 0 | } |
1709 | 0 | } |
1710 | | |
1711 | | // x = x // 58, r = x % 58 |
1712 | | // Assumes x is normalized |
1713 | | // Guarantees x is normalized |
1714 | 0 | void bn_divmod58(bignum256 *x, uint32_t *r) { bn_long_division(x, 58, x, r); } |
1715 | | |
1716 | | // x = x // 1000, r = x % 1000 |
1717 | | // Assumes x is normalized |
1718 | | // Guarantees x is normalized |
1719 | 0 | void bn_divmod1000(bignum256 *x, uint32_t *r) { |
1720 | 0 | bn_long_division(x, 1000, x, r); |
1721 | 0 | } |
1722 | | |
1723 | | // x = x // 10, r = x % 10 |
1724 | | // Assumes x is normalized |
1725 | | // Guarantees x is normalized |
1726 | 0 | void bn_divmod10(bignum256 *x, uint32_t *r) { bn_long_division(x, 10, x, r); } |
1727 | | |
1728 | | // Formats amount |
1729 | | // Assumes amount is normalized |
1730 | | // Assumes prefix and suffix are null-terminated strings |
1731 | | // Assumes output is an array of length output_length |
1732 | | // The function doesn't have neither constant control flow nor constant memory |
1733 | | // access flow with regard to any its argument |
1734 | 0 | size_t bn_format(const bignum256 *amount, const char *prefix, const char *suffix, unsigned int decimals, int exponent, bool trailing, char thousands, char *output, size_t output_length) { |
1735 | | |
1736 | | /* |
1737 | | Python prototype of the function: |
1738 | | |
1739 | | def format(amount, prefix, suffix, decimals, exponent, trailing, thousands): |
1740 | | if exponent >= 0: |
1741 | | amount *= 10**exponent |
1742 | | else: |
1743 | | amount //= 10 ** (-exponent) |
1744 | | |
1745 | | d = pow(10, decimals) |
1746 | | |
1747 | | integer_part = amount // d |
1748 | | integer_str = f"{integer_part:,}".replace(",", thousands or "") |
1749 | | |
1750 | | if decimals: |
1751 | | decimal_part = amount % d |
1752 | | decimal_str = f".{decimal_part:0{decimals}d}" |
1753 | | if not trailing: |
1754 | | decimal_str = decimal_str.rstrip("0").rstrip(".") |
1755 | | else: |
1756 | | decimal_str = "" |
1757 | | |
1758 | | return prefix + integer_str + decimal_str + suffix |
1759 | | */ |
1760 | | |
1761 | | // Auxiliary macro for bn_format |
1762 | | // If enough space adds one character to output starting from the end |
1763 | 0 | #define BN_FORMAT_ADD_OUTPUT_CHAR(c) \ |
1764 | 0 | { \ |
1765 | 0 | --position; \ |
1766 | 0 | if (output <= position && position < output + output_length) { \ |
1767 | 0 | *position = (c); \ |
1768 | 0 | } else { \ |
1769 | 0 | memset(output, '\0', output_length); \ |
1770 | 0 | return 0; \ |
1771 | 0 | } \ |
1772 | 0 | } |
1773 | |
|
1774 | 0 | bignum256 temp = {0}; |
1775 | 0 | bn_copy(amount, &temp); |
1776 | 0 | uint32_t digit = 0; |
1777 | |
|
1778 | 0 | char *position = output + output_length; |
1779 | | |
1780 | | // Add string ending character |
1781 | 0 | BN_FORMAT_ADD_OUTPUT_CHAR('\0'); |
1782 | | |
1783 | | // Add suffix |
1784 | 0 | size_t suffix_length = suffix ? strlen(suffix) : 0; |
1785 | 0 | for (int i = suffix_length - 1; i >= 0; --i) |
1786 | 0 | BN_FORMAT_ADD_OUTPUT_CHAR(suffix[i]) |
1787 | | |
1788 | | // amount //= 10**exponent |
1789 | 0 | for (; exponent < 0; ++exponent) { |
1790 | | // if temp == 0, there is no need to divide it by 10 anymore |
1791 | 0 | if (bn_is_zero(&temp)) { |
1792 | 0 | exponent = 0; |
1793 | 0 | break; |
1794 | 0 | } |
1795 | 0 | bn_divmod10(&temp, &digit); |
1796 | 0 | } |
1797 | | |
1798 | | // exponent >= 0 && decimals >= 0 |
1799 | |
|
1800 | 0 | bool fractional_part = false; // is fractional-part of amount present |
1801 | |
|
1802 | 0 | { // Add fractional-part digits of amount |
1803 | | // Add trailing zeroes |
1804 | 0 | unsigned int trailing_zeros = decimals < (unsigned int) exponent ? decimals : (unsigned int) exponent; |
1805 | | // When casting a negative int to unsigned int, UINT_MAX is added to the int before |
1806 | | // Since exponent >= 0, the value remains unchanged |
1807 | 0 | decimals -= trailing_zeros; |
1808 | 0 | exponent -= trailing_zeros; |
1809 | |
|
1810 | 0 | if (trailing && trailing_zeros) { |
1811 | 0 | fractional_part = true; |
1812 | 0 | for (; trailing_zeros > 0; --trailing_zeros) |
1813 | 0 | BN_FORMAT_ADD_OUTPUT_CHAR('0') |
1814 | 0 | } |
1815 | | |
1816 | | // exponent == 0 || decimals == 0 |
1817 | | |
1818 | | // Add significant digits and leading zeroes |
1819 | 0 | for (; decimals > 0; --decimals) { |
1820 | 0 | bn_divmod10(&temp, &digit); |
1821 | |
|
1822 | 0 | if (fractional_part || digit || trailing) { |
1823 | 0 | fractional_part = true; |
1824 | 0 | BN_FORMAT_ADD_OUTPUT_CHAR('0' + digit) |
1825 | 0 | } |
1826 | 0 | else if (bn_is_zero(&temp)) { |
1827 | | // We break since the remaining digits are zeroes and fractional_part == trailing == false |
1828 | 0 | decimals = 0; |
1829 | 0 | break; |
1830 | 0 | } |
1831 | 0 | } |
1832 | | // decimals == 0 |
1833 | 0 | } |
1834 | | |
1835 | 0 | if (fractional_part) { |
1836 | 0 | BN_FORMAT_ADD_OUTPUT_CHAR('.') |
1837 | 0 | } |
1838 | | |
1839 | 0 | { // Add integer-part digits of amount |
1840 | | // Add trailing zeroes |
1841 | 0 | int digits = 0; |
1842 | 0 | if (!bn_is_zero(&temp)) { |
1843 | 0 | for (; exponent > 0; --exponent) { |
1844 | 0 | ++digits; |
1845 | 0 | BN_FORMAT_ADD_OUTPUT_CHAR('0') |
1846 | 0 | if (thousands != 0 && digits % 3 == 0) { |
1847 | 0 | BN_FORMAT_ADD_OUTPUT_CHAR(thousands) |
1848 | 0 | } |
1849 | 0 | } |
1850 | 0 | } |
1851 | | // decimals == 0 && exponent == 0 |
1852 | | |
1853 | | // Add significant digits |
1854 | 0 | bool is_zero = false; |
1855 | 0 | do { |
1856 | 0 | ++digits; |
1857 | 0 | bn_divmod10(&temp, &digit); |
1858 | 0 | is_zero = bn_is_zero(&temp); |
1859 | 0 | BN_FORMAT_ADD_OUTPUT_CHAR('0' + digit) |
1860 | 0 | if (thousands != 0 && !is_zero && digits % 3 == 0) { |
1861 | 0 | BN_FORMAT_ADD_OUTPUT_CHAR(thousands) |
1862 | 0 | } |
1863 | 0 | } while (!is_zero); |
1864 | 0 | } |
1865 | | |
1866 | | // Add prefix |
1867 | 0 | size_t prefix_length = prefix ? strlen(prefix) : 0; |
1868 | 0 | for (int i = prefix_length - 1; i >= 0; --i) |
1869 | 0 | BN_FORMAT_ADD_OUTPUT_CHAR(prefix[i]) |
1870 | | |
1871 | | // Move formatted amount to the start of output |
1872 | 0 | int length = output - position + output_length; |
1873 | 0 | memmove(output, position, length); |
1874 | 0 | return length - 1; |
1875 | 0 | } |
1876 | | |
1877 | | #if USE_BN_PRINT |
1878 | | // Prints x in hexadecimal |
1879 | | // Assumes x is normalized and x < 2**256 |
1880 | | void bn_print(const bignum256 *x) { |
1881 | | printf("%06x", x->val[8]); |
1882 | | printf("%08x", ((x->val[7] << 3) | (x->val[6] >> 26))); |
1883 | | printf("%07x", ((x->val[6] << 2) | (x->val[5] >> 27)) & 0x0FFFFFFF); |
1884 | | printf("%07x", ((x->val[5] << 1) | (x->val[4] >> 28)) & 0x0FFFFFFF); |
1885 | | printf("%07x", x->val[4] & 0x0FFFFFFF); |
1886 | | printf("%08x", ((x->val[3] << 3) | (x->val[2] >> 26))); |
1887 | | printf("%07x", ((x->val[2] << 2) | (x->val[1] >> 27)) & 0x0FFFFFFF); |
1888 | | printf("%07x", ((x->val[1] << 1) | (x->val[0] >> 28)) & 0x0FFFFFFF); |
1889 | | printf("%07x", x->val[0] & 0x0FFFFFFF); |
1890 | | } |
1891 | | |
1892 | | // Prints comma separated list of limbs of x |
1893 | | void bn_print_raw(const bignum256 *x) { |
1894 | | for (int i = 0; i < BN_LIMBS - 1; i++) { |
1895 | | printf("0x%08x, ", x->val[i]); |
1896 | | } |
1897 | | printf("0x%08x", x->val[BN_LIMBS - 1]); |
1898 | | } |
1899 | | #endif |
1900 | | |
1901 | | #if USE_INVERSE_FAST |
1902 | 83.8k | void bn_inverse(bignum256 *x, const bignum256 *prime) { |
1903 | 83.8k | bn_inverse_fast(x, prime); |
1904 | 83.8k | } |
1905 | | #else |
1906 | | void bn_inverse(bignum256 *x, const bignum256 *prime) { |
1907 | | bn_inverse_slow(x, prime); |
1908 | | } |
1909 | | #endif |