/src/openssl/crypto/bn/bn_mont.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright 1995-2025 The OpenSSL Project Authors. All Rights Reserved. |
3 | | * |
4 | | * Licensed under the Apache License 2.0 (the "License"). You may not use |
5 | | * this file except in compliance with the License. You can obtain a copy |
6 | | * in the file LICENSE in the source distribution or at |
7 | | * https://www.openssl.org/source/license.html |
8 | | */ |
9 | | |
10 | | /* |
11 | | * Details about Montgomery multiplication algorithms can be found in |
12 | | * https://www.microsoft.com/en-us/research/wp-content/uploads/1996/01/j37acmon.pdf |
13 | | * and https://cetinkayakoc.net/docs/r01.pdf |
14 | | */ |
15 | | |
16 | | #include "internal/cryptlib.h" |
17 | | #include "bn_local.h" |
18 | | |
19 | | #define MONT_WORD /* use the faster word-based algorithm */ |
20 | | |
21 | | #ifdef MONT_WORD |
22 | | static int bn_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont); |
23 | | #endif |
24 | | |
25 | | int BN_mod_mul_montgomery(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, |
26 | | BN_MONT_CTX *mont, BN_CTX *ctx) |
27 | 0 | { |
28 | 0 | int ret = bn_mul_mont_fixed_top(r, a, b, mont, ctx); |
29 | |
|
30 | 0 | bn_correct_top(r); |
31 | 0 | bn_check_top(r); |
32 | |
|
33 | 0 | return ret; |
34 | 0 | } |
35 | | |
36 | | int bn_mul_mont_fixed_top(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, |
37 | | BN_MONT_CTX *mont, BN_CTX *ctx) |
38 | 0 | { |
39 | 0 | BIGNUM *tmp; |
40 | 0 | int ret = 0; |
41 | 0 | int num = mont->N.top; |
42 | |
|
43 | 0 | #if defined(OPENSSL_BN_ASM_MONT) && defined(MONT_WORD) |
44 | 0 | if (num > 1 && num <= BN_SOFT_LIMIT && a->top == num && b->top == num) { |
45 | 0 | if (bn_wexpand(r, num) == NULL) |
46 | 0 | return 0; |
47 | 0 | if (bn_mul_mont(r->d, a->d, b->d, mont->N.d, mont->n0, num)) { |
48 | 0 | r->neg = a->neg ^ b->neg; |
49 | 0 | r->top = num; |
50 | 0 | r->flags |= BN_FLG_FIXED_TOP; |
51 | 0 | return 1; |
52 | 0 | } |
53 | 0 | } |
54 | 0 | #endif |
55 | | |
56 | 0 | if ((a->top + b->top) > 2 * num) |
57 | 0 | return 0; |
58 | | |
59 | 0 | BN_CTX_start(ctx); |
60 | 0 | tmp = BN_CTX_get(ctx); |
61 | 0 | if (tmp == NULL) |
62 | 0 | goto err; |
63 | | |
64 | 0 | bn_check_top(tmp); |
65 | 0 | if (a == b) { |
66 | 0 | if (!bn_sqr_fixed_top(tmp, a, ctx)) |
67 | 0 | goto err; |
68 | 0 | } else { |
69 | 0 | if (!bn_mul_fixed_top(tmp, a, b, ctx)) |
70 | 0 | goto err; |
71 | 0 | } |
72 | | /* reduce from aRR to aR */ |
73 | 0 | #ifdef MONT_WORD |
74 | 0 | if (!bn_from_montgomery_word(r, tmp, mont)) |
75 | 0 | goto err; |
76 | | #else |
77 | | if (!BN_from_montgomery(r, tmp, mont, ctx)) |
78 | | goto err; |
79 | | #endif |
80 | 0 | ret = 1; |
81 | 0 | err: |
82 | 0 | BN_CTX_end(ctx); |
83 | 0 | return ret; |
84 | 0 | } |
85 | | |
86 | | #ifdef MONT_WORD |
/*
 * Word-based Montgomery reduction of |r| into |ret|, using the modulus
 * mont->N and the word constant mont->n0[0].
 *
 * |r| is used as scratch space: it is expanded to 2 * N.top words, the words
 * at and above its incoming top are masked to zero, its top is set to
 * 2 * N.top with BN_FLG_FIXED_TOP, and its upper N.top words are zeroed
 * again before returning. |ret| is expanded to N.top words and left with
 * top == N.top, BN_FLG_FIXED_TOP set and the sign taken from |r|.
 *
 * Returns 1 on success (including the N.top == 0 case, where only ret->top
 * is reset) and 0 if either bn_wexpand() call fails.
 */
87 | | static int bn_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont)
88 | 0 | {
89 | 0 | BIGNUM *n;
90 | 0 | BN_ULONG *ap, *np, *rp, n0, v, carry;
91 | 0 | int nl, max, i;
92 | 0 | unsigned int rtop;
93 | |

94 | 0 | n = &(mont->N);
95 | 0 | nl = n->top;
96 | 0 | if (nl == 0) {
97 | 0 | ret->top = 0;
98 | 0 | return 1;
99 | 0 | }
100 | | 
101 | 0 | max = (2 * nl); /* carry is stored separately */
102 | 0 | if (bn_wexpand(r, max) == NULL)
103 | 0 | return 0;
104 | | 
105 | 0 | r->neg ^= n->neg;
106 | 0 | np = n->d;
107 | 0 | rp = r->d;
108 | | 
109 | | /*
 * clear the top words of T
 *
 * The subtraction (i - rtop) is carried out in unsigned int, so its most
 * significant bit is set while i < rtop and clear from rtop upwards
 * (assuming both values stay below half the unsigned int range). Shifting
 * that bit down and negating yields v == all-ones below rtop and v == 0 at
 * or above it. Every word up to |max| is visited and masked; the loop never
 * branches on rtop.
 */
110 | 0 | for (rtop = r->top, i = 0; i < max; i++) {
111 | 0 | v = (BN_ULONG)0 - ((i - rtop) >> (8 * sizeof(rtop) - 1));
112 | 0 | rp[i] &= v;
113 | 0 | }
114 | |

115 | 0 | r->top = max;
116 | 0 | r->flags |= BN_FLG_FIXED_TOP;
117 | 0 | n0 = mont->n0[0];
118 | | 
119 | | /*
120 | | * Add multiples of |n| to |r| until R = 2^(nl * BN_BITS2) divides it. On
121 | | * input, we had |r| < |n| * R, so now |r| < 2 * |n| * R. Note that |r|
122 | | * includes |carry| which is stored separately.
123 | | */
124 | 0 | for (carry = 0, i = 0; i < nl; i++, rp++) {
125 | 0 | v = bn_mul_add_words(rp, np, nl, (rp[0] * n0) & BN_MASK2);
/*
 * Fold the word returned by bn_mul_add_words() and the previous carry into
 * rp[nl], then recompute |carry| without branching: with no carry-in the
 * addition wrapped exactly when v < rp[nl]; with a carry-in it wrapped
 * exactly when v <= rp[nl]. The |= / &= pair below encodes both cases.
 */
126 | 0 | v = (v + carry + rp[nl]) & BN_MASK2;
127 | 0 | carry |= (v != rp[nl]);
128 | 0 | carry &= (v <= rp[nl]);
129 | 0 | rp[nl] = v;
130 | 0 | }
131 | |

132 | 0 | if (bn_wexpand(ret, nl) == NULL)
133 | 0 | return 0;
134 | 0 | ret->top = nl;
135 | 0 | ret->flags |= BN_FLG_FIXED_TOP;
136 | 0 | ret->neg = r->neg;
137 | |

138 | 0 | rp = ret->d;
139 | | 
140 | | /*
141 | | * Shift |nl| words to divide by R. We have |ap| < 2 * |n|. Note that |ap|
142 | | * includes |carry| which is stored separately.
143 | | */
144 | 0 | ap = &(r->d[nl]);
145 | |

/*
 * Tentatively write ap - np into |ret|; the selection loop further down
 * puts |ap| back wherever the subtraction turned out to be unnecessary.
 */
146 | 0 | carry -= bn_sub_words(rp, ap, np, nl);
147 | | /*
148 | | * |carry| is -1 if |ap| - |np| underflowed or zero if it did not. Note
149 | | * |carry| cannot be 1. That would imply the subtraction did not fit in
150 | | * |nl| words, and we know at most one subtraction is needed.
151 | | */
/*
 * Mask-select per word: all-ones |carry| keeps ap[i], zero |carry| keeps the
 * subtracted value already in rp[i]. Each scratch word in |r| is zeroed
 * once it has been consumed.
 */
152 | 0 | for (i = 0; i < nl; i++) {
153 | 0 | rp[i] = (carry & ap[i]) | (~carry & rp[i]);
154 | 0 | ap[i] = 0;
155 | 0 | }
156 | |

157 | 0 | return 1;
158 | 0 | }
159 | | #endif /* MONT_WORD */ |
160 | | |
161 | | int BN_from_montgomery(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont, |
162 | | BN_CTX *ctx) |
163 | 0 | { |
164 | 0 | int retn; |
165 | |
|
166 | 0 | retn = bn_from_mont_fixed_top(ret, a, mont, ctx); |
167 | 0 | bn_correct_top(ret); |
168 | 0 | bn_check_top(ret); |
169 | |
|
170 | 0 | return retn; |
171 | 0 | } |
172 | | |
173 | | int bn_from_mont_fixed_top(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont, |
174 | | BN_CTX *ctx) |
175 | 0 | { |
176 | 0 | int retn = 0; |
177 | 0 | #ifdef MONT_WORD |
178 | 0 | BIGNUM *t; |
179 | |
|
180 | 0 | BN_CTX_start(ctx); |
181 | 0 | if ((t = BN_CTX_get(ctx)) && BN_copy(t, a)) { |
182 | 0 | retn = bn_from_montgomery_word(ret, t, mont); |
183 | 0 | } |
184 | 0 | BN_CTX_end(ctx); |
185 | | #else /* !MONT_WORD */ |
186 | | BIGNUM *t1, *t2; |
187 | | |
188 | | BN_CTX_start(ctx); |
189 | | t1 = BN_CTX_get(ctx); |
190 | | t2 = BN_CTX_get(ctx); |
191 | | if (t2 == NULL) |
192 | | goto err; |
193 | | |
194 | | if (BN_copy(t1, a) == NULL) |
195 | | goto err; |
196 | | BN_mask_bits(t1, mont->ri); |
197 | | |
198 | | if (!BN_mul(t2, t1, &mont->Ni, ctx)) |
199 | | goto err; |
200 | | BN_mask_bits(t2, mont->ri); |
201 | | |
202 | | if (!BN_mul(t1, t2, &mont->N, ctx)) |
203 | | goto err; |
204 | | if (!BN_add(t2, a, t1)) |
205 | | goto err; |
206 | | if (!BN_rshift(ret, t2, mont->ri)) |
207 | | goto err; |
208 | | |
209 | | if (BN_ucmp(ret, &(mont->N)) >= 0) { |
210 | | if (!BN_usub(ret, ret, &(mont->N))) |
211 | | goto err; |
212 | | } |
213 | | retn = 1; |
214 | | bn_check_top(ret); |
215 | | err: |
216 | | BN_CTX_end(ctx); |
217 | | #endif /* MONT_WORD */ |
218 | 0 | return retn; |
219 | 0 | } |
220 | | |
221 | | int bn_to_mont_fixed_top(BIGNUM *r, const BIGNUM *a, BN_MONT_CTX *mont, |
222 | | BN_CTX *ctx) |
223 | 0 | { |
224 | 0 | return bn_mul_mont_fixed_top(r, a, &(mont->RR), mont, ctx); |
225 | 0 | } |
226 | | |
227 | | BN_MONT_CTX *BN_MONT_CTX_new(void) |
228 | 0 | { |
229 | 0 | BN_MONT_CTX *ret; |
230 | |
|
231 | 0 | if ((ret = OPENSSL_malloc(sizeof(*ret))) == NULL) |
232 | 0 | return NULL; |
233 | | |
234 | 0 | BN_MONT_CTX_init(ret); |
235 | 0 | ret->flags = BN_FLG_MALLOCED; |
236 | 0 | return ret; |
237 | 0 | } |
238 | | |
239 | | void BN_MONT_CTX_init(BN_MONT_CTX *ctx) |
240 | 0 | { |
241 | 0 | ctx->ri = 0; |
242 | 0 | bn_init(&ctx->RR); |
243 | 0 | bn_init(&ctx->N); |
244 | 0 | bn_init(&ctx->Ni); |
245 | 0 | ctx->n0[0] = ctx->n0[1] = 0; |
246 | 0 | ctx->flags = 0; |
247 | 0 | } |
248 | | |
249 | | void BN_MONT_CTX_free(BN_MONT_CTX *mont) |
250 | 152k | { |
251 | 152k | if (mont == NULL) |
252 | 152k | return; |
253 | 0 | BN_clear_free(&mont->RR); |
254 | 0 | BN_clear_free(&mont->N); |
255 | 0 | BN_clear_free(&mont->Ni); |
256 | 0 | if (mont->flags & BN_FLG_MALLOCED) |
257 | 0 | OPENSSL_free(mont); |
258 | 0 | } |
259 | | |
/*
 * Initialise |mont| for arithmetic modulo |mod|.
 *
 * Copies |mod| into mont->N (forcing the copy non-negative and carrying over
 * BN_FLG_CONSTTIME when |mod| has it), sets mont->ri, fills in the word
 * constants mont->n0[] (MONT_WORD build) or mont->Ni (bignum build), and
 * finally sets mont->RR to 2^(2 * ri) mod N, zero-padded to N.top words and
 * flagged BN_FLG_FIXED_TOP.
 *
 * Returns 1 on success and 0 if |mod| is zero or any intermediate step
 * fails.
 */
260 | | int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx)
261 | 0 | {
262 | 0 | int i, ret = 0;
263 | 0 | BIGNUM *Ri, *R;
264 | |

265 | 0 | if (BN_is_zero(mod))
266 | 0 | return 0;
267 | | 
268 | 0 | BN_CTX_start(ctx);
269 | 0 | if ((Ri = BN_CTX_get(ctx)) == NULL)
270 | 0 | goto err;
271 | 0 | R = &(mont->RR); /* grab RR as a temp */
272 | 0 | if (BN_copy(&(mont->N), mod) == NULL)
273 | 0 | goto err; /* Set N */
274 | 0 | if (BN_get_flags(mod, BN_FLG_CONSTTIME) != 0)
275 | 0 | BN_set_flags(&(mont->N), BN_FLG_CONSTTIME);
276 | 0 | mont->N.neg = 0;
277 | |

278 | 0 | #ifdef MONT_WORD
279 | 0 | {
/*
 * |tmod| is a stack BIGNUM backed by the two-word array |buf|; it holds the
 * low word(s) of the modulus for the inversion and division below.
 */
280 | 0 | BIGNUM tmod;
281 | 0 | BN_ULONG buf[2];
282 | |

283 | 0 | bn_init(&tmod);
284 | 0 | tmod.d = buf;
285 | 0 | tmod.dmax = 2;
286 | 0 | tmod.neg = 0;
287 | |

288 | 0 | if (BN_get_flags(mod, BN_FLG_CONSTTIME) != 0)
289 | 0 | BN_set_flags(&tmod, BN_FLG_CONSTTIME);
290 | |

/* ri = bit length of |mod| rounded up to a whole multiple of BN_BITS2 */
291 | 0 | mont->ri = (BN_num_bits(mod) + (BN_BITS2 - 1)) / BN_BITS2 * BN_BITS2;
292 | |

293 | | #if defined(OPENSSL_BN_ASM_MONT) && (BN_BITS2 <= 32)
294 | | /*
295 | | * Only certain BN_BITS2<=32 platforms actually make use of n0[1],
296 | | * and we could use the #else case (with a shorter R value) for the
297 | | * others. However, currently only the assembler files do know which
298 | | * is which.
299 | | */
300 | | 
301 | | BN_zero(R);
302 | | if (!(BN_set_bit(R, 2 * BN_BITS2)))
303 | | goto err;
304 | | 
305 | | tmod.top = 0;
306 | | if ((buf[0] = mod->d[0]))
307 | | tmod.top = 1;
308 | | if ((buf[1] = mod->top > 1 ? mod->d[1] : 0))
309 | | tmod.top = 2;
310 | | 
311 | | if (BN_is_one(&tmod))
312 | | BN_zero(Ri);
313 | | else if ((BN_mod_inverse(Ri, R, &tmod, ctx)) == NULL)
314 | | goto err;
315 | | if (!BN_lshift(Ri, Ri, 2 * BN_BITS2))
316 | | goto err; /* R*Ri */
317 | | if (!BN_is_zero(Ri)) {
318 | | if (!BN_sub_word(Ri, 1))
319 | | goto err;
320 | | } else { /* if N mod word size == 1 */
321 | | 
322 | | if (bn_expand(Ri, (int)sizeof(BN_ULONG) * 2) == NULL)
323 | | goto err;
324 | | /* Ri-- (mod double word size) */
325 | | Ri->neg = 0;
326 | | Ri->d[0] = BN_MASK2;
327 | | Ri->d[1] = BN_MASK2;
328 | | Ri->top = 2;
329 | | }
330 | | if (!BN_div(Ri, NULL, Ri, &tmod, ctx))
331 | | goto err;
332 | | /*
333 | | * Ni = (R*Ri-1)/N, keep only couple of least significant words:
334 | | */
335 | | mont->n0[0] = (Ri->top > 0) ? Ri->d[0] : 0;
336 | | mont->n0[1] = (Ri->top > 1) ? Ri->d[1] : 0;
337 | | #else
338 | 0 | BN_zero(R);
339 | 0 | if (!(BN_set_bit(R, BN_BITS2)))
340 | 0 | goto err; /* R */
341 | | 
342 | 0 | buf[0] = mod->d[0]; /* tmod = N mod word size */
343 | 0 | buf[1] = 0;
344 | 0 | tmod.top = buf[0] != 0 ? 1 : 0;
345 | | /* Ri = R^-1 mod tmod, where tmod holds only the low word of N */
346 | 0 | if (BN_is_one(&tmod))
347 | 0 | BN_zero(Ri);
348 | 0 | else if ((BN_mod_inverse(Ri, R, &tmod, ctx)) == NULL)
349 | 0 | goto err;
350 | 0 | if (!BN_lshift(Ri, Ri, BN_BITS2))
351 | 0 | goto err; /* R*Ri */
352 | 0 | if (!BN_is_zero(Ri)) {
353 | 0 | if (!BN_sub_word(Ri, 1))
354 | 0 | goto err;
355 | 0 | } else { /* if N mod word size == 1 */
356 | |

357 | 0 | if (!BN_set_word(Ri, BN_MASK2))
358 | 0 | goto err; /* Ri-- (mod word size) */
359 | 0 | }
360 | 0 | if (!BN_div(Ri, NULL, Ri, &tmod, ctx))
361 | 0 | goto err;
362 | | /*
363 | | * Ni = (R*Ri-1)/N, keep only least significant word:
364 | | */
365 | 0 | mont->n0[0] = (Ri->top > 0) ? Ri->d[0] : 0;
366 | 0 | mont->n0[1] = 0;
367 | 0 | #endif
368 | 0 | }
369 | | #else /* !MONT_WORD */
370 | | { /* bignum version */
371 | | mont->ri = BN_num_bits(&mont->N);
372 | | BN_zero(R);
373 | | if (!BN_set_bit(R, mont->ri))
374 | | goto err; /* R = 2^ri */
375 | | /* Ri = R^-1 mod N */
376 | | if ((BN_mod_inverse(Ri, R, &mont->N, ctx)) == NULL)
377 | | goto err;
378 | | if (!BN_lshift(Ri, Ri, mont->ri))
379 | | goto err; /* R*Ri */
380 | | if (!BN_sub_word(Ri, 1))
381 | | goto err;
382 | | /*
383 | | * Ni = (R*Ri-1) / N
384 | | */
385 | | if (!BN_div(&(mont->Ni), NULL, Ri, &mont->N, ctx))
386 | | goto err;
387 | | }
388 | | #endif
389 | | 
390 | | /* setup RR for conversions: RR = 2^(2 * ri) mod N */
391 | 0 | BN_zero(&(mont->RR));
392 | 0 | if (!BN_set_bit(&(mont->RR), mont->ri * 2))
393 | 0 | goto err;
394 | 0 | if (!BN_mod(&(mont->RR), &(mont->RR), &(mont->N), ctx))
395 | 0 | goto err;
396 | | 
/*
 * Zero-pad RR up to N.top words and pin its top there. |ret| is borrowed as
 * a temporary holding N.top; it only becomes the return value at the
 * assignment that follows, and nothing jumps to err in between.
 */
397 | 0 | for (i = mont->RR.top, ret = mont->N.top; i < ret; i++)
398 | 0 | mont->RR.d[i] = 0;
399 | 0 | mont->RR.top = ret;
400 | 0 | mont->RR.flags |= BN_FLG_FIXED_TOP;
401 | |

402 | 0 | ret = 1;
403 | 0 | err:
404 | 0 | BN_CTX_end(ctx);
405 | 0 | return ret;
406 | 0 | }
407 | | |
408 | | BN_MONT_CTX *BN_MONT_CTX_copy(BN_MONT_CTX *to, BN_MONT_CTX *from) |
409 | 0 | { |
410 | 0 | if (to == from) |
411 | 0 | return to; |
412 | | |
413 | 0 | if (BN_copy(&(to->RR), &(from->RR)) == NULL) |
414 | 0 | return NULL; |
415 | 0 | if (BN_copy(&(to->N), &(from->N)) == NULL) |
416 | 0 | return NULL; |
417 | 0 | if (BN_copy(&(to->Ni), &(from->Ni)) == NULL) |
418 | 0 | return NULL; |
419 | 0 | to->ri = from->ri; |
420 | 0 | to->n0[0] = from->n0[0]; |
421 | 0 | to->n0[1] = from->n0[1]; |
422 | 0 | return to; |
423 | 0 | } |
424 | | |
425 | | BN_MONT_CTX *BN_MONT_CTX_set_locked(BN_MONT_CTX **pmont, CRYPTO_RWLOCK *lock, |
426 | | const BIGNUM *mod, BN_CTX *ctx) |
427 | 0 | { |
428 | 0 | BN_MONT_CTX *ret; |
429 | |
|
430 | 0 | if (!CRYPTO_THREAD_read_lock(lock)) |
431 | 0 | return NULL; |
432 | 0 | ret = *pmont; |
433 | 0 | CRYPTO_THREAD_unlock(lock); |
434 | 0 | if (ret) |
435 | 0 | return ret; |
436 | | |
437 | | /* |
438 | | * We don't want to serialize globally while doing our lazy-init math in |
439 | | * BN_MONT_CTX_set. That punishes threads that are doing independent |
440 | | * things. Instead, punish the case where more than one thread tries to |
441 | | * lazy-init the same 'pmont', by having each do the lazy-init math work |
442 | | * independently and only use the one from the thread that wins the race |
443 | | * (the losers throw away the work they've done). |
444 | | */ |
445 | 0 | ret = BN_MONT_CTX_new(); |
446 | 0 | if (ret == NULL) |
447 | 0 | return NULL; |
448 | 0 | if (!BN_MONT_CTX_set(ret, mod, ctx)) { |
449 | 0 | BN_MONT_CTX_free(ret); |
450 | 0 | return NULL; |
451 | 0 | } |
452 | | |
453 | | /* The locked compare-and-set, after the local work is done. */ |
454 | 0 | if (!CRYPTO_THREAD_write_lock(lock)) { |
455 | 0 | BN_MONT_CTX_free(ret); |
456 | 0 | return NULL; |
457 | 0 | } |
458 | | |
459 | 0 | if (*pmont) { |
460 | 0 | BN_MONT_CTX_free(ret); |
461 | 0 | ret = *pmont; |
462 | 0 | } else |
463 | 0 | *pmont = ret; |
464 | 0 | CRYPTO_THREAD_unlock(lock); |
465 | 0 | return ret; |
466 | 0 | } |
467 | | |
468 | | int ossl_bn_mont_ctx_set(BN_MONT_CTX *ctx, const BIGNUM *modulus, int ri, const unsigned char *rr, |
469 | | int rrlen, uint32_t nlo, uint32_t nhi) |
470 | 0 | { |
471 | 0 | if (BN_copy(&ctx->N, modulus) == NULL) |
472 | 0 | return 0; |
473 | 0 | if (BN_bin2bn(rr, rrlen, &ctx->RR) == NULL) |
474 | 0 | return 0; |
475 | 0 | ctx->ri = ri; |
476 | | #if (BN_BITS2 <= 32) && defined(OPENSSL_BN_ASM_MONT) |
477 | | ctx->n0[0] = nlo; |
478 | | ctx->n0[1] = nhi; |
479 | | #elif BN_BITS2 <= 32 |
480 | | ctx->n0[0] = nlo; |
481 | | ctx->n0[1] = 0; |
482 | | #else |
483 | 0 | ctx->n0[0] = ((BN_ULONG)nhi << 32) | nlo; |
484 | 0 | ctx->n0[1] = 0; |
485 | 0 | #endif |
486 | |
|
487 | 0 | return 1; |
488 | 0 | } |
489 | | |
490 | | int ossl_bn_mont_ctx_eq(const BN_MONT_CTX *m1, const BN_MONT_CTX *m2) |
491 | 0 | { |
492 | 0 | if (m1->ri != m2->ri) |
493 | 0 | return 0; |
494 | 0 | if (BN_cmp(&m1->RR, &m2->RR) != 0) |
495 | 0 | return 0; |
496 | 0 | if (m1->flags != m2->flags) |
497 | 0 | return 0; |
498 | 0 | #ifdef MONT_WORD |
499 | 0 | if (m1->n0[0] != m2->n0[0]) |
500 | 0 | return 0; |
501 | 0 | if (m1->n0[1] != m2->n0[1]) |
502 | 0 | return 0; |
503 | | #else |
504 | | if (BN_cmp(&m1->Ni, &m2->Ni) != 0) |
505 | | return 0; |
506 | | #endif |
507 | 0 | return 1; |
508 | 0 | } |