/src/libressl/crypto/bn/bn_exp.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* $OpenBSD: bn_exp.c,v 1.32 2022/04/20 13:32:34 tb Exp $ */ |
2 | | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) |
3 | | * All rights reserved. |
4 | | * |
5 | | * This package is an SSL implementation written |
6 | | * by Eric Young (eay@cryptsoft.com). |
7 | | * The implementation was written so as to conform with Netscapes SSL. |
8 | | * |
9 | | * This library is free for commercial and non-commercial use as long as |
10 | | * the following conditions are aheared to. The following conditions |
11 | | * apply to all code found in this distribution, be it the RC4, RSA, |
12 | | * lhash, DES, etc., code; not just the SSL code. The SSL documentation |
13 | | * included with this distribution is covered by the same copyright terms |
14 | | * except that the holder is Tim Hudson (tjh@cryptsoft.com). |
15 | | * |
16 | | * Copyright remains Eric Young's, and as such any Copyright notices in |
17 | | * the code are not to be removed. |
18 | | * If this package is used in a product, Eric Young should be given attribution |
19 | | * as the author of the parts of the library used. |
20 | | * This can be in the form of a textual message at program startup or |
21 | | * in documentation (online or textual) provided with the package. |
22 | | * |
23 | | * Redistribution and use in source and binary forms, with or without |
24 | | * modification, are permitted provided that the following conditions |
25 | | * are met: |
26 | | * 1. Redistributions of source code must retain the copyright |
27 | | * notice, this list of conditions and the following disclaimer. |
28 | | * 2. Redistributions in binary form must reproduce the above copyright |
29 | | * notice, this list of conditions and the following disclaimer in the |
30 | | * documentation and/or other materials provided with the distribution. |
31 | | * 3. All advertising materials mentioning features or use of this software |
32 | | * must display the following acknowledgement: |
33 | | * "This product includes cryptographic software written by |
34 | | * Eric Young (eay@cryptsoft.com)" |
35 | | * The word 'cryptographic' can be left out if the rouines from the library |
36 | | * being used are not cryptographic related :-). |
37 | | * 4. If you include any Windows specific code (or a derivative thereof) from |
38 | | * the apps directory (application code) you must include an acknowledgement: |
39 | | * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" |
40 | | * |
41 | | * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND |
42 | | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
43 | | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
44 | | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE |
45 | | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
46 | | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
47 | | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
48 | | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
49 | | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
50 | | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
51 | | * SUCH DAMAGE. |
52 | | * |
53 | | * The licence and distribution terms for any publically available version or |
54 | | * derivative of this code cannot be changed. i.e. this code cannot simply be |
55 | | * copied and put under another distribution licence |
56 | | * [including the GNU Public Licence.] |
57 | | */ |
58 | | /* ==================================================================== |
59 | | * Copyright (c) 1998-2005 The OpenSSL Project. All rights reserved. |
60 | | * |
61 | | * Redistribution and use in source and binary forms, with or without |
62 | | * modification, are permitted provided that the following conditions |
63 | | * are met: |
64 | | * |
65 | | * 1. Redistributions of source code must retain the above copyright |
66 | | * notice, this list of conditions and the following disclaimer. |
67 | | * |
68 | | * 2. Redistributions in binary form must reproduce the above copyright |
69 | | * notice, this list of conditions and the following disclaimer in |
70 | | * the documentation and/or other materials provided with the |
71 | | * distribution. |
72 | | * |
73 | | * 3. All advertising materials mentioning features or use of this |
74 | | * software must display the following acknowledgment: |
75 | | * "This product includes software developed by the OpenSSL Project |
76 | | * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" |
77 | | * |
78 | | * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to |
79 | | * endorse or promote products derived from this software without |
80 | | * prior written permission. For written permission, please contact |
81 | | * openssl-core@openssl.org. |
82 | | * |
83 | | * 5. Products derived from this software may not be called "OpenSSL" |
84 | | * nor may "OpenSSL" appear in their names without prior written |
85 | | * permission of the OpenSSL Project. |
86 | | * |
87 | | * 6. Redistributions of any form whatsoever must retain the following |
88 | | * acknowledgment: |
89 | | * "This product includes software developed by the OpenSSL Project |
90 | | * for use in the OpenSSL Toolkit (http://www.openssl.org/)" |
91 | | * |
92 | | * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY |
93 | | * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
94 | | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
95 | | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR |
96 | | * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
97 | | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
98 | | * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
99 | | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
100 | | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, |
101 | | * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
102 | | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED |
103 | | * OF THE POSSIBILITY OF SUCH DAMAGE. |
104 | | * ==================================================================== |
105 | | * |
106 | | * This product includes cryptographic software written by Eric Young |
107 | | * (eay@cryptsoft.com). This product includes software written by Tim |
108 | | * Hudson (tjh@cryptsoft.com). |
109 | | * |
110 | | */ |
111 | | |
112 | | #include <stdlib.h> |
113 | | #include <string.h> |
114 | | |
115 | | #include <openssl/err.h> |
116 | | |
117 | | #include "bn_lcl.h" |
118 | | #include "constant_time_locl.h" |
119 | | |
120 | | /* maximum precomputation table size for *variable* sliding windows */ |
121 | | #define TABLE_SIZE 32 |
122 | | |
123 | | /* this one works - simple but works */ |
124 | | int |
125 | | BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) |
126 | 80 | { |
127 | 80 | int i, bits, ret = 0; |
128 | 80 | BIGNUM *v, *rr; |
129 | | |
130 | 80 | if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { |
131 | | /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ |
132 | 0 | BNerror(ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); |
133 | 0 | return -1; |
134 | 0 | } |
135 | | |
136 | 80 | BN_CTX_start(ctx); |
137 | 80 | if ((r == a) || (r == p)) |
138 | 0 | rr = BN_CTX_get(ctx); |
139 | 80 | else |
140 | 80 | rr = r; |
141 | 80 | v = BN_CTX_get(ctx); |
142 | 80 | if (rr == NULL || v == NULL) |
143 | 0 | goto err; |
144 | | |
145 | 80 | if (BN_copy(v, a) == NULL) |
146 | 0 | goto err; |
147 | 80 | bits = BN_num_bits(p); |
148 | | |
149 | 80 | if (BN_is_odd(p)) { |
150 | 26 | if (BN_copy(rr, a) == NULL) |
151 | 0 | goto err; |
152 | 54 | } else { |
153 | 54 | if (!BN_one(rr)) |
154 | 0 | goto err; |
155 | 54 | } |
156 | | |
157 | 490 | for (i = 1; i < bits; i++) { |
158 | 410 | if (!BN_sqr(v, v, ctx)) |
159 | 0 | goto err; |
160 | 410 | if (BN_is_bit_set(p, i)) { |
161 | 237 | if (!BN_mul(rr, rr, v, ctx)) |
162 | 0 | goto err; |
163 | 237 | } |
164 | 410 | } |
165 | 80 | ret = 1; |
166 | | |
167 | 80 | err: |
168 | 80 | if (r != rr && rr != NULL) |
169 | 0 | BN_copy(r, rr); |
170 | 80 | BN_CTX_end(ctx); |
171 | 80 | bn_check_top(r); |
172 | 80 | return (ret); |
173 | 80 | } |
174 | | |
175 | | static int |
176 | | BN_mod_exp_internal(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, |
177 | | BN_CTX *ctx, int ct) |
178 | 396 | { |
179 | 396 | int ret; |
180 | | |
181 | 396 | bn_check_top(a); |
182 | 396 | bn_check_top(p); |
183 | 396 | bn_check_top(m); |
184 | | |
185 | | /* For even modulus m = 2^k*m_odd, it might make sense to compute |
186 | | * a^p mod m_odd and a^p mod 2^k separately (with Montgomery |
187 | | * exponentiation for the odd part), using appropriate exponent |
188 | | * reductions, and combine the results using the CRT. |
189 | | * |
190 | | * For now, we use Montgomery only if the modulus is odd; otherwise, |
191 | | * exponentiation using the reciprocal-based quick remaindering |
192 | | * algorithm is used. |
193 | | * |
194 | | * (Timing obtained with expspeed.c [computations a^p mod m |
195 | | * where a, p, m are of the same length: 256, 512, 1024, 2048, |
196 | | * 4096, 8192 bits], compared to the running time of the |
197 | | * standard algorithm: |
198 | | * |
199 | | * BN_mod_exp_mont 33 .. 40 % [AMD K6-2, Linux, debug configuration] |
200 | | * 55 .. 77 % [UltraSparc processor, but |
201 | | * debug-solaris-sparcv8-gcc conf.] |
202 | | * |
203 | | * BN_mod_exp_recp 50 .. 70 % [AMD K6-2, Linux, debug configuration] |
204 | | * 62 .. 118 % [UltraSparc, debug-solaris-sparcv8-gcc] |
205 | | * |
206 | | * On the Sparc, BN_mod_exp_recp was faster than BN_mod_exp_mont |
207 | | * at 2048 and more bits, but at 512 and 1024 bits, it was |
208 | | * slower even than the standard algorithm! |
209 | | * |
210 | | * "Real" timings [linux-elf, solaris-sparcv9-gcc configurations] |
211 | | * should be obtained when the new Montgomery reduction code |
212 | | * has been integrated into OpenSSL.) |
213 | | */ |
214 | | |
215 | 396 | if (BN_is_odd(m)) { |
216 | 319 | if (a->top == 1 && !a->neg && !ct) { |
217 | 72 | BN_ULONG A = a->d[0]; |
218 | 72 | ret = BN_mod_exp_mont_word(r, A,p, m,ctx, NULL); |
219 | 72 | } else |
220 | 247 | ret = BN_mod_exp_mont_ct(r, a,p, m,ctx, NULL); |
221 | 319 | } else { |
222 | 77 | ret = BN_mod_exp_recp(r, a,p, m, ctx); |
223 | 77 | } |
224 | | |
225 | 396 | bn_check_top(r); |
226 | 396 | return (ret); |
227 | 396 | } |
228 | | |
229 | | int |
230 | | BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, |
231 | | BN_CTX *ctx) |
232 | 167 | { |
233 | 167 | return BN_mod_exp_internal(r, a, p, m, ctx, |
234 | 167 | (BN_get_flags(p, BN_FLG_CONSTTIME) != 0)); |
235 | 167 | } |
236 | | |
237 | | int |
238 | | BN_mod_exp_ct(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, |
239 | | BN_CTX *ctx) |
240 | 229 | { |
241 | 229 | return BN_mod_exp_internal(r, a, p, m, ctx, 1); |
242 | 229 | } |
243 | | |
244 | | |
245 | | int |
246 | | BN_mod_exp_nonct(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, |
247 | | BN_CTX *ctx) |
248 | 0 | { |
249 | 0 | return BN_mod_exp_internal(r, a, p, m, ctx, 0); |
250 | 0 | } |
251 | | |
252 | | |
253 | | int |
254 | | BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, |
255 | | BN_CTX *ctx) |
256 | 77 | { |
257 | 77 | int i, j, bits, ret = 0, wstart, wend, window, wvalue; |
258 | 77 | int start = 1; |
259 | 77 | BIGNUM *aa; |
260 | | /* Table of variables obtained from 'ctx' */ |
261 | 77 | BIGNUM *val[TABLE_SIZE]; |
262 | 77 | BN_RECP_CTX recp; |
263 | | |
264 | 77 | if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { |
265 | | /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ |
266 | 2 | BNerror(ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); |
267 | 2 | return -1; |
268 | 2 | } |
269 | | |
270 | 75 | bits = BN_num_bits(p); |
271 | 75 | if (bits == 0) { |
272 | | /* x**0 mod 1 is still zero. */ |
273 | 1 | if (BN_is_one(m)) { |
274 | 0 | ret = 1; |
275 | 0 | BN_zero(r); |
276 | 0 | } else |
277 | 1 | ret = BN_one(r); |
278 | 1 | return ret; |
279 | 1 | } |
280 | | |
281 | 74 | BN_RECP_CTX_init(&recp); |
282 | | |
283 | 74 | BN_CTX_start(ctx); |
284 | 74 | if ((aa = BN_CTX_get(ctx)) == NULL) |
285 | 0 | goto err; |
286 | 74 | if ((val[0] = BN_CTX_get(ctx)) == NULL) |
287 | 0 | goto err; |
288 | | |
289 | 74 | if (m->neg) { |
290 | | /* ignore sign of 'm' */ |
291 | 0 | if (!BN_copy(aa, m)) |
292 | 0 | goto err; |
293 | 0 | aa->neg = 0; |
294 | 0 | if (BN_RECP_CTX_set(&recp, aa, ctx) <= 0) |
295 | 0 | goto err; |
296 | 74 | } else { |
297 | 74 | if (BN_RECP_CTX_set(&recp, m, ctx) <= 0) |
298 | 0 | goto err; |
299 | 74 | } |
300 | | |
301 | 74 | if (!BN_nnmod(val[0], a, m, ctx)) |
302 | 0 | goto err; /* 1 */ |
303 | 74 | if (BN_is_zero(val[0])) { |
304 | 1 | BN_zero(r); |
305 | 1 | ret = 1; |
306 | 1 | goto err; |
307 | 1 | } |
308 | | |
309 | 73 | window = BN_window_bits_for_exponent_size(bits); |
310 | 73 | if (window > 1) { |
311 | 59 | if (!BN_mod_mul_reciprocal(aa, val[0], val[0], &recp, ctx)) |
312 | 0 | goto err; /* 2 */ |
313 | 59 | j = 1 << (window - 1); |
314 | 476 | for (i = 1; i < j; i++) { |
315 | 417 | if (((val[i] = BN_CTX_get(ctx)) == NULL) || |
316 | 417 | !BN_mod_mul_reciprocal(val[i], val[i - 1], |
317 | 417 | aa, &recp, ctx)) |
318 | 0 | goto err; |
319 | 417 | } |
320 | 59 | } |
321 | | |
322 | 73 | start = 1; /* This is used to avoid multiplication etc |
323 | | * when there is only the value '1' in the |
324 | | * buffer. */ |
325 | 73 | wvalue = 0; /* The 'value' of the window */ |
326 | 73 | wstart = bits - 1; /* The top bit of the window */ |
327 | 73 | wend = 0; /* The bottom bit of the window */ |
328 | | |
329 | 73 | if (!BN_one(r)) |
330 | 0 | goto err; |
331 | | |
332 | 7.48k | for (;;) { |
333 | 7.48k | if (BN_is_bit_set(p, wstart) == 0) { |
334 | 5.02k | if (!start) |
335 | 5.02k | if (!BN_mod_mul_reciprocal(r, r,r, &recp, ctx)) |
336 | 0 | goto err; |
337 | 5.02k | if (wstart == 0) |
338 | 53 | break; |
339 | 4.97k | wstart--; |
340 | 4.97k | continue; |
341 | 5.02k | } |
342 | | /* We now have wstart on a 'set' bit, we now need to work out |
343 | | * how bit a window to do. To do this we need to scan |
344 | | * forward until the last set bit before the end of the |
345 | | * window */ |
346 | 2.45k | j = wstart; |
347 | 2.45k | wvalue = 1; |
348 | 2.45k | wend = 0; |
349 | 11.9k | for (i = 1; i < window; i++) { |
350 | 9.53k | if (wstart - i < 0) |
351 | 14 | break; |
352 | 9.52k | if (BN_is_bit_set(p, wstart - i)) { |
353 | 4.75k | wvalue <<= (i - wend); |
354 | 4.75k | wvalue |= 1; |
355 | 4.75k | wend = i; |
356 | 4.75k | } |
357 | 9.52k | } |
358 | | |
359 | | /* wend is the size of the current window */ |
360 | 2.45k | j = wend + 1; |
361 | | /* add the 'bytes above' */ |
362 | 2.45k | if (!start) |
363 | 11.8k | for (i = 0; i < j; i++) { |
364 | 9.51k | if (!BN_mod_mul_reciprocal(r, r,r, &recp, ctx)) |
365 | 0 | goto err; |
366 | 9.51k | } |
367 | | |
368 | | /* wvalue will be an odd number < 2^window */ |
369 | 2.45k | if (!BN_mod_mul_reciprocal(r, r,val[wvalue >> 1], &recp, ctx)) |
370 | 0 | goto err; |
371 | | |
372 | | /* move the 'window' down further */ |
373 | 2.45k | wstart -= wend + 1; |
374 | 2.45k | wvalue = 0; |
375 | 2.45k | start = 0; |
376 | 2.45k | if (wstart < 0) |
377 | 20 | break; |
378 | 2.45k | } |
379 | 73 | ret = 1; |
380 | | |
381 | 74 | err: |
382 | 74 | BN_CTX_end(ctx); |
383 | 74 | BN_RECP_CTX_free(&recp); |
384 | 74 | bn_check_top(r); |
385 | 74 | return (ret); |
386 | 73 | } |
387 | | |
388 | | static int |
389 | | BN_mod_exp_mont_internal(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, |
390 | | BN_CTX *ctx, BN_MONT_CTX *in_mont, int ct) |
391 | 814 | { |
392 | 814 | int i, j, bits, ret = 0, wstart, wend, window, wvalue; |
393 | 814 | int start = 1; |
394 | 814 | BIGNUM *d, *r; |
395 | 814 | const BIGNUM *aa; |
396 | | /* Table of variables obtained from 'ctx' */ |
397 | 814 | BIGNUM *val[TABLE_SIZE]; |
398 | 814 | BN_MONT_CTX *mont = NULL; |
399 | | |
400 | 814 | if (ct) { |
401 | 754 | return BN_mod_exp_mont_consttime(rr, a, p, m, ctx, in_mont); |
402 | 754 | } |
403 | | |
404 | 60 | bn_check_top(a); |
405 | 60 | bn_check_top(p); |
406 | 60 | bn_check_top(m); |
407 | | |
408 | 60 | if (!BN_is_odd(m)) { |
409 | 13 | BNerror(BN_R_CALLED_WITH_EVEN_MODULUS); |
410 | 13 | return (0); |
411 | 13 | } |
412 | | |
413 | 47 | bits = BN_num_bits(p); |
414 | 47 | if (bits == 0) { |
415 | | /* x**0 mod 1 is still zero. */ |
416 | 9 | if (BN_is_one(m)) { |
417 | 5 | ret = 1; |
418 | 5 | BN_zero(rr); |
419 | 5 | } else |
420 | 4 | ret = BN_one(rr); |
421 | 9 | return ret; |
422 | 9 | } |
423 | | |
424 | 38 | BN_CTX_start(ctx); |
425 | 38 | if ((d = BN_CTX_get(ctx)) == NULL) |
426 | 0 | goto err; |
427 | 38 | if ((r = BN_CTX_get(ctx)) == NULL) |
428 | 0 | goto err; |
429 | 38 | if ((val[0] = BN_CTX_get(ctx)) == NULL) |
430 | 0 | goto err; |
431 | | |
432 | | /* If this is not done, things will break in the montgomery |
433 | | * part */ |
434 | | |
435 | 38 | if (in_mont != NULL) |
436 | 0 | mont = in_mont; |
437 | 38 | else { |
438 | 38 | if ((mont = BN_MONT_CTX_new()) == NULL) |
439 | 0 | goto err; |
440 | 38 | if (!BN_MONT_CTX_set(mont, m, ctx)) |
441 | 0 | goto err; |
442 | 38 | } |
443 | | |
444 | 38 | if (a->neg || BN_ucmp(a, m) >= 0) { |
445 | 15 | if (!BN_nnmod(val[0], a,m, ctx)) |
446 | 0 | goto err; |
447 | 15 | aa = val[0]; |
448 | 15 | } else |
449 | 23 | aa = a; |
450 | 38 | if (BN_is_zero(aa)) { |
451 | 1 | BN_zero(rr); |
452 | 1 | ret = 1; |
453 | 1 | goto err; |
454 | 1 | } |
455 | 37 | if (!BN_to_montgomery(val[0], aa, mont, ctx)) |
456 | 0 | goto err; /* 1 */ |
457 | | |
458 | 37 | window = BN_window_bits_for_exponent_size(bits); |
459 | 37 | if (window > 1) { |
460 | 23 | if (!BN_mod_mul_montgomery(d, val[0], val[0], mont, ctx)) |
461 | 0 | goto err; /* 2 */ |
462 | 23 | j = 1 << (window - 1); |
463 | 340 | for (i = 1; i < j; i++) { |
464 | 317 | if (((val[i] = BN_CTX_get(ctx)) == NULL) || |
465 | 317 | !BN_mod_mul_montgomery(val[i], val[i - 1], |
466 | 317 | d, mont, ctx)) |
467 | 0 | goto err; |
468 | 317 | } |
469 | 23 | } |
470 | | |
471 | 37 | start = 1; /* This is used to avoid multiplication etc |
472 | | * when there is only the value '1' in the |
473 | | * buffer. */ |
474 | 37 | wvalue = 0; /* The 'value' of the window */ |
475 | 37 | wstart = bits - 1; /* The top bit of the window */ |
476 | 37 | wend = 0; /* The bottom bit of the window */ |
477 | | |
478 | 37 | if (!BN_to_montgomery(r, BN_value_one(), mont, ctx)) |
479 | 0 | goto err; |
480 | 6.37k | for (;;) { |
481 | 6.37k | if (BN_is_bit_set(p, wstart) == 0) { |
482 | 4.23k | if (!start) { |
483 | 4.23k | if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) |
484 | 0 | goto err; |
485 | 4.23k | } |
486 | 4.23k | if (wstart == 0) |
487 | 23 | break; |
488 | 4.21k | wstart--; |
489 | 4.21k | continue; |
490 | 4.23k | } |
491 | | /* We now have wstart on a 'set' bit, we now need to work out |
492 | | * how bit a window to do. To do this we need to scan |
493 | | * forward until the last set bit before the end of the |
494 | | * window */ |
495 | 2.13k | j = wstart; |
496 | 2.13k | wvalue = 1; |
497 | 2.13k | wend = 0; |
498 | 11.0k | for (i = 1; i < window; i++) { |
499 | 8.92k | if (wstart - i < 0) |
500 | 17 | break; |
501 | 8.90k | if (BN_is_bit_set(p, wstart - i)) { |
502 | 4.59k | wvalue <<= (i - wend); |
503 | 4.59k | wvalue |= 1; |
504 | 4.59k | wend = i; |
505 | 4.59k | } |
506 | 8.90k | } |
507 | | |
508 | | /* wend is the size of the current window */ |
509 | 2.13k | j = wend + 1; |
510 | | /* add the 'bytes above' */ |
511 | 2.13k | if (!start) |
512 | 11.2k | for (i = 0; i < j; i++) { |
513 | 9.11k | if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) |
514 | 0 | goto err; |
515 | 9.11k | } |
516 | | |
517 | | /* wvalue will be an odd number < 2^window */ |
518 | 2.13k | if (!BN_mod_mul_montgomery(r, r, val[wvalue >> 1], mont, ctx)) |
519 | 0 | goto err; |
520 | | |
521 | | /* move the 'window' down further */ |
522 | 2.13k | wstart -= wend + 1; |
523 | 2.13k | wvalue = 0; |
524 | 2.13k | start = 0; |
525 | 2.13k | if (wstart < 0) |
526 | 14 | break; |
527 | 2.13k | } |
528 | 37 | if (!BN_from_montgomery(rr, r,mont, ctx)) |
529 | 0 | goto err; |
530 | 37 | ret = 1; |
531 | | |
532 | 38 | err: |
533 | 38 | if ((in_mont == NULL) && (mont != NULL)) |
534 | 38 | BN_MONT_CTX_free(mont); |
535 | 38 | BN_CTX_end(ctx); |
536 | 38 | bn_check_top(rr); |
537 | 38 | return (ret); |
538 | 37 | } |
539 | | |
540 | | int |
541 | | BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, |
542 | | BN_CTX *ctx, BN_MONT_CTX *in_mont) |
543 | 61 | { |
544 | 61 | return BN_mod_exp_mont_internal(rr, a, p, m, ctx, in_mont, |
545 | 61 | (BN_get_flags(p, BN_FLG_CONSTTIME) != 0)); |
546 | 61 | } |
547 | | |
548 | | int |
549 | | BN_mod_exp_mont_ct(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, |
550 | | BN_CTX *ctx, BN_MONT_CTX *in_mont) |
551 | 753 | { |
552 | 753 | return BN_mod_exp_mont_internal(rr, a, p, m, ctx, in_mont, 1); |
553 | 753 | } |
554 | | |
555 | | int |
556 | | BN_mod_exp_mont_nonct(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, |
557 | | BN_CTX *ctx, BN_MONT_CTX *in_mont) |
558 | 0 | { |
559 | 0 | return BN_mod_exp_mont_internal(rr, a, p, m, ctx, in_mont, 0); |
560 | 0 | } |
561 | | |
562 | | /* BN_mod_exp_mont_consttime() stores the precomputed powers in a specific layout |
563 | | * so that accessing any of these table values shows the same access pattern as far |
564 | | * as cache lines are concerned. The following functions are used to transfer a BIGNUM |
565 | | * from/to that table. */ |
566 | | |
567 | | static int |
568 | | MOD_EXP_CTIME_COPY_TO_PREBUF(const BIGNUM *b, int top, unsigned char *buf, |
569 | | int idx, int window) |
570 | 26.1k | { |
571 | 26.1k | int i, j; |
572 | 26.1k | int width = 1 << window; |
573 | 26.1k | BN_ULONG *table = (BN_ULONG *)buf; |
574 | | |
575 | 26.1k | if (top > b->top) |
576 | 15.0k | top = b->top; /* this works because 'buf' is explicitly zeroed */ |
577 | | |
578 | 603k | for (i = 0, j = idx; i < top; i++, j += width) { |
579 | 577k | table[j] = b->d[i]; |
580 | 577k | } |
581 | | |
582 | 26.1k | return 1; |
583 | 26.1k | } |
584 | | |
585 | | static int |
586 | | MOD_EXP_CTIME_COPY_FROM_PREBUF(BIGNUM *b, int top, unsigned char *buf, int idx, |
587 | | int window) |
588 | 115k | { |
589 | 115k | int i, j; |
590 | 115k | int width = 1 << window; |
591 | 115k | volatile BN_ULONG *table = (volatile BN_ULONG *)buf; |
592 | | |
593 | 115k | if (bn_wexpand(b, top) == NULL) |
594 | 0 | return 0; |
595 | | |
596 | 115k | if (window <= 3) { |
597 | 5.52k | for (i = 0; i < top; i++, table += width) { |
598 | 3.97k | BN_ULONG acc = 0; |
599 | | |
600 | 27.4k | for (j = 0; j < width; j++) { |
601 | 23.4k | acc |= table[j] & |
602 | 23.4k | ((BN_ULONG)0 - (constant_time_eq_int(j,idx)&1)); |
603 | 23.4k | } |
604 | | |
605 | 3.97k | b->d[i] = acc; |
606 | 3.97k | } |
607 | 113k | } else { |
608 | 113k | int xstride = 1 << (window - 2); |
609 | 113k | BN_ULONG y0, y1, y2, y3; |
610 | | |
611 | 113k | i = idx >> (window - 2); /* equivalent of idx / xstride */ |
612 | 113k | idx &= xstride - 1; /* equivalent of idx % xstride */ |
613 | | |
614 | 113k | y0 = (BN_ULONG)0 - (constant_time_eq_int(i,0)&1); |
615 | 113k | y1 = (BN_ULONG)0 - (constant_time_eq_int(i,1)&1); |
616 | 113k | y2 = (BN_ULONG)0 - (constant_time_eq_int(i,2)&1); |
617 | 113k | y3 = (BN_ULONG)0 - (constant_time_eq_int(i,3)&1); |
618 | | |
619 | 3.98M | for (i = 0; i < top; i++, table += width) { |
620 | 3.87M | BN_ULONG acc = 0; |
621 | | |
622 | 65.1M | for (j = 0; j < xstride; j++) { |
623 | 61.2M | acc |= ( (table[j + 0 * xstride] & y0) | |
624 | 61.2M | (table[j + 1 * xstride] & y1) | |
625 | 61.2M | (table[j + 2 * xstride] & y2) | |
626 | 61.2M | (table[j + 3 * xstride] & y3) ) |
627 | 61.2M | & ((BN_ULONG)0 - (constant_time_eq_int(j,idx)&1)); |
628 | 61.2M | } |
629 | | |
630 | 3.87M | b->d[i] = acc; |
631 | 3.87M | } |
632 | 113k | } |
633 | 115k | b->top = top; |
634 | 115k | bn_correct_top(b); |
635 | 115k | return 1; |
636 | 115k | } |
637 | | |
638 | | /* Given a pointer value, compute the next address that is a cache line multiple. */ |
639 | | #define MOD_EXP_CTIME_ALIGN(x_) \ |
640 | 814 | ((unsigned char*)(x_) + (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - (((size_t)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK)))) |
641 | | |
642 | | /* This variant of BN_mod_exp_mont() uses fixed windows and the special |
643 | | * precomputation memory layout to limit data-dependency to a minimum |
644 | | * to protect secret exponents (cf. the hyper-threading timing attacks |
645 | | * pointed out by Colin Percival, |
646 | | * http://www.daemonology.net/hyperthreading-considered-harmful/) |
647 | | */ |
648 | | int |
649 | | BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, |
650 | | const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont) |
651 | 876 | { |
652 | 876 | int i, bits, ret = 0, window, wvalue; |
653 | 876 | int top; |
654 | 876 | BN_MONT_CTX *mont = NULL; |
655 | 876 | int numPowers; |
656 | 876 | unsigned char *powerbufFree = NULL; |
657 | 876 | int powerbufLen = 0; |
658 | 876 | unsigned char *powerbuf = NULL; |
659 | 876 | BIGNUM tmp, am; |
660 | | |
661 | 876 | bn_check_top(a); |
662 | 876 | bn_check_top(p); |
663 | 876 | bn_check_top(m); |
664 | | |
665 | 876 | if (!BN_is_odd(m)) { |
666 | 34 | BNerror(BN_R_CALLED_WITH_EVEN_MODULUS); |
667 | 34 | return (0); |
668 | 34 | } |
669 | | |
670 | 842 | top = m->top; |
671 | | |
672 | 842 | bits = BN_num_bits(p); |
673 | 842 | if (bits == 0) { |
674 | | /* x**0 mod 1 is still zero. */ |
675 | 28 | if (BN_is_one(m)) { |
676 | 14 | ret = 1; |
677 | 14 | BN_zero(rr); |
678 | 14 | } else |
679 | 14 | ret = BN_one(rr); |
680 | 28 | return ret; |
681 | 28 | } |
682 | | |
683 | 814 | BN_CTX_start(ctx); |
684 | | |
685 | | /* Allocate a montgomery context if it was not supplied by the caller. |
686 | | * If this is not done, things will break in the montgomery part. |
687 | | */ |
688 | 814 | if (in_mont != NULL) |
689 | 495 | mont = in_mont; |
690 | 319 | else { |
691 | 319 | if ((mont = BN_MONT_CTX_new()) == NULL) |
692 | 0 | goto err; |
693 | 319 | if (!BN_MONT_CTX_set(mont, m, ctx)) |
694 | 0 | goto err; |
695 | 319 | } |
696 | | |
697 | | /* Get the window size to use with size of p. */ |
698 | 814 | window = BN_window_bits_for_ctime_exponent_size(bits); |
699 | 814 | #if defined(OPENSSL_BN_ASM_MONT5) |
700 | 814 | if (window == 6 && bits <= 1024) |
701 | 3 | window = 5; /* ~5% improvement of 2048-bit RSA sign */ |
702 | 814 | #endif |
703 | | |
704 | | /* Allocate a buffer large enough to hold all of the pre-computed |
705 | | * powers of am, am itself and tmp. |
706 | | */ |
707 | 814 | numPowers = 1 << window; |
708 | 814 | powerbufLen = sizeof(m->d[0]) * (top * numPowers + |
709 | 814 | ((2*top) > numPowers ? (2*top) : numPowers)); |
710 | 814 | if ((powerbufFree = calloc(powerbufLen + |
711 | 814 | MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH, 1)) == NULL) |
712 | 0 | goto err; |
713 | 814 | powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree); |
714 | | |
715 | | /* lay down tmp and am right after powers table */ |
716 | 814 | tmp.d = (BN_ULONG *)(powerbuf + sizeof(m->d[0]) * top * numPowers); |
717 | 814 | am.d = tmp.d + top; |
718 | 814 | tmp.top = am.top = 0; |
719 | 814 | tmp.dmax = am.dmax = top; |
720 | 814 | tmp.neg = am.neg = 0; |
721 | 814 | tmp.flags = am.flags = BN_FLG_STATIC_DATA; |
722 | | |
723 | | /* prepare a^0 in Montgomery domain */ |
724 | 814 | #if 1 |
725 | 814 | if (!BN_to_montgomery(&tmp, BN_value_one(), mont, ctx)) |
726 | 0 | goto err; |
727 | | #else |
728 | | tmp.d[0] = (0 - m - >d[0]) & BN_MASK2; /* 2^(top*BN_BITS2) - m */ |
729 | | for (i = 1; i < top; i++) |
730 | | tmp.d[i] = (~m->d[i]) & BN_MASK2; |
731 | | tmp.top = top; |
732 | | #endif |
733 | | |
734 | | /* prepare a^1 in Montgomery domain */ |
735 | 814 | if (a->neg || BN_ucmp(a, m) >= 0) { |
736 | 48 | if (!BN_mod_ct(&am, a,m, ctx)) |
737 | 0 | goto err; |
738 | 48 | if (!BN_to_montgomery(&am, &am, mont, ctx)) |
739 | 0 | goto err; |
740 | 766 | } else if (!BN_to_montgomery(&am, a,mont, ctx)) |
741 | 0 | goto err; |
742 | | |
743 | 814 | #if defined(OPENSSL_BN_ASM_MONT5) |
744 | | /* This optimization uses ideas from http://eprint.iacr.org/2011/239, |
745 | | * specifically optimization of cache-timing attack countermeasures |
746 | | * and pre-computation optimization. */ |
747 | | |
748 | | /* Dedicated window==4 case improves 512-bit RSA sign by ~15%, but as |
749 | | * 512-bit RSA is hardly relevant, we omit it to spare size... */ |
750 | 814 | if (window == 5 && top > 1) { |
751 | 66 | void bn_mul_mont_gather5(BN_ULONG *rp, const BN_ULONG *ap, |
752 | 66 | const void *table, const BN_ULONG *np, |
753 | 66 | const BN_ULONG *n0, int num, int power); |
754 | 66 | void bn_scatter5(const BN_ULONG *inp, size_t num, |
755 | 66 | void *table, size_t power); |
756 | 66 | void bn_gather5(BN_ULONG *out, size_t num, |
757 | 66 | void *table, size_t power); |
758 | | |
759 | 66 | BN_ULONG *np = mont->N.d, *n0 = mont->n0; |
760 | | |
761 | | /* BN_to_montgomery can contaminate words above .top |
762 | | * [in BN_DEBUG[_DEBUG] build]... */ |
763 | 112 | for (i = am.top; i < top; i++) |
764 | 46 | am.d[i] = 0; |
765 | 86 | for (i = tmp.top; i < top; i++) |
766 | 20 | tmp.d[i] = 0; |
767 | | |
768 | 66 | bn_scatter5(tmp.d, top, powerbuf, 0); |
769 | 66 | bn_scatter5(am.d, am.top, powerbuf, 1); |
770 | 66 | bn_mul_mont(tmp.d, am.d, am.d, np, n0, top); |
771 | 66 | bn_scatter5(tmp.d, top, powerbuf, 2); |
772 | | |
773 | | #if 0 |
774 | | for (i = 3; i < 32; i++) { |
775 | | /* Calculate a^i = a^(i-1) * a */ |
776 | | bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, |
777 | | n0, top, i - 1); |
778 | | bn_scatter5(tmp.d, top, powerbuf, i); |
779 | | } |
780 | | #else |
781 | | /* same as above, but uses squaring for 1/2 of operations */ |
782 | 264 | for (i = 4; i < 32; i*=2) { |
783 | 198 | bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); |
784 | 198 | bn_scatter5(tmp.d, top, powerbuf, i); |
785 | 198 | } |
786 | 264 | for (i = 3; i < 8; i += 2) { |
787 | 198 | int j; |
788 | 198 | bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, |
789 | 198 | n0, top, i - 1); |
790 | 198 | bn_scatter5(tmp.d, top, powerbuf, i); |
791 | 660 | for (j = 2 * i; j < 32; j *= 2) { |
792 | 462 | bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); |
793 | 462 | bn_scatter5(tmp.d, top, powerbuf, j); |
794 | 462 | } |
795 | 198 | } |
796 | 330 | for (; i < 16; i += 2) { |
797 | 264 | bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, |
798 | 264 | n0, top, i - 1); |
799 | 264 | bn_scatter5(tmp.d, top, powerbuf, i); |
800 | 264 | bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); |
801 | 264 | bn_scatter5(tmp.d, top, powerbuf, 2*i); |
802 | 264 | } |
803 | 594 | for (; i < 32; i += 2) { |
804 | 528 | bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, |
805 | 528 | n0, top, i - 1); |
806 | 528 | bn_scatter5(tmp.d, top, powerbuf, i); |
807 | 528 | } |
808 | 66 | #endif |
809 | 66 | bits--; |
810 | 261 | for (wvalue = 0, i = bits % 5; i >= 0; i--, bits--) |
811 | 195 | wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); |
812 | 66 | bn_gather5(tmp.d, top, powerbuf, wvalue); |
813 | | |
814 | | /* Scan the exponent one window at a time starting from the most |
815 | | * significant bits. |
816 | | */ |
817 | 5.31k | while (bits >= 0) { |
818 | 31.4k | for (wvalue = 0, i = 0; i < 5; i++, bits--) |
819 | 26.2k | wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); |
820 | | |
821 | 5.24k | bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); |
822 | 5.24k | bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); |
823 | 5.24k | bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); |
824 | 5.24k | bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); |
825 | 5.24k | bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); |
826 | 5.24k | bn_mul_mont_gather5(tmp.d, tmp.d, powerbuf, np, n0, top, wvalue); |
827 | 5.24k | } |
828 | | |
829 | 66 | tmp.top = top; |
830 | 66 | bn_correct_top(&tmp); |
831 | 66 | } else |
832 | 748 | #endif |
833 | 748 | { |
834 | 748 | if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 0, |
835 | 748 | window)) |
836 | 0 | goto err; |
837 | 748 | if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&am, top, powerbuf, 1, |
838 | 748 | window)) |
839 | 0 | goto err; |
840 | | |
841 | | /* If the window size is greater than 1, then calculate |
842 | | * val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1) |
843 | | * (even powers could instead be computed as (a^(i/2))^2 |
844 | | * to use the slight performance advantage of sqr over mul). |
845 | | */ |
846 | 748 | if (window > 1) { |
847 | 664 | if (!BN_mod_mul_montgomery(&tmp, &am, &am, mont, ctx)) |
848 | 0 | goto err; |
849 | 664 | if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, |
850 | 664 | 2, window)) |
851 | 0 | goto err; |
852 | 24.6k | for (i = 3; i < numPowers; i++) { |
853 | | /* Calculate a^i = a^(i-1) * a */ |
854 | 23.9k | if (!BN_mod_mul_montgomery(&tmp, &am, &tmp, |
855 | 23.9k | mont, ctx)) |
856 | 0 | goto err; |
857 | 23.9k | if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, |
858 | 23.9k | powerbuf, i, window)) |
859 | 0 | goto err; |
860 | 23.9k | } |
861 | 664 | } |
862 | | |
863 | 748 | bits--; |
864 | 3.05k | for (wvalue = 0, i = bits % window; i >= 0; i--, bits--) |
865 | 2.31k | wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); |
866 | 748 | if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&tmp, top, powerbuf, |
867 | 748 | wvalue, window)) |
868 | 0 | goto err; |
869 | | |
870 | | /* Scan the exponent one window at a time starting from the most |
871 | | * significant bits. |
872 | | */ |
873 | 115k | while (bits >= 0) { |
874 | 114k | wvalue = 0; /* The 'value' of the window */ |
875 | | |
876 | | /* Scan the window, squaring the result as we go */ |
877 | 768k | for (i = 0; i < window; i++, bits--) { |
878 | 653k | if (!BN_mod_mul_montgomery(&tmp, &tmp, &tmp, |
879 | 653k | mont, ctx)) |
880 | 0 | goto err; |
881 | 653k | wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); |
882 | 653k | } |
883 | | |
884 | | /* Fetch the appropriate pre-computed value from the pre-buf */ |
885 | 114k | if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&am, top, powerbuf, |
886 | 114k | wvalue, window)) |
887 | 0 | goto err; |
888 | | |
889 | | /* Multiply the result into the intermediate result */ |
890 | 114k | if (!BN_mod_mul_montgomery(&tmp, &tmp, &am, mont, ctx)) |
891 | 0 | goto err; |
892 | 114k | } |
893 | 748 | } |
894 | | |
895 | | /* Convert the final result from montgomery to standard format */ |
896 | 814 | if (!BN_from_montgomery(rr, &tmp, mont, ctx)) |
897 | 0 | goto err; |
898 | 814 | ret = 1; |
899 | | |
900 | 814 | err: |
901 | 814 | if ((in_mont == NULL) && (mont != NULL)) |
902 | 319 | BN_MONT_CTX_free(mont); |
903 | 814 | freezero(powerbufFree, powerbufLen + MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH); |
904 | 814 | BN_CTX_end(ctx); |
905 | 814 | return (ret); |
906 | 814 | } |
907 | | |
908 | | int |
909 | | BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p, const BIGNUM *m, |
910 | | BN_CTX *ctx, BN_MONT_CTX *in_mont) |
911 | 77 | { |
912 | 77 | BN_MONT_CTX *mont = NULL; |
913 | 77 | int b, bits, ret = 0; |
914 | 77 | int r_is_one; |
915 | 77 | BN_ULONG w, next_w; |
916 | 77 | BIGNUM *d, *r, *t; |
917 | 77 | BIGNUM *swap_tmp; |
918 | | |
919 | 77 | #define BN_MOD_MUL_WORD(r, w, m) \ |
920 | 911 | (BN_mul_word(r, (w)) && \ |
921 | 911 | (/* BN_ucmp(r, (m)) < 0 ? 1 :*/ \ |
922 | 911 | (BN_mod_ct(t, r, m, ctx) && (swap_tmp = r, r = t, t = swap_tmp, 1)))) |
923 | | /* BN_MOD_MUL_WORD is only used with 'w' large, |
924 | | * so the BN_ucmp test is probably more overhead |
925 | | * than always using BN_mod (which uses BN_copy if |
926 | | * a similar test returns true). */ |
927 | | /* We can use BN_mod and do not need BN_nnmod because our |
928 | | * accumulator is never negative (the result of BN_mod does |
929 | | * not depend on the sign of the modulus). |
930 | | */ |
931 | 77 | #define BN_TO_MONTGOMERY_WORD(r, w, mont) \ |
932 | 77 | (BN_set_word(r, (w)) && BN_to_montgomery(r, r, (mont), ctx)) |
933 | | |
934 | 77 | if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { |
935 | | /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ |
936 | 1 | BNerror(ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); |
937 | 1 | return -1; |
938 | 1 | } |
939 | | |
940 | 76 | bn_check_top(p); |
941 | 76 | bn_check_top(m); |
942 | | |
943 | 76 | if (!BN_is_odd(m)) { |
944 | 3 | BNerror(BN_R_CALLED_WITH_EVEN_MODULUS); |
945 | 3 | return (0); |
946 | 3 | } |
947 | 73 | if (m->top == 1) |
948 | 51 | a %= m->d[0]; /* make sure that 'a' is reduced */ |
949 | | |
950 | 73 | bits = BN_num_bits(p); |
951 | 73 | if (bits == 0) { |
952 | | /* x**0 mod 1 is still zero. */ |
953 | 18 | if (BN_is_one(m)) { |
954 | 5 | ret = 1; |
955 | 5 | BN_zero(rr); |
956 | 5 | } else |
957 | 13 | ret = BN_one(rr); |
958 | 18 | return ret; |
959 | 18 | } |
960 | 55 | if (a == 0) { |
961 | 3 | BN_zero(rr); |
962 | 3 | ret = 1; |
963 | 3 | return ret; |
964 | 3 | } |
965 | | |
966 | 52 | BN_CTX_start(ctx); |
967 | 52 | if ((d = BN_CTX_get(ctx)) == NULL) |
968 | 0 | goto err; |
969 | 52 | if ((r = BN_CTX_get(ctx)) == NULL) |
970 | 0 | goto err; |
971 | 52 | if ((t = BN_CTX_get(ctx)) == NULL) |
972 | 0 | goto err; |
973 | | |
974 | 52 | if (in_mont != NULL) |
975 | 0 | mont = in_mont; |
976 | 52 | else { |
977 | 52 | if ((mont = BN_MONT_CTX_new()) == NULL) |
978 | 0 | goto err; |
979 | 52 | if (!BN_MONT_CTX_set(mont, m, ctx)) |
980 | 0 | goto err; |
981 | 52 | } |
982 | | |
983 | 52 | r_is_one = 1; /* except for Montgomery factor */ |
984 | | |
985 | | /* bits-1 >= 0 */ |
986 | | |
987 | | /* The result is accumulated in the product r*w. */ |
988 | 52 | w = a; /* bit 'bits-1' of 'p' is always set */ |
989 | 2.35k | for (b = bits - 2; b >= 0; b--) { |
990 | | /* First, square r*w. */ |
991 | 2.30k | next_w = w * w; |
992 | 2.30k | if ((next_w / w) != w) /* overflow */ |
993 | 577 | { |
994 | 577 | if (r_is_one) { |
995 | 26 | if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) |
996 | 0 | goto err; |
997 | 26 | r_is_one = 0; |
998 | 551 | } else { |
999 | 551 | if (!BN_MOD_MUL_WORD(r, w, m)) |
1000 | 0 | goto err; |
1001 | 551 | } |
1002 | 577 | next_w = 1; |
1003 | 577 | } |
1004 | 2.30k | w = next_w; |
1005 | 2.30k | if (!r_is_one) { |
1006 | 2.19k | if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) |
1007 | 0 | goto err; |
1008 | 2.19k | } |
1009 | | |
1010 | | /* Second, multiply r*w by 'a' if exponent bit is set. */ |
1011 | 2.30k | if (BN_is_bit_set(p, b)) { |
1012 | 986 | next_w = w * a; |
1013 | 986 | if ((next_w / a) != w) /* overflow */ |
1014 | 352 | { |
1015 | 352 | if (r_is_one) { |
1016 | 4 | if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) |
1017 | 0 | goto err; |
1018 | 4 | r_is_one = 0; |
1019 | 348 | } else { |
1020 | 348 | if (!BN_MOD_MUL_WORD(r, w, m)) |
1021 | 0 | goto err; |
1022 | 348 | } |
1023 | 352 | next_w = a; |
1024 | 352 | } |
1025 | 986 | w = next_w; |
1026 | 986 | } |
1027 | 2.30k | } |
1028 | | |
1029 | | /* Finally, set r:=r*w. */ |
1030 | 52 | if (w != 1) { |
1031 | 23 | if (r_is_one) { |
1032 | 11 | if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) |
1033 | 0 | goto err; |
1034 | 11 | r_is_one = 0; |
1035 | 12 | } else { |
1036 | 12 | if (!BN_MOD_MUL_WORD(r, w, m)) |
1037 | 0 | goto err; |
1038 | 12 | } |
1039 | 23 | } |
1040 | | |
1041 | 52 | if (r_is_one) /* can happen only if a == 1*/ |
1042 | 11 | { |
1043 | 11 | if (!BN_one(rr)) |
1044 | 0 | goto err; |
1045 | 41 | } else { |
1046 | 41 | if (!BN_from_montgomery(rr, r, mont, ctx)) |
1047 | 0 | goto err; |
1048 | 41 | } |
1049 | 52 | ret = 1; |
1050 | | |
1051 | 52 | err: |
1052 | 52 | if ((in_mont == NULL) && (mont != NULL)) |
1053 | 52 | BN_MONT_CTX_free(mont); |
1054 | 52 | BN_CTX_end(ctx); |
1055 | 52 | bn_check_top(rr); |
1056 | 52 | return (ret); |
1057 | 52 | } |
1058 | | |
1059 | | |
1060 | | /* The old fallback, simple version :-) */ |
1061 | | int |
1062 | | BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, |
1063 | | BN_CTX *ctx) |
1064 | 63 | { |
1065 | 63 | int i, j, bits, ret = 0, wstart, wend, window, wvalue; |
1066 | 63 | int start = 1; |
1067 | 63 | BIGNUM *d; |
1068 | | /* Table of variables obtained from 'ctx' */ |
1069 | 63 | BIGNUM *val[TABLE_SIZE]; |
1070 | | |
1071 | 63 | if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { |
1072 | | /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ |
1073 | 0 | BNerror(ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); |
1074 | 0 | return -1; |
1075 | 0 | } |
1076 | | |
1077 | 63 | bits = BN_num_bits(p); |
1078 | 63 | if (bits == 0) { |
1079 | | /* x**0 mod 1 is still zero. */ |
1080 | 9 | if (BN_is_one(m)) { |
1081 | 4 | ret = 1; |
1082 | 4 | BN_zero(r); |
1083 | 4 | } else |
1084 | 5 | ret = BN_one(r); |
1085 | 9 | return ret; |
1086 | 9 | } |
1087 | | |
1088 | 54 | BN_CTX_start(ctx); |
1089 | 54 | if ((d = BN_CTX_get(ctx)) == NULL) |
1090 | 0 | goto err; |
1091 | 54 | if ((val[0] = BN_CTX_get(ctx)) == NULL) |
1092 | 0 | goto err; |
1093 | | |
1094 | 54 | if (!BN_nnmod(val[0],a,m,ctx)) |
1095 | 3 | goto err; /* 1 */ |
1096 | 51 | if (BN_is_zero(val[0])) { |
1097 | 2 | BN_zero(r); |
1098 | 2 | ret = 1; |
1099 | 2 | goto err; |
1100 | 2 | } |
1101 | | |
1102 | 49 | window = BN_window_bits_for_exponent_size(bits); |
1103 | 49 | if (window > 1) { |
1104 | 33 | if (!BN_mod_mul(d, val[0], val[0], m, ctx)) |
1105 | 0 | goto err; /* 2 */ |
1106 | 33 | j = 1 << (window - 1); |
1107 | 248 | for (i = 1; i < j; i++) { |
1108 | 215 | if (((val[i] = BN_CTX_get(ctx)) == NULL) || |
1109 | 215 | !BN_mod_mul(val[i], val[i - 1], d,m, ctx)) |
1110 | 0 | goto err; |
1111 | 215 | } |
1112 | 33 | } |
1113 | | |
1114 | 49 | start = 1; /* This is used to avoid multiplication etc |
1115 | | * when there is only the value '1' in the |
1116 | | * buffer. */ |
1117 | 49 | wvalue = 0; /* The 'value' of the window */ |
1118 | 49 | wstart = bits - 1; /* The top bit of the window */ |
1119 | 49 | wend = 0; /* The bottom bit of the window */ |
1120 | | |
1121 | 49 | if (!BN_one(r)) |
1122 | 0 | goto err; |
1123 | | |
1124 | 2.60k | for (;;) { |
1125 | 2.60k | if (BN_is_bit_set(p, wstart) == 0) { |
1126 | 1.68k | if (!start) |
1127 | 1.68k | if (!BN_mod_mul(r, r, r, m, ctx)) |
1128 | 0 | goto err; |
1129 | 1.68k | if (wstart == 0) |
1130 | 27 | break; |
1131 | 1.66k | wstart--; |
1132 | 1.66k | continue; |
1133 | 1.68k | } |
1134 | | /* We now have wstart on a 'set' bit, we now need to work out |
1135 | | * how bit a window to do. To do this we need to scan |
1136 | | * forward until the last set bit before the end of the |
1137 | | * window */ |
1138 | 916 | j = wstart; |
1139 | 916 | wvalue = 1; |
1140 | 916 | wend = 0; |
1141 | 3.57k | for (i = 1; i < window; i++) { |
1142 | 2.67k | if (wstart - i < 0) |
1143 | 21 | break; |
1144 | 2.65k | if (BN_is_bit_set(p, wstart - i)) { |
1145 | 1.35k | wvalue <<= (i - wend); |
1146 | 1.35k | wvalue |= 1; |
1147 | 1.35k | wend = i; |
1148 | 1.35k | } |
1149 | 2.65k | } |
1150 | | |
1151 | | /* wend is the size of the current window */ |
1152 | 916 | j = wend + 1; |
1153 | | /* add the 'bytes above' */ |
1154 | 916 | if (!start) |
1155 | 3.60k | for (i = 0; i < j; i++) { |
1156 | 2.73k | if (!BN_mod_mul(r, r, r, m, ctx)) |
1157 | 0 | goto err; |
1158 | 2.73k | } |
1159 | | |
1160 | | /* wvalue will be an odd number < 2^window */ |
1161 | 916 | if (!BN_mod_mul(r, r, val[wvalue >> 1], m, ctx)) |
1162 | 0 | goto err; |
1163 | | |
1164 | | /* move the 'window' down further */ |
1165 | 916 | wstart -= wend + 1; |
1166 | 916 | wvalue = 0; |
1167 | 916 | start = 0; |
1168 | 916 | if (wstart < 0) |
1169 | 22 | break; |
1170 | 916 | } |
1171 | 49 | ret = 1; |
1172 | | |
1173 | 54 | err: |
1174 | 54 | BN_CTX_end(ctx); |
1175 | 54 | bn_check_top(r); |
1176 | 54 | return (ret); |
1177 | 49 | } |