/src/libgcrypt/mpi/mpi-pow.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* mpi-pow.c - MPI functions for exponentiation |
2 | | * Copyright (C) 1994, 1996, 1998, 2000, 2002 |
3 | | * 2003 Free Software Foundation, Inc. |
4 | | * 2013 g10 Code GmbH |
5 | | * |
6 | | * This file is part of Libgcrypt. |
7 | | * |
8 | | * Libgcrypt is free software; you can redistribute it and/or modify |
9 | | * it under the terms of the GNU Lesser General Public License as |
10 | | * published by the Free Software Foundation; either version 2.1 of |
11 | | * the License, or (at your option) any later version. |
12 | | * |
13 | | * Libgcrypt is distributed in the hope that it will be useful, |
14 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
16 | | * GNU Lesser General Public License for more details. |
17 | | * |
18 | | * You should have received a copy of the GNU Lesser General Public |
19 | | * License along with this program; if not, see <http://www.gnu.org/licenses/>. |
20 | | * |
21 | | * Note: This code is heavily based on the GNU MP Library. |
22 | | * Actually it's the same code with only minor changes in the |
23 | | * way the data is stored; this is to support the abstraction |
24 | | * of an optional secure memory allocation which may be used |
25 | | * to avoid revealing of sensitive data due to paging etc. |
26 | | */ |
27 | | |
28 | | #include <config.h> |
29 | | #include <stdio.h> |
30 | | #include <stdlib.h> |
31 | | #include <string.h> |
32 | | |
33 | | #include "mpi-internal.h" |
34 | | #include "longlong.h" |
35 | | |
36 | | |
37 | | /* |
38 | | * When you need old implementation, please add compilation option |
39 | | * -DUSE_ALGORITHM_SIMPLE_EXPONENTIATION |
40 | | * or expose this line: |
41 | | #define USE_ALGORITHM_SIMPLE_EXPONENTIATION 1 |
42 | | */ |
43 | | |
44 | | #if defined(USE_ALGORITHM_SIMPLE_EXPONENTIATION) |
45 | | /**************** |
46 | | * RES = BASE ^ EXPO mod MOD |
47 | | */ |
48 | | void |
49 | | _gcry_mpi_powm (gcry_mpi_t res, |
50 | | gcry_mpi_t base, gcry_mpi_t expo, gcry_mpi_t mod) |
51 | | { |
52 | | /* Pointer to the limbs of the arguments, their size and signs. */ |
53 | | mpi_ptr_t rp, ep, mp, bp; |
54 | | mpi_size_t esize, msize, bsize, rsize; |
55 | | int msign, bsign, rsign; |
56 | | /* Flags telling the secure allocation status of the arguments. */ |
57 | | int esec, msec, bsec; |
58 | | /* Size of the result including space for temporary values. */ |
59 | | mpi_size_t size; |
60 | | /* Helper. */ |
61 | | int mod_shift_cnt; |
62 | | int negative_result; |
63 | | mpi_ptr_t mp_marker = NULL; |
64 | | mpi_ptr_t bp_marker = NULL; |
65 | | mpi_ptr_t ep_marker = NULL; |
66 | | mpi_ptr_t xp_marker = NULL; |
67 | | unsigned int mp_nlimbs = 0; |
68 | | unsigned int bp_nlimbs = 0; |
69 | | unsigned int ep_nlimbs = 0; |
70 | | unsigned int xp_nlimbs = 0; |
71 | | mpi_ptr_t tspace = NULL; |
72 | | mpi_size_t tsize = 0; |
73 | | |
74 | | |
75 | | esize = expo->nlimbs; |
76 | | msize = mod->nlimbs; |
77 | | size = 2 * msize; |
78 | | msign = mod->sign; |
79 | | |
80 | | esec = mpi_is_secure(expo); |
81 | | msec = mpi_is_secure(mod); |
82 | | bsec = mpi_is_secure(base); |
83 | | |
84 | | rp = res->d; |
85 | | ep = expo->d; |
86 | | MPN_NORMALIZE(ep, esize); |
87 | | |
88 | | if (!msize) |
89 | | _gcry_divide_by_zero(); |
90 | | |
91 | | if (!esize) |
92 | | { |
93 | | /* Exponent is zero, result is 1 mod MOD, i.e., 1 or 0 depending |
94 | | on if MOD equals 1. */ |
95 | | res->nlimbs = (msize == 1 && mod->d[0] == 1) ? 0 : 1; |
96 | | if (res->nlimbs) |
97 | | { |
98 | | RESIZE_IF_NEEDED (res, 1); |
99 | | rp = res->d; |
100 | | rp[0] = 1; |
101 | | } |
102 | | res->sign = 0; |
103 | | goto leave; |
104 | | } |
105 | | |
106 | | /* Normalize MOD (i.e. make its most significant bit set) as |
107 | | required by mpn_divrem. This will make the intermediate values |
108 | | in the calculation slightly larger, but the correct result is |
109 | | obtained after a final reduction using the original MOD value. */ |
110 | | mp_nlimbs = msec? msize:0; |
111 | | mp = mp_marker = mpi_alloc_limb_space(msize, msec); |
112 | | count_leading_zeros (mod_shift_cnt, mod->d[msize-1]); |
113 | | if (mod_shift_cnt) |
114 | | _gcry_mpih_lshift (mp, mod->d, msize, mod_shift_cnt); |
115 | | else |
116 | | MPN_COPY( mp, mod->d, msize ); |
117 | | |
118 | | bsize = base->nlimbs; |
119 | | bsign = base->sign; |
120 | | if (bsize > msize) |
121 | | { |
122 | | /* The base is larger than the module. Reduce it. |
123 | | |
124 | | Allocate (BSIZE + 1) with space for remainder and quotient. |
125 | | (The quotient is (bsize - msize + 1) limbs.) */ |
126 | | bp_nlimbs = bsec ? (bsize + 1):0; |
127 | | bp = bp_marker = mpi_alloc_limb_space( bsize + 1, bsec ); |
128 | | MPN_COPY ( bp, base->d, bsize ); |
129 | | /* We don't care about the quotient, store it above the |
130 | | * remainder, at BP + MSIZE. */ |
131 | | _gcry_mpih_divrem( bp + msize, 0, bp, bsize, mp, msize ); |
132 | | bsize = msize; |
133 | | /* Canonicalize the base, since we are going to multiply with it |
134 | | quite a few times. */ |
135 | | MPN_NORMALIZE( bp, bsize ); |
136 | | } |
137 | | else |
138 | | bp = base->d; |
139 | | |
140 | | if (!bsize) |
141 | | { |
142 | | res->nlimbs = 0; |
143 | | res->sign = 0; |
144 | | goto leave; |
145 | | } |
146 | | |
147 | | |
148 | | /* Make BASE, EXPO and MOD not overlap with RES. */ |
149 | | if ( rp == bp ) |
150 | | { |
151 | | /* RES and BASE are identical. Allocate temp. space for BASE. */ |
152 | | gcry_assert (!bp_marker); |
153 | | bp_nlimbs = bsec? bsize:0; |
154 | | bp = bp_marker = mpi_alloc_limb_space( bsize, bsec ); |
155 | | MPN_COPY(bp, rp, bsize); |
156 | | } |
157 | | if ( rp == ep ) |
158 | | { |
159 | | /* RES and EXPO are identical. Allocate temp. space for EXPO. */ |
160 | | ep_nlimbs = esec? esize:0; |
161 | | ep = ep_marker = mpi_alloc_limb_space( esize, esec ); |
162 | | MPN_COPY(ep, rp, esize); |
163 | | } |
164 | | if ( rp == mp ) |
165 | | { |
166 | | /* RES and MOD are identical. Allocate temporary space for MOD.*/ |
167 | | gcry_assert (!mp_marker); |
168 | | mp_nlimbs = msec?msize:0; |
169 | | mp = mp_marker = mpi_alloc_limb_space( msize, msec ); |
170 | | MPN_COPY(mp, rp, msize); |
171 | | } |
172 | | |
173 | | /* Copy base to the result. */ |
174 | | if (res->alloced < size) |
175 | | { |
176 | | mpi_resize (res, size); |
177 | | rp = res->d; |
178 | | } |
179 | | MPN_COPY ( rp, bp, bsize ); |
180 | | rsize = bsize; |
181 | | rsign = 0; |
182 | | |
183 | | /* Main processing. */ |
184 | | { |
185 | | mpi_size_t i; |
186 | | mpi_ptr_t xp; |
187 | | int c; |
188 | | mpi_limb_t e; |
189 | | mpi_limb_t carry_limb; |
190 | | struct karatsuba_ctx karactx; |
191 | | struct gcry_mpi w, u; |
192 | | |
193 | | xp_nlimbs = msec? size:0; |
194 | | xp = xp_marker = mpi_alloc_limb_space( size, msec ); |
195 | | |
196 | | w.sign = u.sign = 0; |
197 | | w.flags = u.flags = 0; |
198 | | w.alloced = w.nlimbs = size; /* RES->alloc may be longer. */ |
199 | | u.alloced = u.nlimbs = size; |
200 | | |
201 | | memset( &karactx, 0, sizeof karactx ); |
202 | | negative_result = (ep[0] & 1) && bsign; |
203 | | |
204 | | i = esize - 1; |
205 | | e = ep[i]; |
206 | | count_leading_zeros (c, e); |
207 | | e = (e << c) << 1; /* Shift the expo bits to the left, lose msb. */ |
208 | | c = BITS_PER_MPI_LIMB - 1 - c; |
209 | | |
210 | | /* Main loop. |
211 | | |
212 | | Make the result be pointed to alternately by XP and RP. This |
213 | | helps us avoid block copying, which would otherwise be |
214 | | necessary with the overlap restrictions of |
215 | | _gcry_mpih_divmod. With 50% probability the result after this |
216 | | loop will be in the area originally pointed by RP (==RES->d), |
217 | | and with 50% probability in the area originally pointed to by XP. */ |
218 | | for (;;) |
219 | | { |
220 | | while (c) |
221 | | { |
222 | | mpi_ptr_t tp; |
223 | | mpi_size_t xsize; |
224 | | |
225 | | /*mpih_mul_n(xp, rp, rp, rsize);*/ |
226 | | if ( rsize < KARATSUBA_THRESHOLD ) |
227 | | _gcry_mpih_sqr_n_basecase( xp, rp, rsize ); |
228 | | else |
229 | | { |
230 | | if ( !tspace ) |
231 | | { |
232 | | tsize = 2 * rsize; |
233 | | tspace = mpi_alloc_limb_space( tsize, 0 ); |
234 | | } |
235 | | else if ( tsize < (2*rsize) ) |
236 | | { |
237 | | _gcry_mpi_free_limb_space (tspace, 0); |
238 | | tsize = 2 * rsize; |
239 | | tspace = mpi_alloc_limb_space (tsize, 0 ); |
240 | | } |
241 | | _gcry_mpih_sqr_n (xp, rp, rsize, tspace); |
242 | | } |
243 | | |
244 | | xsize = 2 * rsize; |
245 | | if ( xsize > msize ) |
246 | | { |
247 | | _gcry_mpih_divrem(xp + msize, 0, xp, xsize, mp, msize); |
248 | | xsize = msize; |
249 | | } |
250 | | |
251 | | tp = rp; rp = xp; xp = tp; |
252 | | rsize = xsize; |
253 | | |
254 | | /* To mitigate the Yarom/Falkner flush+reload cache |
255 | | * side-channel attack on the RSA secret exponent, we do |
256 | | * the multiplication regardless of the value of the |
257 | | * high-bit of E. But to avoid this performance penalty |
258 | | * we do it only if the exponent has been stored in secure |
259 | | * memory and we can thus assume it is a secret exponent. */ |
260 | | if (esec || (mpi_limb_signed_t)e < 0) |
261 | | { |
262 | | /*mpih_mul( xp, rp, rsize, bp, bsize );*/ |
263 | | if( bsize < KARATSUBA_THRESHOLD ) |
264 | | _gcry_mpih_mul ( xp, rp, rsize, bp, bsize ); |
265 | | else |
266 | | _gcry_mpih_mul_karatsuba_case (xp, rp, rsize, bp, bsize, |
267 | | &karactx); |
268 | | |
269 | | xsize = rsize + bsize; |
270 | | if ( xsize > msize ) |
271 | | { |
272 | | _gcry_mpih_divrem(xp + msize, 0, xp, xsize, mp, msize); |
273 | | xsize = msize; |
274 | | } |
275 | | } |
276 | | |
277 | | w.d = rp; |
278 | | u.d = xp; |
279 | | mpi_set_cond (&w, &u, ((mpi_limb_signed_t)e < 0)); |
280 | | |
281 | | e <<= 1; |
282 | | c--; |
283 | | } |
284 | | |
285 | | i--; |
286 | | if ( i < 0 ) |
287 | | break; |
288 | | e = ep[i]; |
289 | | c = BITS_PER_MPI_LIMB; |
290 | | } |
291 | | |
292 | | /* We shifted MOD, the modulo reduction argument, left |
293 | | MOD_SHIFT_CNT steps. Adjust the result by reducing it with the |
294 | | original MOD. |
295 | | |
296 | | Also make sure the result is put in RES->d (where it already |
297 | | might be, see above). */ |
298 | | if ( mod_shift_cnt ) |
299 | | { |
300 | | carry_limb = _gcry_mpih_lshift( res->d, rp, rsize, mod_shift_cnt); |
301 | | rp = res->d; |
302 | | if ( carry_limb ) |
303 | | { |
304 | | rp[rsize] = carry_limb; |
305 | | rsize++; |
306 | | } |
307 | | } |
308 | | else if (res->d != rp) |
309 | | { |
310 | | MPN_COPY (res->d, rp, rsize); |
311 | | rp = res->d; |
312 | | } |
313 | | |
314 | | if ( rsize >= msize ) |
315 | | { |
316 | | _gcry_mpih_divrem(rp + msize, 0, rp, rsize, mp, msize); |
317 | | rsize = msize; |
318 | | } |
319 | | |
320 | | /* Remove any leading zero words from the result. */ |
321 | | if ( mod_shift_cnt ) |
322 | | _gcry_mpih_rshift( rp, rp, rsize, mod_shift_cnt); |
323 | | MPN_NORMALIZE (rp, rsize); |
324 | | |
325 | | _gcry_mpih_release_karatsuba_ctx (&karactx ); |
326 | | } |
327 | | |
328 | | /* Fixup for negative results. */ |
329 | | if ( negative_result && rsize ) |
330 | | { |
331 | | if ( mod_shift_cnt ) |
332 | | _gcry_mpih_rshift( mp, mp, msize, mod_shift_cnt); |
333 | | _gcry_mpih_sub( rp, mp, msize, rp, rsize); |
334 | | rsize = msize; |
335 | | rsign = msign; |
336 | | MPN_NORMALIZE(rp, rsize); |
337 | | } |
338 | | gcry_assert (res->d == rp); |
339 | | res->nlimbs = rsize; |
340 | | res->sign = rsign; |
341 | | |
342 | | leave: |
343 | | if (mp_marker) |
344 | | _gcry_mpi_free_limb_space( mp_marker, mp_nlimbs ); |
345 | | if (bp_marker) |
346 | | _gcry_mpi_free_limb_space( bp_marker, bp_nlimbs ); |
347 | | if (ep_marker) |
348 | | _gcry_mpi_free_limb_space( ep_marker, ep_nlimbs ); |
349 | | if (xp_marker) |
350 | | _gcry_mpi_free_limb_space( xp_marker, xp_nlimbs ); |
351 | | if (tspace) |
352 | | _gcry_mpi_free_limb_space( tspace, 0 ); |
353 | | } |
354 | | #else |
355 | | /** |
356 | | * Internal function to compute |
357 | | * |
358 | | * X = R * S mod M |
359 | | * |
360 | | * and set the size of X at the pointer XSIZE_P. |
361 | | * Use karatsuba structure at KARACTX_P. |
362 | | * |
363 | | * Condition: |
364 | | * RSIZE >= SSIZE |
365 | | * Enough space for X is allocated beforehand. |
366 | | * |
367 | | * For generic cases, we can/should use gcry_mpi_mulm. |
368 | | * This function is use for specific internal case. |
369 | | */ |
370 | | static void |
371 | | mul_mod (mpi_ptr_t xp, mpi_size_t *xsize_p, |
372 | | mpi_ptr_t rp, mpi_size_t rsize, |
373 | | mpi_ptr_t sp, mpi_size_t ssize, |
374 | | mpi_ptr_t mp, mpi_size_t msize, |
375 | | struct karatsuba_ctx *karactx_p) |
376 | 4.58M | { |
377 | 4.58M | if( ssize < KARATSUBA_THRESHOLD ) |
378 | 4.30M | _gcry_mpih_mul ( xp, rp, rsize, sp, ssize ); |
379 | 280k | else |
380 | 280k | _gcry_mpih_mul_karatsuba_case (xp, rp, rsize, sp, ssize, karactx_p); |
381 | | |
382 | 4.58M | if (rsize + ssize > msize) |
383 | 3.89M | { |
384 | 3.89M | _gcry_mpih_divrem (xp + msize, 0, xp, rsize + ssize, mp, msize); |
385 | 3.89M | *xsize_p = msize; |
386 | 3.89M | } |
387 | 694k | else |
388 | 694k | *xsize_p = rsize + ssize; |
389 | 4.58M | } |
390 | | |
391 | | #define SIZE_PRECOMP ((1 << (5 - 1))) |
392 | | |
393 | | /**************** |
394 | | * RES = BASE ^ EXPO mod MOD |
395 | | * |
396 | | * To mitigate the Yarom/Falkner flush+reload cache side-channel |
397 | | * attack on the RSA secret exponent, we don't use the square |
398 | | * routine but multiplication. |
399 | | * |
400 | | * Reference: |
401 | | * Handbook of Applied Cryptography |
402 | | * Algorithm 14.83: Modified left-to-right k-ary exponentiation |
403 | | */ |
404 | | void |
405 | | _gcry_mpi_powm (gcry_mpi_t res, |
406 | | gcry_mpi_t base, gcry_mpi_t expo, gcry_mpi_t mod) |
407 | 1.36M | { |
408 | | /* Pointer to the limbs of the arguments, their size and signs. */ |
409 | 1.36M | mpi_ptr_t rp, ep, mp, bp; |
410 | 1.36M | mpi_size_t esize, msize, bsize, rsize; |
411 | 1.36M | int msign, bsign, rsign; |
412 | | /* Flags telling the secure allocation status of the arguments. */ |
413 | 1.36M | int esec, msec, bsec; |
414 | | /* Size of the result including space for temporary values. */ |
415 | 1.36M | mpi_size_t size; |
416 | | /* Helper. */ |
417 | 1.36M | int mod_shift_cnt; |
418 | 1.36M | int negative_result; |
419 | 1.36M | mpi_ptr_t mp_marker = NULL; |
420 | 1.36M | mpi_ptr_t bp_marker = NULL; |
421 | 1.36M | mpi_ptr_t ep_marker = NULL; |
422 | 1.36M | mpi_ptr_t xp_marker = NULL; |
423 | 1.36M | unsigned int mp_nlimbs = 0; |
424 | 1.36M | unsigned int bp_nlimbs = 0; |
425 | 1.36M | unsigned int ep_nlimbs = 0; |
426 | 1.36M | unsigned int xp_nlimbs = 0; |
427 | 1.36M | mpi_ptr_t precomp[SIZE_PRECOMP]; /* Pre-computed array: BASE^1, ^3, ^5, ... */ |
428 | 1.36M | mpi_size_t precomp_size[SIZE_PRECOMP]; |
429 | 1.36M | mpi_size_t W; |
430 | 1.36M | mpi_ptr_t base_u; |
431 | 1.36M | mpi_size_t base_u_size; |
432 | 1.36M | mpi_size_t max_u_size; |
433 | | |
434 | 1.36M | esize = expo->nlimbs; |
435 | 1.36M | msize = mod->nlimbs; |
436 | 1.36M | size = 2 * msize; |
437 | 1.36M | msign = mod->sign; |
438 | | |
439 | 1.36M | ep = expo->d; |
440 | 1.36M | MPN_NORMALIZE(ep, esize); |
441 | | |
442 | 1.36M | if (esize * BITS_PER_MPI_LIMB > 512) |
443 | 176 | W = 5; |
444 | 1.36M | else if (esize * BITS_PER_MPI_LIMB > 256) |
445 | 18 | W = 4; |
446 | 1.36M | else if (esize * BITS_PER_MPI_LIMB > 128) |
447 | 4.60k | W = 3; |
448 | 1.36M | else if (esize * BITS_PER_MPI_LIMB > 64) |
449 | 10 | W = 2; |
450 | 1.36M | else |
451 | 1.36M | W = 1; |
452 | | |
453 | 1.36M | esec = mpi_is_secure(expo); |
454 | 1.36M | msec = mpi_is_secure(mod); |
455 | 1.36M | bsec = mpi_is_secure(base); |
456 | | |
457 | 1.36M | rp = res->d; |
458 | | |
459 | 1.36M | if (!msize) |
460 | 0 | _gcry_divide_by_zero(); |
461 | | |
462 | 1.36M | if (!esize) |
463 | 7 | { |
464 | | /* Exponent is zero, result is 1 mod MOD, i.e., 1 or 0 depending |
465 | | on if MOD equals 1. */ |
466 | 7 | res->nlimbs = (msize == 1 && mod->d[0] == 1) ? 0 : 1; |
467 | 7 | if (res->nlimbs) |
468 | 7 | { |
469 | 7 | RESIZE_IF_NEEDED (res, 1); |
470 | 7 | rp = res->d; |
471 | 7 | rp[0] = 1; |
472 | 7 | } |
473 | 7 | res->sign = 0; |
474 | 7 | goto leave; |
475 | 7 | } |
476 | | |
477 | | /* Normalize MOD (i.e. make its most significant bit set) as |
478 | | required by mpn_divrem. This will make the intermediate values |
479 | | in the calculation slightly larger, but the correct result is |
480 | | obtained after a final reduction using the original MOD value. */ |
481 | 1.36M | mp_nlimbs = msec? msize:0; |
482 | 1.36M | mp = mp_marker = mpi_alloc_limb_space(msize, msec); |
483 | 1.36M | count_leading_zeros (mod_shift_cnt, mod->d[msize-1]); |
484 | 1.36M | if (mod_shift_cnt) |
485 | 248k | _gcry_mpih_lshift (mp, mod->d, msize, mod_shift_cnt); |
486 | 1.12M | else |
487 | 1.12M | MPN_COPY( mp, mod->d, msize ); |
488 | | |
489 | 1.36M | bsize = base->nlimbs; |
490 | 1.36M | bsign = base->sign; |
491 | 1.36M | if (bsize > msize) |
492 | 306 | { |
493 | | /* The base is larger than the module. Reduce it. |
494 | | |
495 | | Allocate (BSIZE + 1) with space for remainder and quotient. |
496 | | (The quotient is (bsize - msize + 1) limbs.) */ |
497 | 306 | bp_nlimbs = bsec ? (bsize + 1):0; |
498 | 306 | bp = bp_marker = mpi_alloc_limb_space( bsize + 1, bsec ); |
499 | 306 | MPN_COPY ( bp, base->d, bsize ); |
500 | | /* We don't care about the quotient, store it above the |
501 | | * remainder, at BP + MSIZE. */ |
502 | 306 | _gcry_mpih_divrem( bp + msize, 0, bp, bsize, mp, msize ); |
503 | 306 | bsize = msize; |
504 | | /* Canonicalize the base, since we are going to multiply with it |
505 | | quite a few times. */ |
506 | 306 | MPN_NORMALIZE( bp, bsize ); |
507 | 306 | } |
508 | 1.36M | else |
509 | 1.36M | bp = base->d; |
510 | | |
511 | 1.36M | if (!bsize) |
512 | 189 | { |
513 | 189 | res->nlimbs = 0; |
514 | 189 | res->sign = 0; |
515 | 189 | goto leave; |
516 | 189 | } |
517 | | |
518 | | |
519 | | /* Make BASE, EXPO not overlap with RES. We don't need to check MOD |
520 | | because that has already been copied to the MP var. */ |
521 | 1.36M | if ( rp == bp ) |
522 | 4.59k | { |
523 | | /* RES and BASE are identical. Allocate temp. space for BASE. */ |
524 | 4.59k | gcry_assert (!bp_marker); |
525 | 4.59k | bp_nlimbs = bsec? bsize:0; |
526 | 4.59k | bp = bp_marker = mpi_alloc_limb_space( bsize, bsec ); |
527 | 4.59k | MPN_COPY(bp, rp, bsize); |
528 | 4.59k | } |
529 | 1.36M | if ( rp == ep ) |
530 | 0 | { |
531 | | /* RES and EXPO are identical. Allocate temp. space for EXPO. */ |
532 | 0 | ep_nlimbs = esec? esize:0; |
533 | 0 | ep = ep_marker = mpi_alloc_limb_space( esize, esec ); |
534 | 0 | MPN_COPY(ep, rp, esize); |
535 | 0 | } |
536 | | |
537 | | /* Copy base to the result. */ |
538 | 1.36M | if (res->alloced < size) |
539 | 13.8k | { |
540 | 13.8k | mpi_resize (res, size); |
541 | 13.8k | rp = res->d; |
542 | 13.8k | } |
543 | | |
544 | | /* Main processing. */ |
545 | 1.36M | { |
546 | 1.36M | mpi_size_t i, j, k; |
547 | 1.36M | mpi_ptr_t xp; |
548 | 1.36M | mpi_size_t xsize = 0; |
549 | 1.36M | int c; |
550 | 1.36M | mpi_limb_t e; |
551 | 1.36M | mpi_limb_t carry_limb; |
552 | 1.36M | struct karatsuba_ctx karactx; |
553 | 1.36M | mpi_ptr_t tp; |
554 | | |
555 | 1.36M | xp_nlimbs = msec? size:0; |
556 | 1.36M | xp = xp_marker = mpi_alloc_limb_space( size, msec ); |
557 | | |
558 | 1.36M | memset( &karactx, 0, sizeof karactx ); |
559 | 1.36M | negative_result = (ep[0] & 1) && bsign; |
560 | | |
561 | | /* Precompute PRECOMP[], BASE^(2 * i + 1), BASE^1, ^3, ^5, ... */ |
562 | 1.36M | if (W > 1) /* X := BASE^2 */ |
563 | 4.80k | mul_mod (xp, &xsize, bp, bsize, bp, bsize, mp, msize, &karactx); |
564 | 1.36M | base_u = precomp[0] = mpi_alloc_limb_space (bsize, esec); |
565 | 1.36M | base_u_size = max_u_size = precomp_size[0] = bsize; |
566 | 1.36M | MPN_COPY (precomp[0], bp, bsize); |
567 | 1.38M | for (i = 1; i < (1 << (W - 1)); i++) |
568 | 16.5k | { /* PRECOMP[i] = BASE^(2 * i + 1) */ |
569 | 16.5k | if (xsize >= base_u_size) |
570 | 13.9k | mul_mod (rp, &rsize, xp, xsize, base_u, base_u_size, |
571 | 13.9k | mp, msize, &karactx); |
572 | 2.59k | else |
573 | 2.59k | mul_mod (rp, &rsize, base_u, base_u_size, xp, xsize, |
574 | 2.59k | mp, msize, &karactx); |
575 | 16.5k | base_u = precomp[i] = mpi_alloc_limb_space (rsize, esec); |
576 | 16.5k | base_u_size = precomp_size[i] = rsize; |
577 | 16.5k | if (max_u_size < base_u_size) |
578 | 1.14k | max_u_size = base_u_size; |
579 | 16.5k | MPN_COPY (precomp[i], rp, rsize); |
580 | 16.5k | } |
581 | | |
582 | 1.36M | if (msize > max_u_size) |
583 | 346k | max_u_size = msize; |
584 | 1.36M | base_u = mpi_alloc_limb_space (max_u_size, esec); |
585 | 1.36M | MPN_ZERO (base_u, max_u_size); |
586 | | |
587 | 1.36M | i = esize - 1; |
588 | | |
589 | | /* Main loop. |
590 | | |
591 | | Make the result be pointed to alternately by XP and RP. This |
592 | | helps us avoid block copying, which would otherwise be |
593 | | necessary with the overlap restrictions of |
594 | | _gcry_mpih_divmod. With 50% probability the result after this |
595 | | loop will be in the area originally pointed by RP (==RES->d), |
596 | | and with 50% probability in the area originally pointed to by XP. */ |
597 | 1.36M | rsign = 0; |
598 | 1.36M | if (W == 1) |
599 | 1.36M | { |
600 | 1.36M | rsize = bsize; |
601 | 1.36M | } |
602 | 4.80k | else |
603 | 4.80k | { |
604 | 4.80k | rsize = msize; |
605 | 4.80k | MPN_ZERO (rp, rsize); |
606 | 4.80k | } |
607 | 1.36M | MPN_COPY ( rp, bp, bsize ); |
608 | | |
609 | 1.36M | e = ep[i]; |
610 | 1.36M | count_leading_zeros (c, e); |
611 | 1.36M | e = (e << c) << 1; |
612 | 1.36M | c = BITS_PER_MPI_LIMB - 1 - c; |
613 | | |
614 | 1.36M | j = 0; |
615 | | |
616 | 1.36M | for (;;) |
617 | 2.84M | if (e == 0) |
618 | 1.37M | { |
619 | 1.37M | j += c; |
620 | 1.37M | if ( --i < 0 ) |
621 | 1.36M | break; |
622 | | |
623 | 6.59k | e = ep[i]; |
624 | 6.59k | c = BITS_PER_MPI_LIMB; |
625 | 6.59k | } |
626 | 1.47M | else |
627 | 1.47M | { |
628 | 1.47M | int c0; |
629 | 1.47M | mpi_limb_t e0; |
630 | 1.47M | struct gcry_mpi w, u; |
631 | 1.47M | w.sign = u.sign = 0; |
632 | 1.47M | w.flags = u.flags = 0; |
633 | 1.47M | w.d = base_u; |
634 | | |
635 | 1.47M | count_leading_zeros (c0, e); |
636 | 1.47M | e = (e << c0); |
637 | 1.47M | c -= c0; |
638 | 1.47M | j += c0; |
639 | | |
640 | 1.47M | e0 = (e >> (BITS_PER_MPI_LIMB - W)); |
641 | 1.47M | if (c >= W) |
642 | 1.45M | c0 = 0; |
643 | 15.5k | else |
644 | 15.5k | { |
645 | 15.5k | if ( --i < 0 ) |
646 | 4.64k | { |
647 | 4.64k | e0 = (e >> (BITS_PER_MPI_LIMB - c)); |
648 | 4.64k | j += c - W; |
649 | 4.64k | goto last_step; |
650 | 4.64k | } |
651 | 10.9k | else |
652 | 10.9k | { |
653 | 10.9k | c0 = c; |
654 | 10.9k | e = ep[i]; |
655 | 10.9k | c = BITS_PER_MPI_LIMB; |
656 | 10.9k | e0 |= (e >> (BITS_PER_MPI_LIMB - (W - c0))); |
657 | 10.9k | } |
658 | 15.5k | } |
659 | | |
660 | 1.46M | e = e << (W - c0); |
661 | 1.46M | c -= (W - c0); |
662 | | |
663 | 1.47M | last_step: |
664 | 1.47M | count_trailing_zeros (c0, e0); |
665 | 1.47M | e0 = (e0 >> c0) >> 1; |
666 | | |
667 | 5.40M | for (j += W - c0; j >= 0; j--) |
668 | 3.93M | { |
669 | | |
670 | | /* |
671 | | * base_u <= precomp[e0] |
672 | | * base_u_size <= precomp_size[e0] |
673 | | */ |
674 | 3.93M | base_u_size = 0; |
675 | 16.4M | for (k = 0; k < (1<< (W - 1)); k++) |
676 | 12.5M | { |
677 | 12.5M | w.alloced = w.nlimbs = precomp_size[k]; |
678 | 12.5M | u.alloced = u.nlimbs = precomp_size[k]; |
679 | 12.5M | u.d = precomp[k]; |
680 | | |
681 | 12.5M | mpi_set_cond (&w, &u, k == e0); |
682 | 12.5M | base_u_size |= ( precomp_size[k] & (0UL - (k == e0)) ); |
683 | 12.5M | } |
684 | | |
685 | 3.93M | w.alloced = w.nlimbs = rsize; |
686 | 3.93M | u.alloced = u.nlimbs = rsize; |
687 | 3.93M | u.d = rp; |
688 | 3.93M | mpi_set_cond (&w, &u, j != 0); |
689 | 3.93M | base_u_size ^= ((base_u_size ^ rsize) & (0UL - (j != 0))); |
690 | | |
691 | 3.93M | mul_mod (xp, &xsize, rp, rsize, base_u, base_u_size, |
692 | 3.93M | mp, msize, &karactx); |
693 | 3.93M | tp = rp; rp = xp; xp = tp; |
694 | 3.93M | rsize = xsize; |
695 | 3.93M | } |
696 | | |
697 | 1.47M | j = c0; |
698 | 1.47M | if ( i < 0 ) |
699 | 4.64k | break; |
700 | 1.47M | } |
701 | | |
702 | 1.99M | while (j--) |
703 | 629k | { |
704 | 629k | mul_mod (xp, &xsize, rp, rsize, rp, rsize, mp, msize, &karactx); |
705 | 629k | tp = rp; rp = xp; xp = tp; |
706 | 629k | rsize = xsize; |
707 | 629k | } |
708 | | |
709 | | /* We shifted MOD, the modulo reduction argument, left |
710 | | MOD_SHIFT_CNT steps. Adjust the result by reducing it with the |
711 | | original MOD. |
712 | | |
713 | | Also make sure the result is put in RES->d (where it already |
714 | | might be, see above). */ |
715 | 1.36M | if ( mod_shift_cnt ) |
716 | 248k | { |
717 | 248k | carry_limb = _gcry_mpih_lshift( res->d, rp, rsize, mod_shift_cnt); |
718 | 248k | rp = res->d; |
719 | 248k | if ( carry_limb ) |
720 | 166k | { |
721 | 166k | rp[rsize] = carry_limb; |
722 | 166k | rsize++; |
723 | 166k | } |
724 | 248k | } |
725 | 1.12M | else if (res->d != rp) |
726 | 1.14k | { |
727 | 1.14k | MPN_COPY (res->d, rp, rsize); |
728 | 1.14k | rp = res->d; |
729 | 1.14k | } |
730 | | |
731 | 1.36M | if ( rsize >= msize ) |
732 | 1.02M | { |
733 | 1.02M | _gcry_mpih_divrem(rp + msize, 0, rp, rsize, mp, msize); |
734 | 1.02M | rsize = msize; |
735 | 1.02M | } |
736 | | |
737 | | /* Remove any leading zero words from the result. */ |
738 | 1.36M | if ( mod_shift_cnt ) |
739 | 248k | _gcry_mpih_rshift( rp, rp, rsize, mod_shift_cnt); |
740 | 1.36M | MPN_NORMALIZE (rp, rsize); |
741 | | |
742 | 1.36M | _gcry_mpih_release_karatsuba_ctx (&karactx ); |
743 | 2.75M | for (i = 0; i < (1 << (W - 1)); i++) |
744 | 1.38M | _gcry_mpi_free_limb_space( precomp[i], esec ? precomp_size[i] : 0 ); |
745 | 1.36M | _gcry_mpi_free_limb_space (base_u, esec ? max_u_size : 0); |
746 | 1.36M | } |
747 | | |
748 | | /* Fixup for negative results. */ |
749 | 1.36M | if ( negative_result && rsize ) |
750 | 0 | { |
751 | 0 | if ( mod_shift_cnt ) |
752 | 0 | _gcry_mpih_rshift( mp, mp, msize, mod_shift_cnt); |
753 | 0 | _gcry_mpih_sub( rp, mp, msize, rp, rsize); |
754 | 0 | rsize = msize; |
755 | 0 | rsign = msign; |
756 | 0 | MPN_NORMALIZE(rp, rsize); |
757 | 0 | } |
758 | 1.36M | gcry_assert (res->d == rp); |
759 | 0 | res->nlimbs = rsize; |
760 | 1.36M | res->sign = rsign; |
761 | | |
762 | 1.36M | leave: |
763 | 1.36M | if (mp_marker) |
764 | 1.36M | _gcry_mpi_free_limb_space( mp_marker, mp_nlimbs ); |
765 | 1.36M | if (bp_marker) |
766 | 4.89k | _gcry_mpi_free_limb_space( bp_marker, bp_nlimbs ); |
767 | 1.36M | if (ep_marker) |
768 | 0 | _gcry_mpi_free_limb_space( ep_marker, ep_nlimbs ); |
769 | 1.36M | if (xp_marker) |
770 | 1.36M | _gcry_mpi_free_limb_space( xp_marker, xp_nlimbs ); |
771 | 1.36M | } |
772 | | #endif |