Coverage Report

Created: 2024-11-21 07:03

/src/libgcrypt/mpi/mpi-pow.c
Line
Count
Source (jump to first uncovered line)
1
/* mpi-pow.c  -  MPI functions for exponentiation
2
 * Copyright (C) 1994, 1996, 1998, 2000, 2002
3
 *               2003  Free Software Foundation, Inc.
4
 *               2013  g10 Code GmbH
5
 *
6
 * This file is part of Libgcrypt.
7
 *
8
 * Libgcrypt is free software; you can redistribute it and/or modify
9
 * it under the terms of the GNU Lesser General Public License as
10
 * published by the Free Software Foundation; either version 2.1 of
11
 * the License, or (at your option) any later version.
12
 *
13
 * Libgcrypt is distributed in the hope that it will be useful,
14
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16
 * GNU Lesser General Public License for more details.
17
 *
18
 * You should have received a copy of the GNU Lesser General Public
19
 * License along with this program; if not, see <http://www.gnu.org/licenses/>.
20
 *
21
 * Note: This code is heavily based on the GNU MP Library.
22
 *   Actually it's the same code with only minor changes in the
23
 *   way the data is stored; this is to support the abstraction
24
 *   of an optional secure memory allocation which may be used
25
 *   to avoid revealing of sensitive data due to paging etc.
26
 */
27
28
#include <config.h>
29
#include <stdio.h>
30
#include <stdlib.h>
31
#include <string.h>
32
33
#include "mpi-internal.h"
34
#include "longlong.h"
35
36
37
/*
38
 * When you need old implementation, please add compilation option
39
 * -DUSE_ALGORITHM_SIMPLE_EXPONENTIATION
40
 * or expose this line:
41
#define USE_ALGORITHM_SIMPLE_EXPONENTIATION 1
42
 */
43
44
#if defined(USE_ALGORITHM_SIMPLE_EXPONENTIATION)
45
/****************
46
 * RES = BASE ^ EXPO mod MOD
47
 */
48
void
49
_gcry_mpi_powm (gcry_mpi_t res,
50
                gcry_mpi_t base, gcry_mpi_t expo, gcry_mpi_t mod)
51
{
52
  /* Pointer to the limbs of the arguments, their size and signs. */
53
  mpi_ptr_t  rp, ep, mp, bp;
54
  mpi_size_t esize, msize, bsize, rsize;
55
  int               msign, bsign, rsign;
56
  /* Flags telling the secure allocation status of the arguments.  */
57
  int        esec,  msec,  bsec;
58
  /* Size of the result including space for temporary values.  */
59
  mpi_size_t size;
60
  /* Helper.  */
61
  int mod_shift_cnt;
62
  int negative_result;
63
  mpi_ptr_t mp_marker = NULL;
64
  mpi_ptr_t bp_marker = NULL;
65
  mpi_ptr_t ep_marker = NULL;
66
  mpi_ptr_t xp_marker = NULL;
67
  unsigned int mp_nlimbs = 0;
68
  unsigned int bp_nlimbs = 0;
69
  unsigned int ep_nlimbs = 0;
70
  unsigned int xp_nlimbs = 0;
71
  mpi_ptr_t tspace = NULL;
72
  mpi_size_t tsize = 0;
73
74
75
  esize = expo->nlimbs;
76
  msize = mod->nlimbs;
77
  size = 2 * msize;
78
  msign = mod->sign;
79
80
  esec = mpi_is_secure(expo);
81
  msec = mpi_is_secure(mod);
82
  bsec = mpi_is_secure(base);
83
84
  rp = res->d;
85
  ep = expo->d;
86
  MPN_NORMALIZE(ep, esize);
87
88
  if (!msize)
89
    _gcry_divide_by_zero();
90
91
  if (!esize)
92
    {
93
      /* Exponent is zero, result is 1 mod MOD, i.e., 1 or 0 depending
94
         on if MOD equals 1.  */
95
      res->nlimbs = (msize == 1 && mod->d[0] == 1) ? 0 : 1;
96
      if (res->nlimbs)
97
        {
98
          RESIZE_IF_NEEDED (res, 1);
99
          rp = res->d;
100
          rp[0] = 1;
101
        }
102
      res->sign = 0;
103
      goto leave;
104
    }
105
106
  /* Normalize MOD (i.e. make its most significant bit set) as
107
     required by mpn_divrem.  This will make the intermediate values
108
     in the calculation slightly larger, but the correct result is
109
     obtained after a final reduction using the original MOD value. */
110
  mp_nlimbs = msec? msize:0;
111
  mp = mp_marker = mpi_alloc_limb_space(msize, msec);
112
  count_leading_zeros (mod_shift_cnt, mod->d[msize-1]);
113
  if (mod_shift_cnt)
114
    _gcry_mpih_lshift (mp, mod->d, msize, mod_shift_cnt);
115
  else
116
    MPN_COPY( mp, mod->d, msize );
117
118
  bsize = base->nlimbs;
119
  bsign = base->sign;
120
  if (bsize > msize)
121
    {
122
      /* The base is larger than the module.  Reduce it.
123
124
         Allocate (BSIZE + 1) with space for remainder and quotient.
125
         (The quotient is (bsize - msize + 1) limbs.)  */
126
      bp_nlimbs = bsec ? (bsize + 1):0;
127
      bp = bp_marker = mpi_alloc_limb_space( bsize + 1, bsec );
128
      MPN_COPY ( bp, base->d, bsize );
129
      /* We don't care about the quotient, store it above the
130
       * remainder, at BP + MSIZE.  */
131
      _gcry_mpih_divrem( bp + msize, 0, bp, bsize, mp, msize );
132
      bsize = msize;
133
      /* Canonicalize the base, since we are going to multiply with it
134
   quite a few times.  */
135
      MPN_NORMALIZE( bp, bsize );
136
    }
137
  else
138
    bp = base->d;
139
140
  if (!bsize)
141
    {
142
      res->nlimbs = 0;
143
      res->sign = 0;
144
      goto leave;
145
    }
146
147
148
  /* Make BASE, EXPO and MOD not overlap with RES.  */
149
  if ( rp == bp )
150
    {
151
      /* RES and BASE are identical.  Allocate temp. space for BASE.  */
152
      gcry_assert (!bp_marker);
153
      bp_nlimbs = bsec? bsize:0;
154
      bp = bp_marker = mpi_alloc_limb_space( bsize, bsec );
155
      MPN_COPY(bp, rp, bsize);
156
    }
157
  if ( rp == ep )
158
    {
159
      /* RES and EXPO are identical.  Allocate temp. space for EXPO.  */
160
      ep_nlimbs = esec? esize:0;
161
      ep = ep_marker = mpi_alloc_limb_space( esize, esec );
162
      MPN_COPY(ep, rp, esize);
163
    }
164
  if ( rp == mp )
165
    {
166
      /* RES and MOD are identical.  Allocate temporary space for MOD.*/
167
      gcry_assert (!mp_marker);
168
      mp_nlimbs = msec?msize:0;
169
      mp = mp_marker = mpi_alloc_limb_space( msize, msec );
170
      MPN_COPY(mp, rp, msize);
171
    }
172
173
  /* Copy base to the result.  */
174
  if (res->alloced < size)
175
    {
176
      mpi_resize (res, size);
177
      rp = res->d;
178
    }
179
  MPN_COPY ( rp, bp, bsize );
180
  rsize = bsize;
181
  rsign = 0;
182
183
  /* Main processing.  */
184
  {
185
    mpi_size_t i;
186
    mpi_ptr_t xp;
187
    int c;
188
    mpi_limb_t e;
189
    mpi_limb_t carry_limb;
190
    struct karatsuba_ctx karactx;
191
    struct gcry_mpi w, u;
192
193
    xp_nlimbs = msec? size:0;
194
    xp = xp_marker = mpi_alloc_limb_space( size, msec );
195
196
    w.sign = u.sign = 0;
197
    w.flags = u.flags = 0;
198
    w.alloced = w.nlimbs = size; /* RES->alloc may be longer.  */
199
    u.alloced = u.nlimbs = size;
200
201
    memset( &karactx, 0, sizeof karactx );
202
    negative_result = (ep[0] & 1) && bsign;
203
204
    i = esize - 1;
205
    e = ep[i];
206
    count_leading_zeros (c, e);
207
    e = (e << c) << 1;     /* Shift the expo bits to the left, lose msb.  */
208
    c = BITS_PER_MPI_LIMB - 1 - c;
209
210
    /* Main loop.
211
212
       Make the result be pointed to alternately by XP and RP.  This
213
       helps us avoid block copying, which would otherwise be
214
       necessary with the overlap restrictions of
215
       _gcry_mpih_divmod. With 50% probability the result after this
216
       loop will be in the area originally pointed by RP (==RES->d),
217
       and with 50% probability in the area originally pointed to by XP. */
218
    for (;;)
219
      {
220
        while (c)
221
          {
222
            mpi_ptr_t tp;
223
            mpi_size_t xsize;
224
225
            /*mpih_mul_n(xp, rp, rp, rsize);*/
226
            if ( rsize < KARATSUBA_THRESHOLD )
227
              _gcry_mpih_sqr_n_basecase( xp, rp, rsize );
228
            else
229
              {
230
                if ( !tspace )
231
                  {
232
                    tsize = 2 * rsize;
233
                    tspace = mpi_alloc_limb_space( tsize, 0 );
234
                  }
235
                else if ( tsize < (2*rsize) )
236
                  {
237
                    _gcry_mpi_free_limb_space (tspace, 0);
238
                    tsize = 2 * rsize;
239
                    tspace = mpi_alloc_limb_space (tsize, 0 );
240
                  }
241
                _gcry_mpih_sqr_n (xp, rp, rsize, tspace);
242
              }
243
244
            xsize = 2 * rsize;
245
            if ( xsize > msize )
246
              {
247
                _gcry_mpih_divrem(xp + msize, 0, xp, xsize, mp, msize);
248
                xsize = msize;
249
              }
250
251
            tp = rp; rp = xp; xp = tp;
252
            rsize = xsize;
253
254
            /* To mitigate the Yarom/Falkner flush+reload cache
255
             * side-channel attack on the RSA secret exponent, we do
256
             * the multiplication regardless of the value of the
257
             * high-bit of E.  But to avoid this performance penalty
258
             * we do it only if the exponent has been stored in secure
259
             * memory and we can thus assume it is a secret exponent.  */
260
            if (esec || (mpi_limb_signed_t)e < 0)
261
              {
262
                /*mpih_mul( xp, rp, rsize, bp, bsize );*/
263
                if( bsize < KARATSUBA_THRESHOLD )
264
                  _gcry_mpih_mul ( xp, rp, rsize, bp, bsize );
265
                else
266
                  _gcry_mpih_mul_karatsuba_case (xp, rp, rsize, bp, bsize,
267
                                                 &karactx);
268
269
                xsize = rsize + bsize;
270
                if ( xsize > msize )
271
                  {
272
                    _gcry_mpih_divrem(xp + msize, 0, xp, xsize, mp, msize);
273
                    xsize = msize;
274
                  }
275
              }
276
277
            w.d = rp;
278
            u.d = xp;
279
            mpi_set_cond (&w, &u, ((mpi_limb_signed_t)e < 0));
280
281
            e <<= 1;
282
            c--;
283
          }
284
285
        i--;
286
        if ( i < 0 )
287
          break;
288
        e = ep[i];
289
        c = BITS_PER_MPI_LIMB;
290
      }
291
292
    /* We shifted MOD, the modulo reduction argument, left
293
       MOD_SHIFT_CNT steps.  Adjust the result by reducing it with the
294
       original MOD.
295
296
       Also make sure the result is put in RES->d (where it already
297
       might be, see above).  */
298
    if ( mod_shift_cnt )
299
      {
300
        carry_limb = _gcry_mpih_lshift( res->d, rp, rsize, mod_shift_cnt);
301
        rp = res->d;
302
        if ( carry_limb )
303
          {
304
            rp[rsize] = carry_limb;
305
            rsize++;
306
          }
307
      }
308
    else if (res->d != rp)
309
      {
310
        MPN_COPY (res->d, rp, rsize);
311
        rp = res->d;
312
      }
313
314
    if ( rsize >= msize )
315
      {
316
        _gcry_mpih_divrem(rp + msize, 0, rp, rsize, mp, msize);
317
        rsize = msize;
318
      }
319
320
    /* Remove any leading zero words from the result.  */
321
    if ( mod_shift_cnt )
322
      _gcry_mpih_rshift( rp, rp, rsize, mod_shift_cnt);
323
    MPN_NORMALIZE (rp, rsize);
324
325
    _gcry_mpih_release_karatsuba_ctx (&karactx );
326
  }
327
328
  /* Fixup for negative results.  */
329
  if ( negative_result && rsize )
330
    {
331
      if ( mod_shift_cnt )
332
        _gcry_mpih_rshift( mp, mp, msize, mod_shift_cnt);
333
      _gcry_mpih_sub( rp, mp, msize, rp, rsize);
334
      rsize = msize;
335
      rsign = msign;
336
      MPN_NORMALIZE(rp, rsize);
337
    }
338
  gcry_assert (res->d == rp);
339
  res->nlimbs = rsize;
340
  res->sign = rsign;
341
342
 leave:
343
  if (mp_marker)
344
    _gcry_mpi_free_limb_space( mp_marker, mp_nlimbs );
345
  if (bp_marker)
346
    _gcry_mpi_free_limb_space( bp_marker, bp_nlimbs );
347
  if (ep_marker)
348
    _gcry_mpi_free_limb_space( ep_marker, ep_nlimbs );
349
  if (xp_marker)
350
    _gcry_mpi_free_limb_space( xp_marker, xp_nlimbs );
351
  if (tspace)
352
    _gcry_mpi_free_limb_space( tspace, 0 );
353
}
354
#else
355
/**
356
 * Internal function to compute
357
 *
358
 *    X = R * S mod M
359
 *
360
 * and set the size of X at the pointer XSIZE_P.
361
 * Use karatsuba structure at KARACTX_P.
362
 *
363
 * Condition:
364
 *   RSIZE >= SSIZE
365
 *   Enough space for X is allocated beforehand.
366
 *
367
 * For generic cases, we can/should use gcry_mpi_mulm.
368
 * This function is use for specific internal case.
369
 */
370
static void
371
mul_mod (mpi_ptr_t xp, mpi_size_t *xsize_p,
372
         mpi_ptr_t rp, mpi_size_t rsize,
373
         mpi_ptr_t sp, mpi_size_t ssize,
374
         mpi_ptr_t mp, mpi_size_t msize,
375
         struct karatsuba_ctx *karactx_p)
376
7.07M
{
377
7.07M
  if( ssize < KARATSUBA_THRESHOLD )
378
4.12M
    _gcry_mpih_mul ( xp, rp, rsize, sp, ssize );
379
2.94M
  else
380
2.94M
    _gcry_mpih_mul_karatsuba_case (xp, rp, rsize, sp, ssize, karactx_p);
381
382
7.07M
   if (rsize + ssize > msize)
383
6.23M
    {
384
6.23M
      _gcry_mpih_divrem (xp + msize, 0, xp, rsize + ssize, mp, msize);
385
6.23M
      *xsize_p = msize;
386
6.23M
    }
387
846k
   else
388
846k
     *xsize_p = rsize + ssize;
389
7.07M
}
390
391
#define SIZE_PRECOMP ((1 << (5 - 1)))
392
393
/****************
394
 * RES = BASE ^ EXPO mod MOD
395
 *
396
 * To mitigate the Yarom/Falkner flush+reload cache side-channel
397
 * attack on the RSA secret exponent, we don't use the square
398
 * routine but multiplication.
399
 *
400
 * Reference:
401
 *   Handbook of Applied Cryptography
402
 *       Algorithm 14.83: Modified left-to-right k-ary exponentiation
403
 */
404
void
405
_gcry_mpi_powm (gcry_mpi_t res,
406
                gcry_mpi_t base, gcry_mpi_t expo, gcry_mpi_t mod)
407
1.62M
{
408
  /* Pointer to the limbs of the arguments, their size and signs. */
409
1.62M
  mpi_ptr_t  rp, ep, mp, bp;
410
1.62M
  mpi_size_t esize, msize, bsize, rsize;
411
1.62M
  int               msign, bsign, rsign;
412
  /* Flags telling the secure allocation status of the arguments.  */
413
1.62M
  int        esec,  msec,  bsec;
414
  /* Size of the result including space for temporary values.  */
415
1.62M
  mpi_size_t size;
416
  /* Helper.  */
417
1.62M
  int mod_shift_cnt;
418
1.62M
  int negative_result;
419
1.62M
  mpi_ptr_t mp_marker = NULL;
420
1.62M
  mpi_ptr_t bp_marker = NULL;
421
1.62M
  mpi_ptr_t ep_marker = NULL;
422
1.62M
  mpi_ptr_t xp_marker = NULL;
423
1.62M
  unsigned int mp_nlimbs = 0;
424
1.62M
  unsigned int bp_nlimbs = 0;
425
1.62M
  unsigned int ep_nlimbs = 0;
426
1.62M
  unsigned int xp_nlimbs = 0;
427
1.62M
  mpi_ptr_t precomp[SIZE_PRECOMP]; /* Pre-computed array: BASE^1, ^3, ^5, ... */
428
1.62M
  mpi_size_t precomp_size[SIZE_PRECOMP];
429
1.62M
  mpi_size_t W;
430
1.62M
  mpi_ptr_t base_u;
431
1.62M
  mpi_size_t base_u_size;
432
1.62M
  mpi_size_t max_u_size;
433
434
1.62M
  esize = expo->nlimbs;
435
1.62M
  msize = mod->nlimbs;
436
1.62M
  size = 2 * msize;
437
1.62M
  msign = mod->sign;
438
439
1.62M
  ep = expo->d;
440
1.62M
  MPN_NORMALIZE(ep, esize);
441
442
1.62M
  if (esize * BITS_PER_MPI_LIMB > 512)
443
883
    W = 5;
444
1.62M
  else if (esize * BITS_PER_MPI_LIMB > 256)
445
395
    W = 4;
446
1.62M
  else if (esize * BITS_PER_MPI_LIMB > 128)
447
651
    W = 3;
448
1.62M
  else if (esize * BITS_PER_MPI_LIMB > 64)
449
550
    W = 2;
450
1.62M
  else
451
1.62M
    W = 1;
452
453
1.62M
  esec = mpi_is_secure(expo);
454
1.62M
  msec = mpi_is_secure(mod);
455
1.62M
  bsec = mpi_is_secure(base);
456
457
1.62M
  rp = res->d;
458
459
1.62M
  if (!msize)
460
0
    _gcry_divide_by_zero();
461
462
1.62M
  if (!esize)
463
66
    {
464
      /* Exponent is zero, result is 1 mod MOD, i.e., 1 or 0 depending
465
         on if MOD equals 1.  */
466
66
      res->nlimbs = (msize == 1 && mod->d[0] == 1) ? 0 : 1;
467
66
      if (res->nlimbs)
468
54
        {
469
54
          RESIZE_IF_NEEDED (res, 1);
470
54
          rp = res->d;
471
54
          rp[0] = 1;
472
54
        }
473
66
      res->sign = 0;
474
66
      goto leave;
475
66
    }
476
477
  /* Normalize MOD (i.e. make its most significant bit set) as
478
     required by mpn_divrem.  This will make the intermediate values
479
     in the calculation slightly larger, but the correct result is
480
     obtained after a final reduction using the original MOD value. */
481
1.62M
  mp_nlimbs = msec? msize:0;
482
1.62M
  mp = mp_marker = mpi_alloc_limb_space(msize, msec);
483
1.62M
  count_leading_zeros (mod_shift_cnt, mod->d[msize-1]);
484
1.62M
  if (mod_shift_cnt)
485
305k
    _gcry_mpih_lshift (mp, mod->d, msize, mod_shift_cnt);
486
1.31M
  else
487
1.31M
    MPN_COPY( mp, mod->d, msize );
488
489
1.62M
  bsize = base->nlimbs;
490
1.62M
  bsign = base->sign;
491
1.62M
  if (bsize > msize)
492
366
    {
493
      /* The base is larger than the module.  Reduce it.
494
495
         Allocate (BSIZE + 1) with space for remainder and quotient.
496
         (The quotient is (bsize - msize + 1) limbs.)  */
497
366
      bp_nlimbs = bsec ? (bsize + 1):0;
498
366
      bp = bp_marker = mpi_alloc_limb_space( bsize + 1, bsec );
499
366
      MPN_COPY ( bp, base->d, bsize );
500
      /* We don't care about the quotient, store it above the
501
       * remainder, at BP + MSIZE.  */
502
366
      _gcry_mpih_divrem( bp + msize, 0, bp, bsize, mp, msize );
503
366
      bsize = msize;
504
      /* Canonicalize the base, since we are going to multiply with it
505
         quite a few times.  */
506
366
      MPN_NORMALIZE( bp, bsize );
507
366
    }
508
1.62M
  else
509
1.62M
    bp = base->d;
510
511
1.62M
  if (!bsize)
512
155
    {
513
155
      res->nlimbs = 0;
514
155
      res->sign = 0;
515
155
      goto leave;
516
155
    }
517
518
519
  /* Make BASE, EXPO not overlap with RES.  We don't need to check MOD
520
     because that has already been copied to the MP var.  */
521
1.62M
  if ( rp == bp )
522
2.56k
    {
523
      /* RES and BASE are identical.  Allocate temp. space for BASE.  */
524
2.56k
      gcry_assert (!bp_marker);
525
2.56k
      bp_nlimbs = bsec? bsize:0;
526
2.56k
      bp = bp_marker = mpi_alloc_limb_space( bsize, bsec );
527
2.56k
      MPN_COPY(bp, rp, bsize);
528
2.56k
    }
529
1.62M
  if ( rp == ep )
530
105
    {
531
      /* RES and EXPO are identical.  Allocate temp. space for EXPO.  */
532
105
      ep_nlimbs = esec? esize:0;
533
105
      ep = ep_marker = mpi_alloc_limb_space( esize, esec );
534
105
      MPN_COPY(ep, rp, esize);
535
105
    }
536
537
  /* Copy base to the result.  */
538
1.62M
  if (res->alloced < size)
539
5.13k
    {
540
5.13k
      mpi_resize (res, size);
541
5.13k
      rp = res->d;
542
5.13k
    }
543
544
  /* Main processing.  */
545
1.62M
  {
546
1.62M
    mpi_size_t i, j, k;
547
1.62M
    mpi_ptr_t xp;
548
1.62M
    mpi_size_t xsize = 0;
549
1.62M
    int c;
550
1.62M
    mpi_limb_t e;
551
1.62M
    mpi_limb_t carry_limb;
552
1.62M
    struct karatsuba_ctx karactx;
553
1.62M
    mpi_ptr_t tp;
554
555
1.62M
    xp_nlimbs = msec? size:0;
556
1.62M
    xp = xp_marker = mpi_alloc_limb_space( size, msec );
557
558
1.62M
    memset( &karactx, 0, sizeof karactx );
559
1.62M
    negative_result = (ep[0] & 1) && bsign;
560
561
    /* Precompute PRECOMP[], BASE^(2 * i + 1), BASE^1, ^3, ^5, ... */
562
1.62M
    if (W > 1)                  /* X := BASE^2 */
563
2.47k
      mul_mod (xp, &xsize, bp, bsize, bp, bsize, mp, msize, &karactx);
564
1.62M
    base_u = precomp[0] = mpi_alloc_limb_space (bsize, esec);
565
1.62M
    base_u_size = max_u_size = precomp_size[0] = bsize;
566
1.62M
    MPN_COPY (precomp[0], bp, bsize);
567
1.64M
    for (i = 1; i < (1 << (W - 1)); i++)
568
18.4k
      {                         /* PRECOMP[i] = BASE^(2 * i + 1) */
569
18.4k
        if (xsize >= base_u_size)
570
7.83k
          mul_mod (rp, &rsize, xp, xsize, base_u, base_u_size,
571
7.83k
                   mp, msize, &karactx);
572
10.6k
        else
573
10.6k
          mul_mod (rp, &rsize, base_u, base_u_size, xp, xsize,
574
10.6k
                   mp, msize, &karactx);
575
18.4k
        base_u = precomp[i] = mpi_alloc_limb_space (rsize, esec);
576
18.4k
        base_u_size = precomp_size[i] = rsize;
577
18.4k
        if (max_u_size < base_u_size)
578
8.97k
          max_u_size = base_u_size;
579
18.4k
        MPN_COPY (precomp[i], rp, rsize);
580
18.4k
      }
581
582
1.62M
    if (msize > max_u_size)
583
418k
      max_u_size = msize;
584
1.62M
    base_u = mpi_alloc_limb_space (max_u_size, esec);
585
1.62M
    MPN_ZERO (base_u, max_u_size);
586
587
1.62M
    i = esize - 1;
588
589
    /* Main loop.
590
591
       Make the result be pointed to alternately by XP and RP.  This
592
       helps us avoid block copying, which would otherwise be
593
       necessary with the overlap restrictions of
594
       _gcry_mpih_divmod. With 50% probability the result after this
595
       loop will be in the area originally pointed by RP (==RES->d),
596
       and with 50% probability in the area originally pointed to by XP. */
597
1.62M
    rsign = 0;
598
1.62M
    if (W == 1)
599
1.61M
      {
600
1.61M
        rsize = bsize;
601
1.61M
      }
602
2.47k
    else
603
2.47k
      {
604
2.47k
        rsize = msize;
605
2.47k
        MPN_ZERO (rp, rsize);
606
2.47k
      }
607
1.62M
    MPN_COPY ( rp, bp, bsize );
608
609
1.62M
    e = ep[i];
610
1.62M
    count_leading_zeros (c, e);
611
1.62M
    e = (e << c) << 1;
612
1.62M
    c = BITS_PER_MPI_LIMB - 1 - c;
613
614
1.62M
    j = 0;
615
616
1.62M
    for (;;)
617
3.45M
      if (e == 0)
618
1.63M
        {
619
1.63M
          j += c;
620
1.63M
          if ( --i < 0 )
621
1.62M
            break;
622
623
17.1k
          e = ep[i];
624
17.1k
          c = BITS_PER_MPI_LIMB;
625
17.1k
        }
626
1.81M
      else
627
1.81M
        {
628
1.81M
          int c0;
629
1.81M
          mpi_limb_t e0;
630
1.81M
          struct gcry_mpi w, u;
631
1.81M
          w.sign = u.sign = 0;
632
1.81M
          w.flags = u.flags = 0;
633
1.81M
          w.d = base_u;
634
635
1.81M
          count_leading_zeros (c0, e);
636
1.81M
          e = (e << c0);
637
1.81M
          c -= c0;
638
1.81M
          j += c0;
639
640
1.81M
          e0 = (e >> (BITS_PER_MPI_LIMB - W));
641
1.81M
          if (c >= W)
642
1.78M
            c0 = 0;
643
33.2k
          else
644
33.2k
            {
645
33.2k
              if ( --i < 0 )
646
1.20k
                {
647
1.20k
                  e0 = (e >> (BITS_PER_MPI_LIMB - c));
648
1.20k
                  j += c - W;
649
1.20k
                  goto last_step;
650
1.20k
                }
651
32.0k
              else
652
32.0k
                {
653
32.0k
                  c0 = c;
654
32.0k
                  e = ep[i];
655
32.0k
                  c = BITS_PER_MPI_LIMB;
656
32.0k
                  e0 |= (e >> (BITS_PER_MPI_LIMB - (W - c0)));
657
32.0k
                }
658
33.2k
            }
659
660
1.81M
          e = e << (W - c0);
661
1.81M
          c -= (W - c0);
662
663
1.81M
        last_step:
664
1.81M
          count_trailing_zeros (c0, e0);
665
1.81M
          e0 = (e0 >> c0) >> 1;
666
667
8.12M
          for (j += W - c0; j >= 0; j--)
668
6.30M
            {
669
670
              /*
671
               *  base_u <= precomp[e0]
672
               *  base_u_size <= precomp_size[e0]
673
               */
674
6.30M
              base_u_size = 0;
675
64.8M
              for (k = 0; k < (1<< (W - 1)); k++)
676
58.5M
                {
677
58.5M
                  w.alloced = w.nlimbs = precomp_size[k];
678
58.5M
                  u.alloced = u.nlimbs = precomp_size[k];
679
58.5M
                  u.d = precomp[k];
680
681
58.5M
                  mpi_set_cond (&w, &u, k == e0);
682
58.5M
                  base_u_size |= ( precomp_size[k] & (0UL - (k == e0)) );
683
58.5M
                }
684
685
6.30M
              w.alloced = w.nlimbs = rsize;
686
6.30M
              u.alloced = u.nlimbs = rsize;
687
6.30M
              u.d = rp;
688
6.30M
              mpi_set_cond (&w, &u, j != 0);
689
6.30M
              base_u_size ^= ((base_u_size ^ rsize)  & (0UL - (j != 0)));
690
691
6.30M
              mul_mod (xp, &xsize, rp, rsize, base_u, base_u_size,
692
6.30M
                       mp, msize, &karactx);
693
6.30M
              tp = rp; rp = xp; xp = tp;
694
6.30M
              rsize = xsize;
695
6.30M
            }
696
697
1.81M
          j = c0;
698
1.81M
          if ( i < 0 )
699
1.20k
            break;
700
1.81M
        }
701
702
2.37M
    while (j--)
703
752k
      {
704
752k
        mul_mod (xp, &xsize, rp, rsize, rp, rsize, mp, msize, &karactx);
705
752k
        tp = rp; rp = xp; xp = tp;
706
752k
        rsize = xsize;
707
752k
      }
708
709
    /* We shifted MOD, the modulo reduction argument, left
710
       MOD_SHIFT_CNT steps.  Adjust the result by reducing it with the
711
       original MOD.
712
713
       Also make sure the result is put in RES->d (where it already
714
       might be, see above).  */
715
1.62M
    if ( mod_shift_cnt )
716
305k
      {
717
305k
        carry_limb = _gcry_mpih_lshift( res->d, rp, rsize, mod_shift_cnt);
718
305k
        rp = res->d;
719
305k
        if ( carry_limb )
720
207k
          {
721
207k
            rp[rsize] = carry_limb;
722
207k
            rsize++;
723
207k
          }
724
305k
      }
725
1.31M
    else if (res->d != rp)
726
160
      {
727
160
        MPN_COPY (res->d, rp, rsize);
728
160
        rp = res->d;
729
160
      }
730
731
1.62M
    if ( rsize >= msize )
732
1.21M
      {
733
1.21M
        _gcry_mpih_divrem(rp + msize, 0, rp, rsize, mp, msize);
734
1.21M
        rsize = msize;
735
1.21M
      }
736
737
    /* Remove any leading zero words from the result.  */
738
1.62M
    if ( mod_shift_cnt )
739
305k
      _gcry_mpih_rshift( rp, rp, rsize, mod_shift_cnt);
740
1.62M
    MPN_NORMALIZE (rp, rsize);
741
742
1.62M
    _gcry_mpih_release_karatsuba_ctx (&karactx );
743
3.26M
    for (i = 0; i < (1 << (W - 1)); i++)
744
1.64M
      _gcry_mpi_free_limb_space( precomp[i], esec ? precomp_size[i] : 0 );
745
1.62M
    _gcry_mpi_free_limb_space (base_u, esec ? max_u_size : 0);
746
1.62M
  }
747
748
  /* Fixup for negative results.  */
749
1.62M
  if ( negative_result && rsize )
750
0
    {
751
0
      if ( mod_shift_cnt )
752
0
        _gcry_mpih_rshift( mp, mp, msize, mod_shift_cnt);
753
0
      _gcry_mpih_sub( rp, mp, msize, rp, rsize);
754
0
      rsize = msize;
755
0
      rsign = msign;
756
0
      MPN_NORMALIZE(rp, rsize);
757
0
    }
758
1.62M
  gcry_assert (res->d == rp);
759
0
  res->nlimbs = rsize;
760
1.62M
  res->sign = rsign;
761
762
1.62M
 leave:
763
1.62M
  if (mp_marker)
764
1.62M
    _gcry_mpi_free_limb_space( mp_marker, mp_nlimbs );
765
1.62M
  if (bp_marker)
766
2.93k
    _gcry_mpi_free_limb_space( bp_marker, bp_nlimbs );
767
1.62M
  if (ep_marker)
768
105
    _gcry_mpi_free_limb_space( ep_marker, ep_nlimbs );
769
1.62M
  if (xp_marker)
770
1.62M
    _gcry_mpi_free_limb_space( xp_marker, xp_nlimbs );
771
1.62M
}
772
#endif