Coverage Report

Created: 2022-12-08 06:09

/src/libgcrypt/mpi/mpi-pow.c
Line
Count
Source (jump to first uncovered line)
1
/* mpi-pow.c  -  MPI functions for exponentiation
2
 * Copyright (C) 1994, 1996, 1998, 2000, 2002
3
 *               2003  Free Software Foundation, Inc.
4
 *               2013  g10 Code GmbH
5
 *
6
 * This file is part of Libgcrypt.
7
 *
8
 * Libgcrypt is free software; you can redistribute it and/or modify
9
 * it under the terms of the GNU Lesser General Public License as
10
 * published by the Free Software Foundation; either version 2.1 of
11
 * the License, or (at your option) any later version.
12
 *
13
 * Libgcrypt is distributed in the hope that it will be useful,
14
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16
 * GNU Lesser General Public License for more details.
17
 *
18
 * You should have received a copy of the GNU Lesser General Public
19
 * License along with this program; if not, see <http://www.gnu.org/licenses/>.
20
 *
21
 * Note: This code is heavily based on the GNU MP Library.
22
 *   Actually it's the same code with only minor changes in the
23
 *   way the data is stored; this is to support the abstraction
24
 *   of an optional secure memory allocation which may be used
25
 *   to avoid revealing of sensitive data due to paging etc.
26
 */
27
28
#include <config.h>
29
#include <stdio.h>
30
#include <stdlib.h>
31
#include <string.h>
32
33
#include "mpi-internal.h"
34
#include "longlong.h"
35
36
37
/*
38
 * When you need old implementation, please add compilation option
39
 * -DUSE_ALGORITHM_SIMPLE_EXPONENTIATION
40
 * or expose this line:
41
#define USE_ALGORITHM_SIMPLE_EXPONENTIATION 1
42
 */
43
44
#if defined(USE_ALGORITHM_SIMPLE_EXPONENTIATION)
45
/****************
46
 * RES = BASE ^ EXPO mod MOD
47
 */
48
void
49
_gcry_mpi_powm (gcry_mpi_t res,
50
                gcry_mpi_t base, gcry_mpi_t expo, gcry_mpi_t mod)
51
{
52
  /* Pointer to the limbs of the arguments, their size and signs. */
53
  mpi_ptr_t  rp, ep, mp, bp;
54
  mpi_size_t esize, msize, bsize, rsize;
55
  int               msign, bsign, rsign;
56
  /* Flags telling the secure allocation status of the arguments.  */
57
  int        esec,  msec,  bsec;
58
  /* Size of the result including space for temporary values.  */
59
  mpi_size_t size;
60
  /* Helper.  */
61
  int mod_shift_cnt;
62
  int negative_result;
63
  mpi_ptr_t mp_marker = NULL;
64
  mpi_ptr_t bp_marker = NULL;
65
  mpi_ptr_t ep_marker = NULL;
66
  mpi_ptr_t xp_marker = NULL;
67
  unsigned int mp_nlimbs = 0;
68
  unsigned int bp_nlimbs = 0;
69
  unsigned int ep_nlimbs = 0;
70
  unsigned int xp_nlimbs = 0;
71
  mpi_ptr_t tspace = NULL;
72
  mpi_size_t tsize = 0;
73
74
75
  esize = expo->nlimbs;
76
  msize = mod->nlimbs;
77
  size = 2 * msize;
78
  msign = mod->sign;
79
80
  esec = mpi_is_secure(expo);
81
  msec = mpi_is_secure(mod);
82
  bsec = mpi_is_secure(base);
83
84
  rp = res->d;
85
  ep = expo->d;
86
  MPN_NORMALIZE(ep, esize);
87
88
  if (!msize)
89
    _gcry_divide_by_zero();
90
91
  if (!esize)
92
    {
93
      /* Exponent is zero, result is 1 mod MOD, i.e., 1 or 0 depending
94
         on if MOD equals 1.  */
95
      res->nlimbs = (msize == 1 && mod->d[0] == 1) ? 0 : 1;
96
      if (res->nlimbs)
97
        {
98
          RESIZE_IF_NEEDED (res, 1);
99
          rp = res->d;
100
          rp[0] = 1;
101
        }
102
      res->sign = 0;
103
      goto leave;
104
    }
105
106
  /* Normalize MOD (i.e. make its most significant bit set) as
107
     required by mpn_divrem.  This will make the intermediate values
108
     in the calculation slightly larger, but the correct result is
109
     obtained after a final reduction using the original MOD value. */
110
  mp_nlimbs = msec? msize:0;
111
  mp = mp_marker = mpi_alloc_limb_space(msize, msec);
112
  count_leading_zeros (mod_shift_cnt, mod->d[msize-1]);
113
  if (mod_shift_cnt)
114
    _gcry_mpih_lshift (mp, mod->d, msize, mod_shift_cnt);
115
  else
116
    MPN_COPY( mp, mod->d, msize );
117
118
  bsize = base->nlimbs;
119
  bsign = base->sign;
120
  if (bsize > msize)
121
    {
122
      /* The base is larger than the module.  Reduce it.
123
124
         Allocate (BSIZE + 1) with space for remainder and quotient.
125
         (The quotient is (bsize - msize + 1) limbs.)  */
126
      bp_nlimbs = bsec ? (bsize + 1):0;
127
      bp = bp_marker = mpi_alloc_limb_space( bsize + 1, bsec );
128
      MPN_COPY ( bp, base->d, bsize );
129
      /* We don't care about the quotient, store it above the
130
       * remainder, at BP + MSIZE.  */
131
      _gcry_mpih_divrem( bp + msize, 0, bp, bsize, mp, msize );
132
      bsize = msize;
133
      /* Canonicalize the base, since we are going to multiply with it
134
   quite a few times.  */
135
      MPN_NORMALIZE( bp, bsize );
136
    }
137
  else
138
    bp = base->d;
139
140
  if (!bsize)
141
    {
142
      res->nlimbs = 0;
143
      res->sign = 0;
144
      goto leave;
145
    }
146
147
148
  /* Make BASE, EXPO and MOD not overlap with RES.  */
149
  if ( rp == bp )
150
    {
151
      /* RES and BASE are identical.  Allocate temp. space for BASE.  */
152
      gcry_assert (!bp_marker);
153
      bp_nlimbs = bsec? bsize:0;
154
      bp = bp_marker = mpi_alloc_limb_space( bsize, bsec );
155
      MPN_COPY(bp, rp, bsize);
156
    }
157
  if ( rp == ep )
158
    {
159
      /* RES and EXPO are identical.  Allocate temp. space for EXPO.  */
160
      ep_nlimbs = esec? esize:0;
161
      ep = ep_marker = mpi_alloc_limb_space( esize, esec );
162
      MPN_COPY(ep, rp, esize);
163
    }
164
  if ( rp == mp )
165
    {
166
      /* RES and MOD are identical.  Allocate temporary space for MOD.*/
167
      gcry_assert (!mp_marker);
168
      mp_nlimbs = msec?msize:0;
169
      mp = mp_marker = mpi_alloc_limb_space( msize, msec );
170
      MPN_COPY(mp, rp, msize);
171
    }
172
173
  /* Copy base to the result.  */
174
  if (res->alloced < size)
175
    {
176
      mpi_resize (res, size);
177
      rp = res->d;
178
    }
179
  MPN_COPY ( rp, bp, bsize );
180
  rsize = bsize;
181
  rsign = 0;
182
183
  /* Main processing.  */
184
  {
185
    mpi_size_t i;
186
    mpi_ptr_t xp;
187
    int c;
188
    mpi_limb_t e;
189
    mpi_limb_t carry_limb;
190
    struct karatsuba_ctx karactx;
191
    struct gcry_mpi w, u;
192
193
    xp_nlimbs = msec? size:0;
194
    xp = xp_marker = mpi_alloc_limb_space( size, msec );
195
196
    w.sign = u.sign = 0;
197
    w.flags = u.flags = 0;
198
    w.alloced = w.nlimbs = size; /* RES->alloc may be longer.  */
199
    u.alloced = u.nlimbs = size;
200
201
    memset( &karactx, 0, sizeof karactx );
202
    negative_result = (ep[0] & 1) && bsign;
203
204
    i = esize - 1;
205
    e = ep[i];
206
    count_leading_zeros (c, e);
207
    e = (e << c) << 1;     /* Shift the expo bits to the left, lose msb.  */
208
    c = BITS_PER_MPI_LIMB - 1 - c;
209
210
    /* Main loop.
211
212
       Make the result be pointed to alternately by XP and RP.  This
213
       helps us avoid block copying, which would otherwise be
214
       necessary with the overlap restrictions of
215
       _gcry_mpih_divmod. With 50% probability the result after this
216
       loop will be in the area originally pointed by RP (==RES->d),
217
       and with 50% probability in the area originally pointed to by XP. */
218
    for (;;)
219
      {
220
        while (c)
221
          {
222
            mpi_ptr_t tp;
223
            mpi_size_t xsize;
224
225
            /*mpih_mul_n(xp, rp, rp, rsize);*/
226
            if ( rsize < KARATSUBA_THRESHOLD )
227
              _gcry_mpih_sqr_n_basecase( xp, rp, rsize );
228
            else
229
              {
230
                if ( !tspace )
231
                  {
232
                    tsize = 2 * rsize;
233
                    tspace = mpi_alloc_limb_space( tsize, 0 );
234
                  }
235
                else if ( tsize < (2*rsize) )
236
                  {
237
                    _gcry_mpi_free_limb_space (tspace, 0);
238
                    tsize = 2 * rsize;
239
                    tspace = mpi_alloc_limb_space (tsize, 0 );
240
                  }
241
                _gcry_mpih_sqr_n (xp, rp, rsize, tspace);
242
              }
243
244
            xsize = 2 * rsize;
245
            if ( xsize > msize )
246
              {
247
                _gcry_mpih_divrem(xp + msize, 0, xp, xsize, mp, msize);
248
                xsize = msize;
249
              }
250
251
            tp = rp; rp = xp; xp = tp;
252
            rsize = xsize;
253
254
            /* To mitigate the Yarom/Falkner flush+reload cache
255
             * side-channel attack on the RSA secret exponent, we do
256
             * the multiplication regardless of the value of the
257
             * high-bit of E.  But to avoid this performance penalty
258
             * we do it only if the exponent has been stored in secure
259
             * memory and we can thus assume it is a secret exponent.  */
260
            if (esec || (mpi_limb_signed_t)e < 0)
261
              {
262
                /*mpih_mul( xp, rp, rsize, bp, bsize );*/
263
                if( bsize < KARATSUBA_THRESHOLD )
264
                  _gcry_mpih_mul ( xp, rp, rsize, bp, bsize );
265
                else
266
                  _gcry_mpih_mul_karatsuba_case (xp, rp, rsize, bp, bsize,
267
                                                 &karactx);
268
269
                xsize = rsize + bsize;
270
                if ( xsize > msize )
271
                  {
272
                    _gcry_mpih_divrem(xp + msize, 0, xp, xsize, mp, msize);
273
                    xsize = msize;
274
                  }
275
              }
276
277
            w.d = rp;
278
            u.d = xp;
279
            mpi_set_cond (&w, &u, ((mpi_limb_signed_t)e < 0));
280
281
            e <<= 1;
282
            c--;
283
          }
284
285
        i--;
286
        if ( i < 0 )
287
          break;
288
        e = ep[i];
289
        c = BITS_PER_MPI_LIMB;
290
      }
291
292
    /* We shifted MOD, the modulo reduction argument, left
293
       MOD_SHIFT_CNT steps.  Adjust the result by reducing it with the
294
       original MOD.
295
296
       Also make sure the result is put in RES->d (where it already
297
       might be, see above).  */
298
    if ( mod_shift_cnt )
299
      {
300
        carry_limb = _gcry_mpih_lshift( res->d, rp, rsize, mod_shift_cnt);
301
        rp = res->d;
302
        if ( carry_limb )
303
          {
304
            rp[rsize] = carry_limb;
305
            rsize++;
306
          }
307
      }
308
    else if (res->d != rp)
309
      {
310
        MPN_COPY (res->d, rp, rsize);
311
        rp = res->d;
312
      }
313
314
    if ( rsize >= msize )
315
      {
316
        _gcry_mpih_divrem(rp + msize, 0, rp, rsize, mp, msize);
317
        rsize = msize;
318
      }
319
320
    /* Remove any leading zero words from the result.  */
321
    if ( mod_shift_cnt )
322
      _gcry_mpih_rshift( rp, rp, rsize, mod_shift_cnt);
323
    MPN_NORMALIZE (rp, rsize);
324
325
    _gcry_mpih_release_karatsuba_ctx (&karactx );
326
  }
327
328
  /* Fixup for negative results.  */
329
  if ( negative_result && rsize )
330
    {
331
      if ( mod_shift_cnt )
332
        _gcry_mpih_rshift( mp, mp, msize, mod_shift_cnt);
333
      _gcry_mpih_sub( rp, mp, msize, rp, rsize);
334
      rsize = msize;
335
      rsign = msign;
336
      MPN_NORMALIZE(rp, rsize);
337
    }
338
  gcry_assert (res->d == rp);
339
  res->nlimbs = rsize;
340
  res->sign = rsign;
341
342
 leave:
343
  if (mp_marker)
344
    _gcry_mpi_free_limb_space( mp_marker, mp_nlimbs );
345
  if (bp_marker)
346
    _gcry_mpi_free_limb_space( bp_marker, bp_nlimbs );
347
  if (ep_marker)
348
    _gcry_mpi_free_limb_space( ep_marker, ep_nlimbs );
349
  if (xp_marker)
350
    _gcry_mpi_free_limb_space( xp_marker, xp_nlimbs );
351
  if (tspace)
352
    _gcry_mpi_free_limb_space( tspace, 0 );
353
}
354
#else
355
/**
356
 * Internal function to compute
357
 *
358
 *    X = R * S mod M
359
 *
360
 * and set the size of X at the pointer XSIZE_P.
361
 * Use karatsuba structure at KARACTX_P.
362
 *
363
 * Condition:
364
 *   RSIZE >= SSIZE
365
 *   Enough space for X is allocated beforehand.
366
 *
367
 * For generic cases, we can/should use gcry_mpi_mulm.
368
 * This function is use for specific internal case.
369
 */
370
static void
371
mul_mod (mpi_ptr_t xp, mpi_size_t *xsize_p,
372
         mpi_ptr_t rp, mpi_size_t rsize,
373
         mpi_ptr_t sp, mpi_size_t ssize,
374
         mpi_ptr_t mp, mpi_size_t msize,
375
         struct karatsuba_ctx *karactx_p)
376
4.58M
{
377
4.58M
  if( ssize < KARATSUBA_THRESHOLD )
378
4.30M
    _gcry_mpih_mul ( xp, rp, rsize, sp, ssize );
379
280k
  else
380
280k
    _gcry_mpih_mul_karatsuba_case (xp, rp, rsize, sp, ssize, karactx_p);
381
382
4.58M
   if (rsize + ssize > msize)
383
3.89M
    {
384
3.89M
      _gcry_mpih_divrem (xp + msize, 0, xp, rsize + ssize, mp, msize);
385
3.89M
      *xsize_p = msize;
386
3.89M
    }
387
694k
   else
388
694k
     *xsize_p = rsize + ssize;
389
4.58M
}
390
391
#define SIZE_PRECOMP ((1 << (5 - 1)))
392
393
/****************
394
 * RES = BASE ^ EXPO mod MOD
395
 *
396
 * To mitigate the Yarom/Falkner flush+reload cache side-channel
397
 * attack on the RSA secret exponent, we don't use the square
398
 * routine but multiplication.
399
 *
400
 * Reference:
401
 *   Handbook of Applied Cryptography
402
 *       Algorithm 14.83: Modified left-to-right k-ary exponentiation
403
 */
404
void
405
_gcry_mpi_powm (gcry_mpi_t res,
406
                gcry_mpi_t base, gcry_mpi_t expo, gcry_mpi_t mod)
407
1.36M
{
408
  /* Pointer to the limbs of the arguments, their size and signs. */
409
1.36M
  mpi_ptr_t  rp, ep, mp, bp;
410
1.36M
  mpi_size_t esize, msize, bsize, rsize;
411
1.36M
  int               msign, bsign, rsign;
412
  /* Flags telling the secure allocation status of the arguments.  */
413
1.36M
  int        esec,  msec,  bsec;
414
  /* Size of the result including space for temporary values.  */
415
1.36M
  mpi_size_t size;
416
  /* Helper.  */
417
1.36M
  int mod_shift_cnt;
418
1.36M
  int negative_result;
419
1.36M
  mpi_ptr_t mp_marker = NULL;
420
1.36M
  mpi_ptr_t bp_marker = NULL;
421
1.36M
  mpi_ptr_t ep_marker = NULL;
422
1.36M
  mpi_ptr_t xp_marker = NULL;
423
1.36M
  unsigned int mp_nlimbs = 0;
424
1.36M
  unsigned int bp_nlimbs = 0;
425
1.36M
  unsigned int ep_nlimbs = 0;
426
1.36M
  unsigned int xp_nlimbs = 0;
427
1.36M
  mpi_ptr_t precomp[SIZE_PRECOMP]; /* Pre-computed array: BASE^1, ^3, ^5, ... */
428
1.36M
  mpi_size_t precomp_size[SIZE_PRECOMP];
429
1.36M
  mpi_size_t W;
430
1.36M
  mpi_ptr_t base_u;
431
1.36M
  mpi_size_t base_u_size;
432
1.36M
  mpi_size_t max_u_size;
433
434
1.36M
  esize = expo->nlimbs;
435
1.36M
  msize = mod->nlimbs;
436
1.36M
  size = 2 * msize;
437
1.36M
  msign = mod->sign;
438
439
1.36M
  ep = expo->d;
440
1.36M
  MPN_NORMALIZE(ep, esize);
441
442
1.36M
  if (esize * BITS_PER_MPI_LIMB > 512)
443
176
    W = 5;
444
1.36M
  else if (esize * BITS_PER_MPI_LIMB > 256)
445
18
    W = 4;
446
1.36M
  else if (esize * BITS_PER_MPI_LIMB > 128)
447
4.60k
    W = 3;
448
1.36M
  else if (esize * BITS_PER_MPI_LIMB > 64)
449
10
    W = 2;
450
1.36M
  else
451
1.36M
    W = 1;
452
453
1.36M
  esec = mpi_is_secure(expo);
454
1.36M
  msec = mpi_is_secure(mod);
455
1.36M
  bsec = mpi_is_secure(base);
456
457
1.36M
  rp = res->d;
458
459
1.36M
  if (!msize)
460
0
    _gcry_divide_by_zero();
461
462
1.36M
  if (!esize)
463
7
    {
464
      /* Exponent is zero, result is 1 mod MOD, i.e., 1 or 0 depending
465
         on if MOD equals 1.  */
466
7
      res->nlimbs = (msize == 1 && mod->d[0] == 1) ? 0 : 1;
467
7
      if (res->nlimbs)
468
7
        {
469
7
          RESIZE_IF_NEEDED (res, 1);
470
7
          rp = res->d;
471
7
          rp[0] = 1;
472
7
        }
473
7
      res->sign = 0;
474
7
      goto leave;
475
7
    }
476
477
  /* Normalize MOD (i.e. make its most significant bit set) as
478
     required by mpn_divrem.  This will make the intermediate values
479
     in the calculation slightly larger, but the correct result is
480
     obtained after a final reduction using the original MOD value. */
481
1.36M
  mp_nlimbs = msec? msize:0;
482
1.36M
  mp = mp_marker = mpi_alloc_limb_space(msize, msec);
483
1.36M
  count_leading_zeros (mod_shift_cnt, mod->d[msize-1]);
484
1.36M
  if (mod_shift_cnt)
485
248k
    _gcry_mpih_lshift (mp, mod->d, msize, mod_shift_cnt);
486
1.12M
  else
487
1.12M
    MPN_COPY( mp, mod->d, msize );
488
489
1.36M
  bsize = base->nlimbs;
490
1.36M
  bsign = base->sign;
491
1.36M
  if (bsize > msize)
492
306
    {
493
      /* The base is larger than the module.  Reduce it.
494
495
         Allocate (BSIZE + 1) with space for remainder and quotient.
496
         (The quotient is (bsize - msize + 1) limbs.)  */
497
306
      bp_nlimbs = bsec ? (bsize + 1):0;
498
306
      bp = bp_marker = mpi_alloc_limb_space( bsize + 1, bsec );
499
306
      MPN_COPY ( bp, base->d, bsize );
500
      /* We don't care about the quotient, store it above the
501
       * remainder, at BP + MSIZE.  */
502
306
      _gcry_mpih_divrem( bp + msize, 0, bp, bsize, mp, msize );
503
306
      bsize = msize;
504
      /* Canonicalize the base, since we are going to multiply with it
505
         quite a few times.  */
506
306
      MPN_NORMALIZE( bp, bsize );
507
306
    }
508
1.36M
  else
509
1.36M
    bp = base->d;
510
511
1.36M
  if (!bsize)
512
189
    {
513
189
      res->nlimbs = 0;
514
189
      res->sign = 0;
515
189
      goto leave;
516
189
    }
517
518
519
  /* Make BASE, EXPO not overlap with RES.  We don't need to check MOD
520
     because that has already been copied to the MP var.  */
521
1.36M
  if ( rp == bp )
522
4.59k
    {
523
      /* RES and BASE are identical.  Allocate temp. space for BASE.  */
524
4.59k
      gcry_assert (!bp_marker);
525
4.59k
      bp_nlimbs = bsec? bsize:0;
526
4.59k
      bp = bp_marker = mpi_alloc_limb_space( bsize, bsec );
527
4.59k
      MPN_COPY(bp, rp, bsize);
528
4.59k
    }
529
1.36M
  if ( rp == ep )
530
0
    {
531
      /* RES and EXPO are identical.  Allocate temp. space for EXPO.  */
532
0
      ep_nlimbs = esec? esize:0;
533
0
      ep = ep_marker = mpi_alloc_limb_space( esize, esec );
534
0
      MPN_COPY(ep, rp, esize);
535
0
    }
536
537
  /* Copy base to the result.  */
538
1.36M
  if (res->alloced < size)
539
13.8k
    {
540
13.8k
      mpi_resize (res, size);
541
13.8k
      rp = res->d;
542
13.8k
    }
543
544
  /* Main processing.  */
545
1.36M
  {
546
1.36M
    mpi_size_t i, j, k;
547
1.36M
    mpi_ptr_t xp;
548
1.36M
    mpi_size_t xsize = 0;
549
1.36M
    int c;
550
1.36M
    mpi_limb_t e;
551
1.36M
    mpi_limb_t carry_limb;
552
1.36M
    struct karatsuba_ctx karactx;
553
1.36M
    mpi_ptr_t tp;
554
555
1.36M
    xp_nlimbs = msec? size:0;
556
1.36M
    xp = xp_marker = mpi_alloc_limb_space( size, msec );
557
558
1.36M
    memset( &karactx, 0, sizeof karactx );
559
1.36M
    negative_result = (ep[0] & 1) && bsign;
560
561
    /* Precompute PRECOMP[], BASE^(2 * i + 1), BASE^1, ^3, ^5, ... */
562
1.36M
    if (W > 1)                  /* X := BASE^2 */
563
4.80k
      mul_mod (xp, &xsize, bp, bsize, bp, bsize, mp, msize, &karactx);
564
1.36M
    base_u = precomp[0] = mpi_alloc_limb_space (bsize, esec);
565
1.36M
    base_u_size = max_u_size = precomp_size[0] = bsize;
566
1.36M
    MPN_COPY (precomp[0], bp, bsize);
567
1.38M
    for (i = 1; i < (1 << (W - 1)); i++)
568
16.5k
      {                         /* PRECOMP[i] = BASE^(2 * i + 1) */
569
16.5k
        if (xsize >= base_u_size)
570
13.9k
          mul_mod (rp, &rsize, xp, xsize, base_u, base_u_size,
571
13.9k
                   mp, msize, &karactx);
572
2.59k
        else
573
2.59k
          mul_mod (rp, &rsize, base_u, base_u_size, xp, xsize,
574
2.59k
                   mp, msize, &karactx);
575
16.5k
        base_u = precomp[i] = mpi_alloc_limb_space (rsize, esec);
576
16.5k
        base_u_size = precomp_size[i] = rsize;
577
16.5k
        if (max_u_size < base_u_size)
578
1.14k
          max_u_size = base_u_size;
579
16.5k
        MPN_COPY (precomp[i], rp, rsize);
580
16.5k
      }
581
582
1.36M
    if (msize > max_u_size)
583
346k
      max_u_size = msize;
584
1.36M
    base_u = mpi_alloc_limb_space (max_u_size, esec);
585
1.36M
    MPN_ZERO (base_u, max_u_size);
586
587
1.36M
    i = esize - 1;
588
589
    /* Main loop.
590
591
       Make the result be pointed to alternately by XP and RP.  This
592
       helps us avoid block copying, which would otherwise be
593
       necessary with the overlap restrictions of
594
       _gcry_mpih_divmod. With 50% probability the result after this
595
       loop will be in the area originally pointed by RP (==RES->d),
596
       and with 50% probability in the area originally pointed to by XP. */
597
1.36M
    rsign = 0;
598
1.36M
    if (W == 1)
599
1.36M
      {
600
1.36M
        rsize = bsize;
601
1.36M
      }
602
4.80k
    else
603
4.80k
      {
604
4.80k
        rsize = msize;
605
4.80k
        MPN_ZERO (rp, rsize);
606
4.80k
      }
607
1.36M
    MPN_COPY ( rp, bp, bsize );
608
609
1.36M
    e = ep[i];
610
1.36M
    count_leading_zeros (c, e);
611
1.36M
    e = (e << c) << 1;
612
1.36M
    c = BITS_PER_MPI_LIMB - 1 - c;
613
614
1.36M
    j = 0;
615
616
1.36M
    for (;;)
617
2.84M
      if (e == 0)
618
1.37M
        {
619
1.37M
          j += c;
620
1.37M
          if ( --i < 0 )
621
1.36M
            break;
622
623
6.59k
          e = ep[i];
624
6.59k
          c = BITS_PER_MPI_LIMB;
625
6.59k
        }
626
1.47M
      else
627
1.47M
        {
628
1.47M
          int c0;
629
1.47M
          mpi_limb_t e0;
630
1.47M
          struct gcry_mpi w, u;
631
1.47M
          w.sign = u.sign = 0;
632
1.47M
          w.flags = u.flags = 0;
633
1.47M
          w.d = base_u;
634
635
1.47M
          count_leading_zeros (c0, e);
636
1.47M
          e = (e << c0);
637
1.47M
          c -= c0;
638
1.47M
          j += c0;
639
640
1.47M
          e0 = (e >> (BITS_PER_MPI_LIMB - W));
641
1.47M
          if (c >= W)
642
1.45M
            c0 = 0;
643
15.5k
          else
644
15.5k
            {
645
15.5k
              if ( --i < 0 )
646
4.64k
                {
647
4.64k
                  e0 = (e >> (BITS_PER_MPI_LIMB - c));
648
4.64k
                  j += c - W;
649
4.64k
                  goto last_step;
650
4.64k
                }
651
10.9k
              else
652
10.9k
                {
653
10.9k
                  c0 = c;
654
10.9k
                  e = ep[i];
655
10.9k
                  c = BITS_PER_MPI_LIMB;
656
10.9k
                  e0 |= (e >> (BITS_PER_MPI_LIMB - (W - c0)));
657
10.9k
                }
658
15.5k
            }
659
660
1.46M
          e = e << (W - c0);
661
1.46M
          c -= (W - c0);
662
663
1.47M
        last_step:
664
1.47M
          count_trailing_zeros (c0, e0);
665
1.47M
          e0 = (e0 >> c0) >> 1;
666
667
5.40M
          for (j += W - c0; j >= 0; j--)
668
3.93M
            {
669
670
              /*
671
               *  base_u <= precomp[e0]
672
               *  base_u_size <= precomp_size[e0]
673
               */
674
3.93M
              base_u_size = 0;
675
16.4M
              for (k = 0; k < (1<< (W - 1)); k++)
676
12.5M
                {
677
12.5M
                  w.alloced = w.nlimbs = precomp_size[k];
678
12.5M
                  u.alloced = u.nlimbs = precomp_size[k];
679
12.5M
                  u.d = precomp[k];
680
681
12.5M
                  mpi_set_cond (&w, &u, k == e0);
682
12.5M
                  base_u_size |= ( precomp_size[k] & (0UL - (k == e0)) );
683
12.5M
                }
684
685
3.93M
              w.alloced = w.nlimbs = rsize;
686
3.93M
              u.alloced = u.nlimbs = rsize;
687
3.93M
              u.d = rp;
688
3.93M
              mpi_set_cond (&w, &u, j != 0);
689
3.93M
              base_u_size ^= ((base_u_size ^ rsize)  & (0UL - (j != 0)));
690
691
3.93M
              mul_mod (xp, &xsize, rp, rsize, base_u, base_u_size,
692
3.93M
                       mp, msize, &karactx);
693
3.93M
              tp = rp; rp = xp; xp = tp;
694
3.93M
              rsize = xsize;
695
3.93M
            }
696
697
1.47M
          j = c0;
698
1.47M
          if ( i < 0 )
699
4.64k
            break;
700
1.47M
        }
701
702
1.99M
    while (j--)
703
629k
      {
704
629k
        mul_mod (xp, &xsize, rp, rsize, rp, rsize, mp, msize, &karactx);
705
629k
        tp = rp; rp = xp; xp = tp;
706
629k
        rsize = xsize;
707
629k
      }
708
709
    /* We shifted MOD, the modulo reduction argument, left
710
       MOD_SHIFT_CNT steps.  Adjust the result by reducing it with the
711
       original MOD.
712
713
       Also make sure the result is put in RES->d (where it already
714
       might be, see above).  */
715
1.36M
    if ( mod_shift_cnt )
716
248k
      {
717
248k
        carry_limb = _gcry_mpih_lshift( res->d, rp, rsize, mod_shift_cnt);
718
248k
        rp = res->d;
719
248k
        if ( carry_limb )
720
166k
          {
721
166k
            rp[rsize] = carry_limb;
722
166k
            rsize++;
723
166k
          }
724
248k
      }
725
1.12M
    else if (res->d != rp)
726
1.14k
      {
727
1.14k
        MPN_COPY (res->d, rp, rsize);
728
1.14k
        rp = res->d;
729
1.14k
      }
730
731
1.36M
    if ( rsize >= msize )
732
1.02M
      {
733
1.02M
        _gcry_mpih_divrem(rp + msize, 0, rp, rsize, mp, msize);
734
1.02M
        rsize = msize;
735
1.02M
      }
736
737
    /* Remove any leading zero words from the result.  */
738
1.36M
    if ( mod_shift_cnt )
739
248k
      _gcry_mpih_rshift( rp, rp, rsize, mod_shift_cnt);
740
1.36M
    MPN_NORMALIZE (rp, rsize);
741
742
1.36M
    _gcry_mpih_release_karatsuba_ctx (&karactx );
743
2.75M
    for (i = 0; i < (1 << (W - 1)); i++)
744
1.38M
      _gcry_mpi_free_limb_space( precomp[i], esec ? precomp_size[i] : 0 );
745
1.36M
    _gcry_mpi_free_limb_space (base_u, esec ? max_u_size : 0);
746
1.36M
  }
747
748
  /* Fixup for negative results.  */
749
1.36M
  if ( negative_result && rsize )
750
0
    {
751
0
      if ( mod_shift_cnt )
752
0
        _gcry_mpih_rshift( mp, mp, msize, mod_shift_cnt);
753
0
      _gcry_mpih_sub( rp, mp, msize, rp, rsize);
754
0
      rsize = msize;
755
0
      rsign = msign;
756
0
      MPN_NORMALIZE(rp, rsize);
757
0
    }
758
1.36M
  gcry_assert (res->d == rp);
759
0
  res->nlimbs = rsize;
760
1.36M
  res->sign = rsign;
761
762
1.36M
 leave:
763
1.36M
  if (mp_marker)
764
1.36M
    _gcry_mpi_free_limb_space( mp_marker, mp_nlimbs );
765
1.36M
  if (bp_marker)
766
4.89k
    _gcry_mpi_free_limb_space( bp_marker, bp_nlimbs );
767
1.36M
  if (ep_marker)
768
0
    _gcry_mpi_free_limb_space( ep_marker, ep_nlimbs );
769
1.36M
  if (xp_marker)
770
1.36M
    _gcry_mpi_free_limb_space( xp_marker, xp_nlimbs );
771
1.36M
}
772
#endif