Coverage Report

Created: 2022-12-08 06:10

/src/libgcrypt/mpi/mpi-pow.c
Line
Count
Source (jump to first uncovered line)
1
/* mpi-pow.c  -  MPI functions for exponentiation
2
 * Copyright (C) 1994, 1996, 1998, 2000, 2002
3
 *               2003  Free Software Foundation, Inc.
4
 *               2013  g10 Code GmbH
5
 *
6
 * This file is part of Libgcrypt.
7
 *
8
 * Libgcrypt is free software; you can redistribute it and/or modify
9
 * it under the terms of the GNU Lesser General Public License as
10
 * published by the Free Software Foundation; either version 2.1 of
11
 * the License, or (at your option) any later version.
12
 *
13
 * Libgcrypt is distributed in the hope that it will be useful,
14
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16
 * GNU Lesser General Public License for more details.
17
 *
18
 * You should have received a copy of the GNU Lesser General Public
19
 * License along with this program; if not, see <http://www.gnu.org/licenses/>.
20
 *
21
 * Note: This code is heavily based on the GNU MP Library.
22
 *   Actually it's the same code with only minor changes in the
23
 *   way the data is stored; this is to support the abstraction
24
 *   of an optional secure memory allocation which may be used
25
 *   to avoid revealing of sensitive data due to paging etc.
26
 */
27
28
#include <config.h>
29
#include <stdio.h>
30
#include <stdlib.h>
31
#include <string.h>
32
33
#include "mpi-internal.h"
34
#include "longlong.h"
35
36
37
/*
38
 * When you need old implementation, please add compilation option
39
 * -DUSE_ALGORITHM_SIMPLE_EXPONENTIATION
40
 * or expose this line:
41
#define USE_ALGORITHM_SIMPLE_EXPONENTIATION 1
42
 */
43
44
#if defined(USE_ALGORITHM_SIMPLE_EXPONENTIATION)
45
/****************
46
 * RES = BASE ^ EXPO mod MOD
47
 */
48
void
49
_gcry_mpi_powm (gcry_mpi_t res,
50
                gcry_mpi_t base, gcry_mpi_t expo, gcry_mpi_t mod)
51
{
52
  /* Pointer to the limbs of the arguments, their size and signs. */
53
  mpi_ptr_t  rp, ep, mp, bp;
54
  mpi_size_t esize, msize, bsize, rsize;
55
  int               msign, bsign, rsign;
56
  /* Flags telling the secure allocation status of the arguments.  */
57
  int        esec,  msec,  bsec;
58
  /* Size of the result including space for temporary values.  */
59
  mpi_size_t size;
60
  /* Helper.  */
61
  int mod_shift_cnt;
62
  int negative_result;
63
  mpi_ptr_t mp_marker = NULL;
64
  mpi_ptr_t bp_marker = NULL;
65
  mpi_ptr_t ep_marker = NULL;
66
  mpi_ptr_t xp_marker = NULL;
67
  unsigned int mp_nlimbs = 0;
68
  unsigned int bp_nlimbs = 0;
69
  unsigned int ep_nlimbs = 0;
70
  unsigned int xp_nlimbs = 0;
71
  mpi_ptr_t tspace = NULL;
72
  mpi_size_t tsize = 0;
73
74
75
  esize = expo->nlimbs;
76
  msize = mod->nlimbs;
77
  size = 2 * msize;
78
  msign = mod->sign;
79
80
  esec = mpi_is_secure(expo);
81
  msec = mpi_is_secure(mod);
82
  bsec = mpi_is_secure(base);
83
84
  rp = res->d;
85
  ep = expo->d;
86
  MPN_NORMALIZE(ep, esize);
87
88
  if (!msize)
89
    _gcry_divide_by_zero();
90
91
  if (!esize)
92
    {
93
      /* Exponent is zero, result is 1 mod MOD, i.e., 1 or 0 depending
94
         on if MOD equals 1.  */
95
      res->nlimbs = (msize == 1 && mod->d[0] == 1) ? 0 : 1;
96
      if (res->nlimbs)
97
        {
98
          RESIZE_IF_NEEDED (res, 1);
99
          rp = res->d;
100
          rp[0] = 1;
101
        }
102
      res->sign = 0;
103
      goto leave;
104
    }
105
106
  /* Normalize MOD (i.e. make its most significant bit set) as
107
     required by mpn_divrem.  This will make the intermediate values
108
     in the calculation slightly larger, but the correct result is
109
     obtained after a final reduction using the original MOD value. */
110
  mp_nlimbs = msec? msize:0;
111
  mp = mp_marker = mpi_alloc_limb_space(msize, msec);
112
  count_leading_zeros (mod_shift_cnt, mod->d[msize-1]);
113
  if (mod_shift_cnt)
114
    _gcry_mpih_lshift (mp, mod->d, msize, mod_shift_cnt);
115
  else
116
    MPN_COPY( mp, mod->d, msize );
117
118
  bsize = base->nlimbs;
119
  bsign = base->sign;
120
  if (bsize > msize)
121
    {
122
      /* The base is larger than the module.  Reduce it.
123
124
         Allocate (BSIZE + 1) with space for remainder and quotient.
125
         (The quotient is (bsize - msize + 1) limbs.)  */
126
      bp_nlimbs = bsec ? (bsize + 1):0;
127
      bp = bp_marker = mpi_alloc_limb_space( bsize + 1, bsec );
128
      MPN_COPY ( bp, base->d, bsize );
129
      /* We don't care about the quotient, store it above the
130
       * remainder, at BP + MSIZE.  */
131
      _gcry_mpih_divrem( bp + msize, 0, bp, bsize, mp, msize );
132
      bsize = msize;
133
      /* Canonicalize the base, since we are going to multiply with it
134
   quite a few times.  */
135
      MPN_NORMALIZE( bp, bsize );
136
    }
137
  else
138
    bp = base->d;
139
140
  if (!bsize)
141
    {
142
      res->nlimbs = 0;
143
      res->sign = 0;
144
      goto leave;
145
    }
146
147
148
  /* Make BASE, EXPO and MOD not overlap with RES.  */
149
  if ( rp == bp )
150
    {
151
      /* RES and BASE are identical.  Allocate temp. space for BASE.  */
152
      gcry_assert (!bp_marker);
153
      bp_nlimbs = bsec? bsize:0;
154
      bp = bp_marker = mpi_alloc_limb_space( bsize, bsec );
155
      MPN_COPY(bp, rp, bsize);
156
    }
157
  if ( rp == ep )
158
    {
159
      /* RES and EXPO are identical.  Allocate temp. space for EXPO.  */
160
      ep_nlimbs = esec? esize:0;
161
      ep = ep_marker = mpi_alloc_limb_space( esize, esec );
162
      MPN_COPY(ep, rp, esize);
163
    }
164
  if ( rp == mp )
165
    {
166
      /* RES and MOD are identical.  Allocate temporary space for MOD.*/
167
      gcry_assert (!mp_marker);
168
      mp_nlimbs = msec?msize:0;
169
      mp = mp_marker = mpi_alloc_limb_space( msize, msec );
170
      MPN_COPY(mp, rp, msize);
171
    }
172
173
  /* Copy base to the result.  */
174
  if (res->alloced < size)
175
    {
176
      mpi_resize (res, size);
177
      rp = res->d;
178
    }
179
  MPN_COPY ( rp, bp, bsize );
180
  rsize = bsize;
181
  rsign = 0;
182
183
  /* Main processing.  */
184
  {
185
    mpi_size_t i;
186
    mpi_ptr_t xp;
187
    int c;
188
    mpi_limb_t e;
189
    mpi_limb_t carry_limb;
190
    struct karatsuba_ctx karactx;
191
    struct gcry_mpi w, u;
192
193
    xp_nlimbs = msec? size:0;
194
    xp = xp_marker = mpi_alloc_limb_space( size, msec );
195
196
    w.sign = u.sign = 0;
197
    w.flags = u.flags = 0;
198
    w.alloced = w.nlimbs = size; /* RES->alloc may be longer.  */
199
    u.alloced = u.nlimbs = size;
200
201
    memset( &karactx, 0, sizeof karactx );
202
    negative_result = (ep[0] & 1) && bsign;
203
204
    i = esize - 1;
205
    e = ep[i];
206
    count_leading_zeros (c, e);
207
    e = (e << c) << 1;     /* Shift the expo bits to the left, lose msb.  */
208
    c = BITS_PER_MPI_LIMB - 1 - c;
209
210
    /* Main loop.
211
212
       Make the result be pointed to alternately by XP and RP.  This
213
       helps us avoid block copying, which would otherwise be
214
       necessary with the overlap restrictions of
215
       _gcry_mpih_divmod. With 50% probability the result after this
216
       loop will be in the area originally pointed by RP (==RES->d),
217
       and with 50% probability in the area originally pointed to by XP. */
218
    for (;;)
219
      {
220
        while (c)
221
          {
222
            mpi_ptr_t tp;
223
            mpi_size_t xsize;
224
225
            /*mpih_mul_n(xp, rp, rp, rsize);*/
226
            if ( rsize < KARATSUBA_THRESHOLD )
227
              _gcry_mpih_sqr_n_basecase( xp, rp, rsize );
228
            else
229
              {
230
                if ( !tspace )
231
                  {
232
                    tsize = 2 * rsize;
233
                    tspace = mpi_alloc_limb_space( tsize, 0 );
234
                  }
235
                else if ( tsize < (2*rsize) )
236
                  {
237
                    _gcry_mpi_free_limb_space (tspace, 0);
238
                    tsize = 2 * rsize;
239
                    tspace = mpi_alloc_limb_space (tsize, 0 );
240
                  }
241
                _gcry_mpih_sqr_n (xp, rp, rsize, tspace);
242
              }
243
244
            xsize = 2 * rsize;
245
            if ( xsize > msize )
246
              {
247
                _gcry_mpih_divrem(xp + msize, 0, xp, xsize, mp, msize);
248
                xsize = msize;
249
              }
250
251
            tp = rp; rp = xp; xp = tp;
252
            rsize = xsize;
253
254
            /* To mitigate the Yarom/Falkner flush+reload cache
255
             * side-channel attack on the RSA secret exponent, we do
256
             * the multiplication regardless of the value of the
257
             * high-bit of E.  But to avoid this performance penalty
258
             * we do it only if the exponent has been stored in secure
259
             * memory and we can thus assume it is a secret exponent.  */
260
            if (esec || (mpi_limb_signed_t)e < 0)
261
              {
262
                /*mpih_mul( xp, rp, rsize, bp, bsize );*/
263
                if( bsize < KARATSUBA_THRESHOLD )
264
                  _gcry_mpih_mul ( xp, rp, rsize, bp, bsize );
265
                else
266
                  _gcry_mpih_mul_karatsuba_case (xp, rp, rsize, bp, bsize,
267
                                                 &karactx);
268
269
                xsize = rsize + bsize;
270
                if ( xsize > msize )
271
                  {
272
                    _gcry_mpih_divrem(xp + msize, 0, xp, xsize, mp, msize);
273
                    xsize = msize;
274
                  }
275
              }
276
277
            w.d = rp;
278
            u.d = xp;
279
            mpi_set_cond (&w, &u, ((mpi_limb_signed_t)e < 0));
280
281
            e <<= 1;
282
            c--;
283
          }
284
285
        i--;
286
        if ( i < 0 )
287
          break;
288
        e = ep[i];
289
        c = BITS_PER_MPI_LIMB;
290
      }
291
292
    /* We shifted MOD, the modulo reduction argument, left
293
       MOD_SHIFT_CNT steps.  Adjust the result by reducing it with the
294
       original MOD.
295
296
       Also make sure the result is put in RES->d (where it already
297
       might be, see above).  */
298
    if ( mod_shift_cnt )
299
      {
300
        carry_limb = _gcry_mpih_lshift( res->d, rp, rsize, mod_shift_cnt);
301
        rp = res->d;
302
        if ( carry_limb )
303
          {
304
            rp[rsize] = carry_limb;
305
            rsize++;
306
          }
307
      }
308
    else if (res->d != rp)
309
      {
310
        MPN_COPY (res->d, rp, rsize);
311
        rp = res->d;
312
      }
313
314
    if ( rsize >= msize )
315
      {
316
        _gcry_mpih_divrem(rp + msize, 0, rp, rsize, mp, msize);
317
        rsize = msize;
318
      }
319
320
    /* Remove any leading zero words from the result.  */
321
    if ( mod_shift_cnt )
322
      _gcry_mpih_rshift( rp, rp, rsize, mod_shift_cnt);
323
    MPN_NORMALIZE (rp, rsize);
324
325
    _gcry_mpih_release_karatsuba_ctx (&karactx );
326
  }
327
328
  /* Fixup for negative results.  */
329
  if ( negative_result && rsize )
330
    {
331
      if ( mod_shift_cnt )
332
        _gcry_mpih_rshift( mp, mp, msize, mod_shift_cnt);
333
      _gcry_mpih_sub( rp, mp, msize, rp, rsize);
334
      rsize = msize;
335
      rsign = msign;
336
      MPN_NORMALIZE(rp, rsize);
337
    }
338
  gcry_assert (res->d == rp);
339
  res->nlimbs = rsize;
340
  res->sign = rsign;
341
342
 leave:
343
  if (mp_marker)
344
    _gcry_mpi_free_limb_space( mp_marker, mp_nlimbs );
345
  if (bp_marker)
346
    _gcry_mpi_free_limb_space( bp_marker, bp_nlimbs );
347
  if (ep_marker)
348
    _gcry_mpi_free_limb_space( ep_marker, ep_nlimbs );
349
  if (xp_marker)
350
    _gcry_mpi_free_limb_space( xp_marker, xp_nlimbs );
351
  if (tspace)
352
    _gcry_mpi_free_limb_space( tspace, 0 );
353
}
354
#else
355
/**
356
 * Internal function to compute
357
 *
358
 *    X = R * S mod M
359
 *
360
 * and set the size of X at the pointer XSIZE_P.
361
 * Use karatsuba structure at KARACTX_P.
362
 *
363
 * Condition:
364
 *   RSIZE >= SSIZE
365
 *   Enough space for X is allocated beforehand.
366
 *
367
 * For generic cases, we can/should use gcry_mpi_mulm.
368
 * This function is use for specific internal case.
369
 */
370
static void
371
mul_mod (mpi_ptr_t xp, mpi_size_t *xsize_p,
372
         mpi_ptr_t rp, mpi_size_t rsize,
373
         mpi_ptr_t sp, mpi_size_t ssize,
374
         mpi_ptr_t mp, mpi_size_t msize,
375
         struct karatsuba_ctx *karactx_p)
376
17.3k
{
377
17.3k
  if( ssize < KARATSUBA_THRESHOLD )
378
17.1k
    _gcry_mpih_mul ( xp, rp, rsize, sp, ssize );
379
255
  else
380
255
    _gcry_mpih_mul_karatsuba_case (xp, rp, rsize, sp, ssize, karactx_p);
381
382
17.3k
   if (rsize + ssize > msize)
383
11.7k
    {
384
11.7k
      _gcry_mpih_divrem (xp + msize, 0, xp, rsize + ssize, mp, msize);
385
11.7k
      *xsize_p = msize;
386
11.7k
    }
387
5.66k
   else
388
5.66k
     *xsize_p = rsize + ssize;
389
17.3k
}
390
391
#define SIZE_PRECOMP ((1 << (5 - 1)))
392
393
/****************
394
 * RES = BASE ^ EXPO mod MOD
395
 *
396
 * To mitigate the Yarom/Falkner flush+reload cache side-channel
397
 * attack on the RSA secret exponent, we don't use the square
398
 * routine but multiplication.
399
 *
400
 * Reference:
401
 *   Handbook of Applied Cryptography
402
 *       Algorithm 14.83: Modified left-to-right k-ary exponentiation
403
 */
404
void
405
_gcry_mpi_powm (gcry_mpi_t res,
406
                gcry_mpi_t base, gcry_mpi_t expo, gcry_mpi_t mod)
407
8.57k
{
408
  /* Pointer to the limbs of the arguments, their size and signs. */
409
8.57k
  mpi_ptr_t  rp, ep, mp, bp;
410
8.57k
  mpi_size_t esize, msize, bsize, rsize;
411
8.57k
  int               msign, bsign, rsign;
412
  /* Flags telling the secure allocation status of the arguments.  */
413
8.57k
  int        esec,  msec,  bsec;
414
  /* Size of the result including space for temporary values.  */
415
8.57k
  mpi_size_t size;
416
  /* Helper.  */
417
8.57k
  int mod_shift_cnt;
418
8.57k
  int negative_result;
419
8.57k
  mpi_ptr_t mp_marker = NULL;
420
8.57k
  mpi_ptr_t bp_marker = NULL;
421
8.57k
  mpi_ptr_t ep_marker = NULL;
422
8.57k
  mpi_ptr_t xp_marker = NULL;
423
8.57k
  unsigned int mp_nlimbs = 0;
424
8.57k
  unsigned int bp_nlimbs = 0;
425
8.57k
  unsigned int ep_nlimbs = 0;
426
8.57k
  unsigned int xp_nlimbs = 0;
427
8.57k
  mpi_ptr_t precomp[SIZE_PRECOMP]; /* Pre-computed array: BASE^1, ^3, ^5, ... */
428
8.57k
  mpi_size_t precomp_size[SIZE_PRECOMP];
429
8.57k
  mpi_size_t W;
430
8.57k
  mpi_ptr_t base_u;
431
8.57k
  mpi_size_t base_u_size;
432
8.57k
  mpi_size_t max_u_size;
433
434
8.57k
  esize = expo->nlimbs;
435
8.57k
  msize = mod->nlimbs;
436
8.57k
  size = 2 * msize;
437
8.57k
  msign = mod->sign;
438
439
8.57k
  ep = expo->d;
440
8.57k
  MPN_NORMALIZE(ep, esize);
441
442
8.57k
  if (esize * BITS_PER_MPI_LIMB > 512)
443
0
    W = 5;
444
8.57k
  else if (esize * BITS_PER_MPI_LIMB > 256)
445
0
    W = 4;
446
8.57k
  else if (esize * BITS_PER_MPI_LIMB > 128)
447
0
    W = 3;
448
8.57k
  else if (esize * BITS_PER_MPI_LIMB > 64)
449
0
    W = 2;
450
8.57k
  else
451
8.57k
    W = 1;
452
453
8.57k
  esec = mpi_is_secure(expo);
454
8.57k
  msec = mpi_is_secure(mod);
455
8.57k
  bsec = mpi_is_secure(base);
456
457
8.57k
  rp = res->d;
458
459
8.57k
  if (!msize)
460
0
    _gcry_divide_by_zero();
461
462
8.57k
  if (!esize)
463
0
    {
464
      /* Exponent is zero, result is 1 mod MOD, i.e., 1 or 0 depending
465
         on if MOD equals 1.  */
466
0
      res->nlimbs = (msize == 1 && mod->d[0] == 1) ? 0 : 1;
467
0
      if (res->nlimbs)
468
0
        {
469
0
          RESIZE_IF_NEEDED (res, 1);
470
0
          rp = res->d;
471
0
          rp[0] = 1;
472
0
        }
473
0
      res->sign = 0;
474
0
      goto leave;
475
0
    }
476
477
  /* Normalize MOD (i.e. make its most significant bit set) as
478
     required by mpn_divrem.  This will make the intermediate values
479
     in the calculation slightly larger, but the correct result is
480
     obtained after a final reduction using the original MOD value. */
481
8.57k
  mp_nlimbs = msec? msize:0;
482
8.57k
  mp = mp_marker = mpi_alloc_limb_space(msize, msec);
483
8.57k
  count_leading_zeros (mod_shift_cnt, mod->d[msize-1]);
484
8.57k
  if (mod_shift_cnt)
485
0
    _gcry_mpih_lshift (mp, mod->d, msize, mod_shift_cnt);
486
8.57k
  else
487
8.57k
    MPN_COPY( mp, mod->d, msize );
488
489
8.57k
  bsize = base->nlimbs;
490
8.57k
  bsign = base->sign;
491
8.57k
  if (bsize > msize)
492
0
    {
493
      /* The base is larger than the module.  Reduce it.
494
495
         Allocate (BSIZE + 1) with space for remainder and quotient.
496
         (The quotient is (bsize - msize + 1) limbs.)  */
497
0
      bp_nlimbs = bsec ? (bsize + 1):0;
498
0
      bp = bp_marker = mpi_alloc_limb_space( bsize + 1, bsec );
499
0
      MPN_COPY ( bp, base->d, bsize );
500
      /* We don't care about the quotient, store it above the
501
       * remainder, at BP + MSIZE.  */
502
0
      _gcry_mpih_divrem( bp + msize, 0, bp, bsize, mp, msize );
503
0
      bsize = msize;
504
      /* Canonicalize the base, since we are going to multiply with it
505
         quite a few times.  */
506
0
      MPN_NORMALIZE( bp, bsize );
507
0
    }
508
8.57k
  else
509
8.57k
    bp = base->d;
510
511
8.57k
  if (!bsize)
512
0
    {
513
0
      res->nlimbs = 0;
514
0
      res->sign = 0;
515
0
      goto leave;
516
0
    }
517
518
519
  /* Make BASE, EXPO not overlap with RES.  We don't need to check MOD
520
     because that has already been copied to the MP var.  */
521
8.57k
  if ( rp == bp )
522
0
    {
523
      /* RES and BASE are identical.  Allocate temp. space for BASE.  */
524
0
      gcry_assert (!bp_marker);
525
0
      bp_nlimbs = bsec? bsize:0;
526
0
      bp = bp_marker = mpi_alloc_limb_space( bsize, bsec );
527
0
      MPN_COPY(bp, rp, bsize);
528
0
    }
529
8.57k
  if ( rp == ep )
530
0
    {
531
      /* RES and EXPO are identical.  Allocate temp. space for EXPO.  */
532
0
      ep_nlimbs = esec? esize:0;
533
0
      ep = ep_marker = mpi_alloc_limb_space( esize, esec );
534
0
      MPN_COPY(ep, rp, esize);
535
0
    }
536
537
  /* Copy base to the result.  */
538
8.57k
  if (res->alloced < size)
539
31
    {
540
31
      mpi_resize (res, size);
541
31
      rp = res->d;
542
31
    }
543
544
  /* Main processing.  */
545
8.57k
  {
546
8.57k
    mpi_size_t i, j, k;
547
8.57k
    mpi_ptr_t xp;
548
8.57k
    mpi_size_t xsize = 0;
549
8.57k
    int c;
550
8.57k
    mpi_limb_t e;
551
8.57k
    mpi_limb_t carry_limb;
552
8.57k
    struct karatsuba_ctx karactx;
553
8.57k
    mpi_ptr_t tp;
554
555
8.57k
    xp_nlimbs = msec? size:0;
556
8.57k
    xp = xp_marker = mpi_alloc_limb_space( size, msec );
557
558
8.57k
    memset( &karactx, 0, sizeof karactx );
559
8.57k
    negative_result = (ep[0] & 1) && bsign;
560
561
    /* Precompute PRECOMP[], BASE^(2 * i + 1), BASE^1, ^3, ^5, ... */
562
8.57k
    if (W > 1)                  /* X := BASE^2 */
563
0
      mul_mod (xp, &xsize, bp, bsize, bp, bsize, mp, msize, &karactx);
564
8.57k
    base_u = precomp[0] = mpi_alloc_limb_space (bsize, esec);
565
8.57k
    base_u_size = max_u_size = precomp_size[0] = bsize;
566
8.57k
    MPN_COPY (precomp[0], bp, bsize);
567
8.57k
    for (i = 1; i < (1 << (W - 1)); i++)
568
0
      {                         /* PRECOMP[i] = BASE^(2 * i + 1) */
569
0
        if (xsize >= base_u_size)
570
0
          mul_mod (rp, &rsize, xp, xsize, base_u, base_u_size,
571
0
                   mp, msize, &karactx);
572
0
        else
573
0
          mul_mod (rp, &rsize, base_u, base_u_size, xp, xsize,
574
0
                   mp, msize, &karactx);
575
0
        base_u = precomp[i] = mpi_alloc_limb_space (rsize, esec);
576
0
        base_u_size = precomp_size[i] = rsize;
577
0
        if (max_u_size < base_u_size)
578
0
          max_u_size = base_u_size;
579
0
        MPN_COPY (precomp[i], rp, rsize);
580
0
      }
581
582
8.57k
    if (msize > max_u_size)
583
2.83k
      max_u_size = msize;
584
8.57k
    base_u = mpi_alloc_limb_space (max_u_size, esec);
585
8.57k
    MPN_ZERO (base_u, max_u_size);
586
587
8.57k
    i = esize - 1;
588
589
    /* Main loop.
590
591
       Make the result be pointed to alternately by XP and RP.  This
592
       helps us avoid block copying, which would otherwise be
593
       necessary with the overlap restrictions of
594
       _gcry_mpih_divmod. With 50% probability the result after this
595
       loop will be in the area originally pointed by RP (==RES->d),
596
       and with 50% probability in the area originally pointed to by XP. */
597
8.57k
    rsign = 0;
598
8.57k
    if (W == 1)
599
8.57k
      {
600
8.57k
        rsize = bsize;
601
8.57k
      }
602
0
    else
603
0
      {
604
0
        rsize = msize;
605
0
        MPN_ZERO (rp, rsize);
606
0
      }
607
8.57k
    MPN_COPY ( rp, bp, bsize );
608
609
8.57k
    e = ep[i];
610
8.57k
    count_leading_zeros (c, e);
611
8.57k
    e = (e << c) << 1;
612
8.57k
    c = BITS_PER_MPI_LIMB - 1 - c;
613
614
8.57k
    j = 0;
615
616
8.57k
    for (;;)
617
17.1k
      if (e == 0)
618
8.57k
        {
619
8.57k
          j += c;
620
8.57k
          if ( --i < 0 )
621
8.57k
            break;
622
623
0
          e = ep[i];
624
0
          c = BITS_PER_MPI_LIMB;
625
0
        }
626
8.57k
      else
627
8.57k
        {
628
8.57k
          int c0;
629
8.57k
          mpi_limb_t e0;
630
8.57k
          struct gcry_mpi w, u;
631
8.57k
          w.sign = u.sign = 0;
632
8.57k
          w.flags = u.flags = 0;
633
8.57k
          w.d = base_u;
634
635
8.57k
          count_leading_zeros (c0, e);
636
8.57k
          e = (e << c0);
637
8.57k
          c -= c0;
638
8.57k
          j += c0;
639
640
8.57k
          e0 = (e >> (BITS_PER_MPI_LIMB - W));
641
8.57k
          if (c >= W)
642
8.57k
            c0 = 0;
643
0
          else
644
0
            {
645
0
              if ( --i < 0 )
646
0
                {
647
0
                  e0 = (e >> (BITS_PER_MPI_LIMB - c));
648
0
                  j += c - W;
649
0
                  goto last_step;
650
0
                }
651
0
              else
652
0
                {
653
0
                  c0 = c;
654
0
                  e = ep[i];
655
0
                  c = BITS_PER_MPI_LIMB;
656
0
                  e0 |= (e >> (BITS_PER_MPI_LIMB - (W - c0)));
657
0
                }
658
0
            }
659
660
8.57k
          e = e << (W - c0);
661
8.57k
          c -= (W - c0);
662
663
8.57k
        last_step:
664
8.57k
          count_trailing_zeros (c0, e0);
665
8.57k
          e0 = (e0 >> c0) >> 1;
666
667
25.9k
          for (j += W - c0; j >= 0; j--)
668
17.3k
            {
669
670
              /*
671
               *  base_u <= precomp[e0]
672
               *  base_u_size <= precomp_size[e0]
673
               */
674
17.3k
              base_u_size = 0;
675
34.7k
              for (k = 0; k < (1<< (W - 1)); k++)
676
17.3k
                {
677
17.3k
                  w.alloced = w.nlimbs = precomp_size[k];
678
17.3k
                  u.alloced = u.nlimbs = precomp_size[k];
679
17.3k
                  u.d = precomp[k];
680
681
17.3k
                  mpi_set_cond (&w, &u, k == e0);
682
17.3k
                  base_u_size |= ( precomp_size[k] & (0UL - (k == e0)) );
683
17.3k
                }
684
685
17.3k
              w.alloced = w.nlimbs = rsize;
686
17.3k
              u.alloced = u.nlimbs = rsize;
687
17.3k
              u.d = rp;
688
17.3k
              mpi_set_cond (&w, &u, j != 0);
689
17.3k
              base_u_size ^= ((base_u_size ^ rsize)  & (0UL - (j != 0)));
690
691
17.3k
              mul_mod (xp, &xsize, rp, rsize, base_u, base_u_size,
692
17.3k
                       mp, msize, &karactx);
693
17.3k
              tp = rp; rp = xp; xp = tp;
694
17.3k
              rsize = xsize;
695
17.3k
            }
696
697
8.57k
          j = c0;
698
8.57k
          if ( i < 0 )
699
0
            break;
700
8.57k
        }
701
702
8.57k
    while (j--)
703
0
      {
704
0
        mul_mod (xp, &xsize, rp, rsize, rp, rsize, mp, msize, &karactx);
705
0
        tp = rp; rp = xp; xp = tp;
706
0
        rsize = xsize;
707
0
      }
708
709
    /* We shifted MOD, the modulo reduction argument, left
710
       MOD_SHIFT_CNT steps.  Adjust the result by reducing it with the
711
       original MOD.
712
713
       Also make sure the result is put in RES->d (where it already
714
       might be, see above).  */
715
8.57k
    if ( mod_shift_cnt )
716
0
      {
717
0
        carry_limb = _gcry_mpih_lshift( res->d, rp, rsize, mod_shift_cnt);
718
0
        rp = res->d;
719
0
        if ( carry_limb )
720
0
          {
721
0
            rp[rsize] = carry_limb;
722
0
            rsize++;
723
0
          }
724
0
      }
725
8.57k
    else if (res->d != rp)
726
15
      {
727
15
        MPN_COPY (res->d, rp, rsize);
728
15
        rp = res->d;
729
15
      }
730
731
8.57k
    if ( rsize >= msize )
732
5.74k
      {
733
5.74k
        _gcry_mpih_divrem(rp + msize, 0, rp, rsize, mp, msize);
734
5.74k
        rsize = msize;
735
5.74k
      }
736
737
    /* Remove any leading zero words from the result.  */
738
8.57k
    if ( mod_shift_cnt )
739
0
      _gcry_mpih_rshift( rp, rp, rsize, mod_shift_cnt);
740
8.57k
    MPN_NORMALIZE (rp, rsize);
741
742
8.57k
    _gcry_mpih_release_karatsuba_ctx (&karactx );
743
17.1k
    for (i = 0; i < (1 << (W - 1)); i++)
744
8.57k
      _gcry_mpi_free_limb_space( precomp[i], esec ? precomp_size[i] : 0 );
745
8.57k
    _gcry_mpi_free_limb_space (base_u, esec ? max_u_size : 0);
746
8.57k
  }
747
748
  /* Fixup for negative results.  */
749
8.57k
  if ( negative_result && rsize )
750
0
    {
751
0
      if ( mod_shift_cnt )
752
0
        _gcry_mpih_rshift( mp, mp, msize, mod_shift_cnt);
753
0
      _gcry_mpih_sub( rp, mp, msize, rp, rsize);
754
0
      rsize = msize;
755
0
      rsign = msign;
756
0
      MPN_NORMALIZE(rp, rsize);
757
0
    }
758
8.57k
  gcry_assert (res->d == rp);
759
0
  res->nlimbs = rsize;
760
8.57k
  res->sign = rsign;
761
762
8.57k
 leave:
763
8.57k
  if (mp_marker)
764
8.57k
    _gcry_mpi_free_limb_space( mp_marker, mp_nlimbs );
765
8.57k
  if (bp_marker)
766
0
    _gcry_mpi_free_limb_space( bp_marker, bp_nlimbs );
767
8.57k
  if (ep_marker)
768
0
    _gcry_mpi_free_limb_space( ep_marker, ep_nlimbs );
769
8.57k
  if (xp_marker)
770
8.57k
    _gcry_mpi_free_limb_space( xp_marker, xp_nlimbs );
771
8.57k
}
772
#endif