Coverage Report

Created: 2025-12-31 06:37

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/gmp/mpn/powm.c
Line
Count
Source
1
/* mpn_powm -- Compute R = U^E mod M.
2
3
   Contributed to the GNU project by Torbjorn Granlund.
4
5
   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
6
   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
7
   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
8
9
Copyright 2007-2012, 2019-2021 Free Software Foundation, Inc.
10
11
This file is part of the GNU MP Library.
12
13
The GNU MP Library is free software; you can redistribute it and/or modify
14
it under the terms of either:
15
16
  * the GNU Lesser General Public License as published by the Free
17
    Software Foundation; either version 3 of the License, or (at your
18
    option) any later version.
19
20
or
21
22
  * the GNU General Public License as published by the Free Software
23
    Foundation; either version 2 of the License, or (at your option) any
24
    later version.
25
26
or both in parallel, as here.
27
28
The GNU MP Library is distributed in the hope that it will be useful, but
29
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
30
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
31
for more details.
32
33
You should have received copies of the GNU General Public License and the
34
GNU Lesser General Public License along with the GNU MP Library.  If not,
35
see https://www.gnu.org/licenses/.  */
36
37
38
/*
39
  BASIC ALGORITHM, Compute U^E mod M, where M < B^n is odd.
40
41
  1. W <- U
42
43
  2. T <- (B^n * U) mod M                Convert to REDC form
44
45
  3. Compute table U^1, U^3, U^5... of E-dependent size
46
47
  4. While there are more bits in E
48
       W <- power left-to-right base-k
49
50
51
  TODO:
52
53
   * Make getbits a macro, thereby allowing it to update the index operand.
54
     That will simplify the code using getbits.  (Perhaps make getbits' sibling
55
     getbit then have similar form, for symmetry.)
56
57
   * Write an itch function.  Or perhaps get rid of tp parameter since the huge
58
     pp area is allocated locally anyway?
59
60
   * Choose window size without looping.  (Superoptimize or think(tm).)
61
62
   * Handle small bases with initial, reduction-free exponentiation.
63
64
   * Call new division functions, not mpn_tdiv_qr.
65
66
   * Consider special code for one-limb M.
67
68
   * How should we handle the redc1/redc2/redc_n choice?
69
     - redc1:  T(binvert_1limb)  + e * (n)   * (T(mullo-1x1) + n*T(addmul_1))
70
     - redc2:  T(binvert_2limbs) + e * (n/2) * (T(mullo-2x2) + n*T(addmul_2))
71
     - redc_n: T(binvert_nlimbs) + e * (T(mullo-nxn) + T(M(n)))
72
     This disregards the addmul_N constant term, but we could think of
73
     that as part of the respective mullo.
74
75
   * When U (the base) is small, we should start the exponentiation with plain
76
     operations, then convert that partial result to REDC form.
77
78
   * When U is just one limb, should it be handled without the k-ary tricks?
79
     We could keep a factor of B^n in W, but use U' = BU as base.  After
80
     multiplying by this (pseudo two-limb) number, we need to multiply by 1/B
81
     mod M.
82
*/
83
84
#include "gmp-impl.h"
85
#include "longlong.h"
86
87
#undef MPN_REDC_0
88
#define MPN_REDC_0(r0, u1, u0, m0, invm)        \
89
686k
  do {                 \
90
686k
    mp_limb_t _p1, _u1, _u0, _m0, _r0, _dummy;        \
91
686k
    _u0 = (u0);               \
92
686k
    _m0 = (m0);               \
93
686k
    umul_ppmm (_p1, _dummy, _m0, (_u0 * (invm)) & GMP_NUMB_MASK);  \
94
686k
    ASSERT (((_u0 - _dummy) & GMP_NUMB_MASK) == 0);     \
95
686k
    _u1 = (u1);               \
96
686k
    _r0 = _u1 - _p1;              \
97
686k
    _r0 = _u1 < _p1 ? _r0 + _m0 : _r0; /* _u1 < _r0 */     \
98
686k
    (r0) = _r0 & GMP_NUMB_MASK;           \
99
686k
  } while (0)
100
101
#undef MPN_REDC_1
102
#if HAVE_NATIVE_mpn_sbpi1_bdiv_r
103
#define MPN_REDC_1(rp, up, mp, n, invm)         \
104
  do {                  \
105
    mp_limb_t cy;             \
106
    cy = mpn_sbpi1_bdiv_r (up, 2 * n, mp, n, invm);     \
107
    if (cy != 0)              \
108
      mpn_sub_n (rp, up + n, mp, n);          \
109
    else                \
110
      MPN_COPY (rp, up + n, n);           \
111
  } while (0)
112
#else
113
#define MPN_REDC_1(rp, up, mp, n, invm)         \
114
13.8M
  do {                 \
115
13.8M
    mp_limb_t cy;             \
116
13.8M
    cy = mpn_redc_1 (rp, up, mp, n, invm);       \
117
13.8M
    if (cy != 0)             \
118
13.8M
      mpn_sub_n (rp, rp, mp, n);         \
119
13.8M
  } while (0)
120
#endif
121
122
#undef MPN_REDC_2
123
#define MPN_REDC_2(rp, up, mp, n, mip)          \
124
  do {                  \
125
    mp_limb_t cy;             \
126
    cy = mpn_redc_2 (rp, up, mp, n, mip);       \
127
    if (cy != 0)              \
128
      mpn_sub_n (rp, rp, mp, n);          \
129
  } while (0)
130
131
#if HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2
132
#define WANT_REDC_2 1
133
#endif
134
135
#define getbit(p,bi) \
136
10.5M
  ((p[(bi - 1) / GMP_LIMB_BITS] >> (bi - 1) % GMP_LIMB_BITS) & 1)
137
138
static inline mp_limb_t
139
getbits (const mp_limb_t *p, mp_bitcnt_t bi, int nbits)
140
1.33M
{
141
1.33M
  int nbits_in_r;
142
1.33M
  mp_limb_t r;
143
1.33M
  mp_size_t i;
144
145
1.33M
  if (bi <= nbits)
146
35.5k
    {
147
35.5k
      return p[0] & (((mp_limb_t) 1 << bi) - 1);
148
35.5k
    }
149
1.29M
  else
150
1.29M
    {
151
1.29M
      bi -= nbits;      /* bit index of low bit to extract */
152
1.29M
      i = bi / GMP_NUMB_BITS;   /* word index of low bit to extract */
153
1.29M
      bi %= GMP_NUMB_BITS;   /* bit index in low word */
154
1.29M
      r = p[i] >> bi;     /* extract (low) bits */
155
1.29M
      nbits_in_r = GMP_NUMB_BITS - bi;  /* number of bits now in r */
156
1.29M
      if (nbits_in_r < nbits)    /* did we get enough bits? */
157
88.0k
  r += p[i + 1] << nbits_in_r; /* prepend bits from higher word */
158
1.29M
      return r & (((mp_limb_t) 1 << nbits) - 1);
159
1.29M
    }
160
1.33M
}
161
162
static inline int
163
win_size (mp_bitcnt_t eb)
164
43.4k
{
165
43.4k
  int k;
166
43.4k
  static mp_bitcnt_t x[] = {7,25,81,241,673,1793,4609,11521,28161,~(mp_bitcnt_t)0};
167
126k
  for (k = 0; eb > x[k++]; )
168
82.5k
    ;
169
43.4k
  return k;
170
43.4k
}
171
172
/* Convert U to REDC form, U_r = B^n * U mod M */
173
static void
174
redcify (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr mp, mp_size_t n)
175
51.1k
{
176
51.1k
  mp_ptr tp, qp;
177
51.1k
  TMP_DECL;
178
51.1k
  TMP_MARK;
179
180
51.1k
  TMP_ALLOC_LIMBS_2 (tp, un + n, qp, un + 1);
181
182
51.1k
  MPN_ZERO (tp, n);
183
51.1k
  MPN_COPY (tp + n, up, un);
184
51.1k
  mpn_tdiv_qr (qp, rp, 0L, tp, un + n, mp, n);
185
51.1k
  TMP_FREE;
186
51.1k
}
187
188
#if ! HAVE_NATIVE_mpn_rsblsh1_n_ip2
189
#undef mpn_rsblsh1_n_ip2
190
#if HAVE_NATIVE_mpn_rsblsh1_n
191
#define mpn_rsblsh1_n_ip2(a,b,n)  mpn_rsblsh1_n(a,b,a,n)
192
#else
193
#define mpn_rsblsh1_n_ip2(a,b,n)        \
194
1.35M
  do                \
195
1.35M
    {               \
196
1.35M
      mpn_lshift (a, a, n, 1);          \
197
1.35M
      mpn_sub_n (a, a, b, n);          \
198
1.35M
    } while (0)
199
#endif
200
#endif
201
202
#define INNERLOOP2            \
203
7.73k
  do                \
204
6.26M
    {               \
205
6.26M
      MPN_SQR (tp, rp, n);         \
206
6.26M
      MPN_REDUCE (rp, tp, mp, n, mip);        \
207
6.26M
      if (mpn_cmp (rp, mp, n) >= 0)       \
208
6.26M
  ASSERT_NOCARRY (mpn_sub_n (rp, rp, mp, n));   \
209
6.26M
      if (getbit (ep, ebi) != 0)       \
210
6.26M
  {             \
211
3.10M
    if (rp[n - 1] >> (mbi - 1) % GMP_LIMB_BITS == 0)  \
212
3.10M
      ASSERT_NOCARRY (mpn_lshift (rp, rp, n, 1));   \
213
3.10M
    else              \
214
3.10M
      mpn_rsblsh1_n_ip2 (rp, mp, n);     \
215
3.10M
  }              \
216
6.26M
    } while (--ebi != 0)
217
218
/* rp[n-1..0] = 2 ^ ep[en-1..0] mod mp[n-1..0]
219
   Requires that mp[n-1..0] is odd and > 1.
220
   Requires that ep[en-1..0] is > 1.
221
   Uses scratch space at tp of MAX(mpn_binvert_itch(n),2n) limbs.  */
222
static void
223
mpn_2powm (mp_ptr rp, mp_srcptr ep, mp_size_t en,
224
    mp_srcptr mp, mp_size_t n, mp_ptr tp)
225
8.86k
{
226
8.86k
  mp_limb_t ip[2], *mip;
227
8.86k
  mp_bitcnt_t ebi, mbi, tbi;
228
8.86k
  mp_size_t tn;
229
8.86k
  int count;
230
8.86k
  TMP_DECL;
231
232
8.86k
  ASSERT (en > 1 || (en == 1 && ep[0] > 1));
233
8.86k
  ASSERT (n > 0 && (mp[0] & 1) != 0);
234
235
8.86k
  MPN_SIZEINBASE_2EXP(ebi, ep, en, 1);
236
8.86k
  MPN_SIZEINBASE_2EXP(mbi, mp, n, 1);
237
238
8.86k
  if (LIKELY (mbi <= GMP_NUMB_MAX))
239
8.86k
    {
240
8.86k
      count_leading_zeros(count, (mp_limb_t) mbi);
241
8.86k
      count = GMP_NUMB_BITS - (count - GMP_NAIL_BITS);
242
8.86k
    }
243
0
  else
244
0
    {
245
0
      mp_bitcnt_t tc = mbi;
246
247
0
      count = 0;
248
0
      do { ++count; } while ((tc >>= 1) != 0);
249
0
    }
250
251
8.86k
  tbi = getbits (ep, ebi, count);
252
8.86k
  if (tbi >= mbi)
253
6.75k
    {
254
6.75k
      --count;
255
6.75k
      ASSERT ((tbi >> count) == 1);
256
6.75k
      tbi >>= 1;
257
6.75k
      ASSERT (tbi < mbi);
258
6.75k
      ASSERT (ebi > count);
259
6.75k
    }
260
2.11k
  else if (ebi <= count)
261
15
    {
262
15
      MPN_FILL (rp, n, 0);
263
15
      rp[tbi / GMP_LIMB_BITS] = CNST_LIMB (1) << (tbi % GMP_LIMB_BITS);
264
15
      return;
265
15
    }
266
8.85k
  ebi -= count;
267
268
8.85k
  if (n == 1)
269
1.12k
    {
270
1.12k
      mp_limb_t r0, m0, invm;
271
1.12k
      m0 = *mp;
272
273
      /* redcify (rp, tp, tn + 1, mp, n); */
274
      /* TODO: test direct use of udiv_qrnnd */
275
1.12k
      ASSERT (tbi < GMP_LIMB_BITS);
276
1.12k
      tp[1] = CNST_LIMB (1) << tbi;
277
1.12k
      tp[0] = CNST_LIMB (0);
278
1.12k
      r0 = mpn_mod_1 (tp, 2, m0);
279
280
1.12k
      binvert_limb (invm, m0);
281
1.12k
      do
282
149k
  {
283
149k
    mp_limb_t t0, t1, t2;
284
    /* MPN_SQR (tp, rp, n);     */
285
149k
    umul_ppmm (t1, t0, r0, r0);
286
    /* MPN_REDUCE (rp, tp, mp, n, mip);   */
287
149k
    MPN_REDC_0(r0, t1, t0, m0, invm);
288
289
149k
    t2 = r0 << 1;
290
149k
    t2 = r0 > (m0 >> 1) ? t2 - m0 : t2;
291
149k
    r0 = getbit (ep, ebi) != 0 ? t2 : r0;
292
149k
  } while (--ebi != 0);
293
294
      /* tp[1] = 0; tp[0] = r0; */
295
      /* MPN_REDUCE (rp, tp, mp, n, mip); */
296
1.12k
      MPN_REDC_0(*rp, 0, r0, m0, invm);
297
298
1.12k
      return;
299
1.12k
    }
300
301
7.73k
  TMP_MARK;
302
303
#if WANT_REDC_2
304
  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
305
    {
306
      mip = ip;
307
      binvert_limb (ip[0], mp[0]);
308
      ip[0] = -ip[0];
309
    }
310
  else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
311
    {
312
      mip = ip;
313
      mpn_binvert (ip, mp, 2, tp);
314
      ip[0] = -ip[0]; ip[1] = ~ip[1];
315
    }
316
#else
317
7.73k
  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
318
6.96k
    {
319
6.96k
      mip = ip;
320
6.96k
      binvert_limb (ip[0], mp[0]);
321
6.96k
      ip[0] = -ip[0];
322
6.96k
    }
323
766
#endif
324
766
  else
325
766
    {
326
766
      mip = TMP_ALLOC_LIMBS (n);
327
766
      mpn_binvert (mip, mp, n, tp);
328
766
    }
329
330
7.73k
  tn = tbi / GMP_LIMB_BITS;
331
7.73k
  MPN_ZERO (tp, tn);
332
7.73k
  tp[tn] = CNST_LIMB (1) << (tbi % GMP_LIMB_BITS);
333
334
7.73k
  redcify (rp, tp, tn + 1, mp, n);
335
336
#if WANT_REDC_2
337
  if (REDC_1_TO_REDC_2_THRESHOLD < MUL_TOOM22_THRESHOLD)
338
    {
339
      if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
340
  {
341
    if (REDC_1_TO_REDC_2_THRESHOLD < SQR_BASECASE_THRESHOLD
342
        || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
343
      {
344
#undef MPN_SQR
345
#undef MPN_REDUCE
346
#define MPN_SQR(r,a,n)      mpn_mul_basecase (r,a,n,a,n)
347
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_1 (rp, tp, mp, n, mip[0])
348
        INNERLOOP2;
349
      }
350
    else
351
      {
352
#undef MPN_SQR
353
#undef MPN_REDUCE
354
#define MPN_SQR(r,a,n)      mpn_sqr_basecase (r,a,n)
355
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_1 (rp, tp, mp, n, mip[0])
356
        INNERLOOP2;
357
      }
358
  }
359
      else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
360
  {
361
    if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
362
        || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
363
      {
364
#undef MPN_SQR
365
#undef MPN_REDUCE
366
#define MPN_SQR(r,a,n)      mpn_mul_basecase (r,a,n,a,n)
367
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_2 (rp, tp, mp, n, mip)
368
        INNERLOOP2;
369
      }
370
    else
371
      {
372
#undef MPN_SQR
373
#undef MPN_REDUCE
374
#define MPN_SQR(r,a,n)      mpn_sqr_basecase (r,a,n)
375
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_2 (rp, tp, mp, n, mip)
376
        INNERLOOP2;
377
      }
378
  }
379
      else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
380
  {
381
#undef MPN_SQR
382
#undef MPN_REDUCE
383
#define MPN_SQR(r,a,n)      mpn_sqr (r,a,n)
384
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_2 (rp, tp, mp, n, mip)
385
    INNERLOOP2;
386
  }
387
      else
388
  {
389
#undef MPN_SQR
390
#undef MPN_REDUCE
391
#define MPN_SQR(r,a,n)      mpn_sqr (r,a,n)
392
#define MPN_REDUCE(rp,tp,mp,n,mip)  mpn_redc_n (rp, tp, mp, n, mip)
393
    INNERLOOP2;
394
  }
395
    }
396
  else
397
    {
398
      if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
399
  {
400
    if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
401
        || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
402
      {
403
#undef MPN_SQR
404
#undef MPN_REDUCE
405
#define MPN_SQR(r,a,n)      mpn_mul_basecase (r,a,n,a,n)
406
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_1 (rp, tp, mp, n, mip[0])
407
        INNERLOOP2;
408
      }
409
    else
410
      {
411
#undef MPN_SQR
412
#undef MPN_REDUCE
413
#define MPN_SQR(r,a,n)      mpn_sqr_basecase (r,a,n)
414
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_1 (rp, tp, mp, n, mip[0])
415
        INNERLOOP2;
416
      }
417
  }
418
      else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
419
  {
420
#undef MPN_SQR
421
#undef MPN_REDUCE
422
#define MPN_SQR(r,a,n)      mpn_sqr (r,a,n)
423
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_1 (rp, tp, mp, n, mip[0])
424
    INNERLOOP2;
425
  }
426
      else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
427
  {
428
#undef MPN_SQR
429
#undef MPN_REDUCE
430
#define MPN_SQR(r,a,n)      mpn_sqr (r,a,n)
431
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_2 (rp, tp, mp, n, mip)
432
    INNERLOOP2;
433
  }
434
      else
435
  {
436
#undef MPN_SQR
437
#undef MPN_REDUCE
438
#define MPN_SQR(r,a,n)      mpn_sqr (r,a,n)
439
#define MPN_REDUCE(rp,tp,mp,n,mip)  mpn_redc_n (rp, tp, mp, n, mip)
440
    INNERLOOP2;
441
  }
442
    }
443
444
#else  /* WANT_REDC_2 */
445
446
7.73k
  if (REDC_1_TO_REDC_N_THRESHOLD < MUL_TOOM22_THRESHOLD)
447
0
    {
448
0
      if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
449
0
  {
450
0
    if (REDC_1_TO_REDC_N_THRESHOLD < SQR_BASECASE_THRESHOLD
451
0
        || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
452
0
      {
453
0
#undef MPN_SQR
454
0
#undef MPN_REDUCE
455
0
#define MPN_SQR(r,a,n)      mpn_mul_basecase (r,a,n,a,n)
456
0
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_1 (rp, tp, mp, n, mip[0])
457
0
        INNERLOOP2;
458
0
      }
459
0
    else
460
0
      {
461
0
#undef MPN_SQR
462
0
#undef MPN_REDUCE
463
0
#define MPN_SQR(r,a,n)      mpn_sqr_basecase (r,a,n)
464
0
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_1 (rp, tp, mp, n, mip[0])
465
0
        INNERLOOP2;
466
0
      }
467
0
  }
468
0
      else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
469
0
  {
470
0
    if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
471
0
        || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
472
0
      {
473
0
#undef MPN_SQR
474
0
#undef MPN_REDUCE
475
0
#define MPN_SQR(r,a,n)      mpn_mul_basecase (r,a,n,a,n)
476
0
#define MPN_REDUCE(rp,tp,mp,n,mip)  mpn_redc_n (rp, tp, mp, n, mip)
477
0
        INNERLOOP2;
478
0
      }
479
0
    else
480
0
      {
481
0
#undef MPN_SQR
482
0
#undef MPN_REDUCE
483
0
#define MPN_SQR(r,a,n)      mpn_sqr_basecase (r,a,n)
484
0
#define MPN_REDUCE(rp,tp,mp,n,mip)  mpn_redc_n (rp, tp, mp, n, mip)
485
0
        INNERLOOP2;
486
0
      }
487
0
  }
488
0
      else
489
0
  {
490
0
#undef MPN_SQR
491
0
#undef MPN_REDUCE
492
0
#define MPN_SQR(r,a,n)      mpn_sqr (r,a,n)
493
0
#define MPN_REDUCE(rp,tp,mp,n,mip)  mpn_redc_n (rp, tp, mp, n, mip)
494
0
    INNERLOOP2;
495
0
  }
496
0
    }
497
7.73k
  else
498
7.73k
    {
499
7.73k
      if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
500
1.95k
  {
501
1.95k
    if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
502
0
        || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
503
0
      {
504
0
#undef MPN_SQR
505
0
#undef MPN_REDUCE
506
0
#define MPN_SQR(r,a,n)      mpn_mul_basecase (r,a,n,a,n)
507
0
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_1 (rp, tp, mp, n, mip[0])
508
0
        INNERLOOP2;
509
0
      }
510
1.95k
    else
511
1.95k
      {
512
1.95k
#undef MPN_SQR
513
1.95k
#undef MPN_REDUCE
514
240k
#define MPN_SQR(r,a,n)      mpn_sqr_basecase (r,a,n)
515
240k
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_1 (rp, tp, mp, n, mip[0])
516
1.95k
        INNERLOOP2;
517
1.95k
      }
518
1.95k
  }
519
5.77k
      else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
520
5.00k
  {
521
5.00k
#undef MPN_SQR
522
5.00k
#undef MPN_REDUCE
523
4.94M
#define MPN_SQR(r,a,n)      mpn_sqr (r,a,n)
524
4.94M
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_1 (rp, tp, mp, n, mip[0])
525
5.00k
    INNERLOOP2;
526
5.00k
  }
527
766
      else
528
766
  {
529
766
#undef MPN_SQR
530
766
#undef MPN_REDUCE
531
1.08M
#define MPN_SQR(r,a,n)      mpn_sqr (r,a,n)
532
1.08M
#define MPN_REDUCE(rp,tp,mp,n,mip)  mpn_redc_n (rp, tp, mp, n, mip)
533
766
    INNERLOOP2;
534
766
  }
535
7.73k
    }
536
7.73k
#endif  /* WANT_REDC_2 */
537
538
7.73k
  MPN_COPY (tp, rp, n);
539
7.73k
  MPN_FILL (tp + n, n, 0);
540
541
#if WANT_REDC_2
542
  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
543
    MPN_REDC_1 (rp, tp, mp, n, ip[0]);
544
  else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
545
    MPN_REDC_2 (rp, tp, mp, n, mip);
546
#else
547
7.73k
  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
548
6.96k
    MPN_REDC_1 (rp, tp, mp, n, ip[0]);
549
766
#endif
550
766
  else
551
766
    mpn_redc_n (rp, tp, mp, n, mip);
552
553
7.73k
  if (mpn_cmp (rp, mp, n) >= 0)
554
0
    mpn_sub_n (rp, rp, mp, n);
555
556
7.73k
  TMP_FREE;
557
7.73k
}
558
559
/* rp[n-1..0] = bp[bn-1..0] ^ ep[en-1..0] mod mp[n-1..0]
560
   Requires that mp[n-1..0] is odd.
561
   Requires that ep[en-1..0] is > 1.
562
   Uses scratch space at tp of MAX(mpn_binvert_itch(n),2n) limbs.  */
563
void
564
mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
565
    mp_srcptr ep, mp_size_t en,
566
    mp_srcptr mp, mp_size_t n, mp_ptr tp)
567
52.3k
{
568
52.3k
  mp_limb_t ip[2], *mip;
569
52.3k
  int cnt;
570
52.3k
  mp_bitcnt_t ebi;
571
52.3k
  int windowsize, this_windowsize;
572
52.3k
  mp_limb_t expbits;
573
52.3k
  mp_ptr pp, this_pp;
574
52.3k
  long i;
575
52.3k
  TMP_DECL;
576
577
52.3k
  ASSERT (en > 1 || (en == 1 && ep[0] > 1));
578
52.3k
  ASSERT (n >= 1 && ((mp[0] & 1) != 0));
579
580
52.3k
  if (bn == 1 && bp[0] == 2)
581
8.86k
    {
582
8.86k
      mpn_2powm (rp, ep, en, mp, n, tp);
583
8.86k
      return;
584
8.86k
    }
585
586
43.4k
  TMP_MARK;
587
588
43.4k
  MPN_SIZEINBASE_2EXP(ebi, ep, en, 1);
589
590
#if 0
591
  if (bn < n)
592
    {
593
      /* Do the first few exponent bits without mod reductions,
594
   until the result is greater than the mod argument.  */
595
      for (;;)
596
  {
597
    mpn_sqr (tp, this_pp, tn);
598
    tn = tn * 2 - 1,  tn += tp[tn] != 0;
599
    if (getbit (ep, ebi) != 0)
600
      mpn_mul (..., tp, tn, bp, bn);
601
    ebi--;
602
  }
603
    }
604
#endif
605
606
43.4k
  windowsize = win_size (ebi);
607
608
#if WANT_REDC_2
609
  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
610
    {
611
      mip = ip;
612
      binvert_limb (mip[0], mp[0]);
613
      mip[0] = -mip[0];
614
    }
615
  else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
616
    {
617
      mip = ip;
618
      mpn_binvert (mip, mp, 2, tp);
619
      mip[0] = -mip[0]; mip[1] = ~mip[1];
620
    }
621
#else
622
43.4k
  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
623
41.8k
    {
624
41.8k
      mip = ip;
625
41.8k
      binvert_limb (mip[0], mp[0]);
626
41.8k
      mip[0] = -mip[0];
627
41.8k
    }
628
1.57k
#endif
629
1.57k
  else
630
1.57k
    {
631
1.57k
      mip = TMP_ALLOC_LIMBS (n);
632
1.57k
      mpn_binvert (mip, mp, n, tp);
633
1.57k
    }
634
635
43.4k
  pp = TMP_ALLOC_LIMBS (n << (windowsize - 1));
636
637
43.4k
  this_pp = pp;
638
43.4k
  redcify (this_pp, bp, bn, mp, n);
639
640
  /* Store b^2 at rp.  */
641
43.4k
  mpn_sqr (tp, this_pp, n);
642
#if 0
643
  if (n == 1) {
644
    MPN_REDC_0 (rp[0], tp[1], tp[0], mp[0], -mip[0]);
645
  } else
646
#endif
647
#if WANT_REDC_2
648
  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
649
    MPN_REDC_1 (rp, tp, mp, n, mip[0]);
650
  else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
651
    MPN_REDC_2 (rp, tp, mp, n, mip);
652
#else
653
43.4k
  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
654
41.8k
    MPN_REDC_1 (rp, tp, mp, n, mip[0]);
655
1.57k
#endif
656
1.57k
  else
657
1.57k
    mpn_redc_n (rp, tp, mp, n, mip);
658
659
  /* Precompute odd powers of b and put them in the temporary area at pp.  */
660
343k
  for (i = (1 << (windowsize - 1)) - 1; i > 0; i--)
661
299k
#if 1
662
299k
    if (n == 1) {
663
18.0k
      umul_ppmm((tp)[1], *(tp), *(this_pp), *(rp));
664
18.0k
      ++this_pp ;
665
18.0k
      MPN_REDC_0 (*this_pp, tp[1], tp[0], *mp, -mip[0]);
666
18.0k
    } else
667
281k
#endif
668
281k
    {
669
281k
      mpn_mul_n (tp, this_pp, rp, n);
670
281k
      this_pp += n;
671
#if WANT_REDC_2
672
      if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
673
  MPN_REDC_1 (this_pp, tp, mp, n, mip[0]);
674
      else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
675
  MPN_REDC_2 (this_pp, tp, mp, n, mip);
676
#else
677
281k
      if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
678
232k
  MPN_REDC_1 (this_pp, tp, mp, n, mip[0]);
679
48.9k
#endif
680
48.9k
      else
681
48.9k
  mpn_redc_n (this_pp, tp, mp, n, mip);
682
281k
    }
683
684
43.4k
  expbits = getbits (ep, ebi, windowsize);
685
43.4k
  ebi -= windowsize;
686
687
  /* THINK: Should we initialise the case expbits % 4 == 0 with a mul? */
688
43.4k
  count_trailing_zeros (cnt, expbits);
689
43.4k
  ebi += cnt;
690
43.4k
  expbits >>= cnt;
691
692
43.4k
  MPN_COPY (rp, pp + n * (expbits >> 1), n);
693
694
43.4k
#define INNERLOOP             \
695
1.32M
  while (ebi != 0)             \
696
1.29M
    {                 \
697
4.16M
      while (getbit (ep, ebi) == 0)         \
698
2.87M
  {               \
699
2.87M
    MPN_SQR (tp, rp, n);           \
700
2.87M
    MPN_REDUCE (rp, tp, mp, n, mip);        \
701
2.87M
    if (--ebi == 0)           \
702
2.87M
      goto done;             \
703
2.87M
  }                \
704
1.29M
                  \
705
      /* The next bit of the exponent is 1.  Now extract the largest  \
706
   block of bits <= windowsize, and such that the least   \
707
   significant bit is 1.  */          \
708
1.29M
                  \
709
1.29M
      expbits = getbits (ep, ebi, windowsize);        \
710
1.28M
      this_windowsize = MIN (ebi, windowsize);       \
711
1.28M
                  \
712
1.28M
      count_trailing_zeros (cnt, expbits);        \
713
1.28M
      this_windowsize -= cnt;           \
714
1.28M
      ebi -= this_windowsize;           \
715
1.28M
      expbits >>= cnt;              \
716
1.28M
                  \
717
1.28M
      do                \
718
6.38M
  {               \
719
6.38M
    MPN_SQR (tp, rp, n);           \
720
6.38M
    MPN_REDUCE (rp, tp, mp, n, mip);        \
721
6.38M
  }                \
722
6.38M
      while (--this_windowsize != 0);          \
723
1.28M
                  \
724
1.28M
      MPN_MUL_N (tp, rp, pp + n * (expbits >> 1), n);      \
725
1.28M
      MPN_REDUCE (rp, tp, mp, n, mip);          \
726
1.28M
    }
727
728
729
43.4k
  if (n == 1)
730
1.96k
    {
731
1.96k
#undef MPN_MUL_N
732
1.96k
#undef MPN_SQR
733
1.96k
#undef MPN_REDUCE
734
74.5k
#define MPN_MUL_N(r,a,b,n)    umul_ppmm((r)[1], *(r), *(a), *(b))
735
443k
#define MPN_SQR(r,a,n)      umul_ppmm((r)[1], *(r), *(a), *(a))
736
518k
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_0(*(rp), (tp)[1], (tp)[0], *(mp), - *(mip))
737
1.96k
      INNERLOOP;
738
493
    }
739
41.4k
  else
740
#if WANT_REDC_2
741
  if (REDC_1_TO_REDC_2_THRESHOLD < MUL_TOOM22_THRESHOLD)
742
    {
743
      if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
744
  {
745
    if (REDC_1_TO_REDC_2_THRESHOLD < SQR_BASECASE_THRESHOLD
746
        || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
747
      {
748
#undef MPN_MUL_N
749
#undef MPN_SQR
750
#undef MPN_REDUCE
751
#define MPN_MUL_N(r,a,b,n)    mpn_mul_basecase (r,a,n,b,n)
752
#define MPN_SQR(r,a,n)      mpn_mul_basecase (r,a,n,a,n)
753
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_1 (rp, tp, mp, n, mip[0])
754
        INNERLOOP;
755
      }
756
    else
757
      {
758
#undef MPN_MUL_N
759
#undef MPN_SQR
760
#undef MPN_REDUCE
761
#define MPN_MUL_N(r,a,b,n)    mpn_mul_basecase (r,a,n,b,n)
762
#define MPN_SQR(r,a,n)      mpn_sqr_basecase (r,a,n)
763
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_1 (rp, tp, mp, n, mip[0])
764
        INNERLOOP;
765
      }
766
  }
767
      else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
768
  {
769
    if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
770
        || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
771
      {
772
#undef MPN_MUL_N
773
#undef MPN_SQR
774
#undef MPN_REDUCE
775
#define MPN_MUL_N(r,a,b,n)    mpn_mul_basecase (r,a,n,b,n)
776
#define MPN_SQR(r,a,n)      mpn_mul_basecase (r,a,n,a,n)
777
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_2 (rp, tp, mp, n, mip)
778
        INNERLOOP;
779
      }
780
    else
781
      {
782
#undef MPN_MUL_N
783
#undef MPN_SQR
784
#undef MPN_REDUCE
785
#define MPN_MUL_N(r,a,b,n)    mpn_mul_basecase (r,a,n,b,n)
786
#define MPN_SQR(r,a,n)      mpn_sqr_basecase (r,a,n)
787
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_2 (rp, tp, mp, n, mip)
788
        INNERLOOP;
789
      }
790
  }
791
      else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
792
  {
793
#undef MPN_MUL_N
794
#undef MPN_SQR
795
#undef MPN_REDUCE
796
#define MPN_MUL_N(r,a,b,n)    mpn_mul_n (r,a,b,n)
797
#define MPN_SQR(r,a,n)      mpn_sqr (r,a,n)
798
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_2 (rp, tp, mp, n, mip)
799
    INNERLOOP;
800
  }
801
      else
802
  {
803
#undef MPN_MUL_N
804
#undef MPN_SQR
805
#undef MPN_REDUCE
806
#define MPN_MUL_N(r,a,b,n)    mpn_mul_n (r,a,b,n)
807
#define MPN_SQR(r,a,n)      mpn_sqr (r,a,n)
808
#define MPN_REDUCE(rp,tp,mp,n,mip)  mpn_redc_n (rp, tp, mp, n, mip)
809
    INNERLOOP;
810
  }
811
    }
812
  else
813
    {
814
      if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
815
  {
816
    if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
817
        || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
818
      {
819
#undef MPN_MUL_N
820
#undef MPN_SQR
821
#undef MPN_REDUCE
822
#define MPN_MUL_N(r,a,b,n)    mpn_mul_basecase (r,a,n,b,n)
823
#define MPN_SQR(r,a,n)      mpn_mul_basecase (r,a,n,a,n)
824
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_1 (rp, tp, mp, n, mip[0])
825
        INNERLOOP;
826
      }
827
    else
828
      {
829
#undef MPN_MUL_N
830
#undef MPN_SQR
831
#undef MPN_REDUCE
832
#define MPN_MUL_N(r,a,b,n)    mpn_mul_basecase (r,a,n,b,n)
833
#define MPN_SQR(r,a,n)      mpn_sqr_basecase (r,a,n)
834
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_1 (rp, tp, mp, n, mip[0])
835
        INNERLOOP;
836
      }
837
  }
838
      else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
839
  {
840
#undef MPN_MUL_N
841
#undef MPN_SQR
842
#undef MPN_REDUCE
843
#define MPN_MUL_N(r,a,b,n)    mpn_mul_n (r,a,b,n)
844
#define MPN_SQR(r,a,n)      mpn_sqr (r,a,n)
845
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_1 (rp, tp, mp, n, mip[0])
846
    INNERLOOP;
847
  }
848
      else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
849
  {
850
#undef MPN_MUL_N
851
#undef MPN_SQR
852
#undef MPN_REDUCE
853
#define MPN_MUL_N(r,a,b,n)    mpn_mul_n (r,a,b,n)
854
#define MPN_SQR(r,a,n)      mpn_sqr (r,a,n)
855
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_2 (rp, tp, mp, n, mip)
856
    INNERLOOP;
857
  }
858
      else
859
  {
860
#undef MPN_MUL_N
861
#undef MPN_SQR
862
#undef MPN_REDUCE
863
#define MPN_MUL_N(r,a,b,n)    mpn_mul_n (r,a,b,n)
864
#define MPN_SQR(r,a,n)      mpn_sqr (r,a,n)
865
#define MPN_REDUCE(rp,tp,mp,n,mip)  mpn_redc_n (rp, tp, mp, n, mip)
866
    INNERLOOP;
867
  }
868
    }
869
870
#else  /* WANT_REDC_2 */
871
872
41.4k
  if (REDC_1_TO_REDC_N_THRESHOLD < MUL_TOOM22_THRESHOLD)
873
0
    {
874
0
      if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
875
0
  {
876
0
    if (REDC_1_TO_REDC_N_THRESHOLD < SQR_BASECASE_THRESHOLD
877
0
        || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
878
0
      {
879
0
#undef MPN_MUL_N
880
0
#undef MPN_SQR
881
0
#undef MPN_REDUCE
882
0
#define MPN_MUL_N(r,a,b,n)    mpn_mul_basecase (r,a,n,b,n)
883
0
#define MPN_SQR(r,a,n)      mpn_mul_basecase (r,a,n,a,n)
884
0
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_1 (rp, tp, mp, n, mip[0])
885
0
        INNERLOOP;
886
0
      }
887
0
    else
888
0
      {
889
0
#undef MPN_MUL_N
890
0
#undef MPN_SQR
891
0
#undef MPN_REDUCE
892
0
#define MPN_MUL_N(r,a,b,n)    mpn_mul_basecase (r,a,n,b,n)
893
0
#define MPN_SQR(r,a,n)      mpn_sqr_basecase (r,a,n)
894
0
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_1 (rp, tp, mp, n, mip[0])
895
0
        INNERLOOP;
896
0
      }
897
0
  }
898
0
      else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
899
0
  {
900
0
    if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
901
0
        || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
902
0
      {
903
0
#undef MPN_MUL_N
904
0
#undef MPN_SQR
905
0
#undef MPN_REDUCE
906
0
#define MPN_MUL_N(r,a,b,n)    mpn_mul_basecase (r,a,n,b,n)
907
0
#define MPN_SQR(r,a,n)      mpn_mul_basecase (r,a,n,a,n)
908
0
#define MPN_REDUCE(rp,tp,mp,n,mip)  mpn_redc_n (rp, tp, mp, n, mip)
909
0
        INNERLOOP;
910
0
      }
911
0
    else
912
0
      {
913
0
#undef MPN_MUL_N
914
0
#undef MPN_SQR
915
0
#undef MPN_REDUCE
916
0
#define MPN_MUL_N(r,a,b,n)    mpn_mul_basecase (r,a,n,b,n)
917
0
#define MPN_SQR(r,a,n)      mpn_sqr_basecase (r,a,n)
918
0
#define MPN_REDUCE(rp,tp,mp,n,mip)  mpn_redc_n (rp, tp, mp, n, mip)
919
0
        INNERLOOP;
920
0
      }
921
0
  }
922
0
      else
923
0
  {
924
0
#undef MPN_MUL_N
925
0
#undef MPN_SQR
926
0
#undef MPN_REDUCE
927
0
#define MPN_MUL_N(r,a,b,n)    mpn_mul_n (r,a,b,n)
928
0
#define MPN_SQR(r,a,n)      mpn_sqr (r,a,n)
929
0
#define MPN_REDUCE(rp,tp,mp,n,mip)  mpn_redc_n (rp, tp, mp, n, mip)
930
0
    INNERLOOP;
931
0
  }
932
0
    }
933
41.4k
  else
934
41.4k
    {
935
41.4k
      if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
936
9.35k
  {
937
9.35k
    if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
938
0
        || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
939
0
      {
940
0
#undef MPN_MUL_N
941
0
#undef MPN_SQR
942
0
#undef MPN_REDUCE
943
0
#define MPN_MUL_N(r,a,b,n)    mpn_mul_basecase (r,a,n,b,n)
944
0
#define MPN_SQR(r,a,n)      mpn_mul_basecase (r,a,n,a,n)
945
0
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_1 (rp, tp, mp, n, mip[0])
946
0
        INNERLOOP;
947
0
      }
948
9.35k
    else
949
9.35k
      {
950
9.35k
#undef MPN_MUL_N
951
9.35k
#undef MPN_SQR
952
9.35k
#undef MPN_REDUCE
953
236k
#define MPN_MUL_N(r,a,b,n)    mpn_mul_basecase (r,a,n,b,n)
954
1.36M
#define MPN_SQR(r,a,n)      mpn_sqr_basecase (r,a,n)
955
1.59M
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_1 (rp, tp, mp, n, mip[0])
956
9.35k
        INNERLOOP;
957
1.88k
      }
958
9.35k
  }
959
32.1k
      else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
960
30.5k
  {
961
30.5k
#undef MPN_MUL_N
962
30.5k
#undef MPN_SQR
963
30.5k
#undef MPN_REDUCE
964
766k
#define MPN_MUL_N(r,a,b,n)    mpn_mul_n (r,a,b,n)
965
6.00M
#define MPN_SQR(r,a,n)      mpn_sqr (r,a,n)
966
6.77M
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_1 (rp, tp, mp, n, mip[0])
967
30.5k
    INNERLOOP;
968
28.9k
  }
969
1.57k
      else
970
1.57k
  {
971
1.57k
#undef MPN_MUL_N
972
1.57k
#undef MPN_SQR
973
1.57k
#undef MPN_REDUCE
974
205k
#define MPN_MUL_N(r,a,b,n)    mpn_mul_n (r,a,b,n)
975
1.44M
#define MPN_SQR(r,a,n)      mpn_sqr (r,a,n)
976
1.65M
#define MPN_REDUCE(rp,tp,mp,n,mip)  mpn_redc_n (rp, tp, mp, n, mip)
977
1.57k
    INNERLOOP;
978
624
  }
979
41.4k
    }
980
31.9k
#endif  /* WANT_REDC_2 */
981
982
43.4k
 done:
983
984
43.4k
  MPN_COPY (tp, rp, n);
985
43.4k
  MPN_ZERO (tp + n, n);
986
987
#if WANT_REDC_2
988
  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
989
    MPN_REDC_1 (rp, tp, mp, n, mip[0]);
990
  else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
991
    MPN_REDC_2 (rp, tp, mp, n, mip);
992
#else
993
43.4k
  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
994
41.8k
    MPN_REDC_1 (rp, tp, mp, n, mip[0]);
995
1.57k
#endif
996
1.57k
  else
997
1.57k
    mpn_redc_n (rp, tp, mp, n, mip);
998
999
43.4k
  if (mpn_cmp (rp, mp, n) >= 0)
1000
1
    mpn_sub_n (rp, rp, mp, n);
1001
1002
43.4k
  TMP_FREE;
1003
43.4k
}