Coverage Report

Created: 2024-11-21 07:03

/src/libgmp/mpn/powm.c
Line
Count
Source (jump to first uncovered line)
1
/* mpn_powm -- Compute R = U^E mod M.
2
3
   Contributed to the GNU project by Torbjorn Granlund.
4
5
   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
6
   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
7
   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
8
9
Copyright 2007-2012, 2019-2021 Free Software Foundation, Inc.
10
11
This file is part of the GNU MP Library.
12
13
The GNU MP Library is free software; you can redistribute it and/or modify
14
it under the terms of either:
15
16
  * the GNU Lesser General Public License as published by the Free
17
    Software Foundation; either version 3 of the License, or (at your
18
    option) any later version.
19
20
or
21
22
  * the GNU General Public License as published by the Free Software
23
    Foundation; either version 2 of the License, or (at your option) any
24
    later version.
25
26
or both in parallel, as here.
27
28
The GNU MP Library is distributed in the hope that it will be useful, but
29
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
30
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
31
for more details.
32
33
You should have received copies of the GNU General Public License and the
34
GNU Lesser General Public License along with the GNU MP Library.  If not,
35
see https://www.gnu.org/licenses/.  */
36
37
38
/*
39
  BASIC ALGORITHM, Compute U^E mod M, where M < B^n is odd.
40
41
  1. W <- U
42
43
  2. T <- (B^n * U) mod M                Convert to REDC form
44
45
  3. Compute table U^1, U^3, U^5... of E-dependent size
46
47
  4. While there are more bits in E
48
       W <- power left-to-right base-k
49
50
51
  TODO:
52
53
   * Make getbits a macro, thereby allowing it to update the index operand.
54
     That will simplify the code using getbits.  (Perhaps make getbits' sibling
55
     getbit then have similar form, for symmetry.)
56
57
   * Write an itch function.  Or perhaps get rid of tp parameter since the huge
58
     pp area is allocated locally anyway?
59
60
   * Choose window size without looping.  (Superoptimize or think(tm).)
61
62
   * Handle small bases with initial, reduction-free exponentiation.
63
64
   * Call new division functions, not mpn_tdiv_qr.
65
66
   * Consider special code for one-limb M.
67
68
   * How should we handle the redc1/redc2/redc_n choice?
69
     - redc1:  T(binvert_1limb)  + e * (n)   * (T(mullo-1x1) + n*T(addmul_1))
70
     - redc2:  T(binvert_2limbs) + e * (n/2) * (T(mullo-2x2) + n*T(addmul_2))
71
     - redc_n: T(binvert_nlimbs) + e * (T(mullo-nxn) + T(M(n)))
72
     This disregards the addmul_N constant term, but we could think of
73
     that as part of the respective mullo.
74
75
   * When U (the base) is small, we should start the exponentiation with plain
76
     operations, then convert that partial result to REDC form.
77
78
   * When U is just one limb, should it be handled without the k-ary tricks?
79
     We could keep a factor of B^n in W, but use U' = BU as base.  After
80
     multiplying by this (pseudo two-limb) number, we need to multiply by 1/B
81
     mod M.
82
*/
83
84
#include "gmp-impl.h"
85
#include "longlong.h"
86
87
#undef MPN_REDC_0
88
#define MPN_REDC_0(r0, u1, u0, m0, invm)        \
89
322k
  do {                 \
90
322k
    mp_limb_t _p1, _u1, _u0, _m0, _r0, _dummy;        \
91
322k
    _u0 = (u0);               \
92
322k
    _m0 = (m0);               \
93
322k
    umul_ppmm (_p1, _dummy, _m0, (_u0 * (invm)) & GMP_NUMB_MASK);  \
94
322k
    ASSERT (((_u0 - _dummy) & GMP_NUMB_MASK) == 0);     \
95
322k
    _u1 = (u1);               \
96
322k
    _r0 = _u1 - _p1;              \
97
322k
    _r0 = _u1 < _p1 ? _r0 + _m0 : _r0; /* _u1 < _r0 */     \
98
322k
    (r0) = _r0 & GMP_NUMB_MASK;           \
99
322k
  } while (0)
100
101
#undef MPN_REDC_1
102
#if HAVE_NATIVE_mpn_sbpi1_bdiv_r
103
#define MPN_REDC_1(rp, up, mp, n, invm)         \
104
  do {                  \
105
    mp_limb_t cy;             \
106
    cy = mpn_sbpi1_bdiv_r (up, 2 * n, mp, n, invm);     \
107
    if (cy != 0)              \
108
      mpn_sub_n (rp, up + n, mp, n);          \
109
    else                \
110
      MPN_COPY (rp, up + n, n);           \
111
  } while (0)
112
#else
113
#define MPN_REDC_1(rp, up, mp, n, invm)         \
114
8.70M
  do {                 \
115
8.70M
    mp_limb_t cy;             \
116
8.70M
    cy = mpn_redc_1 (rp, up, mp, n, invm);       \
117
8.70M
    if (cy != 0)             \
118
8.70M
      mpn_sub_n (rp, rp, mp, n);         \
119
8.70M
  } while (0)
120
#endif
121
122
#undef MPN_REDC_2
123
#define MPN_REDC_2(rp, up, mp, n, mip)          \
124
1.03M
  do {                 \
125
1.03M
    mp_limb_t cy;             \
126
1.03M
    cy = mpn_redc_2 (rp, up, mp, n, mip);        \
127
1.03M
    if (cy != 0)             \
128
1.03M
      mpn_sub_n (rp, rp, mp, n);         \
129
1.03M
  } while (0)
130
131
#if HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2
132
#define WANT_REDC_2 1
133
#endif
134
135
#define getbit(p,bi) \
136
9.97M
  ((p[(bi - 1) / GMP_LIMB_BITS] >> (bi - 1) % GMP_LIMB_BITS) & 1)
137
138
static inline mp_limb_t
139
getbits (const mp_limb_t *p, mp_bitcnt_t bi, int nbits)
140
443k
{
141
443k
  int nbits_in_r;
142
443k
  mp_limb_t r;
143
443k
  mp_size_t i;
144
145
443k
  if (bi <= nbits)
146
3.58k
    {
147
3.58k
      return p[0] & (((mp_limb_t) 1 << bi) - 1);
148
3.58k
    }
149
440k
  else
150
440k
    {
151
440k
      bi -= nbits;      /* bit index of low bit to extract */
152
440k
      i = bi / GMP_NUMB_BITS;   /* word index of low bit to extract */
153
440k
      bi %= GMP_NUMB_BITS;   /* bit index in low word */
154
440k
      r = p[i] >> bi;     /* extract (low) bits */
155
440k
      nbits_in_r = GMP_NUMB_BITS - bi;  /* number of bits now in r */
156
440k
      if (nbits_in_r < nbits)    /* did we get enough bits? */
157
37.7k
  r += p[i + 1] << nbits_in_r; /* prepend bits from higher word */
158
440k
      return r & (((mp_limb_t) 1 << nbits) - 1);
159
440k
    }
160
443k
}
161
162
static inline int
163
win_size (mp_bitcnt_t eb)
164
4.71k
{
165
4.71k
  int k;
166
4.71k
  static mp_bitcnt_t x[] = {7,25,81,241,673,1793,4609,11521,28161,~(mp_bitcnt_t)0};
167
20.7k
  for (k = 0; eb > x[k++]; )
168
15.9k
    ;
169
4.71k
  return k;
170
4.71k
}
171
172
/* Convert U to REDC form, U_r = B^n * U mod M */
173
static void
174
redcify (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr mp, mp_size_t n)
175
26.2k
{
176
26.2k
  mp_ptr tp, qp;
177
26.2k
  TMP_DECL;
178
26.2k
  TMP_MARK;
179
180
26.2k
  TMP_ALLOC_LIMBS_2 (tp, un + n, qp, un + 1);
181
182
26.2k
  MPN_ZERO (tp, n);
183
26.2k
  MPN_COPY (tp + n, up, un);
184
26.2k
  mpn_tdiv_qr (qp, rp, 0L, tp, un + n, mp, n);
185
26.2k
  TMP_FREE;
186
26.2k
}
187
188
#if ! HAVE_NATIVE_mpn_rsblsh1_n_ip2
189
#undef mpn_rsblsh1_n_ip2
190
#if HAVE_NATIVE_mpn_rsblsh1_n
191
1.31M
#define mpn_rsblsh1_n_ip2(a,b,n)  mpn_rsblsh1_n(a,b,a,n)
192
#else
193
#define mpn_rsblsh1_n_ip2(a,b,n)        \
194
  do                \
195
    {               \
196
      mpn_lshift (a, a, n, 1);          \
197
      mpn_sub_n (a, a, b, n);         \
198
    } while (0)
199
#endif
200
#endif
201
202
#define INNERLOOP2            \
203
21.4k
  do                \
204
8.65M
    {               \
205
8.65M
      MPN_SQR (tp, rp, n);         \
206
8.65M
      MPN_REDUCE (rp, tp, mp, n, mip);        \
207
8.65M
      if (mpn_cmp (rp, mp, n) >= 0)       \
208
8.65M
  ASSERT_NOCARRY (mpn_sub_n (rp, rp, mp, n));   \
209
8.65M
      if (getbit (ep, ebi) != 0)       \
210
8.65M
  {             \
211
4.33M
    if (rp[n - 1] >> (mbi - 1) % GMP_LIMB_BITS == 0)  \
212
4.33M
      ASSERT_NOCARRY (mpn_lshift (rp, rp, n, 1));   \
213
4.33M
    else              \
214
4.33M
      mpn_rsblsh1_n_ip2 (rp, mp, n);     \
215
4.33M
  }              \
216
8.65M
    } while (--ebi != 0)
217
218
/* rp[n-1..0] = 2 ^ ep[en-1..0] mod mp[n-1..0]
219
   Requires that mp[n-1..0] is odd and > 1.
220
   Requires that ep[en-1..0] is > 1.
221
   Uses scratch space at tp of MAX(mpn_binvert_itch(n),2n) limbs.  */
222
static void
223
mpn_2powm (mp_ptr rp, mp_srcptr ep, mp_size_t en,
224
    mp_srcptr mp, mp_size_t n, mp_ptr tp)
225
23.0k
{
226
23.0k
  mp_limb_t ip[2], *mip;
227
23.0k
  mp_bitcnt_t ebi, mbi, tbi;
228
23.0k
  mp_size_t tn;
229
23.0k
  int count;
230
23.0k
  TMP_DECL;
231
232
23.0k
  ASSERT (en > 1 || (en == 1 && ep[0] > 1));
233
23.0k
  ASSERT (n > 0 && (mp[0] & 1) != 0);
234
235
23.0k
  MPN_SIZEINBASE_2EXP(ebi, ep, en, 1);
236
23.0k
  MPN_SIZEINBASE_2EXP(mbi, mp, n, 1);
237
238
23.0k
  if (LIKELY (mbi <= GMP_NUMB_MAX))
239
23.0k
    {
240
23.0k
      count_leading_zeros(count, (mp_limb_t) mbi);
241
23.0k
      count = GMP_NUMB_BITS - (count - GMP_NAIL_BITS);
242
23.0k
    }
243
0
  else
244
0
    {
245
0
      mp_bitcnt_t tc = mbi;
246
247
0
      count = 0;
248
0
      do { ++count; } while ((tc >>= 1) != 0);
249
0
    }
250
251
23.0k
  tbi = getbits (ep, ebi, count);
252
23.0k
  if (tbi >= mbi)
253
9.72k
    {
254
9.72k
      --count;
255
9.72k
      ASSERT ((tbi >> count) == 1);
256
9.72k
      tbi >>= 1;
257
9.72k
      ASSERT (tbi < mbi);
258
9.72k
      ASSERT (ebi > count);
259
9.72k
    }
260
13.3k
  else if (ebi <= count)
261
4
    {
262
4
      MPN_FILL (rp, n, 0);
263
4
      rp[tbi / GMP_LIMB_BITS] = CNST_LIMB (1) << (tbi % GMP_LIMB_BITS);
264
4
      return;
265
4
    }
266
23.0k
  ebi -= count;
267
268
23.0k
  if (n == 1)
269
1.52k
    {
270
1.52k
      mp_limb_t r0, m0, invm;
271
1.52k
      m0 = *mp;
272
273
      /* redcify (rp, tp, tn + 1, mp, n); */
274
      /* TODO: test direct use of udiv_qrnnd */
275
1.52k
      ASSERT (tbi < GMP_LIMB_BITS);
276
1.52k
      tp[1] = CNST_LIMB (1) << tbi;
277
1.52k
      tp[0] = CNST_LIMB (0);
278
1.52k
      r0 = mpn_mod_1 (tp, 2, m0);
279
280
1.52k
      binvert_limb (invm, m0);
281
1.52k
      do
282
62.6k
  {
283
62.6k
    mp_limb_t t0, t1, t2;
284
    /* MPN_SQR (tp, rp, n);     */
285
62.6k
    umul_ppmm (t1, t0, r0, r0);
286
    /* MPN_REDUCE (rp, tp, mp, n, mip);   */
287
62.6k
    MPN_REDC_0(r0, t1, t0, m0, invm);
288
289
62.6k
    t2 = r0 << 1;
290
62.6k
    t2 = r0 > (m0 >> 1) ? t2 - m0 : t2;
291
62.6k
    r0 = getbit (ep, ebi) != 0 ? t2 : r0;
292
62.6k
  } while (--ebi != 0);
293
294
      /* tp[1] = 0; tp[0] = r0; */
295
      /* MPN_REDUCE (rp, tp, mp, n, mip); */
296
1.52k
      MPN_REDC_0(*rp, 0, r0, m0, invm);
297
298
1.52k
      return;
299
1.52k
    }
300
301
21.4k
  TMP_MARK;
302
303
21.4k
#if WANT_REDC_2
304
21.4k
  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
305
21.2k
    {
306
21.2k
      mip = ip;
307
21.2k
      binvert_limb (ip[0], mp[0]);
308
21.2k
      ip[0] = -ip[0];
309
21.2k
    }
310
262
  else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
311
158
    {
312
158
      mip = ip;
313
158
      mpn_binvert (ip, mp, 2, tp);
314
158
      ip[0] = -ip[0]; ip[1] = ~ip[1];
315
158
    }
316
#else
317
  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
318
    {
319
      mip = ip;
320
      binvert_limb (ip[0], mp[0]);
321
      ip[0] = -ip[0];
322
    }
323
#endif
324
104
  else
325
104
    {
326
104
      mip = TMP_ALLOC_LIMBS (n);
327
104
      mpn_binvert (mip, mp, n, tp);
328
104
    }
329
330
21.4k
  tn = tbi / GMP_LIMB_BITS;
331
21.4k
  MPN_ZERO (tp, tn);
332
21.4k
  tp[tn] = CNST_LIMB (1) << (tbi % GMP_LIMB_BITS);
333
334
21.4k
  redcify (rp, tp, tn + 1, mp, n);
335
336
21.4k
#if WANT_REDC_2
337
21.4k
  if (REDC_1_TO_REDC_2_THRESHOLD < MUL_TOOM22_THRESHOLD)
338
0
    {
339
0
      if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
340
0
  {
341
0
    if (REDC_1_TO_REDC_2_THRESHOLD < SQR_BASECASE_THRESHOLD
342
0
        || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
343
0
      {
344
0
#undef MPN_SQR
345
0
#undef MPN_REDUCE
346
0
#define MPN_SQR(r,a,n)      mpn_mul_basecase (r,a,n,a,n)
347
0
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_1 (rp, tp, mp, n, mip[0])
348
0
        INNERLOOP2;
349
0
      }
350
0
    else
351
0
      {
352
0
#undef MPN_SQR
353
0
#undef MPN_REDUCE
354
0
#define MPN_SQR(r,a,n)      mpn_sqr_basecase (r,a,n)
355
0
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_1 (rp, tp, mp, n, mip[0])
356
0
        INNERLOOP2;
357
0
      }
358
0
  }
359
0
      else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
360
0
  {
361
0
    if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
362
0
        || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
363
0
      {
364
0
#undef MPN_SQR
365
0
#undef MPN_REDUCE
366
0
#define MPN_SQR(r,a,n)      mpn_mul_basecase (r,a,n,a,n)
367
0
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_2 (rp, tp, mp, n, mip)
368
0
        INNERLOOP2;
369
0
      }
370
0
    else
371
0
      {
372
0
#undef MPN_SQR
373
0
#undef MPN_REDUCE
374
0
#define MPN_SQR(r,a,n)      mpn_sqr_basecase (r,a,n)
375
0
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_2 (rp, tp, mp, n, mip)
376
0
        INNERLOOP2;
377
0
      }
378
0
  }
379
0
      else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
380
0
  {
381
0
#undef MPN_SQR
382
0
#undef MPN_REDUCE
383
0
#define MPN_SQR(r,a,n)      mpn_sqr (r,a,n)
384
0
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_2 (rp, tp, mp, n, mip)
385
0
    INNERLOOP2;
386
0
  }
387
0
      else
388
0
  {
389
0
#undef MPN_SQR
390
0
#undef MPN_REDUCE
391
0
#define MPN_SQR(r,a,n)      mpn_sqr (r,a,n)
392
0
#define MPN_REDUCE(rp,tp,mp,n,mip)  mpn_redc_n (rp, tp, mp, n, mip)
393
0
    INNERLOOP2;
394
0
  }
395
0
    }
396
21.4k
  else
397
21.4k
    {
398
21.4k
      if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
399
21.0k
  {
400
21.0k
    if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
401
21.0k
        || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
402
0
      {
403
0
#undef MPN_SQR
404
0
#undef MPN_REDUCE
405
0
#define MPN_SQR(r,a,n)      mpn_mul_basecase (r,a,n,a,n)
406
0
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_1 (rp, tp, mp, n, mip[0])
407
0
        INNERLOOP2;
408
0
      }
409
21.0k
    else
410
21.0k
      {
411
21.0k
#undef MPN_SQR
412
21.0k
#undef MPN_REDUCE
413
7.50M
#define MPN_SQR(r,a,n)      mpn_sqr_basecase (r,a,n)
414
7.50M
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_1 (rp, tp, mp, n, mip[0])
415
21.0k
        INNERLOOP2;
416
21.0k
      }
417
21.0k
  }
418
408
      else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
419
146
  {
420
146
#undef MPN_SQR
421
146
#undef MPN_REDUCE
422
56.0k
#define MPN_SQR(r,a,n)      mpn_sqr (r,a,n)
423
56.0k
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_1 (rp, tp, mp, n, mip[0])
424
146
    INNERLOOP2;
425
146
  }
426
262
      else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
427
158
  {
428
158
#undef MPN_SQR
429
158
#undef MPN_REDUCE
430
340k
#define MPN_SQR(r,a,n)      mpn_sqr (r,a,n)
431
340k
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_2 (rp, tp, mp, n, mip)
432
158
    INNERLOOP2;
433
158
  }
434
104
      else
435
104
  {
436
104
#undef MPN_SQR
437
104
#undef MPN_REDUCE
438
747k
#define MPN_SQR(r,a,n)      mpn_sqr (r,a,n)
439
747k
#define MPN_REDUCE(rp,tp,mp,n,mip)  mpn_redc_n (rp, tp, mp, n, mip)
440
104
    INNERLOOP2;
441
104
  }
442
21.4k
    }
443
444
#else  /* WANT_REDC_2 */
445
446
  if (REDC_1_TO_REDC_N_THRESHOLD < MUL_TOOM22_THRESHOLD)
447
    {
448
      if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
449
  {
450
    if (REDC_1_TO_REDC_N_THRESHOLD < SQR_BASECASE_THRESHOLD
451
        || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
452
      {
453
#undef MPN_SQR
454
#undef MPN_REDUCE
455
#define MPN_SQR(r,a,n)      mpn_mul_basecase (r,a,n,a,n)
456
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_1 (rp, tp, mp, n, mip[0])
457
        INNERLOOP2;
458
      }
459
    else
460
      {
461
#undef MPN_SQR
462
#undef MPN_REDUCE
463
#define MPN_SQR(r,a,n)      mpn_sqr_basecase (r,a,n)
464
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_1 (rp, tp, mp, n, mip[0])
465
        INNERLOOP2;
466
      }
467
  }
468
      else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
469
  {
470
    if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
471
        || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
472
      {
473
#undef MPN_SQR
474
#undef MPN_REDUCE
475
#define MPN_SQR(r,a,n)      mpn_mul_basecase (r,a,n,a,n)
476
#define MPN_REDUCE(rp,tp,mp,n,mip)  mpn_redc_n (rp, tp, mp, n, mip)
477
        INNERLOOP2;
478
      }
479
    else
480
      {
481
#undef MPN_SQR
482
#undef MPN_REDUCE
483
#define MPN_SQR(r,a,n)      mpn_sqr_basecase (r,a,n)
484
#define MPN_REDUCE(rp,tp,mp,n,mip)  mpn_redc_n (rp, tp, mp, n, mip)
485
        INNERLOOP2;
486
      }
487
  }
488
      else
489
  {
490
#undef MPN_SQR
491
#undef MPN_REDUCE
492
#define MPN_SQR(r,a,n)      mpn_sqr (r,a,n)
493
#define MPN_REDUCE(rp,tp,mp,n,mip)  mpn_redc_n (rp, tp, mp, n, mip)
494
    INNERLOOP2;
495
  }
496
    }
497
  else
498
    {
499
      if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
500
  {
501
    if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
502
        || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
503
      {
504
#undef MPN_SQR
505
#undef MPN_REDUCE
506
#define MPN_SQR(r,a,n)      mpn_mul_basecase (r,a,n,a,n)
507
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_1 (rp, tp, mp, n, mip[0])
508
        INNERLOOP2;
509
      }
510
    else
511
      {
512
#undef MPN_SQR
513
#undef MPN_REDUCE
514
#define MPN_SQR(r,a,n)      mpn_sqr_basecase (r,a,n)
515
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_1 (rp, tp, mp, n, mip[0])
516
        INNERLOOP2;
517
      }
518
  }
519
      else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
520
  {
521
#undef MPN_SQR
522
#undef MPN_REDUCE
523
#define MPN_SQR(r,a,n)      mpn_sqr (r,a,n)
524
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_1 (rp, tp, mp, n, mip[0])
525
    INNERLOOP2;
526
  }
527
      else
528
  {
529
#undef MPN_SQR
530
#undef MPN_REDUCE
531
#define MPN_SQR(r,a,n)      mpn_sqr (r,a,n)
532
#define MPN_REDUCE(rp,tp,mp,n,mip)  mpn_redc_n (rp, tp, mp, n, mip)
533
    INNERLOOP2;
534
  }
535
    }
536
#endif  /* WANT_REDC_2 */
537
538
21.4k
  MPN_COPY (tp, rp, n);
539
21.4k
  MPN_FILL (tp + n, n, 0);
540
541
21.4k
#if WANT_REDC_2
542
21.4k
  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
543
21.2k
    MPN_REDC_1 (rp, tp, mp, n, ip[0]);
544
262
  else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
545
158
    MPN_REDC_2 (rp, tp, mp, n, mip);
546
#else
547
  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
548
    MPN_REDC_1 (rp, tp, mp, n, ip[0]);
549
#endif
550
104
  else
551
104
    mpn_redc_n (rp, tp, mp, n, mip);
552
553
21.4k
  if (mpn_cmp (rp, mp, n) >= 0)
554
0
    mpn_sub_n (rp, rp, mp, n);
555
556
21.4k
  TMP_FREE;
557
21.4k
}
558
559
/* rp[n-1..0] = bp[bn-1..0] ^ ep[en-1..0] mod mp[n-1..0]
560
   Requires that mp[n-1..0] is odd.
561
   Requires that ep[en-1..0] is > 1.
562
   Uses scratch space at tp of MAX(mpn_binvert_itch(n),2n) limbs.  */
563
void
564
mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
565
    mp_srcptr ep, mp_size_t en,
566
    mp_srcptr mp, mp_size_t n, mp_ptr tp)
567
27.7k
{
568
27.7k
  mp_limb_t ip[2], *mip;
569
27.7k
  int cnt;
570
27.7k
  mp_bitcnt_t ebi;
571
27.7k
  int windowsize, this_windowsize;
572
27.7k
  mp_limb_t expbits;
573
27.7k
  mp_ptr pp, this_pp;
574
27.7k
  long i;
575
27.7k
  TMP_DECL;
576
577
27.7k
  ASSERT (en > 1 || (en == 1 && ep[0] > 1));
578
27.7k
  ASSERT (n >= 1 && ((mp[0] & 1) != 0));
579
580
27.7k
  if (bn == 1 && bp[0] == 2)
581
23.0k
    {
582
23.0k
      mpn_2powm (rp, ep, en, mp, n, tp);
583
23.0k
      return;
584
23.0k
    }
585
586
4.71k
  TMP_MARK;
587
588
4.71k
  MPN_SIZEINBASE_2EXP(ebi, ep, en, 1);
589
590
#if 0
591
  if (bn < n)
592
    {
593
      /* Do the first few exponent bits without mod reductions,
594
   until the result is greater than the mod argument.  */
595
      for (;;)
596
  {
597
    mpn_sqr (tp, this_pp, tn);
598
    tn = tn * 2 - 1,  tn += tp[tn] != 0;
599
    if (getbit (ep, ebi) != 0)
600
      mpn_mul (..., tp, tn, bp, bn);
601
    ebi--;
602
  }
603
    }
604
#endif
605
606
4.71k
  windowsize = win_size (ebi);
607
608
4.71k
#if WANT_REDC_2
609
4.71k
  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
610
3.67k
    {
611
3.67k
      mip = ip;
612
3.67k
      binvert_limb (mip[0], mp[0]);
613
3.67k
      mip[0] = -mip[0];
614
3.67k
    }
615
1.04k
  else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
616
409
    {
617
409
      mip = ip;
618
409
      mpn_binvert (mip, mp, 2, tp);
619
409
      mip[0] = -mip[0]; mip[1] = ~mip[1];
620
409
    }
621
#else
622
  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
623
    {
624
      mip = ip;
625
      binvert_limb (mip[0], mp[0]);
626
      mip[0] = -mip[0];
627
    }
628
#endif
629
631
  else
630
631
    {
631
631
      mip = TMP_ALLOC_LIMBS (n);
632
631
      mpn_binvert (mip, mp, n, tp);
633
631
    }
634
635
4.71k
  pp = TMP_ALLOC_LIMBS (n << (windowsize - 1));
636
637
4.71k
  this_pp = pp;
638
4.71k
  redcify (this_pp, bp, bn, mp, n);
639
640
  /* Store b^2 at rp.  */
641
4.71k
  mpn_sqr (tp, this_pp, n);
642
#if 0
643
  if (n == 1) {
644
    MPN_REDC_0 (rp[0], tp[1], tp[0], mp[0], -mip[0]);
645
  } else
646
#endif
647
4.71k
#if WANT_REDC_2
648
4.71k
  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
649
3.67k
    MPN_REDC_1 (rp, tp, mp, n, mip[0]);
650
1.04k
  else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
651
409
    MPN_REDC_2 (rp, tp, mp, n, mip);
652
#else
653
  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
654
    MPN_REDC_1 (rp, tp, mp, n, mip[0]);
655
#endif
656
631
  else
657
631
    mpn_redc_n (rp, tp, mp, n, mip);
658
659
  /* Precompute odd powers of b and put them in the temporary area at pp.  */
660
82.0k
  for (i = (1 << (windowsize - 1)) - 1; i > 0; i--)
661
77.3k
#if 1
662
77.3k
    if (n == 1) {
663
5.50k
      umul_ppmm((tp)[1], *(tp), *(this_pp), *(rp));
664
5.50k
      ++this_pp ;
665
5.50k
      MPN_REDC_0 (*this_pp, tp[1], tp[0], *mp, -mip[0]);
666
5.50k
    } else
667
71.8k
#endif
668
71.8k
    {
669
71.8k
      mpn_mul_n (tp, this_pp, rp, n);
670
71.8k
      this_pp += n;
671
71.8k
#if WANT_REDC_2
672
71.8k
      if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
673
35.1k
  MPN_REDC_1 (this_pp, tp, mp, n, mip[0]);
674
36.6k
      else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
675
12.3k
  MPN_REDC_2 (this_pp, tp, mp, n, mip);
676
#else
677
      if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
678
  MPN_REDC_1 (this_pp, tp, mp, n, mip[0]);
679
#endif
680
24.3k
      else
681
24.3k
  mpn_redc_n (this_pp, tp, mp, n, mip);
682
71.8k
    }
683
684
4.71k
  expbits = getbits (ep, ebi, windowsize);
685
4.71k
  ebi -= windowsize;
686
687
  /* THINK: Should we initialise the case expbits % 4 == 0 with a mul? */
688
4.71k
  count_trailing_zeros (cnt, expbits);
689
4.71k
  ebi += cnt;
690
4.71k
  expbits >>= cnt;
691
692
4.71k
  MPN_COPY (rp, pp + n * (expbits >> 1), n);
693
694
4.71k
#define INNERLOOP             \
695
420k
  while (ebi != 0)             \
696
417k
    {                 \
697
1.25M
      while (getbit (ep, ebi) == 0)         \
698
839k
  {               \
699
839k
    MPN_SQR (tp, rp, n);           \
700
839k
    MPN_REDUCE (rp, tp, mp, n, mip);        \
701
839k
    if (--ebi == 0)           \
702
62.2k
      goto done;             \
703
62.2k
  }                \
704
417k
                  \
705
      /* The next bit of the exponent is 1.  Now extract the largest  \
706
   block of bits <= windowsize, and such that the least   \
707
   significant bit is 1.  */          \
708
417k
                  \
709
417k
      expbits = getbits (ep, ebi, windowsize);        \
710
415k
      this_windowsize = MIN (ebi, windowsize);       \
711
415k
                  \
712
415k
      count_trailing_zeros (cnt, expbits);        \
713
415k
      this_windowsize -= cnt;           \
714
415k
      ebi -= this_windowsize;           \
715
415k
      expbits >>= cnt;              \
716
415k
                  \
717
415k
      do                \
718
2.21M
  {               \
719
2.21M
    MPN_SQR (tp, rp, n);           \
720
2.21M
    MPN_REDUCE (rp, tp, mp, n, mip);        \
721
2.21M
  }                \
722
2.21M
      while (--this_windowsize != 0);          \
723
415k
                  \
724
415k
      MPN_MUL_N (tp, rp, pp + n * (expbits >> 1), n);      \
725
415k
      MPN_REDUCE (rp, tp, mp, n, mip);          \
726
31.1k
    }
727
728
729
4.71k
  if (n == 1)
730
429
    {
731
429
#undef MPN_MUL_N
732
429
#undef MPN_SQR
733
429
#undef MPN_REDUCE
734
31.1k
#define MPN_MUL_N(r,a,b,n)    umul_ppmm((r)[1], *(r), *(a), *(b))
735
221k
#define MPN_SQR(r,a,n)      umul_ppmm((r)[1], *(r), *(a), *(a))
736
253k
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_0(*(rp), (tp)[1], (tp)[0], *(mp), - *(mip))
737
429
      INNERLOOP;
738
245
    }
739
4.28k
  else
740
4.28k
#if WANT_REDC_2
741
4.28k
  if (REDC_1_TO_REDC_2_THRESHOLD < MUL_TOOM22_THRESHOLD)
742
0
    {
743
0
      if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
744
0
  {
745
0
    if (REDC_1_TO_REDC_2_THRESHOLD < SQR_BASECASE_THRESHOLD
746
0
        || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
747
0
      {
748
0
#undef MPN_MUL_N
749
0
#undef MPN_SQR
750
0
#undef MPN_REDUCE
751
0
#define MPN_MUL_N(r,a,b,n)    mpn_mul_basecase (r,a,n,b,n)
752
0
#define MPN_SQR(r,a,n)      mpn_mul_basecase (r,a,n,a,n)
753
0
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_1 (rp, tp, mp, n, mip[0])
754
0
        INNERLOOP;
755
0
      }
756
0
    else
757
0
      {
758
0
#undef MPN_MUL_N
759
0
#undef MPN_SQR
760
0
#undef MPN_REDUCE
761
0
#define MPN_MUL_N(r,a,b,n)    mpn_mul_basecase (r,a,n,b,n)
762
0
#define MPN_SQR(r,a,n)      mpn_sqr_basecase (r,a,n)
763
0
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_1 (rp, tp, mp, n, mip[0])
764
0
        INNERLOOP;
765
0
      }
766
0
  }
767
0
      else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
768
0
  {
769
0
    if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
770
0
        || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
771
0
      {
772
0
#undef MPN_MUL_N
773
0
#undef MPN_SQR
774
0
#undef MPN_REDUCE
775
0
#define MPN_MUL_N(r,a,b,n)    mpn_mul_basecase (r,a,n,b,n)
776
0
#define MPN_SQR(r,a,n)      mpn_mul_basecase (r,a,n,a,n)
777
0
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_2 (rp, tp, mp, n, mip)
778
0
        INNERLOOP;
779
0
      }
780
0
    else
781
0
      {
782
0
#undef MPN_MUL_N
783
0
#undef MPN_SQR
784
0
#undef MPN_REDUCE
785
0
#define MPN_MUL_N(r,a,b,n)    mpn_mul_basecase (r,a,n,b,n)
786
0
#define MPN_SQR(r,a,n)      mpn_sqr_basecase (r,a,n)
787
0
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_2 (rp, tp, mp, n, mip)
788
0
        INNERLOOP;
789
0
      }
790
0
  }
791
0
      else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
792
0
  {
793
0
#undef MPN_MUL_N
794
0
#undef MPN_SQR
795
0
#undef MPN_REDUCE
796
0
#define MPN_MUL_N(r,a,b,n)    mpn_mul_n (r,a,b,n)
797
0
#define MPN_SQR(r,a,n)      mpn_sqr (r,a,n)
798
0
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_2 (rp, tp, mp, n, mip)
799
0
    INNERLOOP;
800
0
  }
801
0
      else
802
0
  {
803
0
#undef MPN_MUL_N
804
0
#undef MPN_SQR
805
0
#undef MPN_REDUCE
806
0
#define MPN_MUL_N(r,a,b,n)    mpn_mul_n (r,a,b,n)
807
0
#define MPN_SQR(r,a,n)      mpn_sqr (r,a,n)
808
0
#define MPN_REDUCE(rp,tp,mp,n,mip)  mpn_redc_n (rp, tp, mp, n, mip)
809
0
    INNERLOOP;
810
0
  }
811
0
    }
812
4.28k
  else
813
4.28k
    {
814
4.28k
      if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
815
2.83k
  {
816
2.83k
    if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
817
2.83k
        || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
818
0
      {
819
0
#undef MPN_MUL_N
820
0
#undef MPN_SQR
821
0
#undef MPN_REDUCE
822
0
#define MPN_MUL_N(r,a,b,n)    mpn_mul_basecase (r,a,n,b,n)
823
0
#define MPN_SQR(r,a,n)      mpn_mul_basecase (r,a,n,a,n)
824
0
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_1 (rp, tp, mp, n, mip[0])
825
0
        INNERLOOP;
826
0
      }
827
2.83k
    else
828
2.83k
      {
829
2.83k
#undef MPN_MUL_N
830
2.83k
#undef MPN_SQR
831
2.83k
#undef MPN_REDUCE
832
136k
#define MPN_MUL_N(r,a,b,n)    mpn_mul_basecase (r,a,n,b,n)
833
828k
#define MPN_SQR(r,a,n)      mpn_sqr_basecase (r,a,n)
834
964k
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_1 (rp, tp, mp, n, mip[0])
835
2.83k
        INNERLOOP;
836
1.94k
      }
837
2.83k
  }
838
1.44k
      else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
839
408
  {
840
408
#undef MPN_MUL_N
841
408
#undef MPN_SQR
842
408
#undef MPN_REDUCE
843
15.8k
#define MPN_MUL_N(r,a,b,n)    mpn_mul_n (r,a,b,n)
844
94.5k
#define MPN_SQR(r,a,n)      mpn_sqr (r,a,n)
845
110k
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_1 (rp, tp, mp, n, mip[0])
846
408
    INNERLOOP;
847
194
  }
848
1.04k
      else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
849
409
  {
850
409
#undef MPN_MUL_N
851
409
#undef MPN_SQR
852
409
#undef MPN_REDUCE
853
79.2k
#define MPN_MUL_N(r,a,b,n)    mpn_mul_n (r,a,b,n)
854
606k
#define MPN_SQR(r,a,n)      mpn_sqr (r,a,n)
855
685k
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_2 (rp, tp, mp, n, mip)
856
409
    INNERLOOP;
857
228
  }
858
631
      else
859
631
  {
860
631
#undef MPN_MUL_N
861
631
#undef MPN_SQR
862
631
#undef MPN_REDUCE
863
153k
#define MPN_MUL_N(r,a,b,n)    mpn_mul_n (r,a,b,n)
864
1.30M
#define MPN_SQR(r,a,n)      mpn_sqr (r,a,n)
865
1.45M
#define MPN_REDUCE(rp,tp,mp,n,mip)  mpn_redc_n (rp, tp, mp, n, mip)
866
631
    INNERLOOP;
867
222
  }
868
4.28k
    }
869
870
#else  /* WANT_REDC_2 */
871
872
  if (REDC_1_TO_REDC_N_THRESHOLD < MUL_TOOM22_THRESHOLD)
873
    {
874
      if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
875
  {
876
    if (REDC_1_TO_REDC_N_THRESHOLD < SQR_BASECASE_THRESHOLD
877
        || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
878
      {
879
#undef MPN_MUL_N
880
#undef MPN_SQR
881
#undef MPN_REDUCE
882
#define MPN_MUL_N(r,a,b,n)    mpn_mul_basecase (r,a,n,b,n)
883
#define MPN_SQR(r,a,n)      mpn_mul_basecase (r,a,n,a,n)
884
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_1 (rp, tp, mp, n, mip[0])
885
        INNERLOOP;
886
      }
887
    else
888
      {
889
#undef MPN_MUL_N
890
#undef MPN_SQR
891
#undef MPN_REDUCE
892
#define MPN_MUL_N(r,a,b,n)    mpn_mul_basecase (r,a,n,b,n)
893
#define MPN_SQR(r,a,n)      mpn_sqr_basecase (r,a,n)
894
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_1 (rp, tp, mp, n, mip[0])
895
        INNERLOOP;
896
      }
897
  }
898
      else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
899
  {
900
    if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
901
        || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
902
      {
903
#undef MPN_MUL_N
904
#undef MPN_SQR
905
#undef MPN_REDUCE
906
#define MPN_MUL_N(r,a,b,n)    mpn_mul_basecase (r,a,n,b,n)
907
#define MPN_SQR(r,a,n)      mpn_mul_basecase (r,a,n,a,n)
908
#define MPN_REDUCE(rp,tp,mp,n,mip)  mpn_redc_n (rp, tp, mp, n, mip)
909
        INNERLOOP;
910
      }
911
    else
912
      {
913
#undef MPN_MUL_N
914
#undef MPN_SQR
915
#undef MPN_REDUCE
916
#define MPN_MUL_N(r,a,b,n)    mpn_mul_basecase (r,a,n,b,n)
917
#define MPN_SQR(r,a,n)      mpn_sqr_basecase (r,a,n)
918
#define MPN_REDUCE(rp,tp,mp,n,mip)  mpn_redc_n (rp, tp, mp, n, mip)
919
        INNERLOOP;
920
      }
921
  }
922
      else
923
  {
924
#undef MPN_MUL_N
925
#undef MPN_SQR
926
#undef MPN_REDUCE
927
#define MPN_MUL_N(r,a,b,n)    mpn_mul_n (r,a,b,n)
928
#define MPN_SQR(r,a,n)      mpn_sqr (r,a,n)
929
#define MPN_REDUCE(rp,tp,mp,n,mip)  mpn_redc_n (rp, tp, mp, n, mip)
930
    INNERLOOP;
931
  }
932
    }
933
  else
934
    {
935
      if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
936
  {
937
    if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
938
        || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
939
      {
940
#undef MPN_MUL_N
941
#undef MPN_SQR
942
#undef MPN_REDUCE
943
#define MPN_MUL_N(r,a,b,n)    mpn_mul_basecase (r,a,n,b,n)
944
#define MPN_SQR(r,a,n)      mpn_mul_basecase (r,a,n,a,n)
945
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_1 (rp, tp, mp, n, mip[0])
946
        INNERLOOP;
947
      }
948
    else
949
      {
950
#undef MPN_MUL_N
951
#undef MPN_SQR
952
#undef MPN_REDUCE
953
#define MPN_MUL_N(r,a,b,n)    mpn_mul_basecase (r,a,n,b,n)
954
#define MPN_SQR(r,a,n)      mpn_sqr_basecase (r,a,n)
955
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_1 (rp, tp, mp, n, mip[0])
956
        INNERLOOP;
957
      }
958
  }
959
      else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
960
  {
961
#undef MPN_MUL_N
962
#undef MPN_SQR
963
#undef MPN_REDUCE
964
#define MPN_MUL_N(r,a,b,n)    mpn_mul_n (r,a,b,n)
965
#define MPN_SQR(r,a,n)      mpn_sqr (r,a,n)
966
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_1 (rp, tp, mp, n, mip[0])
967
    INNERLOOP;
968
  }
969
      else
970
  {
971
#undef MPN_MUL_N
972
#undef MPN_SQR
973
#undef MPN_REDUCE
974
#define MPN_MUL_N(r,a,b,n)    mpn_mul_n (r,a,b,n)
975
#define MPN_SQR(r,a,n)      mpn_sqr (r,a,n)
976
#define MPN_REDUCE(rp,tp,mp,n,mip)  mpn_redc_n (rp, tp, mp, n, mip)
977
    INNERLOOP;
978
  }
979
    }
980
#endif  /* WANT_REDC_2 */
981
982
4.71k
 done:
983
984
4.71k
  MPN_COPY (tp, rp, n);
985
4.71k
  MPN_ZERO (tp + n, n);
986
987
4.71k
#if WANT_REDC_2
988
4.71k
  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
989
3.67k
    MPN_REDC_1 (rp, tp, mp, n, mip[0]);
990
1.04k
  else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
991
409
    MPN_REDC_2 (rp, tp, mp, n, mip);
992
#else
993
  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
994
    MPN_REDC_1 (rp, tp, mp, n, mip[0]);
995
#endif
996
631
  else
997
631
    mpn_redc_n (rp, tp, mp, n, mip);
998
999
4.71k
  if (mpn_cmp (rp, mp, n) >= 0)
1000
4
    mpn_sub_n (rp, rp, mp, n);
1001
1002
4.71k
  TMP_FREE;
1003
4.71k
}