Coverage Report

Created: 2025-03-09 06:52

/src/gmp-6.2.1/mpn/powm.c
Line
Count
Source (jump to first uncovered line)
1
/* mpn_powm -- Compute R = U^E mod M.
2
3
   Contributed to the GNU project by Torbjorn Granlund.
4
5
   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
6
   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
7
   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
8
9
Copyright 2007-2012, 2019 Free Software Foundation, Inc.
10
11
This file is part of the GNU MP Library.
12
13
The GNU MP Library is free software; you can redistribute it and/or modify
14
it under the terms of either:
15
16
  * the GNU Lesser General Public License as published by the Free
17
    Software Foundation; either version 3 of the License, or (at your
18
    option) any later version.
19
20
or
21
22
  * the GNU General Public License as published by the Free Software
23
    Foundation; either version 2 of the License, or (at your option) any
24
    later version.
25
26
or both in parallel, as here.
27
28
The GNU MP Library is distributed in the hope that it will be useful, but
29
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
30
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
31
for more details.
32
33
You should have received copies of the GNU General Public License and the
34
GNU Lesser General Public License along with the GNU MP Library.  If not,
35
see https://www.gnu.org/licenses/.  */
36
37
38
/*
39
  BASIC ALGORITHM, Compute U^E mod M, where M < B^n is odd.
40
41
  1. W <- U
42
43
  2. T <- (B^n * U) mod M                Convert to REDC form
44
45
  3. Compute table U^1, U^3, U^5... of E-dependent size
46
47
  4. While there are more bits in E
48
       W <- power left-to-right base-k
49
50
51
  TODO:
52
53
   * Make getbits a macro, thereby allowing it to update the index operand.
54
     That will simplify the code using getbits.  (Perhaps make getbits' sibling
55
     getbit then have similar form, for symmetry.)
56
57
   * Write an itch function.  Or perhaps get rid of tp parameter since the huge
58
     pp area is allocated locally anyway?
59
60
   * Choose window size without looping.  (Superoptimize or think(tm).)
61
62
   * Handle small bases with initial, reduction-free exponentiation.
63
64
   * Call new division functions, not mpn_tdiv_qr.
65
66
   * Consider special code for one-limb M.
67
68
   * How should we handle the redc1/redc2/redc_n choice?
69
     - redc1:  T(binvert_1limb)  + e * (n)   * (T(mullo-1x1) + n*T(addmul_1))
70
     - redc2:  T(binvert_2limbs) + e * (n/2) * (T(mullo-2x2) + n*T(addmul_2))
71
     - redc_n: T(binvert_nlimbs) + e * (T(mullo-nxn) + T(M(n)))
72
     This disregards the addmul_N constant term, but we could think of
73
     that as part of the respective mullo.
74
75
   * When U (the base) is small, we should start the exponentiation with plain
76
     operations, then convert that partial result to REDC form.
77
78
   * When U is just one limb, should it be handled without the k-ary tricks?
79
     We could keep a factor of B^n in W, but use U' = BU as base.  After
80
     multiplying by this (pseudo two-limb) number, we need to multiply by 1/B
81
     mod M.
82
*/
83
84
#include "gmp-impl.h"
85
#include "longlong.h"
86
87
#undef MPN_REDC_0
88
#define MPN_REDC_0(rp, up, mp, invm)          \
89
175k
  do {                 \
90
175k
    mp_limb_t p1, r0, u0, _dummy;         \
91
175k
    u0 = *(up);               \
92
175k
    umul_ppmm (p1, _dummy, *(mp), (u0 * (invm)) & GMP_NUMB_MASK);  \
93
175k
    ASSERT (((u0 + _dummy) & GMP_NUMB_MASK) == 0);      \
94
175k
    p1 += (u0 != 0);              \
95
175k
    r0 = (up)[1] + p1;              \
96
175k
    if (p1 > r0)             \
97
175k
      r0 -= *(mp);             \
98
175k
    *(rp) = r0;               \
99
175k
  } while (0)
100
101
#undef MPN_REDC_1
102
#if HAVE_NATIVE_mpn_sbpi1_bdiv_r
103
#define MPN_REDC_1(rp, up, mp, n, invm)         \
104
  do {                  \
105
    mp_limb_t cy;             \
106
    cy = mpn_sbpi1_bdiv_r (up, 2 * n, mp, n, invm);     \
107
    if (cy != 0)              \
108
      mpn_sub_n (rp, up + n, mp, n);          \
109
    else                \
110
      MPN_COPY (rp, up + n, n);           \
111
  } while (0)
112
#else
113
#define MPN_REDC_1(rp, up, mp, n, invm)         \
114
7.70M
  do {                 \
115
7.70M
    mp_limb_t cy;             \
116
7.70M
    cy = mpn_redc_1 (rp, up, mp, n, invm);       \
117
7.70M
    if (cy != 0)             \
118
7.70M
      mpn_sub_n (rp, rp, mp, n);         \
119
7.70M
  } while (0)
120
#endif
121
122
#undef MPN_REDC_2
123
#define MPN_REDC_2(rp, up, mp, n, mip)          \
124
317k
  do {                 \
125
317k
    mp_limb_t cy;             \
126
317k
    cy = mpn_redc_2 (rp, up, mp, n, mip);        \
127
317k
    if (cy != 0)             \
128
317k
      mpn_sub_n (rp, rp, mp, n);         \
129
317k
  } while (0)
130
131
#if HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2
132
#define WANT_REDC_2 1
133
#endif
134
135
#define getbit(p,bi) \
136
4.27M
  ((p[(bi - 1) / GMP_LIMB_BITS] >> (bi - 1) % GMP_LIMB_BITS) & 1)
137
138
static inline mp_limb_t
139
getbits (const mp_limb_t *p, mp_bitcnt_t bi, int nbits)
140
1.46M
{
141
1.46M
  int nbits_in_r;
142
1.46M
  mp_limb_t r;
143
1.46M
  mp_size_t i;
144
145
1.46M
  if (bi < nbits)
146
16.1k
    {
147
16.1k
      return p[0] & (((mp_limb_t) 1 << bi) - 1);
148
16.1k
    }
149
1.44M
  else
150
1.44M
    {
151
1.44M
      bi -= nbits;      /* bit index of low bit to extract */
152
1.44M
      i = bi / GMP_NUMB_BITS;   /* word index of low bit to extract */
153
1.44M
      bi %= GMP_NUMB_BITS;   /* bit index in low word */
154
1.44M
      r = p[i] >> bi;     /* extract (low) bits */
155
1.44M
      nbits_in_r = GMP_NUMB_BITS - bi;  /* number of bits now in r */
156
1.44M
      if (nbits_in_r < nbits)    /* did we get enough bits? */
157
98.3k
  r += p[i + 1] << nbits_in_r; /* prepend bits from higher word */
158
1.44M
      return r & (((mp_limb_t) 1 << nbits) - 1);
159
1.44M
    }
160
1.46M
}
161
162
static inline int
163
win_size (mp_bitcnt_t eb)
164
19.8k
{
165
19.8k
  int k;
166
19.8k
  static mp_bitcnt_t x[] = {0,7,25,81,241,673,1793,4609,11521,28161,~(mp_bitcnt_t)0};
167
94.4k
  for (k = 1; eb > x[k]; k++)
168
74.5k
    ;
169
19.8k
  return k;
170
19.8k
}
171
172
/* Convert U to REDC form, U_r = B^n * U mod M */
173
static void
174
redcify (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr mp, mp_size_t n)
175
19.8k
{
176
19.8k
  mp_ptr tp, qp;
177
19.8k
  TMP_DECL;
178
19.8k
  TMP_MARK;
179
180
19.8k
  TMP_ALLOC_LIMBS_2 (tp, un + n, qp, un + 1);
181
182
19.8k
  MPN_ZERO (tp, n);
183
19.8k
  MPN_COPY (tp + n, up, un);
184
19.8k
  mpn_tdiv_qr (qp, rp, 0L, tp, un + n, mp, n);
185
19.8k
  TMP_FREE;
186
19.8k
}
187
188
/* rp[n-1..0] = bp[bn-1..0] ^ ep[en-1..0] mod mp[n-1..0]
189
   Requires that mp[n-1..0] is odd.
190
   Requires that ep[en-1..0] is > 1.
191
   Uses scratch space at tp of MAX(mpn_binvert_itch(n),2n) limbs.  */
192
void
193
mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
194
    mp_srcptr ep, mp_size_t en,
195
    mp_srcptr mp, mp_size_t n, mp_ptr tp)
196
19.8k
{
197
19.8k
  mp_limb_t ip[2], *mip;
198
19.8k
  int cnt;
199
19.8k
  mp_bitcnt_t ebi;
200
19.8k
  int windowsize, this_windowsize;
201
19.8k
  mp_limb_t expbits;
202
19.8k
  mp_ptr pp, this_pp;
203
19.8k
  long i;
204
19.8k
  TMP_DECL;
205
206
19.8k
  ASSERT (en > 1 || (en == 1 && ep[0] > 1));
207
19.8k
  ASSERT (n >= 1 && ((mp[0] & 1) != 0));
208
209
19.8k
  TMP_MARK;
210
211
19.8k
  MPN_SIZEINBASE_2EXP(ebi, ep, en, 1);
212
213
#if 0
214
  if (bn < n)
215
    {
216
      /* Do the first few exponent bits without mod reductions,
217
   until the result is greater than the mod argument.  */
218
      for (;;)
219
  {
220
    mpn_sqr (tp, this_pp, tn);
221
    tn = tn * 2 - 1,  tn += tp[tn] != 0;
222
    if (getbit (ep, ebi) != 0)
223
      mpn_mul (..., tp, tn, bp, bn);
224
    ebi--;
225
  }
226
    }
227
#endif
228
229
19.8k
  windowsize = win_size (ebi);
230
231
19.8k
#if WANT_REDC_2
232
19.8k
  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
233
18.9k
    {
234
18.9k
      mip = ip;
235
18.9k
      binvert_limb (mip[0], mp[0]);
236
18.9k
      mip[0] = -mip[0];
237
18.9k
    }
238
889
  else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
239
106
    {
240
106
      mip = ip;
241
106
      mpn_binvert (mip, mp, 2, tp);
242
106
      mip[0] = -mip[0]; mip[1] = ~mip[1];
243
106
    }
244
#else
245
  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
246
    {
247
      mip = ip;
248
      binvert_limb (mip[0], mp[0]);
249
      mip[0] = -mip[0];
250
    }
251
#endif
252
783
  else
253
783
    {
254
783
      mip = TMP_ALLOC_LIMBS (n);
255
783
      mpn_binvert (mip, mp, n, tp);
256
783
    }
257
258
19.8k
  pp = TMP_ALLOC_LIMBS (n << (windowsize - 1));
259
260
19.8k
  this_pp = pp;
261
19.8k
  redcify (this_pp, bp, bn, mp, n);
262
263
  /* Store b^2 at rp.  */
264
19.8k
  mpn_sqr (tp, this_pp, n);
265
#if 0
266
  if (n == 1) {
267
    MPN_REDC_0 (rp, tp, mp, mip[0]);
268
  } else
269
#endif
270
19.8k
#if WANT_REDC_2
271
19.8k
  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
272
18.9k
    MPN_REDC_1 (rp, tp, mp, n, mip[0]);
273
889
  else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
274
106
    MPN_REDC_2 (rp, tp, mp, n, mip);
275
#else
276
  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
277
    MPN_REDC_1 (rp, tp, mp, n, mip[0]);
278
#endif
279
783
  else
280
783
    mpn_redc_n (rp, tp, mp, n, mip);
281
282
  /* Precompute odd powers of b and put them in the temporary area at pp.  */
283
326k
  for (i = (1 << (windowsize - 1)) - 1; i > 0; i--)
284
306k
#if 1
285
306k
    if (n == 1) {
286
5.23k
      umul_ppmm((tp)[1], *(tp), *(this_pp), *(rp));
287
5.23k
      ++this_pp ;
288
5.23k
      MPN_REDC_0 (this_pp, tp, mp, mip[0]);
289
5.23k
    } else
290
301k
#endif
291
301k
    {
292
301k
      mpn_mul_n (tp, this_pp, rp, n);
293
301k
      this_pp += n;
294
301k
#if WANT_REDC_2
295
301k
      if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
296
247k
  MPN_REDC_1 (this_pp, tp, mp, n, mip[0]);
297
54.0k
      else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
298
6.71k
  MPN_REDC_2 (this_pp, tp, mp, n, mip);
299
#else
300
      if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
301
  MPN_REDC_1 (this_pp, tp, mp, n, mip[0]);
302
#endif
303
47.2k
      else
304
47.2k
  mpn_redc_n (this_pp, tp, mp, n, mip);
305
301k
    }
306
307
19.8k
  expbits = getbits (ep, ebi, windowsize);
308
19.8k
  if (ebi < windowsize)
309
0
    ebi = 0;
310
19.8k
  else
311
19.8k
    ebi -= windowsize;
312
313
19.8k
  count_trailing_zeros (cnt, expbits);
314
19.8k
  ebi += cnt;
315
19.8k
  expbits >>= cnt;
316
317
19.8k
  MPN_COPY (rp, pp + n * (expbits >> 1), n);
318
319
19.8k
#define INNERLOOP             \
320
1.46M
  while (ebi != 0)             \
321
1.44M
    {                 \
322
4.27M
      while (getbit (ep, ebi) == 0)         \
323
2.83M
  {               \
324
2.83M
    MPN_SQR (tp, rp, n);           \
325
2.83M
    MPN_REDUCE (rp, tp, mp, n, mip);        \
326
2.83M
    if (--ebi == 0)           \
327
49.4k
      goto done;             \
328
49.4k
  }                \
329
1.44M
                  \
330
      /* The next bit of the exponent is 1.  Now extract the largest  \
331
   block of bits <= windowsize, and such that the least   \
332
   significant bit is 1.  */          \
333
1.44M
                  \
334
1.44M
      expbits = getbits (ep, ebi, windowsize);        \
335
1.44M
      this_windowsize = windowsize;         \
336
1.44M
      if (ebi < windowsize)           \
337
1.44M
  {               \
338
16.1k
    this_windowsize -= windowsize - ebi;        \
339
16.1k
    ebi = 0;              \
340
16.1k
  }                \
341
1.44M
      else                \
342
1.44M
        ebi -= windowsize;           \
343
1.44M
                  \
344
1.44M
      count_trailing_zeros (cnt, expbits);        \
345
1.44M
      this_windowsize -= cnt;           \
346
1.44M
      ebi += cnt;             \
347
1.44M
      expbits >>= cnt;              \
348
1.44M
                  \
349
1.44M
      do                \
350
6.76M
  {               \
351
6.76M
    MPN_SQR (tp, rp, n);           \
352
6.76M
    MPN_REDUCE (rp, tp, mp, n, mip);        \
353
6.76M
  }                \
354
6.76M
      while (--this_windowsize != 0);          \
355
1.44M
                  \
356
1.44M
      MPN_MUL_N (tp, rp, pp + n * (expbits >> 1), n);      \
357
1.44M
      MPN_REDUCE (rp, tp, mp, n, mip);          \
358
25.6k
    }
359
360
361
19.8k
  if (n == 1)
362
1.46k
    {
363
1.46k
#undef MPN_MUL_N
364
1.46k
#undef MPN_SQR
365
1.46k
#undef MPN_REDUCE
366
25.6k
#define MPN_MUL_N(r,a,b,n)    umul_ppmm((r)[1], *(r), *(a), *(b))
367
144k
#define MPN_SQR(r,a,n)      umul_ppmm((r)[1], *(r), *(a), *(a))
368
170k
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_0(rp, tp, mp, mip[0])
369
1.46k
      INNERLOOP;
370
1.34k
    }
371
18.4k
  else
372
18.4k
#if WANT_REDC_2
373
18.4k
  if (REDC_1_TO_REDC_2_THRESHOLD < MUL_TOOM22_THRESHOLD)
374
0
    {
375
0
      if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
376
0
  {
377
0
    if (REDC_1_TO_REDC_2_THRESHOLD < SQR_BASECASE_THRESHOLD
378
0
        || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
379
0
      {
380
0
#undef MPN_MUL_N
381
0
#undef MPN_SQR
382
0
#undef MPN_REDUCE
383
0
#define MPN_MUL_N(r,a,b,n)    mpn_mul_basecase (r,a,n,b,n)
384
0
#define MPN_SQR(r,a,n)      mpn_mul_basecase (r,a,n,a,n)
385
0
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_1 (rp, tp, mp, n, mip[0])
386
0
        INNERLOOP;
387
0
      }
388
0
    else
389
0
      {
390
0
#undef MPN_MUL_N
391
0
#undef MPN_SQR
392
0
#undef MPN_REDUCE
393
0
#define MPN_MUL_N(r,a,b,n)    mpn_mul_basecase (r,a,n,b,n)
394
0
#define MPN_SQR(r,a,n)      mpn_sqr_basecase (r,a,n)
395
0
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_1 (rp, tp, mp, n, mip[0])
396
0
        INNERLOOP;
397
0
      }
398
0
  }
399
0
      else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
400
0
  {
401
0
    if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
402
0
        || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
403
0
      {
404
0
#undef MPN_MUL_N
405
0
#undef MPN_SQR
406
0
#undef MPN_REDUCE
407
0
#define MPN_MUL_N(r,a,b,n)    mpn_mul_basecase (r,a,n,b,n)
408
0
#define MPN_SQR(r,a,n)      mpn_mul_basecase (r,a,n,a,n)
409
0
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_2 (rp, tp, mp, n, mip)
410
0
        INNERLOOP;
411
0
      }
412
0
    else
413
0
      {
414
0
#undef MPN_MUL_N
415
0
#undef MPN_SQR
416
0
#undef MPN_REDUCE
417
0
#define MPN_MUL_N(r,a,b,n)    mpn_mul_basecase (r,a,n,b,n)
418
0
#define MPN_SQR(r,a,n)      mpn_sqr_basecase (r,a,n)
419
0
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_2 (rp, tp, mp, n, mip)
420
0
        INNERLOOP;
421
0
      }
422
0
  }
423
0
      else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
424
0
  {
425
0
#undef MPN_MUL_N
426
0
#undef MPN_SQR
427
0
#undef MPN_REDUCE
428
0
#define MPN_MUL_N(r,a,b,n)    mpn_mul_n (r,a,b,n)
429
0
#define MPN_SQR(r,a,n)      mpn_sqr (r,a,n)
430
0
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_2 (rp, tp, mp, n, mip)
431
0
    INNERLOOP;
432
0
  }
433
0
      else
434
0
  {
435
0
#undef MPN_MUL_N
436
0
#undef MPN_SQR
437
0
#undef MPN_REDUCE
438
0
#define MPN_MUL_N(r,a,b,n)    mpn_mul_n (r,a,b,n)
439
0
#define MPN_SQR(r,a,n)      mpn_sqr (r,a,n)
440
0
#define MPN_REDUCE(rp,tp,mp,n,mip)  mpn_redc_n (rp, tp, mp, n, mip)
441
0
    INNERLOOP;
442
0
  }
443
0
    }
444
18.4k
  else
445
18.4k
    {
446
18.4k
      if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
447
17.4k
  {
448
17.4k
    if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
449
17.4k
        || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
450
0
      {
451
0
#undef MPN_MUL_N
452
0
#undef MPN_SQR
453
0
#undef MPN_REDUCE
454
0
#define MPN_MUL_N(r,a,b,n)    mpn_mul_basecase (r,a,n,b,n)
455
0
#define MPN_SQR(r,a,n)      mpn_mul_basecase (r,a,n,a,n)
456
0
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_1 (rp, tp, mp, n, mip[0])
457
0
        INNERLOOP;
458
0
      }
459
17.4k
    else
460
17.4k
      {
461
17.4k
#undef MPN_MUL_N
462
17.4k
#undef MPN_SQR
463
17.4k
#undef MPN_REDUCE
464
1.05M
#define MPN_MUL_N(r,a,b,n)    mpn_mul_basecase (r,a,n,b,n)
465
6.25M
#define MPN_SQR(r,a,n)      mpn_sqr_basecase (r,a,n)
466
7.30M
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_1 (rp, tp, mp, n, mip[0])
467
17.4k
        INNERLOOP;
468
17.3k
      }
469
17.4k
  }
470
1.01k
      else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
471
124
  {
472
124
#undef MPN_MUL_N
473
124
#undef MPN_SQR
474
124
#undef MPN_REDUCE
475
12.4k
#define MPN_MUL_N(r,a,b,n)    mpn_mul_n (r,a,b,n)
476
99.8k
#define MPN_SQR(r,a,n)      mpn_sqr (r,a,n)
477
112k
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_1 (rp, tp, mp, n, mip[0])
478
124
    INNERLOOP;
479
103
  }
480
889
      else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
481
106
  {
482
106
#undef MPN_MUL_N
483
106
#undef MPN_SQR
484
106
#undef MPN_REDUCE
485
33.7k
#define MPN_MUL_N(r,a,b,n)    mpn_mul_n (r,a,b,n)
486
276k
#define MPN_SQR(r,a,n)      mpn_sqr (r,a,n)
487
310k
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_2 (rp, tp, mp, n, mip)
488
106
    INNERLOOP;
489
92
  }
490
783
      else
491
783
  {
492
783
#undef MPN_MUL_N
493
783
#undef MPN_SQR
494
783
#undef MPN_REDUCE
495
315k
#define MPN_MUL_N(r,a,b,n)    mpn_mul_n (r,a,b,n)
496
2.82M
#define MPN_SQR(r,a,n)      mpn_sqr (r,a,n)
497
3.14M
#define MPN_REDUCE(rp,tp,mp,n,mip)  mpn_redc_n (rp, tp, mp, n, mip)
498
783
    INNERLOOP;
499
581
  }
500
18.4k
    }
501
502
#else  /* WANT_REDC_2 */
503
504
  if (REDC_1_TO_REDC_N_THRESHOLD < MUL_TOOM22_THRESHOLD)
505
    {
506
      if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
507
  {
508
    if (REDC_1_TO_REDC_N_THRESHOLD < SQR_BASECASE_THRESHOLD
509
        || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
510
      {
511
#undef MPN_MUL_N
512
#undef MPN_SQR
513
#undef MPN_REDUCE
514
#define MPN_MUL_N(r,a,b,n)    mpn_mul_basecase (r,a,n,b,n)
515
#define MPN_SQR(r,a,n)      mpn_mul_basecase (r,a,n,a,n)
516
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_1 (rp, tp, mp, n, mip[0])
517
        INNERLOOP;
518
      }
519
    else
520
      {
521
#undef MPN_MUL_N
522
#undef MPN_SQR
523
#undef MPN_REDUCE
524
#define MPN_MUL_N(r,a,b,n)    mpn_mul_basecase (r,a,n,b,n)
525
#define MPN_SQR(r,a,n)      mpn_sqr_basecase (r,a,n)
526
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_1 (rp, tp, mp, n, mip[0])
527
        INNERLOOP;
528
      }
529
  }
530
      else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
531
  {
532
    if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
533
        || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
534
      {
535
#undef MPN_MUL_N
536
#undef MPN_SQR
537
#undef MPN_REDUCE
538
#define MPN_MUL_N(r,a,b,n)    mpn_mul_basecase (r,a,n,b,n)
539
#define MPN_SQR(r,a,n)      mpn_mul_basecase (r,a,n,a,n)
540
#define MPN_REDUCE(rp,tp,mp,n,mip)  mpn_redc_n (rp, tp, mp, n, mip)
541
        INNERLOOP;
542
      }
543
    else
544
      {
545
#undef MPN_MUL_N
546
#undef MPN_SQR
547
#undef MPN_REDUCE
548
#define MPN_MUL_N(r,a,b,n)    mpn_mul_basecase (r,a,n,b,n)
549
#define MPN_SQR(r,a,n)      mpn_sqr_basecase (r,a,n)
550
#define MPN_REDUCE(rp,tp,mp,n,mip)  mpn_redc_n (rp, tp, mp, n, mip)
551
        INNERLOOP;
552
      }
553
  }
554
      else
555
  {
556
#undef MPN_MUL_N
557
#undef MPN_SQR
558
#undef MPN_REDUCE
559
#define MPN_MUL_N(r,a,b,n)    mpn_mul_n (r,a,b,n)
560
#define MPN_SQR(r,a,n)      mpn_sqr (r,a,n)
561
#define MPN_REDUCE(rp,tp,mp,n,mip)  mpn_redc_n (rp, tp, mp, n, mip)
562
    INNERLOOP;
563
  }
564
    }
565
  else
566
    {
567
      if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
568
  {
569
    if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
570
        || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
571
      {
572
#undef MPN_MUL_N
573
#undef MPN_SQR
574
#undef MPN_REDUCE
575
#define MPN_MUL_N(r,a,b,n)    mpn_mul_basecase (r,a,n,b,n)
576
#define MPN_SQR(r,a,n)      mpn_mul_basecase (r,a,n,a,n)
577
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_1 (rp, tp, mp, n, mip[0])
578
        INNERLOOP;
579
      }
580
    else
581
      {
582
#undef MPN_MUL_N
583
#undef MPN_SQR
584
#undef MPN_REDUCE
585
#define MPN_MUL_N(r,a,b,n)    mpn_mul_basecase (r,a,n,b,n)
586
#define MPN_SQR(r,a,n)      mpn_sqr_basecase (r,a,n)
587
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_1 (rp, tp, mp, n, mip[0])
588
        INNERLOOP;
589
      }
590
  }
591
      else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
592
  {
593
#undef MPN_MUL_N
594
#undef MPN_SQR
595
#undef MPN_REDUCE
596
#define MPN_MUL_N(r,a,b,n)    mpn_mul_n (r,a,b,n)
597
#define MPN_SQR(r,a,n)      mpn_sqr (r,a,n)
598
#define MPN_REDUCE(rp,tp,mp,n,mip)  MPN_REDC_1 (rp, tp, mp, n, mip[0])
599
    INNERLOOP;
600
  }
601
      else
602
  {
603
#undef MPN_MUL_N
604
#undef MPN_SQR
605
#undef MPN_REDUCE
606
#define MPN_MUL_N(r,a,b,n)    mpn_mul_n (r,a,b,n)
607
#define MPN_SQR(r,a,n)      mpn_sqr (r,a,n)
608
#define MPN_REDUCE(rp,tp,mp,n,mip)  mpn_redc_n (rp, tp, mp, n, mip)
609
    INNERLOOP;
610
  }
611
    }
612
#endif  /* WANT_REDC_2 */
613
614
19.8k
 done:
615
616
19.8k
  MPN_COPY (tp, rp, n);
617
19.8k
  MPN_ZERO (tp + n, n);
618
619
19.8k
#if WANT_REDC_2
620
19.8k
  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
621
18.9k
    MPN_REDC_1 (rp, tp, mp, n, mip[0]);
622
889
  else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
623
106
    MPN_REDC_2 (rp, tp, mp, n, mip);
624
#else
625
  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
626
    MPN_REDC_1 (rp, tp, mp, n, mip[0]);
627
#endif
628
783
  else
629
783
    mpn_redc_n (rp, tp, mp, n, mip);
630
631
19.8k
  if (mpn_cmp (rp, mp, n) >= 0)
632
2
    mpn_sub_n (rp, rp, mp, n);
633
634
19.8k
  TMP_FREE;
635
19.8k
}