/src/gmp-6.2.1/mpn/powm.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* mpn_powm -- Compute R = U^E mod M. |
2 | | |
3 | | Contributed to the GNU project by Torbjorn Granlund. |
4 | | |
5 | | THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY |
6 | | SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST |
7 | | GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. |
8 | | |
9 | | Copyright 2007-2012, 2019 Free Software Foundation, Inc. |
10 | | |
11 | | This file is part of the GNU MP Library. |
12 | | |
13 | | The GNU MP Library is free software; you can redistribute it and/or modify |
14 | | it under the terms of either: |
15 | | |
16 | | * the GNU Lesser General Public License as published by the Free |
17 | | Software Foundation; either version 3 of the License, or (at your |
18 | | option) any later version. |
19 | | |
20 | | or |
21 | | |
22 | | * the GNU General Public License as published by the Free Software |
23 | | Foundation; either version 2 of the License, or (at your option) any |
24 | | later version. |
25 | | |
26 | | or both in parallel, as here. |
27 | | |
28 | | The GNU MP Library is distributed in the hope that it will be useful, but |
29 | | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
30 | | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
31 | | for more details. |
32 | | |
33 | | You should have received copies of the GNU General Public License and the |
34 | | GNU Lesser General Public License along with the GNU MP Library. If not, |
35 | | see https://www.gnu.org/licenses/. */ |
36 | | |
37 | | |
38 | | /* |
39 | | BASIC ALGORITHM, Compute U^E mod M, where M < B^n is odd. |
40 | | |
41 | | 1. W <- U |
42 | | |
43 | | 2. T <- (B^n * U) mod M Convert to REDC form |
44 | | |
45 | | 3. Compute table U^1, U^3, U^5... of E-dependent size |
46 | | |
47 | | 4. While there are more bits in E |
48 | | W <- power left-to-right base-k |
49 | | |
50 | | |
51 | | TODO: |
52 | | |
53 | | * Make getbits a macro, thereby allowing it to update the index operand. |
54 | | That will simplify the code using getbits. (Perhaps make getbits' sibling |
55 | | getbit then have similar form, for symmetry.) |
56 | | |
57 | | * Write an itch function. Or perhaps get rid of tp parameter since the huge |
58 | | pp area is allocated locally anyway? |
59 | | |
60 | | * Choose window size without looping. (Superoptimize or think(tm).) |
61 | | |
62 | | * Handle small bases with initial, reduction-free exponentiation. |
63 | | |
64 | | * Call new division functions, not mpn_tdiv_qr. |
65 | | |
66 | | * Consider special code for one-limb M. |
67 | | |
68 | | * How should we handle the redc1/redc2/redc_n choice? |
69 | | - redc1: T(binvert_1limb) + e * (n) * (T(mullo-1x1) + n*T(addmul_1)) |
70 | | - redc2: T(binvert_2limbs) + e * (n/2) * (T(mullo-2x2) + n*T(addmul_2)) |
71 | | - redc_n: T(binvert_nlimbs) + e * (T(mullo-nxn) + T(M(n))) |
72 | | This disregards the addmul_N constant term, but we could think of |
73 | | that as part of the respective mullo. |
74 | | |
75 | | * When U (the base) is small, we should start the exponentiation with plain |
76 | | operations, then convert that partial result to REDC form. |
77 | | |
78 | | * When U is just one limb, should it be handled without the k-ary tricks? |
79 | | We could keep a factor of B^n in W, but use U' = BU as base. After |
80 | | multiplying by this (pseudo two-limb) number, we need to multiply by 1/B |
81 | | mod M. |
82 | | */ |
83 | | |
84 | | #include "gmp-impl.h" |
85 | | #include "longlong.h" |
86 | | |
87 | | #undef MPN_REDC_0 |
88 | | #define MPN_REDC_0(rp, up, mp, invm) \ |
89 | 225k | do { \ |
90 | 225k | mp_limb_t p1, r0, u0, _dummy; \ |
91 | 225k | u0 = *(up); \ |
92 | 225k | umul_ppmm (p1, _dummy, *(mp), (u0 * (invm)) & GMP_NUMB_MASK); \ |
93 | 225k | ASSERT (((u0 + _dummy) & GMP_NUMB_MASK) == 0); \ |
94 | 225k | p1 += (u0 != 0); \ |
95 | 225k | r0 = (up)[1] + p1; \ |
96 | 225k | if (p1 > r0) \ |
97 | 225k | r0 -= *(mp); \ |
98 | 225k | *(rp) = r0; \ |
99 | 225k | } while (0) |
100 | | |
101 | | #undef MPN_REDC_1 |
102 | | #if HAVE_NATIVE_mpn_sbpi1_bdiv_r |
103 | | #define MPN_REDC_1(rp, up, mp, n, invm) \ |
104 | | do { \ |
105 | | mp_limb_t cy; \ |
106 | | cy = mpn_sbpi1_bdiv_r (up, 2 * n, mp, n, invm); \ |
107 | | if (cy != 0) \ |
108 | | mpn_sub_n (rp, up + n, mp, n); \ |
109 | | else \ |
110 | | MPN_COPY (rp, up + n, n); \ |
111 | | } while (0) |
112 | | #else |
113 | | #define MPN_REDC_1(rp, up, mp, n, invm) \ |
114 | 258k | do { \ |
115 | 258k | mp_limb_t cy; \ |
116 | 258k | cy = mpn_redc_1 (rp, up, mp, n, invm); \ |
117 | 258k | if (cy != 0) \ |
118 | 258k | mpn_sub_n (rp, rp, mp, n); \ |
119 | 258k | } while (0) |
120 | | #endif |
121 | | |
122 | | #undef MPN_REDC_2 |
123 | | #define MPN_REDC_2(rp, up, mp, n, mip) \ |
124 | 370k | do { \ |
125 | 370k | mp_limb_t cy; \ |
126 | 370k | cy = mpn_redc_2 (rp, up, mp, n, mip); \ |
127 | 370k | if (cy != 0) \ |
128 | 370k | mpn_sub_n (rp, rp, mp, n); \ |
129 | 370k | } while (0) |
130 | | |
131 | | #if HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2 |
132 | | #define WANT_REDC_2 1 |
133 | | #endif |
134 | | |
135 | | #define getbit(p,bi) \ |
136 | 1.14M | ((p[(bi - 1) / GMP_LIMB_BITS] >> (bi - 1) % GMP_LIMB_BITS) & 1) |
137 | | |
138 | | static inline mp_limb_t |
139 | | getbits (const mp_limb_t *p, mp_bitcnt_t bi, int nbits) |
140 | 354k | { |
141 | 354k | int nbits_in_r; |
142 | 354k | mp_limb_t r; |
143 | 354k | mp_size_t i; |
144 | | |
145 | 354k | if (bi < nbits) |
146 | 1.04k | { |
147 | 1.04k | return p[0] & (((mp_limb_t) 1 << bi) - 1); |
148 | 1.04k | } |
149 | 353k | else |
150 | 353k | { |
151 | 353k | bi -= nbits; /* bit index of low bit to extract */ |
152 | 353k | i = bi / GMP_NUMB_BITS; /* word index of low bit to extract */ |
153 | 353k | bi %= GMP_NUMB_BITS; /* bit index in low word */ |
154 | 353k | r = p[i] >> bi; /* extract (low) bits */ |
155 | 353k | nbits_in_r = GMP_NUMB_BITS - bi; /* number of bits now in r */ |
156 | 353k | if (nbits_in_r < nbits) /* did we get enough bits? */ |
157 | 35.4k | r += p[i + 1] << nbits_in_r; /* prepend bits from higher word */ |
158 | 353k | return r & (((mp_limb_t) 1 << nbits) - 1); |
159 | 353k | } |
160 | 354k | } |
161 | | |
162 | | static inline int |
163 | | win_size (mp_bitcnt_t eb) |
164 | 1.82k | { |
165 | 1.82k | int k; |
166 | 1.82k | static mp_bitcnt_t x[] = {0,7,25,81,241,673,1793,4609,11521,28161,~(mp_bitcnt_t)0}; |
167 | 7.72k | for (k = 1; eb > x[k]; k++) |
168 | 5.89k | ; |
169 | 1.82k | return k; |
170 | 1.82k | } |
171 | | |
172 | | /* Convert U to REDC form, U_r = B^n * U mod M */ |
173 | | static void |
174 | | redcify (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr mp, mp_size_t n) |
175 | 1.82k | { |
176 | 1.82k | mp_ptr tp, qp; |
177 | 1.82k | TMP_DECL; |
178 | 1.82k | TMP_MARK; |
179 | | |
180 | 1.82k | TMP_ALLOC_LIMBS_2 (tp, un + n, qp, un + 1); |
181 | | |
182 | 1.82k | MPN_ZERO (tp, n); |
183 | 1.82k | MPN_COPY (tp + n, up, un); |
184 | 1.82k | mpn_tdiv_qr (qp, rp, 0L, tp, un + n, mp, n); |
185 | 1.82k | TMP_FREE; |
186 | 1.82k | } |
187 | | |
188 | | /* rp[n-1..0] = bp[bn-1..0] ^ ep[en-1..0] mod mp[n-1..0] |
189 | | Requires that mp[n-1..0] is odd. |
190 | | Requires that ep[en-1..0] is > 1. |
191 | | Uses scratch space at tp of MAX(mpn_binvert_itch(n),2n) limbs. */ |
192 | | void |
193 | | mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn, |
194 | | mp_srcptr ep, mp_size_t en, |
195 | | mp_srcptr mp, mp_size_t n, mp_ptr tp) |
196 | 1.82k | { |
197 | 1.82k | mp_limb_t ip[2], *mip; |
198 | 1.82k | int cnt; |
199 | 1.82k | mp_bitcnt_t ebi; |
200 | 1.82k | int windowsize, this_windowsize; |
201 | 1.82k | mp_limb_t expbits; |
202 | 1.82k | mp_ptr pp, this_pp; |
203 | 1.82k | long i; |
204 | 1.82k | TMP_DECL; |
205 | | |
206 | 1.82k | ASSERT (en > 1 || (en == 1 && ep[0] > 1)); |
207 | 1.82k | ASSERT (n >= 1 && ((mp[0] & 1) != 0)); |
208 | | |
209 | 1.82k | TMP_MARK; |
210 | | |
211 | 1.82k | MPN_SIZEINBASE_2EXP(ebi, ep, en, 1); |
212 | | |
213 | | #if 0 |
214 | | if (bn < n) |
215 | | { |
216 | | /* Do the first few exponent bits without mod reductions, |
217 | | until the result is greater than the mod argument. */ |
218 | | for (;;) |
219 | | { |
220 | | mpn_sqr (tp, this_pp, tn); |
221 | | tn = tn * 2 - 1, tn += tp[tn] != 0; |
222 | | if (getbit (ep, ebi) != 0) |
223 | | mpn_mul (..., tp, tn, bp, bn); |
224 | | ebi--; |
225 | | } |
226 | | } |
227 | | #endif |
228 | | |
229 | 1.82k | windowsize = win_size (ebi); |
230 | | |
231 | 1.82k | #if WANT_REDC_2 |
232 | 1.82k | if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD)) |
233 | 778 | { |
234 | 778 | mip = ip; |
235 | 778 | binvert_limb (mip[0], mp[0]); |
236 | 778 | mip[0] = -mip[0]; |
237 | 778 | } |
238 | 1.04k | else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD)) |
239 | 274 | { |
240 | 274 | mip = ip; |
241 | 274 | mpn_binvert (mip, mp, 2, tp); |
242 | 274 | mip[0] = -mip[0]; mip[1] = ~mip[1]; |
243 | 274 | } |
244 | | #else |
245 | | if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD)) |
246 | | { |
247 | | mip = ip; |
248 | | binvert_limb (mip[0], mp[0]); |
249 | | mip[0] = -mip[0]; |
250 | | } |
251 | | #endif |
252 | 774 | else |
253 | 774 | { |
254 | 774 | mip = TMP_ALLOC_LIMBS (n); |
255 | 774 | mpn_binvert (mip, mp, n, tp); |
256 | 774 | } |
257 | | |
258 | 1.82k | pp = TMP_ALLOC_LIMBS (n << (windowsize - 1)); |
259 | | |
260 | 1.82k | this_pp = pp; |
261 | 1.82k | redcify (this_pp, bp, bn, mp, n); |
262 | | |
263 | | /* Store b^2 at rp. */ |
264 | 1.82k | mpn_sqr (tp, this_pp, n); |
265 | | #if 0 |
266 | | if (n == 1) { |
267 | | MPN_REDC_0 (rp, tp, mp, mip[0]); |
268 | | } else |
269 | | #endif |
270 | 1.82k | #if WANT_REDC_2 |
271 | 1.82k | if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD)) |
272 | 778 | MPN_REDC_1 (rp, tp, mp, n, mip[0]); |
273 | 1.04k | else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD)) |
274 | 274 | MPN_REDC_2 (rp, tp, mp, n, mip); |
275 | | #else |
276 | | if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD)) |
277 | | MPN_REDC_1 (rp, tp, mp, n, mip[0]); |
278 | | #endif |
279 | 774 | else |
280 | 774 | mpn_redc_n (rp, tp, mp, n, mip); |
281 | | |
282 | | /* Precompute odd powers of b and put them in the temporary area at pp. */ |
283 | 57.9k | for (i = (1 << (windowsize - 1)) - 1; i > 0; i--) |
284 | 56.1k | #if 1 |
285 | 56.1k | if (n == 1) { |
286 | 4.40k | umul_ppmm((tp)[1], *(tp), *(this_pp), *(rp)); |
287 | 4.40k | ++this_pp ; |
288 | 4.40k | MPN_REDC_0 (this_pp, tp, mp, mip[0]); |
289 | 4.40k | } else |
290 | 51.7k | #endif |
291 | 51.7k | { |
292 | 51.7k | mpn_mul_n (tp, this_pp, rp, n); |
293 | 51.7k | this_pp += n; |
294 | 51.7k | #if WANT_REDC_2 |
295 | 51.7k | if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD)) |
296 | 5.81k | MPN_REDC_1 (this_pp, tp, mp, n, mip[0]); |
297 | 45.8k | else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD)) |
298 | 6.34k | MPN_REDC_2 (this_pp, tp, mp, n, mip); |
299 | | #else |
300 | | if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD)) |
301 | | MPN_REDC_1 (this_pp, tp, mp, n, mip[0]); |
302 | | #endif |
303 | 39.5k | else |
304 | 39.5k | mpn_redc_n (this_pp, tp, mp, n, mip); |
305 | 51.7k | } |
306 | | |
307 | 1.82k | expbits = getbits (ep, ebi, windowsize); |
308 | 1.82k | if (ebi < windowsize) |
309 | 0 | ebi = 0; |
310 | 1.82k | else |
311 | 1.82k | ebi -= windowsize; |
312 | | |
313 | 1.82k | count_trailing_zeros (cnt, expbits); |
314 | 1.82k | ebi += cnt; |
315 | 1.82k | expbits >>= cnt; |
316 | | |
317 | 1.82k | MPN_COPY (rp, pp + n * (expbits >> 1), n); |
318 | | |
319 | 1.82k | #define INNERLOOP \ |
320 | 354k | while (ebi != 0) \ |
321 | 353k | { \ |
322 | 1.14M | while (getbit (ep, ebi) == 0) \ |
323 | 795k | { \ |
324 | 795k | MPN_SQR (tp, rp, n); \ |
325 | 795k | MPN_REDUCE (rp, tp, mp, n, mip); \ |
326 | 795k | if (--ebi == 0) \ |
327 | 59.5k | goto done; \ |
328 | 59.5k | } \ |
329 | 353k | \ |
330 | | /* The next bit of the exponent is 1. Now extract the largest \ |
331 | | block of bits <= windowsize, and such that the least \ |
332 | | significant bit is 1. */ \ |
333 | 353k | \ |
334 | 353k | expbits = getbits (ep, ebi, windowsize); \ |
335 | 352k | this_windowsize = windowsize; \ |
336 | 352k | if (ebi < windowsize) \ |
337 | 352k | { \ |
338 | 1.04k | this_windowsize -= windowsize - ebi; \ |
339 | 1.04k | ebi = 0; \ |
340 | 1.04k | } \ |
341 | 352k | else \ |
342 | 352k | ebi -= windowsize; \ |
343 | 352k | \ |
344 | 352k | count_trailing_zeros (cnt, expbits); \ |
345 | 352k | this_windowsize -= cnt; \ |
346 | 352k | ebi += cnt; \ |
347 | 352k | expbits >>= cnt; \ |
348 | 352k | \ |
349 | 352k | do \ |
350 | 2.30M | { \ |
351 | 2.30M | MPN_SQR (tp, rp, n); \ |
352 | 2.30M | MPN_REDUCE (rp, tp, mp, n, mip); \ |
353 | 2.30M | } \ |
354 | 2.30M | while (--this_windowsize != 0); \ |
355 | 352k | \ |
356 | 352k | MPN_MUL_N (tp, rp, pp + n * (expbits >> 1), n); \ |
357 | 352k | MPN_REDUCE (rp, tp, mp, n, mip); \ |
358 | 23.3k | } |
359 | | |
360 | | |
361 | 1.82k | if (n == 1) |
362 | 290 | { |
363 | 290 | #undef MPN_MUL_N |
364 | 290 | #undef MPN_SQR |
365 | 290 | #undef MPN_REDUCE |
366 | 23.3k | #define MPN_MUL_N(r,a,b,n) umul_ppmm((r)[1], *(r), *(a), *(b)) |
367 | 197k | #define MPN_SQR(r,a,n) umul_ppmm((r)[1], *(r), *(a), *(a)) |
368 | 220k | #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_0(rp, tp, mp, mip[0]) |
369 | 290 | INNERLOOP; |
370 | 128 | } |
371 | 1.53k | else |
372 | 1.53k | #if WANT_REDC_2 |
373 | 1.53k | if (REDC_1_TO_REDC_2_THRESHOLD < MUL_TOOM22_THRESHOLD) |
374 | 0 | { |
375 | 0 | if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD)) |
376 | 0 | { |
377 | 0 | if (REDC_1_TO_REDC_2_THRESHOLD < SQR_BASECASE_THRESHOLD |
378 | 0 | || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD)) |
379 | 0 | { |
380 | 0 | #undef MPN_MUL_N |
381 | 0 | #undef MPN_SQR |
382 | 0 | #undef MPN_REDUCE |
383 | 0 | #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n) |
384 | 0 | #define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n) |
385 | 0 | #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0]) |
386 | 0 | INNERLOOP; |
387 | 0 | } |
388 | 0 | else |
389 | 0 | { |
390 | 0 | #undef MPN_MUL_N |
391 | 0 | #undef MPN_SQR |
392 | 0 | #undef MPN_REDUCE |
393 | 0 | #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n) |
394 | 0 | #define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n) |
395 | 0 | #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0]) |
396 | 0 | INNERLOOP; |
397 | 0 | } |
398 | 0 | } |
399 | 0 | else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) |
400 | 0 | { |
401 | 0 | if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD |
402 | 0 | || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD)) |
403 | 0 | { |
404 | 0 | #undef MPN_MUL_N |
405 | 0 | #undef MPN_SQR |
406 | 0 | #undef MPN_REDUCE |
407 | 0 | #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n) |
408 | 0 | #define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n) |
409 | 0 | #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_2 (rp, tp, mp, n, mip) |
410 | 0 | INNERLOOP; |
411 | 0 | } |
412 | 0 | else |
413 | 0 | { |
414 | 0 | #undef MPN_MUL_N |
415 | 0 | #undef MPN_SQR |
416 | 0 | #undef MPN_REDUCE |
417 | 0 | #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n) |
418 | 0 | #define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n) |
419 | 0 | #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_2 (rp, tp, mp, n, mip) |
420 | 0 | INNERLOOP; |
421 | 0 | } |
422 | 0 | } |
423 | 0 | else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD)) |
424 | 0 | { |
425 | 0 | #undef MPN_MUL_N |
426 | 0 | #undef MPN_SQR |
427 | 0 | #undef MPN_REDUCE |
428 | 0 | #define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n) |
429 | 0 | #define MPN_SQR(r,a,n) mpn_sqr (r,a,n) |
430 | 0 | #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_2 (rp, tp, mp, n, mip) |
431 | 0 | INNERLOOP; |
432 | 0 | } |
433 | 0 | else |
434 | 0 | { |
435 | 0 | #undef MPN_MUL_N |
436 | 0 | #undef MPN_SQR |
437 | 0 | #undef MPN_REDUCE |
438 | 0 | #define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n) |
439 | 0 | #define MPN_SQR(r,a,n) mpn_sqr (r,a,n) |
440 | 0 | #define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip) |
441 | 0 | INNERLOOP; |
442 | 0 | } |
443 | 0 | } |
444 | 1.53k | else |
445 | 1.53k | { |
446 | 1.53k | if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) |
447 | 345 | { |
448 | 345 | if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD |
449 | 345 | || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD)) |
450 | 0 | { |
451 | 0 | #undef MPN_MUL_N |
452 | 0 | #undef MPN_SQR |
453 | 0 | #undef MPN_REDUCE |
454 | 0 | #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n) |
455 | 0 | #define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n) |
456 | 0 | #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0]) |
457 | 0 | INNERLOOP; |
458 | 0 | } |
459 | 345 | else |
460 | 345 | { |
461 | 345 | #undef MPN_MUL_N |
462 | 345 | #undef MPN_SQR |
463 | 345 | #undef MPN_REDUCE |
464 | 19.2k | #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n) |
465 | 139k | #define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n) |
466 | 158k | #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0]) |
467 | 345 | INNERLOOP; |
468 | 210 | } |
469 | 345 | } |
470 | 1.19k | else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD)) |
471 | 143 | { |
472 | 143 | #undef MPN_MUL_N |
473 | 143 | #undef MPN_SQR |
474 | 143 | #undef MPN_REDUCE |
475 | 9.95k | #define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n) |
476 | 82.4k | #define MPN_SQR(r,a,n) mpn_sqr (r,a,n) |
477 | 92.3k | #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0]) |
478 | 143 | INNERLOOP; |
479 | 70 | } |
480 | 1.04k | else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD)) |
481 | 274 | { |
482 | 274 | #undef MPN_MUL_N |
483 | 274 | #undef MPN_SQR |
484 | 274 | #undef MPN_REDUCE |
485 | 38.1k | #define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n) |
486 | 325k | #define MPN_SQR(r,a,n) mpn_sqr (r,a,n) |
487 | 363k | #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_2 (rp, tp, mp, n, mip) |
488 | 274 | INNERLOOP; |
489 | 132 | } |
490 | 774 | else |
491 | 774 | { |
492 | 774 | #undef MPN_MUL_N |
493 | 774 | #undef MPN_SQR |
494 | 774 | #undef MPN_REDUCE |
495 | 261k | #define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n) |
496 | 2.35M | #define MPN_SQR(r,a,n) mpn_sqr (r,a,n) |
497 | 2.61M | #define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip) |
498 | 774 | INNERLOOP; |
499 | 269 | } |
500 | 1.53k | } |
501 | | |
502 | | #else /* WANT_REDC_2 */ |
503 | | |
504 | | if (REDC_1_TO_REDC_N_THRESHOLD < MUL_TOOM22_THRESHOLD) |
505 | | { |
506 | | if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD)) |
507 | | { |
508 | | if (REDC_1_TO_REDC_N_THRESHOLD < SQR_BASECASE_THRESHOLD |
509 | | || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD)) |
510 | | { |
511 | | #undef MPN_MUL_N |
512 | | #undef MPN_SQR |
513 | | #undef MPN_REDUCE |
514 | | #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n) |
515 | | #define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n) |
516 | | #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0]) |
517 | | INNERLOOP; |
518 | | } |
519 | | else |
520 | | { |
521 | | #undef MPN_MUL_N |
522 | | #undef MPN_SQR |
523 | | #undef MPN_REDUCE |
524 | | #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n) |
525 | | #define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n) |
526 | | #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0]) |
527 | | INNERLOOP; |
528 | | } |
529 | | } |
530 | | else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) |
531 | | { |
532 | | if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD |
533 | | || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD)) |
534 | | { |
535 | | #undef MPN_MUL_N |
536 | | #undef MPN_SQR |
537 | | #undef MPN_REDUCE |
538 | | #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n) |
539 | | #define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n) |
540 | | #define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip) |
541 | | INNERLOOP; |
542 | | } |
543 | | else |
544 | | { |
545 | | #undef MPN_MUL_N |
546 | | #undef MPN_SQR |
547 | | #undef MPN_REDUCE |
548 | | #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n) |
549 | | #define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n) |
550 | | #define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip) |
551 | | INNERLOOP; |
552 | | } |
553 | | } |
554 | | else |
555 | | { |
556 | | #undef MPN_MUL_N |
557 | | #undef MPN_SQR |
558 | | #undef MPN_REDUCE |
559 | | #define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n) |
560 | | #define MPN_SQR(r,a,n) mpn_sqr (r,a,n) |
561 | | #define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip) |
562 | | INNERLOOP; |
563 | | } |
564 | | } |
565 | | else |
566 | | { |
567 | | if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) |
568 | | { |
569 | | if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD |
570 | | || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD)) |
571 | | { |
572 | | #undef MPN_MUL_N |
573 | | #undef MPN_SQR |
574 | | #undef MPN_REDUCE |
575 | | #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n) |
576 | | #define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n) |
577 | | #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0]) |
578 | | INNERLOOP; |
579 | | } |
580 | | else |
581 | | { |
582 | | #undef MPN_MUL_N |
583 | | #undef MPN_SQR |
584 | | #undef MPN_REDUCE |
585 | | #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n) |
586 | | #define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n) |
587 | | #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0]) |
588 | | INNERLOOP; |
589 | | } |
590 | | } |
591 | | else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD)) |
592 | | { |
593 | | #undef MPN_MUL_N |
594 | | #undef MPN_SQR |
595 | | #undef MPN_REDUCE |
596 | | #define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n) |
597 | | #define MPN_SQR(r,a,n) mpn_sqr (r,a,n) |
598 | | #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0]) |
599 | | INNERLOOP; |
600 | | } |
601 | | else |
602 | | { |
603 | | #undef MPN_MUL_N |
604 | | #undef MPN_SQR |
605 | | #undef MPN_REDUCE |
606 | | #define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n) |
607 | | #define MPN_SQR(r,a,n) mpn_sqr (r,a,n) |
608 | | #define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip) |
609 | | INNERLOOP; |
610 | | } |
611 | | } |
612 | | #endif /* WANT_REDC_2 */ |
613 | | |
614 | 1.82k | done: |
615 | | |
616 | 1.82k | MPN_COPY (tp, rp, n); |
617 | 1.82k | MPN_ZERO (tp + n, n); |
618 | | |
619 | 1.82k | #if WANT_REDC_2 |
620 | 1.82k | if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD)) |
621 | 778 | MPN_REDC_1 (rp, tp, mp, n, mip[0]); |
622 | 1.04k | else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD)) |
623 | 274 | MPN_REDC_2 (rp, tp, mp, n, mip); |
624 | | #else |
625 | | if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD)) |
626 | | MPN_REDC_1 (rp, tp, mp, n, mip[0]); |
627 | | #endif |
628 | 774 | else |
629 | 774 | mpn_redc_n (rp, tp, mp, n, mip); |
630 | | |
631 | 1.82k | if (mpn_cmp (rp, mp, n) >= 0) |
632 | 23 | mpn_sub_n (rp, rp, mp, n); |
633 | | |
634 | 1.82k | TMP_FREE; |
635 | 1.82k | } |