Line | Count | Source (jump to first uncovered line) |
1 | | /* mpn_powm -- Compute R = U^E mod M. |
2 | | |
3 | | Contributed to the GNU project by Torbjorn Granlund. |
4 | | |
5 | | THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY |
6 | | SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST |
7 | | GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. |
8 | | |
9 | | Copyright 2007-2012, 2019-2021 Free Software Foundation, Inc. |
10 | | |
11 | | This file is part of the GNU MP Library. |
12 | | |
13 | | The GNU MP Library is free software; you can redistribute it and/or modify |
14 | | it under the terms of either: |
15 | | |
16 | | * the GNU Lesser General Public License as published by the Free |
17 | | Software Foundation; either version 3 of the License, or (at your |
18 | | option) any later version. |
19 | | |
20 | | or |
21 | | |
22 | | * the GNU General Public License as published by the Free Software |
23 | | Foundation; either version 2 of the License, or (at your option) any |
24 | | later version. |
25 | | |
26 | | or both in parallel, as here. |
27 | | |
28 | | The GNU MP Library is distributed in the hope that it will be useful, but |
29 | | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
30 | | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
31 | | for more details. |
32 | | |
33 | | You should have received copies of the GNU General Public License and the |
34 | | GNU Lesser General Public License along with the GNU MP Library. If not, |
35 | | see https://www.gnu.org/licenses/. */ |
36 | | |
37 | | |
38 | | /* |
39 | | BASIC ALGORITHM, Compute U^E mod M, where M < B^n is odd. |
40 | | |
41 | | 1. W <- U |
42 | | |
43 | | 2. T <- (B^n * U) mod M Convert to REDC form |
44 | | |
45 | | 3. Compute table U^1, U^3, U^5... of E-dependent size |
46 | | |
47 | | 4. While there are more bits in E |
48 | | W <- power left-to-right base-k |
49 | | |
50 | | |
51 | | TODO: |
52 | | |
53 | | * Make getbits a macro, thereby allowing it to update the index operand. |
54 | | That will simplify the code using getbits. (Perhaps make getbits' sibling |
55 | | getbit then have similar form, for symmetry.) |
56 | | |
57 | | * Write an itch function. Or perhaps get rid of tp parameter since the huge |
58 | | pp area is allocated locally anyway? |
59 | | |
60 | | * Choose window size without looping. (Superoptimize or think(tm).) |
61 | | |
62 | | * Handle small bases with initial, reduction-free exponentiation. |
63 | | |
64 | | * Call new division functions, not mpn_tdiv_qr. |
65 | | |
66 | | * Consider special code for one-limb M. |
67 | | |
68 | | * How should we handle the redc1/redc2/redc_n choice? |
69 | | - redc1: T(binvert_1limb) + e * (n) * (T(mullo-1x1) + n*T(addmul_1)) |
70 | | - redc2: T(binvert_2limbs) + e * (n/2) * (T(mullo-2x2) + n*T(addmul_2)) |
71 | | - redc_n: T(binvert_nlimbs) + e * (T(mullo-nxn) + T(M(n))) |
72 | | This disregards the addmul_N constant term, but we could think of |
73 | | that as part of the respective mullo. |
74 | | |
75 | | * When U (the base) is small, we should start the exponentiation with plain |
76 | | operations, then convert that partial result to REDC form. |
77 | | |
78 | | * When U is just one limb, should it be handled without the k-ary tricks? |
79 | | We could keep a factor of B^n in W, but use U' = BU as base. After |
80 | | multiplying by this (pseudo two-limb) number, we need to multiply by 1/B |
81 | | mod M. |
82 | | */ |
83 | | |
84 | | #include "gmp-impl.h" |
85 | | #include "longlong.h" |
86 | | |
87 | | #undef MPN_REDC_0 |
88 | | #define MPN_REDC_0(r0, u1, u0, m0, invm) \ |
89 | 648k | do { \ |
90 | 648k | mp_limb_t _p1, _u1, _u0, _m0, _r0, _dummy; \ |
91 | 648k | _u0 = (u0); \ |
92 | 648k | _m0 = (m0); \ |
93 | 648k | umul_ppmm (_p1, _dummy, _m0, (_u0 * (invm)) & GMP_NUMB_MASK); \ |
94 | 648k | ASSERT (((_u0 - _dummy) & GMP_NUMB_MASK) == 0); \ |
95 | 648k | _u1 = (u1); \ |
96 | 648k | _r0 = _u1 - _p1; \ |
97 | 648k | _r0 = _u1 < _p1 ? _r0 + _m0 : _r0; /* _u1 < _r0 */ \ |
98 | 648k | (r0) = _r0 & GMP_NUMB_MASK; \ |
99 | 648k | } while (0) |
100 | | |
101 | | #undef MPN_REDC_1 |
102 | | #if HAVE_NATIVE_mpn_sbpi1_bdiv_r |
103 | | #define MPN_REDC_1(rp, up, mp, n, invm) \ |
104 | | do { \ |
105 | | mp_limb_t cy; \ |
106 | | cy = mpn_sbpi1_bdiv_r (up, 2 * n, mp, n, invm); \ |
107 | | if (cy != 0) \ |
108 | | mpn_sub_n (rp, up + n, mp, n); \ |
109 | | else \ |
110 | | MPN_COPY (rp, up + n, n); \ |
111 | | } while (0) |
112 | | #else |
113 | | #define MPN_REDC_1(rp, up, mp, n, invm) \ |
114 | 1.77M | do { \ |
115 | 1.77M | mp_limb_t cy; \ |
116 | 1.77M | cy = mpn_redc_1 (rp, up, mp, n, invm); \ |
117 | 1.77M | if (cy != 0) \ |
118 | 1.77M | mpn_sub_n (rp, rp, mp, n); \ |
119 | 1.77M | } while (0) |
120 | | #endif |
121 | | |
122 | | #undef MPN_REDC_2 |
123 | | #define MPN_REDC_2(rp, up, mp, n, mip) \ |
124 | 6.24M | do { \ |
125 | 6.24M | mp_limb_t cy; \ |
126 | 6.24M | cy = mpn_redc_2 (rp, up, mp, n, mip); \ |
127 | 6.24M | if (cy != 0) \ |
128 | 6.24M | mpn_sub_n (rp, rp, mp, n); \ |
129 | 6.24M | } while (0) |
130 | | |
131 | | #if HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2 |
132 | | #define WANT_REDC_2 1 |
133 | | #endif |
134 | | |
135 | | #define getbit(p,bi) \ |
136 | 3.69M | ((p[(bi - 1) / GMP_LIMB_BITS] >> (bi - 1) % GMP_LIMB_BITS) & 1) |
137 | | |
138 | | static inline mp_limb_t |
139 | | getbits (const mp_limb_t *p, mp_bitcnt_t bi, int nbits) |
140 | 814k | { |
141 | 814k | int nbits_in_r; |
142 | 814k | mp_limb_t r; |
143 | 814k | mp_size_t i; |
144 | | |
145 | 814k | if (bi <= nbits) |
146 | 1.13k | { |
147 | 1.13k | return p[0] & (((mp_limb_t) 1 << bi) - 1); |
148 | 1.13k | } |
149 | 813k | else |
150 | 813k | { |
151 | 813k | bi -= nbits; /* bit index of low bit to extract */ |
152 | 813k | i = bi / GMP_NUMB_BITS; /* word index of low bit to extract */ |
153 | 813k | bi %= GMP_NUMB_BITS; /* bit index in low word */ |
154 | 813k | r = p[i] >> bi; /* extract (low) bits */ |
155 | 813k | nbits_in_r = GMP_NUMB_BITS - bi; /* number of bits now in r */ |
156 | 813k | if (nbits_in_r < nbits) /* did we get enough bits? */ |
157 | 75.2k | r += p[i + 1] << nbits_in_r; /* prepend bits from higher word */ |
158 | 813k | return r & (((mp_limb_t) 1 << nbits) - 1); |
159 | 813k | } |
160 | 814k | } |
161 | | |
162 | | static inline int |
163 | | win_size (mp_bitcnt_t eb) |
164 | 2.64k | { |
165 | 2.64k | int k; |
166 | 2.64k | static mp_bitcnt_t x[] = {7,25,81,241,673,1793,4609,11521,28161,~(mp_bitcnt_t)0}; |
167 | 16.5k | for (k = 0; eb > x[k++]; ) |
168 | 13.8k | ; |
169 | 2.64k | return k; |
170 | 2.64k | } |
171 | | |
172 | | /* Convert U to REDC form, U_r = B^n * U mod M */ |
173 | | static void |
174 | | redcify (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr mp, mp_size_t n) |
175 | 2.80k | { |
176 | 2.80k | mp_ptr tp, qp; |
177 | 2.80k | TMP_DECL; |
178 | 2.80k | TMP_MARK; |
179 | | |
180 | 2.80k | TMP_ALLOC_LIMBS_2 (tp, un + n, qp, un + 1); |
181 | | |
182 | 2.80k | MPN_ZERO (tp, n); |
183 | 2.80k | MPN_COPY (tp + n, up, un); |
184 | 2.80k | mpn_tdiv_qr (qp, rp, 0L, tp, un + n, mp, n); |
185 | 2.80k | TMP_FREE; |
186 | 2.80k | } |
187 | | |
188 | | #if ! HAVE_NATIVE_mpn_rsblsh1_n_ip2 |
189 | | #undef mpn_rsblsh1_n_ip2 |
190 | | #if HAVE_NATIVE_mpn_rsblsh1_n |
191 | 56.6k | #define mpn_rsblsh1_n_ip2(a,b,n) mpn_rsblsh1_n(a,b,a,n) |
192 | | #else |
193 | | #define mpn_rsblsh1_n_ip2(a,b,n) \ |
194 | | do \ |
195 | | { \ |
196 | | mpn_lshift (a, a, n, 1); \ |
197 | | mpn_sub_n (a, a, b, n); \ |
198 | | } while (0) |
199 | | #endif |
200 | | #endif |
201 | | |
202 | | #define INNERLOOP2 \ |
203 | 156 | do \ |
204 | 453k | { \ |
205 | 453k | MPN_SQR (tp, rp, n); \ |
206 | 453k | MPN_REDUCE (rp, tp, mp, n, mip); \ |
207 | 453k | if (mpn_cmp (rp, mp, n) >= 0) \ |
208 | 453k | ASSERT_NOCARRY (mpn_sub_n (rp, rp, mp, n)); \ |
209 | 453k | if (getbit (ep, ebi) != 0) \ |
210 | 453k | { \ |
211 | 209k | if (rp[n - 1] >> (mbi - 1) % GMP_LIMB_BITS == 0) \ |
212 | 209k | ASSERT_NOCARRY (mpn_lshift (rp, rp, n, 1)); \ |
213 | 209k | else \ |
214 | 209k | mpn_rsblsh1_n_ip2 (rp, mp, n); \ |
215 | 209k | } \ |
216 | 453k | } while (--ebi != 0) |
217 | | |
218 | | /* rp[n-1..0] = 2 ^ ep[en-1..0] mod mp[n-1..0] |
219 | | Requires that mp[n-1..0] is odd and > 1. |
220 | | Requires that ep[en-1..0] is > 1. |
221 | | Uses scratch space at tp of MAX(mpn_binvert_itch(n),2n) limbs. */ |
222 | | static void |
223 | | mpn_2powm (mp_ptr rp, mp_srcptr ep, mp_size_t en, |
224 | | mp_srcptr mp, mp_size_t n, mp_ptr tp) |
225 | 328 | { |
226 | 328 | mp_limb_t ip[2], *mip; |
227 | 328 | mp_bitcnt_t ebi, mbi, tbi; |
228 | 328 | mp_size_t tn; |
229 | 328 | int count; |
230 | 328 | TMP_DECL; |
231 | | |
232 | 328 | ASSERT (en > 1 || (en == 1 && ep[0] > 1)); |
233 | 328 | ASSERT (n > 0 && (mp[0] & 1) != 0); |
234 | | |
235 | 328 | MPN_SIZEINBASE_2EXP(ebi, ep, en, 1); |
236 | 328 | MPN_SIZEINBASE_2EXP(mbi, mp, n, 1); |
237 | | |
238 | 328 | if (LIKELY (mbi <= GMP_NUMB_MAX)) |
239 | 328 | { |
240 | 328 | count_leading_zeros(count, (mp_limb_t) mbi); |
241 | 328 | count = GMP_NUMB_BITS - (count - GMP_NAIL_BITS); |
242 | 328 | } |
243 | 0 | else |
244 | 0 | { |
245 | 0 | mp_bitcnt_t tc = mbi; |
246 | |
|
247 | 0 | count = 0; |
248 | 0 | do { ++count; } while ((tc >>= 1) != 0); |
249 | 0 | } |
250 | | |
251 | 328 | tbi = getbits (ep, ebi, count); |
252 | 328 | if (tbi >= mbi) |
253 | 232 | { |
254 | 232 | --count; |
255 | 232 | ASSERT ((tbi >> count) == 1); |
256 | 232 | tbi >>= 1; |
257 | 232 | ASSERT (tbi < mbi); |
258 | 232 | ASSERT (ebi > count); |
259 | 232 | } |
260 | 96 | else if (ebi <= count) |
261 | 3 | { |
262 | 3 | MPN_FILL (rp, n, 0); |
263 | 3 | rp[tbi / GMP_LIMB_BITS] = CNST_LIMB (1) << (tbi % GMP_LIMB_BITS); |
264 | 3 | return; |
265 | 3 | } |
266 | 325 | ebi -= count; |
267 | | |
268 | 325 | if (n == 1) |
269 | 169 | { |
270 | 169 | mp_limb_t r0, m0, invm; |
271 | 169 | m0 = *mp; |
272 | | |
273 | | /* redcify (rp, tp, tn + 1, mp, n); */ |
274 | | /* TODO: test direct use of udiv_qrnnd */ |
275 | 169 | ASSERT (tbi < GMP_LIMB_BITS); |
276 | 169 | tp[1] = CNST_LIMB (1) << tbi; |
277 | 169 | tp[0] = CNST_LIMB (0); |
278 | 169 | r0 = mpn_mod_1 (tp, 2, m0); |
279 | | |
280 | 169 | binvert_limb (invm, m0); |
281 | 169 | do |
282 | 328k | { |
283 | 328k | mp_limb_t t0, t1, t2; |
284 | | /* MPN_SQR (tp, rp, n); */ |
285 | 328k | umul_ppmm (t1, t0, r0, r0); |
286 | | /* MPN_REDUCE (rp, tp, mp, n, mip); */ |
287 | 328k | MPN_REDC_0(r0, t1, t0, m0, invm); |
288 | | |
289 | 328k | t2 = r0 << 1; |
290 | 328k | t2 = r0 > (m0 >> 1) ? t2 - m0 : t2; |
291 | 328k | r0 = getbit (ep, ebi) != 0 ? t2 : r0; |
292 | 328k | } while (--ebi != 0); |
293 | | |
294 | | /* tp[1] = 0; tp[0] = r0; */ |
295 | | /* MPN_REDUCE (rp, tp, mp, n, mip); */ |
296 | 169 | MPN_REDC_0(*rp, 0, r0, m0, invm); |
297 | | |
298 | 169 | return; |
299 | 169 | } |
300 | | |
301 | 156 | TMP_MARK; |
302 | | |
303 | 156 | #if WANT_REDC_2 |
304 | 156 | if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD)) |
305 | 112 | { |
306 | 112 | mip = ip; |
307 | 112 | binvert_limb (ip[0], mp[0]); |
308 | 112 | ip[0] = -ip[0]; |
309 | 112 | } |
310 | 44 | else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD)) |
311 | 44 | { |
312 | 44 | mip = ip; |
313 | 44 | mpn_binvert (ip, mp, 2, tp); |
314 | 44 | ip[0] = -ip[0]; ip[1] = ~ip[1]; |
315 | 44 | } |
316 | | #else |
317 | | if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD)) |
318 | | { |
319 | | mip = ip; |
320 | | binvert_limb (ip[0], mp[0]); |
321 | | ip[0] = -ip[0]; |
322 | | } |
323 | | #endif |
324 | 0 | else |
325 | 0 | { |
326 | 0 | mip = TMP_ALLOC_LIMBS (n); |
327 | 0 | mpn_binvert (mip, mp, n, tp); |
328 | 0 | } |
329 | | |
330 | 156 | tn = tbi / GMP_LIMB_BITS; |
331 | 156 | MPN_ZERO (tp, tn); |
332 | 156 | tp[tn] = CNST_LIMB (1) << (tbi % GMP_LIMB_BITS); |
333 | | |
334 | 156 | redcify (rp, tp, tn + 1, mp, n); |
335 | | |
336 | 156 | #if WANT_REDC_2 |
337 | 156 | if (REDC_1_TO_REDC_2_THRESHOLD < MUL_TOOM22_THRESHOLD) |
338 | 0 | { |
339 | 0 | if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD)) |
340 | 0 | { |
341 | 0 | if (REDC_1_TO_REDC_2_THRESHOLD < SQR_BASECASE_THRESHOLD |
342 | 0 | || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD)) |
343 | 0 | { |
344 | 0 | #undef MPN_SQR |
345 | 0 | #undef MPN_REDUCE |
346 | 0 | #define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n) |
347 | 0 | #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0]) |
348 | 0 | INNERLOOP2; |
349 | 0 | } |
350 | 0 | else |
351 | 0 | { |
352 | 0 | #undef MPN_SQR |
353 | 0 | #undef MPN_REDUCE |
354 | 0 | #define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n) |
355 | 0 | #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0]) |
356 | 0 | INNERLOOP2; |
357 | 0 | } |
358 | 0 | } |
359 | 0 | else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) |
360 | 0 | { |
361 | 0 | if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD |
362 | 0 | || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD)) |
363 | 0 | { |
364 | 0 | #undef MPN_SQR |
365 | 0 | #undef MPN_REDUCE |
366 | 0 | #define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n) |
367 | 0 | #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_2 (rp, tp, mp, n, mip) |
368 | 0 | INNERLOOP2; |
369 | 0 | } |
370 | 0 | else |
371 | 0 | { |
372 | 0 | #undef MPN_SQR |
373 | 0 | #undef MPN_REDUCE |
374 | 0 | #define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n) |
375 | 0 | #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_2 (rp, tp, mp, n, mip) |
376 | 0 | INNERLOOP2; |
377 | 0 | } |
378 | 0 | } |
379 | 0 | else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD)) |
380 | 0 | { |
381 | 0 | #undef MPN_SQR |
382 | 0 | #undef MPN_REDUCE |
383 | 0 | #define MPN_SQR(r,a,n) mpn_sqr (r,a,n) |
384 | 0 | #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_2 (rp, tp, mp, n, mip) |
385 | 0 | INNERLOOP2; |
386 | 0 | } |
387 | 0 | else |
388 | 0 | { |
389 | 0 | #undef MPN_SQR |
390 | 0 | #undef MPN_REDUCE |
391 | 0 | #define MPN_SQR(r,a,n) mpn_sqr (r,a,n) |
392 | 0 | #define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip) |
393 | 0 | INNERLOOP2; |
394 | 0 | } |
395 | 0 | } |
396 | 156 | else |
397 | 156 | { |
398 | 156 | if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) |
399 | 59 | { |
400 | 59 | if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD |
401 | 59 | || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD)) |
402 | 0 | { |
403 | 0 | #undef MPN_SQR |
404 | 0 | #undef MPN_REDUCE |
405 | 0 | #define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n) |
406 | 0 | #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0]) |
407 | 0 | INNERLOOP2; |
408 | 0 | } |
409 | 59 | else |
410 | 59 | { |
411 | 59 | #undef MPN_SQR |
412 | 59 | #undef MPN_REDUCE |
413 | 181k | #define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n) |
414 | 181k | #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0]) |
415 | 59 | INNERLOOP2; |
416 | 59 | } |
417 | 59 | } |
418 | 97 | else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD)) |
419 | 53 | { |
420 | 53 | #undef MPN_SQR |
421 | 53 | #undef MPN_REDUCE |
422 | 160k | #define MPN_SQR(r,a,n) mpn_sqr (r,a,n) |
423 | 160k | #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0]) |
424 | 53 | INNERLOOP2; |
425 | 53 | } |
426 | 44 | else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD)) |
427 | 44 | { |
428 | 44 | #undef MPN_SQR |
429 | 44 | #undef MPN_REDUCE |
430 | 111k | #define MPN_SQR(r,a,n) mpn_sqr (r,a,n) |
431 | 111k | #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_2 (rp, tp, mp, n, mip) |
432 | 44 | INNERLOOP2; |
433 | 44 | } |
434 | 0 | else |
435 | 0 | { |
436 | 0 | #undef MPN_SQR |
437 | 0 | #undef MPN_REDUCE |
438 | 0 | #define MPN_SQR(r,a,n) mpn_sqr (r,a,n) |
439 | 0 | #define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip) |
440 | 0 | INNERLOOP2; |
441 | 0 | } |
442 | 156 | } |
443 | | |
444 | | #else /* WANT_REDC_2 */ |
445 | | |
446 | | if (REDC_1_TO_REDC_N_THRESHOLD < MUL_TOOM22_THRESHOLD) |
447 | | { |
448 | | if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD)) |
449 | | { |
450 | | if (REDC_1_TO_REDC_N_THRESHOLD < SQR_BASECASE_THRESHOLD |
451 | | || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD)) |
452 | | { |
453 | | #undef MPN_SQR |
454 | | #undef MPN_REDUCE |
455 | | #define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n) |
456 | | #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0]) |
457 | | INNERLOOP2; |
458 | | } |
459 | | else |
460 | | { |
461 | | #undef MPN_SQR |
462 | | #undef MPN_REDUCE |
463 | | #define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n) |
464 | | #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0]) |
465 | | INNERLOOP2; |
466 | | } |
467 | | } |
468 | | else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) |
469 | | { |
470 | | if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD |
471 | | || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD)) |
472 | | { |
473 | | #undef MPN_SQR |
474 | | #undef MPN_REDUCE |
475 | | #define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n) |
476 | | #define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip) |
477 | | INNERLOOP2; |
478 | | } |
479 | | else |
480 | | { |
481 | | #undef MPN_SQR |
482 | | #undef MPN_REDUCE |
483 | | #define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n) |
484 | | #define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip) |
485 | | INNERLOOP2; |
486 | | } |
487 | | } |
488 | | else |
489 | | { |
490 | | #undef MPN_SQR |
491 | | #undef MPN_REDUCE |
492 | | #define MPN_SQR(r,a,n) mpn_sqr (r,a,n) |
493 | | #define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip) |
494 | | INNERLOOP2; |
495 | | } |
496 | | } |
497 | | else |
498 | | { |
499 | | if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) |
500 | | { |
501 | | if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD |
502 | | || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD)) |
503 | | { |
504 | | #undef MPN_SQR |
505 | | #undef MPN_REDUCE |
506 | | #define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n) |
507 | | #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0]) |
508 | | INNERLOOP2; |
509 | | } |
510 | | else |
511 | | { |
512 | | #undef MPN_SQR |
513 | | #undef MPN_REDUCE |
514 | | #define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n) |
515 | | #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0]) |
516 | | INNERLOOP2; |
517 | | } |
518 | | } |
519 | | else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD)) |
520 | | { |
521 | | #undef MPN_SQR |
522 | | #undef MPN_REDUCE |
523 | | #define MPN_SQR(r,a,n) mpn_sqr (r,a,n) |
524 | | #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0]) |
525 | | INNERLOOP2; |
526 | | } |
527 | | else |
528 | | { |
529 | | #undef MPN_SQR |
530 | | #undef MPN_REDUCE |
531 | | #define MPN_SQR(r,a,n) mpn_sqr (r,a,n) |
532 | | #define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip) |
533 | | INNERLOOP2; |
534 | | } |
535 | | } |
536 | | #endif /* WANT_REDC_2 */ |
537 | | |
538 | 156 | MPN_COPY (tp, rp, n); |
539 | 156 | MPN_FILL (tp + n, n, 0); |
540 | | |
541 | 156 | #if WANT_REDC_2 |
542 | 156 | if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD)) |
543 | 112 | MPN_REDC_1 (rp, tp, mp, n, ip[0]); |
544 | 44 | else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD)) |
545 | 44 | MPN_REDC_2 (rp, tp, mp, n, mip); |
546 | | #else |
547 | | if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD)) |
548 | | MPN_REDC_1 (rp, tp, mp, n, ip[0]); |
549 | | #endif |
550 | 0 | else |
551 | 0 | mpn_redc_n (rp, tp, mp, n, mip); |
552 | | |
553 | 156 | if (mpn_cmp (rp, mp, n) >= 0) |
554 | 0 | mpn_sub_n (rp, rp, mp, n); |
555 | | |
556 | 156 | TMP_FREE; |
557 | 156 | } |
558 | | |
559 | | /* rp[n-1..0] = bp[bn-1..0] ^ ep[en-1..0] mod mp[n-1..0] |
560 | | Requires that mp[n-1..0] is odd. |
561 | | Requires that ep[en-1..0] is > 1. |
562 | | Uses scratch space at tp of MAX(mpn_binvert_itch(n),2n) limbs. */ |
563 | | void |
564 | | mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn, |
565 | | mp_srcptr ep, mp_size_t en, |
566 | | mp_srcptr mp, mp_size_t n, mp_ptr tp) |
567 | 2.97k | { |
568 | 2.97k | mp_limb_t ip[2], *mip; |
569 | 2.97k | int cnt; |
570 | 2.97k | mp_bitcnt_t ebi; |
571 | 2.97k | int windowsize, this_windowsize; |
572 | 2.97k | mp_limb_t expbits; |
573 | 2.97k | mp_ptr pp, this_pp; |
574 | 2.97k | long i; |
575 | 2.97k | TMP_DECL; |
576 | | |
577 | 2.97k | ASSERT (en > 1 || (en == 1 && ep[0] > 1)); |
578 | 2.97k | ASSERT (n >= 1 && ((mp[0] & 1) != 0)); |
579 | | |
580 | 2.97k | if (bn == 1 && bp[0] == 2) |
581 | 328 | { |
582 | 328 | mpn_2powm (rp, ep, en, mp, n, tp); |
583 | 328 | return; |
584 | 328 | } |
585 | | |
586 | 2.64k | TMP_MARK; |
587 | | |
588 | 2.64k | MPN_SIZEINBASE_2EXP(ebi, ep, en, 1); |
589 | | |
590 | | #if 0 |
591 | | if (bn < n) |
592 | | { |
593 | | /* Do the first few exponent bits without mod reductions, |
594 | | until the result is greater than the mod argument. */ |
595 | | for (;;) |
596 | | { |
597 | | mpn_sqr (tp, this_pp, tn); |
598 | | tn = tn * 2 - 1, tn += tp[tn] != 0; |
599 | | if (getbit (ep, ebi) != 0) |
600 | | mpn_mul (..., tp, tn, bp, bn); |
601 | | ebi--; |
602 | | } |
603 | | } |
604 | | #endif |
605 | | |
606 | 2.64k | windowsize = win_size (ebi); |
607 | | |
608 | 2.64k | #if WANT_REDC_2 |
609 | 2.64k | if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD)) |
610 | 611 | { |
611 | 611 | mip = ip; |
612 | 611 | binvert_limb (mip[0], mp[0]); |
613 | 611 | mip[0] = -mip[0]; |
614 | 611 | } |
615 | 2.03k | else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD)) |
616 | 2.03k | { |
617 | 2.03k | mip = ip; |
618 | 2.03k | mpn_binvert (mip, mp, 2, tp); |
619 | 2.03k | mip[0] = -mip[0]; mip[1] = ~mip[1]; |
620 | 2.03k | } |
621 | | #else |
622 | | if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD)) |
623 | | { |
624 | | mip = ip; |
625 | | binvert_limb (mip[0], mp[0]); |
626 | | mip[0] = -mip[0]; |
627 | | } |
628 | | #endif |
629 | 0 | else |
630 | 0 | { |
631 | 0 | mip = TMP_ALLOC_LIMBS (n); |
632 | 0 | mpn_binvert (mip, mp, n, tp); |
633 | 0 | } |
634 | | |
635 | 2.64k | pp = TMP_ALLOC_LIMBS (n << (windowsize - 1)); |
636 | | |
637 | 2.64k | this_pp = pp; |
638 | 2.64k | redcify (this_pp, bp, bn, mp, n); |
639 | | |
640 | | /* Store b^2 at rp. */ |
641 | 2.64k | mpn_sqr (tp, this_pp, n); |
642 | | #if 0 |
643 | | if (n == 1) { |
644 | | MPN_REDC_0 (rp[0], tp[1], tp[0], mp[0], -mip[0]); |
645 | | } else |
646 | | #endif |
647 | 2.64k | #if WANT_REDC_2 |
648 | 2.64k | if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD)) |
649 | 611 | MPN_REDC_1 (rp, tp, mp, n, mip[0]); |
650 | 2.03k | else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD)) |
651 | 2.03k | MPN_REDC_2 (rp, tp, mp, n, mip); |
652 | | #else |
653 | | if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD)) |
654 | | MPN_REDC_1 (rp, tp, mp, n, mip[0]); |
655 | | #endif |
656 | 0 | else |
657 | 0 | mpn_redc_n (rp, tp, mp, n, mip); |
658 | | |
659 | | /* Precompute odd powers of b and put them in the temporary area at pp. */ |
660 | 136k | for (i = (1 << (windowsize - 1)) - 1; i > 0; i--) |
661 | 134k | #if 1 |
662 | 134k | if (n == 1) { |
663 | 5.38k | umul_ppmm((tp)[1], *(tp), *(this_pp), *(rp)); |
664 | 5.38k | ++this_pp ; |
665 | 5.38k | MPN_REDC_0 (*this_pp, tp[1], tp[0], *mp, -mip[0]); |
666 | 5.38k | } else |
667 | 128k | #endif |
668 | 128k | { |
669 | 128k | mpn_mul_n (tp, this_pp, rp, n); |
670 | 128k | this_pp += n; |
671 | 128k | #if WANT_REDC_2 |
672 | 128k | if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD)) |
673 | 24.0k | MPN_REDC_1 (this_pp, tp, mp, n, mip[0]); |
674 | 104k | else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD)) |
675 | 104k | MPN_REDC_2 (this_pp, tp, mp, n, mip); |
676 | | #else |
677 | | if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD)) |
678 | | MPN_REDC_1 (this_pp, tp, mp, n, mip[0]); |
679 | | #endif |
680 | 0 | else |
681 | 0 | mpn_redc_n (this_pp, tp, mp, n, mip); |
682 | 128k | } |
683 | | |
684 | 2.64k | expbits = getbits (ep, ebi, windowsize); |
685 | 2.64k | ebi -= windowsize; |
686 | | |
687 | | /* THINK: Should we initialise the case expbits % 4 == 0 with a mul? */ |
688 | 2.64k | count_trailing_zeros (cnt, expbits); |
689 | 2.64k | ebi += cnt; |
690 | 2.64k | expbits >>= cnt; |
691 | | |
692 | 2.64k | MPN_COPY (rp, pp + n * (expbits >> 1), n); |
693 | | |
694 | 2.64k | #define INNERLOOP \ |
695 | 814k | while (ebi != 0) \ |
696 | 813k | { \ |
697 | 2.90M | while (getbit (ep, ebi) == 0) \ |
698 | 2.09M | { \ |
699 | 2.09M | MPN_SQR (tp, rp, n); \ |
700 | 2.09M | MPN_REDUCE (rp, tp, mp, n, mip); \ |
701 | 2.09M | if (--ebi == 0) \ |
702 | 2.09M | goto done; \ |
703 | 2.09M | } \ |
704 | 813k | \ |
705 | | /* The next bit of the exponent is 1. Now extract the largest \ |
706 | | block of bits <= windowsize, and such that the least \ |
707 | | significant bit is 1. */ \ |
708 | 813k | \ |
709 | 813k | expbits = getbits (ep, ebi, windowsize); \ |
710 | 811k | this_windowsize = MIN (ebi, windowsize); \ |
711 | 811k | \ |
712 | 811k | count_trailing_zeros (cnt, expbits); \ |
713 | 811k | this_windowsize -= cnt; \ |
714 | 811k | ebi -= this_windowsize; \ |
715 | 811k | expbits >>= cnt; \ |
716 | 811k | \ |
717 | 811k | do \ |
718 | 4.84M | { \ |
719 | 4.84M | MPN_SQR (tp, rp, n); \ |
720 | 4.84M | MPN_REDUCE (rp, tp, mp, n, mip); \ |
721 | 4.84M | } \ |
722 | 4.84M | while (--this_windowsize != 0); \ |
723 | 811k | \ |
724 | 811k | MPN_MUL_N (tp, rp, pp + n * (expbits >> 1), n); \ |
725 | 811k | MPN_REDUCE (rp, tp, mp, n, mip); \ |
726 | 811k | } |
727 | | |
728 | | |
729 | 2.64k | if (n == 1) |
730 | 126 | { |
731 | 126 | #undef MPN_MUL_N |
732 | 126 | #undef MPN_SQR |
733 | 126 | #undef MPN_REDUCE |
734 | 30.7k | #define MPN_MUL_N(r,a,b,n) umul_ppmm((r)[1], *(r), *(a), *(b)) |
735 | 283k | #define MPN_SQR(r,a,n) umul_ppmm((r)[1], *(r), *(a), *(a)) |
736 | 313k | #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_0(*(rp), (tp)[1], (tp)[0], *(mp), - *(mip)) |
737 | 126 | INNERLOOP; |
738 | 42 | } |
739 | 2.52k | else |
740 | 2.52k | #if WANT_REDC_2 |
741 | 2.52k | if (REDC_1_TO_REDC_2_THRESHOLD < MUL_TOOM22_THRESHOLD) |
742 | 0 | { |
743 | 0 | if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD)) |
744 | 0 | { |
745 | 0 | if (REDC_1_TO_REDC_2_THRESHOLD < SQR_BASECASE_THRESHOLD |
746 | 0 | || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD)) |
747 | 0 | { |
748 | 0 | #undef MPN_MUL_N |
749 | 0 | #undef MPN_SQR |
750 | 0 | #undef MPN_REDUCE |
751 | 0 | #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n) |
752 | 0 | #define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n) |
753 | 0 | #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0]) |
754 | 0 | INNERLOOP; |
755 | 0 | } |
756 | 0 | else |
757 | 0 | { |
758 | 0 | #undef MPN_MUL_N |
759 | 0 | #undef MPN_SQR |
760 | 0 | #undef MPN_REDUCE |
761 | 0 | #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n) |
762 | 0 | #define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n) |
763 | 0 | #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0]) |
764 | 0 | INNERLOOP; |
765 | 0 | } |
766 | 0 | } |
767 | 0 | else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) |
768 | 0 | { |
769 | 0 | if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD |
770 | 0 | || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD)) |
771 | 0 | { |
772 | 0 | #undef MPN_MUL_N |
773 | 0 | #undef MPN_SQR |
774 | 0 | #undef MPN_REDUCE |
775 | 0 | #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n) |
776 | 0 | #define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n) |
777 | 0 | #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_2 (rp, tp, mp, n, mip) |
778 | 0 | INNERLOOP; |
779 | 0 | } |
780 | 0 | else |
781 | 0 | { |
782 | 0 | #undef MPN_MUL_N |
783 | 0 | #undef MPN_SQR |
784 | 0 | #undef MPN_REDUCE |
785 | 0 | #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n) |
786 | 0 | #define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n) |
787 | 0 | #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_2 (rp, tp, mp, n, mip) |
788 | 0 | INNERLOOP; |
789 | 0 | } |
790 | 0 | } |
791 | 0 | else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD)) |
792 | 0 | { |
793 | 0 | #undef MPN_MUL_N |
794 | 0 | #undef MPN_SQR |
795 | 0 | #undef MPN_REDUCE |
796 | 0 | #define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n) |
797 | 0 | #define MPN_SQR(r,a,n) mpn_sqr (r,a,n) |
798 | 0 | #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_2 (rp, tp, mp, n, mip) |
799 | 0 | INNERLOOP; |
800 | 0 | } |
801 | 0 | else |
802 | 0 | { |
803 | 0 | #undef MPN_MUL_N |
804 | 0 | #undef MPN_SQR |
805 | 0 | #undef MPN_REDUCE |
806 | 0 | #define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n) |
807 | 0 | #define MPN_SQR(r,a,n) mpn_sqr (r,a,n) |
808 | 0 | #define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip) |
809 | 0 | INNERLOOP; |
810 | 0 | } |
811 | 0 | } |
812 | 2.52k | else |
813 | 2.52k | { |
814 | 2.52k | if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) |
815 | 252 | { |
816 | 252 | if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD |
817 | 252 | || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD)) |
818 | 0 | { |
819 | 0 | #undef MPN_MUL_N |
820 | 0 | #undef MPN_SQR |
821 | 0 | #undef MPN_REDUCE |
822 | 0 | #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n) |
823 | 0 | #define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n) |
824 | 0 | #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0]) |
825 | 0 | INNERLOOP; |
826 | 0 | } |
827 | 252 | else |
828 | 252 | { |
829 | 252 | #undef MPN_MUL_N |
830 | 252 | #undef MPN_SQR |
831 | 252 | #undef MPN_REDUCE |
832 | 65.3k | #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n) |
833 | 568k | #define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n) |
834 | 633k | #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0]) |
835 | 252 | INNERLOOP; |
836 | 54 | } |
837 | 252 | } |
838 | 2.27k | else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD)) |
839 | 233 | { |
840 | 233 | #undef MPN_MUL_N |
841 | 233 | #undef MPN_SQR |
842 | 233 | #undef MPN_REDUCE |
843 | 81.2k | #define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n) |
844 | 695k | #define MPN_SQR(r,a,n) mpn_sqr (r,a,n) |
845 | 776k | #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0]) |
846 | 233 | INNERLOOP; |
847 | 43 | } |
848 | 2.03k | else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD)) |
849 | 2.03k | { |
850 | 2.03k | #undef MPN_MUL_N |
851 | 2.03k | #undef MPN_SQR |
852 | 2.03k | #undef MPN_REDUCE |
853 | 634k | #define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n) |
854 | 5.39M | #define MPN_SQR(r,a,n) mpn_sqr (r,a,n) |
855 | 6.02M | #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_2 (rp, tp, mp, n, mip) |
856 | 2.03k | INNERLOOP; |
857 | 685 | } |
858 | 0 | else |
859 | 0 | { |
860 | 0 | #undef MPN_MUL_N |
861 | 0 | #undef MPN_SQR |
862 | 0 | #undef MPN_REDUCE |
863 | 0 | #define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n) |
864 | 0 | #define MPN_SQR(r,a,n) mpn_sqr (r,a,n) |
865 | 0 | #define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip) |
866 | 0 | INNERLOOP; |
867 | 0 | } |
868 | 2.52k | } |
869 | | |
870 | | #else /* WANT_REDC_2 */ |
871 | | |
872 | | if (REDC_1_TO_REDC_N_THRESHOLD < MUL_TOOM22_THRESHOLD) |
873 | | { |
874 | | if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD)) |
875 | | { |
876 | | if (REDC_1_TO_REDC_N_THRESHOLD < SQR_BASECASE_THRESHOLD |
877 | | || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD)) |
878 | | { |
879 | | #undef MPN_MUL_N |
880 | | #undef MPN_SQR |
881 | | #undef MPN_REDUCE |
882 | | #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n) |
883 | | #define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n) |
884 | | #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0]) |
885 | | INNERLOOP; |
886 | | } |
887 | | else |
888 | | { |
889 | | #undef MPN_MUL_N |
890 | | #undef MPN_SQR |
891 | | #undef MPN_REDUCE |
892 | | #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n) |
893 | | #define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n) |
894 | | #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0]) |
895 | | INNERLOOP; |
896 | | } |
897 | | } |
898 | | else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) |
899 | | { |
900 | | if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD |
901 | | || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD)) |
902 | | { |
903 | | #undef MPN_MUL_N |
904 | | #undef MPN_SQR |
905 | | #undef MPN_REDUCE |
906 | | #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n) |
907 | | #define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n) |
908 | | #define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip) |
909 | | INNERLOOP; |
910 | | } |
911 | | else |
912 | | { |
913 | | #undef MPN_MUL_N |
914 | | #undef MPN_SQR |
915 | | #undef MPN_REDUCE |
916 | | #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n) |
917 | | #define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n) |
918 | | #define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip) |
919 | | INNERLOOP; |
920 | | } |
921 | | } |
922 | | else |
923 | | { |
924 | | #undef MPN_MUL_N |
925 | | #undef MPN_SQR |
926 | | #undef MPN_REDUCE |
927 | | #define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n) |
928 | | #define MPN_SQR(r,a,n) mpn_sqr (r,a,n) |
929 | | #define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip) |
930 | | INNERLOOP; |
931 | | } |
932 | | } |
933 | | else |
934 | | { |
935 | | if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) |
936 | | { |
937 | | if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD |
938 | | || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD)) |
939 | | { |
940 | | #undef MPN_MUL_N |
941 | | #undef MPN_SQR |
942 | | #undef MPN_REDUCE |
943 | | #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n) |
944 | | #define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n) |
945 | | #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0]) |
946 | | INNERLOOP; |
947 | | } |
948 | | else |
949 | | { |
950 | | #undef MPN_MUL_N |
951 | | #undef MPN_SQR |
952 | | #undef MPN_REDUCE |
953 | | #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n) |
954 | | #define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n) |
955 | | #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0]) |
956 | | INNERLOOP; |
957 | | } |
958 | | } |
959 | | else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD)) |
960 | | { |
961 | | #undef MPN_MUL_N |
962 | | #undef MPN_SQR |
963 | | #undef MPN_REDUCE |
964 | | #define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n) |
965 | | #define MPN_SQR(r,a,n) mpn_sqr (r,a,n) |
966 | | #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0]) |
967 | | INNERLOOP; |
968 | | } |
969 | | else |
970 | | { |
971 | | #undef MPN_MUL_N |
972 | | #undef MPN_SQR |
973 | | #undef MPN_REDUCE |
974 | | #define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n) |
975 | | #define MPN_SQR(r,a,n) mpn_sqr (r,a,n) |
976 | | #define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip) |
977 | | INNERLOOP; |
978 | | } |
979 | | } |
980 | | #endif /* WANT_REDC_2 */ |
981 | | |
982 | 2.64k | done: |
983 | | |
984 | 2.64k | MPN_COPY (tp, rp, n); |
985 | 2.64k | MPN_ZERO (tp + n, n); |
986 | | |
987 | 2.64k | #if WANT_REDC_2 |
988 | 2.64k | if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD)) |
989 | 611 | MPN_REDC_1 (rp, tp, mp, n, mip[0]); |
990 | 2.03k | else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD)) |
991 | 2.03k | MPN_REDC_2 (rp, tp, mp, n, mip); |
992 | | #else |
993 | | if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD)) |
994 | | MPN_REDC_1 (rp, tp, mp, n, mip[0]); |
995 | | #endif |
996 | 0 | else |
997 | 0 | mpn_redc_n (rp, tp, mp, n, mip); |
998 | | |
999 | 2.64k | if (mpn_cmp (rp, mp, n) >= 0) |
1000 | 32 | mpn_sub_n (rp, rp, mp, n); |
1001 | | |
1002 | 2.64k | TMP_FREE; |
1003 | 2.64k | } |