/src/gmp/mpn/mulmod_bknp1.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* Mulptiplication mod B^n+1, for small operands. |
2 | | |
3 | | Contributed to the GNU project by Marco Bodrato. |
4 | | |
5 | | THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY |
6 | | SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST |
7 | | GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. |
8 | | |
9 | | Copyright 2020-2022 Free Software Foundation, Inc. |
10 | | |
11 | | This file is part of the GNU MP Library. |
12 | | |
13 | | The GNU MP Library is free software; you can redistribute it and/or modify |
14 | | it under the terms of either: |
15 | | |
16 | | * the GNU Lesser General Public License as published by the Free |
17 | | Software Foundation; either version 3 of the License, or (at your |
18 | | option) any later version. |
19 | | |
20 | | or |
21 | | |
22 | | * the GNU General Public License as published by the Free Software |
23 | | Foundation; either version 2 of the License, or (at your option) any |
24 | | later version. |
25 | | |
26 | | or both in parallel, as here. |
27 | | |
28 | | The GNU MP Library is distributed in the hope that it will be useful, but |
29 | | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
30 | | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
31 | | for more details. |
32 | | |
33 | | You should have received copies of the GNU General Public License and the |
34 | | GNU Lesser General Public License along with the GNU MP Library. If not, |
35 | | see https://www.gnu.org/licenses/. */ |
36 | | |
37 | | #include "gmp-impl.h" |
38 | | |
39 | | #ifndef MOD_BKNP1_USE11 |
40 | | #define MOD_BKNP1_USE11 ((GMP_NUMB_BITS % 8 != 0) && (GMP_NUMB_BITS % 2 == 0)) |
41 | | #endif |
42 | | #ifndef MOD_BKNP1_ONLY3 |
43 | | #define MOD_BKNP1_ONLY3 0 |
44 | | #endif |
45 | | |
46 | | /* {rp, (k - 1) * n} = {op, k * n + 1} % (B^{k*n}+1) / (B^n+1) */ |
47 | | static void |
48 | | _mpn_modbknp1dbnp1_n (mp_ptr rp, mp_srcptr op, mp_size_t n, unsigned k) |
49 | 0 | { |
50 | 0 | mp_limb_t hl; |
51 | 0 | mp_srcptr hp; |
52 | 0 | unsigned i; |
53 | |
|
54 | | #if MOD_BKNP1_ONLY3 |
55 | | ASSERT (k == 3); |
56 | | k = 3; |
57 | | #endif |
58 | 0 | ASSERT (k > 2); |
59 | 0 | ASSERT (k % 2 == 1); |
60 | |
|
61 | 0 | --k; |
62 | |
|
63 | 0 | rp += k * n; |
64 | 0 | op += k * n; |
65 | 0 | hp = op; |
66 | 0 | hl = hp[n]; /* initial op[k*n]. */ |
67 | 0 | ASSERT (hl < GMP_NUMB_MAX - 1); |
68 | |
|
69 | 0 | #if MOD_BKNP1_ONLY3 == 0 |
70 | | /* The first MPN_INCR_U (rp + n, 1, cy); in the loop should be |
71 | | rp[n] = cy; */ |
72 | 0 | *rp = 0; |
73 | 0 | #endif |
74 | |
|
75 | 0 | i = k >> 1; |
76 | 0 | do |
77 | 0 | { |
78 | 0 | mp_limb_t cy, bw; |
79 | 0 | rp -= n; |
80 | 0 | op -= n; |
81 | 0 | cy = hl + mpn_add_n (rp, op, hp, n); |
82 | | #if MOD_BKNP1_ONLY3 |
83 | | rp[n] = cy; |
84 | | #else |
85 | 0 | MPN_INCR_U (rp + n, (k - i * 2) * n + 1, cy); |
86 | 0 | #endif |
87 | 0 | rp -= n; |
88 | 0 | op -= n; |
89 | 0 | bw = hl + mpn_sub_n (rp, op, hp, n); |
90 | 0 | MPN_DECR_U (rp + n, (k - i * 2 + 1) * n + 1, bw); |
91 | 0 | } |
92 | 0 | while (--i != 0); |
93 | |
|
94 | 0 | for (; (hl = *(rp += k * n)) != 0; ) /* Should run only once... */ |
95 | 0 | { |
96 | 0 | *rp = 0; |
97 | 0 | i = k >> 1; |
98 | 0 | do |
99 | 0 | { |
100 | 0 | rp -= n; |
101 | 0 | MPN_INCR_U (rp, (k - i * 2 + 1) * n + 1, hl); |
102 | 0 | rp -= n; |
103 | 0 | MPN_DECR_U (rp, (k - i * 2 + 2) * n + 1, hl); |
104 | 0 | } |
105 | 0 | while (--i != 0); |
106 | 0 | } |
107 | 0 | } |
108 | | |
109 | | static void |
110 | | _mpn_modbnp1_pn_ip (mp_ptr r, mp_size_t n, mp_limb_t h) |
111 | 0 | { |
112 | 0 | ASSERT (r[n] == h); |
113 | | |
114 | | /* Fully normalise */ |
115 | 0 | MPN_DECR_U (r, n + 1, h); |
116 | 0 | h -= r[n]; |
117 | 0 | r[n] = 0; |
118 | 0 | MPN_INCR_U (r, n + 1, h); |
119 | 0 | } |
120 | | |
121 | | static void |
122 | | _mpn_modbnp1_neg_ip (mp_ptr r, mp_size_t n, mp_limb_t h) |
123 | 0 | { |
124 | 0 | r[n] = 0; |
125 | 0 | MPN_INCR_U (r, n + 1, -h); |
126 | 0 | if (UNLIKELY (r[n] != 0)) |
127 | 0 | _mpn_modbnp1_pn_ip (r, n, 1); |
128 | 0 | } |
129 | | |
130 | | static void |
131 | | _mpn_modbnp1_nc_ip (mp_ptr r, mp_size_t n, mp_limb_t h) |
132 | 0 | { |
133 | 0 | if (h & GMP_NUMB_HIGHBIT) /* This means h < 0 */ |
134 | 0 | { |
135 | 0 | _mpn_modbnp1_neg_ip (r, n, h); |
136 | 0 | } |
137 | 0 | else |
138 | 0 | { |
139 | 0 | r[n] = h; |
140 | 0 | if (h) |
141 | 0 | _mpn_modbnp1_pn_ip(r, n, h); |
142 | 0 | } |
143 | 0 | } |
144 | | |
145 | | /* {rp, rn + 1} = {op, on} mod (B^{rn}+1) */ |
146 | | /* Used when rn < on < 2*rn. */ |
147 | | static void |
148 | | _mpn_modbnp1 (mp_ptr rp, mp_size_t rn, mp_srcptr op, mp_size_t on) |
149 | 0 | { |
150 | 0 | mp_limb_t bw; |
151 | |
|
152 | | #if 0 |
153 | | if (UNLIKELY (on <= rn)) |
154 | | { |
155 | | MPN_COPY (rp, op, on); |
156 | | MPN_ZERO (rp + on, rn - on); |
157 | | return; |
158 | | } |
159 | | #endif |
160 | |
|
161 | 0 | ASSERT (on > rn); |
162 | 0 | ASSERT (on <= 2 * rn); |
163 | |
|
164 | 0 | bw = mpn_sub (rp, op, rn, op + rn, on - rn); |
165 | 0 | rp[rn] = 0; |
166 | 0 | MPN_INCR_U (rp, rn + 1, bw); |
167 | 0 | } |
168 | | |
169 | | /* {rp, rn + 1} = {op, k * rn + 1} % (B^{rn}+1) */ |
170 | | /* With odd k >= 3. */ |
171 | | static void |
172 | | _mpn_modbnp1_kn (mp_ptr rp, mp_srcptr op, mp_size_t rn, unsigned k) |
173 | 0 | { |
174 | 0 | mp_limb_t cy; |
175 | |
|
176 | | #if MOD_BKNP1_ONLY3 |
177 | | ASSERT (k == 3); |
178 | | k = 3; |
179 | | #endif |
180 | 0 | ASSERT (k & 1); |
181 | 0 | k >>= 1; |
182 | 0 | ASSERT (0 < k && k < GMP_NUMB_HIGHBIT - 3); |
183 | 0 | ASSERT (op[(1 + 2 * k) * rn] < GMP_NUMB_HIGHBIT - 2 - k); |
184 | |
|
185 | 0 | cy = - mpn_sub_n (rp, op, op + rn, rn); |
186 | 0 | for (;;) { |
187 | 0 | op += 2 * rn; |
188 | 0 | cy += mpn_add_n (rp, rp, op, rn); |
189 | 0 | if (--k == 0) |
190 | 0 | break; |
191 | 0 | cy -= mpn_sub_n (rp, rp, op + rn, rn); |
192 | 0 | }; |
193 | |
|
194 | 0 | cy += op[rn]; |
195 | 0 | _mpn_modbnp1_nc_ip (rp, rn, cy); |
196 | 0 | } |
197 | | |
198 | | /* For the various mpn_divexact_byN here, fall back to using either |
199 | | mpn_pi1_bdiv_q_1 or mpn_divexact_1. The former has less overhead and is |
200 | | faster if it is native. For now, since mpn_divexact_1 is native on |
201 | | platforms where mpn_pi1_bdiv_q_1 does not yet exist, do not use |
202 | | mpn_pi1_bdiv_q_1 unconditionally. FIXME. */ |
203 | | |
204 | | #ifndef mpn_divexact_by5 |
205 | | #if HAVE_NATIVE_mpn_pi1_bdiv_q_1 |
206 | | #define BINVERT_5 \ |
207 | | ((((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 4)) / 5 * 3 << 3) + 5) & GMP_NUMB_MAX) |
208 | | #define mpn_divexact_by5(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,5,BINVERT_5,0) |
209 | | #else |
210 | | #define mpn_divexact_by5(dst,src,size) mpn_divexact_1(dst,src,size,5) |
211 | | #endif |
212 | | #endif |
213 | | |
214 | | #ifndef mpn_divexact_by7 |
215 | | #if HAVE_NATIVE_mpn_pi1_bdiv_q_1 |
216 | | #define BINVERT_7 \ |
217 | 0 | ((((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 3)) / 7 * 3 << 4) + 7) & GMP_NUMB_MAX) |
218 | 0 | #define mpn_divexact_by7(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,7,BINVERT_7,0) |
219 | | #else |
220 | | #define mpn_divexact_by7(dst,src,size) mpn_divexact_1(dst,src,size,7) |
221 | | #endif |
222 | | #endif |
223 | | |
224 | | #ifndef mpn_divexact_by11 |
225 | | #if HAVE_NATIVE_mpn_pi1_bdiv_q_1 |
226 | | #define BINVERT_11 \ |
227 | | ((((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 10)) / 11 << 5) + 3) & GMP_NUMB_MAX) |
228 | | #define mpn_divexact_by11(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,11,BINVERT_11,0) |
229 | | #else |
230 | | #define mpn_divexact_by11(dst,src,size) mpn_divexact_1(dst,src,size,11) |
231 | | #endif |
232 | | #endif |
233 | | |
234 | | #ifndef mpn_divexact_by13 |
235 | | #if HAVE_NATIVE_mpn_pi1_bdiv_q_1 |
236 | | #define BINVERT_13 \ |
237 | 0 | ((((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 12)) / 13 * 3 << 14) + 3781) & GMP_NUMB_MAX) |
238 | 0 | #define mpn_divexact_by13(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,13,BINVERT_13,0) |
239 | | #else |
240 | | #define mpn_divexact_by13(dst,src,size) mpn_divexact_1(dst,src,size,13) |
241 | | #endif |
242 | | #endif |
243 | | |
244 | | #ifndef mpn_divexact_by17 |
245 | | #if HAVE_NATIVE_mpn_pi1_bdiv_q_1 |
246 | | #define BINVERT_17 \ |
247 | | ((((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 8)) / 17 * 15 << 7) + 113) & GMP_NUMB_MAX) |
248 | | #define mpn_divexact_by17(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,17,BINVERT_17,0) |
249 | | #else |
250 | | #define mpn_divexact_by17(dst,src,size) mpn_divexact_1(dst,src,size,17) |
251 | | #endif |
252 | | #endif |
253 | | |
254 | | /* Thanks to Chinese remainder theorem, store |
255 | | in {rp, k*n+1} the value mod (B^(k*n)+1), given |
256 | | {ap, k*n+1} mod ((B^(k*n)+1)/(B^n+1)) and |
257 | | {bp, n+1} mod (B^n+1) . |
258 | | {tp, n+1} is a scratch area. |
259 | | tp == rp or rp == ap are possible. |
260 | | */ |
261 | | static void |
262 | | _mpn_crt (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, |
263 | | mp_size_t n, unsigned k, mp_ptr tp) |
264 | 0 | { |
265 | 0 | mp_limb_t mod; |
266 | 0 | unsigned i; |
267 | |
|
268 | | #if MOD_BKNP1_ONLY3 |
269 | | ASSERT (k == 3); |
270 | | k = 3; |
271 | | #endif |
272 | 0 | _mpn_modbnp1_kn (tp, ap, n, k); |
273 | 0 | if (mpn_sub_n (tp, bp, tp, n + 1)) |
274 | 0 | _mpn_modbnp1_neg_ip (tp, n, tp[n]); |
275 | |
|
276 | | #if MOD_BKNP1_USE11 |
277 | | if (UNLIKELY (k == 11)) |
278 | | { |
279 | | ASSERT (GMP_NUMB_BITS % 2 == 0); |
280 | | /* mod <- -Mod(B^n+1,11)^-1 */ |
281 | | mod = n * (GMP_NUMB_BITS % 5) % 5; |
282 | | if ((mod > 2) || UNLIKELY (mod == 0)) |
283 | | mod += 5; |
284 | | |
285 | | mod *= mpn_mod_1 (tp, n + 1, 11); |
286 | | } |
287 | | else |
288 | | #endif |
289 | 0 | { |
290 | 0 | #if GMP_NUMB_BITS % 8 == 0 |
291 | | /* (2^6 - 1) | (2^{GMP_NUMB_BITS*3/4} - 1) */ |
292 | | /* (2^6 - 1) = 3^2 * 7 */ |
293 | 0 | mod = mpn_mod_34lsub1 (tp, n + 1); |
294 | 0 | ASSERT ((GMP_NUMB_MAX >> (GMP_NUMB_BITS >> 2)) % k == 0); |
295 | | /* (2^12 - 1) = 3^2 * 5 * 7 * 13 */ |
296 | | /* (2^24 - 1) = 3^2 * 5 * 7 * 13 * 17 * 241 */ |
297 | 0 | ASSERT (k == 3 || k == 5 || k == 7 || k == 13 || k == 17); |
298 | |
|
299 | 0 | #if GMP_NUMB_BITS % 3 != 0 |
300 | 0 | if (UNLIKELY (k != 3)) |
301 | 0 | { |
302 | 0 | ASSERT ((GMP_NUMB_MAX % k == 0) || (n % 3 != 0)); |
303 | 0 | if ((GMP_NUMB_BITS % 16 == 0) && LIKELY (k == 5)) |
304 | 0 | mod <<= 1; /* k >> 1 = 1 << 1 */ |
305 | 0 | else if ((GMP_NUMB_BITS % 16 != 0) || LIKELY (k == 7)) |
306 | 0 | mod <<= (n << (GMP_NUMB_BITS % 3 >> 1)) % 3; |
307 | 0 | else if ((GMP_NUMB_BITS % 32 != 0) || LIKELY (k == 13)) |
308 | 0 | mod *= ((n << (GMP_NUMB_BITS % 3 >> 1)) % 3 == 1) ? 3 : 9; |
309 | 0 | else /* k == 17 */ |
310 | 0 | mod <<= 3; /* k >> 1 = 1 << 3 */ |
311 | | #if 0 |
312 | | if ((GMP_NUMB_BITS == 8) /* && (k == 7) */ || |
313 | | (GMP_NUMB_BITS == 16) && (k == 13)) |
314 | | mod = ((mod & (GMP_NUMB_MAX >> (GMP_NUMB_BITS >> 2))) + |
315 | | (mod >> (3 * GMP_NUMB_BITS >> 2))); |
316 | | #endif |
317 | 0 | } |
318 | | #else |
319 | | ASSERT (GMP_NUMB_MAX % k == 0); |
320 | | /* 2^{GMP_NUMB_BITS} - 1 = 0 (mod k) */ |
321 | | /* 2^{GMP_NUMB_BITS} = 1 (mod k) */ |
322 | | /* 2^{n*GMP_NUMB_BITS} + 1 = 2 (mod k) */ |
323 | | /* -2^{-1} = k >> 1 (mod k) */ |
324 | | mod *= k >> 1; |
325 | | #endif |
326 | | #else |
327 | | ASSERT_ALWAYS (k == 0); /* Not implemented, should not be used. */ |
328 | | #endif |
329 | 0 | } |
330 | |
|
331 | 0 | MPN_INCR_U (tp, n + 1, mod); |
332 | 0 | tp[n] += mod; |
333 | |
|
334 | 0 | if (LIKELY (k == 3)) |
335 | 0 | ASSERT_NOCARRY (mpn_divexact_by3 (tp, tp, n + 1)); |
336 | 0 | else if ((GMP_NUMB_BITS % 16 == 0) && LIKELY (k == 5)) |
337 | 0 | mpn_divexact_by5 (tp, tp, n + 1); |
338 | 0 | else if (((! MOD_BKNP1_USE11) && (GMP_NUMB_BITS % 16 != 0)) |
339 | 0 | || LIKELY (k == 7)) |
340 | 0 | mpn_divexact_by7 (tp, tp, n + 1); |
341 | | #if MOD_BKNP1_USE11 |
342 | | else if (k == 11) |
343 | | mpn_divexact_by11 (tp, tp, n + 1); |
344 | | #endif |
345 | 0 | else if ((GMP_NUMB_BITS % 32 != 0) || LIKELY (k == 13)) |
346 | 0 | mpn_divexact_by13 (tp, tp, n + 1); |
347 | 0 | else /* (k == 17) */ |
348 | 0 | mpn_divexact_by17 (tp, tp, n + 1); |
349 | |
|
350 | 0 | rp += k * n; |
351 | 0 | ap += k * n; /* tp - 1 */ |
352 | |
|
353 | 0 | rp -= n; |
354 | 0 | ap -= n; |
355 | 0 | ASSERT_NOCARRY (mpn_add_n (rp, ap, tp, n + 1)); |
356 | |
|
357 | 0 | i = k >> 1; |
358 | 0 | do |
359 | 0 | { |
360 | 0 | mp_limb_t cy, bw; |
361 | 0 | rp -= n; |
362 | 0 | ap -= n; |
363 | 0 | bw = mpn_sub_n (rp, ap, tp, n) + tp[n]; |
364 | 0 | MPN_DECR_U (rp + n, (k - i * 2) * n + 1, bw); |
365 | 0 | rp -= n; |
366 | 0 | ap -= n; |
367 | 0 | cy = mpn_add_n (rp, ap, tp, n) + tp[n]; |
368 | 0 | MPN_INCR_U (rp + n, (k - i * 2 + 1) * n + 1, cy); |
369 | 0 | } |
370 | 0 | while (--i != 0); |
371 | | |
372 | | /* if (LIKELY (rp[k * n])) */ |
373 | 0 | _mpn_modbnp1_pn_ip (rp, k * n, rp[k * n]); |
374 | 0 | } |
375 | | |
376 | | |
377 | | static void |
378 | | _mpn_mulmod_bnp1_tp (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t rn, |
379 | | mp_ptr tp) |
380 | 0 | { |
381 | 0 | mp_limb_t cy; |
382 | 0 | unsigned k; |
383 | |
|
384 | 0 | ASSERT (0 < rn); |
385 | 0 | ASSERT ((ap[rn] | bp[rn]) <= 1); |
386 | |
|
387 | 0 | if (UNLIKELY (ap[rn] | bp[rn])) |
388 | 0 | { |
389 | 0 | if (ap[rn]) |
390 | 0 | cy = bp[rn] + mpn_neg (rp, bp, rn); |
391 | 0 | else /* ap[rn] == 0 */ |
392 | 0 | cy = mpn_neg (rp, ap, rn); |
393 | 0 | } |
394 | 0 | else if (MPN_MULMOD_BKNP1_USABLE(rn, k, MUL_FFT_MODF_THRESHOLD / 3)) |
395 | 0 | { |
396 | 0 | rn /= k; |
397 | 0 | mpn_mulmod_bknp1 (rp, ap, bp, rn, k, tp); |
398 | 0 | return; |
399 | 0 | } |
400 | 0 | else |
401 | 0 | { |
402 | 0 | mpn_mul_n (tp, ap, bp, rn); |
403 | 0 | cy = mpn_sub_n (rp, tp, tp + rn, rn); |
404 | 0 | } |
405 | 0 | rp[rn] = 0; |
406 | 0 | MPN_INCR_U (rp, rn + 1, cy); |
407 | 0 | } |
408 | | |
409 | | /* {rp, kn + 1} = {ap, kn + 1} * {bp, kn + 1} % (B^kn + 1) */ |
410 | | /* tp must point to at least 4*(k-1)*n+1 limbs*/ |
411 | | void |
412 | | mpn_mulmod_bknp1 (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, |
413 | | mp_size_t n, unsigned k, mp_ptr tp) |
414 | 0 | { |
415 | 0 | mp_ptr hp; |
416 | |
|
417 | | #if MOD_BKNP1_ONLY3 |
418 | | ASSERT (k == 3); |
419 | | k = 3; |
420 | | #endif |
421 | 0 | ASSERT (k > 2); |
422 | 0 | ASSERT (k % 2 == 1); |
423 | | |
424 | | /* a % (B^{nn}+1)/(B^{nn/k}+1) */ |
425 | 0 | _mpn_modbknp1dbnp1_n (tp + (k - 1) * n * 2, ap, n, k); |
426 | | /* b % (B^{nn}+1)/(B^{nn/k}+1) */ |
427 | 0 | _mpn_modbknp1dbnp1_n (tp + (k - 1) * n * 3, bp, n, k); |
428 | 0 | mpn_mul_n (tp, tp + (k - 1) * n * 2, tp + (k - 1) * n * 3, (k - 1) * n); |
429 | 0 | _mpn_modbnp1 (tp, k * n, tp, (k - 1) * n * 2); |
430 | |
|
431 | 0 | hp = tp + k * n + 1; |
432 | | /* a % (B^{nn/k}+1) */ |
433 | 0 | ASSERT (ap[k * n] <= 1); |
434 | 0 | _mpn_modbnp1_kn (hp, ap, n, k); |
435 | | /* b % (B^{nn/k}+1) */ |
436 | 0 | ASSERT (bp[k * n] <= 1); |
437 | 0 | _mpn_modbnp1_kn (hp + n + 1, bp, n, k); |
438 | 0 | _mpn_mulmod_bnp1_tp (hp + (n + 1) * 2, hp, hp + n + 1, n, hp + (n + 1) * 2); |
439 | |
|
440 | 0 | _mpn_crt (rp, tp, hp + (n + 1) * 2, n, k, hp); |
441 | 0 | } |
442 | | |
443 | | |
444 | | static void |
445 | | _mpn_sqrmod_bnp1_tp (mp_ptr rp, mp_srcptr ap, mp_size_t rn, |
446 | | mp_ptr tp) |
447 | 0 | { |
448 | 0 | mp_limb_t cy; |
449 | 0 | unsigned k; |
450 | |
|
451 | 0 | ASSERT (0 < rn); |
452 | |
|
453 | 0 | if (UNLIKELY (ap[rn])) |
454 | 0 | { |
455 | 0 | ASSERT (ap[rn] == 1); |
456 | 0 | *rp = 1; |
457 | 0 | MPN_FILL (rp + 1, rn, 0); |
458 | 0 | return; |
459 | 0 | } |
460 | 0 | else if (MPN_SQRMOD_BKNP1_USABLE(rn, k, MUL_FFT_MODF_THRESHOLD / 3)) |
461 | 0 | { |
462 | 0 | rn /= k; |
463 | 0 | mpn_sqrmod_bknp1 (rp, ap, rn, k, tp); |
464 | 0 | return; |
465 | 0 | } |
466 | 0 | else |
467 | 0 | { |
468 | 0 | mpn_sqr (tp, ap, rn); |
469 | 0 | cy = mpn_sub_n (rp, tp, tp + rn, rn); |
470 | 0 | } |
471 | 0 | rp[rn] = 0; |
472 | 0 | MPN_INCR_U (rp, rn + 1, cy); |
473 | 0 | } |
474 | | |
475 | | /* {rp, kn + 1} = {ap, kn + 1}^2 % (B^kn + 1) */ |
476 | | /* tp must point to at least 3*(k-1)*n+1 limbs*/ |
477 | | void |
478 | | mpn_sqrmod_bknp1 (mp_ptr rp, mp_srcptr ap, |
479 | | mp_size_t n, unsigned k, mp_ptr tp) |
480 | 0 | { |
481 | 0 | mp_ptr hp; |
482 | |
|
483 | | #if MOD_BKNP1_ONLY3 |
484 | | ASSERT (k == 3); |
485 | | k = 3; |
486 | | #endif |
487 | 0 | ASSERT (k > 2); |
488 | 0 | ASSERT (k % 2 == 1); |
489 | | |
490 | | /* a % (B^{nn}+1)/(B^{nn/k}+1) */ |
491 | 0 | _mpn_modbknp1dbnp1_n (tp + (k - 1) * n * 2, ap, n, k); |
492 | 0 | mpn_sqr (tp, tp + (k - 1) * n * 2, (k - 1) * n); |
493 | 0 | _mpn_modbnp1 (tp, k * n, tp, (k - 1) * n * 2); |
494 | |
|
495 | 0 | hp = tp + k * n + 1; |
496 | | /* a % (B^{nn/k}+1) */ |
497 | 0 | ASSERT (ap[k * n] <= 1); |
498 | 0 | _mpn_modbnp1_kn (hp, ap, n, k); |
499 | 0 | _mpn_sqrmod_bnp1_tp (hp + (n + 1), hp, n, hp + (n + 1)); |
500 | |
|
501 | 0 | _mpn_crt (rp, tp, hp + (n + 1), n, k, hp); |
502 | 0 | } |