/src/gmp/mpn/mulmod_bknp1.c
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | /* Mulptiplication mod B^n+1, for small operands.  | 
2  |  |  | 
3  |  |    Contributed to the GNU project by Marco Bodrato.  | 
4  |  |  | 
5  |  |    THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY  | 
6  |  |    SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST  | 
7  |  |    GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.  | 
8  |  |  | 
9  |  | Copyright 2020-2022 Free Software Foundation, Inc.  | 
10  |  |  | 
11  |  | This file is part of the GNU MP Library.  | 
12  |  |  | 
13  |  | The GNU MP Library is free software; you can redistribute it and/or modify  | 
14  |  | it under the terms of either:  | 
15  |  |  | 
16  |  |   * the GNU Lesser General Public License as published by the Free  | 
17  |  |     Software Foundation; either version 3 of the License, or (at your  | 
18  |  |     option) any later version.  | 
19  |  |  | 
20  |  | or  | 
21  |  |  | 
22  |  |   * the GNU General Public License as published by the Free Software  | 
23  |  |     Foundation; either version 2 of the License, or (at your option) any  | 
24  |  |     later version.  | 
25  |  |  | 
26  |  | or both in parallel, as here.  | 
27  |  |  | 
28  |  | The GNU MP Library is distributed in the hope that it will be useful, but  | 
29  |  | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY  | 
30  |  | or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License  | 
31  |  | for more details.  | 
32  |  |  | 
33  |  | You should have received copies of the GNU General Public License and the  | 
34  |  | GNU Lesser General Public License along with the GNU MP Library.  If not,  | 
35  |  | see https://www.gnu.org/licenses/.  */  | 
36  |  |  | 
37  |  | #include "gmp-impl.h"  | 
38  |  |  | 
39  |  | #ifndef MOD_BKNP1_USE11  | 
40  |  | #define MOD_BKNP1_USE11 ((GMP_NUMB_BITS % 8 != 0) && (GMP_NUMB_BITS % 2 == 0))  | 
41  |  | #endif  | 
42  |  | #ifndef MOD_BKNP1_ONLY3  | 
43  |  | #define MOD_BKNP1_ONLY3 0  | 
44  |  | #endif  | 
45  |  |  | 
46  |  | /* {rp, (k - 1) * n} = {op, k * n + 1} % (B^{k*n}+1) / (B^n+1) */ | 
47  |  | static void  | 
48  |  | _mpn_modbknp1dbnp1_n (mp_ptr rp, mp_srcptr op, mp_size_t n, unsigned k)  | 
49  | 0  | { | 
50  | 0  |   mp_limb_t hl;  | 
51  | 0  |   mp_srcptr hp;  | 
52  | 0  |   unsigned i;  | 
53  |  | 
  | 
54  |  | #if MOD_BKNP1_ONLY3  | 
55  |  |   ASSERT (k == 3);  | 
56  |  |   k = 3;  | 
57  |  | #endif  | 
58  | 0  |   ASSERT (k > 2);  | 
59  | 0  |   ASSERT (k % 2 == 1);  | 
60  |  | 
  | 
61  | 0  |   --k;  | 
62  |  | 
  | 
63  | 0  |   rp += k * n;  | 
64  | 0  |   op += k * n;  | 
65  | 0  |   hp = op;  | 
66  | 0  |   hl = hp[n]; /* initial op[k*n]. */  | 
67  | 0  |   ASSERT (hl < GMP_NUMB_MAX - 1);  | 
68  |  | 
  | 
69  | 0  | #if MOD_BKNP1_ONLY3 == 0  | 
70  |  |   /* The first MPN_INCR_U (rp + n, 1, cy); in the loop should be  | 
71  |  |      rp[n] = cy;            */  | 
72  | 0  |   *rp = 0;  | 
73  | 0  | #endif  | 
74  |  | 
  | 
75  | 0  |   i = k >> 1;  | 
76  | 0  |   do  | 
77  | 0  |    { | 
78  | 0  |      mp_limb_t cy, bw;  | 
79  | 0  |      rp -= n;  | 
80  | 0  |      op -= n;  | 
81  | 0  |      cy = hl + mpn_add_n (rp, op, hp, n);  | 
82  |  | #if MOD_BKNP1_ONLY3  | 
83  |  |      rp[n] = cy;  | 
84  |  | #else  | 
85  | 0  |      MPN_INCR_U (rp + n, (k - i * 2) * n + 1, cy);  | 
86  | 0  | #endif  | 
87  | 0  |      rp -= n;  | 
88  | 0  |      op -= n;  | 
89  | 0  |      bw = hl + mpn_sub_n (rp, op, hp, n);  | 
90  | 0  |      MPN_DECR_U (rp + n, (k - i * 2 + 1) * n + 1, bw);  | 
91  | 0  |    }  | 
92  | 0  |   while (--i != 0);  | 
93  |  | 
  | 
94  | 0  |   for (; (hl = *(rp += k * n)) != 0; ) /* Should run only once... */  | 
95  | 0  |     { | 
96  | 0  |       *rp = 0;  | 
97  | 0  |       i = k >> 1;  | 
98  | 0  |       do  | 
99  | 0  |   { | 
100  | 0  |     rp -= n;  | 
101  | 0  |     MPN_INCR_U (rp, (k - i * 2 + 1) * n + 1, hl);  | 
102  | 0  |     rp -= n;  | 
103  | 0  |     MPN_DECR_U (rp, (k - i * 2 + 2) * n + 1, hl);  | 
104  | 0  |   }  | 
105  | 0  |       while (--i != 0);  | 
106  | 0  |     }  | 
107  | 0  | }  | 
108  |  |  | 
109  |  | static void  | 
110  |  | _mpn_modbnp1_pn_ip (mp_ptr r, mp_size_t n, mp_limb_t h)  | 
111  | 0  | { | 
112  | 0  |   ASSERT (r[n] == h);  | 
113  |  |  | 
114  |  |   /* Fully normalise */  | 
115  | 0  |   MPN_DECR_U (r, n + 1, h);  | 
116  | 0  |   h -= r[n];  | 
117  | 0  |   r[n] = 0;  | 
118  | 0  |   MPN_INCR_U (r, n + 1, h);  | 
119  | 0  | }  | 
120  |  |  | 
121  |  | static void  | 
122  |  | _mpn_modbnp1_neg_ip (mp_ptr r, mp_size_t n, mp_limb_t h)  | 
123  | 0  | { | 
124  | 0  |   r[n] = 0;  | 
125  | 0  |   MPN_INCR_U (r, n + 1, -h);  | 
126  | 0  |   if (UNLIKELY (r[n] != 0))  | 
127  | 0  |     _mpn_modbnp1_pn_ip (r, n, 1);  | 
128  | 0  | }  | 
129  |  |  | 
130  |  | static void  | 
131  |  | _mpn_modbnp1_nc_ip (mp_ptr r, mp_size_t n, mp_limb_t h)  | 
132  | 0  | { | 
133  | 0  |   if (h & GMP_NUMB_HIGHBIT) /* This means h < 0 */  | 
134  | 0  |     { | 
135  | 0  |       _mpn_modbnp1_neg_ip (r, n, h);  | 
136  | 0  |     }  | 
137  | 0  |   else  | 
138  | 0  |     { | 
139  | 0  |       r[n] = h;  | 
140  | 0  |       if (h)  | 
141  | 0  |   _mpn_modbnp1_pn_ip(r, n, h);  | 
142  | 0  |     }  | 
143  | 0  | }  | 
144  |  |  | 
145  |  | /* {rp, rn + 1} = {op, on} mod (B^{rn}+1) */ | 
146  |  | /* Used when rn < on < 2*rn. */  | 
147  |  | static void  | 
148  |  | _mpn_modbnp1 (mp_ptr rp, mp_size_t rn, mp_srcptr op, mp_size_t on)  | 
149  | 0  | { | 
150  | 0  |   mp_limb_t bw;  | 
151  |  | 
  | 
152  |  | #if 0  | 
153  |  |   if (UNLIKELY (on <= rn))  | 
154  |  |     { | 
155  |  |       MPN_COPY (rp, op, on);  | 
156  |  |       MPN_ZERO (rp + on, rn - on);  | 
157  |  |       return;  | 
158  |  |     }  | 
159  |  | #endif  | 
160  |  | 
  | 
161  | 0  |   ASSERT (on > rn);  | 
162  | 0  |   ASSERT (on <= 2 * rn);  | 
163  |  | 
  | 
164  | 0  |   bw = mpn_sub (rp, op, rn, op + rn, on - rn);  | 
165  | 0  |   rp[rn] = 0;  | 
166  | 0  |   MPN_INCR_U (rp, rn + 1, bw);  | 
167  | 0  | }  | 
168  |  |  | 
169  |  | /* {rp, rn + 1} = {op, k * rn + 1} % (B^{rn}+1) */ | 
170  |  | /* With odd k >= 3. */  | 
171  |  | static void  | 
172  |  | _mpn_modbnp1_kn (mp_ptr rp, mp_srcptr op, mp_size_t rn, unsigned k)  | 
173  | 0  | { | 
174  | 0  |   mp_limb_t cy;  | 
175  |  | 
  | 
176  |  | #if MOD_BKNP1_ONLY3  | 
177  |  |   ASSERT (k == 3);  | 
178  |  |   k = 3;  | 
179  |  | #endif  | 
180  | 0  |   ASSERT (k & 1);  | 
181  | 0  |   k >>= 1;  | 
182  | 0  |   ASSERT (0 < k && k < GMP_NUMB_HIGHBIT - 3);  | 
183  | 0  |   ASSERT (op[(1 + 2 * k) * rn] < GMP_NUMB_HIGHBIT - 2 - k);  | 
184  |  | 
  | 
185  | 0  |   cy = - mpn_sub_n (rp, op, op + rn, rn);  | 
186  | 0  |   for (;;) { | 
187  | 0  |     op += 2 * rn;  | 
188  | 0  |     cy += mpn_add_n (rp, rp, op, rn);  | 
189  | 0  |     if (--k == 0)  | 
190  | 0  |       break;  | 
191  | 0  |     cy -= mpn_sub_n (rp, rp, op + rn, rn);  | 
192  | 0  |   };  | 
193  |  | 
  | 
194  | 0  |   cy += op[rn];  | 
195  | 0  |   _mpn_modbnp1_nc_ip (rp, rn, cy);  | 
196  | 0  | }  | 
197  |  |  | 
198  |  | /* For the various mpn_divexact_byN here, fall back to using either  | 
199  |  |    mpn_pi1_bdiv_q_1 or mpn_divexact_1.  The former has less overhead and is  | 
200  |  |    faster if it is native.  For now, since mpn_divexact_1 is native on  | 
201  |  |    platforms where mpn_pi1_bdiv_q_1 does not yet exist, do not use  | 
202  |  |    mpn_pi1_bdiv_q_1 unconditionally.  FIXME.  */  | 
203  |  |  | 
204  |  | #ifndef mpn_divexact_by5  | 
205  |  | #if HAVE_NATIVE_mpn_pi1_bdiv_q_1  | 
206  |  | #define BINVERT_5 \  | 
207  |  |   ((((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 4)) / 5 * 3 << 3) + 5) & GMP_NUMB_MAX)  | 
208  |  | #define mpn_divexact_by5(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,5,BINVERT_5,0)  | 
209  |  | #else  | 
210  |  | #define mpn_divexact_by5(dst,src,size) mpn_divexact_1(dst,src,size,5)  | 
211  |  | #endif  | 
212  |  | #endif  | 
213  |  |  | 
214  |  | #ifndef mpn_divexact_by7  | 
215  |  | #if HAVE_NATIVE_mpn_pi1_bdiv_q_1  | 
216  |  | #define BINVERT_7 \  | 
217  | 0  |   ((((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 3)) / 7 * 3 << 4) + 7) & GMP_NUMB_MAX)  | 
218  | 0  | #define mpn_divexact_by7(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,7,BINVERT_7,0)  | 
219  |  | #else  | 
220  |  | #define mpn_divexact_by7(dst,src,size) mpn_divexact_1(dst,src,size,7)  | 
221  |  | #endif  | 
222  |  | #endif  | 
223  |  |  | 
224  |  | #ifndef mpn_divexact_by11  | 
225  |  | #if HAVE_NATIVE_mpn_pi1_bdiv_q_1  | 
226  |  | #define BINVERT_11 \  | 
227  |  |   ((((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 10)) / 11 << 5) + 3) & GMP_NUMB_MAX)  | 
228  |  | #define mpn_divexact_by11(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,11,BINVERT_11,0)  | 
229  |  | #else  | 
230  |  | #define mpn_divexact_by11(dst,src,size) mpn_divexact_1(dst,src,size,11)  | 
231  |  | #endif  | 
232  |  | #endif  | 
233  |  |  | 
234  |  | #ifndef mpn_divexact_by13  | 
235  |  | #if HAVE_NATIVE_mpn_pi1_bdiv_q_1  | 
236  |  | #define BINVERT_13 \  | 
237  | 0  |   ((((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 12)) / 13 * 3 << 14) + 3781) & GMP_NUMB_MAX)  | 
238  | 0  | #define mpn_divexact_by13(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,13,BINVERT_13,0)  | 
239  |  | #else  | 
240  |  | #define mpn_divexact_by13(dst,src,size) mpn_divexact_1(dst,src,size,13)  | 
241  |  | #endif  | 
242  |  | #endif  | 
243  |  |  | 
244  |  | #ifndef mpn_divexact_by17  | 
245  |  | #if HAVE_NATIVE_mpn_pi1_bdiv_q_1  | 
246  |  | #define BINVERT_17 \  | 
247  |  |   ((((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 8)) / 17 * 15 << 7) + 113) & GMP_NUMB_MAX)  | 
248  |  | #define mpn_divexact_by17(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,17,BINVERT_17,0)  | 
249  |  | #else  | 
250  |  | #define mpn_divexact_by17(dst,src,size) mpn_divexact_1(dst,src,size,17)  | 
251  |  | #endif  | 
252  |  | #endif  | 
253  |  |  | 
254  |  | /* Thanks to Chinese remainder theorem, store  | 
255  |  |    in {rp, k*n+1} the value mod (B^(k*n)+1), given | 
256  |  |    {ap, k*n+1} mod ((B^(k*n)+1)/(B^n+1)) and | 
257  |  |    {bp, n+1} mod (B^n+1) . | 
258  |  |    {tp, n+1} is a scratch area. | 
259  |  |    tp == rp or rp == ap are possible.  | 
260  |  | */  | 
261  |  | static void  | 
262  |  | _mpn_crt (mp_ptr rp, mp_srcptr ap, mp_srcptr bp,  | 
263  |  |     mp_size_t n, unsigned k, mp_ptr tp)  | 
264  | 0  | { | 
265  | 0  |   mp_limb_t mod;  | 
266  | 0  |   unsigned i;  | 
267  |  | 
  | 
268  |  | #if MOD_BKNP1_ONLY3  | 
269  |  |   ASSERT (k == 3);  | 
270  |  |   k = 3;  | 
271  |  | #endif  | 
272  | 0  |   _mpn_modbnp1_kn (tp, ap, n, k);  | 
273  | 0  |   if (mpn_sub_n (tp, bp, tp, n + 1))  | 
274  | 0  |     _mpn_modbnp1_neg_ip (tp, n, tp[n]);  | 
275  |  | 
  | 
276  |  | #if MOD_BKNP1_USE11  | 
277  |  |   if (UNLIKELY (k == 11))  | 
278  |  |     { | 
279  |  |       ASSERT (GMP_NUMB_BITS % 2 == 0);  | 
280  |  |       /* mod <- -Mod(B^n+1,11)^-1 */  | 
281  |  |       mod = n * (GMP_NUMB_BITS % 5) % 5;  | 
282  |  |       if ((mod > 2) || UNLIKELY (mod == 0))  | 
283  |  |   mod += 5;  | 
284  |  |  | 
285  |  |       mod *= mpn_mod_1 (tp, n + 1, 11);  | 
286  |  |     }  | 
287  |  |   else  | 
288  |  | #endif  | 
289  | 0  |     { | 
290  | 0  | #if GMP_NUMB_BITS % 8 == 0  | 
291  |  |   /* (2^6 - 1) | (2^{GMP_NUMB_BITS*3/4} - 1)  */ | 
292  |  |   /* (2^6 - 1) = 3^2 * 7      */  | 
293  | 0  |   mod = mpn_mod_34lsub1 (tp, n + 1);  | 
294  | 0  |   ASSERT ((GMP_NUMB_MAX >> (GMP_NUMB_BITS >> 2)) % k == 0);  | 
295  |  |   /* (2^12 - 1) = 3^2 * 5 * 7 * 13    */  | 
296  |  |   /* (2^24 - 1) = 3^2 * 5 * 7 * 13 * 17 * 241 */  | 
297  | 0  |   ASSERT (k == 3 || k == 5 || k == 7 || k == 13 || k == 17);  | 
298  |  | 
  | 
299  | 0  | #if GMP_NUMB_BITS % 3 != 0  | 
300  | 0  |   if (UNLIKELY (k != 3))  | 
301  | 0  |     { | 
302  | 0  |       ASSERT ((GMP_NUMB_MAX % k == 0) || (n % 3 != 0));  | 
303  | 0  |       if ((GMP_NUMB_BITS % 16 == 0) && LIKELY (k == 5))  | 
304  | 0  |   mod <<= 1; /* k >> 1 = 1 << 1 */  | 
305  | 0  |       else if ((GMP_NUMB_BITS % 16 != 0) || LIKELY (k == 7))  | 
306  | 0  |   mod <<= (n << (GMP_NUMB_BITS % 3 >> 1)) % 3;  | 
307  | 0  |       else if ((GMP_NUMB_BITS % 32 != 0) || LIKELY (k == 13))  | 
308  | 0  |   mod *= ((n << (GMP_NUMB_BITS % 3 >> 1)) % 3 == 1) ? 3 : 9;  | 
309  | 0  |       else /* k == 17 */  | 
310  | 0  |   mod <<= 3; /* k >> 1 = 1 << 3 */  | 
311  |  | #if 0  | 
312  |  |       if ((GMP_NUMB_BITS == 8) /* && (k == 7) */ ||  | 
313  |  |     (GMP_NUMB_BITS == 16) && (k == 13))  | 
314  |  |   mod = ((mod & (GMP_NUMB_MAX >> (GMP_NUMB_BITS >> 2))) +  | 
315  |  |          (mod >> (3 * GMP_NUMB_BITS >> 2)));  | 
316  |  | #endif  | 
317  | 0  |     }  | 
318  |  | #else  | 
319  |  |   ASSERT (GMP_NUMB_MAX % k == 0);  | 
320  |  |   /* 2^{GMP_NUMB_BITS} - 1  = 0 (mod k) */ | 
321  |  |   /* 2^{GMP_NUMB_BITS}    = 1 (mod k) */ | 
322  |  |   /* 2^{n*GMP_NUMB_BITS} + 1  = 2 (mod k) */ | 
323  |  |   /* -2^{-1}  = k >> 1 (mod k) */ | 
324  |  |   mod *= k >> 1;  | 
325  |  | #endif  | 
326  |  | #else  | 
327  |  |   ASSERT_ALWAYS (k == 0); /* Not implemented, should not be used. */  | 
328  |  | #endif  | 
329  | 0  |     }  | 
330  |  | 
  | 
331  | 0  |   MPN_INCR_U (tp, n + 1, mod);  | 
332  | 0  |   tp[n] += mod;  | 
333  |  | 
  | 
334  | 0  |   if (LIKELY (k == 3))  | 
335  | 0  |     ASSERT_NOCARRY (mpn_divexact_by3 (tp, tp, n + 1));  | 
336  | 0  |   else if ((GMP_NUMB_BITS % 16 == 0) && LIKELY (k == 5))  | 
337  | 0  |     mpn_divexact_by5 (tp, tp, n + 1);  | 
338  | 0  |   else if (((! MOD_BKNP1_USE11) && (GMP_NUMB_BITS % 16 != 0))  | 
339  | 0  |      || LIKELY (k == 7))  | 
340  | 0  |     mpn_divexact_by7 (tp, tp, n + 1);  | 
341  |  | #if MOD_BKNP1_USE11  | 
342  |  |   else if (k == 11)  | 
343  |  |     mpn_divexact_by11 (tp, tp, n + 1);  | 
344  |  | #endif  | 
345  | 0  |   else if ((GMP_NUMB_BITS % 32 != 0) || LIKELY (k == 13))  | 
346  | 0  |     mpn_divexact_by13 (tp, tp, n + 1);  | 
347  | 0  |   else /* (k == 17) */  | 
348  | 0  |     mpn_divexact_by17 (tp, tp, n + 1);  | 
349  |  | 
  | 
350  | 0  |   rp += k * n;  | 
351  | 0  |   ap += k * n; /* tp - 1 */  | 
352  |  | 
  | 
353  | 0  |   rp -= n;  | 
354  | 0  |   ap -= n;  | 
355  | 0  |   ASSERT_NOCARRY (mpn_add_n (rp, ap, tp, n + 1));  | 
356  |  | 
  | 
357  | 0  |   i = k >> 1;  | 
358  | 0  |   do  | 
359  | 0  |    { | 
360  | 0  |       mp_limb_t cy, bw;  | 
361  | 0  |       rp -= n;  | 
362  | 0  |       ap -= n;  | 
363  | 0  |       bw = mpn_sub_n (rp, ap, tp, n) + tp[n];  | 
364  | 0  |       MPN_DECR_U (rp + n, (k - i * 2) * n + 1, bw);  | 
365  | 0  |       rp -= n;  | 
366  | 0  |       ap -= n;  | 
367  | 0  |       cy = mpn_add_n (rp, ap, tp, n) + tp[n];  | 
368  | 0  |       MPN_INCR_U (rp + n, (k - i * 2 + 1) * n + 1, cy);  | 
369  | 0  |     }  | 
370  | 0  |   while (--i != 0);  | 
371  |  |  | 
372  |  |   /* if (LIKELY (rp[k * n])) */  | 
373  | 0  |     _mpn_modbnp1_pn_ip (rp, k * n, rp[k * n]);  | 
374  | 0  | }  | 
375  |  |  | 
376  |  |  | 
377  |  | static void  | 
378  |  | _mpn_mulmod_bnp1_tp (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t rn,  | 
379  |  |         mp_ptr tp)  | 
380  | 0  | { | 
381  | 0  |   mp_limb_t cy;  | 
382  | 0  |   unsigned k;  | 
383  |  | 
  | 
384  | 0  |   ASSERT (0 < rn);  | 
385  | 0  |   ASSERT ((ap[rn] | bp[rn]) <= 1);  | 
386  |  | 
  | 
387  | 0  |   if (UNLIKELY (ap[rn] | bp[rn]))  | 
388  | 0  |     { | 
389  | 0  |       if (ap[rn])  | 
390  | 0  |   cy = bp[rn] + mpn_neg (rp, bp, rn);  | 
391  | 0  |       else /* ap[rn] == 0 */  | 
392  | 0  |   cy = mpn_neg (rp, ap, rn);  | 
393  | 0  |     }  | 
394  | 0  |   else if (MPN_MULMOD_BKNP1_USABLE(rn, k, MUL_FFT_MODF_THRESHOLD / 3))  | 
395  | 0  |     { | 
396  | 0  |       rn /= k;  | 
397  | 0  |       mpn_mulmod_bknp1 (rp, ap, bp, rn, k, tp);  | 
398  | 0  |       return;  | 
399  | 0  |     }  | 
400  | 0  |   else  | 
401  | 0  |     { | 
402  | 0  |       mpn_mul_n (tp, ap, bp, rn);  | 
403  | 0  |       cy = mpn_sub_n (rp, tp, tp + rn, rn);  | 
404  | 0  |     }  | 
405  | 0  |   rp[rn] = 0;  | 
406  | 0  |   MPN_INCR_U (rp, rn + 1, cy);  | 
407  | 0  | }  | 
408  |  |  | 
409  |  | /* {rp, kn + 1} = {ap, kn + 1} * {bp, kn + 1} % (B^kn + 1) */ | 
410  |  | /* tp must point to at least 4*(k-1)*n+1 limbs*/  | 
411  |  | void  | 
412  |  | mpn_mulmod_bknp1 (mp_ptr rp, mp_srcptr ap, mp_srcptr bp,  | 
413  |  |       mp_size_t n, unsigned k, mp_ptr tp)  | 
414  | 0  | { | 
415  | 0  |   mp_ptr hp;  | 
416  |  | 
  | 
417  |  | #if MOD_BKNP1_ONLY3  | 
418  |  |   ASSERT (k == 3);  | 
419  |  |   k = 3;  | 
420  |  | #endif  | 
421  | 0  |   ASSERT (k > 2);  | 
422  | 0  |   ASSERT (k % 2 == 1);  | 
423  |  |  | 
424  |  |   /* a % (B^{nn}+1)/(B^{nn/k}+1) */ | 
425  | 0  |   _mpn_modbknp1dbnp1_n (tp + (k - 1) * n * 2, ap, n, k);  | 
426  |  |   /* b % (B^{nn}+1)/(B^{nn/k}+1) */ | 
427  | 0  |   _mpn_modbknp1dbnp1_n (tp + (k - 1) * n * 3, bp, n, k);  | 
428  | 0  |   mpn_mul_n (tp, tp + (k - 1) * n * 2, tp + (k - 1) * n * 3, (k - 1) * n);  | 
429  | 0  |   _mpn_modbnp1 (tp, k * n, tp, (k - 1) * n * 2);  | 
430  |  | 
  | 
431  | 0  |   hp = tp + k * n + 1;  | 
432  |  |   /* a % (B^{nn/k}+1) */ | 
433  | 0  |   ASSERT (ap[k * n] <= 1);  | 
434  | 0  |   _mpn_modbnp1_kn (hp, ap, n, k);  | 
435  |  |   /* b % (B^{nn/k}+1) */ | 
436  | 0  |   ASSERT (bp[k * n] <= 1);  | 
437  | 0  |   _mpn_modbnp1_kn (hp + n + 1, bp, n, k);  | 
438  | 0  |   _mpn_mulmod_bnp1_tp (hp + (n + 1) * 2, hp, hp + n + 1, n, hp + (n + 1) * 2);  | 
439  |  | 
  | 
440  | 0  |   _mpn_crt (rp, tp, hp + (n + 1) * 2, n, k, hp);  | 
441  | 0  | }  | 
442  |  |  | 
443  |  |  | 
444  |  | static void  | 
445  |  | _mpn_sqrmod_bnp1_tp (mp_ptr rp, mp_srcptr ap, mp_size_t rn,  | 
446  |  |         mp_ptr tp)  | 
447  | 0  | { | 
448  | 0  |   mp_limb_t cy;  | 
449  | 0  |   unsigned k;  | 
450  |  | 
  | 
451  | 0  |   ASSERT (0 < rn);  | 
452  |  | 
  | 
453  | 0  |   if (UNLIKELY (ap[rn]))  | 
454  | 0  |     { | 
455  | 0  |       ASSERT (ap[rn] == 1);  | 
456  | 0  |       *rp = 1;  | 
457  | 0  |       MPN_FILL (rp + 1, rn, 0);  | 
458  | 0  |       return;  | 
459  | 0  |     }  | 
460  | 0  |   else if (MPN_SQRMOD_BKNP1_USABLE(rn, k, MUL_FFT_MODF_THRESHOLD / 3))  | 
461  | 0  |     { | 
462  | 0  |       rn /= k;  | 
463  | 0  |       mpn_sqrmod_bknp1 (rp, ap, rn, k, tp);  | 
464  | 0  |       return;  | 
465  | 0  |     }  | 
466  | 0  |   else  | 
467  | 0  |     { | 
468  | 0  |       mpn_sqr (tp, ap, rn);  | 
469  | 0  |       cy = mpn_sub_n (rp, tp, tp + rn, rn);  | 
470  | 0  |     }  | 
471  | 0  |   rp[rn] = 0;  | 
472  | 0  |   MPN_INCR_U (rp, rn + 1, cy);  | 
473  | 0  | }  | 
474  |  |  | 
475  |  | /* {rp, kn + 1} = {ap, kn + 1}^2 % (B^kn + 1) */ | 
476  |  | /* tp must point to at least 3*(k-1)*n+1 limbs*/  | 
477  |  | void  | 
478  |  | mpn_sqrmod_bknp1 (mp_ptr rp, mp_srcptr ap,  | 
479  |  |       mp_size_t n, unsigned k, mp_ptr tp)  | 
480  | 0  | { | 
481  | 0  |   mp_ptr hp;  | 
482  |  | 
  | 
483  |  | #if MOD_BKNP1_ONLY3  | 
484  |  |   ASSERT (k == 3);  | 
485  |  |   k = 3;  | 
486  |  | #endif  | 
487  | 0  |   ASSERT (k > 2);  | 
488  | 0  |   ASSERT (k % 2 == 1);  | 
489  |  |  | 
490  |  |   /* a % (B^{nn}+1)/(B^{nn/k}+1) */ | 
491  | 0  |   _mpn_modbknp1dbnp1_n (tp + (k - 1) * n * 2, ap, n, k);  | 
492  | 0  |   mpn_sqr (tp, tp + (k - 1) * n * 2, (k - 1) * n);  | 
493  | 0  |   _mpn_modbnp1 (tp, k * n, tp, (k - 1) * n * 2);  | 
494  |  | 
  | 
495  | 0  |   hp = tp + k * n + 1;  | 
496  |  |   /* a % (B^{nn/k}+1) */ | 
497  | 0  |   ASSERT (ap[k * n] <= 1);  | 
498  | 0  |   _mpn_modbnp1_kn (hp, ap, n, k);  | 
499  | 0  |   _mpn_sqrmod_bnp1_tp (hp + (n + 1), hp, n, hp + (n + 1));  | 
500  |  | 
  | 
501  | 0  |   _mpn_crt (rp, tp, hp + (n + 1), n, k, hp);  | 
502  | 0  | }  |