Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | /* mpn_mul -- Multiply two natural numbers.  | 
2  |  |  | 
3  |  |    Contributed to the GNU project by Torbjorn Granlund.  | 
4  |  |  | 
5  |  | Copyright 1991, 1993, 1994, 1996, 1997, 1999-2003, 2005-2007, 2009, 2010, 2012,  | 
6  |  | 2014, 2019 Free Software Foundation, Inc.  | 
7  |  |  | 
8  |  | This file is part of the GNU MP Library.  | 
9  |  |  | 
10  |  | The GNU MP Library is free software; you can redistribute it and/or modify  | 
11  |  | it under the terms of either:  | 
12  |  |  | 
13  |  |   * the GNU Lesser General Public License as published by the Free  | 
14  |  |     Software Foundation; either version 3 of the License, or (at your  | 
15  |  |     option) any later version.  | 
16  |  |  | 
17  |  | or  | 
18  |  |  | 
19  |  |   * the GNU General Public License as published by the Free Software  | 
20  |  |     Foundation; either version 2 of the License, or (at your option) any  | 
21  |  |     later version.  | 
22  |  |  | 
23  |  | or both in parallel, as here.  | 
24  |  |  | 
25  |  | The GNU MP Library is distributed in the hope that it will be useful, but  | 
26  |  | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY  | 
27  |  | or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License  | 
28  |  | for more details.  | 
29  |  |  | 
30  |  | You should have received copies of the GNU General Public License and the  | 
31  |  | GNU Lesser General Public License along with the GNU MP Library.  If not,  | 
32  |  | see https://www.gnu.org/licenses/.  */  | 
33  |  |  | 
34  |  | #include "gmp-impl.h"  | 
35  |  |  | 
36  |  |  | 
37  |  | #ifndef MUL_BASECASE_MAX_UN  | 
38  | 0  | #define MUL_BASECASE_MAX_UN 500  | 
39  |  | #endif  | 
40  |  |  | 
41  |  | /* Areas where the different toom algorithms can be called (extracted  | 
42  |  |    from the t-toom*.c files, and ignoring small constant offsets):  | 
43  |  |  | 
44  |  |    1/6  1/5 1/4 4/13 1/3 3/8 2/5 5/11 1/2 3/5 2/3 3/4 4/5   1 vn/un  | 
45  |  |                                         4/7              6/7  | 
46  |  |                6/11  | 
47  |  |                                        |--------------------| toom22 (small)  | 
48  |  |                                                            || toom22 (large)  | 
49  |  |                                                        |xxxx| toom22 called  | 
50  |  |                       |-------------------------------------| toom32  | 
51  |  |                                          |xxxxxxxxxxxxxxxx| | toom32 called  | 
52  |  |                                                |------------| toom33  | 
53  |  |                                                           |x| toom33 called  | 
54  |  |              |---------------------------------|            | toom42  | 
55  |  |                 |xxxxxxxxxxxxxxxxxxxxxxxx|            | toom42 called  | 
56  |  |                                        |--------------------| toom43  | 
57  |  |                                                |xxxxxxxxxx|   toom43 called  | 
58  |  |          |-----------------------------|                      toom52 (unused)  | 
59  |  |                                                    |--------| toom44  | 
60  |  |                |xxxxxxxx| toom44 called  | 
61  |  |                               |--------------------|        | toom53  | 
62  |  |                                         |xxxxxx|              toom53 called  | 
63  |  |     |-------------------------|                               toom62 (unused)  | 
64  |  |                                            |----------------| toom54 (unused)  | 
65  |  |                       |--------------------|                  toom63  | 
66  |  |                         |xxxxxxxxx|                   | toom63 called  | 
67  |  |                           |---------------------------------| toom6h  | 
68  |  |                |xxxxxxxx| toom6h called  | 
69  |  |                                   |-------------------------| toom8h (32 bit)  | 
70  |  |                  |------------------------------------------| toom8h (64 bit)  | 
71  |  |                |xxxxxxxx| toom8h called  | 
72  |  | */  | 
73  |  |  | 
74  |  | #define TOOM33_OK(an,bn) (6 + 2 * an < 3 * bn)  | 
75  | 0  | #define TOOM44_OK(an,bn) (12 + 3 * an < 4 * bn)  | 
76  |  |  | 
77  |  | /* Multiply the natural numbers u (pointed to by UP, with UN limbs) and v  | 
78  |  |    (pointed to by VP, with VN limbs), and store the result at PRODP.  The  | 
79  |  |    result is UN + VN limbs.  Return the most significant limb of the result.  | 
80  |  |  | 
81  |  |    NOTE: The space pointed to by PRODP is overwritten before finished with U  | 
82  |  |    and V, so overlap is an error.  | 
83  |  |  | 
84  |  |    Argument constraints:  | 
85  |  |    1. UN >= VN.  | 
86  |  |    2. PRODP != UP and PRODP != VP, i.e. the destination must be distinct from  | 
87  |  |       the multiplier and the multiplicand.  */  | 
88  |  |  | 
89  |  | /*  | 
90  |  |   * The cutoff lines in the toomX2 and toomX3 code are now exactly between the  | 
91  |  |     ideal lines of the surrounding algorithms.  Is that optimal?  | 
92  |  |  | 
93  |  |   * The toomX3 code now uses a structure similar to the one of toomX2, except  | 
94  |  |     that it loops longer in the unbalanced case.  The result is that the  | 
95  |  |     remaining area might have un < vn.  Should we fix the toomX2 code in a  | 
96  |  |     similar way?  | 
97  |  |  | 
98  |  |   * The toomX3 code is used for the largest non-FFT unbalanced operands.  It  | 
99  |  |     therefore calls mpn_mul recursively for certain cases.  | 
100  |  |  | 
101  |  |   * Allocate static temp space using THRESHOLD variables (except for toom44  | 
102  |  |     when !WANT_FFT).  That way, we can typically have no TMP_ALLOC at all.  | 
103  |  |  | 
104  |  |   * We sort ToomX2 algorithms together, assuming the toom22, toom32, toom42  | 
105  |  |     have the same vn threshold.  This is not true, we should actually use  | 
106  |  |     mul_basecase for slightly larger operands for toom32 than for toom22, and  | 
107  |  |     even larger for toom42.  | 
108  |  |  | 
109  |  |   * That problem is even more prevalent for toomX3.  We therefore use special  | 
110  |  |     THRESHOLD variables there.  | 
111  |  | */  | 
112  |  |  | 
113  |  | mp_limb_t  | 
114  |  | mpn_mul (mp_ptr prodp,  | 
115  |  |    mp_srcptr up, mp_size_t un,  | 
116  |  |    mp_srcptr vp, mp_size_t vn)  | 
117  | 0  | { | 
118  | 0  |   ASSERT (un >= vn);  | 
119  | 0  |   ASSERT (vn >= 1);  | 
120  | 0  |   ASSERT (! MPN_OVERLAP_P (prodp, un+vn, up, un));  | 
121  | 0  |   ASSERT (! MPN_OVERLAP_P (prodp, un+vn, vp, vn));  | 
122  |  | 
  | 
123  | 0  |   if (BELOW_THRESHOLD (un, MUL_TOOM22_THRESHOLD))  | 
124  | 0  |     { | 
125  |  |       /* When un (and thus vn) is below the toom22 range, do mul_basecase.  | 
126  |  |    Test un and not vn here not to thwart the un >> vn code below.  | 
127  |  |    This special case is not necessary, but cuts the overhead for the  | 
128  |  |    smallest operands. */  | 
129  | 0  |       mpn_mul_basecase (prodp, up, un, vp, vn);  | 
130  | 0  |     }  | 
131  | 0  |   else if (un == vn)  | 
132  | 0  |     { | 
133  | 0  |       mpn_mul_n (prodp, up, vp, un);  | 
134  | 0  |     }  | 
135  | 0  |   else if (vn < MUL_TOOM22_THRESHOLD)  | 
136  | 0  |     { /* plain schoolbook multiplication */ | 
137  |  |  | 
138  |  |       /* Unless un is very large, or else if have an applicable mpn_mul_N,  | 
139  |  |    perform basecase multiply directly.  */  | 
140  | 0  |       if (un <= MUL_BASECASE_MAX_UN  | 
141  |  | #if HAVE_NATIVE_mpn_mul_2  | 
142  |  |     || vn <= 2  | 
143  |  | #else  | 
144  | 0  |     || vn == 1  | 
145  | 0  | #endif  | 
146  | 0  |     )  | 
147  | 0  |   mpn_mul_basecase (prodp, up, un, vp, vn);  | 
148  | 0  |       else  | 
149  | 0  |   { | 
150  |  |     /* We have un >> MUL_BASECASE_MAX_UN > vn.  For better memory  | 
151  |  |        locality, split up[] into MUL_BASECASE_MAX_UN pieces and multiply  | 
152  |  |        these pieces with the vp[] operand.  After each such partial  | 
153  |  |        multiplication (but the last) we copy the most significant vn  | 
154  |  |        limbs into a temporary buffer since that part would otherwise be  | 
155  |  |        overwritten by the next multiplication.  After the next  | 
156  |  |        multiplication, we add it back.  This illustrates the situation:  | 
157  |  |  | 
158  |  |                                                     -->vn<--  | 
159  |  |                                                       |  |<------- un ------->|  | 
160  |  |                                                          _____________________|  | 
161  |  |                                                         X                    /|  | 
162  |  |                                                       /XX__________________/  |  | 
163  |  |                                     _____________________                     |  | 
164  |  |                                    X                    /                     |  | 
165  |  |                                  /XX__________________/                       |  | 
166  |  |                _____________________                                          |  | 
167  |  |               /                    /                                          |  | 
168  |  |             /____________________/                                            |  | 
169  |  |       ==================================================================  | 
170  |  |  | 
171  |  |       The parts marked with X are the parts whose sums are copied into  | 
172  |  |       the temporary buffer.  */  | 
173  |  | 
  | 
174  | 0  |     mp_limb_t tp[MUL_TOOM22_THRESHOLD_LIMIT];  | 
175  | 0  |     mp_limb_t cy;  | 
176  | 0  |     ASSERT (MUL_TOOM22_THRESHOLD <= MUL_TOOM22_THRESHOLD_LIMIT);  | 
177  |  | 
  | 
178  | 0  |     mpn_mul_basecase (prodp, up, MUL_BASECASE_MAX_UN, vp, vn);  | 
179  | 0  |     prodp += MUL_BASECASE_MAX_UN;  | 
180  | 0  |     MPN_COPY (tp, prodp, vn);   /* preserve high triangle */  | 
181  | 0  |     up += MUL_BASECASE_MAX_UN;  | 
182  | 0  |     un -= MUL_BASECASE_MAX_UN;  | 
183  | 0  |     while (un > MUL_BASECASE_MAX_UN)  | 
184  | 0  |       { | 
185  | 0  |         mpn_mul_basecase (prodp, up, MUL_BASECASE_MAX_UN, vp, vn);  | 
186  | 0  |         cy = mpn_add_n (prodp, prodp, tp, vn); /* add back preserved triangle */  | 
187  | 0  |         mpn_incr_u (prodp + vn, cy);  | 
188  | 0  |         prodp += MUL_BASECASE_MAX_UN;  | 
189  | 0  |         MPN_COPY (tp, prodp, vn);   /* preserve high triangle */  | 
190  | 0  |         up += MUL_BASECASE_MAX_UN;  | 
191  | 0  |         un -= MUL_BASECASE_MAX_UN;  | 
192  | 0  |       }  | 
193  | 0  |     if (un > vn)  | 
194  | 0  |       { | 
195  | 0  |         mpn_mul_basecase (prodp, up, un, vp, vn);  | 
196  | 0  |       }  | 
197  | 0  |     else  | 
198  | 0  |       { | 
199  | 0  |         ASSERT (un > 0);  | 
200  | 0  |         mpn_mul_basecase (prodp, vp, vn, up, un);  | 
201  | 0  |       }  | 
202  | 0  |     cy = mpn_add_n (prodp, prodp, tp, vn); /* add back preserved triangle */  | 
203  | 0  |     mpn_incr_u (prodp + vn, cy);  | 
204  | 0  |   }  | 
205  | 0  |     }  | 
206  | 0  |   else if (BELOW_THRESHOLD (vn, MUL_TOOM33_THRESHOLD))  | 
207  | 0  |     { | 
208  |  |       /* Use ToomX2 variants */  | 
209  | 0  |       mp_ptr scratch;  | 
210  | 0  |       TMP_SDECL; TMP_SMARK;  | 
211  |  | 
  | 
212  | 0  | #define ITCH_TOOMX2 (9 * vn / 2 + GMP_NUMB_BITS * 2)  | 
213  | 0  |       scratch = TMP_SALLOC_LIMBS (ITCH_TOOMX2);  | 
214  | 0  |       ASSERT (mpn_toom22_mul_itch ((5*vn-1)/4, vn) <= ITCH_TOOMX2); /* 5vn/2+ */  | 
215  | 0  |       ASSERT (mpn_toom32_mul_itch ((7*vn-1)/4, vn) <= ITCH_TOOMX2); /* 7vn/6+ */  | 
216  | 0  |       ASSERT (mpn_toom42_mul_itch (3 * vn - 1, vn) <= ITCH_TOOMX2); /* 9vn/2+ */  | 
217  | 0  | #undef ITCH_TOOMX2  | 
218  |  |  | 
219  |  |       /* FIXME: This condition (repeated in the loop below) leaves from a vn*vn  | 
220  |  |    square to a (3vn-1)*vn rectangle.  Leaving such a rectangle is hardly  | 
221  |  |    wise; we would get better balance by slightly moving the bound.  We  | 
222  |  |    will sometimes end up with un < vn, like in the X3 arm below.  */  | 
223  | 0  |       if (un >= 3 * vn)  | 
224  | 0  |   { | 
225  | 0  |     mp_limb_t cy;  | 
226  | 0  |     mp_ptr ws;  | 
227  |  |  | 
228  |  |     /* The maximum ws usage is for the mpn_mul result.  */  | 
229  | 0  |     ws = TMP_SALLOC_LIMBS (4 * vn);  | 
230  |  | 
  | 
231  | 0  |     mpn_toom42_mul (prodp, up, 2 * vn, vp, vn, scratch);  | 
232  | 0  |     un -= 2 * vn;  | 
233  | 0  |     up += 2 * vn;  | 
234  | 0  |     prodp += 2 * vn;  | 
235  |  | 
  | 
236  | 0  |     while (un >= 3 * vn)  | 
237  | 0  |       { | 
238  | 0  |         mpn_toom42_mul (ws, up, 2 * vn, vp, vn, scratch);  | 
239  | 0  |         un -= 2 * vn;  | 
240  | 0  |         up += 2 * vn;  | 
241  | 0  |         cy = mpn_add_n (prodp, prodp, ws, vn);  | 
242  | 0  |         MPN_COPY (prodp + vn, ws + vn, 2 * vn);  | 
243  | 0  |         mpn_incr_u (prodp + vn, cy);  | 
244  | 0  |         prodp += 2 * vn;  | 
245  | 0  |       }  | 
246  |  |  | 
247  |  |     /* vn <= un < 3vn */  | 
248  |  | 
  | 
249  | 0  |     if (4 * un < 5 * vn)  | 
250  | 0  |       mpn_toom22_mul (ws, up, un, vp, vn, scratch);  | 
251  | 0  |     else if (4 * un < 7 * vn)  | 
252  | 0  |       mpn_toom32_mul (ws, up, un, vp, vn, scratch);  | 
253  | 0  |     else  | 
254  | 0  |       mpn_toom42_mul (ws, up, un, vp, vn, scratch);  | 
255  |  | 
  | 
256  | 0  |     cy = mpn_add_n (prodp, prodp, ws, vn);  | 
257  | 0  |     MPN_COPY (prodp + vn, ws + vn, un);  | 
258  | 0  |     mpn_incr_u (prodp + vn, cy);  | 
259  | 0  |   }  | 
260  | 0  |       else  | 
261  | 0  |   { | 
262  | 0  |     if (4 * un < 5 * vn)  | 
263  | 0  |       mpn_toom22_mul (prodp, up, un, vp, vn, scratch);  | 
264  | 0  |     else if (4 * un < 7 * vn)  | 
265  | 0  |       mpn_toom32_mul (prodp, up, un, vp, vn, scratch);  | 
266  | 0  |     else  | 
267  | 0  |       mpn_toom42_mul (prodp, up, un, vp, vn, scratch);  | 
268  | 0  |   }  | 
269  | 0  |       TMP_SFREE;  | 
270  | 0  |     }  | 
271  | 0  |   else if (BELOW_THRESHOLD ((un + vn) >> 1, MUL_FFT_THRESHOLD) ||  | 
272  | 0  |      BELOW_THRESHOLD (3 * vn, MUL_FFT_THRESHOLD))  | 
273  | 0  |     { | 
274  |  |       /* Handle the largest operands that are not in the FFT range.  The 2nd  | 
275  |  |    condition makes very unbalanced operands avoid the FFT code (except  | 
276  |  |    perhaps as coefficient products of the Toom code.  */  | 
277  |  | 
  | 
278  | 0  |       if (BELOW_THRESHOLD (vn, MUL_TOOM44_THRESHOLD) || !TOOM44_OK (un, vn))  | 
279  | 0  |   { | 
280  |  |     /* Use ToomX3 variants */  | 
281  | 0  |     mp_ptr scratch;  | 
282  | 0  |     TMP_DECL; TMP_MARK;  | 
283  |  | 
  | 
284  | 0  | #define ITCH_TOOMX3 (4 * vn + GMP_NUMB_BITS)  | 
285  | 0  |     scratch = TMP_ALLOC_LIMBS (ITCH_TOOMX3);  | 
286  | 0  |     ASSERT (mpn_toom33_mul_itch ((7*vn-1)/6, vn) <= ITCH_TOOMX3); /* 7vn/2+ */  | 
287  | 0  |     ASSERT (mpn_toom43_mul_itch ((3*vn-1)/2, vn) <= ITCH_TOOMX3); /* 9vn/4+ */  | 
288  | 0  |     ASSERT (mpn_toom32_mul_itch ((7*vn-1)/4, vn) <= ITCH_TOOMX3); /* 7vn/6+ */  | 
289  | 0  |     ASSERT (mpn_toom53_mul_itch ((11*vn-1)/6, vn) <= ITCH_TOOMX3); /* 11vn/3+ */  | 
290  | 0  |     ASSERT (mpn_toom42_mul_itch ((5*vn-1)/2, vn) <= ITCH_TOOMX3); /* 15vn/4+ */  | 
291  | 0  |     ASSERT (mpn_toom63_mul_itch ((5*vn-1)/2, vn) <= ITCH_TOOMX3); /* 15vn/4+ */  | 
292  | 0  | #undef ITCH_TOOMX3  | 
293  |  | 
  | 
294  | 0  |     if (2 * un >= 5 * vn)  | 
295  | 0  |       { | 
296  | 0  |         mp_limb_t cy;  | 
297  | 0  |         mp_ptr ws;  | 
298  |  |  | 
299  |  |         /* The maximum ws usage is for the mpn_mul result.  */  | 
300  | 0  |         ws = TMP_ALLOC_LIMBS (7 * vn >> 1);  | 
301  |  | 
  | 
302  | 0  |         if (BELOW_THRESHOLD (vn, MUL_TOOM42_TO_TOOM63_THRESHOLD))  | 
303  | 0  |     mpn_toom42_mul (prodp, up, 2 * vn, vp, vn, scratch);  | 
304  | 0  |         else  | 
305  | 0  |     mpn_toom63_mul (prodp, up, 2 * vn, vp, vn, scratch);  | 
306  | 0  |         un -= 2 * vn;  | 
307  | 0  |         up += 2 * vn;  | 
308  | 0  |         prodp += 2 * vn;  | 
309  |  | 
  | 
310  | 0  |         while (2 * un >= 5 * vn) /* un >= 2.5vn */  | 
311  | 0  |     { | 
312  | 0  |       if (BELOW_THRESHOLD (vn, MUL_TOOM42_TO_TOOM63_THRESHOLD))  | 
313  | 0  |         mpn_toom42_mul (ws, up, 2 * vn, vp, vn, scratch);  | 
314  | 0  |       else  | 
315  | 0  |         mpn_toom63_mul (ws, up, 2 * vn, vp, vn, scratch);  | 
316  | 0  |       un -= 2 * vn;  | 
317  | 0  |       up += 2 * vn;  | 
318  | 0  |       cy = mpn_add_n (prodp, prodp, ws, vn);  | 
319  | 0  |       MPN_COPY (prodp + vn, ws + vn, 2 * vn);  | 
320  | 0  |       mpn_incr_u (prodp + vn, cy);  | 
321  | 0  |       prodp += 2 * vn;  | 
322  | 0  |     }  | 
323  |  |  | 
324  |  |         /* vn / 2 <= un < 2.5vn */  | 
325  |  | 
  | 
326  | 0  |         if (un < vn)  | 
327  | 0  |     mpn_mul (ws, vp, vn, up, un);  | 
328  | 0  |         else  | 
329  | 0  |     mpn_mul (ws, up, un, vp, vn);  | 
330  |  | 
  | 
331  | 0  |         cy = mpn_add_n (prodp, prodp, ws, vn);  | 
332  | 0  |         MPN_COPY (prodp + vn, ws + vn, un);  | 
333  | 0  |         mpn_incr_u (prodp + vn, cy);  | 
334  | 0  |       }  | 
335  | 0  |     else  | 
336  | 0  |       { | 
337  | 0  |         if (6 * un < 7 * vn)  | 
338  | 0  |     mpn_toom33_mul (prodp, up, un, vp, vn, scratch);  | 
339  | 0  |         else if (2 * un < 3 * vn)  | 
340  | 0  |     { | 
341  | 0  |       if (BELOW_THRESHOLD (vn, MUL_TOOM32_TO_TOOM43_THRESHOLD))  | 
342  | 0  |         mpn_toom32_mul (prodp, up, un, vp, vn, scratch);  | 
343  | 0  |       else  | 
344  | 0  |         mpn_toom43_mul (prodp, up, un, vp, vn, scratch);  | 
345  | 0  |     }  | 
346  | 0  |         else if (6 * un < 11 * vn)  | 
347  | 0  |     { | 
348  | 0  |       if (4 * un < 7 * vn)  | 
349  | 0  |         { | 
350  | 0  |           if (BELOW_THRESHOLD (vn, MUL_TOOM32_TO_TOOM53_THRESHOLD))  | 
351  | 0  |       mpn_toom32_mul (prodp, up, un, vp, vn, scratch);  | 
352  | 0  |           else  | 
353  | 0  |       mpn_toom53_mul (prodp, up, un, vp, vn, scratch);  | 
354  | 0  |         }  | 
355  | 0  |       else  | 
356  | 0  |         { | 
357  | 0  |           if (BELOW_THRESHOLD (vn, MUL_TOOM42_TO_TOOM53_THRESHOLD))  | 
358  | 0  |       mpn_toom42_mul (prodp, up, un, vp, vn, scratch);  | 
359  | 0  |           else  | 
360  | 0  |       mpn_toom53_mul (prodp, up, un, vp, vn, scratch);  | 
361  | 0  |         }  | 
362  | 0  |     }  | 
363  | 0  |         else  | 
364  | 0  |     { | 
365  | 0  |       if (BELOW_THRESHOLD (vn, MUL_TOOM42_TO_TOOM63_THRESHOLD))  | 
366  | 0  |         mpn_toom42_mul (prodp, up, un, vp, vn, scratch);  | 
367  | 0  |       else  | 
368  | 0  |         mpn_toom63_mul (prodp, up, un, vp, vn, scratch);  | 
369  | 0  |     }  | 
370  | 0  |       }  | 
371  | 0  |     TMP_FREE;  | 
372  | 0  |   }  | 
373  | 0  |       else  | 
374  | 0  |   { | 
375  | 0  |     mp_ptr scratch;  | 
376  | 0  |     TMP_DECL; TMP_MARK;  | 
377  |  | 
  | 
378  | 0  |     if (BELOW_THRESHOLD (vn, MUL_TOOM6H_THRESHOLD))  | 
379  | 0  |       { | 
380  | 0  |         scratch = TMP_SALLOC_LIMBS (mpn_toom44_mul_itch (un, vn));  | 
381  | 0  |         mpn_toom44_mul (prodp, up, un, vp, vn, scratch);  | 
382  | 0  |       }  | 
383  | 0  |     else if (BELOW_THRESHOLD (vn, MUL_TOOM8H_THRESHOLD))  | 
384  | 0  |       { | 
385  | 0  |         scratch = TMP_SALLOC_LIMBS (mpn_toom6h_mul_itch (un, vn));  | 
386  | 0  |         mpn_toom6h_mul (prodp, up, un, vp, vn, scratch);  | 
387  | 0  |       }  | 
388  | 0  |     else  | 
389  | 0  |       { | 
390  | 0  |         scratch = TMP_ALLOC_LIMBS (mpn_toom8h_mul_itch (un, vn));  | 
391  | 0  |         mpn_toom8h_mul (prodp, up, un, vp, vn, scratch);  | 
392  | 0  |       }  | 
393  | 0  |     TMP_FREE;  | 
394  | 0  |   }  | 
395  | 0  |     }  | 
396  | 0  |   else  | 
397  | 0  |     { | 
398  | 0  |       if (un >= 8 * vn)  | 
399  | 0  |   { | 
400  | 0  |     mp_limb_t cy;  | 
401  | 0  |     mp_ptr ws;  | 
402  | 0  |     TMP_DECL; TMP_MARK;  | 
403  |  |  | 
404  |  |     /* The maximum ws usage is for the mpn_mul result.  */  | 
405  | 0  |     ws = TMP_BALLOC_LIMBS (9 * vn >> 1);  | 
406  |  | 
  | 
407  | 0  |     mpn_fft_mul (prodp, up, 3 * vn, vp, vn);  | 
408  | 0  |     un -= 3 * vn;  | 
409  | 0  |     up += 3 * vn;  | 
410  | 0  |     prodp += 3 * vn;  | 
411  |  | 
  | 
412  | 0  |     while (2 * un >= 7 * vn) /* un >= 3.5vn  */  | 
413  | 0  |       { | 
414  | 0  |         mpn_fft_mul (ws, up, 3 * vn, vp, vn);  | 
415  | 0  |         un -= 3 * vn;  | 
416  | 0  |         up += 3 * vn;  | 
417  | 0  |         cy = mpn_add_n (prodp, prodp, ws, vn);  | 
418  | 0  |         MPN_COPY (prodp + vn, ws + vn, 3 * vn);  | 
419  | 0  |         mpn_incr_u (prodp + vn, cy);  | 
420  | 0  |         prodp += 3 * vn;  | 
421  | 0  |       }  | 
422  |  |  | 
423  |  |     /* vn / 2 <= un < 3.5vn */  | 
424  |  | 
  | 
425  | 0  |     if (un < vn)  | 
426  | 0  |       mpn_mul (ws, vp, vn, up, un);  | 
427  | 0  |     else  | 
428  | 0  |       mpn_mul (ws, up, un, vp, vn);  | 
429  |  | 
  | 
430  | 0  |     cy = mpn_add_n (prodp, prodp, ws, vn);  | 
431  | 0  |     MPN_COPY (prodp + vn, ws + vn, un);  | 
432  | 0  |     mpn_incr_u (prodp + vn, cy);  | 
433  |  | 
  | 
434  | 0  |     TMP_FREE;  | 
435  | 0  |   }  | 
436  | 0  |       else  | 
437  | 0  |   mpn_fft_mul (prodp, up, un, vp, vn);  | 
438  | 0  |     }  | 
439  |  | 
  | 
440  | 0  |   return prodp[un + vn - 1];  /* historic */  | 
441  | 0  | }  |