/src/gmp-6.2.1/mpn/toom_interpolate_6pts.c
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | /* mpn_toom_interpolate_6pts -- Interpolate for toom43, 52  | 
2  |  |  | 
3  |  |    Contributed to the GNU project by Marco Bodrato.  | 
4  |  |  | 
5  |  |    THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY  | 
6  |  |    SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST  | 
7  |  |    GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.  | 
8  |  |  | 
9  |  | Copyright 2009, 2010, 2012 Free Software Foundation, Inc.  | 
10  |  |  | 
11  |  | This file is part of the GNU MP Library.  | 
12  |  |  | 
13  |  | The GNU MP Library is free software; you can redistribute it and/or modify  | 
14  |  | it under the terms of either:  | 
15  |  |  | 
16  |  |   * the GNU Lesser General Public License as published by the Free  | 
17  |  |     Software Foundation; either version 3 of the License, or (at your  | 
18  |  |     option) any later version.  | 
19  |  |  | 
20  |  | or  | 
21  |  |  | 
22  |  |   * the GNU General Public License as published by the Free Software  | 
23  |  |     Foundation; either version 2 of the License, or (at your option) any  | 
24  |  |     later version.  | 
25  |  |  | 
26  |  | or both in parallel, as here.  | 
27  |  |  | 
28  |  | The GNU MP Library is distributed in the hope that it will be useful, but  | 
29  |  | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY  | 
30  |  | or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License  | 
31  |  | for more details.  | 
32  |  |  | 
33  |  | You should have received copies of the GNU General Public License and the  | 
34  |  | GNU Lesser General Public License along with the GNU MP Library.  If not,  | 
35  |  | see https://www.gnu.org/licenses/.  */  | 
36  |  |  | 
37  |  | #include "gmp-impl.h"  | 
38  |  |  | 
39  |  | #define BINVERT_3 MODLIMB_INVERSE_3  | 
40  |  |  | 
41  |  | /* For odd divisors, mpn_divexact_1 works fine with two's complement. */  | 
42  |  | #ifndef mpn_divexact_by3  | 
43  |  | #if HAVE_NATIVE_mpn_pi1_bdiv_q_1  | 
44  |  | #define mpn_divexact_by3(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,3,BINVERT_3,0)  | 
45  |  | #else  | 
46  |  | #define mpn_divexact_by3(dst,src,size) mpn_divexact_1(dst,src,size,3)  | 
47  |  | #endif  | 
48  |  | #endif  | 
49  |  |  | 
50  |  | /* Interpolation for Toom-3.5, using the evaluation points: infinity,  | 
51  |  |    1, -1, 2, -2. More precisely, we want to compute  | 
52  |  |    f(2^(GMP_NUMB_BITS * n)) for a polynomial f of degree 5, given the  | 
53  |  |    six values  | 
54  |  |  | 
55  |  |      w5 = f(0),  | 
56  |  |      w4 = f(-1),  | 
57  |  |      w3 = f(1)  | 
58  |  |      w2 = f(-2),  | 
59  |  |      w1 = f(2),  | 
60  |  |      w0 = limit at infinity of f(x) / x^5,  | 
61  |  |  | 
62  |  |    The result is stored in {pp, 5*n + w0n}. At entry, w5 is stored at | 
63  |  |    {pp, 2n}, w3 is stored at {pp + 2n, 2n+1}, and w0 is stored at | 
64  |  |    {pp + 5n, w0n}. The other values are 2n + 1 limbs each (with most | 
65  |  |    significant limbs small). f(-1) and f(-2) may be negative, signs  | 
66  |  |    determined by the flag bits. All intermediate results are positive.  | 
67  |  |    Inputs are destroyed.  | 
68  |  |  | 
69  |  |    Interpolation sequence was taken from the paper: "Integer and  | 
70  |  |    Polynomial Multiplication: Towards Optimal Toom-Cook Matrices".  | 
71  |  |    Some slight variations were introduced: adaptation to "gmp  | 
72  |  |    instruction set", and a final saving of an operation by interlacing  | 
73  |  |    interpolation and recomposition phases.  | 
74  |  | */  | 
75  |  |  | 
76  |  | void  | 
77  |  | mpn_toom_interpolate_6pts (mp_ptr pp, mp_size_t n, enum toom6_flags flags,  | 
78  |  |          mp_ptr w4, mp_ptr w2, mp_ptr w1,  | 
79  |  |          mp_size_t w0n)  | 
80  | 295  | { | 
81  | 295  |   mp_limb_t cy;  | 
82  |  |   /* cy6 can be stored in w1[2*n], cy4 in w4[0], embankment in w2[0] */  | 
83  | 295  |   mp_limb_t cy4, cy6, embankment;  | 
84  |  |  | 
85  | 295  |   ASSERT( n > 0 );  | 
86  | 295  |   ASSERT( 2*n >= w0n && w0n > 0 );  | 
87  |  |  | 
88  | 590  | #define w5  pp          /* 2n   */  | 
89  | 2.36k  | #define w3  (pp + 2 * n)      /* 2n+1 */  | 
90  | 1.77k  | #define w0  (pp + 5 * n)      /* w0n  */  | 
91  |  |  | 
92  |  |   /* Interpolate with sequence:  | 
93  |  |      W2 =(W1 - W2)>>2  | 
94  |  |      W1 =(W1 - W5)>>1  | 
95  |  |      W1 =(W1 - W2)>>1  | 
96  |  |      W4 =(W3 - W4)>>1  | 
97  |  |      W2 =(W2 - W4)/3  | 
98  |  |      W3 = W3 - W4 - W5  | 
99  |  |      W1 =(W1 - W3)/3  | 
100  |  |      // Last steps are mixed with recomposition...  | 
101  |  |      W2 = W2 - W0<<2  | 
102  |  |      W4 = W4 - W2  | 
103  |  |      W3 = W3 - W1  | 
104  |  |      W2 = W2 - W0  | 
105  |  |   */  | 
106  |  |  | 
107  |  |   /* W2 =(W1 - W2)>>2 */  | 
108  | 295  |   if (flags & toom6_vm2_neg)  | 
109  | 198  |     mpn_add_n (w2, w1, w2, 2 * n + 1);  | 
110  | 97  |   else  | 
111  | 97  |     mpn_sub_n (w2, w1, w2, 2 * n + 1);  | 
112  | 295  |   mpn_rshift (w2, w2, 2 * n + 1, 2);  | 
113  |  |  | 
114  |  |   /* W1 =(W1 - W5)>>1 */  | 
115  | 295  |   w1[2*n] -= mpn_sub_n (w1, w1, w5, 2*n);  | 
116  | 295  |   mpn_rshift (w1, w1, 2 * n + 1, 1);  | 
117  |  |  | 
118  |  |   /* W1 =(W1 - W2)>>1 */  | 
119  | 295  | #if HAVE_NATIVE_mpn_rsh1sub_n  | 
120  | 295  |   mpn_rsh1sub_n (w1, w1, w2, 2 * n + 1);  | 
121  |  | #else  | 
122  |  |   mpn_sub_n (w1, w1, w2, 2 * n + 1);  | 
123  |  |   mpn_rshift (w1, w1, 2 * n + 1, 1);  | 
124  |  | #endif  | 
125  |  |  | 
126  |  |   /* W4 =(W3 - W4)>>1 */  | 
127  | 295  |   if (flags & toom6_vm1_neg)  | 
128  | 144  |     { | 
129  | 144  | #if HAVE_NATIVE_mpn_rsh1add_n  | 
130  | 144  |       mpn_rsh1add_n (w4, w3, w4, 2 * n + 1);  | 
131  |  | #else  | 
132  |  |       mpn_add_n (w4, w3, w4, 2 * n + 1);  | 
133  |  |       mpn_rshift (w4, w4, 2 * n + 1, 1);  | 
134  |  | #endif  | 
135  | 144  |     }  | 
136  | 151  |   else  | 
137  | 151  |     { | 
138  | 151  | #if HAVE_NATIVE_mpn_rsh1sub_n  | 
139  | 151  |       mpn_rsh1sub_n (w4, w3, w4, 2 * n + 1);  | 
140  |  | #else  | 
141  |  |       mpn_sub_n (w4, w3, w4, 2 * n + 1);  | 
142  |  |       mpn_rshift (w4, w4, 2 * n + 1, 1);  | 
143  |  | #endif  | 
144  | 151  |     }  | 
145  |  |  | 
146  |  |   /* W2 =(W2 - W4)/3 */  | 
147  | 295  |   mpn_sub_n (w2, w2, w4, 2 * n + 1);  | 
148  | 295  |   mpn_divexact_by3 (w2, w2, 2 * n + 1);  | 
149  |  |  | 
150  |  |   /* W3 = W3 - W4 - W5 */  | 
151  | 295  |   mpn_sub_n (w3, w3, w4, 2 * n + 1);  | 
152  | 295  |   w3[2 * n] -= mpn_sub_n (w3, w3, w5, 2 * n);  | 
153  |  |  | 
154  |  |   /* W1 =(W1 - W3)/3 */  | 
155  | 295  |   mpn_sub_n (w1, w1, w3, 2 * n + 1);  | 
156  | 295  |   mpn_divexact_by3 (w1, w1, 2 * n + 1);  | 
157  |  |  | 
158  |  |   /*  | 
159  |  |     [1 0 0 0 0 0;  | 
160  |  |      0 1 0 0 0 0;  | 
161  |  |      1 0 1 0 0 0;  | 
162  |  |      0 1 0 1 0 0;  | 
163  |  |      1 0 1 0 1 0;  | 
164  |  |      0 0 0 0 0 1]  | 
165  |  |  | 
166  |  |     pp[] prior to operations:  | 
167  |  |      |_H w0__|_L w0__|______||_H w3__|_L w3__|_H w5__|_L w5__|  | 
168  |  |  | 
169  |  |     summation scheme for remaining operations:  | 
170  |  |      |______________5|n_____4|n_____3|n_____2|n______|n______|pp  | 
171  |  |      |_H w0__|_L w0__|______||_H w3__|_L w3__|_H w5__|_L w5__|  | 
172  |  |             || H w4  | L w4  |  | 
173  |  |         || H w2  | L w2  |  | 
174  |  |       || H w1  | L w1  |  | 
175  |  |           ||-H w1  |-L w1  |  | 
176  |  |          |-H w0  |-L w0 ||-H w2  |-L w2  |  | 
177  |  |   */  | 
178  | 295  |   cy = mpn_add_n (pp + n, pp + n, w4, 2 * n + 1);  | 
179  | 295  |   MPN_INCR_U (pp + 3 * n + 1, n, cy);  | 
180  |  |  | 
181  |  |   /* W2 -= W0<<2 */  | 
182  |  | #if HAVE_NATIVE_mpn_sublsh_n || HAVE_NATIVE_mpn_sublsh2_n_ip1  | 
183  |  | #if HAVE_NATIVE_mpn_sublsh2_n_ip1  | 
184  |  |   cy = mpn_sublsh2_n_ip1 (w2, w0, w0n);  | 
185  |  | #else  | 
186  |  |   cy = mpn_sublsh_n (w2, w2, w0, w0n, 2);  | 
187  |  | #endif  | 
188  |  | #else  | 
189  |  |   /* {W4,2*n+1} is now free and can be overwritten. */ | 
190  | 295  |   cy = mpn_lshift(w4, w0, w0n, 2);  | 
191  | 295  |   cy+= mpn_sub_n(w2, w2, w4, w0n);  | 
192  | 295  | #endif  | 
193  | 295  |   MPN_DECR_U (w2 + w0n, 2 * n + 1 - w0n, cy);  | 
194  |  |  | 
195  |  |   /* W4L = W4L - W2L */  | 
196  | 295  |   cy = mpn_sub_n (pp + n, pp + n, w2, n);  | 
197  | 295  |   MPN_DECR_U (w3, 2 * n + 1, cy);  | 
198  |  |  | 
199  |  |   /* W3H = W3H + W2L */  | 
200  | 295  |   cy4 = w3[2 * n] + mpn_add_n (pp + 3 * n, pp + 3 * n, w2, n);  | 
201  |  |   /* W1L + W2H */  | 
202  | 295  |   cy = w2[2 * n] + mpn_add_n (pp + 4 * n, w1, w2 + n, n);  | 
203  | 295  |   MPN_INCR_U (w1 + n, n + 1, cy);  | 
204  |  |  | 
205  |  |   /* W0 = W0 + W1H */  | 
206  | 295  |   if (LIKELY (w0n > n))  | 
207  | 295  |     cy6 = w1[2 * n] + mpn_add_n (w0, w0, w1 + n, n);  | 
208  | 0  |   else  | 
209  | 0  |     cy6 = mpn_add_n (w0, w0, w1 + n, w0n);  | 
210  |  |  | 
211  |  |   /*  | 
212  |  |     summation scheme for the next operation:  | 
213  |  |      |...____5|n_____4|n_____3|n_____2|n______|n______|pp  | 
214  |  |      |...w0___|_w1_w2_|_H w3__|_L w3__|_H w5__|_L w5__|  | 
215  |  |          ...-w0___|-w1_w2 |  | 
216  |  |   */  | 
217  |  |   /* if(LIKELY(w0n>n)) the two operands below DO overlap! */  | 
218  | 295  |   cy = mpn_sub_n (pp + 2 * n, pp + 2 * n, pp + 4 * n, n + w0n);  | 
219  |  |  | 
220  |  |   /* embankment is a "dirty trick" to avoid carry/borrow propagation  | 
221  |  |      beyond allocated memory */  | 
222  | 295  |   embankment = w0[w0n - 1] - 1;  | 
223  | 295  |   w0[w0n - 1] = 1;  | 
224  | 295  |   if (LIKELY (w0n > n)) { | 
225  | 295  |     if (cy4 > cy6)  | 
226  | 159  |       MPN_INCR_U (pp + 4 * n, w0n + n, cy4 - cy6);  | 
227  | 136  |     else  | 
228  | 136  |       MPN_DECR_U (pp + 4 * n, w0n + n, cy6 - cy4);  | 
229  | 295  |     MPN_DECR_U (pp + 3 * n + w0n, 2 * n, cy);  | 
230  | 295  |     MPN_INCR_U (w0 + n, w0n - n, cy6);  | 
231  | 295  |   } else { | 
232  | 0  |     MPN_INCR_U (pp + 4 * n, w0n + n, cy4);  | 
233  | 0  |     MPN_DECR_U (pp + 3 * n + w0n, 2 * n, cy + cy6);  | 
234  | 0  |   }  | 
235  | 295  |   w0[w0n - 1] += embankment;  | 
236  |  |  | 
237  | 295  | #undef w5  | 
238  | 295  | #undef w3  | 
239  | 295  | #undef w0  | 
240  |  |  | 
241  | 295  | }  |