/src/gmp-6.2.1/mpn/toom_interpolate_5pts.c
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | /* mpn_toom_interpolate_5pts -- Interpolate for toom3, 33, 42.  | 
2  |  |  | 
3  |  |    Contributed to the GNU project by Robert Harley.  | 
4  |  |    Improvements by Paul Zimmermann and Marco Bodrato.  | 
5  |  |  | 
6  |  |    THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY  | 
7  |  |    SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST  | 
8  |  |    GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.  | 
9  |  |  | 
10  |  | Copyright 2000-2003, 2005-2007, 2009 Free Software Foundation, Inc.  | 
11  |  |  | 
12  |  | This file is part of the GNU MP Library.  | 
13  |  |  | 
14  |  | The GNU MP Library is free software; you can redistribute it and/or modify  | 
15  |  | it under the terms of either:  | 
16  |  |  | 
17  |  |   * the GNU Lesser General Public License as published by the Free  | 
18  |  |     Software Foundation; either version 3 of the License, or (at your  | 
19  |  |     option) any later version.  | 
20  |  |  | 
21  |  | or  | 
22  |  |  | 
23  |  |   * the GNU General Public License as published by the Free Software  | 
24  |  |     Foundation; either version 2 of the License, or (at your option) any  | 
25  |  |     later version.  | 
26  |  |  | 
27  |  | or both in parallel, as here.  | 
28  |  |  | 
29  |  | The GNU MP Library is distributed in the hope that it will be useful, but  | 
30  |  | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY  | 
31  |  | or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License  | 
32  |  | for more details.  | 
33  |  |  | 
34  |  | You should have received copies of the GNU General Public License and the  | 
35  |  | GNU Lesser General Public License along with the GNU MP Library.  If not,  | 
36  |  | see https://www.gnu.org/licenses/.  */  | 
37  |  |  | 
38  |  | #include "gmp-impl.h"  | 
39  |  |  | 
40  |  | void  | 
41  |  | mpn_toom_interpolate_5pts (mp_ptr c, mp_ptr v2, mp_ptr vm1,  | 
42  |  |          mp_size_t k, mp_size_t twor, int sa,  | 
43  |  |          mp_limb_t vinf0)  | 
44  | 2.29k  | { | 
45  | 2.29k  |   mp_limb_t cy, saved;  | 
46  | 2.29k  |   mp_size_t twok;  | 
47  | 2.29k  |   mp_size_t kk1;  | 
48  | 2.29k  |   mp_ptr c1, v1, c3, vinf;  | 
49  |  |  | 
50  | 2.29k  |   twok = k + k;  | 
51  | 2.29k  |   kk1 = twok + 1;  | 
52  |  |  | 
53  | 2.29k  |   c1 = c  + k;  | 
54  | 2.29k  |   v1 = c1 + k;  | 
55  | 2.29k  |   c3 = v1 + k;  | 
56  | 2.29k  |   vinf = c3 + k;  | 
57  |  |  | 
58  | 2.29k  | #define v0 (c)  | 
59  |  |   /* (1) v2 <- v2-vm1 < v2+|vm1|,       (16 8 4 2 1) - (1 -1 1 -1  1) =  | 
60  |  |      thus 0 <= v2 < 50*B^(2k) < 2^6*B^(2k)             (15 9 3  3  0)  | 
61  |  |   */  | 
62  | 2.29k  |   if (sa)  | 
63  | 2.29k  |     ASSERT_NOCARRY (mpn_add_n (v2, v2, vm1, kk1));  | 
64  | 1.57k  |   else  | 
65  | 2.29k  |     ASSERT_NOCARRY (mpn_sub_n (v2, v2, vm1, kk1));  | 
66  |  |  | 
67  |  |   /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r} | 
68  |  |        v0       v1       hi(vinf)       |vm1|     v2-vm1      EMPTY */  | 
69  |  |  | 
70  | 2.29k  |   ASSERT_NOCARRY (mpn_divexact_by3 (v2, v2, kk1));    /* v2 <- v2 / 3 */  | 
71  |  |                   /* (5 3 1 1 0)*/  | 
72  |  |  | 
73  |  |   /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r} | 
74  |  |        v0       v1      hi(vinf)       |vm1|     (v2-vm1)/3    EMPTY */  | 
75  |  |  | 
76  |  |   /* (2) vm1 <- tm1 := (v1 - vm1) / 2  [(1 1 1 1 1) - (1 -1 1 -1 1)] / 2 =  | 
77  |  |      tm1 >= 0                                         (0  1 0  1 0)  | 
78  |  |      No carry comes out from {v1, kk1} +/- {vm1, kk1}, | 
79  |  |      and the division by two is exact.  | 
80  |  |      If (sa!=0) the sign of vm1 is negative */  | 
81  | 2.29k  |   if (sa)  | 
82  | 716  |     { | 
83  | 716  | #ifdef HAVE_NATIVE_mpn_rsh1add_n  | 
84  | 716  |       mpn_rsh1add_n (vm1, v1, vm1, kk1);  | 
85  |  | #else  | 
86  |  |       ASSERT_NOCARRY (mpn_add_n (vm1, v1, vm1, kk1));  | 
87  |  |       ASSERT_NOCARRY (mpn_rshift (vm1, vm1, kk1, 1));  | 
88  |  | #endif  | 
89  | 716  |     }  | 
90  | 1.57k  |   else  | 
91  | 1.57k  |     { | 
92  | 1.57k  | #ifdef HAVE_NATIVE_mpn_rsh1sub_n  | 
93  | 1.57k  |       mpn_rsh1sub_n (vm1, v1, vm1, kk1);  | 
94  |  | #else  | 
95  |  |       ASSERT_NOCARRY (mpn_sub_n (vm1, v1, vm1, kk1));  | 
96  |  |       ASSERT_NOCARRY (mpn_rshift (vm1, vm1, kk1, 1));  | 
97  |  | #endif  | 
98  | 1.57k  |     }  | 
99  |  |  | 
100  |  |   /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r} | 
101  |  |        v0       v1        hi(vinf)       tm1     (v2-vm1)/3    EMPTY */  | 
102  |  |  | 
103  |  |   /* (3) v1 <- t1 := v1 - v0    (1 1 1 1 1) - (0 0 0 0 1) = (1 1 1 1 0)  | 
104  |  |      t1 >= 0  | 
105  |  |   */  | 
106  | 2.29k  |   vinf[0] -= mpn_sub_n (v1, v1, c, twok);  | 
107  |  |  | 
108  |  |   /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r} | 
109  |  |        v0     v1-v0        hi(vinf)       tm1     (v2-vm1)/3    EMPTY */  | 
110  |  |  | 
111  |  |   /* (4) v2 <- t2 := ((v2-vm1)/3-t1)/2 = (v2-vm1-3*t1)/6  | 
112  |  |      t2 >= 0                  [(5 3 1 1 0) - (1 1 1 1 0)]/2 = (2 1 0 0 0)  | 
113  |  |   */  | 
114  | 2.29k  | #ifdef HAVE_NATIVE_mpn_rsh1sub_n  | 
115  | 2.29k  |   mpn_rsh1sub_n (v2, v2, v1, kk1);  | 
116  |  | #else  | 
117  |  |   ASSERT_NOCARRY (mpn_sub_n (v2, v2, v1, kk1));  | 
118  |  |   ASSERT_NOCARRY (mpn_rshift (v2, v2, kk1, 1));  | 
119  |  | #endif  | 
120  |  |  | 
121  |  |   /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r} | 
122  |  |        v0     v1-v0        hi(vinf)     tm1    (v2-vm1-3t1)/6    EMPTY */  | 
123  |  |  | 
124  |  |   /* (5) v1 <- t1-tm1           (1 1 1 1 0) - (0 1 0 1 0) = (1 0 1 0 0)  | 
125  |  |      result is v1 >= 0  | 
126  |  |   */  | 
127  | 2.29k  |   ASSERT_NOCARRY (mpn_sub_n (v1, v1, vm1, kk1));  | 
128  |  |  | 
129  |  |   /* We do not need to read the value in vm1, so we add it in {c+k, ...} */ | 
130  | 2.29k  |   cy = mpn_add_n (c1, c1, vm1, kk1);  | 
131  | 2.29k  |   MPN_INCR_U (c3 + 1, twor + k - 1, cy); /* 2n-(3k+1) = 2r+k-1 */  | 
132  |  |   /* Memory allocated for vm1 is now free, it can be recycled ...*/  | 
133  |  |  | 
134  |  |   /* (6) v2 <- v2 - 2*vinf,     (2 1 0 0 0) - 2*(1 0 0 0 0) = (0 1 0 0 0)  | 
135  |  |      result is v2 >= 0 */  | 
136  | 2.29k  |   saved = vinf[0];       /* Remember v1's highest byte (will be overwritten). */  | 
137  | 2.29k  |   vinf[0] = vinf0;       /* Set the right value for vinf0                     */  | 
138  | 2.29k  | #ifdef HAVE_NATIVE_mpn_sublsh1_n_ip1  | 
139  | 2.29k  |   cy = mpn_sublsh1_n_ip1 (v2, vinf, twor);  | 
140  |  | #else  | 
141  |  |   /* Overwrite unused vm1 */  | 
142  |  |   cy = mpn_lshift (vm1, vinf, twor, 1);  | 
143  |  |   cy += mpn_sub_n (v2, v2, vm1, twor);  | 
144  |  | #endif  | 
145  | 2.29k  |   MPN_DECR_U (v2 + twor, kk1 - twor, cy);  | 
146  |  |  | 
147  |  |   /* Current matrix is  | 
148  |  |      [1 0 0 0 0; vinf  | 
149  |  |       0 1 0 0 0; v2  | 
150  |  |       1 0 1 0 0; v1  | 
151  |  |       0 1 0 1 0; vm1  | 
152  |  |       0 0 0 0 1] v0  | 
153  |  |      Some values already are in-place (we added vm1 in the correct position)  | 
154  |  |      | vinf|  v1 |  v0 |  | 
155  |  |         | vm1 |  | 
156  |  |      One still is in a separated area  | 
157  |  |   | +v2 |  | 
158  |  |      We have to compute v1-=vinf; vm1 -= v2,  | 
159  |  |      |-vinf|  | 
160  |  |         | -v2 |  | 
161  |  |      Carefully reordering operations we can avoid to compute twice the sum  | 
162  |  |      of the high half of v2 plus the low half of vinf.  | 
163  |  |   */  | 
164  |  |  | 
165  |  |   /* Add the high half of t2 in {vinf} */ | 
166  | 2.29k  |   if ( LIKELY(twor > k + 1) ) { /* This is the expected flow  */ | 
167  | 2.29k  |     cy = mpn_add_n (vinf, vinf, v2 + k, k + 1);  | 
168  | 2.29k  |     MPN_INCR_U (c3 + kk1, twor - k - 1, cy); /* 2n-(5k+1) = 2r-k-1 */  | 
169  | 2.29k  |   } else { /* triggered only by very unbalanced cases like | 
170  |  |         (k+k+(k-2))x(k+k+1) , should be handled by toom32 */  | 
171  | 0  |     ASSERT_NOCARRY (mpn_add_n (vinf, vinf, v2 + k, twor));  | 
172  | 0  |   }  | 
173  |  |   /* (7) v1 <- v1 - vinf,       (1 0 1 0 0) - (1 0 0 0 0) = (0 0 1 0 0)  | 
174  |  |      result is >= 0 */  | 
175  |  |   /* Side effect: we also subtracted (high half) vm1 -= v2 */  | 
176  | 2.29k  |   cy = mpn_sub_n (v1, v1, vinf, twor);          /* vinf is at most twor long.  */  | 
177  | 2.29k  |   vinf0 = vinf[0];                     /* Save again the right value for vinf0 */  | 
178  | 2.29k  |   vinf[0] = saved;  | 
179  | 2.29k  |   MPN_DECR_U (v1 + twor, kk1 - twor, cy);       /* Treat the last bytes.       */  | 
180  |  |  | 
181  |  |   /* (8) vm1 <- vm1-v2          (0 1 0 1 0) - (0 1 0 0 0) = (0 0 0 1 0)  | 
182  |  |      Operate only on the low half.  | 
183  |  |   */  | 
184  | 2.29k  |   cy = mpn_sub_n (c1, c1, v2, k);  | 
185  | 2.29k  |   MPN_DECR_U (v1, kk1, cy);  | 
186  |  |  | 
187  |  |   /********************* Beginning the final phase **********************/  | 
188  |  |  | 
189  |  |   /* Most of the recomposition was done */  | 
190  |  |  | 
191  |  |   /* add t2 in {c+3k, ...}, but only the low half */ | 
192  | 2.29k  |   cy = mpn_add_n (c3, c3, v2, k);  | 
193  | 2.29k  |   vinf[0] += cy;  | 
194  | 2.29k  |   ASSERT(vinf[0] >= cy); /* No carry */  | 
195  | 2.29k  |   MPN_INCR_U (vinf, twor, vinf0); /* Add vinf0, propagate carry. */  | 
196  |  |  | 
197  | 2.29k  | #undef v0  | 
198  | 2.29k  | }  |