/src/gmp/mpn/toom_interpolate_5pts.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* mpn_toom_interpolate_5pts -- Interpolate for toom3, 33, 42. |
2 | | |
3 | | Contributed to the GNU project by Robert Harley. |
4 | | Improvements by Paul Zimmermann and Marco Bodrato. |
5 | | |
6 | | THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY |
7 | | SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST |
8 | | GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. |
9 | | |
10 | | Copyright 2000-2003, 2005-2007, 2009 Free Software Foundation, Inc. |
11 | | |
12 | | This file is part of the GNU MP Library. |
13 | | |
14 | | The GNU MP Library is free software; you can redistribute it and/or modify |
15 | | it under the terms of either: |
16 | | |
17 | | * the GNU Lesser General Public License as published by the Free |
18 | | Software Foundation; either version 3 of the License, or (at your |
19 | | option) any later version. |
20 | | |
21 | | or |
22 | | |
23 | | * the GNU General Public License as published by the Free Software |
24 | | Foundation; either version 2 of the License, or (at your option) any |
25 | | later version. |
26 | | |
27 | | or both in parallel, as here. |
28 | | |
29 | | The GNU MP Library is distributed in the hope that it will be useful, but |
30 | | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
31 | | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
32 | | for more details. |
33 | | |
34 | | You should have received copies of the GNU General Public License and the |
35 | | GNU Lesser General Public License along with the GNU MP Library. If not, |
36 | | see https://www.gnu.org/licenses/. */ |
37 | | |
38 | | #include "gmp-impl.h" |
39 | | |
40 | | void |
41 | | mpn_toom_interpolate_5pts (mp_ptr c, mp_ptr v2, mp_ptr vm1, |
42 | | mp_size_t k, mp_size_t twor, int sa, |
43 | | mp_limb_t vinf0) |
44 | 0 | { |
45 | 0 | mp_limb_t cy, saved; |
46 | 0 | mp_size_t twok; |
47 | 0 | mp_size_t kk1; |
48 | 0 | mp_ptr c1, v1, c3, vinf; |
49 | |
|
50 | 0 | twok = k + k; |
51 | 0 | kk1 = twok + 1; |
52 | |
|
53 | 0 | c1 = c + k; |
54 | 0 | v1 = c1 + k; |
55 | 0 | c3 = v1 + k; |
56 | 0 | vinf = c3 + k; |
57 | |
|
58 | 0 | #define v0 (c) |
59 | | /* (1) v2 <- v2-vm1 < v2+|vm1|, (16 8 4 2 1) - (1 -1 1 -1 1) = |
60 | | thus 0 <= v2 < 50*B^(2k) < 2^6*B^(2k) (15 9 3 3 0) |
61 | | */ |
62 | 0 | if (sa) |
63 | 0 | ASSERT_NOCARRY (mpn_add_n (v2, v2, vm1, kk1)); |
64 | 0 | else |
65 | 0 | ASSERT_NOCARRY (mpn_sub_n (v2, v2, vm1, kk1)); |
66 | | |
67 | | /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r} |
68 | | v0 v1 hi(vinf) |vm1| v2-vm1 EMPTY */ |
69 | |
|
70 | 0 | ASSERT_NOCARRY (mpn_divexact_by3 (v2, v2, kk1)); /* v2 <- v2 / 3 */ |
71 | | /* (5 3 1 1 0)*/ |
72 | | |
73 | | /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r} |
74 | | v0 v1 hi(vinf) |vm1| (v2-vm1)/3 EMPTY */ |
75 | | |
76 | | /* (2) vm1 <- tm1 := (v1 - vm1) / 2 [(1 1 1 1 1) - (1 -1 1 -1 1)] / 2 = |
77 | | tm1 >= 0 (0 1 0 1 0) |
78 | | No carry comes out from {v1, kk1} +/- {vm1, kk1}, |
79 | | and the division by two is exact. |
80 | | If (sa!=0) the sign of vm1 is negative */ |
81 | 0 | if (sa) |
82 | 0 | { |
83 | 0 | #ifdef HAVE_NATIVE_mpn_rsh1add_n |
84 | 0 | mpn_rsh1add_n (vm1, v1, vm1, kk1); |
85 | | #else |
86 | | ASSERT_NOCARRY (mpn_add_n (vm1, v1, vm1, kk1)); |
87 | | ASSERT_NOCARRY (mpn_rshift (vm1, vm1, kk1, 1)); |
88 | | #endif |
89 | 0 | } |
90 | 0 | else |
91 | 0 | { |
92 | 0 | #ifdef HAVE_NATIVE_mpn_rsh1sub_n |
93 | 0 | mpn_rsh1sub_n (vm1, v1, vm1, kk1); |
94 | | #else |
95 | | ASSERT_NOCARRY (mpn_sub_n (vm1, v1, vm1, kk1)); |
96 | | ASSERT_NOCARRY (mpn_rshift (vm1, vm1, kk1, 1)); |
97 | | #endif |
98 | 0 | } |
99 | | |
100 | | /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r} |
101 | | v0 v1 hi(vinf) tm1 (v2-vm1)/3 EMPTY */ |
102 | | |
103 | | /* (3) v1 <- t1 := v1 - v0 (1 1 1 1 1) - (0 0 0 0 1) = (1 1 1 1 0) |
104 | | t1 >= 0 |
105 | | */ |
106 | 0 | vinf[0] -= mpn_sub_n (v1, v1, c, twok); |
107 | | |
108 | | /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r} |
109 | | v0 v1-v0 hi(vinf) tm1 (v2-vm1)/3 EMPTY */ |
110 | | |
111 | | /* (4) v2 <- t2 := ((v2-vm1)/3-t1)/2 = (v2-vm1-3*t1)/6 |
112 | | t2 >= 0 [(5 3 1 1 0) - (1 1 1 1 0)]/2 = (2 1 0 0 0) |
113 | | */ |
114 | 0 | #ifdef HAVE_NATIVE_mpn_rsh1sub_n |
115 | 0 | mpn_rsh1sub_n (v2, v2, v1, kk1); |
116 | | #else |
117 | | ASSERT_NOCARRY (mpn_sub_n (v2, v2, v1, kk1)); |
118 | | ASSERT_NOCARRY (mpn_rshift (v2, v2, kk1, 1)); |
119 | | #endif |
120 | | |
121 | | /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r} |
122 | | v0 v1-v0 hi(vinf) tm1 (v2-vm1-3t1)/6 EMPTY */ |
123 | | |
124 | | /* (5) v1 <- t1-tm1 (1 1 1 1 0) - (0 1 0 1 0) = (1 0 1 0 0) |
125 | | result is v1 >= 0 |
126 | | */ |
127 | 0 | ASSERT_NOCARRY (mpn_sub_n (v1, v1, vm1, kk1)); |
128 | | |
129 | | /* We do not need to read the value in vm1, so we add it in {c+k, ...} */ |
130 | 0 | cy = mpn_add_n (c1, c1, vm1, kk1); |
131 | 0 | MPN_INCR_U (c3 + 1, twor + k - 1, cy); /* 2n-(3k+1) = 2r+k-1 */ |
132 | | /* Memory allocated for vm1 is now free, it can be recycled ...*/ |
133 | | |
134 | | /* (6) v2 <- v2 - 2*vinf, (2 1 0 0 0) - 2*(1 0 0 0 0) = (0 1 0 0 0) |
135 | | result is v2 >= 0 */ |
136 | 0 | saved = vinf[0]; /* Remember v1's highest byte (will be overwritten). */ |
137 | 0 | vinf[0] = vinf0; /* Set the right value for vinf0 */ |
138 | 0 | #ifdef HAVE_NATIVE_mpn_sublsh1_n_ip1 |
139 | 0 | cy = mpn_sublsh1_n_ip1 (v2, vinf, twor); |
140 | | #else |
141 | | /* Overwrite unused vm1 */ |
142 | | cy = mpn_lshift (vm1, vinf, twor, 1); |
143 | | cy += mpn_sub_n (v2, v2, vm1, twor); |
144 | | #endif |
145 | 0 | MPN_DECR_U (v2 + twor, kk1 - twor, cy); |
146 | | |
147 | | /* Current matrix is |
148 | | [1 0 0 0 0; vinf |
149 | | 0 1 0 0 0; v2 |
150 | | 1 0 1 0 0; v1 |
151 | | 0 1 0 1 0; vm1 |
152 | | 0 0 0 0 1] v0 |
153 | | Some values already are in-place (we added vm1 in the correct position) |
154 | | | vinf| v1 | v0 | |
155 | | | vm1 | |
156 | | One still is in a separated area |
157 | | | +v2 | |
158 | | We have to compute v1-=vinf; vm1 -= v2, |
159 | | |-vinf| |
160 | | | -v2 | |
161 | | Carefully reordering operations we can avoid to compute twice the sum |
162 | | of the high half of v2 plus the low half of vinf. |
163 | | */ |
164 | | |
165 | | /* Add the high half of t2 in {vinf} */ |
166 | 0 | if ( LIKELY(twor > k + 1) ) { /* This is the expected flow */ |
167 | 0 | cy = mpn_add_n (vinf, vinf, v2 + k, k + 1); |
168 | 0 | MPN_INCR_U (c3 + kk1, twor - k - 1, cy); /* 2n-(5k+1) = 2r-k-1 */ |
169 | 0 | } else { /* triggered only by very unbalanced cases like |
170 | | (k+k+(k-2))x(k+k+1) , should be handled by toom32 */ |
171 | 0 | ASSERT_NOCARRY (mpn_add_n (vinf, vinf, v2 + k, twor)); |
172 | 0 | } |
173 | | /* (7) v1 <- v1 - vinf, (1 0 1 0 0) - (1 0 0 0 0) = (0 0 1 0 0) |
174 | | result is >= 0 */ |
175 | | /* Side effect: we also subtracted (high half) vm1 -= v2 */ |
176 | 0 | cy = mpn_sub_n (v1, v1, vinf, twor); /* vinf is at most twor long. */ |
177 | 0 | vinf0 = vinf[0]; /* Save again the right value for vinf0 */ |
178 | 0 | vinf[0] = saved; |
179 | 0 | MPN_DECR_U (v1 + twor, kk1 - twor, cy); /* Treat the last bytes. */ |
180 | | |
181 | | /* (8) vm1 <- vm1-v2 (0 1 0 1 0) - (0 1 0 0 0) = (0 0 0 1 0) |
182 | | Operate only on the low half. |
183 | | */ |
184 | 0 | cy = mpn_sub_n (c1, c1, v2, k); |
185 | 0 | MPN_DECR_U (v1, kk1, cy); |
186 | | |
187 | | /********************* Beginning the final phase **********************/ |
188 | | |
189 | | /* Most of the recomposition was done */ |
190 | | |
191 | | /* add t2 in {c+3k, ...}, but only the low half */ |
192 | 0 | cy = mpn_add_n (c3, c3, v2, k); |
193 | 0 | vinf[0] += cy; |
194 | 0 | ASSERT(vinf[0] >= cy); /* No carry */ |
195 | 0 | MPN_INCR_U (vinf, twor, vinf0); /* Add vinf0, propagate carry. */ |
196 | |
|
197 | 0 | #undef v0 |
198 | 0 | } |