/src/libgmp/mpn/toom63_mul.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* Implementation of the algorithm for Toom-Cook 4.5-way. |
2 | | |
3 | | Contributed to the GNU project by Marco Bodrato. |
4 | | |
5 | | THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY |
6 | | SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST |
7 | | GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. |
8 | | |
9 | | Copyright 2009, 2012 Free Software Foundation, Inc. |
10 | | |
11 | | This file is part of the GNU MP Library. |
12 | | |
13 | | The GNU MP Library is free software; you can redistribute it and/or modify |
14 | | it under the terms of either: |
15 | | |
16 | | * the GNU Lesser General Public License as published by the Free |
17 | | Software Foundation; either version 3 of the License, or (at your |
18 | | option) any later version. |
19 | | |
20 | | or |
21 | | |
22 | | * the GNU General Public License as published by the Free Software |
23 | | Foundation; either version 2 of the License, or (at your option) any |
24 | | later version. |
25 | | |
26 | | or both in parallel, as here. |
27 | | |
28 | | The GNU MP Library is distributed in the hope that it will be useful, but |
29 | | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
30 | | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
31 | | for more details. |
32 | | |
33 | | You should have received copies of the GNU General Public License and the |
34 | | GNU Lesser General Public License along with the GNU MP Library. If not, |
35 | | see https://www.gnu.org/licenses/. */ |
36 | | |
37 | | |
38 | | #include "gmp-impl.h" |
39 | | |
40 | | /* Stores |{ap,n}-{bp,n}| in {rp,n}. */ |
41 | | /* It returns 0 or ~0, depending on the sign of the result. */ |
42 | | static unsigned |
43 | | abs_sub_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n) |
44 | 308 | { |
45 | 308 | mp_limb_t x, y; |
46 | 389 | while (--n >= 0) |
47 | 389 | { |
48 | 389 | x = ap[n]; |
49 | 389 | y = bp[n]; |
50 | 389 | if (x != y) |
51 | 308 | { |
52 | 308 | n++; |
53 | 308 | if (x > y) |
54 | 33 | { |
55 | 33 | mpn_sub_n (rp, ap, bp, n); |
56 | 33 | return 0; |
57 | 33 | } |
58 | 275 | else |
59 | 275 | { |
60 | 275 | mpn_sub_n (rp, bp, ap, n); |
61 | 275 | return ~ (unsigned) 0; |
62 | 275 | } |
63 | 308 | } |
64 | 81 | rp[n] = 0; |
65 | 81 | } |
66 | 0 | return 0; |
67 | 308 | } |
68 | | |
69 | | /* It returns 0 or ~0, depending on the sign of the result rm. */ |
70 | | static unsigned |
71 | 308 | abs_sub_add_n (mp_ptr rm, mp_ptr rp, mp_srcptr rs, mp_size_t n) { |
72 | 308 | unsigned result; |
73 | 308 | result = abs_sub_n (rm, rp, rs, n); |
74 | 308 | ASSERT_NOCARRY(mpn_add_n (rp, rp, rs, n)); |
75 | 308 | return result; |
76 | 308 | } |
77 | | |
78 | | |
79 | | /* Toom-4.5, the splitting 6x3 unbalanced version. |
80 | | Evaluate in: infinity, +4, -4, +2, -2, +1, -1, 0. |
81 | | |
82 | | <--s-><--n--><--n--><--n--><--n--><--n--> |
83 | | ____ ______ ______ ______ ______ ______ |
84 | | |_a5_|__a4__|__a3__|__a2__|__a1__|__a0__| |
85 | | |b2_|__b1__|__b0__| |
86 | | <-t-><--n--><--n--> |
87 | | |
88 | | */ |
89 | | #define TOOM_63_MUL_N_REC(p, a, b, n, ws) \ |
90 | 1.07k | do { mpn_mul_n (p, a, b, n); \ |
91 | 1.07k | } while (0) |
92 | | |
93 | | #define TOOM_63_MUL_REC(p, a, na, b, nb, ws) \ |
94 | 154 | do { mpn_mul (p, a, na, b, nb); \ |
95 | 154 | } while (0) |
96 | | |
97 | | void |
98 | | mpn_toom63_mul (mp_ptr pp, |
99 | | mp_srcptr ap, mp_size_t an, |
100 | | mp_srcptr bp, mp_size_t bn, mp_ptr scratch) |
101 | 154 | { |
102 | 154 | mp_size_t n, s, t; |
103 | 154 | mp_limb_t cy; |
104 | 154 | unsigned sign; |
105 | | |
106 | | /***************************** decomposition *******************************/ |
107 | 154 | #define a5 (ap + 5 * n) |
108 | 462 | #define b0 (bp + 0 * n) |
109 | 770 | #define b1 (bp + 1 * n) |
110 | 462 | #define b2 (bp + 2 * n) |
111 | | |
112 | 154 | ASSERT (an >= bn); |
113 | 154 | n = 1 + (an >= 2 * bn ? (an - 1) / (size_t) 6 : (bn - 1) / (size_t) 3); |
114 | | |
115 | 154 | s = an - 5 * n; |
116 | 154 | t = bn - 2 * n; |
117 | | |
118 | 154 | ASSERT (0 < s && s <= n); |
119 | 154 | ASSERT (0 < t && t <= n); |
120 | | /* WARNING! it assumes s+t>=n */ |
121 | 154 | ASSERT ( s + t >= n ); |
122 | 154 | ASSERT ( s + t > 4); |
123 | | /* WARNING! it assumes n>1 */ |
124 | 154 | ASSERT ( n > 2); |
125 | | |
126 | 154 | #define r8 pp /* 2n */ |
127 | 308 | #define r7 scratch /* 3n+1 */ |
128 | 154 | #define r5 (pp + 3*n) /* 3n+1 */ |
129 | 462 | #define v0 (pp + 3*n) /* n+1 */ |
130 | 616 | #define v1 (pp + 4*n+1) /* n+1 */ |
131 | 462 | #define v2 (pp + 5*n+2) /* n+1 */ |
132 | 2.15k | #define v3 (pp + 6*n+3) /* n+1 */ |
133 | 308 | #define r3 (scratch + 3 * n + 1) /* 3n+1 */ |
134 | 154 | #define r1 (pp + 7*n) /* s+t <= 2*n */ |
135 | 770 | #define ws (scratch + 6 * n + 2) /* ??? */ |
136 | | |
137 | | /* Alloc also 3n+1 limbs for ws... mpn_toom_interpolate_8pts may |
138 | | need all of them, when DO_mpn_sublsh_n usea a scratch */ |
139 | | /* if (scratch == NULL) scratch = TMP_SALLOC_LIMBS (9 * n + 3); */ |
140 | | |
141 | | /********************** evaluation and recursive calls *********************/ |
142 | | /* $\pm4$ */ |
143 | 154 | sign = mpn_toom_eval_pm2exp (v2, v0, 5, ap, n, s, 2, pp); |
144 | 154 | pp[n] = mpn_lshift (pp, b1, n, 2); /* 4b1 */ |
145 | | /* FIXME: use addlsh */ |
146 | 154 | v3[t] = mpn_lshift (v3, b2, t, 4);/* 16b2 */ |
147 | 154 | if ( n == t ) |
148 | 4 | v3[n]+= mpn_add_n (v3, v3, b0, n); /* 16b2+b0 */ |
149 | 150 | else |
150 | 150 | v3[n] = mpn_add (v3, b0, n, v3, t+1); /* 16b2+b0 */ |
151 | 154 | sign ^= abs_sub_add_n (v1, v3, pp, n + 1); |
152 | 154 | TOOM_63_MUL_N_REC(pp, v0, v1, n + 1, ws); /* A(-4)*B(-4) */ |
153 | 154 | TOOM_63_MUL_N_REC(r3, v2, v3, n + 1, ws); /* A(+4)*B(+4) */ |
154 | 154 | mpn_toom_couple_handling (r3, 2*n+1, pp, sign, n, 2, 4); |
155 | | |
156 | | /* $\pm1$ */ |
157 | 154 | sign = mpn_toom_eval_pm1 (v2, v0, 5, ap, n, s, pp); |
158 | | /* Compute bs1 and bsm1. Code taken from toom33 */ |
159 | 154 | cy = mpn_add (ws, b0, n, b2, t); |
160 | | #if HAVE_NATIVE_mpn_add_n_sub_n |
161 | | if (cy == 0 && mpn_cmp (ws, b1, n) < 0) |
162 | | { |
163 | | cy = mpn_add_n_sub_n (v3, v1, b1, ws, n); |
164 | | v3[n] = cy >> 1; |
165 | | v1[n] = 0; |
166 | | sign = ~sign; |
167 | | } |
168 | | else |
169 | | { |
170 | | mp_limb_t cy2; |
171 | | cy2 = mpn_add_n_sub_n (v3, v1, ws, b1, n); |
172 | | v3[n] = cy + (cy2 >> 1); |
173 | | v1[n] = cy - (cy2 & 1); |
174 | | } |
175 | | #else |
176 | 154 | v3[n] = cy + mpn_add_n (v3, ws, b1, n); |
177 | 154 | if (cy == 0 && mpn_cmp (ws, b1, n) < 0) |
178 | 64 | { |
179 | 64 | mpn_sub_n (v1, b1, ws, n); |
180 | 64 | v1[n] = 0; |
181 | 64 | sign = ~sign; |
182 | 64 | } |
183 | 90 | else |
184 | 90 | { |
185 | 90 | cy -= mpn_sub_n (v1, ws, b1, n); |
186 | 90 | v1[n] = cy; |
187 | 90 | } |
188 | 154 | #endif |
189 | 154 | TOOM_63_MUL_N_REC(pp, v0, v1, n + 1, ws); /* A(-1)*B(-1) */ |
190 | 154 | TOOM_63_MUL_N_REC(r7, v2, v3, n + 1, ws); /* A(1)*B(1) */ |
191 | 154 | mpn_toom_couple_handling (r7, 2*n+1, pp, sign, n, 0, 0); |
192 | | |
193 | | /* $\pm2$ */ |
194 | 154 | sign = mpn_toom_eval_pm2 (v2, v0, 5, ap, n, s, pp); |
195 | 154 | pp[n] = mpn_lshift (pp, b1, n, 1); /* 2b1 */ |
196 | | /* FIXME: use addlsh or addlsh2 */ |
197 | 154 | v3[t] = mpn_lshift (v3, b2, t, 2);/* 4b2 */ |
198 | 154 | if ( n == t ) |
199 | 4 | v3[n]+= mpn_add_n (v3, v3, b0, n); /* 4b2+b0 */ |
200 | 150 | else |
201 | 150 | v3[n] = mpn_add (v3, b0, n, v3, t+1); /* 4b2+b0 */ |
202 | 154 | sign ^= abs_sub_add_n (v1, v3, pp, n + 1); |
203 | 154 | TOOM_63_MUL_N_REC(pp, v0, v1, n + 1, ws); /* A(-2)*B(-2) */ |
204 | 154 | TOOM_63_MUL_N_REC(r5, v2, v3, n + 1, ws); /* A(+2)*B(+2) */ |
205 | 154 | mpn_toom_couple_handling (r5, 2*n+1, pp, sign, n, 1, 2); |
206 | | |
207 | | /* A(0)*B(0) */ |
208 | 154 | TOOM_63_MUL_N_REC(pp, ap, bp, n, ws); |
209 | | |
210 | | /* Infinity */ |
211 | 154 | if (s > t) { |
212 | 1 | TOOM_63_MUL_REC(r1, a5, s, b2, t, ws); |
213 | 153 | } else { |
214 | 153 | TOOM_63_MUL_REC(r1, b2, t, a5, s, ws); |
215 | 153 | }; |
216 | | |
217 | 154 | mpn_toom_interpolate_8pts (pp, n, r3, r7, s + t, ws); |
218 | | |
219 | 154 | #undef a5 |
220 | 154 | #undef b0 |
221 | 154 | #undef b1 |
222 | 154 | #undef b2 |
223 | 154 | #undef r1 |
224 | 154 | #undef r3 |
225 | 154 | #undef r5 |
226 | 154 | #undef v0 |
227 | 154 | #undef v1 |
228 | 154 | #undef v2 |
229 | 154 | #undef v3 |
230 | 154 | #undef r7 |
231 | 154 | #undef r8 |
232 | 154 | #undef ws |
233 | 154 | } |