/src/gmp/mpn/toom22_mul.c
Line | Count | Source |
1 | | /* mpn_toom22_mul -- Multiply {ap,an} and {bp,bn} where an >= bn. Or more |
2 | | accurately, bn <= an < 2bn. |
3 | | |
4 | | Contributed to the GNU project by Torbjorn Granlund. |
5 | | |
6 | | THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY |
7 | | SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST |
8 | | GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. |
9 | | |
10 | | Copyright 2006-2010, 2012, 2014, 2018, 2020 Free Software Foundation, Inc. |
11 | | |
12 | | This file is part of the GNU MP Library. |
13 | | |
14 | | The GNU MP Library is free software; you can redistribute it and/or modify |
15 | | it under the terms of either: |
16 | | |
17 | | * the GNU Lesser General Public License as published by the Free |
18 | | Software Foundation; either version 3 of the License, or (at your |
19 | | option) any later version. |
20 | | |
21 | | or |
22 | | |
23 | | * the GNU General Public License as published by the Free Software |
24 | | Foundation; either version 2 of the License, or (at your option) any |
25 | | later version. |
26 | | |
27 | | or both in parallel, as here. |
28 | | |
29 | | The GNU MP Library is distributed in the hope that it will be useful, but |
30 | | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
31 | | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
32 | | for more details. |
33 | | |
34 | | You should have received copies of the GNU General Public License and the |
35 | | GNU Lesser General Public License along with the GNU MP Library. If not, |
36 | | see https://www.gnu.org/licenses/. */ |
37 | | |
38 | | |
39 | | #include "gmp-impl.h" |
40 | | |
41 | | /* Evaluate in: -1, 0, +inf |
42 | | |
43 | | <-s--><--n--> |
44 | | ____ ______ |
45 | | |_a1_|___a0_| |
46 | | |b1_|___b0_| |
47 | | <-t-><--n--> |
48 | | |
49 | | v0 = a0 * b0 # A(0)*B(0) |
50 | | vm1 = (a0 - a1) * (b0 - b1) # A(-1)*B(-1) |
51 | | vinf= a1 * b1 # A(inf)*B(inf) |
52 | | */ |
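
The three point values suffice because the cross term falls out by interpolation: with A = a1*beta^n + a0 and B = b1*beta^n + b0 (beta the limb base), expanding (a0 - a1)*(b0 - b1) gives a1*b0 + a0*b1 = v0 + vinf - vm1 (vm1 taken with its sign), hence A*B = vinf*beta^(2n) + (v0 + vinf - vm1)*beta^n + v0. A minimal standalone sketch of the same scheme on the 32-bit halves of a 64-bit word (illustrative only, using GCC/Clang's unsigned __int128, not GMP's limb code):

    #include <stdint.h>
    #include <stdio.h>
    #include <assert.h>

    /* Toy model of the scheme above: multiply two 64-bit numbers from
       their 32-bit halves with three 32x32->64 products.  */
    static unsigned __int128
    toy_toom22 (uint64_t a, uint64_t b)
    {
      uint64_t a0 = (uint32_t) a, a1 = a >> 32;
      uint64_t b0 = (uint32_t) b, b1 = b >> 32;

      uint64_t v0   = a0 * b0;                      /* A(0)*B(0)     */
      uint64_t vinf = a1 * b1;                      /* A(inf)*B(inf) */

      /* |a0-a1| * |b0-b1|, sign tracked separately as in vm1_neg.  */
      int neg = (a0 < a1) ^ (b0 < b1);
      uint64_t vm1 = (a0 < a1 ? a1 - a0 : a0 - a1)
                   * (b0 < b1 ? b1 - b0 : b0 - b1);

      /* Middle coefficient a1*b0 + a0*b1 = v0 + vinf -+ vm1.  */
      unsigned __int128 mid = (unsigned __int128) v0 + vinf;
      mid = neg ? mid + vm1 : mid - vm1;

      return ((unsigned __int128) vinf << 64) + (mid << 32) + v0;
    }

    int
    main (void)
    {
      uint64_t a = 0x123456789abcdef0u, b = 0xfedcba9876543210u;
      assert (toy_toom22 (a, b) == (unsigned __int128) a * b);
      puts ("ok");
      return 0;
    }
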
53 | | |
54 | | #if TUNE_PROGRAM_BUILD || WANT_FAT_BINARY |
55 | | #define MAYBE_mul_toom22 1 |
56 | | #else |
57 | | #define MAYBE_mul_toom22 \ |
58 | 0 | (MUL_TOOM33_THRESHOLD >= 2 * MUL_TOOM22_THRESHOLD) |
59 | | #endif |
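
MAYBE_mul_toom22 is a compile-time pruning hint: callers only enter this function with bn below MUL_TOOM33_THRESHOLD, so the half-size n seen by the recursive calls stays below roughly MUL_TOOM33_THRESHOLD / 2 for balanced operands. If that is already under MUL_TOOM22_THRESHOLD, i.e. MUL_TOOM33_THRESHOLD < 2 * MUL_TOOM22_THRESHOLD, the recursive branch in the macros below is essentially dead, and the constant 0 lets the compiler fold it down to a plain mpn_mul_basecase call. With a hypothetical tuned MUL_TOOM22_THRESHOLD of 30, a MUL_TOOM33_THRESHOLD of 100 keeps the branch (100 >= 60), while 50 would compile it out.
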
60 | | |
61 | | #define TOOM22_MUL_N_REC(p, a, b, n, ws) \ |
62 | 0 | do { \ |
63 | 0 | if (! MAYBE_mul_toom22 \ |
64 | 0 | || BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) \ |
65 | 0 | mpn_mul_basecase (p, a, n, b, n); \ |
66 | 0 | else \ |
67 | 0 | mpn_toom22_mul (p, a, n, b, n, ws); \ |
68 | 0 | } while (0) |
69 | | |
70 | | /* Normally, this calls mul_basecase or toom22_mul. But when the fraction |
71 | | MUL_TOOM33_THRESHOLD / MUL_TOOM22_THRESHOLD is large, an initially small |
72 | | relative unbalance will become a larger and larger relative unbalance with |
73 | | each recursion (the difference s-t will be invariant over recursive calls). |
74 | | Therefore, we need to call toom32_mul. FIXME: Suppress depending on |
75 | | MUL_TOOM33_THRESHOLD / MUL_TOOM22_THRESHOLD and on MUL_TOOM22_THRESHOLD. */ |
76 | | #define TOOM22_MUL_REC(p, a, an, b, bn, ws) \ |
77 | 0 | do { \ |
78 | 0 | if (! MAYBE_mul_toom22 \ |
79 | 0 | || BELOW_THRESHOLD (bn, MUL_TOOM22_THRESHOLD)) \ |
80 | 0 | mpn_mul_basecase (p, a, an, b, bn); \ |
81 | 0 | else if (4 * an < 5 * bn) \ |
82 | 0 | mpn_toom22_mul (p, a, an, b, bn, ws); \ |
83 | 0 | else \ |
84 | 0 | mpn_toom32_mul (p, a, an, b, bn, ws); \ |
85 | 0 | } while (0) |
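
The drift the comment describes is easy to see numerically: the vinf recursion maps (an, bn) to (s, t) = (floor(an/2), bn - ceil(an/2)), so the difference an - bn is preserved exactly while both sizes halve, and the relative unbalance grows each level until the 4 * an < 5 * bn test hands off to mpn_toom32_mul. A small sketch with hypothetical starting sizes:

    #include <stdio.h>

    /* Hypothetical demo: operand sizes seen along the vinf recursion.
       The difference an - bn is invariant, so the relative unbalance
       grows until the 4*an < 5*bn test hands off to mpn_toom32_mul.  */
    int
    main (void)
    {
      long an = 1000, bn = 990;          /* arbitrary example sizes */
      while (4 * an < 5 * bn)
        {
          printf ("an=%4ld bn=%4ld  an-bn=%2ld  an/bn=%.3f\n",
                  an, bn, an - bn, (double) an / bn);
          long n = (an + 1) / 2;         /* n = ceil(an/2) */
          long s = an / 2, t = bn - n;   /* sizes of the vinf call */
          an = s;
          bn = t;
        }
      return 0;
    }
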
86 | | |
87 | | void |
88 | | mpn_toom22_mul (mp_ptr pp, |
89 | | mp_srcptr ap, mp_size_t an, |
90 | | mp_srcptr bp, mp_size_t bn, |
91 | | mp_ptr scratch) |
92 | 0 | { |
93 | 0 | const int __gmpn_cpuvec_initialized = 1; /* fat-binary builds: we can only be reached through an initialized cpuvec, so let the compiler elide init checks in the recursive calls below */ |
94 | 0 | mp_size_t n, s, t; |
95 | 0 | int vm1_neg; |
96 | 0 | mp_limb_t cy, cy2; |
97 | 0 | mp_ptr asm1; |
98 | 0 | mp_ptr bsm1; |
99 | |
100 | 0 | #define a0 ap |
101 | 0 | #define a1 (ap + n) |
102 | 0 | #define b0 bp |
103 | 0 | #define b1 (bp + n) |
104 | |
105 | 0 | s = an >> 1; |
106 | 0 | n = an - s; |
107 | 0 | t = bn - n; |
108 | |
109 | 0 | ASSERT (an >= bn); |
110 | |
111 | 0 | ASSERT (0 < s && s <= n && (n - s) == (an & 1)); |
112 | 0 | ASSERT (0 < t && t <= s); |
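
Concretely, for an = 7 and bn = 5 the split gives s = 3, n = 4, t = 1: a1 holds the top 3 limbs of A, b1 the top limb of B, and both asserts hold (n - s = 1 = an & 1, and 0 < t <= s).
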
113 | |
114 | 0 | asm1 = pp; |
115 | 0 | bsm1 = pp + n; |
116 | |
117 | 0 | vm1_neg = 0; |
118 | | |
119 | | /* Compute asm1. */ |
120 | 0 | if ((an & 1) == 0) /* s == n */ |
121 | 0 | { |
122 | 0 | if (mpn_cmp (a0, a1, n) < 0) |
123 | 0 | { |
124 | 0 | mpn_sub_n (asm1, a1, a0, n); |
125 | 0 | vm1_neg = 1; |
126 | 0 | } |
127 | 0 | else |
128 | 0 | { |
129 | 0 | mpn_sub_n (asm1, a0, a1, n); |
130 | 0 | } |
131 | 0 | } |
132 | 0 | else /* n - s == 1 */ |
133 | 0 | { |
134 | 0 | if (a0[s] == 0 && mpn_cmp (a0, a1, s) < 0) |
135 | 0 | { |
136 | 0 | mpn_sub_n (asm1, a1, a0, s); |
137 | 0 | asm1[s] = 0; |
138 | 0 | vm1_neg = 1; |
139 | 0 | } |
140 | 0 | else |
141 | 0 | { |
142 | 0 | asm1[s] = a0[s] - mpn_sub_n (asm1, a0, a1, s); |
143 | 0 | } |
144 | 0 | } |
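
Both branches leave |a0 - a1| in asm1 and the sign in vm1_neg; the odd-an branch additionally handles a0's extra top limb a0[s]. For instance (a hypothetical two-limb case, least significant limb first) with s = 1, n = 2, a0 = {3, 0}, a1 = {5}: a0[s] == 0 and 3 < 5, so the first branch stores {2, 0} and sets vm1_neg = 1. With a0 = {3, 7} instead, the else branch stores {beta - 2, 6}, the borrow from 3 - 5 being taken out of a0[s] = 7.
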
145 | | |
146 | | /* Compute bsm1. */ |
147 | 0 | if (t == n) |
148 | 0 | { |
149 | 0 | if (mpn_cmp (b0, b1, n) < 0) |
150 | 0 | { |
151 | 0 | mpn_sub_n (bsm1, b1, b0, n); |
152 | 0 | vm1_neg ^= 1; |
153 | 0 | } |
154 | 0 | else |
155 | 0 | { |
156 | 0 | mpn_sub_n (bsm1, b0, b1, n); |
157 | 0 | } |
158 | 0 | } |
159 | 0 | else |
160 | 0 | { |
161 | 0 | if (mpn_zero_p (b0 + t, n - t) && mpn_cmp (b0, b1, t) < 0) |
162 | 0 | { |
163 | 0 | mpn_sub_n (bsm1, b1, b0, t); |
164 | 0 | MPN_ZERO (bsm1 + t, n - t); |
165 | 0 | vm1_neg ^= 1; |
166 | 0 | } |
167 | 0 | else |
168 | 0 | { |
169 | 0 | mpn_sub (bsm1, b0, n, b1, t); |
170 | 0 | } |
171 | 0 | } |
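
The sign of the true vm1 is the product of the signs of the two differences, sign(vm1) = sign(a0 - a1) * sign(b0 - b1), which is why the b side toggles vm1_neg with ^= 1 rather than assigning it.
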
172 | |
173 | 0 | #define v0 pp /* 2n */ |
174 | 0 | #define vinf (pp + 2 * n) /* s+t */ |
175 | 0 | #define vm1 scratch /* 2n */ |
176 | 0 | #define scratch_out scratch + 2 * n |
177 | | |
178 | | /* vm1, 2n limbs */ |
179 | 0 | TOOM22_MUL_N_REC (vm1, asm1, bsm1, n, scratch_out); |
180 | |
181 | 0 | if (s > t) TOOM22_MUL_REC (vinf, a1, s, b1, t, scratch_out); |
182 | 0 | else TOOM22_MUL_N_REC (vinf, a1, b1, s, scratch_out); |
183 | | |
184 | | /* v0, 2n limbs */ |
185 | 0 | TOOM22_MUL_N_REC (v0, ap, bp, n, scratch_out); |
186 | | |
187 | | /* H(v0) + L(vinf) */ |
188 | 0 | cy = mpn_add_n (pp + 2 * n, v0 + n, vinf, n); |
189 | | |
190 | | /* L(v0) + (H(v0) + L(vinf)) */ |
191 | 0 | cy2 = cy + mpn_add_n (pp + n, pp + 2 * n, v0, n); |
192 | | |
193 | | /* (H(v0) + L(vinf)) + H(vinf) */ |
194 | 0 | cy += mpn_add (pp + 2 * n, pp + 2 * n, n, vinf + n, s + t - n); |
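
At this point v0 occupies pp[0 .. 2n) and vinf occupies pp[2n .. 2n+s+t), and the three additions assemble the aligned sum before vm1 is folded in. Writing L() and H() for the low and high n limbs, the layout built here is:

    pp[0 .. n)     L(v0)
    pp[n .. 2n)    L(v0) + H(v0) + L(vinf)      carry joins cy in cy2
    pp[2n .. 3n)   H(v0) + L(vinf) + H(vinf)    carries accumulate in cy
    pp[n .. 3n)    then -= vm1 (or += vm1 when vm1_neg)

matching the beta^n and beta^(2n) terms of the identity in the header comment.
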
195 | |
196 | 0 | if (vm1_neg) |
197 | 0 | cy += mpn_add_n (pp + n, pp + n, vm1, 2 * n); |
198 | 0 | else { |
199 | 0 | cy -= mpn_sub_n (pp + n, pp + n, vm1, 2 * n); |
200 | 0 | if (UNLIKELY (cy + 1 == 0)) { /* cy is negative */ |
201 | | /* The total contribution of v0+vinf-vm1 cannot be negative. */ |
202 | | #if WANT_ASSERT |
203 | | /* The borrow in cy stops the propagation of the carry cy2. */ |
204 | | ASSERT (cy2 == 1); |
205 | | cy += mpn_add_1 (pp + 2 * n, pp + 2 * n, n, cy2); |
206 | | ASSERT (cy == 0); |
207 | | #else |
208 | | /* Without assertions, we simply fill the area with zeros. */ |
209 | 0 | MPN_FILL (pp + 2 * n, n, 0); |
210 | | /* ASSERT (s + t == n || mpn_zero_p (pp + 3 * n, s + t - n)); */ |
211 | 0 | #endif |
212 | 0 | return; |
213 | 0 | } |
214 | 0 | } |
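
The UNLIKELY branch handles a transient underflow: v0 + vinf - vm1 is nonnegative as a whole, but its high n limbs alone can go negative while the not-yet-applied carry cy2 at position 2n would restore them. In that case cy wraps to all ones, pp[2n .. 3n) must hold beta^n - 1, and adding the pending cy2 = 1 turns the region to zero with a carry out that exactly cancels the borrow, so nothing propagates past pp[3n). The assert build verifies this arithmetically; the normal build just writes the zeros with MPN_FILL and returns.
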
215 | | |
216 | 0 | ASSERT (cy <= 2); |
217 | 0 | ASSERT (cy2 <= 2); |
218 | |
219 | 0 | MPN_INCR_U (pp + 2 * n, s + t, cy2); |
220 | | /* if s+t==n, cy is zero, but we should not access pp[3*n] at all. */ |
221 | 0 | MPN_INCR_U (pp + 3 * n, s + t - n, cy); |
222 | 0 | } |
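
A call sketch, assuming gmp-impl.h's internal helpers keep their current shapes (mpn_toom22_mul_itch for the scratch bound, the TMP_* allocators); as the header comment warns, none of this is a stable interface:

    #include "gmp-impl.h"

    /* Multiply two n-limb operands via Toom-22; pp gets 2n limbs.  */
    static void
    mul_n_via_toom22 (mp_ptr pp, mp_srcptr ap, mp_srcptr bp, mp_size_t n)
    {
      mp_ptr scratch;
      TMP_DECL;

      TMP_MARK;
      scratch = TMP_ALLOC_LIMBS (mpn_toom22_mul_itch (n, n));
      mpn_toom22_mul (pp, ap, n, bp, n, scratch);
      TMP_FREE;
    }
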