/src/gmp/mpn/sqr_basecase.c
Line | Count | Source |
1 | | /* mpn_sqr_basecase -- Internal routine to square a natural number |
2 | | of length n. |
3 | | |
4 | | THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE. IT IS ONLY |
5 | | SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES. |
6 | | |
7 | | |
8 | | Copyright 1991-1994, 1996, 1997, 2000-2005, 2008, 2010, 2011, 2017 Free |
9 | | Software Foundation, Inc. |
10 | | |
11 | | This file is part of the GNU MP Library. |
12 | | |
13 | | The GNU MP Library is free software; you can redistribute it and/or modify |
14 | | it under the terms of either: |
15 | | |
16 | | * the GNU Lesser General Public License as published by the Free |
17 | | Software Foundation; either version 3 of the License, or (at your |
18 | | option) any later version. |
19 | | |
20 | | or |
21 | | |
22 | | * the GNU General Public License as published by the Free Software |
23 | | Foundation; either version 2 of the License, or (at your option) any |
24 | | later version. |
25 | | |
26 | | or both in parallel, as here. |
27 | | |
28 | | The GNU MP Library is distributed in the hope that it will be useful, but |
29 | | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
30 | | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
31 | | for more details. |
32 | | |
33 | | You should have received copies of the GNU General Public License and the |
34 | | GNU Lesser General Public License along with the GNU MP Library. If not, |
35 | | see https://www.gnu.org/licenses/. */ |
36 | | |
37 | | #include "gmp-impl.h" |
38 | | #include "longlong.h" |
39 | | |
40 | | |
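 | | /* MPN_SQR_DIAGONAL: square each limb of {up,n}, storing the low half of |
 | | u[i]^2 at rp[2*i] and the high half at rp[2*i+1]. The GMP_NAIL_BITS |
 | | shifts keep the nail bits clear; without nails they compile to nothing. */ |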
41 | | #if HAVE_NATIVE_mpn_sqr_diagonal |
42 | | #define MPN_SQR_DIAGONAL(rp, up, n) \ |
43 | | mpn_sqr_diagonal (rp, up, n) |
44 | | #else |
45 | | #define MPN_SQR_DIAGONAL(rp, up, n) \ |
46 | 218M | do { \ |
47 | 218M | mp_size_t _i; \ |
48 | 4.15G | for (_i = 0; _i < (n); _i++) \ |
49 | 3.93G | { \ |
50 | 3.93G | mp_limb_t ul, lpl; \ |
51 | 3.93G | ul = (up)[_i]; \ |
52 | 3.93G | umul_ppmm ((rp)[2 * _i + 1], lpl, ul, ul << GMP_NAIL_BITS); \ |
53 | 3.93G | (rp)[2 * _i] = lpl >> GMP_NAIL_BITS; \ |
54 | 3.93G | } \ |
55 | 218M | } while (0) |
56 | | #endif |
57 | | |
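 | | /* MPN_SQR_DIAG_ADDLSH1: given the cross products in {tp, 2*n-2}, aligned |
 | | so that tp[0] has weight B^1, form the full square as |
 | | rp = diagonal squares + 2*tp. The generic fallback shifts tp left one |
 | | bit and then adds; with a native addlsh1 the two passes are fused. */ |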
58 | | #if HAVE_NATIVE_mpn_sqr_diag_addlsh1 |
59 | | #define MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n) \ |
60 | | mpn_sqr_diag_addlsh1 (rp, tp, up, n) |
61 | | #else |
62 | | #if HAVE_NATIVE_mpn_addlsh1_n |
63 | | #define MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n) \ |
64 | | do { \ |
65 | | mp_limb_t cy; \ |
66 | | MPN_SQR_DIAGONAL (rp, up, n); \ |
67 | | cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2); \ |
68 | | rp[2 * n - 1] += cy; \ |
69 | | } while (0) |
70 | | #else |
71 | | #define MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n) \ |
72 | 218M | do { \ |
73 | 218M | mp_limb_t cy; \ |
74 | 218M | MPN_SQR_DIAGONAL (rp, up, n); \ |
75 | 218M | cy = mpn_lshift (tp, tp, 2 * n - 2, 1); \ |
76 | 218M | cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2); \ |
77 | 218M | rp[2 * n - 1] += cy; \ |
78 | 218M | } while (0) |
79 | | #endif |
80 | | #endif |
81 | | |
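 | | /* Every variant below computes {rp,2n} = {up,n}^2 from the identity |
 | | |
 | | u^2 = sum(i) u[i]^2 * B^(2i) + 2 * sum(i<j) u[i]*u[j] * B^(i+j) |
 | | |
 | | e.g. for n = 2, (u1*B + u0)^2 = u1^2*B^2 + 2*u1*u0*B + u0^2: first the |
 | | cross products are accumulated, then the diagonal squares are added and |
 | | the cross products doubled in a combining pass. */ |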
82 | | |
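 | | /* Exactly one variant is compiled: each one, once selected, defines |
 | | READY_WITH_mpn_sqr_basecase to suppress the variants that follow. */ |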
83 | | #undef READY_WITH_mpn_sqr_basecase |
84 | | |
85 | | |
86 | | #if ! defined (READY_WITH_mpn_sqr_basecase) && HAVE_NATIVE_mpn_addmul_2s |
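 | | /* mpn_sqr_basecase using mpn_addmul_2s, a squaring-oriented variant of |
 | | mpn_addmul_2. Each pass accumulates the cross products of the limb pair |
 | | (u[i], u[i+1]) with the limbs above u[i]; unlike plain mpn_addmul_2 (next |
 | | variant), it apparently avoids the unwanted u[i+1]^2 term, so no sign |
 | | fix-up is needed before MPN_SQR_DIAG_ADDLSH1. */ |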
87 | | void |
88 | | mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n) |
89 | | { |
90 | | mp_size_t i; |
91 | | mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD]; |
92 | | mp_ptr tp = tarr; |
93 | | mp_limb_t cy; |
94 | | |
95 | | /* must fit 2*n limbs in tarr */ |
96 | | ASSERT (n <= SQR_TOOM2_THRESHOLD); |
97 | | |
98 | | if ((n & 1) != 0) |
99 | | { |
100 | | if (n == 1) |
101 | | { |
102 | | mp_limb_t ul, lpl; |
103 | | ul = up[0]; |
104 | | umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS); |
105 | | rp[0] = lpl >> GMP_NAIL_BITS; |
106 | | return; |
107 | | } |
108 | | |
109 | | MPN_ZERO (tp, n); |
110 | | |
111 | | for (i = 0; i <= n - 2; i += 2) |
112 | | { |
113 | | cy = mpn_addmul_2s (tp + 2 * i, up + i + 1, n - (i + 1), up + i); |
114 | | tp[n + i] = cy; |
115 | | } |
116 | | } |
117 | | else |
118 | | { |
119 | | if (n == 2) |
120 | | { |
121 | | #if HAVE_NATIVE_mpn_mul_2 |
122 | | rp[3] = mpn_mul_2 (rp, up, 2, up); |
123 | | #else |
124 | | rp[0] = 0; |
125 | | rp[1] = 0; |
126 | | rp[3] = mpn_addmul_2 (rp, up, 2, up); |
127 | | #endif |
128 | | return; |
129 | | } |
130 | | |
131 | | MPN_ZERO (tp, n); |
132 | | |
133 | | for (i = 0; i <= n - 4; i += 2) |
134 | | { |
135 | | cy = mpn_addmul_2s (tp + 2 * i, up + i + 1, n - (i + 1), up + i); |
136 | | tp[n + i] = cy; |
137 | | } |
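 | | /* For even n the pairwise passes only cover rows 0 .. n-4; the remaining |
 | | cross product u[n-2]*u[n-1] is added as a single mpn_addmul_1 row. */ |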
138 | | cy = mpn_addmul_1 (tp + 2 * n - 4, up + n - 1, 1, up[n - 2]); |
139 | | tp[2 * n - 3] = cy; |
140 | | } |
141 | | |
142 | | MPN_SQR_DIAG_ADDLSH1 (rp, tp, up, n); |
143 | | } |
144 | | #define READY_WITH_mpn_sqr_basecase |
145 | | #endif |
146 | | |
147 | | |
148 | | #if ! defined (READY_WITH_mpn_sqr_basecase) && HAVE_NATIVE_mpn_addmul_2 |
149 | | |
150 | | /* mpn_sqr_basecase using plain mpn_addmul_2. |
151 | | |
152 | | This is tricky, since we have to let mpn_addmul_2 make some undesirable |
153 | | multiplies, u[k]*u[k], that we would like to let mpn_sqr_diagonal handle. |
154 | | This forces us to conditionally add or subtract the mpn_sqr_diagonal |
155 | | results. Examples of the products we form: |
156 | | |
157 | | n = 4 n = 5 n = 6 |
158 | | u1u0 * u3u2u1 u1u0 * u4u3u2u1 u1u0 * u5u4u3u2u1 |
159 | | u2 * u3 u3u2 * u4u3 u3u2 * u5u4u3 |
160 | | u4 * u5 |
161 | | add: u0 u2 u3 add: u0 u2 u4 add: u0 u2 u4 u5 |
162 | | sub: u1 sub: u1 u3 sub: u1 u3 |
163 | | */ |
164 | | |
165 | | void |
166 | | mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n) |
167 | | { |
168 | | mp_size_t i; |
169 | | mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD]; |
170 | | mp_ptr tp = tarr; |
171 | | mp_limb_t cy; |
172 | | |
173 | | /* must fit 2*n limbs in tarr */ |
174 | | ASSERT (n <= SQR_TOOM2_THRESHOLD); |
175 | | |
176 | | if ((n & 1) != 0) |
177 | | { |
178 | | mp_limb_t x0, x1; |
179 | | |
180 | | if (n == 1) |
181 | | { |
182 | | mp_limb_t ul, lpl; |
183 | | ul = up[0]; |
184 | | umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS); |
185 | | rp[0] = lpl >> GMP_NAIL_BITS; |
186 | | return; |
187 | | } |
188 | | |
189 | | /* The code below doesn't like unnormalized operands. Since such |
190 | | operands are unusual, handle them with a dumb recursion. */ |
191 | | if (up[n - 1] == 0) |
192 | | { |
193 | | rp[2 * n - 2] = 0; |
194 | | rp[2 * n - 1] = 0; |
195 | | mpn_sqr_basecase (rp, up, n - 1); |
196 | | return; |
197 | | } |
198 | | |
199 | | MPN_ZERO (tp, n); |
200 | | |
201 | | for (i = 0; i <= n - 2; i += 2) |
202 | | { |
203 | | cy = mpn_addmul_2 (tp + 2 * i, up + i + 1, n - (i + 1), up + i); |
204 | | tp[n + i] = cy; |
205 | | } |
206 | | |
207 | | MPN_SQR_DIAGONAL (rp, up, n); |
208 | | |
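 | | /* tp also picked up the unwanted squares u[k]^2 for odd k (the "sub" |
 | | rows above). Negate the matching diagonal pairs rp[2k], rp[2k+1] |
 | | (i = 2, 6, 10, ...) so that adding 2*tp below nets +u[k]^2. A borrow |
 | | out of __GMPN_SUB_1 is *added* to the next square before that square is |
 | | itself negated, since -(s + cy) = -s - cy, and a two-limb square s |
 | | satisfies s + 1 < B^2, so the increment cannot overflow the pair. The |
 | | last borrow is absorbed by the top square u[n-1]^2, nonzero thanks to |
 | | the normalization check above. */ |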
209 | | for (i = 2;; i += 4) |
210 | | { |
211 | | x0 = rp[i + 0]; |
212 | | rp[i + 0] = (-x0) & GMP_NUMB_MASK; |
213 | | x1 = rp[i + 1]; |
214 | | rp[i + 1] = (-x1 - (x0 != 0)) & GMP_NUMB_MASK; |
215 | | __GMPN_SUB_1 (cy, rp + i + 2, rp + i + 2, 2, (x1 | x0) != 0); |
216 | | if (i + 4 >= 2 * n) |
217 | | break; |
218 | | mpn_incr_u (rp + i + 4, cy); |
219 | | } |
220 | | } |
221 | | else |
222 | | { |
223 | | mp_limb_t x0, x1; |
224 | | |
225 | | if (n == 2) |
226 | | { |
227 | | #if HAVE_NATIVE_mpn_mul_2 |
228 | | rp[3] = mpn_mul_2 (rp, up, 2, up); |
229 | | #else |
230 | | rp[0] = 0; |
231 | | rp[1] = 0; |
232 | | rp[3] = mpn_addmul_2 (rp, up, 2, up); |
233 | | #endif |
234 | | return; |
235 | | } |
236 | | |
237 | | /* The code below doesn't like unnormalized operands. Since such |
238 | | operands are unusual, handle them with a dumb recursion. */ |
239 | | if (up[n - 1] == 0) |
240 | | { |
241 | | rp[2 * n - 2] = 0; |
242 | | rp[2 * n - 1] = 0; |
243 | | mpn_sqr_basecase (rp, up, n - 1); |
244 | | return; |
245 | | } |
246 | | |
247 | | MPN_ZERO (tp, n); |
248 | | |
249 | | for (i = 0; i <= n - 4; i += 2) |
250 | | { |
251 | | cy = mpn_addmul_2 (tp + 2 * i, up + i + 1, n - (i + 1), up + i); |
252 | | tp[n + i] = cy; |
253 | | } |
254 | | cy = mpn_addmul_1 (tp + 2 * n - 4, up + n - 1, 1, up[n - 2]); |
255 | | tp[2 * n - 3] = cy; |
256 | | |
257 | | MPN_SQR_DIAGONAL (rp, up, n); |
258 | | |
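 | | /* Same sign fix-up as in the odd case above, except that the loop stops |
 | | before the two un-negated top squares, and the borrow from the last |
 | | negated pair is subtracted from that tail by the trailing mpn_decr_u. */ |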
259 | | for (i = 2;; i += 4) |
260 | | { |
261 | | x0 = rp[i + 0]; |
262 | | rp[i + 0] = (-x0) & GMP_NUMB_MASK; |
263 | | x1 = rp[i + 1]; |
264 | | rp[i + 1] = (-x1 - (x0 != 0)) & GMP_NUMB_MASK; |
265 | | if (i + 6 >= 2 * n) |
266 | | break; |
267 | | __GMPN_SUB_1 (cy, rp + i + 2, rp + i + 2, 2, (x1 | x0) != 0); |
268 | | mpn_incr_u (rp + i + 4, cy); |
269 | | } |
270 | | mpn_decr_u (rp + i + 2, (x1 | x0) != 0); |
271 | | } |
272 | | |
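 | | /* rp += 2*tp: fold in the doubled cross products. Combined with the |
 | | negations above, every diagonal term comes out as +u[k]^2. */ |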
273 | | #if HAVE_NATIVE_mpn_addlsh1_n |
274 | | cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2); |
275 | | #else |
276 | | cy = mpn_lshift (tp, tp, 2 * n - 2, 1); |
277 | | cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2); |
278 | | #endif |
279 | | rp[2 * n - 1] += cy; |
280 | | } |
281 | | #define READY_WITH_mpn_sqr_basecase |
282 | | #endif |
283 | | |
284 | | |
285 | | #if ! defined (READY_WITH_mpn_sqr_basecase) && HAVE_NATIVE_mpn_sqr_diag_addlsh1 |
286 | | |
287 | | /* mpn_sqr_basecase using mpn_addmul_1 and mpn_sqr_diag_addlsh1, avoiding stack |
288 | | allocation. */ |
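 | | /* The cross products are accumulated directly into rp, starting at rp[1] |
 | | (weight B^1), so no temporary array is needed; mpn_sqr_diag_addlsh1 then |
 | | combines everything in place, reading the cross products from |
 | | {xp+1, 2n-2} while writing the square to {xp, 2n}. */ |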
289 | | void |
290 | | mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n) |
291 | | { |
292 | | if (n == 1) |
293 | | { |
294 | | mp_limb_t ul, lpl; |
295 | | ul = up[0]; |
296 | | umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS); |
297 | | rp[0] = lpl >> GMP_NAIL_BITS; |
298 | | } |
299 | | else |
300 | | { |
301 | | mp_size_t i; |
302 | | mp_ptr xp; |
303 | | |
304 | | rp += 1; |
305 | | rp[n - 1] = mpn_mul_1 (rp, up + 1, n - 1, up[0]); |
306 | | for (i = n - 2; i != 0; i--) |
307 | | { |
308 | | up += 1; |
309 | | rp += 2; |
310 | | rp[i] = mpn_addmul_1 (rp, up + 1, i, up[0]); |
311 | | } |
312 | | |
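 | | /* The loop advanced rp by 2n - 3 limbs in all and up by n - 2, so |
 | | xp = rp - 2n + 3 and up - n + 2 recover the original rp and up. */ |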
313 | | xp = rp - 2 * n + 3; |
314 | | mpn_sqr_diag_addlsh1 (xp, xp + 1, up - n + 2, n); |
315 | | } |
316 | | } |
317 | | #define READY_WITH_mpn_sqr_basecase |
318 | | #endif |
319 | | |
320 | | |
321 | | #if ! defined (READY_WITH_mpn_sqr_basecase) |
322 | | |
323 | | /* Default mpn_sqr_basecase using mpn_addmul_1. */ |
324 | | void |
325 | | mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n) |
326 | 218M | { |
327 | 218M | mp_size_t i; |
328 | | |
329 | 218M | ASSERT (n >= 1); |
330 | 218M | ASSERT (! MPN_OVERLAP_P (rp, 2*n, up, n)); |
331 | | |
332 | 218M | if (n == 1) |
333 | 1.63k | { |
334 | 1.63k | mp_limb_t ul, lpl; |
335 | 1.63k | ul = up[0]; |
336 | 1.63k | umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS); |
337 | 1.63k | rp[0] = lpl >> GMP_NAIL_BITS; |
338 | 1.63k | } |
339 | 218M | else |
340 | 218M | { |
341 | 218M | mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD]; |
342 | 218M | mp_ptr tp = tarr; |
343 | 218M | mp_limb_t cy; |
344 | | |
345 | | /* must fit 2*n limbs in tarr */ |
346 | 218M | ASSERT (n <= SQR_TOOM2_THRESHOLD); |
347 | | |
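 | | /* Accumulate the cross products u[i]*u[j], i < j, into tp, where tp[k] |
 | | carries weight B^(k+1): the row with multiplier up[i-1] starts at |
 | | tp[2i - 2] with its carry limb at tp[n + i - 2]. The first row uses |
 | | mpn_mul_1, which initializes tp, so no MPN_ZERO is needed here. */ |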
348 | 218M | cy = mpn_mul_1 (tp, up + 1, n - 1, up[0]); |
349 | 218M | tp[n - 1] = cy; |
350 | 3.71G | for (i = 2; i < n; i++) |
351 | 3.49G | { |
352 | 3.49G | mp_limb_t cy; |
353 | 3.49G | cy = mpn_addmul_1 (tp + 2 * i - 2, up + i, n - i, up[i - 1]); |
354 | 3.49G | tp[n + i - 2] = cy; |
355 | 3.49G | } |
356 | | |
357 | 218M | MPN_SQR_DIAG_ADDLSH1 (rp, tp, up, n); |
358 | 218M | } |
359 | 218M | } |
360 | | #define READY_WITH_mpn_sqr_basecase |
361 | | #endif |
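 | | /* Usage sketch (hypothetical caller; real code reaches this through |
 | | mpn_sqr, which dispatches here for small n -- see the ASSERTs above): |
 | | |
 | | mp_limb_t u[2] = { 3, 4 };   -- the value 4*B + 3 |
 | | mp_limb_t r[4];              -- 2n limbs; must not overlap u |
 | | mpn_sqr_basecase (r, u, 2);  -- {r,4} = (4*B + 3)^2 |
 | | */ |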