/src/libgmp/mpn/toom_eval_pm2.c
Line | Count | Source |
1 | | /* mpn_toom_eval_pm2 -- Evaluate a polynomial in +2 and -2 |
2 | | |
3 | | Contributed to the GNU project by Niels Möller and Marco Bodrato |
4 | | |
5 | | THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY |
6 | | SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST |
7 | | GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. |
8 | | |
9 | | Copyright 2009 Free Software Foundation, Inc. |
10 | | |
11 | | This file is part of the GNU MP Library. |
12 | | |
13 | | The GNU MP Library is free software; you can redistribute it and/or modify |
14 | | it under the terms of either: |
15 | | |
16 | | * the GNU Lesser General Public License as published by the Free |
17 | | Software Foundation; either version 3 of the License, or (at your |
18 | | option) any later version. |
19 | | |
20 | | or |
21 | | |
22 | | * the GNU General Public License as published by the Free Software |
23 | | Foundation; either version 2 of the License, or (at your option) any |
24 | | later version. |
25 | | |
26 | | or both in parallel, as here. |
27 | | |
28 | | The GNU MP Library is distributed in the hope that it will be useful, but |
29 | | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
30 | | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
31 | | for more details. |
32 | | |
33 | | You should have received copies of the GNU General Public License and the |
34 | | GNU Lesser General Public License along with the GNU MP Library. If not, |
35 | | see https://www.gnu.org/licenses/. */ |
36 | | |
37 | | #include "gmp-impl.h" |
38 | | |
39 | | /* DO_addlsh2(d,a,b,n,cy) computes cy,{d,n} <- {a,n} + 4*(cy,{b,n}): on entry cy is the limb above {b,n}, on exit it is the limb above {d,n}. |
40 | | It can be used as DO_addlsh2(d,a,d,n,d[n]), for accumulation on {d,n+1}. */ |
41 | | #if HAVE_NATIVE_mpn_addlsh2_n |
42 | 12.4k | #define DO_addlsh2(d, a, b, n, cy) \ |
43 | 12.4k | do { \ |
44 | 8.95k | (cy) <<= 2; \ |
45 | 8.95k | (cy) += mpn_addlsh2_n(d, a, b, n); \ |
46 | 8.95k | } while (0) |
47 | | #else /* !HAVE_NATIVE_mpn_addlsh2_n */ |
48 | | #if HAVE_NATIVE_mpn_addlsh_n |
49 | | #define DO_addlsh2(d, a, b, n, cy) \ |
50 | | do { \ |
51 | | (cy) <<= 2; \ |
52 | | (cy) += mpn_addlsh_n(d, a, b, n, 2); \ |
53 | | } while (0) |
54 | | #else /* !HAVE_NATIVE_mpn_addlsh_n */ |
55 | | /* Generic fallback: shift {b,n} left by 2 into {d,n}, then add {a,n}. Because d is written before a is read, |
56 | | this is not a general substitute for addlsh2: it is correct if d == b, but it is not if d == a. */ |
57 | | #define DO_addlsh2(d, a, b, n, cy) \ |
58 | | do { \ |
59 | | (cy) <<= 2; \ |
60 | | (cy) += mpn_lshift(d, b, n, 2); \ |
61 | | (cy) += mpn_add_n(d, d, a, n); \ |
62 | | } while (0) |
63 | | #endif /* HAVE_NATIVE_mpn_addlsh_n */ |
64 | | #endif /* HAVE_NATIVE_mpn_addlsh2_n */ |
65 | | |
66 | | /* Evaluates a polynomial of degree 2 < k < GMP_NUMB_BITS, in the |
67 | | points +2 and -2. The coefficients are read from xp: k pieces of n limbs each, followed by a last piece of hn limbs (0 < hn <= n). xp2 receives the value at +2 and xm2 the absolute value at -2, both on n+1 limbs; tp is scratch space of n+1 limbs. */ |
68 | | /* It returns 0 or ~0, depending on the sign of the value at -2 (~0 when it is taken as negative); xm2 itself only holds the magnitude. */ |
69 | | unsigned |
70 | | mpn_toom_eval_pm2 (mp_ptr xp2, mp_ptr xm2, unsigned k, |
71 | | mp_srcptr xp, mp_size_t n, mp_size_t hn, mp_ptr tp) |
72 | 1.72k | { |
73 | 1.72k | int i; |
74 | 1.72k | unsigned neg; |
75 | 1.72k | mp_limb_t cy; |
76 | | |
77 | 1.72k | ASSERT (k >= 3); |
78 | 1.72k | ASSERT (k < GMP_NUMB_BITS); |
79 | | |
80 | 1.72k | ASSERT (hn > 0); |
81 | 1.72k | ASSERT (hn <= n); |
82 | | |
83 | | /* The degree k is also the number of full-size coefficients, so |
84 | | * the last coefficient, of size hn, starts at xp + k*n. */ |
85 | | /* xp2 <- coefficients k, k-2, k-4, ... combined as acc = c + 4*acc. The top coefficient has only hn limbs, so after the first step the carry is propagated through the upper n-hn limbs of coefficient k-2. */ |
86 | 1.72k | cy = 0; |
87 | 1.72k | DO_addlsh2 (xp2, xp + (k-2) * n, xp + k * n, hn, cy); |
88 | 1.72k | if (hn != n) |
89 | 1.63k | cy = mpn_add_1 (xp2 + hn, xp + (k-2) * n + hn, n - hn, cy); |
90 | 4.49k | for (i = k - 4; i >= 0; i -= 2) |
91 | 2.76k | DO_addlsh2 (xp2, xp + i * n, xp2, n, cy); |
92 | 1.72k | xp2[n] = cy; |
93 | | /* From here on k is one less than the degree, i.e. the index of the highest coefficient of the other parity. */ |
94 | 1.72k | k--; |
95 | | /* tp <- the same accumulation over coefficients k, k-2, ... (decremented k); all of these have n limbs. */ |
96 | 1.72k | cy = 0; |
97 | 1.72k | DO_addlsh2 (tp, xp + (k-2) * n, xp + k * n, n, cy); |
98 | 4.45k | for (i = k - 4; i >= 0; i -= 2) |
99 | 2.72k | DO_addlsh2 (tp, xp + i * n, tp, n, cy); |
100 | 1.72k | tp[n] = cy; |
101 | | /* Double whichever sum was built from the odd-index coefficients: tp when the decremented k is odd, xp2 otherwise. */ |
102 | 1.72k | if (k & 1) |
103 | 1.72k | ASSERT_NOCARRY(mpn_lshift (tp , tp , n + 1, 1)); |
104 | 1.68k | else |
105 | 1.72k | ASSERT_NOCARRY(mpn_lshift (xp2, xp2, n + 1, 1)); |
106 | | /* xm2 gets |xp2 - tp| (the smaller operand is subtracted from the larger one), then xp2 gets xp2 + tp; both on n+1 limbs. */ |
107 | 1.72k | neg = (mpn_cmp (xp2, tp, n + 1) < 0); |
108 | | |
109 | | #if HAVE_NATIVE_mpn_add_n_sub_n |
110 | | if (neg) |
111 | | mpn_add_n_sub_n (xp2, xm2, tp, xp2, n + 1); |
112 | | else |
113 | | mpn_add_n_sub_n (xp2, xm2, xp2, tp, n + 1); |
114 | | #else /* !HAVE_NATIVE_mpn_add_n_sub_n */ |
115 | 1.72k | if (neg) |
116 | 1.33k | mpn_sub_n (xm2, tp, xp2, n + 1); |
117 | 390 | else |
118 | 390 | mpn_sub_n (xm2, xp2, tp, n + 1); |
119 | | |
120 | 1.72k | mpn_add_n (xp2, xp2, tp, n + 1); |
121 | 1.72k | #endif /* !HAVE_NATIVE_mpn_add_n_sub_n */ |
122 | | /* Bounds on the top limbs. NOTE(review): 1<<(k+2) and 1<<(k+3) are evaluated in int, so a large k would overflow the shift -- confirm callers keep k small. */ |
123 | 1.72k | ASSERT (xp2[n] < (1<<(k+2))-1); |
124 | 1.72k | ASSERT (xm2[n] < ((1<<(k+3))-1 - (1^k&1))/3); |
125 | | /* neg so far means xp2 < tp. 1 ^ k & 1 parses as 1 ^ (k & 1), so neg is flipped when the decremented k is even, i.e. when xp2 held the odd-index sum. If both sums are equal in that case, neg becomes 1 although xm2 is zero. */ |
126 | 1.72k | neg ^= 1 ^ k & 1; |
127 | | /* neg is 0 or 1; negating it in unsigned arithmetic yields 0 or ~0. */ |
128 | 1.72k | return - neg; |
129 | 1.72k | } |
130 | | /* The helper macro is dropped once the function is defined. */ |
131 | | #undef DO_addlsh2 |