Line | Count | Source (jump to first uncovered line) |
1 | | /* mpn_jacobi_base -- limb/limb Jacobi symbol with restricted arguments. |
2 | | |
3 | | THIS INTERFACE IS PRELIMINARY AND MIGHT DISAPPEAR OR BE SUBJECT TO |
4 | | INCOMPATIBLE CHANGES IN A FUTURE RELEASE OF GMP. |
5 | | |
6 | | Copyright 1999-2002, 2010, 2020 Free Software Foundation, Inc. |
7 | | |
8 | | This file is part of the GNU MP Library. |
9 | | |
10 | | The GNU MP Library is free software; you can redistribute it and/or modify |
11 | | it under the terms of either: |
12 | | |
13 | | * the GNU Lesser General Public License as published by the Free |
14 | | Software Foundation; either version 3 of the License, or (at your |
15 | | option) any later version. |
16 | | |
17 | | or |
18 | | |
19 | | * the GNU General Public License as published by the Free Software |
20 | | Foundation; either version 2 of the License, or (at your option) any |
21 | | later version. |
22 | | |
23 | | or both in parallel, as here. |
24 | | |
25 | | The GNU MP Library is distributed in the hope that it will be useful, but |
26 | | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
27 | | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
28 | | for more details. |
29 | | |
30 | | You should have received copies of the GNU General Public License and the |
31 | | GNU Lesser General Public License along with the GNU MP Library. If not, |
32 | | see https://www.gnu.org/licenses/. */ |
33 | | |
34 | | #include "gmp-impl.h" |
35 | | #include "longlong.h" |
36 | | |
37 | | |
38 | | /* Use the simple loop by default. The generic count_trailing_zeros is not |
39 | | very fast, and the extra trickery of method 3 has proven to be less use |
40 | | than might have been though. */ |
41 | | #ifndef JACOBI_BASE_METHOD |
42 | | #define JACOBI_BASE_METHOD 2 |
43 | | #endif |
44 | | |
45 | | |
46 | | /* Use count_trailing_zeros. */ |
47 | | #if JACOBI_BASE_METHOD == 1 |
48 | | #define PROCESS_TWOS_ANY \ |
49 | 0 | { \ |
50 | 0 | mp_limb_t twos; \ |
51 | 0 | count_trailing_zeros (twos, a); \ |
52 | 0 | result_bit1 ^= JACOBI_TWOS_U_BIT1 (twos, b); \ |
53 | 0 | a >>= twos; \ |
54 | 0 | } |
55 | 0 | #define PROCESS_TWOS_EVEN PROCESS_TWOS_ANY |
56 | | #endif |
57 | | |
58 | | /* Use a simple loop. A disadvantage of this is that there's a branch on a |
59 | | 50/50 chance of a 0 or 1 low bit. */ |
60 | | #if JACOBI_BASE_METHOD == 2 |
61 | | #define PROCESS_TWOS_EVEN \ |
62 | | { \ |
63 | | int two; \ |
64 | | two = JACOBI_TWO_U_BIT1 (b); \ |
65 | | do \ |
66 | | { \ |
67 | | a >>= 1; \ |
68 | | result_bit1 ^= two; \ |
69 | | ASSERT (a != 0); \ |
70 | | } \ |
71 | | while ((a & 1) == 0); \ |
72 | | } |
73 | | #define PROCESS_TWOS_ANY \ |
74 | | if ((a & 1) == 0) \ |
75 | | PROCESS_TWOS_EVEN; |
76 | | #endif |
77 | | |
78 | | /* Process one bit arithmetically, then a simple loop. This cuts the loop |
79 | | condition down to a 25/75 chance, which should branch predict better. |
80 | | The CPU will need a reasonable variable left shift. */ |
81 | | #if JACOBI_BASE_METHOD == 3 |
82 | | #define PROCESS_TWOS_EVEN \ |
83 | | { \ |
84 | | int two, mask, shift; \ |
85 | | \ |
86 | | two = JACOBI_TWO_U_BIT1 (b); \ |
87 | | mask = (~a & 2); \ |
88 | | a >>= 1; \ |
89 | | \ |
90 | | shift = (~a & 1); \ |
91 | | a >>= shift; \ |
92 | | result_bit1 ^= two ^ (two & mask); \ |
93 | | \ |
94 | | while ((a & 1) == 0) \ |
95 | | { \ |
96 | | a >>= 1; \ |
97 | | result_bit1 ^= two; \ |
98 | | ASSERT (a != 0); \ |
99 | | } \ |
100 | | } |
101 | | #define PROCESS_TWOS_ANY \ |
102 | | { \ |
103 | | int two, mask, shift; \ |
104 | | \ |
105 | | two = JACOBI_TWO_U_BIT1 (b); \ |
106 | | shift = (~a & 1); \ |
107 | | a >>= shift; \ |
108 | | \ |
109 | | mask = shift << 1; \ |
110 | | result_bit1 ^= (two & mask); \ |
111 | | \ |
112 | | while ((a & 1) == 0) \ |
113 | | { \ |
114 | | a >>= 1; \ |
115 | | result_bit1 ^= two; \ |
116 | | ASSERT (a != 0); \ |
117 | | } \ |
118 | | } |
119 | | #endif |
120 | | |
121 | | #if JACOBI_BASE_METHOD < 4 |
122 | | /* Calculate the value of the Jacobi symbol (a/b) of two mp_limb_t's, but |
123 | | with a restricted range of inputs accepted, namely b>1, b odd. |
124 | | |
125 | | The initial result_bit1 is taken as a parameter for the convenience of |
126 | | mpz_kronecker_ui() et al. The sign changes both here and in those |
127 | | routines accumulate nicely in bit 1, see the JACOBI macros. |
128 | | |
129 | | The return value here is the normal +1, 0, or -1. Note that +1 and -1 |
130 | | have bit 1 in the "BIT1" sense, which could be useful if the caller is |
131 | | accumulating it into some extended calculation. |
132 | | |
133 | | Duplicating the loop body to avoid the MP_LIMB_T_SWAP(a,b) would be |
134 | | possible, but a couple of tests suggest it's not a significant speedup, |
135 | | and may even be a slowdown, so what's here is good enough for now. */ |
136 | | |
137 | | int |
138 | | mpn_jacobi_base (mp_limb_t a, mp_limb_t b, int result_bit1) |
139 | 0 | { |
140 | 0 | ASSERT (b & 1); /* b odd */ |
141 | 0 | ASSERT (b != 1); |
142 | |
|
143 | 0 | if (a == 0) |
144 | 0 | return 0; |
145 | | |
146 | 0 | PROCESS_TWOS_ANY; |
147 | 0 | if (a == 1) |
148 | 0 | goto done; |
149 | | |
150 | 0 | if (a >= b) |
151 | 0 | goto a_gt_b; |
152 | | |
153 | 0 | for (;;) |
154 | 0 | { |
155 | 0 | result_bit1 ^= JACOBI_RECIP_UU_BIT1 (a, b); |
156 | 0 | MP_LIMB_T_SWAP (a, b); |
157 | |
|
158 | 0 | a_gt_b: |
159 | 0 | do |
160 | 0 | { |
161 | | /* working on (a/b), a,b odd, a>=b */ |
162 | 0 | ASSERT (a & 1); |
163 | 0 | ASSERT (b & 1); |
164 | 0 | ASSERT (a >= b); |
165 | |
|
166 | 0 | if ((a -= b) == 0) |
167 | 0 | return 0; |
168 | | |
169 | 0 | PROCESS_TWOS_EVEN; |
170 | 0 | if (a == 1) |
171 | 0 | goto done; |
172 | 0 | } |
173 | 0 | while (a >= b); |
174 | 0 | } |
175 | | |
176 | 0 | done: |
177 | 0 | return JACOBI_BIT1_TO_PN (result_bit1); |
178 | 0 | } |
179 | | #endif |
180 | | |
181 | | #if JACOBI_BASE_METHOD == 4 |
182 | | /* Computes (a/b) for odd b > 1 and any a. The initial bit is taken as a |
183 | | * parameter. We have no need for the convention that the sign is in |
184 | | * bit 1, internally we use bit 0. */ |
185 | | |
186 | | /* FIXME: Could try table-based count_trailing_zeros. */ |
187 | | int |
188 | | mpn_jacobi_base (mp_limb_t a, mp_limb_t b, int bit) |
189 | | { |
190 | | int c; |
191 | | |
192 | | ASSERT (b & 1); |
193 | | ASSERT (b > 1); |
194 | | |
195 | | if (a == 0) |
196 | | /* This is the only line which depends on b > 1 */ |
197 | | return 0; |
198 | | |
199 | | bit >>= 1; |
200 | | |
201 | | /* Below, we represent a and b shifted right so that the least |
202 | | significant one bit is implicit. */ |
203 | | |
204 | | b >>= 1; |
205 | | |
206 | | count_trailing_zeros (c, a); |
207 | | bit ^= c & (b ^ (b >> 1)); |
208 | | |
209 | | /* We may have c==GMP_LIMB_BITS-1, so we can't use a>>c+1. */ |
210 | | a >>= c; |
211 | | a >>= 1; |
212 | | |
213 | | do |
214 | | { |
215 | | mp_limb_t t = a - b; |
216 | | mp_limb_t bgta = LIMB_HIGHBIT_TO_MASK (t); |
217 | | |
218 | | if (t == 0) |
219 | | return 0; |
220 | | |
221 | | /* If b > a, invoke reciprocity */ |
222 | | bit ^= (bgta & a & b); |
223 | | |
224 | | /* b <-- min (a, b) */ |
225 | | b += (bgta & t); |
226 | | |
227 | | /* a <-- |a - b| */ |
228 | | a = (t ^ bgta) - bgta; |
229 | | |
230 | | /* Number of trailing zeros is the same no matter if we look at |
231 | | * t or a, but using t gives more parallelism. */ |
232 | | count_trailing_zeros (c, t); |
233 | | c ++; |
234 | | /* (2/b) = -1 if b = 3 or 5 mod 8 */ |
235 | | bit ^= c & (b ^ (b >> 1)); |
236 | | a >>= c; |
237 | | } |
238 | | while (a > 0); |
239 | | |
240 | | return 1-2*(bit & 1); |
241 | | } |
242 | | #endif /* JACOBI_BASE_METHOD == 4 */ |