/src/gmp-6.2.1/mpn/jacobi.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* jacobi.c |
2 | | |
3 | | THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY |
4 | | SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST |
5 | | GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. |
6 | | |
7 | | Copyright 1996, 1998, 2000-2004, 2008, 2010, 2011 Free Software Foundation, |
8 | | Inc. |
9 | | |
10 | | This file is part of the GNU MP Library. |
11 | | |
12 | | The GNU MP Library is free software; you can redistribute it and/or modify |
13 | | it under the terms of either: |
14 | | |
15 | | * the GNU Lesser General Public License as published by the Free |
16 | | Software Foundation; either version 3 of the License, or (at your |
17 | | option) any later version. |
18 | | |
19 | | or |
20 | | |
21 | | * the GNU General Public License as published by the Free Software |
22 | | Foundation; either version 2 of the License, or (at your option) any |
23 | | later version. |
24 | | |
25 | | or both in parallel, as here. |
26 | | |
27 | | The GNU MP Library is distributed in the hope that it will be useful, but |
28 | | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
29 | | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
30 | | for more details. |
31 | | |
32 | | You should have received copies of the GNU General Public License and the |
33 | | GNU Lesser General Public License along with the GNU MP Library. If not, |
34 | | see https://www.gnu.org/licenses/. */ |
35 | | |
36 | | #include "gmp-impl.h" |
37 | | #include "longlong.h" |
38 | | |
39 | | #ifndef JACOBI_DC_THRESHOLD |
40 | | #define JACOBI_DC_THRESHOLD GCD_DC_THRESHOLD |
41 | | #endif |
42 | | |
43 | | /* Schönhage's rules: |
44 | | * |
45 | | * Assume r0 = r1 q1 + r2, with r0 odd, and r1 = q2 r2 + r3 |
46 | | * |
47 | | * If r1 is odd, then |
48 | | * |
49 | | * (r1 | r0) = s(r1, r0) (r0 | r1) = s(r1, r0) (r2, r1) |
50 | | * |
51 | | * where s(x,y) = (-1)^{(x-1)(y-1)/4} = (-1)^[x = y = 3 (mod 4)]. |
52 | | * |
53 | | * If r1 is even, r2 must be odd. We have |
54 | | * |
55 | | * (r1 | r0) = (r1 - r0 | r0) = (-1)^(r0-1)/2 (r0 - r1 | r0) |
56 | | * = (-1)^(r0-1)/2 s(r0, r0 - r1) (r0 | r0 - r1) |
57 | | * = (-1)^(r0-1)/2 s(r0, r0 - r1) (r1 | r0 - r1) |
58 | | * |
59 | | * Now, if r1 = 0 (mod 4), then the sign factor is +1, and repeating |
60 | | * q1 times gives |
61 | | * |
62 | | * (r1 | r0) = (r1 | r2) = (r3 | r2) |
63 | | * |
64 | | * On the other hand, if r1 = 2 (mod 4), the sign factor is |
65 | | * (-1)^{(r0-1)/2}, and repeating q1 times gives the exponent |
66 | | * |
67 | | * (r0-1)/2 + (r0-r1-1)/2 + ... + (r0 - (q1-1) r1)/2 |
68 | | * = q1 (r0-1)/2 + q1 (q1-1)/2 |
69 | | * |
70 | | * and we can summarize the even case as |
71 | | * |
72 | | * (r1 | r0) = t(r1, r0, q1) (r3 | r2) |
73 | | * |
74 | | * where t(x,y,q) = (-1)^{[x = 2 (mod 4)] (q(y-1)/2 + y(q-1)/2)} |
75 | | * |
76 | | * What about termination? The remainder sequence ends with (0|1) = 1 |
77 | | * (or (0 | r) = 0 if r != 1). What are the possible cases? If r1 is |
78 | | * odd, r2 may be zero. If r1 is even, then r2 = r0 - q1 r1 is odd and |
79 | | * hence non-zero. We may have r3 = r1 - q2 r2 = 0. |
80 | | * |
81 | | * Examples: (11|15) = - (15|11) = - (4|11) |
82 | | * (4|11) = (4| 3) = (1| 3) |
83 | | * (1| 3) = (3|1) = (0|1) = 1 |
84 | | * |
85 | | * (2|7) = (2|1) = (0|1) = 1 |
86 | | * |
87 | | * Detail: (2|7) = (2-7|7) = (-1|7)(5|7) = -(7|5) = -(2|5) |
88 | | * (2|5) = (2-5|5) = (-1|5)(3|5) = (5|3) = (2|3) |
89 | | * (2|3) = (2-3|3) = (-1|3)(1|3) = -(3|1) = -(2|1) |
90 | | * |
91 | | */ |
92 | | |
93 | | /* In principle, the state consists of four variables: e (one bit), a, |
94 | | b (two bits each), d (one bit). Collected factors are (-1)^e. a and |
95 | | b are the least significant bits of the current remainders. d |
96 | | (denominator) is 0 if we're currently subtracting multiplies of a |
97 | | from b, and 1 if we're subtracting b from a. |
98 | | |
99 | | e is stored in the least significant bit, while a, b and d are |
100 | | coded as only 13 distinct values in bits 1-4, according to the |
101 | | following table. For rows not mentioning d, the value is either |
102 | | implied, or it doesn't matter. */ |
103 | | |
104 | | #if WANT_ASSERT |
105 | | static const struct |
106 | | { |
107 | | unsigned char a; |
108 | | unsigned char b; |
109 | | } decode_table[13] = { |
110 | | /* 0 */ { 0, 1 }, |
111 | | /* 1 */ { 0, 3 }, |
112 | | /* 2 */ { 1, 1 }, |
113 | | /* 3 */ { 1, 3 }, |
114 | | /* 4 */ { 2, 1 }, |
115 | | /* 5 */ { 2, 3 }, |
116 | | /* 6 */ { 3, 1 }, |
117 | | /* 7 */ { 3, 3 }, /* d = 1 */ |
118 | | /* 8 */ { 1, 0 }, |
119 | | /* 9 */ { 1, 2 }, |
120 | | /* 10 */ { 3, 0 }, |
121 | | /* 11 */ { 3, 2 }, |
122 | | /* 12 */ { 3, 3 }, /* d = 0 */ |
123 | | }; |
124 | | #define JACOBI_A(bits) (decode_table[(bits)>>1].a) |
125 | | #define JACOBI_B(bits) (decode_table[(bits)>>1].b) |
126 | | #endif /* WANT_ASSERT */ |
127 | | |
128 | | const unsigned char jacobi_table[208] = { |
129 | | #include "jacobitab.h" |
130 | | }; |
131 | | |
132 | 0 | #define BITS_FAIL 31 |
133 | | |
134 | | static void |
135 | | jacobi_hook (void *p, mp_srcptr gp, mp_size_t gn, |
136 | | mp_srcptr qp, mp_size_t qn, int d) |
137 | 0 | { |
138 | 0 | unsigned *bitsp = (unsigned *) p; |
139 | |
|
140 | 0 | if (gp) |
141 | 0 | { |
142 | 0 | ASSERT (gn > 0); |
143 | 0 | if (gn != 1 || gp[0] != 1) |
144 | 0 | { |
145 | 0 | *bitsp = BITS_FAIL; |
146 | 0 | return; |
147 | 0 | } |
148 | 0 | } |
149 | | |
150 | 0 | if (qp) |
151 | 0 | { |
152 | 0 | ASSERT (qn > 0); |
153 | 0 | ASSERT (d >= 0); |
154 | 0 | *bitsp = mpn_jacobi_update (*bitsp, d, qp[0] & 3); |
155 | 0 | } |
156 | 0 | } |
157 | | |
158 | 0 | #define CHOOSE_P(n) (2*(n) / 3) |
159 | | |
160 | | int |
161 | | mpn_jacobi_n (mp_ptr ap, mp_ptr bp, mp_size_t n, unsigned bits) |
162 | 0 | { |
163 | 0 | mp_size_t scratch; |
164 | 0 | mp_size_t matrix_scratch; |
165 | 0 | mp_ptr tp; |
166 | |
|
167 | 0 | TMP_DECL; |
168 | |
|
169 | 0 | ASSERT (n > 0); |
170 | 0 | ASSERT ( (ap[n-1] | bp[n-1]) > 0); |
171 | 0 | ASSERT ( (bp[0] | ap[0]) & 1); |
172 | | |
173 | | /* FIXME: Check for small sizes first, before setting up temporary |
174 | | storage etc. */ |
175 | 0 | scratch = MPN_GCD_SUBDIV_STEP_ITCH(n); |
176 | |
|
177 | 0 | if (ABOVE_THRESHOLD (n, JACOBI_DC_THRESHOLD)) |
178 | 0 | { |
179 | 0 | mp_size_t hgcd_scratch; |
180 | 0 | mp_size_t update_scratch; |
181 | 0 | mp_size_t p = CHOOSE_P (n); |
182 | 0 | mp_size_t dc_scratch; |
183 | |
|
184 | 0 | matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p); |
185 | 0 | hgcd_scratch = mpn_hgcd_itch (n - p); |
186 | 0 | update_scratch = p + n - 1; |
187 | |
|
188 | 0 | dc_scratch = matrix_scratch + MAX(hgcd_scratch, update_scratch); |
189 | 0 | if (dc_scratch > scratch) |
190 | 0 | scratch = dc_scratch; |
191 | 0 | } |
192 | |
|
193 | 0 | TMP_MARK; |
194 | 0 | tp = TMP_ALLOC_LIMBS(scratch); |
195 | |
|
196 | 0 | while (ABOVE_THRESHOLD (n, JACOBI_DC_THRESHOLD)) |
197 | 0 | { |
198 | 0 | struct hgcd_matrix M; |
199 | 0 | mp_size_t p = 2*n/3; |
200 | 0 | mp_size_t matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p); |
201 | 0 | mp_size_t nn; |
202 | 0 | mpn_hgcd_matrix_init (&M, n - p, tp); |
203 | |
|
204 | 0 | nn = mpn_hgcd_jacobi (ap + p, bp + p, n - p, &M, &bits, |
205 | 0 | tp + matrix_scratch); |
206 | 0 | if (nn > 0) |
207 | 0 | { |
208 | 0 | ASSERT (M.n <= (n - p - 1)/2); |
209 | 0 | ASSERT (M.n + p <= (p + n - 1) / 2); |
210 | | /* Temporary storage 2 (p + M->n) <= p + n - 1. */ |
211 | 0 | n = mpn_hgcd_matrix_adjust (&M, p + nn, ap, bp, p, tp + matrix_scratch); |
212 | 0 | } |
213 | 0 | else |
214 | 0 | { |
215 | | /* Temporary storage n */ |
216 | 0 | n = mpn_gcd_subdiv_step (ap, bp, n, 0, jacobi_hook, &bits, tp); |
217 | 0 | if (!n) |
218 | 0 | { |
219 | 0 | TMP_FREE; |
220 | 0 | return bits == BITS_FAIL ? 0 : mpn_jacobi_finish (bits); |
221 | 0 | } |
222 | 0 | } |
223 | 0 | } |
224 | | |
225 | 0 | while (n > 2) |
226 | 0 | { |
227 | 0 | struct hgcd_matrix1 M; |
228 | 0 | mp_limb_t ah, al, bh, bl; |
229 | 0 | mp_limb_t mask; |
230 | |
|
231 | 0 | mask = ap[n-1] | bp[n-1]; |
232 | 0 | ASSERT (mask > 0); |
233 | | |
234 | 0 | if (mask & GMP_NUMB_HIGHBIT) |
235 | 0 | { |
236 | 0 | ah = ap[n-1]; al = ap[n-2]; |
237 | 0 | bh = bp[n-1]; bl = bp[n-2]; |
238 | 0 | } |
239 | 0 | else |
240 | 0 | { |
241 | 0 | int shift; |
242 | |
|
243 | 0 | count_leading_zeros (shift, mask); |
244 | 0 | ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]); |
245 | 0 | al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]); |
246 | 0 | bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]); |
247 | 0 | bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]); |
248 | 0 | } |
249 | | |
250 | | /* Try an mpn_nhgcd2 step */ |
251 | 0 | if (mpn_hgcd2_jacobi (ah, al, bh, bl, &M, &bits)) |
252 | 0 | { |
253 | 0 | n = mpn_matrix22_mul1_inverse_vector (&M, tp, ap, bp, n); |
254 | 0 | MP_PTR_SWAP (ap, tp); |
255 | 0 | } |
256 | 0 | else |
257 | 0 | { |
258 | | /* mpn_hgcd2 has failed. Then either one of a or b is very |
259 | | small, or the difference is very small. Perform one |
260 | | subtraction followed by one division. */ |
261 | 0 | n = mpn_gcd_subdiv_step (ap, bp, n, 0, &jacobi_hook, &bits, tp); |
262 | 0 | if (!n) |
263 | 0 | { |
264 | 0 | TMP_FREE; |
265 | 0 | return bits == BITS_FAIL ? 0 : mpn_jacobi_finish (bits); |
266 | 0 | } |
267 | 0 | } |
268 | 0 | } |
269 | | |
270 | 0 | if (bits >= 16) |
271 | 0 | MP_PTR_SWAP (ap, bp); |
272 | |
|
273 | 0 | ASSERT (bp[0] & 1); |
274 | | |
275 | 0 | if (n == 1) |
276 | 0 | { |
277 | 0 | mp_limb_t al, bl; |
278 | 0 | al = ap[0]; |
279 | 0 | bl = bp[0]; |
280 | |
|
281 | 0 | TMP_FREE; |
282 | 0 | if (bl == 1) |
283 | 0 | return 1 - 2*(bits & 1); |
284 | 0 | else |
285 | 0 | return mpn_jacobi_base (al, bl, bits << 1); |
286 | 0 | } |
287 | | |
288 | 0 | else |
289 | 0 | { |
290 | 0 | int res = mpn_jacobi_2 (ap, bp, bits & 1); |
291 | 0 | TMP_FREE; |
292 | 0 | return res; |
293 | 0 | } |
294 | 0 | } |