/src/gmp-6.2.1/mpn/jacobi.c
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | /* jacobi.c  | 
2  |  |  | 
3  |  |    THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY  | 
4  |  |    SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST  | 
5  |  |    GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.  | 
6  |  |  | 
7  |  | Copyright 1996, 1998, 2000-2004, 2008, 2010, 2011 Free Software Foundation,  | 
8  |  | Inc.  | 
9  |  |  | 
10  |  | This file is part of the GNU MP Library.  | 
11  |  |  | 
12  |  | The GNU MP Library is free software; you can redistribute it and/or modify  | 
13  |  | it under the terms of either:  | 
14  |  |  | 
15  |  |   * the GNU Lesser General Public License as published by the Free  | 
16  |  |     Software Foundation; either version 3 of the License, or (at your  | 
17  |  |     option) any later version.  | 
18  |  |  | 
19  |  | or  | 
20  |  |  | 
21  |  |   * the GNU General Public License as published by the Free Software  | 
22  |  |     Foundation; either version 2 of the License, or (at your option) any  | 
23  |  |     later version.  | 
24  |  |  | 
25  |  | or both in parallel, as here.  | 
26  |  |  | 
27  |  | The GNU MP Library is distributed in the hope that it will be useful, but  | 
28  |  | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY  | 
29  |  | or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License  | 
30  |  | for more details.  | 
31  |  |  | 
32  |  | You should have received copies of the GNU General Public License and the  | 
33  |  | GNU Lesser General Public License along with the GNU MP Library.  If not,  | 
34  |  | see https://www.gnu.org/licenses/.  */  | 
35  |  |  | 
36  |  | #include "gmp-impl.h"  | 
37  |  | #include "longlong.h"  | 
38  |  |  | 
39  |  | #ifndef JACOBI_DC_THRESHOLD  | 
40  |  | #define JACOBI_DC_THRESHOLD GCD_DC_THRESHOLD  | 
41  |  | #endif  | 
42  |  |  | 
43  |  | /* Schönhage's rules:  | 
44  |  |  *  | 
45  |  |  * Assume r0 = r1 q1 + r2, with r0 odd, and r1 = q2 r2 + r3  | 
46  |  |  *  | 
47  |  |  * If r1 is odd, then  | 
48  |  |  *  | 
49  |  |  *   (r1 | r0) = s(r1, r0) (r0 | r1) = s(r1, r0) (r2, r1)  | 
50  |  |  *  | 
51  |  |  * where s(x,y) = (-1)^{(x-1)(y-1)/4} = (-1)^[x = y = 3 (mod 4)]. | 
52  |  |  *  | 
53  |  |  * If r1 is even, r2 must be odd. We have  | 
54  |  |  *  | 
55  |  |  *   (r1 | r0) = (r1 - r0 | r0) = (-1)^(r0-1)/2 (r0 - r1 | r0)  | 
56  |  |  *             = (-1)^(r0-1)/2 s(r0, r0 - r1) (r0 | r0 - r1)  | 
57  |  |  *             = (-1)^(r0-1)/2 s(r0, r0 - r1) (r1 | r0 - r1)  | 
58  |  |  *  | 
59  |  |  * Now, if r1 = 0 (mod 4), then the sign factor is +1, and repeating  | 
60  |  |  * q1 times gives  | 
61  |  |  *  | 
62  |  |  *   (r1 | r0) = (r1 | r2) = (r3 | r2)  | 
63  |  |  *  | 
64  |  |  * On the other hand, if r1 = 2 (mod 4), the sign factor is  | 
65  |  |  * (-1)^{(r0-1)/2}, and repeating q1 times gives the exponent | 
66  |  |  *  | 
67  |  |  *   (r0-1)/2 + (r0-r1-1)/2 + ... + (r0 - (q1-1) r1)/2  | 
68  |  |  *   = q1 (r0-1)/2 + q1 (q1-1)/2  | 
69  |  |  *  | 
70  |  |  * and we can summarize the even case as  | 
71  |  |  *  | 
72  |  |  *   (r1 | r0) = t(r1, r0, q1) (r3 | r2)  | 
73  |  |  *  | 
74  |  |  * where t(x,y,q) = (-1)^{[x = 2 (mod 4)] (q(y-1)/2 + y(q-1)/2)} | 
75  |  |  *  | 
76  |  |  * What about termination? The remainder sequence ends with (0|1) = 1  | 
77  |  |  * (or (0 | r) = 0 if r != 1). What are the possible cases? If r1 is  | 
78  |  |  * odd, r2 may be zero. If r1 is even, then r2 = r0 - q1 r1 is odd and  | 
79  |  |  * hence non-zero. We may have r3 = r1 - q2 r2 = 0.  | 
80  |  |  *  | 
81  |  |  * Examples: (11|15) = - (15|11) = - (4|11)  | 
82  |  |  *            (4|11) =    (4| 3) =   (1| 3)  | 
83  |  |  *            (1| 3) = (3|1) = (0|1) = 1  | 
84  |  |  *  | 
85  |  |  *             (2|7) = (2|1) = (0|1) = 1  | 
86  |  |  *  | 
87  |  |  * Detail:     (2|7) = (2-7|7) = (-1|7)(5|7) = -(7|5) = -(2|5)  | 
88  |  |  *             (2|5) = (2-5|5) = (-1|5)(3|5) =  (5|3) =  (2|3)  | 
89  |  |  *             (2|3) = (2-3|3) = (-1|3)(1|3) = -(3|1) = -(2|1)  | 
90  |  |  *  | 
91  |  |  */  | 
92  |  |  | 
93  |  | /* In principle, the state consists of four variables: e (one bit), a,  | 
94  |  |    b (two bits each), d (one bit). Collected factors are (-1)^e. a and  | 
95  |  |    b are the least significant bits of the current remainders. d  | 
96  |  |    (denominator) is 0 if we're currently subtracting multiplies of a  | 
97  |  |    from b, and 1 if we're subtracting b from a.  | 
98  |  |  | 
99  |  |    e is stored in the least significant bit, while a, b and d are  | 
100  |  |    coded as only 13 distinct values in bits 1-4, according to the  | 
101  |  |    following table. For rows not mentioning d, the value is either  | 
102  |  |    implied, or it doesn't matter. */  | 
103  |  |  | 
104  |  | #if WANT_ASSERT  | 
105  |  | static const struct  | 
106  |  | { | 
107  |  |   unsigned char a;  | 
108  |  |   unsigned char b;  | 
109  |  | } decode_table[13] = { | 
110  |  |   /*  0 */ { 0, 1 }, | 
111  |  |   /*  1 */ { 0, 3 }, | 
112  |  |   /*  2 */ { 1, 1 }, | 
113  |  |   /*  3 */ { 1, 3 }, | 
114  |  |   /*  4 */ { 2, 1 }, | 
115  |  |   /*  5 */ { 2, 3 }, | 
116  |  |   /*  6 */ { 3, 1 }, | 
117  |  |   /*  7 */ { 3, 3 }, /* d = 1 */ | 
118  |  |   /*  8 */ { 1, 0 }, | 
119  |  |   /*  9 */ { 1, 2 }, | 
120  |  |   /* 10 */ { 3, 0 }, | 
121  |  |   /* 11 */ { 3, 2 }, | 
122  |  |   /* 12 */ { 3, 3 }, /* d = 0 */ | 
123  |  | };  | 
124  |  | #define JACOBI_A(bits) (decode_table[(bits)>>1].a)  | 
125  |  | #define JACOBI_B(bits) (decode_table[(bits)>>1].b)  | 
126  |  | #endif /* WANT_ASSERT */  | 
127  |  |  | 
128  |  | const unsigned char jacobi_table[208] = { | 
129  |  | #include "jacobitab.h"  | 
130  |  | };  | 
131  |  |  | 
132  | 0  | #define BITS_FAIL 31  | 
133  |  |  | 
134  |  | static void  | 
135  |  | jacobi_hook (void *p, mp_srcptr gp, mp_size_t gn,  | 
136  |  |        mp_srcptr qp, mp_size_t qn, int d)  | 
137  | 0  | { | 
138  | 0  |   unsigned *bitsp = (unsigned *) p;  | 
139  |  | 
  | 
140  | 0  |   if (gp)  | 
141  | 0  |     { | 
142  | 0  |       ASSERT (gn > 0);  | 
143  | 0  |       if (gn != 1 || gp[0] != 1)  | 
144  | 0  |   { | 
145  | 0  |     *bitsp = BITS_FAIL;  | 
146  | 0  |     return;  | 
147  | 0  |   }  | 
148  | 0  |     }  | 
149  |  |  | 
150  | 0  |   if (qp)  | 
151  | 0  |     { | 
152  | 0  |       ASSERT (qn > 0);  | 
153  | 0  |       ASSERT (d >= 0);  | 
154  | 0  |       *bitsp = mpn_jacobi_update (*bitsp, d, qp[0] & 3);  | 
155  | 0  |     }  | 
156  | 0  | }  | 
157  |  |  | 
158  | 0  | #define CHOOSE_P(n) (2*(n) / 3)  | 
159  |  |  | 
160  |  | int  | 
161  |  | mpn_jacobi_n (mp_ptr ap, mp_ptr bp, mp_size_t n, unsigned bits)  | 
162  | 0  | { | 
163  | 0  |   mp_size_t scratch;  | 
164  | 0  |   mp_size_t matrix_scratch;  | 
165  | 0  |   mp_ptr tp;  | 
166  |  | 
  | 
167  | 0  |   TMP_DECL;  | 
168  |  | 
  | 
169  | 0  |   ASSERT (n > 0);  | 
170  | 0  |   ASSERT ( (ap[n-1] | bp[n-1]) > 0);  | 
171  | 0  |   ASSERT ( (bp[0] | ap[0]) & 1);  | 
172  |  |  | 
173  |  |   /* FIXME: Check for small sizes first, before setting up temporary  | 
174  |  |      storage etc. */  | 
175  | 0  |   scratch = MPN_GCD_SUBDIV_STEP_ITCH(n);  | 
176  |  | 
  | 
177  | 0  |   if (ABOVE_THRESHOLD (n, JACOBI_DC_THRESHOLD))  | 
178  | 0  |     { | 
179  | 0  |       mp_size_t hgcd_scratch;  | 
180  | 0  |       mp_size_t update_scratch;  | 
181  | 0  |       mp_size_t p = CHOOSE_P (n);  | 
182  | 0  |       mp_size_t dc_scratch;  | 
183  |  | 
  | 
184  | 0  |       matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);  | 
185  | 0  |       hgcd_scratch = mpn_hgcd_itch (n - p);  | 
186  | 0  |       update_scratch = p + n - 1;  | 
187  |  | 
  | 
188  | 0  |       dc_scratch = matrix_scratch + MAX(hgcd_scratch, update_scratch);  | 
189  | 0  |       if (dc_scratch > scratch)  | 
190  | 0  |   scratch = dc_scratch;  | 
191  | 0  |     }  | 
192  |  | 
  | 
193  | 0  |   TMP_MARK;  | 
194  | 0  |   tp = TMP_ALLOC_LIMBS(scratch);  | 
195  |  | 
  | 
196  | 0  |   while (ABOVE_THRESHOLD (n, JACOBI_DC_THRESHOLD))  | 
197  | 0  |     { | 
198  | 0  |       struct hgcd_matrix M;  | 
199  | 0  |       mp_size_t p = 2*n/3;  | 
200  | 0  |       mp_size_t matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);  | 
201  | 0  |       mp_size_t nn;  | 
202  | 0  |       mpn_hgcd_matrix_init (&M, n - p, tp);  | 
203  |  | 
  | 
204  | 0  |       nn = mpn_hgcd_jacobi (ap + p, bp + p, n - p, &M, &bits,  | 
205  | 0  |           tp + matrix_scratch);  | 
206  | 0  |       if (nn > 0)  | 
207  | 0  |   { | 
208  | 0  |     ASSERT (M.n <= (n - p - 1)/2);  | 
209  | 0  |     ASSERT (M.n + p <= (p + n - 1) / 2);  | 
210  |  |     /* Temporary storage 2 (p + M->n) <= p + n - 1. */  | 
211  | 0  |     n = mpn_hgcd_matrix_adjust (&M, p + nn, ap, bp, p, tp + matrix_scratch);  | 
212  | 0  |   }  | 
213  | 0  |       else  | 
214  | 0  |   { | 
215  |  |     /* Temporary storage n */  | 
216  | 0  |     n = mpn_gcd_subdiv_step (ap, bp, n, 0, jacobi_hook, &bits, tp);  | 
217  | 0  |     if (!n)  | 
218  | 0  |       { | 
219  | 0  |         TMP_FREE;  | 
220  | 0  |         return bits == BITS_FAIL ? 0 : mpn_jacobi_finish (bits);  | 
221  | 0  |       }  | 
222  | 0  |   }  | 
223  | 0  |     }  | 
224  |  |  | 
225  | 0  |   while (n > 2)  | 
226  | 0  |     { | 
227  | 0  |       struct hgcd_matrix1 M;  | 
228  | 0  |       mp_limb_t ah, al, bh, bl;  | 
229  | 0  |       mp_limb_t mask;  | 
230  |  | 
  | 
231  | 0  |       mask = ap[n-1] | bp[n-1];  | 
232  | 0  |       ASSERT (mask > 0);  | 
233  |  |  | 
234  | 0  |       if (mask & GMP_NUMB_HIGHBIT)  | 
235  | 0  |   { | 
236  | 0  |     ah = ap[n-1]; al = ap[n-2];  | 
237  | 0  |     bh = bp[n-1]; bl = bp[n-2];  | 
238  | 0  |   }  | 
239  | 0  |       else  | 
240  | 0  |   { | 
241  | 0  |     int shift;  | 
242  |  | 
  | 
243  | 0  |     count_leading_zeros (shift, mask);  | 
244  | 0  |     ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);  | 
245  | 0  |     al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);  | 
246  | 0  |     bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);  | 
247  | 0  |     bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);  | 
248  | 0  |   }  | 
249  |  |  | 
250  |  |       /* Try an mpn_nhgcd2 step */  | 
251  | 0  |       if (mpn_hgcd2_jacobi (ah, al, bh, bl, &M, &bits))  | 
252  | 0  |   { | 
253  | 0  |     n = mpn_matrix22_mul1_inverse_vector (&M, tp, ap, bp, n);  | 
254  | 0  |     MP_PTR_SWAP (ap, tp);  | 
255  | 0  |   }  | 
256  | 0  |       else  | 
257  | 0  |   { | 
258  |  |     /* mpn_hgcd2 has failed. Then either one of a or b is very  | 
259  |  |        small, or the difference is very small. Perform one  | 
260  |  |        subtraction followed by one division. */  | 
261  | 0  |     n = mpn_gcd_subdiv_step (ap, bp, n, 0, &jacobi_hook, &bits, tp);  | 
262  | 0  |     if (!n)  | 
263  | 0  |       { | 
264  | 0  |         TMP_FREE;  | 
265  | 0  |         return bits == BITS_FAIL ? 0 : mpn_jacobi_finish (bits);  | 
266  | 0  |       }  | 
267  | 0  |   }  | 
268  | 0  |     }  | 
269  |  |  | 
270  | 0  |   if (bits >= 16)  | 
271  | 0  |     MP_PTR_SWAP (ap, bp);  | 
272  |  | 
  | 
273  | 0  |   ASSERT (bp[0] & 1);  | 
274  |  |  | 
275  | 0  |   if (n == 1)  | 
276  | 0  |     { | 
277  | 0  |       mp_limb_t al, bl;  | 
278  | 0  |       al = ap[0];  | 
279  | 0  |       bl = bp[0];  | 
280  |  | 
  | 
281  | 0  |       TMP_FREE;  | 
282  | 0  |       if (bl == 1)  | 
283  | 0  |   return 1 - 2*(bits & 1);  | 
284  | 0  |       else  | 
285  | 0  |   return mpn_jacobi_base (al, bl, bits << 1);  | 
286  | 0  |     }  | 
287  |  |  | 
288  | 0  |   else  | 
289  | 0  |     { | 
290  | 0  |       int res = mpn_jacobi_2 (ap, bp, bits & 1);  | 
291  | 0  |       TMP_FREE;  | 
292  | 0  |       return res;  | 
293  | 0  |     }  | 
294  | 0  | }  |