/src/libgmp/mpn/hgcd2_jacobi.c
Line | Count | Source |
1 | | /* hgcd2_jacobi.c |
2 | | |
3 | | THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY |
4 | | SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST |
5 | | GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. |
6 | | |
7 | | Copyright 1996, 1998, 2000-2004, 2008, 2011, 2020 Free Software |
8 | | Foundation, Inc. |
9 | | |
10 | | This file is part of the GNU MP Library. |
11 | | |
12 | | The GNU MP Library is free software; you can redistribute it and/or modify |
13 | | it under the terms of either: |
14 | | |
15 | | * the GNU Lesser General Public License as published by the Free |
16 | | Software Foundation; either version 3 of the License, or (at your |
17 | | option) any later version. |
18 | | |
19 | | or |
20 | | |
21 | | * the GNU General Public License as published by the Free Software |
22 | | Foundation; either version 2 of the License, or (at your option) any |
23 | | later version. |
24 | | |
25 | | or both in parallel, as here. |
26 | | |
27 | | The GNU MP Library is distributed in the hope that it will be useful, but |
28 | | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
29 | | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
30 | | for more details. |
31 | | |
32 | | You should have received copies of the GNU General Public License and the |
33 | | GNU Lesser General Public License along with the GNU MP Library. If not, |
34 | | see https://www.gnu.org/licenses/. */ |
35 | | |
36 | | #include "gmp-impl.h" |
37 | | #include "longlong.h" |
38 | | |
39 | | #include "mpn/generic/hgcd2-div.h" |
40 | | |
41 | | #if GMP_NAIL_BITS != 0 |
42 | | #error Nails not implemented |
43 | | #endif |
44 | | |
45 | | int |
46 | | mpn_hgcd2_jacobi (mp_limb_t ah, mp_limb_t al, mp_limb_t bh, mp_limb_t bl, |
47 | | struct hgcd_matrix1 *M, unsigned *bitsp) |
48 | 24.3k | { |
49 | 24.3k | mp_limb_t u00, u01, u10, u11; |
50 | 24.3k | unsigned bits = *bitsp; |
51 | | |
52 | 24.3k | if (ah < 2 || bh < 2) |
53 | 4 | return 0; |
54 | | |
55 | 24.3k | if (ah > bh || (ah == bh && al > bl)) |
56 | 12.0k | { |
57 | 12.0k | sub_ddmmss (ah, al, ah, al, bh, bl); |
58 | 12.0k | if (ah < 2) |
59 | 27 | return 0; |
60 | | |
61 | 12.0k | u00 = u01 = u11 = 1; |
62 | 12.0k | u10 = 0; |
63 | 12.0k | bits = mpn_jacobi_update (bits, 1, 1); |
64 | 12.0k | } |
65 | 12.2k | else |
66 | 12.2k | { |
67 | 12.2k | sub_ddmmss (bh, bl, bh, bl, ah, al); |
68 | 12.2k | if (bh < 2) |
69 | 19 | return 0; |
70 | | |
71 | 12.2k | u00 = u10 = u11 = 1; |
72 | 12.2k | u01 = 0; |
73 | 12.2k | bits = mpn_jacobi_update (bits, 0, 1); |
74 | 12.2k | } |
75 | | |
76 | 24.2k | if (ah < bh) |
77 | 11.9k | goto subtract_a; |
78 | | |
79 | 12.2k | for (;;) |
80 | 232k | { |
81 | 232k | ASSERT (ah >= bh); |
82 | 232k | if (ah == bh) |
83 | 16 | goto done; |
84 | | |
85 | 232k | if (ah < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2))) |
86 | 12.0k | { |
87 | 12.0k | ah = (ah << (GMP_LIMB_BITS / 2) ) + (al >> (GMP_LIMB_BITS / 2)); |
88 | 12.0k | bh = (bh << (GMP_LIMB_BITS / 2) ) + (bl >> (GMP_LIMB_BITS / 2)); |
89 | | |
90 | 12.0k | break; |
91 | 12.0k | } |
92 | | |
93 | | /* Subtract a -= q b, and multiply M from the right by (1 q ; 0 |
94 | | 1), affecting the second column of M. */ |
95 | 220k | ASSERT (ah > bh); |
96 | 220k | sub_ddmmss (ah, al, ah, al, bh, bl); |
97 | | |
98 | 220k | if (ah < 2) |
99 | 1 | goto done; |
100 | | |
101 | 220k | if (ah <= bh) |
102 | 87.7k | { |
103 | | /* Use q = 1 */ |
104 | 87.7k | u01 += u00; |
105 | 87.7k | u11 += u10; |
106 | 87.7k | bits = mpn_jacobi_update (bits, 1, 1); |
107 | 87.7k | } |
108 | 132k | else |
109 | 132k | { |
110 | 132k | mp_limb_t r[2]; |
111 | 132k | mp_limb_t q = div2 (r, ah, al, bh, bl); |
112 | 132k | al = r[0]; ah = r[1]; |
113 | 132k | if (ah < 2) |
114 | 18 | { |
115 | | /* A is too small, but q is correct. */ |
116 | 18 | u01 += q * u00; |
117 | 18 | u11 += q * u10; |
118 | 18 | bits = mpn_jacobi_update (bits, 1, q & 3); |
119 | 18 | goto done; |
120 | 18 | } |
121 | 132k | q++; |
122 | 132k | u01 += q * u00; |
123 | 132k | u11 += q * u10; |
124 | 132k | bits = mpn_jacobi_update (bits, 1, q & 3); |
125 | 132k | } |
126 | 232k | subtract_a: |
127 | 232k | ASSERT (bh >= ah); |
128 | 232k | if (ah == bh) |
129 | 16 | goto done; |
130 | | |
131 | 232k | if (bh < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2))) |
132 | 12.1k | { |
133 | 12.1k | ah = (ah << (GMP_LIMB_BITS / 2) ) + (al >> (GMP_LIMB_BITS / 2)); |
134 | 12.1k | bh = (bh << (GMP_LIMB_BITS / 2) ) + (bl >> (GMP_LIMB_BITS / 2)); |
135 | | |
136 | 12.1k | goto subtract_a1; |
137 | 12.1k | } |
138 | | |
139 | | /* Subtract b -= q a, and multiply M from the right by (1 0 ; q |
140 | | 1), affecting the first column of M. */ |
141 | 220k | sub_ddmmss (bh, bl, bh, bl, ah, al); |
142 | | |
143 | 220k | if (bh < 2) |
144 | 3 | goto done; |
145 | | |
146 | 220k | if (bh <= ah) |
147 | 87.2k | { |
148 | | /* Use q = 1 */ |
149 | 87.2k | u00 += u01; |
150 | 87.2k | u10 += u11; |
151 | 87.2k | bits = mpn_jacobi_update (bits, 0, 1); |
152 | 87.2k | } |
153 | 132k | else |
154 | 132k | { |
155 | 132k | mp_limb_t r[2]; |
156 | 132k | mp_limb_t q = div2 (r, bh, bl, ah, al); |
157 | 132k | bl = r[0]; bh = r[1]; |
158 | 132k | if (bh < 2) |
159 | 6 | { |
160 | | /* B is too small, but q is correct. */ |
161 | 6 | u00 += q * u01; |
162 | 6 | u10 += q * u11; |
163 | 6 | bits = mpn_jacobi_update (bits, 0, q & 3); |
164 | 6 | goto done; |
165 | 6 | } |
166 | 132k | q++; |
167 | 132k | u00 += q * u01; |
168 | 132k | u10 += q * u11; |
169 | 132k | bits = mpn_jacobi_update (bits, 0, q & 3); |
170 | 132k | } |
171 | 220k | } |
172 | | |
173 | | /* NOTE: Since we discard the least significant half limb, we don't get a |
174 | | truly maximal M (corresponding to |a - b| < 2^{GMP_LIMB_BITS +1}). */ |
175 | | /* Single precision loop */ |
176 | 12.0k | for (;;) |
177 | 205k | { |
178 | 205k | ASSERT (ah >= bh); |
179 | | |
180 | 205k | ah -= bh; |
181 | 205k | if (ah < (CNST_LIMB (1) << (GMP_LIMB_BITS / 2 + 1))) |
182 | 5.43k | break; |
183 | | |
184 | 200k | if (ah <= bh) |
185 | 83.0k | { |
186 | | /* Use q = 1 */ |
187 | 83.0k | u01 += u00; |
188 | 83.0k | u11 += u10; |
189 | 83.0k | bits = mpn_jacobi_update (bits, 1, 1); |
190 | 83.0k | } |
191 | 117k | else |
192 | 117k | { |
193 | 117k | mp_double_limb_t rq = div1 (ah, bh); |
194 | 117k | mp_limb_t q = rq.d1; |
195 | 117k | ah = rq.d0; |
196 | | |
197 | 117k | if (ah < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1))) |
198 | 6.65k | { |
199 | | /* A is too small, but q is correct. */ |
200 | 6.65k | u01 += q * u00; |
201 | 6.65k | u11 += q * u10; |
202 | 6.65k | bits = mpn_jacobi_update (bits, 1, q & 3); |
203 | 6.65k | break; |
204 | 6.65k | } |
205 | 110k | q++; |
206 | 110k | u01 += q * u00; |
207 | 110k | u11 += q * u10; |
208 | 110k | bits = mpn_jacobi_update (bits, 1, q & 3); |
209 | 110k | } |
210 | 205k | subtract_a1: |
211 | 205k | ASSERT (bh >= ah); |
212 | | |
213 | 205k | bh -= ah; |
214 | 205k | if (bh < (CNST_LIMB (1) << (GMP_LIMB_BITS / 2 + 1))) |
215 | 5.50k | break; |
216 | | |
217 | 200k | if (bh <= ah) |
218 | 83.4k | { |
219 | | /* Use q = 1 */ |
220 | 83.4k | u00 += u01; |
221 | 83.4k | u10 += u11; |
222 | 83.4k | bits = mpn_jacobi_update (bits, 0, 1); |
223 | 83.4k | } |
224 | 117k | else |
225 | 117k | { |
226 | 117k | mp_double_limb_t rq = div1 (bh, ah); |
227 | 117k | mp_limb_t q = rq.d1; |
228 | 117k | bh = rq.d0; |
229 | | |
230 | 117k | if (bh < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1))) |
231 | 6.61k | { |
232 | | /* B is too small, but q is correct. */ |
233 | 6.61k | u00 += q * u01; |
234 | 6.61k | u10 += q * u11; |
235 | 6.61k | bits = mpn_jacobi_update (bits, 0, q & 3); |
236 | 6.61k | break; |
237 | 6.61k | } |
238 | 110k | q++; |
239 | 110k | u00 += q * u01; |
240 | 110k | u10 += q * u11; |
241 | 110k | bits = mpn_jacobi_update (bits, 0, q & 3); |
242 | 110k | } |
243 | 200k | } |
244 | | |
245 | 24.2k | done: |
246 | 24.2k | M->u[0][0] = u00; M->u[0][1] = u01; |
247 | 24.2k | M->u[1][0] = u10; M->u[1][1] = u11; |
248 | 24.2k | *bitsp = bits; |
249 | | |
250 | 24.2k | return 1; |
251 | 12.0k | } |