Line | Count | Source (jump to first uncovered line) |
1 | | /* mpn_gcdext -- Extended Greatest Common Divisor. |
2 | | |
3 | | Copyright 1996, 1998, 2000-2005, 2008, 2009 Free Software Foundation, Inc. |
4 | | |
5 | | This file is part of the GNU MP Library. |
6 | | |
7 | | The GNU MP Library is free software; you can redistribute it and/or modify |
8 | | it under the terms of either: |
9 | | |
10 | | * the GNU Lesser General Public License as published by the Free |
11 | | Software Foundation; either version 3 of the License, or (at your |
12 | | option) any later version. |
13 | | |
14 | | or |
15 | | |
16 | | * the GNU General Public License as published by the Free Software |
17 | | Foundation; either version 2 of the License, or (at your option) any |
18 | | later version. |
19 | | |
20 | | or both in parallel, as here. |
21 | | |
22 | | The GNU MP Library is distributed in the hope that it will be useful, but |
23 | | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
24 | | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
25 | | for more details. |
26 | | |
27 | | You should have received copies of the GNU General Public License and the |
28 | | GNU Lesser General Public License along with the GNU MP Library. If not, |
29 | | see https://www.gnu.org/licenses/. */ |
30 | | |
31 | | #include "gmp-impl.h" |
32 | | #include "longlong.h" |
33 | | |
34 | | #ifndef GCDEXT_1_USE_BINARY |
35 | | #define GCDEXT_1_USE_BINARY 0 |
36 | | #endif |
37 | | |
38 | | #ifndef GCDEXT_1_BINARY_METHOD |
39 | | #define GCDEXT_1_BINARY_METHOD 2 |
40 | | #endif |
41 | | |
42 | | #if GCDEXT_1_USE_BINARY |
43 | | |
44 | | mp_limb_t |
45 | | mpn_gcdext_1 (mp_limb_signed_t *sp, mp_limb_signed_t *tp, |
46 | | mp_limb_t u, mp_limb_t v) |
47 | | { |
48 | | /* Maintain |
49 | | |
50 | | U = t1 u + t0 v |
51 | | V = s1 u + s0 v |
52 | | |
53 | | where U, V are the inputs (without any shared power of two), |
54 | | and the matrix has determinant ± 2^{shift}. |
55 | | */ |
56 | | mp_limb_t s0 = 1; |
57 | | mp_limb_t t0 = 0; |
58 | | mp_limb_t s1 = 0; |
59 | | mp_limb_t t1 = 1; |
60 | | mp_limb_t ug; |
61 | | mp_limb_t vg; |
62 | | mp_limb_t ugh; |
63 | | mp_limb_t vgh; |
64 | | unsigned zero_bits; |
65 | | unsigned shift; |
66 | | unsigned i; |
67 | | #if GCDEXT_1_BINARY_METHOD == 2 |
68 | | mp_limb_t det_sign; |
69 | | #endif |
70 | | |
71 | | ASSERT (u > 0); |
72 | | ASSERT (v > 0); |
73 | | |
74 | | count_trailing_zeros (zero_bits, u | v); |
75 | | u >>= zero_bits; |
76 | | v >>= zero_bits; |
77 | | |
78 | | if ((u & 1) == 0) |
79 | | { |
80 | | count_trailing_zeros (shift, u); |
81 | | u >>= shift; |
82 | | t1 <<= shift; |
83 | | } |
84 | | else if ((v & 1) == 0) |
85 | | { |
86 | | count_trailing_zeros (shift, v); |
87 | | v >>= shift; |
88 | | s0 <<= shift; |
89 | | } |
90 | | else |
91 | | shift = 0; |
92 | | |
93 | | #if GCDEXT_1_BINARY_METHOD == 1 |
94 | | while (u != v) |
95 | | { |
96 | | unsigned count; |
97 | | if (u > v) |
98 | | { |
99 | | u -= v; |
100 | | |
101 | | count_trailing_zeros (count, u); |
102 | | u >>= count; |
103 | | |
104 | | t0 += t1; t1 <<= count; |
105 | | s0 += s1; s1 <<= count; |
106 | | } |
107 | | else |
108 | | { |
109 | | v -= u; |
110 | | |
111 | | count_trailing_zeros (count, v); |
112 | | v >>= count; |
113 | | |
114 | | t1 += t0; t0 <<= count; |
115 | | s1 += s0; s0 <<= count; |
116 | | } |
117 | | shift += count; |
118 | | } |
119 | | #else |
120 | | # if GCDEXT_1_BINARY_METHOD == 2 |
121 | | u >>= 1; |
122 | | v >>= 1; |
123 | | |
124 | | det_sign = 0; |
125 | | |
126 | | while (u != v) |
127 | | { |
128 | | unsigned count; |
129 | | mp_limb_t d = u - v; |
130 | | mp_limb_t vgtu = LIMB_HIGHBIT_TO_MASK (d); |
131 | | mp_limb_t sx; |
132 | | mp_limb_t tx; |
133 | | |
134 | | /* When v <= u (vgtu == 0), the updates are: |
135 | | |
136 | | (u; v) <-- ( (u - v) >> count; v) (det = +(1<<count) for corr. M factor) |
137 | | (t1, t0) <-- (t1 << count, t0 + t1) |
138 | | |
139 | | and when v > 0, the updates are |
140 | | |
141 | | (u; v) <-- ( (v - u) >> count; u) (det = -(1<<count)) |
142 | | (t1, t0) <-- (t0 << count, t0 + t1) |
143 | | |
144 | | and similarly for s1, s0 |
145 | | */ |
146 | | |
147 | | /* v <-- min (u, v) */ |
148 | | v += (vgtu & d); |
149 | | |
150 | | /* u <-- |u - v| */ |
151 | | u = (d ^ vgtu) - vgtu; |
152 | | |
153 | | /* Number of trailing zeros is the same no matter if we look at |
154 | | * d or u, but using d gives more parallelism. */ |
155 | | count_trailing_zeros (count, d); |
156 | | |
157 | | det_sign ^= vgtu; |
158 | | |
159 | | tx = vgtu & (t0 - t1); |
160 | | sx = vgtu & (s0 - s1); |
161 | | t0 += t1; |
162 | | s0 += s1; |
163 | | t1 += tx; |
164 | | s1 += sx; |
165 | | |
166 | | count++; |
167 | | u >>= count; |
168 | | t1 <<= count; |
169 | | s1 <<= count; |
170 | | shift += count; |
171 | | } |
172 | | u = (u << 1) + 1; |
173 | | # else /* GCDEXT_1_BINARY_METHOD == 2 */ |
174 | | # error Unknown GCDEXT_1_BINARY_METHOD |
175 | | # endif |
176 | | #endif |
177 | | |
178 | | /* Now u = v = g = gcd (u,v). Compute U/g and V/g */ |
179 | | ug = t0 + t1; |
180 | | vg = s0 + s1; |
181 | | |
182 | | ugh = ug/2 + (ug & 1); |
183 | | vgh = vg/2 + (vg & 1); |
184 | | |
185 | | /* Now 2^{shift} g = s0 U - t0 V. Get rid of the power of two, using |
186 | | s0 U - t0 V = (s0 + V/g) U - (t0 + U/g) V. */ |
187 | | for (i = 0; i < shift; i++) |
188 | | { |
189 | | mp_limb_t mask = - ( (s0 | t0) & 1); |
190 | | |
191 | | s0 /= 2; |
192 | | t0 /= 2; |
193 | | s0 += mask & vgh; |
194 | | t0 += mask & ugh; |
195 | | } |
196 | | |
197 | | ASSERT_ALWAYS (s0 <= vg); |
198 | | ASSERT_ALWAYS (t0 <= ug); |
199 | | |
200 | | if (s0 > vg - s0) |
201 | | { |
202 | | s0 -= vg; |
203 | | t0 -= ug; |
204 | | } |
205 | | #if GCDEXT_1_BINARY_METHOD == 2 |
206 | | /* Conditional negation. */ |
207 | | s0 = (s0 ^ det_sign) - det_sign; |
208 | | t0 = (t0 ^ det_sign) - det_sign; |
209 | | #endif |
210 | | *sp = s0; |
211 | | *tp = -t0; |
212 | | |
213 | | return u << zero_bits; |
214 | | } |
215 | | |
216 | | #else /* !GCDEXT_1_USE_BINARY */ |
217 | | |
218 | | |
219 | | /* FIXME: Takes two single-word limbs. It could be extended to a |
220 | | * function that accepts a bignum for the first input, and only |
221 | | * returns the first co-factor. */ |
222 | | |
223 | | mp_limb_t |
224 | | mpn_gcdext_1 (mp_limb_signed_t *up, mp_limb_signed_t *vp, |
225 | | mp_limb_t a, mp_limb_t b) |
226 | 0 | { |
227 | | /* Maintain |
228 | | |
229 | | a = u0 A + v0 B |
230 | | b = u1 A + v1 B |
231 | | |
232 | | where A, B are the original inputs. |
233 | | */ |
234 | 0 | mp_limb_signed_t u0 = 1; |
235 | 0 | mp_limb_signed_t v0 = 0; |
236 | 0 | mp_limb_signed_t u1 = 0; |
237 | 0 | mp_limb_signed_t v1 = 1; |
238 | |
|
239 | 0 | ASSERT (a > 0); |
240 | 0 | ASSERT (b > 0); |
241 | |
|
242 | 0 | if (a < b) |
243 | 0 | goto divide_by_b; |
244 | | |
245 | 0 | for (;;) |
246 | 0 | { |
247 | 0 | mp_limb_t q; |
248 | |
|
249 | 0 | q = a / b; |
250 | 0 | a -= q * b; |
251 | |
|
252 | 0 | if (a == 0) |
253 | 0 | { |
254 | 0 | *up = u1; |
255 | 0 | *vp = v1; |
256 | 0 | return b; |
257 | 0 | } |
258 | 0 | u0 -= q * u1; |
259 | 0 | v0 -= q * v1; |
260 | |
|
261 | 0 | divide_by_b: |
262 | 0 | q = b / a; |
263 | 0 | b -= q * a; |
264 | |
|
265 | 0 | if (b == 0) |
266 | 0 | { |
267 | 0 | *up = u0; |
268 | 0 | *vp = v0; |
269 | 0 | return a; |
270 | 0 | } |
271 | 0 | u1 -= q * u0; |
272 | 0 | v1 -= q * v0; |
273 | 0 | } |
274 | 0 | } |
275 | | #endif /* !GCDEXT_1_USE_BINARY */ |