/src/gmp/mpn/gcdext_lehmer.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* mpn_gcdext -- Extended Greatest Common Divisor. |
2 | | |
3 | | Copyright 1996, 1998, 2000-2005, 2008, 2009, 2012 Free Software Foundation, |
4 | | Inc. |
5 | | |
6 | | This file is part of the GNU MP Library. |
7 | | |
8 | | The GNU MP Library is free software; you can redistribute it and/or modify |
9 | | it under the terms of either: |
10 | | |
11 | | * the GNU Lesser General Public License as published by the Free |
12 | | Software Foundation; either version 3 of the License, or (at your |
13 | | option) any later version. |
14 | | |
15 | | or |
16 | | |
17 | | * the GNU General Public License as published by the Free Software |
18 | | Foundation; either version 2 of the License, or (at your option) any |
19 | | later version. |
20 | | |
21 | | or both in parallel, as here. |
22 | | |
23 | | The GNU MP Library is distributed in the hope that it will be useful, but |
24 | | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
25 | | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
26 | | for more details. |
27 | | |
28 | | You should have received copies of the GNU General Public License and the |
29 | | GNU Lesser General Public License along with the GNU MP Library. If not, |
30 | | see https://www.gnu.org/licenses/. */ |
31 | | |
32 | | #include "gmp-impl.h" |
33 | | #include "longlong.h" |
34 | | |
35 | | /* Here, d is the index of the cofactor to update. FIXME: Could use qn |
36 | | = 0 for the common case q = 1. */ |
37 | | void |
38 | | mpn_gcdext_hook (void *p, mp_srcptr gp, mp_size_t gn, |
39 | | mp_srcptr qp, mp_size_t qn, int d) |
40 | 0 | { |
41 | 0 | struct gcdext_ctx *ctx = (struct gcdext_ctx *) p; |
42 | 0 | mp_size_t un = ctx->un; |
43 | |
|
44 | 0 | if (gp) |
45 | 0 | { |
46 | 0 | mp_srcptr up; |
47 | |
|
48 | 0 | ASSERT (gn > 0); |
49 | 0 | ASSERT (gp[gn-1] > 0); |
50 | |
|
51 | 0 | MPN_COPY (ctx->gp, gp, gn); |
52 | 0 | ctx->gn = gn; |
53 | |
|
54 | 0 | if (d < 0) |
55 | 0 | { |
56 | 0 | int c; |
57 | | |
58 | | /* Must return the smallest cofactor, +u1 or -u0 */ |
59 | 0 | MPN_CMP (c, ctx->u0, ctx->u1, un); |
60 | 0 | ASSERT (c != 0 || (un == 1 && ctx->u0[0] == 1 && ctx->u1[0] == 1)); |
61 | |
|
62 | 0 | d = c < 0; |
63 | 0 | } |
64 | |
|
65 | 0 | up = d ? ctx->u0 : ctx->u1; |
66 | |
|
67 | 0 | MPN_NORMALIZE (up, un); |
68 | 0 | MPN_COPY (ctx->up, up, un); |
69 | |
|
70 | 0 | *ctx->usize = d ? -un : un; |
71 | 0 | } |
72 | 0 | else |
73 | 0 | { |
74 | 0 | mp_limb_t cy; |
75 | 0 | mp_ptr u0 = ctx->u0; |
76 | 0 | mp_ptr u1 = ctx->u1; |
77 | |
|
78 | 0 | ASSERT (d >= 0); |
79 | |
|
80 | 0 | if (d) |
81 | 0 | MP_PTR_SWAP (u0, u1); |
82 | |
|
83 | 0 | qn -= (qp[qn-1] == 0); |
84 | | |
85 | | /* Update u0 += q * u1 */ |
86 | 0 | if (qn == 1) |
87 | 0 | { |
88 | 0 | mp_limb_t q = qp[0]; |
89 | |
|
90 | 0 | if (q == 1) |
91 | | /* A common case. */ |
92 | 0 | cy = mpn_add_n (u0, u0, u1, un); |
93 | 0 | else |
94 | 0 | cy = mpn_addmul_1 (u0, u1, un, q); |
95 | 0 | } |
96 | 0 | else |
97 | 0 | { |
98 | 0 | mp_size_t u1n; |
99 | 0 | mp_ptr tp; |
100 | |
|
101 | 0 | u1n = un; |
102 | 0 | MPN_NORMALIZE (u1, u1n); |
103 | |
|
104 | 0 | if (u1n == 0) |
105 | 0 | return; |
106 | | |
107 | | /* Should always have u1n == un here, and u1 >= u0. The |
108 | | reason is that we alternate adding u0 to u1 and u1 to u0 |
109 | | (corresponding to subtractions a - b and b - a), and we |
110 | | can get a large quotient only just after a switch, which |
111 | | means that we'll add (a multiple of) the larger u to the |
112 | | smaller. */ |
113 | | |
114 | 0 | tp = ctx->tp; |
115 | |
|
116 | 0 | if (qn > u1n) |
117 | 0 | mpn_mul (tp, qp, qn, u1, u1n); |
118 | 0 | else |
119 | 0 | mpn_mul (tp, u1, u1n, qp, qn); |
120 | |
|
121 | 0 | u1n += qn; |
122 | 0 | u1n -= tp[u1n-1] == 0; |
123 | |
|
124 | 0 | if (u1n >= un) |
125 | 0 | { |
126 | 0 | cy = mpn_add (u0, tp, u1n, u0, un); |
127 | 0 | un = u1n; |
128 | 0 | } |
129 | 0 | else |
130 | | /* Note: Unlikely case, maybe never happens? */ |
131 | 0 | cy = mpn_add (u0, u0, un, tp, u1n); |
132 | |
|
133 | 0 | } |
134 | 0 | u0[un] = cy; |
135 | 0 | ctx->un = un + (cy > 0); |
136 | 0 | } |
137 | 0 | } |
138 | | |
/* Lehmer-style extended gcd of the n-limb operands {ap,n} and {bp,n},
   destroying both inputs.  The gcd is stored at gp (its limb count is
   the return value); the cofactor u with u*A = g (mod B) is stored at
   up, with its signed size written to *usize (negative size means the
   cofactor is negated).

   Temporary storage: 3*(n+1) for u. If hgcd2 succeeds, we need n for
   the matrix-vector multiplication adjusting a, b. If hgcd fails, we
   need at most n for the quotient and n+1 for the u update (reusing
   the extra u). In all, 4n + 3. */

mp_size_t
mpn_gcdext_lehmer_n (mp_ptr gp, mp_ptr up, mp_size_t *usize,
		     mp_ptr ap, mp_ptr bp, mp_size_t n,
		     mp_ptr tp)
{
  mp_size_t ualloc = n + 1;

  /* Keeps track of the second row of the reduction matrix
   *
   *   M = (v0, v1 ; u0, u1)
   *
   * which correspond to the first column of the inverse
   *
   *   M^{-1} = (u1, -v1; -u0, v0)
   *
   * This implies that
   *
   *   a =  u1 A (mod B)
   *   b = -u0 A (mod B)
   *
   * where A, B denotes the input values.
   */

  struct gcdext_ctx ctx;
  mp_size_t un;
  mp_ptr u0;
  mp_ptr u1;
  mp_ptr u2;

  /* Carve three (n+1)-limb cofactor areas out of the scratch space. */
  MPN_ZERO (tp, 3*ualloc);
  u0 = tp; tp += ualloc;
  u1 = tp; tp += ualloc;
  u2 = tp; tp += ualloc;

  /* Start from the identity: a = 1*A (mod B), b = -0*A (mod B). */
  u1[0] = 1; un = 1;

  ctx.gp = gp;
  ctx.up = up;
  ctx.usize = usize;

  /* FIXME: Handle n == 2 differently, after the loop? */
  while (n >= 2)
    {
      struct hgcd_matrix1 M;
      mp_limb_t ah, al, bh, bl;
      mp_limb_t mask;

      mask = ap[n-1] | bp[n-1];
      ASSERT (mask > 0);

      /* Extract two normalized high limbs of each operand for the
	 single-precision hgcd2 step. */
      if (mask & GMP_NUMB_HIGHBIT)
	{
	  ah = ap[n-1]; al = ap[n-2];
	  bh = bp[n-1]; bl = bp[n-2];
	}
      else if (n == 2)
	{
	  /* We use the full inputs without truncation, so we can
	     safely shift left. */
	  int shift;

	  count_leading_zeros (shift, mask);
	  ah = MPN_EXTRACT_NUMB (shift, ap[1], ap[0]);
	  al = ap[0] << shift;
	  bh = MPN_EXTRACT_NUMB (shift, bp[1], bp[0]);
	  bl = bp[0] << shift;
	}
      else
	{
	  int shift;

	  count_leading_zeros (shift, mask);
	  ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);
	  al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);
	  bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);
	  bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);
	}

      /* Try an mpn_nhgcd2 step */
      if (mpn_hgcd2 (ah, al, bh, bl, &M))
	{
	  /* Success: apply M^{-1} to (a, b), and M to the cofactors. */
	  n = mpn_matrix22_mul1_inverse_vector (&M, tp, ap, bp, n);
	  MP_PTR_SWAP (ap, tp);
	  un = mpn_hgcd_mul_matrix1_vector(&M, u2, u0, u1, un);
	  MP_PTR_SWAP (u0, u2);
	}
      else
	{
	  /* mpn_hgcd2 has failed. Then either one of a or b is very
	     small, or the difference is very small. Perform one
	     subtraction followed by one division. */
	  ctx.u0 = u0;
	  ctx.u1 = u1;
	  ctx.tp = u2;
	  ctx.un = un;

	  /* Temporary storage n for the quotient and ualloc for the
	     new cofactor. */
	  n = mpn_gcd_subdiv_step (ap, bp, n, 0, mpn_gcdext_hook, &ctx, tp);
	  /* n == 0 means the hook already stored gcd and cofactor. */
	  if (n == 0)
	    return ctx.gn;

	  un = ctx.un;
	}
    }
  /* Single-limb残 operands remain; finish with a 1-limb gcdext. */
  ASSERT_ALWAYS (ap[0] > 0);
  ASSERT_ALWAYS (bp[0] > 0);

  if (ap[0] == bp[0])
    {
      int c;

      /* Which cofactor to return now? Candidates are +u1 and -u0,
	 depending on which of a and b was most recently reduced,
	 which we don't keep track of. So compare and get the smallest
	 one. */

      gp[0] = ap[0];

      MPN_CMP (c, u0, u1, un);
      ASSERT (c != 0 || (un == 1 && u0[0] == 1 && u1[0] == 1));
      if (c < 0)
	{
	  MPN_NORMALIZE (u0, un);
	  MPN_COPY (up, u0, un);
	  *usize = -un;
	}
      else
	{
	  MPN_NORMALIZE_NOT_ZERO (u1, un);
	  MPN_COPY (up, u1, un);
	  *usize = un;
	}
      return 1;
    }
  else
    {
      mp_limb_t uh, vh;
      mp_limb_signed_t u;
      mp_limb_signed_t v;
      int negate;

      gp[0] = mpn_gcdext_1 (&u, &v, ap[0], bp[0]);

      /* Set up = u u1 - v u0. Keep track of size, un grows by one or
	 two limbs. */

      if (u == 0)
	{
	  ASSERT (v == 1);
	  MPN_NORMALIZE (u0, un);
	  MPN_COPY (up, u0, un);
	  *usize = -un;
	  return 1;
	}
      else if (v == 0)
	{
	  ASSERT (u == 1);
	  MPN_NORMALIZE (u1, un);
	  MPN_COPY (up, u1, un);
	  *usize = un;
	  return 1;
	}
      else if (u > 0)
	{
	  negate = 0;
	  ASSERT (v < 0);
	  v = -v;
	}
      else
	{
	  negate = 1;
	  ASSERT (v > 0);
	  u = -u;
	}

      /* u and v now both non-negative; compute up = u*u1 + v*u0 and
	 handle up to two limbs of carry. */
      uh = mpn_mul_1 (up, u1, un, u);
      vh = mpn_addmul_1 (up, u0, un, v);

      if ( (uh | vh) > 0)
	{
	  uh += vh;
	  up[un++] = uh;
	  /* uh + vh overflowed a limb; propagate a second carry limb. */
	  if (uh < vh)
	    up[un++] = 1;
	}

      MPN_NORMALIZE_NOT_ZERO (up, un);

      *usize = negate ? -un : un;
      return 1;
    }
}