/* mpn_gcdext -- Extended Greatest Common Divisor.

Copyright 1996, 1998, 2000-2005, 2008, 2009, 2012 Free Software Foundation,
Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */

#include "gmp-impl.h"
#include "longlong.h"

/* Computes (r;b) = (a;b) M. Result is of size n + M->n +/- 1, and
   the size is returned (if inputs are non-normalized, result may be
   non-normalized too). Temporary space needed is M->n + n.
 */
static mp_size_t
hgcd_mul_matrix_vector (struct hgcd_matrix *M,
			mp_ptr rp, mp_srcptr ap, mp_ptr bp, mp_size_t n, mp_ptr tp)
{
  mp_limb_t ah, bh;

  /* Compute (r,b) <-- (u00 a + u10 b, u01 a + u11 b) as

     t  = u00 * a
     r  = u10 * b
     r += t;

     t  = u11 * b
     b  = u01 * a
     b += t;
  */
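  /* mpn_mul requires its first operand to have at least as many limbs
     as its second, hence the operand-order branches on M->n >= n
     below.  This ordering of the products also lets bp serve as both
     input and output: b is read for the last time (into t) just
     before bp is overwritten with u01 * a. */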

  if (M->n >= n)
    {
      mpn_mul (tp, M->p[0][0], M->n, ap, n);
      mpn_mul (rp, M->p[1][0], M->n, bp, n);
    }
  else
    {
      mpn_mul (tp, ap, n, M->p[0][0], M->n);
      mpn_mul (rp, bp, n, M->p[1][0], M->n);
    }

  ah = mpn_add_n (rp, rp, tp, n + M->n);

  if (M->n >= n)
    {
      mpn_mul (tp, M->p[1][1], M->n, bp, n);
      mpn_mul (bp, M->p[0][1], M->n, ap, n);
    }
  else
    {
      mpn_mul (tp, bp, n, M->p[1][1], M->n);
      mpn_mul (bp, ap, n, M->p[0][1], M->n);
    }
  bh = mpn_add_n (bp, bp, tp, n + M->n);

  n += M->n;
  if ( (ah | bh) > 0)
    {
      rp[n] = ah;
      bp[n] = bh;
      n++;
    }
  else
    {
      /* Normalize */
      while ( (rp[n-1] | bp[n-1]) == 0)
	n--;
    }

  return n;
}

#define COMPUTE_V_ITCH(n) (2*(n))

/* Computes |v| = |(g - u a)| / b, where u may be positive or
   negative, and v is of the opposite sign. max(a, b) is of size n, u and
   v at most size n, and v must have space for n+1 limbs. */
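/* That is, starting from the relation g = u a + v b we solve for the
   cofactor of b: v = (g - u a) / b, a division that is necessarily
   exact.  The product u a takes at most 2n limbs, which is where
   COMPUTE_V_ITCH comes from. */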
static mp_size_t
compute_v (mp_ptr vp,
	   mp_srcptr ap, mp_srcptr bp, mp_size_t n,
	   mp_srcptr gp, mp_size_t gn,
	   mp_srcptr up, mp_size_t usize,
	   mp_ptr tp)
{
  mp_size_t size;
  mp_size_t an;
  mp_size_t bn;
  mp_size_t vn;

  ASSERT (n > 0);
  ASSERT (gn > 0);
  ASSERT (usize != 0);

  size = ABS (usize);
  ASSERT (size <= n);
  ASSERT (up[size-1] > 0);

  an = n;
  MPN_NORMALIZE (ap, an);
  ASSERT (gn <= an);

  if (an >= size)
    mpn_mul (tp, ap, an, up, size);
  else
    mpn_mul (tp, up, size, ap, an);

  size += an;

  if (usize > 0)
    {
      /* |v| = -v = (u a - g) / b */

      ASSERT_NOCARRY (mpn_sub (tp, tp, size, gp, gn));
      MPN_NORMALIZE (tp, size);
      if (size == 0)
	return 0;
    }
  else
    { /* |v| = v = (g - u a) / b = (g + |u| a) / b. Since g <= a,
	 (g + |u| a) always fits in (|usize| + an) limbs. */

      ASSERT_NOCARRY (mpn_add (tp, tp, size, gp, gn));
      size -= (tp[size - 1] == 0);
    }

  /* Now divide t / b. There must be no remainder. */
  bn = n;
  MPN_NORMALIZE (bp, bn);
  ASSERT (size >= bn);

  vn = size + 1 - bn;
  ASSERT (vn <= n + 1);

  mpn_divexact (vp, tp, size, bp, bn);
  vn -= (vp[vn-1] == 0);

  return vn;
}

/* Temporary storage:

   Initial division: Quotient of at most an - n + 1 <= an limbs.

   Storage for u0 and u1: 2(n+1).

   Storage for hgcd matrix M, with input ceil(n/2): 5 * ceil(n/4)

   Storage for hgcd, input (n + 1)/2: 9 n/4 plus some.

   When hgcd succeeds: 1 + floor(3n/2) for adjusting a and b, and
   2(n+1) for the cofactors.

   When hgcd fails: 2n + 1 for mpn_gcdext_subdiv_step, which is less.

   For the lehmer call after the loop, let T denote
   GCDEXT_DC_THRESHOLD. For the gcdext_lehmer call, we need T each for
   u, a and b, and 4T+3 scratch space. Next, for compute_v, we need T
   for u, T+1 for v, and 2T scratch space. In all, 7T + 3 is
   sufficient for both operations.

*/
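/* Checking the last paragraph: 3T for u and the copies of a and b,
   plus 4T+3 scratch, gives 7T + 3 for the gcdext_lehmer call, while
   compute_v needs T + (T+1) + 2T = 4T + 1 < 7T + 3. */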

/* Optimal choice of p seems difficult. In each iteration the division
 * of work between hgcd and the updates of u0 and u1 depends on the
 * current size of the u. It may be desirable to use a different
 * choice of p in each iteration. Also the input size seems to matter;
 * choosing p = n / 3 in the first iteration seems to improve
 * performance slightly for input size just above the threshold, but
 * degrade performance for larger inputs. */
#define CHOOSE_P_1(n) ((n) / 2)
#define CHOOSE_P_2(n) ((n) / 3)
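
/* For example, with n = 3000 limbs the first hgcd call works on the
   high n - n/2 = 1500 limbs of a and b, while later iterations work
   on the high n - n/3 = 2000 limbs of the then-current operands. */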
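/* mpn_gcdext (gp, up, usizep, ap, an, bp, n)

   Computes the gcd g of A = {ap,an} and B = {bp,n}, where an >= n > 0
   and bp[n-1] != 0.  Stores g at gp, returning its limb count, and
   stores a cofactor u, satisfying g = u A + v B for some v of the
   opposite sign, at up (room for n+1 limbs), with its signed size in
   *usizep.  Both input operands are destroyed. */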
mp_size_t
mpn_gcdext (mp_ptr gp, mp_ptr up, mp_size_t *usizep,
	    mp_ptr ap, mp_size_t an, mp_ptr bp, mp_size_t n)
{
  mp_size_t talloc;
  mp_size_t scratch;
  mp_size_t matrix_scratch;
  mp_size_t ualloc = n + 1;

  struct gcdext_ctx ctx;
  mp_size_t un;
  mp_ptr u0;
  mp_ptr u1;

  mp_ptr tp;

  TMP_DECL;

  ASSERT (an >= n);
  ASSERT (n > 0);
  ASSERT (bp[n-1] > 0);

  TMP_MARK;

  /* FIXME: Check for small sizes first, before setting up temporary
     storage etc. */
  talloc = MPN_GCDEXT_LEHMER_N_ITCH(n);

  /* For initial division */
  scratch = an - n + 1;
  if (scratch > talloc)
    talloc = scratch;

  if (ABOVE_THRESHOLD (n, GCDEXT_DC_THRESHOLD))
    {
      /* For hgcd loop. */
      mp_size_t hgcd_scratch;
      mp_size_t update_scratch;
      mp_size_t p1 = CHOOSE_P_1 (n);
      mp_size_t p2 = CHOOSE_P_2 (n);
      mp_size_t min_p = MIN(p1, p2);
      mp_size_t max_p = MAX(p1, p2);
      matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - min_p);
      hgcd_scratch = mpn_hgcd_itch (n - min_p);
      update_scratch = max_p + n - 1;

      scratch = matrix_scratch + MAX(hgcd_scratch, update_scratch);
      if (scratch > talloc)
	talloc = scratch;

      /* Final mpn_gcdext_lehmer_n call. Need space for u and for
	 copies of a and b. */
      scratch = MPN_GCDEXT_LEHMER_N_ITCH (GCDEXT_DC_THRESHOLD)
	+ 3*GCDEXT_DC_THRESHOLD;

      if (scratch > talloc)
	talloc = scratch;

      /* Cofactors u0 and u1 */
      talloc += 2*(n+1);
    }

  tp = TMP_ALLOC_LIMBS(talloc);
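
  /* Reduce a modulo b; if the remainder vanishes, b is the gcd and
     the cofactor of A is zero. */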
  if (an > n)
    {
      mpn_tdiv_qr (tp, ap, 0, ap, an, bp, n);

      if (mpn_zero_p (ap, n))
	{
	  MPN_COPY (gp, bp, n);
	  *usizep = 0;
	  TMP_FREE;
	  return n;
	}
    }

  if (BELOW_THRESHOLD (n, GCDEXT_DC_THRESHOLD))
    {
      mp_size_t gn = mpn_gcdext_lehmer_n(gp, up, usizep, ap, bp, n, tp);

      TMP_FREE;
      return gn;
    }

  MPN_ZERO (tp, 2*ualloc);
  u0 = tp; tp += ualloc;
  u1 = tp; tp += ualloc;

  ctx.gp = gp;
  ctx.up = up;
  ctx.usize = usizep;

  {
    /* For the first hgcd call, there are no u updates, and it makes
       some sense to use a different choice for p. */

    /* FIXME: We could trim use of temporary storage, since u0 and u1
       are not used yet. For the hgcd call, we could swap in the u0
       and u1 pointers for the relevant matrix elements. */

    struct hgcd_matrix M;
    mp_size_t p = CHOOSE_P_1 (n);
    mp_size_t nn;

    mpn_hgcd_matrix_init (&M, n - p, tp);
    nn = mpn_hgcd (ap + p, bp + p, n - p, &M, tp + matrix_scratch);
    if (nn > 0)
      {
	ASSERT (M.n <= (n - p - 1)/2);
	ASSERT (M.n + p <= (p + n - 1) / 2);

	/* Temporary storage 2 (p + M->n) <= p + n - 1 */
	n = mpn_hgcd_matrix_adjust (&M, p + nn, ap, bp, p, tp + matrix_scratch);

	MPN_COPY (u0, M.p[1][0], M.n);
	MPN_COPY (u1, M.p[1][1], M.n);
	un = M.n;
	while ( (u0[un-1] | u1[un-1] ) == 0)
	  un--;
      }
    else
      {
	/* mpn_hgcd has failed. Then either one of a or b is very
	   small, or the difference is very small. Perform one
	   subtraction followed by one division. */
	u1[0] = 1;

	ctx.u0 = u0;
	ctx.u1 = u1;
	ctx.tp = tp + n; /* ualloc */
	ctx.un = 1;

	/* Temporary storage n */
	n = mpn_gcd_subdiv_step (ap, bp, n, 0, mpn_gcdext_hook, &ctx, tp);
	if (n == 0)
	  {
	    TMP_FREE;
	    return ctx.gn;
	  }

	un = ctx.un;
	ASSERT (un < ualloc);
      }
  }

  while (ABOVE_THRESHOLD (n, GCDEXT_DC_THRESHOLD))
    {
      struct hgcd_matrix M;
      mp_size_t p = CHOOSE_P_2 (n);
      mp_size_t nn;

      mpn_hgcd_matrix_init (&M, n - p, tp);
      nn = mpn_hgcd (ap + p, bp + p, n - p, &M, tp + matrix_scratch);
      if (nn > 0)
	{
	  mp_ptr t0;

	  t0 = tp + matrix_scratch;
	  ASSERT (M.n <= (n - p - 1)/2);
	  ASSERT (M.n + p <= (p + n - 1) / 2);

	  /* Temporary storage 2 (p + M->n) <= p + n - 1 */
	  n = mpn_hgcd_matrix_adjust (&M, p + nn, ap, bp, p, t0);

	  /* By the same analysis as for mpn_hgcd_matrix_mul */
	  ASSERT (M.n + un <= ualloc);

	  /* FIXME: This copying could be avoided by some swapping of
	   * pointers. May need more temporary storage, though. */
	  MPN_COPY (t0, u0, un);

	  /* Temporary storage ualloc */
	  un = hgcd_mul_matrix_vector (&M, u0, t0, u1, un, t0 + un);

	  ASSERT (un < ualloc);
	  ASSERT ( (u0[un-1] | u1[un-1]) > 0);
	}
      else
	{
	  /* mpn_hgcd has failed. Then either one of a or b is very
	     small, or the difference is very small. Perform one
	     subtraction followed by one division. */
	  ctx.u0 = u0;
	  ctx.u1 = u1;
	  ctx.tp = tp + n; /* ualloc */
	  ctx.un = un;

	  /* Temporary storage n */
	  n = mpn_gcd_subdiv_step (ap, bp, n, 0, mpn_gcdext_hook, &ctx, tp);
	  if (n == 0)
	    {
	      TMP_FREE;
	      return ctx.gn;
	    }

	  un = ctx.un;
	  ASSERT (un < ualloc);
	}
    }
  /* We have A = ... a + ... b
	     B =  u0 a +  u1 b

	     a =  u1 A + ... B
	     b = -u0 A + ... B

     with bounds

       |u0|, |u1| <= B / min(a, b)

     We always have u1 > 0, and u0 == 0 is possible only if u1 == 1,
     in which case the only reduction done so far is a = A - k B for
     some k.

     Compute g = u a + v b = (u u1 - v u0) A + (...) B
     Here, u, v are bounded by

       |u| <= b,
       |v| <= a
  */
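
  /* Since u and v have opposite signs, u u1 - v u0 equals
     +/- (|u| u1 + |v| u0); the code below therefore assembles the
     final cofactor as the sum |u| u1 + |v| u0 and takes its sign
     from u. */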

  ASSERT ( (ap[n-1] | bp[n-1]) > 0);

  if (UNLIKELY (mpn_cmp (ap, bp, n) == 0))
    {
      /* Must return the smallest cofactor, +u1 or -u0 */
      int c;

      MPN_COPY (gp, ap, n);

      MPN_CMP (c, u0, u1, un);
      /* c == 0 can happen only when A = (2k+1) G, B = 2 G. And in
	 this case we choose the cofactor + 1, corresponding to G = A
	 - k B, rather than -1, corresponding to G = - A + (k+1) B. */
      ASSERT (c != 0 || (un == 1 && u0[0] == 1 && u1[0] == 1));
      if (c < 0)
	{
	  MPN_NORMALIZE (u0, un);
	  MPN_COPY (up, u0, un);
	  *usizep = -un;
	}
      else
	{
	  MPN_NORMALIZE_NOT_ZERO (u1, un);
	  MPN_COPY (up, u1, un);
	  *usizep = un;
	}

      TMP_FREE;
      return n;
    }
  else if (UNLIKELY (u0[0] == 0) && un == 1)
    {
      mp_size_t gn;
      ASSERT (u1[0] == 1);

      /* g = u a + v b = (u u1 - v u0) A + (...) B = u A + (...) B */
      gn = mpn_gcdext_lehmer_n (gp, up, usizep, ap, bp, n, tp);

      TMP_FREE;
      return gn;
    }
  else
    {
      mp_size_t u0n;
      mp_size_t u1n;
      mp_size_t lehmer_un;
      mp_size_t lehmer_vn;
      mp_size_t gn;

      mp_ptr lehmer_up;
      mp_ptr lehmer_vp;
      int negate;

      lehmer_up = tp; tp += n;

      /* Call mpn_gcdext_lehmer_n with copies of a and b. */
      MPN_COPY (tp, ap, n);
      MPN_COPY (tp + n, bp, n);
      gn = mpn_gcdext_lehmer_n (gp, lehmer_up, &lehmer_un, tp, tp + n, n, tp + 2*n);

      u0n = un;
      MPN_NORMALIZE (u0, u0n);
      ASSERT (u0n > 0);

      if (lehmer_un == 0)
	{
	  /* u == 0  ==>  v = g / b == 1  ==> g = - u0 A + (...) B */
	  MPN_COPY (up, u0, u0n);
	  *usizep = -u0n;

	  TMP_FREE;
	  return gn;
	}

      lehmer_vp = tp;
      /* Compute v = (g - u a) / b */
      lehmer_vn = compute_v (lehmer_vp,
			     ap, bp, n, gp, gn, lehmer_up, lehmer_un, tp + n + 1);

      if (lehmer_un > 0)
	negate = 0;
      else
	{
	  lehmer_un = -lehmer_un;
	  negate = 1;
	}

      u1n = un;
      MPN_NORMALIZE (u1, u1n);
      ASSERT (u1n > 0);

      ASSERT (lehmer_un + u1n <= ualloc);
      ASSERT (lehmer_vn + u0n <= ualloc);

      /* We may still have v == 0 */

      /* Compute u u1 */
      if (lehmer_un <= u1n)
	/* Should be the common case */
	mpn_mul (up, u1, u1n, lehmer_up, lehmer_un);
      else
	mpn_mul (up, lehmer_up, lehmer_un, u1, u1n);

      un = u1n + lehmer_un;
      un -= (up[un - 1] == 0);

      if (lehmer_vn > 0)
	{
	  mp_limb_t cy;

	  /* Overwrites old u1 value */
	  if (lehmer_vn <= u0n)
	    /* Should be the common case */
	    mpn_mul (u1, u0, u0n, lehmer_vp, lehmer_vn);
	  else
	    mpn_mul (u1, lehmer_vp, lehmer_vn, u0, u0n);

	  u1n = u0n + lehmer_vn;
	  u1n -= (u1[u1n - 1] == 0);

	  if (u1n <= un)
	    {
	      cy = mpn_add (up, up, un, u1, u1n);
	    }
	  else
	    {
	      cy = mpn_add (up, u1, u1n, up, un);
	      un = u1n;
	    }
	  up[un] = cy;
	  un += (cy != 0);

	  ASSERT (un < ualloc);
	}
      *usizep = negate ? -un : un;

      TMP_FREE;
      return gn;
    }
}
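
/* Usage sketch (illustrative only, not part of the library): with
   single-limb operands a = 12 and b = 9,

     mp_limb_t ap[1] = { 12 };
     mp_limb_t bp[1] = { 9 };
     mp_limb_t gp[1], up[2];
     mp_size_t usize;
     mp_size_t gn = mpn_gcdext (gp, up, &usize, ap, 1, bp, 1);

   the gcd {gp,gn} is 3, and the returned cofactor satisfies
   g = u a + v b; here u = 1 works, since 1*12 + (-1)*9 = 3.  Both
   ap and bp are clobbered by the call. */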