Line | Count | Source (jump to first uncovered line) |
1 | | /* mpn/gcd.c: mpn_gcd for gcd of two odd integers. |
2 | | |
3 | | Copyright 1991, 1993-1998, 2000-2005, 2008, 2010, 2012, 2019 Free Software |
4 | | Foundation, Inc. |
5 | | |
6 | | This file is part of the GNU MP Library. |
7 | | |
8 | | The GNU MP Library is free software; you can redistribute it and/or modify |
9 | | it under the terms of either: |
10 | | |
11 | | * the GNU Lesser General Public License as published by the Free |
12 | | Software Foundation; either version 3 of the License, or (at your |
13 | | option) any later version. |
14 | | |
15 | | or |
16 | | |
17 | | * the GNU General Public License as published by the Free Software |
18 | | Foundation; either version 2 of the License, or (at your option) any |
19 | | later version. |
20 | | |
21 | | or both in parallel, as here. |
22 | | |
23 | | The GNU MP Library is distributed in the hope that it will be useful, but |
24 | | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
25 | | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
26 | | for more details. |
27 | | |
28 | | You should have received copies of the GNU General Public License and the |
29 | | GNU Lesser General Public License along with the GNU MP Library. If not, |
30 | | see https://www.gnu.org/licenses/. */ |
31 | | |
32 | | #include "gmp-impl.h" |
33 | | #include "longlong.h" |
34 | | |
35 | | /* Uses the HGCD operation described in |
36 | | |
37 | | N. Möller, On Schönhage's algorithm and subquadratic integer gcd |
38 | | computation, Math. Comp. 77 (2008), 589-607. |
39 | | |
40 | | to reduce inputs until they are of size below GCD_DC_THRESHOLD, and |
41 | | then uses Lehmer's algorithm. |
42 | | */ |
43 | | |
44 | | /* Some reasonable choices are n / 2 (same as in hgcd), and p = (n + |
45 | | * 2)/3, which gives a balanced multiplication in |
46 | | * mpn_hgcd_matrix_adjust. However, p = 2 n/3 gives slightly better |
47 | | * performance. The matrix-vector multiplication is then |
48 | | * 4:1-unbalanced, with matrix elements of size n/6, and vector |
49 | | * elements of size p = 2n/3. */ |
50 | | |
51 | | /* From analysis of the theoretical running time, it appears that when |
52 | | * multiplication takes time O(n^alpha), p should be chosen so that |
53 | | * the ratio of the time for the mpn_hgcd call, and the time for the |
54 | | * multiplication in mpn_hgcd_matrix_adjust, is roughly 1/(alpha - |
55 | | * 1). */ |
56 | | #ifdef TUNE_GCD_P |
57 | | #define P_TABLE_SIZE 10000 |
58 | | mp_size_t p_table[P_TABLE_SIZE]; |
59 | | #define CHOOSE_P(n) ( (n) < P_TABLE_SIZE ? p_table[n] : 2*(n)/3) |
60 | | #else |
61 | 0 | #define CHOOSE_P(n) (2*(n) / 3) |
62 | | #endif |
63 | | |
64 | | struct gcd_ctx |
65 | | { |
66 | | mp_ptr gp; |
67 | | mp_size_t gn; |
68 | | }; |
69 | | |
70 | | static void |
71 | | gcd_hook (void *p, mp_srcptr gp, mp_size_t gn, |
72 | | mp_srcptr qp, mp_size_t qn, int d) |
73 | 0 | { |
74 | 0 | struct gcd_ctx *ctx = (struct gcd_ctx *) p; |
75 | 0 | MPN_COPY (ctx->gp, gp, gn); |
76 | 0 | ctx->gn = gn; |
77 | 0 | } |
78 | | |
79 | | mp_size_t |
80 | | mpn_gcd (mp_ptr gp, mp_ptr up, mp_size_t usize, mp_ptr vp, mp_size_t n) |
81 | 0 | { |
82 | 0 | mp_size_t talloc; |
83 | 0 | mp_size_t scratch; |
84 | 0 | mp_size_t matrix_scratch; |
85 | |
|
86 | 0 | struct gcd_ctx ctx; |
87 | 0 | mp_ptr tp; |
88 | 0 | TMP_DECL; |
89 | |
|
90 | 0 | ASSERT (usize >= n); |
91 | 0 | ASSERT (n > 0); |
92 | 0 | ASSERT (vp[n-1] > 0); |
93 | | |
94 | | /* FIXME: Check for small sizes first, before setting up temporary |
95 | | storage etc. */ |
96 | 0 | talloc = MPN_GCD_SUBDIV_STEP_ITCH(n); |
97 | | |
98 | | /* For initial division */ |
99 | 0 | scratch = usize - n + 1; |
100 | 0 | if (scratch > talloc) |
101 | 0 | talloc = scratch; |
102 | |
|
103 | | #if TUNE_GCD_P |
104 | | if (CHOOSE_P (n) > 0) |
105 | | #else |
106 | 0 | if (ABOVE_THRESHOLD (n, GCD_DC_THRESHOLD)) |
107 | 0 | #endif |
108 | 0 | { |
109 | 0 | mp_size_t hgcd_scratch; |
110 | 0 | mp_size_t update_scratch; |
111 | 0 | mp_size_t p = CHOOSE_P (n); |
112 | 0 | mp_size_t scratch; |
113 | | #if TUNE_GCD_P |
114 | | /* Worst case, since we don't guarantee that n - CHOOSE_P(n) |
115 | | is increasing */ |
116 | | matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n); |
117 | | hgcd_scratch = mpn_hgcd_itch (n); |
118 | | update_scratch = 2*(n - 1); |
119 | | #else |
120 | 0 | matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p); |
121 | 0 | hgcd_scratch = mpn_hgcd_itch (n - p); |
122 | 0 | update_scratch = p + n - 1; |
123 | 0 | #endif |
124 | 0 | scratch = matrix_scratch + MAX(hgcd_scratch, update_scratch); |
125 | 0 | if (scratch > talloc) |
126 | 0 | talloc = scratch; |
127 | 0 | } |
128 | |
|
129 | 0 | TMP_MARK; |
130 | 0 | tp = TMP_ALLOC_LIMBS(talloc); |
131 | |
|
132 | 0 | if (usize > n) |
133 | 0 | { |
134 | 0 | mpn_tdiv_qr (tp, up, 0, up, usize, vp, n); |
135 | |
|
136 | 0 | if (mpn_zero_p (up, n)) |
137 | 0 | { |
138 | 0 | MPN_COPY (gp, vp, n); |
139 | 0 | ctx.gn = n; |
140 | 0 | goto done; |
141 | 0 | } |
142 | 0 | } |
143 | | |
144 | 0 | ctx.gp = gp; |
145 | |
|
146 | | #if TUNE_GCD_P |
147 | | while (CHOOSE_P (n) > 0) |
148 | | #else |
149 | 0 | while (ABOVE_THRESHOLD (n, GCD_DC_THRESHOLD)) |
150 | 0 | #endif |
151 | 0 | { |
152 | 0 | struct hgcd_matrix M; |
153 | 0 | mp_size_t p = CHOOSE_P (n); |
154 | 0 | mp_size_t matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p); |
155 | 0 | mp_size_t nn; |
156 | 0 | mpn_hgcd_matrix_init (&M, n - p, tp); |
157 | 0 | nn = mpn_hgcd (up + p, vp + p, n - p, &M, tp + matrix_scratch); |
158 | 0 | if (nn > 0) |
159 | 0 | { |
160 | 0 | ASSERT (M.n <= (n - p - 1)/2); |
161 | 0 | ASSERT (M.n + p <= (p + n - 1) / 2); |
162 | | /* Temporary storage 2 (p + M->n) <= p + n - 1. */ |
163 | 0 | n = mpn_hgcd_matrix_adjust (&M, p + nn, up, vp, p, tp + matrix_scratch); |
164 | 0 | } |
165 | 0 | else |
166 | 0 | { |
167 | | /* Temporary storage n */ |
168 | 0 | n = mpn_gcd_subdiv_step (up, vp, n, 0, gcd_hook, &ctx, tp); |
169 | 0 | if (n == 0) |
170 | 0 | goto done; |
171 | 0 | } |
172 | 0 | } |
173 | | |
174 | 0 | while (n > 2) |
175 | 0 | { |
176 | 0 | struct hgcd_matrix1 M; |
177 | 0 | mp_limb_t uh, ul, vh, vl; |
178 | 0 | mp_limb_t mask; |
179 | |
|
180 | 0 | mask = up[n-1] | vp[n-1]; |
181 | 0 | ASSERT (mask > 0); |
182 | |
|
183 | 0 | if (mask & GMP_NUMB_HIGHBIT) |
184 | 0 | { |
185 | 0 | uh = up[n-1]; ul = up[n-2]; |
186 | 0 | vh = vp[n-1]; vl = vp[n-2]; |
187 | 0 | } |
188 | 0 | else |
189 | 0 | { |
190 | 0 | int shift; |
191 | |
|
192 | 0 | count_leading_zeros (shift, mask); |
193 | 0 | uh = MPN_EXTRACT_NUMB (shift, up[n-1], up[n-2]); |
194 | 0 | ul = MPN_EXTRACT_NUMB (shift, up[n-2], up[n-3]); |
195 | 0 | vh = MPN_EXTRACT_NUMB (shift, vp[n-1], vp[n-2]); |
196 | 0 | vl = MPN_EXTRACT_NUMB (shift, vp[n-2], vp[n-3]); |
197 | 0 | } |
198 | | |
199 | | /* Try an mpn_hgcd2 step */ |
200 | 0 | if (mpn_hgcd2 (uh, ul, vh, vl, &M)) |
201 | 0 | { |
202 | 0 | n = mpn_matrix22_mul1_inverse_vector (&M, tp, up, vp, n); |
203 | 0 | MP_PTR_SWAP (up, tp); |
204 | 0 | } |
205 | 0 | else |
206 | 0 | { |
207 | | /* mpn_hgcd2 has failed. Then either one of a or b is very |
208 | | small, or the difference is very small. Perform one |
209 | | subtraction followed by one division. */ |
210 | | |
211 | | /* Temporary storage n */ |
212 | 0 | n = mpn_gcd_subdiv_step (up, vp, n, 0, &gcd_hook, &ctx, tp); |
213 | 0 | if (n == 0) |
214 | 0 | goto done; |
215 | 0 | } |
216 | 0 | } |
217 | | |
218 | 0 | ASSERT(up[n-1] | vp[n-1]); |
219 | | |
220 | | /* Due to the calling convention for mpn_gcd, at most one can be even. */ |
221 | 0 | if ((up[0] & 1) == 0) |
222 | 0 | MP_PTR_SWAP (up, vp); |
223 | 0 | ASSERT ((up[0] & 1) != 0); |
224 | |
|
225 | 0 | { |
226 | 0 | mp_limb_t u0, u1, v0, v1; |
227 | 0 | mp_double_limb_t g; |
228 | |
|
229 | 0 | u0 = up[0]; |
230 | 0 | v0 = vp[0]; |
231 | |
|
232 | 0 | if (n == 1) |
233 | 0 | { |
234 | 0 | int cnt; |
235 | 0 | count_trailing_zeros (cnt, v0); |
236 | 0 | *gp = mpn_gcd_11 (u0, v0 >> cnt); |
237 | 0 | ctx.gn = 1; |
238 | 0 | goto done; |
239 | 0 | } |
240 | | |
241 | 0 | v1 = vp[1]; |
242 | 0 | if (UNLIKELY (v0 == 0)) |
243 | 0 | { |
244 | 0 | v0 = v1; |
245 | 0 | v1 = 0; |
246 | | /* FIXME: We could invoke a mpn_gcd_21 here, just like mpn_gcd_22 could |
247 | | when this situation occurs internally. */ |
248 | 0 | } |
249 | 0 | if ((v0 & 1) == 0) |
250 | 0 | { |
251 | 0 | int cnt; |
252 | 0 | count_trailing_zeros (cnt, v0); |
253 | 0 | v0 = ((v1 << (GMP_NUMB_BITS - cnt)) & GMP_NUMB_MASK) | (v0 >> cnt); |
254 | 0 | v1 >>= cnt; |
255 | 0 | } |
256 | |
|
257 | 0 | u1 = up[1]; |
258 | 0 | g = mpn_gcd_22 (u1, u0, v1, v0); |
259 | 0 | gp[0] = g.d0; |
260 | 0 | gp[1] = g.d1; |
261 | 0 | ctx.gn = 1 + (g.d1 > 0); |
262 | 0 | } |
263 | 0 | done: |
264 | 0 | TMP_FREE; |
265 | 0 | return ctx.gn; |
266 | 0 | } |