/src/libgmp/mpn/hgcd_jacobi.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* hgcd_jacobi.c. |
2 | | |
3 | | THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY |
4 | | SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST |
5 | | GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. |
6 | | |
7 | | Copyright 2003-2005, 2008, 2011, 2012 Free Software Foundation, Inc. |
8 | | |
9 | | This file is part of the GNU MP Library. |
10 | | |
11 | | The GNU MP Library is free software; you can redistribute it and/or modify |
12 | | it under the terms of either: |
13 | | |
14 | | * the GNU Lesser General Public License as published by the Free |
15 | | Software Foundation; either version 3 of the License, or (at your |
16 | | option) any later version. |
17 | | |
18 | | or |
19 | | |
20 | | * the GNU General Public License as published by the Free Software |
21 | | Foundation; either version 2 of the License, or (at your option) any |
22 | | later version. |
23 | | |
24 | | or both in parallel, as here. |
25 | | |
26 | | The GNU MP Library is distributed in the hope that it will be useful, but |
27 | | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
28 | | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
29 | | for more details. |
30 | | |
31 | | You should have received copies of the GNU General Public License and the |
32 | | GNU Lesser General Public License along with the GNU MP Library. If not, |
33 | | see https://www.gnu.org/licenses/. */ |
34 | | |
35 | | #include "gmp-impl.h" |
36 | | #include "longlong.h" |
37 | | |
38 | | /* This file is almost a copy of hgcd.c, with some added calls to |
39 | | mpn_jacobi_update */ |
40 | | |
41 | | struct hgcd_jacobi_ctx /* Context handed (as void *) to hgcd_jacobi_hook. */ |
42 | | { |
43 | | struct hgcd_matrix *M; /* Matrix the hook updates with each quotient. */ |
44 | | unsigned *bitsp; /* Word the hook rewrites with the result of mpn_jacobi_update. */ |
45 | | }; |
46 | | |
47 | | static void |
48 | | hgcd_jacobi_hook (void *p, mp_srcptr gp, mp_size_t gn, |
49 | | mp_srcptr qp, mp_size_t qn, int d) |
50 | 0 | { /* Hook given to mpn_gcd_subdiv_step by hgcd_jacobi_step; p points to a struct hgcd_jacobi_ctx. Folds the quotient {qp, qn} into ctx->M and updates *ctx->bitsp. gn is not used here. */ |
51 | 0 | ASSERT (!gp); /* This hook expects to be called with gp == NULL only. */ |
52 | 0 | ASSERT (d >= 0); |
53 | | |
54 | 0 | MPN_NORMALIZE (qp, qn); /* NOTE(review): macro from gmp-impl.h; presumably lowers qn past high zero limbs -- confirm. */ |
55 | 0 | if (qn > 0) /* Nothing is done for an empty quotient. */ |
56 | 0 | { |
57 | 0 | struct hgcd_jacobi_ctx *ctx = (struct hgcd_jacobi_ctx *) p; |
58 | | /* NOTES: This is a bit ugly. A tp area is passed to |
59 | | gcd_subdiv_step, which stores q at the start of that area. We |
60 | | now use the rest. */ |
61 | 0 | mp_ptr tp = (mp_ptr) qp + qn; /* Scratch starts right after the qn quotient limbs. */ |
62 |
|
63 | 0 | mpn_hgcd_matrix_update_q (ctx->M, qp, qn, d, tp); |
64 | 0 | *ctx->bitsp = mpn_jacobi_update (*ctx->bitsp, d, qp[0] & 3); /* Only the two low bits of the quotient are passed on. */ |
65 | 0 | } |
66 | 0 | } |
67 | | |
68 | | /* Perform a few steps, using some of mpn_hgcd2_jacobi, subtraction and |
69 | | division. Reduces the size by almost one limb or more, but never |
70 | | below the given size s. Return new size for a and b, or 0 if no |
71 | | more steps are possible. bitsp is forwarded to mpn_hgcd2_jacobi and to the subdiv hook. |
72 | | |
73 | | If hgcd2 succeeds, needs temporary space for mpn_hgcd_matrix_mul_1, M->n |
74 | | limbs, and mpn_matrix22_mul1_inverse_vector, n limbs. If hgcd2 |
75 | | fails, needs space for the quotient, qn <= n - s + 1 limbs, and for |
76 | | mpn_hgcd_matrix_update_q, qn + (size of the appropriate column of M) <= |
77 | | resulting size of M. |
78 | | |
79 | | If N is the input size to the calling hgcd, then s = floor(N/2) + |
80 | | 1, M->n < N, qn + matrix size <= n - s + 1 + n - s = 2 (n - s) + 1 |
81 | | < N, so N is sufficient. |
82 | | */ |
83 | | |
84 | | static mp_size_t |
85 | | hgcd_jacobi_step (mp_size_t n, mp_ptr ap, mp_ptr bp, mp_size_t s, |
86 | | struct hgcd_matrix *M, unsigned *bitsp, mp_ptr tp) |
87 | 0 | { |
88 | 0 | struct hgcd_matrix1 M1; |
89 | 0 | mp_limb_t mask; |
90 | 0 | mp_limb_t ah, al, bh, bl; |
91 |
|
92 | 0 | ASSERT (n > s); |
93 | | |
94 | 0 | mask = ap[n-1] | bp[n-1]; /* OR of the limbs at index n-1 of a and b. */ |
95 | 0 | ASSERT (mask > 0); |
96 | | |
97 | 0 | if (n == s + 1) |
98 | 0 | { |
99 | 0 | if (mask < 4) /* Only one limb above s and tiny top limbs: skip the hgcd2 attempt. */ |
100 | 0 | goto subtract; |
101 | | |
102 | 0 | ah = ap[n-1]; al = ap[n-2]; |
103 | 0 | bh = bp[n-1]; bl = bp[n-2]; |
104 | 0 | } |
105 | 0 | else if (mask & GMP_NUMB_HIGHBIT) /* mask has GMP_NUMB_HIGHBIT set: take limbs n-1 and n-2 as they are. */ |
106 | 0 | { |
107 | 0 | ah = ap[n-1]; al = ap[n-2]; |
108 | 0 | bh = bp[n-1]; bl = bp[n-2]; |
109 | 0 | } |
110 | 0 | else |
111 | 0 | { |
112 | 0 | int shift; |
113 |
|
114 | 0 | count_leading_zeros (shift, mask); /* The same shift is applied to a and b; limbs n-1, n-2 and n-3 of each are read below. */ |
115 | 0 | ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]); |
116 | 0 | al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]); |
117 | 0 | bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]); |
118 | 0 | bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]); |
119 | 0 | } |
120 | | |
121 | | /* Try an mpn_hgcd2_jacobi step; a nonzero return is treated as success and M1 is applied. */ |
122 | 0 | if (mpn_hgcd2_jacobi (ah, al, bh, bl, &M1, bitsp)) |
123 | 0 | { |
124 | | /* Multiply M <- M * M1 */ |
125 | 0 | mpn_hgcd_matrix_mul_1 (M, &M1, tp); |
126 | | |
127 | | /* Can't swap inputs, so we need to copy a into tp and use ap as the output. */ |
128 | 0 | MPN_COPY (tp, ap, n); |
129 | | /* Multiply M1^{-1} (a;b); the value returned is this function's result. */ |
130 | 0 | return mpn_matrix22_mul1_inverse_vector (&M1, ap, tp, bp, n); |
131 | 0 | } |
132 | | |
133 | 0 | subtract: /* Fallback: subtraction/division step; quotients are reported through hgcd_jacobi_hook. */ |
134 | 0 | { |
135 | 0 | struct hgcd_jacobi_ctx ctx; |
136 | 0 | ctx.M = M; |
137 | 0 | ctx.bitsp = bitsp; |
138 |
|
139 | 0 | return mpn_gcd_subdiv_step (ap, bp, n, s, hgcd_jacobi_hook, &ctx, tp); |
140 | 0 | } |
141 | 0 | } |
142 | | |
143 | | /* Reduces a,b until |a-b| fits in n/2 + 1 limbs. Constructs matrix M |
144 | | with elements of size at most (n+1)/2 - 1. Returns new size of a, |
145 | | b, or zero if no reduction is possible. bitsp is forwarded to every recursive call and to hgcd_jacobi_step. */ |
146 | | |
147 | | /* Same scratch requirements as for mpn_hgcd. */ |
148 | | mp_size_t |
149 | | mpn_hgcd_jacobi (mp_ptr ap, mp_ptr bp, mp_size_t n, |
150 | | struct hgcd_matrix *M, unsigned *bitsp, mp_ptr tp) |
151 | 0 | { |
152 | 0 | mp_size_t s = n/2 + 1; /* Size bound passed to every hgcd_jacobi_step call; computed from the input n and never changed. */ |
153 |
|
154 | 0 | mp_size_t nn; |
155 | 0 | int success = 0; /* Becomes 1 once any reduction has been applied. */ |
156 |
|
157 | 0 | if (n <= s) |
158 | | /* Happens when n <= 2, a fairly uninteresting case but exercised |
159 | | by the random inputs of the testsuite. */ |
160 | 0 | return 0; |
161 | | |
162 | 0 | ASSERT ((ap[n-1] | bp[n-1]) > 0); |
163 | | |
164 | 0 | ASSERT ((n+1)/2 - 1 < M->alloc); |
165 | | |
166 | 0 | if (ABOVE_THRESHOLD (n, HGCD_THRESHOLD)) /* Recursive path; otherwise only the step loop at the end runs. */ |
167 | 0 | { |
168 | 0 | mp_size_t n2 = (3*n)/4 + 1; |
169 | 0 | mp_size_t p = n/2; |
170 |
|
171 | 0 | nn = mpn_hgcd_jacobi (ap + p, bp + p, n - p, M, bitsp, tp); /* First recursion, on limbs p .. n-1. */ |
172 | 0 | if (nn > 0) |
173 | 0 | { |
174 | | /* Needs 2*(p + M->n) <= 2*(floor(n/2) + ceil(n/2) - 1) |
175 | | = 2 (n - 1) */ |
176 | 0 | n = mpn_hgcd_matrix_adjust (M, p + nn, ap, bp, p, tp); |
177 | 0 | success = 1; |
178 | 0 | } |
179 | 0 | while (n > n2) /* Single steps until the size is at most n2. */ |
180 | 0 | { |
181 | | /* Needs n + 1 storage */ |
182 | 0 | nn = hgcd_jacobi_step (n, ap, bp, s, M, bitsp, tp); |
183 | 0 | if (!nn) |
184 | 0 | return success ? n : 0; /* No further step possible: current size if anything was reduced, else 0. */ |
185 | 0 | n = nn; |
186 | 0 | success = 1; |
187 | 0 | } |
188 | | |
189 | 0 | if (n > s + 2) /* Second recursion, accumulating into a separate matrix M1. */ |
190 | 0 | { |
191 | 0 | struct hgcd_matrix M1; |
192 | 0 | mp_size_t scratch; |
193 |
|
194 | 0 | p = 2*s - n + 1; |
195 | 0 | scratch = MPN_HGCD_MATRIX_INIT_ITCH (n-p); |
196 |
|
197 | 0 | mpn_hgcd_matrix_init(&M1, n - p, tp); /* M1 is initialised from tp; the calls below use tp + scratch as their scratch area. */ |
198 | 0 | nn = mpn_hgcd_jacobi (ap + p, bp + p, n - p, &M1, bitsp, tp + scratch); |
199 | 0 | if (nn > 0) |
200 | 0 | { |
201 | | /* We always have max(M) > 2^{-(GMP_NUMB_BITS + 1)} max(M1) */ |
202 | 0 | ASSERT (M->n + 2 >= M1.n); |
203 | | |
204 | | /* Furthermore, assume M ends with a quotient (1, q; 0, 1), |
205 | | then either q or q + 1 is a correct quotient, and M1 will |
206 | | start with either (1, 0; 1, 1) or (2, 1; 1, 1). This |
207 | | rules out the case that the size of M * M1 is much |
208 | | smaller than the expected M->n + M1->n. */ |
209 | | |
210 | 0 | ASSERT (M->n + M1.n < M->alloc); |
211 | | |
212 | | /* Needs 2 (p + M->n) <= 2 (2*s - n2 + 1 + n2 - s - 1) |
213 | | = 2*s <= 2*(floor(n/2) + 1) <= n + 2. */ |
214 | 0 | n = mpn_hgcd_matrix_adjust (&M1, p + nn, ap, bp, p, tp + scratch); |
215 | | |
216 | | /* We need a bound for M->n + M1.n. Let n be the original |
217 | | input size. Then |
218 | | |
219 | | ceil(n/2) - 1 >= size of product >= M.n + M1.n - 2 |
220 | | |
221 | | and it follows that |
222 | | |
223 | | M.n + M1.n <= ceil(n/2) + 1 |
224 | | |
225 | | Then 3*(M.n + M1.n) + 5 <= 3 * ceil(n/2) + 8 is the |
226 | | amount of needed scratch space. */ |
227 | 0 | mpn_hgcd_matrix_mul (M, &M1, tp + scratch); |
228 | 0 | success = 1; |
229 | 0 | } |
230 | 0 | } |
231 | 0 | } |
232 | | |
233 | 0 | for (;;) /* Repeat single steps until hgcd_jacobi_step returns 0. */ |
234 | 0 | { |
235 | | /* Needs s+3 < n */ |
236 | 0 | nn = hgcd_jacobi_step (n, ap, bp, s, M, bitsp, tp); |
237 | 0 | if (!nn) |
238 | 0 | return success ? n : 0; /* Current size if any reduction happened, otherwise 0. */ |
239 | | |
240 | 0 | n = nn; |
241 | 0 | success = 1; |
242 | 0 | } |
243 | 0 | } |
233 | 0 | for (;;) |
234 | 0 | { |
235 | | /* Needs s+3 < n */ |
236 | 0 | nn = hgcd_jacobi_step (n, ap, bp, s, M, bitsp, tp); |
237 | 0 | if (!nn) |
238 | 0 | return success ? n : 0; |
239 | | |
240 | 0 | n = nn; |
241 | 0 | success = 1; |
242 | 0 | } |
243 | 0 | } |