Line | Count | Source (jump to first uncovered line) |
1 | | /* hgcd_appr.c. |
2 | | |
3 | | THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY |
4 | | SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST |
5 | | GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. |
6 | | |
7 | | Copyright 2011, 2012 Free Software Foundation, Inc. |
8 | | |
9 | | This file is part of the GNU MP Library. |
10 | | |
11 | | The GNU MP Library is free software; you can redistribute it and/or modify |
12 | | it under the terms of either: |
13 | | |
14 | | * the GNU Lesser General Public License as published by the Free |
15 | | Software Foundation; either version 3 of the License, or (at your |
16 | | option) any later version. |
17 | | |
18 | | or |
19 | | |
20 | | * the GNU General Public License as published by the Free Software |
21 | | Foundation; either version 2 of the License, or (at your option) any |
22 | | later version. |
23 | | |
24 | | or both in parallel, as here. |
25 | | |
26 | | The GNU MP Library is distributed in the hope that it will be useful, but |
27 | | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
28 | | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
29 | | for more details. |
30 | | |
31 | | You should have received copies of the GNU General Public License and the |
32 | | GNU Lesser General Public License along with the GNU MP Library. If not, |
33 | | see https://www.gnu.org/licenses/. */ |
34 | | |
35 | | #include "gmp-impl.h" |
36 | | #include "longlong.h" |
37 | | |
38 | | /* Identical to mpn_hgcd_itch. FIXME: Do we really need to add |
39 | | HGCD_THRESHOLD at the end? */ |
40 | | mp_size_t |
41 | | mpn_hgcd_appr_itch (mp_size_t n) |
42 | 0 | { |
43 | 0 | if (BELOW_THRESHOLD (n, HGCD_APPR_THRESHOLD)) |
44 | 0 | return n; |
45 | 0 | else |
46 | 0 | { |
47 | 0 | unsigned k; |
48 | 0 | int count; |
49 | 0 | mp_size_t nscaled; |
50 | | |
51 | | /* Get the recursion depth. */ |
52 | 0 | nscaled = (n - 1) / (HGCD_APPR_THRESHOLD - 1); |
53 | 0 | count_leading_zeros (count, nscaled); |
54 | 0 | k = GMP_LIMB_BITS - count; |
55 | |
|
56 | 0 | return 20 * ((n+3) / 4) + 22 * k + HGCD_THRESHOLD; |
57 | 0 | } |
58 | 0 | } |
59 | | |
60 | | /* Destroys inputs. */ |
61 | | int |
62 | | mpn_hgcd_appr (mp_ptr ap, mp_ptr bp, mp_size_t n, |
63 | | struct hgcd_matrix *M, mp_ptr tp) |
64 | 0 | { |
65 | 0 | mp_size_t s; |
66 | 0 | int success = 0; |
67 | |
|
68 | 0 | ASSERT (n > 0); |
69 | |
|
70 | 0 | ASSERT ((ap[n-1] | bp[n-1]) != 0); |
71 | |
|
72 | 0 | if (n <= 2) |
73 | | /* Implies s = n. A fairly uninteresting case but exercised by the |
74 | | random inputs of the testsuite. */ |
75 | 0 | return 0; |
76 | | |
77 | 0 | ASSERT ((n+1)/2 - 1 < M->alloc); |
78 | | |
79 | | /* We aim for reduction of to GMP_NUMB_BITS * s bits. But each time |
80 | | we discard some of the least significant limbs, we must keep one |
81 | | additional bit to account for the truncation error. We maintain |
82 | | the GMP_NUMB_BITS * s - extra_bits as the current target size. */ |
83 | |
|
84 | 0 | s = n/2 + 1; |
85 | 0 | if (BELOW_THRESHOLD (n, HGCD_APPR_THRESHOLD)) |
86 | 0 | { |
87 | 0 | unsigned extra_bits = 0; |
88 | |
|
89 | 0 | while (n > 2) |
90 | 0 | { |
91 | 0 | mp_size_t nn; |
92 | |
|
93 | 0 | ASSERT (n > s); |
94 | 0 | ASSERT (n <= 2*s); |
95 | |
|
96 | 0 | nn = mpn_hgcd_step (n, ap, bp, s, M, tp); |
97 | 0 | if (!nn) |
98 | 0 | break; |
99 | | |
100 | 0 | n = nn; |
101 | 0 | success = 1; |
102 | | |
103 | | /* We can truncate and discard the lower p bits whenever nbits <= |
104 | | 2*sbits - p. To account for the truncation error, we must |
105 | | adjust |
106 | | |
107 | | sbits <-- sbits + 1 - p, |
108 | | |
109 | | rather than just sbits <-- sbits - p. This adjustment makes |
110 | | the produced matrix slightly smaller than it could be. */ |
111 | |
|
112 | 0 | if (GMP_NUMB_BITS * (n + 1) + 2 * extra_bits <= 2*GMP_NUMB_BITS * s) |
113 | 0 | { |
114 | 0 | mp_size_t p = (GMP_NUMB_BITS * (2*s - n) - 2*extra_bits) / GMP_NUMB_BITS; |
115 | |
|
116 | 0 | if (extra_bits == 0) |
117 | 0 | { |
118 | | /* We cross a limb boundary and bump s. We can't do that |
119 | | if the result is that it makes makes min(U, V) |
120 | | smaller than 2^{GMP_NUMB_BITS} s. */ |
121 | 0 | if (s + 1 == n |
122 | 0 | || mpn_zero_p (ap + s + 1, n - s - 1) |
123 | 0 | || mpn_zero_p (bp + s + 1, n - s - 1)) |
124 | 0 | continue; |
125 | | |
126 | 0 | extra_bits = GMP_NUMB_BITS - 1; |
127 | 0 | s++; |
128 | 0 | } |
129 | 0 | else |
130 | 0 | { |
131 | 0 | extra_bits--; |
132 | 0 | } |
133 | | |
134 | | /* Drop the p least significant limbs */ |
135 | 0 | ap += p; bp += p; n -= p; s -= p; |
136 | 0 | } |
137 | 0 | } |
138 | |
|
139 | 0 | ASSERT (s > 0); |
140 | |
|
141 | 0 | if (extra_bits > 0) |
142 | 0 | { |
143 | | /* We can get here only of we have dropped at least one of the least |
144 | | significant bits, so we can decrement ap and bp. We can then shift |
145 | | left extra bits using mpn_rshift. */ |
146 | | /* NOTE: In the unlikely case that n is large, it would be preferable |
147 | | to do an initial subdiv step to reduce the size before shifting, |
148 | | but that would mean duplicating mpn_gcd_subdiv_step with a bit |
149 | | count rather than a limb count. */ |
150 | 0 | ap--; bp--; |
151 | 0 | ap[0] = mpn_rshift (ap+1, ap+1, n, GMP_NUMB_BITS - extra_bits); |
152 | 0 | bp[0] = mpn_rshift (bp+1, bp+1, n, GMP_NUMB_BITS - extra_bits); |
153 | 0 | n += (ap[n] | bp[n]) > 0; |
154 | |
|
155 | 0 | ASSERT (success); |
156 | |
|
157 | 0 | while (n > 2) |
158 | 0 | { |
159 | 0 | mp_size_t nn; |
160 | |
|
161 | 0 | ASSERT (n > s); |
162 | 0 | ASSERT (n <= 2*s); |
163 | |
|
164 | 0 | nn = mpn_hgcd_step (n, ap, bp, s, M, tp); |
165 | |
|
166 | 0 | if (!nn) |
167 | 0 | return 1; |
168 | | |
169 | 0 | n = nn; |
170 | 0 | } |
171 | 0 | } |
172 | | |
173 | 0 | if (n == 2) |
174 | 0 | { |
175 | 0 | struct hgcd_matrix1 M1; |
176 | 0 | ASSERT (s == 1); |
177 | |
|
178 | 0 | if (mpn_hgcd2 (ap[1], ap[0], bp[1], bp[0], &M1)) |
179 | 0 | { |
180 | | /* Multiply M <- M * M1 */ |
181 | 0 | mpn_hgcd_matrix_mul_1 (M, &M1, tp); |
182 | 0 | success = 1; |
183 | 0 | } |
184 | 0 | } |
185 | 0 | return success; |
186 | 0 | } |
187 | 0 | else |
188 | 0 | { |
189 | 0 | mp_size_t n2 = (3*n)/4 + 1; |
190 | 0 | mp_size_t p = n/2; |
191 | 0 | mp_size_t nn; |
192 | |
|
193 | 0 | nn = mpn_hgcd_reduce (M, ap, bp, n, p, tp); |
194 | 0 | if (nn) |
195 | 0 | { |
196 | 0 | n = nn; |
197 | | /* FIXME: Discard some of the low limbs immediately? */ |
198 | 0 | success = 1; |
199 | 0 | } |
200 | |
|
201 | 0 | while (n > n2) |
202 | 0 | { |
203 | 0 | mp_size_t nn; |
204 | | |
205 | | /* Needs n + 1 storage */ |
206 | 0 | nn = mpn_hgcd_step (n, ap, bp, s, M, tp); |
207 | 0 | if (!nn) |
208 | 0 | return success; |
209 | | |
210 | 0 | n = nn; |
211 | 0 | success = 1; |
212 | 0 | } |
213 | 0 | if (n > s + 2) |
214 | 0 | { |
215 | 0 | struct hgcd_matrix M1; |
216 | 0 | mp_size_t scratch; |
217 | |
|
218 | 0 | p = 2*s - n + 1; |
219 | 0 | scratch = MPN_HGCD_MATRIX_INIT_ITCH (n-p); |
220 | |
|
221 | 0 | mpn_hgcd_matrix_init(&M1, n - p, tp); |
222 | 0 | if (mpn_hgcd_appr (ap + p, bp + p, n - p, &M1, tp + scratch)) |
223 | 0 | { |
224 | | /* We always have max(M) > 2^{-(GMP_NUMB_BITS + 1)} max(M1) */ |
225 | 0 | ASSERT (M->n + 2 >= M1.n); |
226 | | |
227 | | /* Furthermore, assume M ends with a quotient (1, q; 0, 1), |
228 | | then either q or q + 1 is a correct quotient, and M1 will |
229 | | start with either (1, 0; 1, 1) or (2, 1; 1, 1). This |
230 | | rules out the case that the size of M * M1 is much |
231 | | smaller than the expected M->n + M1->n. */ |
232 | |
|
233 | 0 | ASSERT (M->n + M1.n < M->alloc); |
234 | | |
235 | | /* We need a bound for of M->n + M1.n. Let n be the original |
236 | | input size. Then |
237 | | |
238 | | ceil(n/2) - 1 >= size of product >= M.n + M1.n - 2 |
239 | | |
240 | | and it follows that |
241 | | |
242 | | M.n + M1.n <= ceil(n/2) + 1 |
243 | | |
244 | | Then 3*(M.n + M1.n) + 5 <= 3 * ceil(n/2) + 8 is the |
245 | | amount of needed scratch space. */ |
246 | 0 | mpn_hgcd_matrix_mul (M, &M1, tp + scratch); |
247 | 0 | return 1; |
248 | 0 | } |
249 | 0 | } |
250 | | |
251 | 0 | for(;;) |
252 | 0 | { |
253 | 0 | mp_size_t nn; |
254 | |
|
255 | 0 | ASSERT (n > s); |
256 | 0 | ASSERT (n <= 2*s); |
257 | |
|
258 | 0 | nn = mpn_hgcd_step (n, ap, bp, s, M, tp); |
259 | |
|
260 | 0 | if (!nn) |
261 | 0 | return success; |
262 | | |
263 | 0 | n = nn; |
264 | 0 | success = 1; |
265 | 0 | } |
266 | 0 | } |
267 | 0 | } |