Line | Count | Source (jump to first uncovered line) |
1 | | /* hgcd2.c |
2 | | |
3 | | THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY |
4 | | SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST |
5 | | GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. |
6 | | |
7 | | Copyright 1996, 1998, 2000-2004, 2008, 2012, 2019 Free Software Foundation, |
8 | | Inc. |
9 | | |
10 | | This file is part of the GNU MP Library. |
11 | | |
12 | | The GNU MP Library is free software; you can redistribute it and/or modify |
13 | | it under the terms of either: |
14 | | |
15 | | * the GNU Lesser General Public License as published by the Free |
16 | | Software Foundation; either version 3 of the License, or (at your |
17 | | option) any later version. |
18 | | |
19 | | or |
20 | | |
21 | | * the GNU General Public License as published by the Free Software |
22 | | Foundation; either version 2 of the License, or (at your option) any |
23 | | later version. |
24 | | |
25 | | or both in parallel, as here. |
26 | | |
27 | | The GNU MP Library is distributed in the hope that it will be useful, but |
28 | | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
29 | | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
30 | | for more details. |
31 | | |
32 | | You should have received copies of the GNU General Public License and the |
33 | | GNU Lesser General Public License along with the GNU MP Library. If not, |
34 | | see https://www.gnu.org/licenses/. */ |
35 | | |
36 | | #include "gmp-impl.h" |
37 | | #include "longlong.h" |
38 | | |
39 | | #include "mpn/generic/hgcd2-div.h" |
40 | | |
41 | | #if GMP_NAIL_BITS != 0 |
42 | | #error Nails not implemented |
43 | | #endif |
44 | | |
45 | | /* Reduces a,b until |a-b| (almost) fits in one limb + 1 bit. Constructs |
46 | | matrix M. Returns 1 if we make progress, i.e. can perform at least |
47 | | one subtraction. Otherwise returns zero. */ |
48 | | |
49 | | /* FIXME: Possible optimizations: |
50 | | |
51 | | The div2 function starts with checking the most significant bit of |
52 | | the numerator. We can maintained normalized operands here, call |
53 | | hgcd with normalized operands only, which should make the code |
54 | | simpler and possibly faster. |
55 | | |
56 | | Experiment with table lookups on the most significant bits. |
57 | | |
58 | | This function is also a candidate for assembler implementation. |
59 | | */ |
60 | | int |
61 | | mpn_hgcd2 (mp_limb_t ah, mp_limb_t al, mp_limb_t bh, mp_limb_t bl, |
62 | | struct hgcd_matrix1 *M) |
63 | 0 | { |
64 | 0 | mp_limb_t u00, u01, u10, u11; |
65 | |
|
66 | 0 | if (ah < 2 || bh < 2) |
67 | 0 | return 0; |
68 | | |
69 | 0 | if (ah > bh || (ah == bh && al > bl)) |
70 | 0 | { |
71 | 0 | sub_ddmmss (ah, al, ah, al, bh, bl); |
72 | 0 | if (ah < 2) |
73 | 0 | return 0; |
74 | | |
75 | 0 | u00 = u01 = u11 = 1; |
76 | 0 | u10 = 0; |
77 | 0 | } |
78 | 0 | else |
79 | 0 | { |
80 | 0 | sub_ddmmss (bh, bl, bh, bl, ah, al); |
81 | 0 | if (bh < 2) |
82 | 0 | return 0; |
83 | | |
84 | 0 | u00 = u10 = u11 = 1; |
85 | 0 | u01 = 0; |
86 | 0 | } |
87 | | |
88 | 0 | if (ah < bh) |
89 | 0 | goto subtract_a; |
90 | | |
91 | 0 | for (;;) |
92 | 0 | { |
93 | 0 | ASSERT (ah >= bh); |
94 | 0 | if (ah == bh) |
95 | 0 | goto done; |
96 | | |
97 | 0 | if (ah < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2))) |
98 | 0 | { |
99 | 0 | ah = (ah << (GMP_LIMB_BITS / 2) ) + (al >> (GMP_LIMB_BITS / 2)); |
100 | 0 | bh = (bh << (GMP_LIMB_BITS / 2) ) + (bl >> (GMP_LIMB_BITS / 2)); |
101 | |
|
102 | 0 | break; |
103 | 0 | } |
104 | | |
105 | | /* Subtract a -= q b, and multiply M from the right by (1 q ; 0 |
106 | | 1), affecting the second column of M. */ |
107 | 0 | ASSERT (ah > bh); |
108 | 0 | sub_ddmmss (ah, al, ah, al, bh, bl); |
109 | |
|
110 | 0 | if (ah < 2) |
111 | 0 | goto done; |
112 | | |
113 | 0 | if (ah <= bh) |
114 | 0 | { |
115 | | /* Use q = 1 */ |
116 | 0 | u01 += u00; |
117 | 0 | u11 += u10; |
118 | 0 | } |
119 | 0 | else |
120 | 0 | { |
121 | 0 | mp_limb_t r[2]; |
122 | 0 | mp_limb_t q = div2 (r, ah, al, bh, bl); |
123 | 0 | al = r[0]; ah = r[1]; |
124 | 0 | if (ah < 2) |
125 | 0 | { |
126 | | /* A is too small, but q is correct. */ |
127 | 0 | u01 += q * u00; |
128 | 0 | u11 += q * u10; |
129 | 0 | goto done; |
130 | 0 | } |
131 | 0 | q++; |
132 | 0 | u01 += q * u00; |
133 | 0 | u11 += q * u10; |
134 | 0 | } |
135 | 0 | subtract_a: |
136 | 0 | ASSERT (bh >= ah); |
137 | 0 | if (ah == bh) |
138 | 0 | goto done; |
139 | | |
140 | 0 | if (bh < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2))) |
141 | 0 | { |
142 | 0 | ah = (ah << (GMP_LIMB_BITS / 2) ) + (al >> (GMP_LIMB_BITS / 2)); |
143 | 0 | bh = (bh << (GMP_LIMB_BITS / 2) ) + (bl >> (GMP_LIMB_BITS / 2)); |
144 | |
|
145 | 0 | goto subtract_a1; |
146 | 0 | } |
147 | | |
148 | | /* Subtract b -= q a, and multiply M from the right by (1 0 ; q |
149 | | 1), affecting the first column of M. */ |
150 | 0 | sub_ddmmss (bh, bl, bh, bl, ah, al); |
151 | |
|
152 | 0 | if (bh < 2) |
153 | 0 | goto done; |
154 | | |
155 | 0 | if (bh <= ah) |
156 | 0 | { |
157 | | /* Use q = 1 */ |
158 | 0 | u00 += u01; |
159 | 0 | u10 += u11; |
160 | 0 | } |
161 | 0 | else |
162 | 0 | { |
163 | 0 | mp_limb_t r[2]; |
164 | 0 | mp_limb_t q = div2 (r, bh, bl, ah, al); |
165 | 0 | bl = r[0]; bh = r[1]; |
166 | 0 | if (bh < 2) |
167 | 0 | { |
168 | | /* B is too small, but q is correct. */ |
169 | 0 | u00 += q * u01; |
170 | 0 | u10 += q * u11; |
171 | 0 | goto done; |
172 | 0 | } |
173 | 0 | q++; |
174 | 0 | u00 += q * u01; |
175 | 0 | u10 += q * u11; |
176 | 0 | } |
177 | 0 | } |
178 | | |
179 | | /* NOTE: Since we discard the least significant half limb, we don't get a |
180 | | truly maximal M (corresponding to |a - b| < 2^{GMP_LIMB_BITS +1}). */ |
181 | | /* Single precision loop */ |
182 | 0 | for (;;) |
183 | 0 | { |
184 | 0 | ASSERT (ah >= bh); |
185 | |
|
186 | 0 | ah -= bh; |
187 | 0 | if (ah < (CNST_LIMB (1) << (GMP_LIMB_BITS / 2 + 1))) |
188 | 0 | break; |
189 | | |
190 | 0 | if (ah <= bh) |
191 | 0 | { |
192 | | /* Use q = 1 */ |
193 | 0 | u01 += u00; |
194 | 0 | u11 += u10; |
195 | 0 | } |
196 | 0 | else |
197 | 0 | { |
198 | 0 | mp_double_limb_t rq = div1 (ah, bh); |
199 | 0 | mp_limb_t q = rq.d1; |
200 | 0 | ah = rq.d0; |
201 | |
|
202 | 0 | if (ah < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1))) |
203 | 0 | { |
204 | | /* A is too small, but q is correct. */ |
205 | 0 | u01 += q * u00; |
206 | 0 | u11 += q * u10; |
207 | 0 | break; |
208 | 0 | } |
209 | 0 | q++; |
210 | 0 | u01 += q * u00; |
211 | 0 | u11 += q * u10; |
212 | 0 | } |
213 | 0 | subtract_a1: |
214 | 0 | ASSERT (bh >= ah); |
215 | |
|
216 | 0 | bh -= ah; |
217 | 0 | if (bh < (CNST_LIMB (1) << (GMP_LIMB_BITS / 2 + 1))) |
218 | 0 | break; |
219 | | |
220 | 0 | if (bh <= ah) |
221 | 0 | { |
222 | | /* Use q = 1 */ |
223 | 0 | u00 += u01; |
224 | 0 | u10 += u11; |
225 | 0 | } |
226 | 0 | else |
227 | 0 | { |
228 | 0 | mp_double_limb_t rq = div1 (bh, ah); |
229 | 0 | mp_limb_t q = rq.d1; |
230 | 0 | bh = rq.d0; |
231 | |
|
232 | 0 | if (bh < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1))) |
233 | 0 | { |
234 | | /* B is too small, but q is correct. */ |
235 | 0 | u00 += q * u01; |
236 | 0 | u10 += q * u11; |
237 | 0 | break; |
238 | 0 | } |
239 | 0 | q++; |
240 | 0 | u00 += q * u01; |
241 | 0 | u10 += q * u11; |
242 | 0 | } |
243 | 0 | } |
244 | | |
245 | 0 | done: |
246 | 0 | M->u[0][0] = u00; M->u[0][1] = u01; |
247 | 0 | M->u[1][0] = u10; M->u[1][1] = u11; |
248 | |
|
249 | 0 | return 1; |
250 | 0 | } |
251 | | |
252 | | /* Sets (r;b) = (a;b) M, with M = (u00, u01; u10, u11). Vector must |
253 | | * have space for n + 1 limbs. Uses three buffers to avoid a copy*/ |
254 | | mp_size_t |
255 | | mpn_hgcd_mul_matrix1_vector (const struct hgcd_matrix1 *M, |
256 | | mp_ptr rp, mp_srcptr ap, mp_ptr bp, mp_size_t n) |
257 | 0 | { |
258 | 0 | mp_limb_t ah, bh; |
259 | | |
260 | | /* Compute (r,b) <-- (u00 a + u10 b, u01 a + u11 b) as |
261 | | |
262 | | r = u00 * a |
263 | | r += u10 * b |
264 | | b *= u11 |
265 | | b += u01 * a |
266 | | */ |
267 | |
|
268 | | #if HAVE_NATIVE_mpn_addaddmul_1msb0 |
269 | | ah = mpn_addaddmul_1msb0 (rp, ap, bp, n, M->u[0][0], M->u[1][0]); |
270 | | bh = mpn_addaddmul_1msb0 (bp, bp, ap, n, M->u[1][1], M->u[0][1]); |
271 | | #else |
272 | 0 | ah = mpn_mul_1 (rp, ap, n, M->u[0][0]); |
273 | 0 | ah += mpn_addmul_1 (rp, bp, n, M->u[1][0]); |
274 | |
|
275 | 0 | bh = mpn_mul_1 (bp, bp, n, M->u[1][1]); |
276 | 0 | bh += mpn_addmul_1 (bp, ap, n, M->u[0][1]); |
277 | 0 | #endif |
278 | 0 | rp[n] = ah; |
279 | 0 | bp[n] = bh; |
280 | |
|
281 | 0 | n += (ah | bh) > 0; |
282 | 0 | return n; |
283 | 0 | } |