Line | Count | Source (jump to first uncovered line) |
1 | | /* mpn_tdiv_qr -- Divide the numerator (np,nn) by the denominator (dp,dn) and |
2 | | write the nn-dn+1 quotient limbs at qp and the dn remainder limbs at rp. If |
3 | | qxn is non-zero, generate that many fraction limbs and append them after the |
4 | | other quotient limbs, and update the remainder accordingly. The input |
5 | | operands are unaffected. |
6 | | |
7 | | Preconditions: |
8 | | 1. The most significant limb of the divisor must be non-zero. |
9 | | 2. nn >= dn, even if qxn is non-zero. (??? relax this ???) |
10 | | |
11 | | The time complexity of this is O(qn*qn+M(dn,qn)), where M(m,n) is the time |
12 | | complexity of multiplication. |
13 | | |
14 | | Copyright 1997, 2000-2002, 2005, 2009, 2015 Free Software Foundation, Inc. |
15 | | |
16 | | This file is part of the GNU MP Library. |
17 | | |
18 | | The GNU MP Library is free software; you can redistribute it and/or modify |
19 | | it under the terms of either: |
20 | | |
21 | | * the GNU Lesser General Public License as published by the Free |
22 | | Software Foundation; either version 3 of the License, or (at your |
23 | | option) any later version. |
24 | | |
25 | | or |
26 | | |
27 | | * the GNU General Public License as published by the Free Software |
28 | | Foundation; either version 2 of the License, or (at your option) any |
29 | | later version. |
30 | | |
31 | | or both in parallel, as here. |
32 | | |
33 | | The GNU MP Library is distributed in the hope that it will be useful, but |
34 | | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
35 | | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
36 | | for more details. |
37 | | |
38 | | You should have received copies of the GNU General Public License and the |
39 | | GNU Lesser General Public License along with the GNU MP Library. If not, |
40 | | see https://www.gnu.org/licenses/. */ |
41 | | |
42 | | #include "gmp-impl.h" |
43 | | #include "longlong.h" |
44 | | |
45 | | |
46 | | void |
47 | | mpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn, |
48 | | mp_srcptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn) |
49 | 0 | { |
50 | 0 | ASSERT_ALWAYS (qxn == 0); |
51 | | |
52 | 0 | ASSERT (nn >= 0); |
53 | 0 | ASSERT (dn >= 0); |
54 | 0 | ASSERT (dn == 0 || dp[dn - 1] != 0); |
55 | 0 | ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1 + qxn, np, nn)); |
56 | 0 | ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1 + qxn, dp, dn)); |
57 | |
|
58 | 0 | switch (dn) |
59 | 0 | { |
60 | 0 | case 0: |
61 | 0 | DIVIDE_BY_ZERO; |
62 | | |
63 | 0 | case 1: |
64 | 0 | { |
65 | 0 | rp[0] = mpn_divrem_1 (qp, (mp_size_t) 0, np, nn, dp[0]); |
66 | 0 | return; |
67 | 0 | } |
68 | | |
69 | 0 | case 2: |
70 | 0 | { |
71 | 0 | mp_ptr n2p; |
72 | 0 | mp_limb_t qhl, cy; |
73 | 0 | TMP_DECL; |
74 | 0 | TMP_MARK; |
75 | 0 | if ((dp[1] & GMP_NUMB_HIGHBIT) == 0) |
76 | 0 | { |
77 | 0 | int cnt; |
78 | 0 | mp_limb_t d2p[2]; |
79 | 0 | count_leading_zeros (cnt, dp[1]); |
80 | 0 | cnt -= GMP_NAIL_BITS; |
81 | 0 | d2p[1] = (dp[1] << cnt) | (dp[0] >> (GMP_NUMB_BITS - cnt)); |
82 | 0 | d2p[0] = (dp[0] << cnt) & GMP_NUMB_MASK; |
83 | 0 | n2p = TMP_ALLOC_LIMBS (nn + 1); |
84 | 0 | cy = mpn_lshift (n2p, np, nn, cnt); |
85 | 0 | n2p[nn] = cy; |
86 | 0 | qhl = mpn_divrem_2 (qp, 0L, n2p, nn + (cy != 0), d2p); |
87 | 0 | if (cy == 0) |
88 | 0 | qp[nn - 2] = qhl; /* always store nn-2+1 quotient limbs */ |
89 | 0 | rp[0] = (n2p[0] >> cnt) |
90 | 0 | | ((n2p[1] << (GMP_NUMB_BITS - cnt)) & GMP_NUMB_MASK); |
91 | 0 | rp[1] = (n2p[1] >> cnt); |
92 | 0 | } |
93 | 0 | else |
94 | 0 | { |
95 | 0 | n2p = TMP_ALLOC_LIMBS (nn); |
96 | 0 | MPN_COPY (n2p, np, nn); |
97 | 0 | qhl = mpn_divrem_2 (qp, 0L, n2p, nn, dp); |
98 | 0 | qp[nn - 2] = qhl; /* always store nn-2+1 quotient limbs */ |
99 | 0 | rp[0] = n2p[0]; |
100 | 0 | rp[1] = n2p[1]; |
101 | 0 | } |
102 | 0 | TMP_FREE; |
103 | 0 | return; |
104 | 0 | } |
105 | | |
106 | 0 | default: |
107 | 0 | { |
108 | 0 | int adjust; |
109 | 0 | gmp_pi1_t dinv; |
110 | 0 | TMP_DECL; |
111 | 0 | TMP_MARK; |
112 | 0 | adjust = np[nn - 1] >= dp[dn - 1]; /* conservative tests for quotient size */ |
113 | 0 | if (nn + adjust >= 2 * dn) |
114 | 0 | { |
115 | 0 | mp_ptr n2p, d2p; |
116 | 0 | mp_limb_t cy; |
117 | 0 | int cnt; |
118 | |
|
119 | 0 | qp[nn - dn] = 0; /* zero high quotient limb */ |
120 | 0 | if ((dp[dn - 1] & GMP_NUMB_HIGHBIT) == 0) /* normalize divisor */ |
121 | 0 | { |
122 | 0 | count_leading_zeros (cnt, dp[dn - 1]); |
123 | 0 | cnt -= GMP_NAIL_BITS; |
124 | 0 | d2p = TMP_ALLOC_LIMBS (dn); |
125 | 0 | mpn_lshift (d2p, dp, dn, cnt); |
126 | 0 | n2p = TMP_ALLOC_LIMBS (nn + 1); |
127 | 0 | cy = mpn_lshift (n2p, np, nn, cnt); |
128 | 0 | n2p[nn] = cy; |
129 | 0 | nn += adjust; |
130 | 0 | } |
131 | 0 | else |
132 | 0 | { |
133 | 0 | cnt = 0; |
134 | 0 | d2p = (mp_ptr) dp; |
135 | 0 | n2p = TMP_ALLOC_LIMBS (nn + 1); |
136 | 0 | MPN_COPY (n2p, np, nn); |
137 | 0 | n2p[nn] = 0; |
138 | 0 | nn += adjust; |
139 | 0 | } |
140 | |
|
141 | 0 | invert_pi1 (dinv, d2p[dn - 1], d2p[dn - 2]); |
142 | 0 | if (BELOW_THRESHOLD (dn, DC_DIV_QR_THRESHOLD)) |
143 | 0 | mpn_sbpi1_div_qr (qp, n2p, nn, d2p, dn, dinv.inv32); |
144 | 0 | else if (BELOW_THRESHOLD (dn, MUPI_DIV_QR_THRESHOLD) || /* fast condition */ |
145 | 0 | BELOW_THRESHOLD (nn, 2 * MU_DIV_QR_THRESHOLD) || /* fast condition */ |
146 | 0 | (double) (2 * (MU_DIV_QR_THRESHOLD - MUPI_DIV_QR_THRESHOLD)) * dn /* slow... */ |
147 | 0 | + (double) MUPI_DIV_QR_THRESHOLD * nn > (double) dn * nn) /* ...condition */ |
148 | 0 | mpn_dcpi1_div_qr (qp, n2p, nn, d2p, dn, &dinv); |
149 | 0 | else |
150 | 0 | { |
151 | 0 | mp_size_t itch = mpn_mu_div_qr_itch (nn, dn, 0); |
152 | 0 | mp_ptr scratch = TMP_ALLOC_LIMBS (itch); |
153 | 0 | mpn_mu_div_qr (qp, rp, n2p, nn, d2p, dn, scratch); |
154 | 0 | n2p = rp; |
155 | 0 | } |
156 | |
|
157 | 0 | if (cnt != 0) |
158 | 0 | mpn_rshift (rp, n2p, dn, cnt); |
159 | 0 | else |
160 | 0 | MPN_COPY (rp, n2p, dn); |
161 | 0 | TMP_FREE; |
162 | 0 | return; |
163 | 0 | } |
164 | | |
165 | | /* When we come here, the numerator/partial remainder is less |
166 | | than twice the size of the denominator. */ |
167 | | |
168 | 0 | { |
169 | | /* Problem: |
170 | | |
171 | | Divide a numerator N with nn limbs by a denominator D with dn |
172 | | limbs forming a quotient of qn=nn-dn+1 limbs. When qn is small |
173 | | compared to dn, conventional division algorithms perform poorly. |
174 | | We want an algorithm that has an expected running time that is |
175 | | dependent only on qn. |
176 | | |
177 | | Algorithm (very informally stated): |
178 | | |
179 | | 1) Divide the 2 x qn most significant limbs from the numerator |
180 | | by the qn most significant limbs from the denominator. Call |
181 | | the result qest. This is either the correct quotient, but |
182 | | might be 1 or 2 too large. Compute the remainder from the |
183 | | division. (This step is implemented by an mpn_divrem call.) |
184 | | |
185 | | 2) Is the most significant limb from the remainder < p, where p |
186 | | is the product of the most significant limb from the quotient |
187 | | and the next(d)? (Next(d) denotes the next ignored limb from |
188 | | the denominator.) If it is, decrement qest, and adjust the |
189 | | remainder accordingly. |
190 | | |
191 | | 3) Is the remainder >= qest? If it is, qest is the desired |
192 | | quotient. The algorithm terminates. |
193 | | |
194 | | 4) Subtract qest x next(d) from the remainder. If there is |
195 | | borrow out, decrement qest, and adjust the remainder |
196 | | accordingly. |
197 | | |
198 | | 5) Skip one word from the denominator (i.e., let next(d) denote |
199 | | the next less significant limb. */ |
200 | |
|
201 | 0 | mp_size_t qn; |
202 | 0 | mp_ptr n2p, d2p; |
203 | 0 | mp_ptr tp; |
204 | 0 | mp_limb_t cy; |
205 | 0 | mp_size_t in, rn; |
206 | 0 | mp_limb_t quotient_too_large; |
207 | 0 | unsigned int cnt; |
208 | |
|
209 | 0 | qn = nn - dn; |
210 | 0 | qp[qn] = 0; /* zero high quotient limb */ |
211 | 0 | qn += adjust; /* qn cannot become bigger */ |
212 | |
|
213 | 0 | if (qn == 0) |
214 | 0 | { |
215 | 0 | MPN_COPY (rp, np, dn); |
216 | 0 | TMP_FREE; |
217 | 0 | return; |
218 | 0 | } |
219 | | |
220 | 0 | in = dn - qn; /* (at least partially) ignored # of limbs in ops */ |
221 | | /* Normalize denominator by shifting it to the left such that its |
222 | | most significant bit is set. Then shift the numerator the same |
223 | | amount, to mathematically preserve quotient. */ |
224 | 0 | if ((dp[dn - 1] & GMP_NUMB_HIGHBIT) == 0) |
225 | 0 | { |
226 | 0 | count_leading_zeros (cnt, dp[dn - 1]); |
227 | 0 | cnt -= GMP_NAIL_BITS; |
228 | |
|
229 | 0 | d2p = TMP_ALLOC_LIMBS (qn); |
230 | 0 | mpn_lshift (d2p, dp + in, qn, cnt); |
231 | 0 | d2p[0] |= dp[in - 1] >> (GMP_NUMB_BITS - cnt); |
232 | |
|
233 | 0 | n2p = TMP_ALLOC_LIMBS (2 * qn + 1); |
234 | 0 | cy = mpn_lshift (n2p, np + nn - 2 * qn, 2 * qn, cnt); |
235 | 0 | if (adjust) |
236 | 0 | { |
237 | 0 | n2p[2 * qn] = cy; |
238 | 0 | n2p++; |
239 | 0 | } |
240 | 0 | else |
241 | 0 | { |
242 | 0 | n2p[0] |= np[nn - 2 * qn - 1] >> (GMP_NUMB_BITS - cnt); |
243 | 0 | } |
244 | 0 | } |
245 | 0 | else |
246 | 0 | { |
247 | 0 | cnt = 0; |
248 | 0 | d2p = (mp_ptr) dp + in; |
249 | |
|
250 | 0 | n2p = TMP_ALLOC_LIMBS (2 * qn + 1); |
251 | 0 | MPN_COPY (n2p, np + nn - 2 * qn, 2 * qn); |
252 | 0 | if (adjust) |
253 | 0 | { |
254 | 0 | n2p[2 * qn] = 0; |
255 | 0 | n2p++; |
256 | 0 | } |
257 | 0 | } |
258 | | |
259 | | /* Get an approximate quotient using the extracted operands. */ |
260 | 0 | if (qn == 1) |
261 | 0 | { |
262 | 0 | mp_limb_t q0, r0; |
263 | 0 | udiv_qrnnd (q0, r0, n2p[1], n2p[0] << GMP_NAIL_BITS, d2p[0] << GMP_NAIL_BITS); |
264 | 0 | n2p[0] = r0 >> GMP_NAIL_BITS; |
265 | 0 | qp[0] = q0; |
266 | 0 | } |
267 | 0 | else if (qn == 2) |
268 | 0 | mpn_divrem_2 (qp, 0L, n2p, 4L, d2p); /* FIXME: obsolete function */ |
269 | 0 | else |
270 | 0 | { |
271 | 0 | invert_pi1 (dinv, d2p[qn - 1], d2p[qn - 2]); |
272 | 0 | if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD)) |
273 | 0 | mpn_sbpi1_div_qr (qp, n2p, 2 * qn, d2p, qn, dinv.inv32); |
274 | 0 | else if (BELOW_THRESHOLD (qn, MU_DIV_QR_THRESHOLD)) |
275 | 0 | mpn_dcpi1_div_qr (qp, n2p, 2 * qn, d2p, qn, &dinv); |
276 | 0 | else |
277 | 0 | { |
278 | 0 | mp_size_t itch = mpn_mu_div_qr_itch (2 * qn, qn, 0); |
279 | 0 | mp_ptr scratch = TMP_ALLOC_LIMBS (itch); |
280 | 0 | mp_ptr r2p = rp; |
281 | 0 | if (np == r2p) /* If N and R share space, put ... */ |
282 | 0 | r2p += nn - qn; /* intermediate remainder at N's upper end. */ |
283 | 0 | mpn_mu_div_qr (qp, r2p, n2p, 2 * qn, d2p, qn, scratch); |
284 | 0 | MPN_COPY (n2p, r2p, qn); |
285 | 0 | } |
286 | 0 | } |
287 | |
|
288 | 0 | rn = qn; |
289 | | /* Multiply the first ignored divisor limb by the most significant |
290 | | quotient limb. If that product is > the partial remainder's |
291 | | most significant limb, we know the quotient is too large. This |
292 | | test quickly catches most cases where the quotient is too large; |
293 | | it catches all cases where the quotient is 2 too large. */ |
294 | 0 | { |
295 | 0 | mp_limb_t dl, x; |
296 | 0 | mp_limb_t h, dummy; |
297 | |
|
298 | 0 | if (in - 2 < 0) |
299 | 0 | dl = 0; |
300 | 0 | else |
301 | 0 | dl = dp[in - 2]; |
302 | |
|
303 | 0 | #if GMP_NAIL_BITS == 0 |
304 | 0 | x = (dp[in - 1] << cnt) | ((dl >> 1) >> ((~cnt) % GMP_LIMB_BITS)); |
305 | | #else |
306 | | x = (dp[in - 1] << cnt) & GMP_NUMB_MASK; |
307 | | if (cnt != 0) |
308 | | x |= dl >> (GMP_NUMB_BITS - cnt); |
309 | | #endif |
310 | 0 | umul_ppmm (h, dummy, x, qp[qn - 1] << GMP_NAIL_BITS); |
311 | |
|
312 | 0 | if (n2p[qn - 1] < h) |
313 | 0 | { |
314 | 0 | mp_limb_t cy; |
315 | |
|
316 | 0 | mpn_decr_u (qp, (mp_limb_t) 1); |
317 | 0 | cy = mpn_add_n (n2p, n2p, d2p, qn); |
318 | 0 | if (cy) |
319 | 0 | { |
320 | | /* The partial remainder is safely large. */ |
321 | 0 | n2p[qn] = cy; |
322 | 0 | ++rn; |
323 | 0 | } |
324 | 0 | } |
325 | 0 | } |
326 | |
|
327 | 0 | quotient_too_large = 0; |
328 | 0 | if (cnt != 0) |
329 | 0 | { |
330 | 0 | mp_limb_t cy1, cy2; |
331 | | |
332 | | /* Append partially used numerator limb to partial remainder. */ |
333 | 0 | cy1 = mpn_lshift (n2p, n2p, rn, GMP_NUMB_BITS - cnt); |
334 | 0 | n2p[0] |= np[in - 1] & (GMP_NUMB_MASK >> cnt); |
335 | | |
336 | | /* Update partial remainder with partially used divisor limb. */ |
337 | 0 | cy2 = mpn_submul_1 (n2p, qp, qn, dp[in - 1] & (GMP_NUMB_MASK >> cnt)); |
338 | 0 | if (qn != rn) |
339 | 0 | { |
340 | 0 | ASSERT_ALWAYS (n2p[qn] >= cy2); |
341 | 0 | n2p[qn] -= cy2; |
342 | 0 | } |
343 | 0 | else |
344 | 0 | { |
345 | 0 | n2p[qn] = cy1 - cy2; /* & GMP_NUMB_MASK; */ |
346 | |
|
347 | 0 | quotient_too_large = (cy1 < cy2); |
348 | 0 | ++rn; |
349 | 0 | } |
350 | 0 | --in; |
351 | 0 | } |
352 | | /* True: partial remainder now is neutral, i.e., it is not shifted up. */ |
353 | | |
354 | 0 | tp = TMP_ALLOC_LIMBS (dn); |
355 | |
|
356 | 0 | if (in < qn) |
357 | 0 | { |
358 | 0 | if (in == 0) |
359 | 0 | { |
360 | 0 | MPN_COPY (rp, n2p, rn); |
361 | 0 | ASSERT_ALWAYS (rn == dn); |
362 | 0 | goto foo; |
363 | 0 | } |
364 | 0 | mpn_mul (tp, qp, qn, dp, in); |
365 | 0 | } |
366 | 0 | else |
367 | 0 | mpn_mul (tp, dp, in, qp, qn); |
368 | | |
369 | 0 | cy = mpn_sub (n2p, n2p, rn, tp + in, qn); |
370 | 0 | MPN_COPY (rp + in, n2p, dn - in); |
371 | 0 | quotient_too_large |= cy; |
372 | 0 | cy = mpn_sub_n (rp, np, tp, in); |
373 | 0 | cy = mpn_sub_1 (rp + in, rp + in, rn, cy); |
374 | 0 | quotient_too_large |= cy; |
375 | 0 | foo: |
376 | 0 | if (quotient_too_large) |
377 | 0 | { |
378 | 0 | mpn_decr_u (qp, (mp_limb_t) 1); |
379 | 0 | mpn_add_n (rp, rp, dp, dn); |
380 | 0 | } |
381 | 0 | } |
382 | 0 | TMP_FREE; |
383 | 0 | return; |
384 | 0 | } |
385 | 0 | } |
386 | 0 | } |