/src/gmp-6.2.1/mpn/tdiv_qr.c
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | /* mpn_tdiv_qr -- Divide the numerator (np,nn) by the denominator (dp,dn) and  | 
2  |  |    write the nn-dn+1 quotient limbs at qp and the dn remainder limbs at rp.  If  | 
3  |  |    qxn is non-zero, generate that many fraction limbs and append them after the  | 
4  |  |    other quotient limbs, and update the remainder accordingly.  The input  | 
5  |  |    operands are unaffected.  | 
6  |  |  | 
7  |  |    Preconditions:  | 
8  |  |    1. The most significant limb of the divisor must be non-zero.  | 
9  |  |    2. nn >= dn, even if qxn is non-zero.  (??? relax this ???)  | 
10  |  |  | 
11  |  |    The time complexity of this is O(qn*qn+M(dn,qn)), where M(m,n) is the time  | 
12  |  |    complexity of multiplication.  | 
13  |  |  | 
14  |  | Copyright 1997, 2000-2002, 2005, 2009, 2015 Free Software Foundation, Inc.  | 
15  |  |  | 
16  |  | This file is part of the GNU MP Library.  | 
17  |  |  | 
18  |  | The GNU MP Library is free software; you can redistribute it and/or modify  | 
19  |  | it under the terms of either:  | 
20  |  |  | 
21  |  |   * the GNU Lesser General Public License as published by the Free  | 
22  |  |     Software Foundation; either version 3 of the License, or (at your  | 
23  |  |     option) any later version.  | 
24  |  |  | 
25  |  | or  | 
26  |  |  | 
27  |  |   * the GNU General Public License as published by the Free Software  | 
28  |  |     Foundation; either version 2 of the License, or (at your option) any  | 
29  |  |     later version.  | 
30  |  |  | 
31  |  | or both in parallel, as here.  | 
32  |  |  | 
33  |  | The GNU MP Library is distributed in the hope that it will be useful, but  | 
34  |  | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY  | 
35  |  | or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License  | 
36  |  | for more details.  | 
37  |  |  | 
38  |  | You should have received copies of the GNU General Public License and the  | 
39  |  | GNU Lesser General Public License along with the GNU MP Library.  If not,  | 
40  |  | see https://www.gnu.org/licenses/.  */  | 
41  |  |  | 
42  |  | #include "gmp-impl.h"  | 
43  |  | #include "longlong.h"  | 
44  |  |  | 
45  |  |  | 
46  |  | void  | 
47  |  | mpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,  | 
48  |  |        mp_srcptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn)  | 
49  | 16.2k  | { | 
50  | 16.2k  |   ASSERT_ALWAYS (qxn == 0);  | 
51  |  |  | 
52  | 16.2k  |   ASSERT (nn >= 0);  | 
53  | 16.2k  |   ASSERT (dn >= 0);  | 
54  | 16.2k  |   ASSERT (dn == 0 || dp[dn - 1] != 0);  | 
55  | 16.2k  |   ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1 + qxn, np, nn));  | 
56  | 16.2k  |   ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1 + qxn, dp, dn));  | 
57  |  |  | 
58  | 16.2k  |   switch (dn)  | 
59  | 16.2k  |     { | 
60  | 0  |     case 0:  | 
61  | 0  |       DIVIDE_BY_ZERO;  | 
62  |  |  | 
63  | 560  |     case 1:  | 
64  | 560  |       { | 
65  | 560  |   rp[0] = mpn_divrem_1 (qp, (mp_size_t) 0, np, nn, dp[0]);  | 
66  | 560  |   return;  | 
67  | 0  |       }  | 
68  |  |  | 
69  | 419  |     case 2:  | 
70  | 419  |       { | 
71  | 419  |   mp_ptr n2p;  | 
72  | 419  |   mp_limb_t qhl, cy;  | 
73  | 419  |   TMP_DECL;  | 
74  | 419  |   TMP_MARK;  | 
75  | 419  |   if ((dp[1] & GMP_NUMB_HIGHBIT) == 0)  | 
76  | 385  |     { | 
77  | 385  |       int cnt;  | 
78  | 385  |       mp_limb_t d2p[2];  | 
79  | 385  |       count_leading_zeros (cnt, dp[1]);  | 
80  | 385  |       cnt -= GMP_NAIL_BITS;  | 
81  | 385  |       d2p[1] = (dp[1] << cnt) | (dp[0] >> (GMP_NUMB_BITS - cnt));  | 
82  | 385  |       d2p[0] = (dp[0] << cnt) & GMP_NUMB_MASK;  | 
83  | 385  |       n2p = TMP_ALLOC_LIMBS (nn + 1);  | 
84  | 385  |       cy = mpn_lshift (n2p, np, nn, cnt);  | 
85  | 385  |       n2p[nn] = cy;  | 
86  | 385  |       qhl = mpn_divrem_2 (qp, 0L, n2p, nn + (cy != 0), d2p);  | 
87  | 385  |       if (cy == 0)  | 
88  | 89  |         qp[nn - 2] = qhl; /* always store nn-2+1 quotient limbs */  | 
89  | 385  |       rp[0] = (n2p[0] >> cnt)  | 
90  | 385  |         | ((n2p[1] << (GMP_NUMB_BITS - cnt)) & GMP_NUMB_MASK);  | 
91  | 385  |       rp[1] = (n2p[1] >> cnt);  | 
92  | 385  |     }  | 
93  | 34  |   else  | 
94  | 34  |     { | 
95  | 34  |       n2p = TMP_ALLOC_LIMBS (nn);  | 
96  | 34  |       MPN_COPY (n2p, np, nn);  | 
97  | 34  |       qhl = mpn_divrem_2 (qp, 0L, n2p, nn, dp);  | 
98  | 34  |       qp[nn - 2] = qhl; /* always store nn-2+1 quotient limbs */  | 
99  | 34  |       rp[0] = n2p[0];  | 
100  | 34  |       rp[1] = n2p[1];  | 
101  | 34  |     }  | 
102  | 419  |   TMP_FREE;  | 
103  | 419  |   return;  | 
104  | 419  |       }  | 
105  |  |  | 
106  | 15.3k  |     default:  | 
107  | 15.3k  |       { | 
108  | 15.3k  |   int adjust;  | 
109  | 15.3k  |   gmp_pi1_t dinv;  | 
110  | 15.3k  |   TMP_DECL;  | 
111  | 15.3k  |   TMP_MARK;  | 
112  | 15.3k  |   adjust = np[nn - 1] >= dp[dn - 1];  /* conservative tests for quotient size */  | 
113  | 15.3k  |   if (nn + adjust >= 2 * dn)  | 
114  | 11.5k  |     { | 
115  | 11.5k  |       mp_ptr n2p, d2p;  | 
116  | 11.5k  |       mp_limb_t cy;  | 
117  | 11.5k  |       int cnt;  | 
118  |  |  | 
119  | 11.5k  |       qp[nn - dn] = 0;        /* zero high quotient limb */  | 
120  | 11.5k  |       if ((dp[dn - 1] & GMP_NUMB_HIGHBIT) == 0) /* normalize divisor */  | 
121  | 11.2k  |         { | 
122  | 11.2k  |     count_leading_zeros (cnt, dp[dn - 1]);  | 
123  | 11.2k  |     cnt -= GMP_NAIL_BITS;  | 
124  | 11.2k  |     d2p = TMP_ALLOC_LIMBS (dn);  | 
125  | 11.2k  |     mpn_lshift (d2p, dp, dn, cnt);  | 
126  | 11.2k  |     n2p = TMP_ALLOC_LIMBS (nn + 1);  | 
127  | 11.2k  |     cy = mpn_lshift (n2p, np, nn, cnt);  | 
128  | 11.2k  |     n2p[nn] = cy;  | 
129  | 11.2k  |     nn += adjust;  | 
130  | 11.2k  |         }  | 
131  | 312  |       else  | 
132  | 312  |         { | 
133  | 312  |     cnt = 0;  | 
134  | 312  |     d2p = (mp_ptr) dp;  | 
135  | 312  |     n2p = TMP_ALLOC_LIMBS (nn + 1);  | 
136  | 312  |     MPN_COPY (n2p, np, nn);  | 
137  | 312  |     n2p[nn] = 0;  | 
138  | 312  |     nn += adjust;  | 
139  | 312  |         }  | 
140  |  |  | 
141  | 11.5k  |       invert_pi1 (dinv, d2p[dn - 1], d2p[dn - 2]);  | 
142  | 11.5k  |       if (BELOW_THRESHOLD (dn, DC_DIV_QR_THRESHOLD))  | 
143  | 10.2k  |         mpn_sbpi1_div_qr (qp, n2p, nn, d2p, dn, dinv.inv32);  | 
144  | 1.36k  |       else if (BELOW_THRESHOLD (dn, MUPI_DIV_QR_THRESHOLD) ||   /* fast condition */  | 
145  | 1.36k  |          BELOW_THRESHOLD (nn, 2 * MU_DIV_QR_THRESHOLD) || /* fast condition */  | 
146  | 1.36k  |          (double) (2 * (MU_DIV_QR_THRESHOLD - MUPI_DIV_QR_THRESHOLD)) * dn /* slow... */  | 
147  | 0  |          + (double) MUPI_DIV_QR_THRESHOLD * nn > (double) dn * nn)    /* ...condition */  | 
148  | 1.36k  |         mpn_dcpi1_div_qr (qp, n2p, nn, d2p, dn, &dinv);  | 
149  | 0  |       else  | 
150  | 0  |         { | 
151  | 0  |     mp_size_t itch = mpn_mu_div_qr_itch (nn, dn, 0);  | 
152  | 0  |     mp_ptr scratch = TMP_ALLOC_LIMBS (itch);  | 
153  | 0  |     mpn_mu_div_qr (qp, rp, n2p, nn, d2p, dn, scratch);  | 
154  | 0  |     n2p = rp;  | 
155  | 0  |         }  | 
156  |  |  | 
157  | 11.5k  |       if (cnt != 0)  | 
158  | 11.2k  |         mpn_rshift (rp, n2p, dn, cnt);  | 
159  | 312  |       else  | 
160  | 312  |         MPN_COPY (rp, n2p, dn);  | 
161  | 11.5k  |       TMP_FREE;  | 
162  | 11.5k  |       return;  | 
163  | 11.5k  |     }  | 
164  |  |  | 
165  |  |   /* When we come here, the numerator/partial remainder is less  | 
166  |  |      than twice the size of the denominator.  */  | 
167  |  |  | 
168  | 3.74k  |     { | 
169  |  |       /* Problem:  | 
170  |  |  | 
171  |  |          Divide a numerator N with nn limbs by a denominator D with dn  | 
172  |  |          limbs forming a quotient of qn=nn-dn+1 limbs.  When qn is small  | 
173  |  |          compared to dn, conventional division algorithms perform poorly.  | 
174  |  |          We want an algorithm that has an expected running time that is  | 
175  |  |          dependent only on qn.  | 
176  |  |  | 
177  |  |          Algorithm (very informally stated):  | 
178  |  |  | 
179  |  |          1) Divide the 2 x qn most significant limbs from the numerator  | 
180  |  |       by the qn most significant limbs from the denominator.  Call  | 
181  |  |       the result qest.  This is either the correct quotient, but  | 
182  |  |       might be 1 or 2 too large.  Compute the remainder from the  | 
183  |  |       division.  (This step is implemented by an mpn_divrem call.)  | 
184  |  |  | 
185  |  |          2) Is the most significant limb from the remainder < p, where p  | 
186  |  |       is the product of the most significant limb from the quotient  | 
187  |  |       and the next(d)?  (Next(d) denotes the next ignored limb from  | 
188  |  |       the denominator.)  If it is, decrement qest, and adjust the  | 
189  |  |       remainder accordingly.  | 
190  |  |  | 
191  |  |          3) Is the remainder >= qest?  If it is, qest is the desired  | 
192  |  |       quotient.  The algorithm terminates.  | 
193  |  |  | 
194  |  |          4) Subtract qest x next(d) from the remainder.  If there is  | 
195  |  |       borrow out, decrement qest, and adjust the remainder  | 
196  |  |       accordingly.  | 
197  |  |  | 
198  |  |          5) Skip one word from the denominator (i.e., let next(d) denote  | 
199  |  |       the next less significant limb.  */  | 
200  |  |  | 
201  | 3.74k  |       mp_size_t qn;  | 
202  | 3.74k  |       mp_ptr n2p, d2p;  | 
203  | 3.74k  |       mp_ptr tp;  | 
204  | 3.74k  |       mp_limb_t cy;  | 
205  | 3.74k  |       mp_size_t in, rn;  | 
206  | 3.74k  |       mp_limb_t quotient_too_large;  | 
207  | 3.74k  |       unsigned int cnt;  | 
208  |  |  | 
209  | 3.74k  |       qn = nn - dn;  | 
210  | 3.74k  |       qp[qn] = 0;       /* zero high quotient limb */  | 
211  | 3.74k  |       qn += adjust;     /* qn cannot become bigger */  | 
212  |  |  | 
213  | 3.74k  |       if (qn == 0)  | 
214  | 15  |         { | 
215  | 15  |     MPN_COPY (rp, np, dn);  | 
216  | 15  |     TMP_FREE;  | 
217  | 15  |     return;  | 
218  | 15  |         }  | 
219  |  |  | 
220  | 3.73k  |       in = dn - qn;   /* (at least partially) ignored # of limbs in ops */  | 
221  |  |       /* Normalize denominator by shifting it to the left such that its  | 
222  |  |          most significant bit is set.  Then shift the numerator the same  | 
223  |  |          amount, to mathematically preserve quotient.  */  | 
224  | 3.73k  |       if ((dp[dn - 1] & GMP_NUMB_HIGHBIT) == 0)  | 
225  | 3.46k  |         { | 
226  | 3.46k  |     count_leading_zeros (cnt, dp[dn - 1]);  | 
227  | 3.46k  |     cnt -= GMP_NAIL_BITS;  | 
228  |  |  | 
229  | 3.46k  |     d2p = TMP_ALLOC_LIMBS (qn);  | 
230  | 3.46k  |     mpn_lshift (d2p, dp + in, qn, cnt);  | 
231  | 3.46k  |     d2p[0] |= dp[in - 1] >> (GMP_NUMB_BITS - cnt);  | 
232  |  |  | 
233  | 3.46k  |     n2p = TMP_ALLOC_LIMBS (2 * qn + 1);  | 
234  | 3.46k  |     cy = mpn_lshift (n2p, np + nn - 2 * qn, 2 * qn, cnt);  | 
235  | 3.46k  |     if (adjust)  | 
236  | 1.72k  |       { | 
237  | 1.72k  |         n2p[2 * qn] = cy;  | 
238  | 1.72k  |         n2p++;  | 
239  | 1.72k  |       }  | 
240  | 1.73k  |     else  | 
241  | 1.73k  |       { | 
242  | 1.73k  |         n2p[0] |= np[nn - 2 * qn - 1] >> (GMP_NUMB_BITS - cnt);  | 
243  | 1.73k  |       }  | 
244  | 3.46k  |         }  | 
245  | 270  |       else  | 
246  | 270  |         { | 
247  | 270  |     cnt = 0;  | 
248  | 270  |     d2p = (mp_ptr) dp + in;  | 
249  |  |  | 
250  | 270  |     n2p = TMP_ALLOC_LIMBS (2 * qn + 1);  | 
251  | 270  |     MPN_COPY (n2p, np + nn - 2 * qn, 2 * qn);  | 
252  | 270  |     if (adjust)  | 
253  | 10  |       { | 
254  | 10  |         n2p[2 * qn] = 0;  | 
255  | 10  |         n2p++;  | 
256  | 10  |       }  | 
257  | 270  |         }  | 
258  |  |  | 
259  |  |       /* Get an approximate quotient using the extracted operands.  */  | 
260  | 3.73k  |       if (qn == 1)  | 
261  | 345  |         { | 
262  | 345  |     mp_limb_t q0, r0;  | 
263  | 345  |     udiv_qrnnd (q0, r0, n2p[1], n2p[0] << GMP_NAIL_BITS, d2p[0] << GMP_NAIL_BITS);  | 
264  | 345  |     n2p[0] = r0 >> GMP_NAIL_BITS;  | 
265  | 345  |     qp[0] = q0;  | 
266  | 345  |         }  | 
267  | 3.38k  |       else if (qn == 2)  | 
268  | 1.50k  |         mpn_divrem_2 (qp, 0L, n2p, 4L, d2p); /* FIXME: obsolete function */  | 
269  | 1.88k  |       else  | 
270  | 1.88k  |         { | 
271  | 1.88k  |     invert_pi1 (dinv, d2p[qn - 1], d2p[qn - 2]);  | 
272  | 1.88k  |     if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))  | 
273  | 1.74k  |       mpn_sbpi1_div_qr (qp, n2p, 2 * qn, d2p, qn, dinv.inv32);  | 
274  | 141  |     else if (BELOW_THRESHOLD (qn, MU_DIV_QR_THRESHOLD))  | 
275  | 141  |       mpn_dcpi1_div_qr (qp, n2p, 2 * qn, d2p, qn, &dinv);  | 
276  | 0  |     else  | 
277  | 0  |       { | 
278  | 0  |         mp_size_t itch = mpn_mu_div_qr_itch (2 * qn, qn, 0);  | 
279  | 0  |         mp_ptr scratch = TMP_ALLOC_LIMBS (itch);  | 
280  | 0  |         mp_ptr r2p = rp;  | 
281  | 0  |         if (np == r2p) /* If N and R share space, put ... */  | 
282  | 0  |           r2p += nn - qn; /* intermediate remainder at N's upper end. */  | 
283  | 0  |         mpn_mu_div_qr (qp, r2p, n2p, 2 * qn, d2p, qn, scratch);  | 
284  | 0  |         MPN_COPY (n2p, r2p, qn);  | 
285  | 0  |       }  | 
286  | 1.88k  |         }  | 
287  |  |  | 
288  | 3.73k  |       rn = qn;  | 
289  |  |       /* Multiply the first ignored divisor limb by the most significant  | 
290  |  |          quotient limb.  If that product is > the partial remainder's  | 
291  |  |          most significant limb, we know the quotient is too large.  This  | 
292  |  |          test quickly catches most cases where the quotient is too large;  | 
293  |  |          it catches all cases where the quotient is 2 too large.  */  | 
294  | 3.73k  |       { | 
295  | 3.73k  |         mp_limb_t dl, x;  | 
296  | 3.73k  |         mp_limb_t h, dummy;  | 
297  |  |  | 
298  | 3.73k  |         if (in - 2 < 0)  | 
299  | 295  |     dl = 0;  | 
300  | 3.43k  |         else  | 
301  | 3.43k  |     dl = dp[in - 2];  | 
302  |  |  | 
303  | 3.73k  | #if GMP_NAIL_BITS == 0  | 
304  | 3.73k  |         x = (dp[in - 1] << cnt) | ((dl >> 1) >> ((~cnt) % GMP_LIMB_BITS));  | 
305  |  | #else  | 
306  |  |         x = (dp[in - 1] << cnt) & GMP_NUMB_MASK;  | 
307  |  |         if (cnt != 0)  | 
308  |  |     x |= dl >> (GMP_NUMB_BITS - cnt);  | 
309  |  | #endif  | 
310  | 3.73k  |         umul_ppmm (h, dummy, x, qp[qn - 1] << GMP_NAIL_BITS);  | 
311  |  |  | 
312  | 3.73k  |         if (n2p[qn - 1] < h)  | 
313  | 164  |     { | 
314  | 164  |       mp_limb_t cy;  | 
315  |  |  | 
316  | 164  |       mpn_decr_u (qp, (mp_limb_t) 1);  | 
317  | 164  |       cy = mpn_add_n (n2p, n2p, d2p, qn);  | 
318  | 164  |       if (cy)  | 
319  | 41  |         { | 
320  |  |           /* The partial remainder is safely large.  */  | 
321  | 41  |           n2p[qn] = cy;  | 
322  | 41  |           ++rn;  | 
323  | 41  |         }  | 
324  | 164  |     }  | 
325  | 3.73k  |       }  | 
326  |  |  | 
327  | 3.73k  |       quotient_too_large = 0;  | 
328  | 3.73k  |       if (cnt != 0)  | 
329  | 3.46k  |         { | 
330  | 3.46k  |     mp_limb_t cy1, cy2;  | 
331  |  |  | 
332  |  |     /* Append partially used numerator limb to partial remainder.  */  | 
333  | 3.46k  |     cy1 = mpn_lshift (n2p, n2p, rn, GMP_NUMB_BITS - cnt);  | 
334  | 3.46k  |     n2p[0] |= np[in - 1] & (GMP_NUMB_MASK >> cnt);  | 
335  |  |  | 
336  |  |     /* Update partial remainder with partially used divisor limb.  */  | 
337  | 3.46k  |     cy2 = mpn_submul_1 (n2p, qp, qn, dp[in - 1] & (GMP_NUMB_MASK >> cnt));  | 
338  | 3.46k  |     if (qn != rn)  | 
339  | 34  |       { | 
340  | 34  |         ASSERT_ALWAYS (n2p[qn] >= cy2);  | 
341  | 34  |         n2p[qn] -= cy2;  | 
342  | 34  |       }  | 
343  | 3.42k  |     else  | 
344  | 3.42k  |       { | 
345  | 3.42k  |         n2p[qn] = cy1 - cy2; /* & GMP_NUMB_MASK; */  | 
346  |  |  | 
347  | 3.42k  |         quotient_too_large = (cy1 < cy2);  | 
348  | 3.42k  |         ++rn;  | 
349  | 3.42k  |       }  | 
350  | 3.46k  |     --in;  | 
351  | 3.46k  |         }  | 
352  |  |       /* True: partial remainder now is neutral, i.e., it is not shifted up.  */  | 
353  |  |  | 
354  | 3.73k  |       tp = TMP_ALLOC_LIMBS (dn);  | 
355  |  |  | 
356  | 3.73k  |       if (in < qn)  | 
357  | 612  |         { | 
358  | 612  |     if (in == 0)  | 
359  | 270  |       { | 
360  | 270  |         MPN_COPY (rp, n2p, rn);  | 
361  | 270  |         ASSERT_ALWAYS (rn == dn);  | 
362  | 270  |         goto foo;  | 
363  | 270  |       }  | 
364  | 342  |     mpn_mul (tp, qp, qn, dp, in);  | 
365  | 342  |         }  | 
366  | 3.12k  |       else  | 
367  | 3.12k  |         mpn_mul (tp, dp, in, qp, qn);  | 
368  |  |  | 
369  | 3.46k  |       cy = mpn_sub (n2p, n2p, rn, tp + in, qn);  | 
370  | 3.46k  |       MPN_COPY (rp + in, n2p, dn - in);  | 
371  | 3.46k  |       quotient_too_large |= cy;  | 
372  | 3.46k  |       cy = mpn_sub_n (rp, np, tp, in);  | 
373  | 3.46k  |       cy = mpn_sub_1 (rp + in, rp + in, rn, cy);  | 
374  | 3.46k  |       quotient_too_large |= cy;  | 
375  | 3.73k  |     foo:  | 
376  | 3.73k  |       if (quotient_too_large)  | 
377  | 211  |         { | 
378  | 211  |     mpn_decr_u (qp, (mp_limb_t) 1);  | 
379  | 211  |     mpn_add_n (rp, rp, dp, dn);  | 
380  | 211  |         }  | 
381  | 3.73k  |     }  | 
382  | 3.73k  |   TMP_FREE;  | 
383  | 3.73k  |   return;  | 
384  | 3.46k  |       }  | 
385  | 16.2k  |     }  | 
386  | 16.2k  | }  |