/src/gmp-6.2.1/mpn/div_q.c
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | /* mpn_div_q -- division for arbitrary size operands.  | 
2  |  |  | 
3  |  |    Contributed to the GNU project by Torbjorn Granlund.  | 
4  |  |  | 
5  |  |    THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY  | 
6  |  |    SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST  | 
7  |  |    GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.  | 
8  |  |  | 
9  |  | Copyright 2009, 2010, 2015, 2018 Free Software Foundation, Inc.  | 
10  |  |  | 
11  |  | This file is part of the GNU MP Library.  | 
12  |  |  | 
13  |  | The GNU MP Library is free software; you can redistribute it and/or modify  | 
14  |  | it under the terms of either:  | 
15  |  |  | 
16  |  |   * the GNU Lesser General Public License as published by the Free  | 
17  |  |     Software Foundation; either version 3 of the License, or (at your  | 
18  |  |     option) any later version.  | 
19  |  |  | 
20  |  | or  | 
21  |  |  | 
22  |  |   * the GNU General Public License as published by the Free Software  | 
23  |  |     Foundation; either version 2 of the License, or (at your option) any  | 
24  |  |     later version.  | 
25  |  |  | 
26  |  | or both in parallel, as here.  | 
27  |  |  | 
28  |  | The GNU MP Library is distributed in the hope that it will be useful, but  | 
29  |  | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY  | 
30  |  | or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License  | 
31  |  | for more details.  | 
32  |  |  | 
33  |  | You should have received copies of the GNU General Public License and the  | 
34  |  | GNU Lesser General Public License along with the GNU MP Library.  If not,  | 
35  |  | see https://www.gnu.org/licenses/.  */  | 
36  |  |  | 
37  |  | #include "gmp-impl.h"  | 
38  |  | #include "longlong.h"  | 
39  |  |  | 
40  |  |  | 
41  |  | /* Compute Q = N/D with truncation.  | 
42  |  |      N = {np,nn} | 
43  |  |      D = {dp,dn} | 
44  |  |      Q = {qp,nn-dn+1} | 
45  |  |      T = {scratch,nn+1} is scratch space | 
46  |  |    N and D are both untouched by the computation.  | 
47  |  |    N and T may overlap; pass the same space if N is irrelevant after the call,  | 
48  |  |    but note that tp needs an extra limb.  | 
49  |  |  | 
50  |  |    Operand requirements:  | 
51  |  |      N >= D > 0  | 
52  |  |      dp[dn-1] != 0  | 
53  |  |      No overlap between the N, D, and Q areas.  | 
54  |  |  | 
55  |  |    This division function does not clobber its input operands, since it is  | 
56  |  |    intended to support average-O(qn) division, and for that to be effective, it  | 
57  |  |    cannot put requirements on callers to copy a O(nn) operand.  | 
58  |  |  | 
59  |  |    If a caller does not care about the value of {np,nn+1} after calling this | 
60  |  |    function, it should pass np also for the scratch argument.  This function  | 
61  |  |    will then save some time and space by avoiding allocation and copying.  | 
62  |  |    (FIXME: Is this a good design?  We only really save any copying for  | 
63  |  |    already-normalised divisors, which should be rare.  It also prevents us from  | 
64  |  |    reasonably asking for all scratch space we need.)  | 
65  |  |  | 
66  |  |    We write nn-dn+1 limbs for the quotient, but return void.  Why not return  | 
67  |  |    the most significant quotient limb?  Look at the 4 main code blocks below  | 
68  |  |    (consisting of an outer if-else where each arm contains an if-else). It is  | 
69  |  |    tricky for the first code block, since the mpn_*_div_q calls will typically  | 
70  |  |    generate all nn-dn+1 and return 0 or 1.  I don't see how to fix that unless  | 
71  |  |    we generate the most significant quotient limb here, before calling  | 
72  |  |    mpn_*_div_q, or put the quotient in a temporary area.  Since this is a  | 
73  |  |    critical division case (the SB sub-case in particular) copying is not a good  | 
74  |  |    idea.  | 
75  |  |  | 
76  |  |    It might make sense to split the if-else parts of the (qn + FUDGE  | 
77  |  |    >= dn) blocks into separate functions, since we could promise quite  | 
78  |  |    different things to callers in these two cases.  The 'then' case  | 
79  |  |    benefits from np=scratch, and it could perhaps even tolerate qp=np,  | 
80  |  |    saving some headache for many callers.  | 
81  |  |  | 
82  |  |    FIXME: Scratch allocation leaves a lot to be desired.  E.g., for the MU size  | 
83  |  |    operands, we do not reuse the huge scratch for adjustments.  This can be a  | 
84  |  |    serious waste of memory for the largest operands.  | 
85  |  | */  | 
86  |  |  | 
/* FUDGE determines when to try getting an approximate quotient from the upper
   parts of the dividend and divisor, then adjust.  N.B. FUDGE must be >= 2
   for the code to be correct.  */
#define FUDGE 5			/* FIXME: tune this */

/* The _div_q thresholds reuse the corresponding _divappr_q tuning values,
   since the underlying algorithm selection is the same.  */
#define DC_DIV_Q_THRESHOLD      DC_DIVAPPR_Q_THRESHOLD
#define MU_DIV_Q_THRESHOLD      MU_DIVAPPR_Q_THRESHOLD
#define MUPI_DIV_Q_THRESHOLD    MUPI_DIVAPPR_Q_THRESHOLD
#ifndef MUPI_DIVAPPR_Q_THRESHOLD
#define MUPI_DIVAPPR_Q_THRESHOLD  MUPI_DIV_QR_THRESHOLD
#endif
98  |  |  | 
99  |  | void  | 
100  |  | mpn_div_q (mp_ptr qp,  | 
101  |  |      mp_srcptr np, mp_size_t nn,  | 
102  |  |      mp_srcptr dp, mp_size_t dn, mp_ptr scratch)  | 
103  | 0  | { | 
104  | 0  |   mp_ptr new_dp, new_np, tp, rp;  | 
105  | 0  |   mp_limb_t cy, dh, qh;  | 
106  | 0  |   mp_size_t new_nn, qn;  | 
107  | 0  |   gmp_pi1_t dinv;  | 
108  | 0  |   int cnt;  | 
109  | 0  |   TMP_DECL;  | 
110  | 0  |   TMP_MARK;  | 
111  |  | 
  | 
112  | 0  |   ASSERT (nn >= dn);  | 
113  | 0  |   ASSERT (dn > 0);  | 
114  | 0  |   ASSERT (dp[dn - 1] != 0);  | 
115  | 0  |   ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1, np, nn));  | 
116  | 0  |   ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1, dp, dn));  | 
117  | 0  |   ASSERT (MPN_SAME_OR_SEPARATE_P (np, scratch, nn));  | 
118  |  |  | 
119  | 0  |   ASSERT_ALWAYS (FUDGE >= 2);  | 
120  |  |  | 
121  | 0  |   dh = dp[dn - 1];  | 
122  | 0  |   if (dn == 1)  | 
123  | 0  |     { | 
124  | 0  |       mpn_divrem_1 (qp, 0L, np, nn, dh);  | 
125  | 0  |       return;  | 
126  | 0  |     }  | 
127  |  |  | 
128  | 0  |   qn = nn - dn + 1;   /* Quotient size, high limb might be zero */  | 
129  |  | 
  | 
130  | 0  |   if (qn + FUDGE >= dn)  | 
131  | 0  |     { | 
132  |  |       /* |________________________|  | 
133  |  |                           |_______|  */  | 
134  | 0  |       new_np = scratch;  | 
135  |  | 
  | 
136  | 0  |       if (LIKELY ((dh & GMP_NUMB_HIGHBIT) == 0))  | 
137  | 0  |   { | 
138  | 0  |     count_leading_zeros (cnt, dh);  | 
139  |  |  | 
140  | 0  |     cy = mpn_lshift (new_np, np, nn, cnt);  | 
141  | 0  |     new_np[nn] = cy;  | 
142  | 0  |     new_nn = nn + (cy != 0);  | 
143  |  | 
  | 
144  | 0  |     new_dp = TMP_ALLOC_LIMBS (dn);  | 
145  | 0  |     mpn_lshift (new_dp, dp, dn, cnt);  | 
146  |  | 
  | 
147  | 0  |     if (dn == 2)  | 
148  | 0  |       { | 
149  | 0  |         qh = mpn_divrem_2 (qp, 0L, new_np, new_nn, new_dp);  | 
150  | 0  |       }  | 
151  | 0  |     else if (BELOW_THRESHOLD (dn, DC_DIV_Q_THRESHOLD) ||  | 
152  | 0  |        BELOW_THRESHOLD (new_nn - dn, DC_DIV_Q_THRESHOLD))  | 
153  | 0  |       { | 
154  | 0  |         invert_pi1 (dinv, new_dp[dn - 1], new_dp[dn - 2]);  | 
155  | 0  |         qh = mpn_sbpi1_div_q (qp, new_np, new_nn, new_dp, dn, dinv.inv32);  | 
156  | 0  |       }  | 
157  | 0  |     else if (BELOW_THRESHOLD (dn, MUPI_DIV_Q_THRESHOLD) ||   /* fast condition */  | 
158  | 0  |        BELOW_THRESHOLD (nn, 2 * MU_DIV_Q_THRESHOLD) || /* fast condition */  | 
159  | 0  |        (double) (2 * (MU_DIV_Q_THRESHOLD - MUPI_DIV_Q_THRESHOLD)) * dn /* slow... */  | 
160  | 0  |        + (double) MUPI_DIV_Q_THRESHOLD * nn > (double) dn * nn)   /* ...condition */  | 
161  | 0  |       { | 
162  | 0  |         invert_pi1 (dinv, new_dp[dn - 1], new_dp[dn - 2]);  | 
163  | 0  |         qh = mpn_dcpi1_div_q (qp, new_np, new_nn, new_dp, dn, &dinv);  | 
164  | 0  |       }  | 
165  | 0  |     else  | 
166  | 0  |       { | 
167  | 0  |         mp_size_t itch = mpn_mu_div_q_itch (new_nn, dn, 0);  | 
168  | 0  |         mp_ptr scratch = TMP_ALLOC_LIMBS (itch);  | 
169  | 0  |         qh = mpn_mu_div_q (qp, new_np, new_nn, new_dp, dn, scratch);  | 
170  | 0  |       }  | 
171  | 0  |     if (cy == 0)  | 
172  | 0  |       qp[qn - 1] = qh;  | 
173  | 0  |     else  | 
174  | 0  |       ASSERT (qh == 0);  | 
175  | 0  |   }  | 
176  | 0  |       else  /* divisor is already normalised */  | 
177  | 0  |   { | 
178  | 0  |     if (new_np != np)  | 
179  | 0  |       MPN_COPY (new_np, np, nn);  | 
180  |  |  | 
181  | 0  |     if (dn == 2)  | 
182  | 0  |       { | 
183  | 0  |         qh = mpn_divrem_2 (qp, 0L, new_np, nn, dp);  | 
184  | 0  |       }  | 
185  | 0  |     else if (BELOW_THRESHOLD (dn, DC_DIV_Q_THRESHOLD) ||  | 
186  | 0  |        BELOW_THRESHOLD (nn - dn, DC_DIV_Q_THRESHOLD))  | 
187  | 0  |       { | 
188  | 0  |         invert_pi1 (dinv, dh, dp[dn - 2]);  | 
189  | 0  |         qh = mpn_sbpi1_div_q (qp, new_np, nn, dp, dn, dinv.inv32);  | 
190  | 0  |       }  | 
191  | 0  |     else if (BELOW_THRESHOLD (dn, MUPI_DIV_Q_THRESHOLD) ||   /* fast condition */  | 
192  | 0  |        BELOW_THRESHOLD (nn, 2 * MU_DIV_Q_THRESHOLD) || /* fast condition */  | 
193  | 0  |        (double) (2 * (MU_DIV_Q_THRESHOLD - MUPI_DIV_Q_THRESHOLD)) * dn /* slow... */  | 
194  | 0  |        + (double) MUPI_DIV_Q_THRESHOLD * nn > (double) dn * nn)   /* ...condition */  | 
195  | 0  |       { | 
196  | 0  |         invert_pi1 (dinv, dh, dp[dn - 2]);  | 
197  | 0  |         qh = mpn_dcpi1_div_q (qp, new_np, nn, dp, dn, &dinv);  | 
198  | 0  |       }  | 
199  | 0  |     else  | 
200  | 0  |       { | 
201  | 0  |         mp_size_t itch = mpn_mu_div_q_itch (nn, dn, 0);  | 
202  | 0  |         mp_ptr scratch = TMP_ALLOC_LIMBS (itch);  | 
203  | 0  |         qh = mpn_mu_div_q (qp, np, nn, dp, dn, scratch);  | 
204  | 0  |       }  | 
205  | 0  |     qp[nn - dn] = qh;  | 
206  | 0  |   }  | 
207  | 0  |     }  | 
208  | 0  |   else  | 
209  | 0  |     { | 
210  |  |       /* |________________________|  | 
211  |  |                 |_________________|  */  | 
212  | 0  |       tp = TMP_ALLOC_LIMBS (qn + 1);  | 
213  |  | 
  | 
214  | 0  |       new_np = scratch;  | 
215  | 0  |       new_nn = 2 * qn + 1;  | 
216  | 0  |       if (new_np == np)  | 
217  |  |   /* We need {np,nn} to remain untouched until the final adjustment, so | 
218  |  |      we need to allocate separate space for new_np.  */  | 
219  | 0  |   new_np = TMP_ALLOC_LIMBS (new_nn + 1);  | 
220  |  |  | 
221  |  | 
  | 
222  | 0  |       if (LIKELY ((dh & GMP_NUMB_HIGHBIT) == 0))  | 
223  | 0  |   { | 
224  | 0  |     count_leading_zeros (cnt, dh);  | 
225  |  |  | 
226  | 0  |     cy = mpn_lshift (new_np, np + nn - new_nn, new_nn, cnt);  | 
227  | 0  |     new_np[new_nn] = cy;  | 
228  |  | 
  | 
229  | 0  |     new_nn += (cy != 0);  | 
230  |  | 
  | 
231  | 0  |     new_dp = TMP_ALLOC_LIMBS (qn + 1);  | 
232  | 0  |     mpn_lshift (new_dp, dp + dn - (qn + 1), qn + 1, cnt);  | 
233  | 0  |     new_dp[0] |= dp[dn - (qn + 1) - 1] >> (GMP_NUMB_BITS - cnt);  | 
234  |  | 
  | 
235  | 0  |     if (qn + 1 == 2)  | 
236  | 0  |       { | 
237  | 0  |         qh = mpn_divrem_2 (tp, 0L, new_np, new_nn, new_dp);  | 
238  | 0  |       }  | 
239  | 0  |     else if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD - 1))  | 
240  | 0  |       { | 
241  | 0  |         invert_pi1 (dinv, new_dp[qn], new_dp[qn - 1]);  | 
242  | 0  |         qh = mpn_sbpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, dinv.inv32);  | 
243  | 0  |       }  | 
244  | 0  |     else if (BELOW_THRESHOLD (qn, MU_DIVAPPR_Q_THRESHOLD - 1))  | 
245  | 0  |       { | 
246  | 0  |         invert_pi1 (dinv, new_dp[qn], new_dp[qn - 1]);  | 
247  | 0  |         qh = mpn_dcpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, &dinv);  | 
248  | 0  |       }  | 
249  | 0  |     else  | 
250  | 0  |       { | 
251  | 0  |         mp_size_t itch = mpn_mu_divappr_q_itch (new_nn, qn + 1, 0);  | 
252  | 0  |         mp_ptr scratch = TMP_ALLOC_LIMBS (itch);  | 
253  | 0  |         qh = mpn_mu_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, scratch);  | 
254  | 0  |       }  | 
255  | 0  |     if (cy == 0)  | 
256  | 0  |       tp[qn] = qh;  | 
257  | 0  |     else if (UNLIKELY (qh != 0))  | 
258  | 0  |       { | 
259  |  |         /* This happens only when the quotient is close to B^n and  | 
260  |  |      mpn_*_divappr_q returned B^n.  */  | 
261  | 0  |         mp_size_t i, n;  | 
262  | 0  |         n = new_nn - (qn + 1);  | 
263  | 0  |         for (i = 0; i < n; i++)  | 
264  | 0  |     tp[i] = GMP_NUMB_MAX;  | 
265  | 0  |         qh = 0;   /* currently ignored */  | 
266  | 0  |       }  | 
267  | 0  |   }  | 
268  | 0  |       else  /* divisor is already normalised */  | 
269  | 0  |   { | 
270  | 0  |     MPN_COPY (new_np, np + nn - new_nn, new_nn); /* pointless if MU will be used */  | 
271  |  |  | 
272  | 0  |     new_dp = (mp_ptr) dp + dn - (qn + 1);  | 
273  |  | 
  | 
274  | 0  |     if (qn == 2 - 1)  | 
275  | 0  |       { | 
276  | 0  |         qh = mpn_divrem_2 (tp, 0L, new_np, new_nn, new_dp);  | 
277  | 0  |       }  | 
278  | 0  |     else if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD - 1))  | 
279  | 0  |       { | 
280  | 0  |         invert_pi1 (dinv, dh, new_dp[qn - 1]);  | 
281  | 0  |         qh = mpn_sbpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, dinv.inv32);  | 
282  | 0  |       }  | 
283  | 0  |     else if (BELOW_THRESHOLD (qn, MU_DIVAPPR_Q_THRESHOLD - 1))  | 
284  | 0  |       { | 
285  | 0  |         invert_pi1 (dinv, dh, new_dp[qn - 1]);  | 
286  | 0  |         qh = mpn_dcpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, &dinv);  | 
287  | 0  |       }  | 
288  | 0  |     else  | 
289  | 0  |       { | 
290  | 0  |         mp_size_t itch = mpn_mu_divappr_q_itch (new_nn, qn + 1, 0);  | 
291  | 0  |         mp_ptr scratch = TMP_ALLOC_LIMBS (itch);  | 
292  | 0  |         qh = mpn_mu_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, scratch);  | 
293  | 0  |       }  | 
294  | 0  |     tp[qn] = qh;  | 
295  | 0  |   }  | 
296  |  |  | 
297  | 0  |       MPN_COPY (qp, tp + 1, qn);  | 
298  | 0  |       if (tp[0] <= 4)  | 
299  | 0  |         { | 
300  | 0  |     mp_size_t rn;  | 
301  |  | 
  | 
302  | 0  |           rp = TMP_ALLOC_LIMBS (dn + qn);  | 
303  | 0  |           mpn_mul (rp, dp, dn, tp + 1, qn);  | 
304  | 0  |     rn = dn + qn;  | 
305  | 0  |     rn -= rp[rn - 1] == 0;  | 
306  |  | 
  | 
307  | 0  |           if (rn > nn || mpn_cmp (np, rp, nn) < 0)  | 
308  | 0  |             MPN_DECR_U (qp, qn, 1);  | 
309  | 0  |         }  | 
310  | 0  |     }  | 
311  |  |  | 
312  | 0  |   TMP_FREE;  | 
313  | 0  | }  |