/src/gmp-6.2.1/mpn/mu_div_q.c
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | /* mpn_mu_div_q.  | 
2  |  |  | 
3  |  |    Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.  | 
4  |  |  | 
5  |  |    THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY  | 
6  |  |    SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST  | 
7  |  |    GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.  | 
8  |  |  | 
9  |  | Copyright 2005-2007, 2009, 2010, 2013 Free Software Foundation, Inc.  | 
10  |  |  | 
11  |  | This file is part of the GNU MP Library.  | 
12  |  |  | 
13  |  | The GNU MP Library is free software; you can redistribute it and/or modify  | 
14  |  | it under the terms of either:  | 
15  |  |  | 
16  |  |   * the GNU Lesser General Public License as published by the Free  | 
17  |  |     Software Foundation; either version 3 of the License, or (at your  | 
18  |  |     option) any later version.  | 
19  |  |  | 
20  |  | or  | 
21  |  |  | 
22  |  |   * the GNU General Public License as published by the Free Software  | 
23  |  |     Foundation; either version 2 of the License, or (at your option) any  | 
24  |  |     later version.  | 
25  |  |  | 
26  |  | or both in parallel, as here.  | 
27  |  |  | 
28  |  | The GNU MP Library is distributed in the hope that it will be useful, but  | 
29  |  | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY  | 
30  |  | or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License  | 
31  |  | for more details.  | 
32  |  |  | 
33  |  | You should have received copies of the GNU General Public License and the  | 
34  |  | GNU Lesser General Public License along with the GNU MP Library.  If not,  | 
35  |  | see https://www.gnu.org/licenses/.  */  | 
36  |  |  | 
37  |  |  | 
38  |  | /*  | 
39  |  |    The idea of the algorithm used herein is to compute a smaller inverted value  | 
40  |  |    than used in the standard Barrett algorithm, and thus save time in the  | 
41  |  |    Newton iterations, and pay just a small price when using the inverted value  | 
42  |  |    for developing quotient bits.  This algorithm was presented at ICMS 2006.  | 
43  |  | */  | 
44  |  |  | 
45  |  | /*  | 
46  |  |   Things to work on:  | 
47  |  |  | 
48  |  |   1. This is a rudimentary implementation of mpn_mu_div_q.  The algorithm is  | 
49  |  |      probably close to optimal, except when mpn_mu_divappr_q fails.  | 
50  |  |  | 
51  |  |   2. We used to fall back to mpn_mu_div_qr when we detect a possible  | 
52  |  |      mpn_mu_divappr_q rounding problem, now we multiply and compare.  | 
53  |  |      Unfortunately, since mpn_mu_divappr_q does not return the partial  | 
54  |  |      remainder, this also doesn't become optimal.  A mpn_mu_divappr_qr could  | 
55  |  |      solve that.  | 
56  |  |  | 
57  |  |   3. The allocations done here should be made from the scratch area, which  | 
58  |  |      then would need to be amended.  | 
59  |  | */  | 
60  |  |  | 
61  |  | #include <stdlib.h>   /* for NULL */  | 
62  |  | #include "gmp-impl.h"  | 
63  |  |  | 
64  |  |  | 
65  |  | mp_limb_t  | 
66  |  | mpn_mu_div_q (mp_ptr qp,  | 
67  |  |         mp_srcptr np, mp_size_t nn,  | 
68  |  |         mp_srcptr dp, mp_size_t dn,  | 
69  |  |         mp_ptr scratch)  | 
70  | 0  | { | 
71  | 0  |   mp_ptr tp, rp;  | 
72  | 0  |   mp_size_t qn;  | 
73  | 0  |   mp_limb_t cy, qh;  | 
74  | 0  |   TMP_DECL;  | 
75  |  | 
  | 
76  | 0  |   TMP_MARK;  | 
77  |  | 
  | 
78  | 0  |   qn = nn - dn;  | 
79  |  | 
  | 
80  | 0  |   tp = TMP_BALLOC_LIMBS (qn + 1);  | 
81  |  | 
  | 
82  | 0  |   if (qn >= dn)     /* nn >= 2*dn + 1 */  | 
83  | 0  |     { | 
84  |  |        /* |_______________________|   dividend  | 
85  |  |        |________|   divisor  */  | 
86  |  | 
  | 
87  | 0  |       rp = TMP_BALLOC_LIMBS (nn + 1);  | 
88  | 0  |       MPN_COPY (rp + 1, np, nn);  | 
89  | 0  |       rp[0] = 0;  | 
90  |  | 
  | 
91  | 0  |       qh = mpn_cmp (rp + 1 + nn - dn, dp, dn) >= 0;  | 
92  | 0  |       if (qh != 0)  | 
93  | 0  |   mpn_sub_n (rp + 1 + nn - dn, rp + 1 + nn - dn, dp, dn);  | 
94  |  | 
  | 
95  | 0  |       cy = mpn_mu_divappr_q (tp, rp, nn + 1, dp, dn, scratch);  | 
96  |  | 
  | 
97  | 0  |       if (UNLIKELY (cy != 0))  | 
98  | 0  |   { | 
99  |  |     /* Since the partial remainder fed to mpn_preinv_mu_divappr_q was  | 
100  |  |        canonically reduced, replace the returned value of B^(qn-dn)+eps  | 
101  |  |        by the largest possible value.  */  | 
102  | 0  |     mp_size_t i;  | 
103  | 0  |     for (i = 0; i < qn + 1; i++)  | 
104  | 0  |       tp[i] = GMP_NUMB_MAX;  | 
105  | 0  |   }  | 
106  |  |  | 
107  |  |       /* The max error of mpn_mu_divappr_q is +4.  If the low quotient limb is  | 
108  |  |    smaller than the max error, we cannot trust the quotient.  */  | 
109  | 0  |       if (tp[0] > 4)  | 
110  | 0  |   { | 
111  | 0  |     MPN_COPY (qp, tp + 1, qn);  | 
112  | 0  |   }  | 
113  | 0  |       else  | 
114  | 0  |   { | 
115  | 0  |     mp_limb_t cy;  | 
116  | 0  |     mp_ptr pp;  | 
117  |  | 
  | 
118  | 0  |     pp = rp;  | 
119  | 0  |     mpn_mul (pp, tp + 1, qn, dp, dn);  | 
120  |  | 
  | 
121  | 0  |     cy = (qh != 0) ? mpn_add_n (pp + qn, pp + qn, dp, dn) : 0;  | 
122  |  | 
  | 
123  | 0  |     if (cy || mpn_cmp (pp, np, nn) > 0) /* At most is wrong by one, no cycle. */  | 
124  | 0  |       qh -= mpn_sub_1 (qp, tp + 1, qn, 1);  | 
125  | 0  |     else /* Same as above */  | 
126  | 0  |       MPN_COPY (qp, tp + 1, qn);  | 
127  | 0  |   }  | 
128  | 0  |     }  | 
129  | 0  |   else  | 
130  | 0  |     { | 
131  |  |        /* |_______________________|   dividend  | 
132  |  |      |________________|   divisor  */  | 
133  |  |  | 
134  |  |       /* FIXME: When nn = 2dn-1, qn becomes dn-1, and the numerator size passed  | 
135  |  |    here becomes 2dn, i.e., more than nn.  This shouldn't hurt, since only  | 
136  |  |    the most significant dn-1 limbs will actually be read, but it is not  | 
137  |  |    pretty.  */  | 
138  |  | 
  | 
139  | 0  |       qh = mpn_mu_divappr_q (tp, np + nn - (2 * qn + 2), 2 * qn + 2,  | 
140  | 0  |            dp + dn - (qn + 1), qn + 1, scratch);  | 
141  |  |  | 
142  |  |       /* The max error of mpn_mu_divappr_q is +4, but we get an additional  | 
143  |  |          error from the divisor truncation.  */  | 
144  | 0  |       if (tp[0] > 6)  | 
145  | 0  |   { | 
146  | 0  |     MPN_COPY (qp, tp + 1, qn);  | 
147  | 0  |   }  | 
148  | 0  |       else  | 
149  | 0  |   { | 
150  | 0  |     mp_limb_t cy;  | 
151  |  |  | 
152  |  |     /* FIXME: a shorter product should be enough; we may use already  | 
153  |  |        allocated space... */  | 
154  | 0  |     rp = TMP_BALLOC_LIMBS (nn);  | 
155  | 0  |     mpn_mul (rp, dp, dn, tp + 1, qn);  | 
156  |  | 
  | 
157  | 0  |     cy = (qh != 0) ? mpn_add_n (rp + qn, rp + qn, dp, dn) : 0;  | 
158  |  | 
  | 
159  | 0  |     if (cy || mpn_cmp (rp, np, nn) > 0) /* At most is wrong by one, no cycle. */  | 
160  | 0  |       qh -= mpn_sub_1 (qp, tp + 1, qn, 1);  | 
161  | 0  |     else /* Same as above */  | 
162  | 0  |       MPN_COPY (qp, tp + 1, qn);  | 
163  | 0  |   }  | 
164  | 0  |     }  | 
165  |  |  | 
166  | 0  |   TMP_FREE;  | 
167  | 0  |   return qh;  | 
168  | 0  | }  | 
169  |  |  | 
170  |  | mp_size_t  | 
171  |  | mpn_mu_div_q_itch (mp_size_t nn, mp_size_t dn, int mua_k)  | 
172  | 0  | { | 
173  | 0  |   mp_size_t qn;  | 
174  |  | 
  | 
175  | 0  |   qn = nn - dn;  | 
176  | 0  |   if (qn >= dn)  | 
177  | 0  |     { | 
178  | 0  |       return mpn_mu_divappr_q_itch (nn + 1, dn, mua_k);  | 
179  | 0  |     }  | 
180  | 0  |   else  | 
181  | 0  |     { | 
182  | 0  |       return mpn_mu_divappr_q_itch (2 * qn + 2, qn + 1, mua_k);  | 
183  | 0  |     }  | 
184  | 0  | }  |