/src/gmp-6.2.1/mpn/mu_div_q.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* mpn_mu_div_q. |
2 | | |
3 | | Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato. |
4 | | |
5 | | THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY |
6 | | SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST |
7 | | GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE. |
8 | | |
9 | | Copyright 2005-2007, 2009, 2010, 2013 Free Software Foundation, Inc. |
10 | | |
11 | | This file is part of the GNU MP Library. |
12 | | |
13 | | The GNU MP Library is free software; you can redistribute it and/or modify |
14 | | it under the terms of either: |
15 | | |
16 | | * the GNU Lesser General Public License as published by the Free |
17 | | Software Foundation; either version 3 of the License, or (at your |
18 | | option) any later version. |
19 | | |
20 | | or |
21 | | |
22 | | * the GNU General Public License as published by the Free Software |
23 | | Foundation; either version 2 of the License, or (at your option) any |
24 | | later version. |
25 | | |
26 | | or both in parallel, as here. |
27 | | |
28 | | The GNU MP Library is distributed in the hope that it will be useful, but |
29 | | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
30 | | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
31 | | for more details. |
32 | | |
33 | | You should have received copies of the GNU General Public License and the |
34 | | GNU Lesser General Public License along with the GNU MP Library. If not, |
35 | | see https://www.gnu.org/licenses/. */ |
36 | | |
37 | | |
38 | | /* |
39 | | The idea of the algorithm used herein is to compute a smaller inverted value |
40 | | than used in the standard Barrett algorithm, and thus save time in the |
41 | | Newton iterations, and pay just a small price when using the inverted value |
42 | | for developing quotient bits. This algorithm was presented at ICMS 2006. |
43 | | */ |
44 | | |
45 | | /* |
46 | | Things to work on: |
47 | | |
48 | | 1. This is a rudimentary implementation of mpn_mu_div_q. The algorithm is |
49 | | probably close to optimal, except when mpn_mu_divappr_q fails. |
50 | | |
51 | | 2. We used to fall back to mpn_mu_div_qr when we detect a possible |
52 | | mpn_mu_divappr_q rounding problem, now we multiply and compare. |
53 | | Unfortunately, since mpn_mu_divappr_q does not return the partial |
54 | | remainder, this also doesn't become optimal. A mpn_mu_divappr_qr could |
55 | | solve that. |
56 | | |
57 | | 3. The allocations done here should be made from the scratch area, which |
58 | | then would need to be amended. |
59 | | */ |
60 | | |
61 | | #include <stdlib.h> /* for NULL */ |
62 | | #include "gmp-impl.h" |
63 | | |
64 | | |
65 | | mp_limb_t |
66 | | mpn_mu_div_q (mp_ptr qp, |
67 | | mp_srcptr np, mp_size_t nn, |
68 | | mp_srcptr dp, mp_size_t dn, |
69 | | mp_ptr scratch) |
70 | 0 | { |
71 | 0 | mp_ptr tp, rp; |
72 | 0 | mp_size_t qn; |
73 | 0 | mp_limb_t cy, qh; |
74 | 0 | TMP_DECL; |
75 | |
|
76 | 0 | TMP_MARK; |
77 | |
|
78 | 0 | qn = nn - dn; |
79 | |
|
80 | 0 | tp = TMP_BALLOC_LIMBS (qn + 1); |
81 | |
|
82 | 0 | if (qn >= dn) /* nn >= 2*dn + 1 */ |
83 | 0 | { |
84 | | /* |_______________________| dividend |
85 | | |________| divisor */ |
86 | |
|
87 | 0 | rp = TMP_BALLOC_LIMBS (nn + 1); |
88 | 0 | MPN_COPY (rp + 1, np, nn); |
89 | 0 | rp[0] = 0; |
90 | |
|
91 | 0 | qh = mpn_cmp (rp + 1 + nn - dn, dp, dn) >= 0; |
92 | 0 | if (qh != 0) |
93 | 0 | mpn_sub_n (rp + 1 + nn - dn, rp + 1 + nn - dn, dp, dn); |
94 | |
|
95 | 0 | cy = mpn_mu_divappr_q (tp, rp, nn + 1, dp, dn, scratch); |
96 | |
|
97 | 0 | if (UNLIKELY (cy != 0)) |
98 | 0 | { |
99 | | /* Since the partial remainder fed to mpn_preinv_mu_divappr_q was |
100 | | canonically reduced, replace the returned value of B^(qn-dn)+eps |
101 | | by the largest possible value. */ |
102 | 0 | mp_size_t i; |
103 | 0 | for (i = 0; i < qn + 1; i++) |
104 | 0 | tp[i] = GMP_NUMB_MAX; |
105 | 0 | } |
106 | | |
107 | | /* The max error of mpn_mu_divappr_q is +4. If the low quotient limb is |
108 | | smaller than the max error, we cannot trust the quotient. */ |
109 | 0 | if (tp[0] > 4) |
110 | 0 | { |
111 | 0 | MPN_COPY (qp, tp + 1, qn); |
112 | 0 | } |
113 | 0 | else |
114 | 0 | { |
115 | 0 | mp_limb_t cy; |
116 | 0 | mp_ptr pp; |
117 | |
|
118 | 0 | pp = rp; |
119 | 0 | mpn_mul (pp, tp + 1, qn, dp, dn); |
120 | |
|
121 | 0 | cy = (qh != 0) ? mpn_add_n (pp + qn, pp + qn, dp, dn) : 0; |
122 | |
|
123 | 0 | if (cy || mpn_cmp (pp, np, nn) > 0) /* At most is wrong by one, no cycle. */ |
124 | 0 | qh -= mpn_sub_1 (qp, tp + 1, qn, 1); |
125 | 0 | else /* Same as above */ |
126 | 0 | MPN_COPY (qp, tp + 1, qn); |
127 | 0 | } |
128 | 0 | } |
129 | 0 | else |
130 | 0 | { |
131 | | /* |_______________________| dividend |
132 | | |________________| divisor */ |
133 | | |
134 | | /* FIXME: When nn = 2dn-1, qn becomes dn-1, and the numerator size passed |
135 | | here becomes 2dn, i.e., more than nn. This shouldn't hurt, since only |
136 | | the most significant dn-1 limbs will actually be read, but it is not |
137 | | pretty. */ |
138 | |
|
139 | 0 | qh = mpn_mu_divappr_q (tp, np + nn - (2 * qn + 2), 2 * qn + 2, |
140 | 0 | dp + dn - (qn + 1), qn + 1, scratch); |
141 | | |
142 | | /* The max error of mpn_mu_divappr_q is +4, but we get an additional |
143 | | error from the divisor truncation. */ |
144 | 0 | if (tp[0] > 6) |
145 | 0 | { |
146 | 0 | MPN_COPY (qp, tp + 1, qn); |
147 | 0 | } |
148 | 0 | else |
149 | 0 | { |
150 | 0 | mp_limb_t cy; |
151 | | |
152 | | /* FIXME: a shorter product should be enough; we may use already |
153 | | allocated space... */ |
154 | 0 | rp = TMP_BALLOC_LIMBS (nn); |
155 | 0 | mpn_mul (rp, dp, dn, tp + 1, qn); |
156 | |
|
157 | 0 | cy = (qh != 0) ? mpn_add_n (rp + qn, rp + qn, dp, dn) : 0; |
158 | |
|
159 | 0 | if (cy || mpn_cmp (rp, np, nn) > 0) /* At most is wrong by one, no cycle. */ |
160 | 0 | qh -= mpn_sub_1 (qp, tp + 1, qn, 1); |
161 | 0 | else /* Same as above */ |
162 | 0 | MPN_COPY (qp, tp + 1, qn); |
163 | 0 | } |
164 | 0 | } |
165 | | |
166 | 0 | TMP_FREE; |
167 | 0 | return qh; |
168 | 0 | } |
169 | | |
170 | | mp_size_t |
171 | | mpn_mu_div_q_itch (mp_size_t nn, mp_size_t dn, int mua_k) |
172 | 0 | { |
173 | 0 | mp_size_t qn; |
174 | |
|
175 | 0 | qn = nn - dn; |
176 | 0 | if (qn >= dn) |
177 | 0 | { |
178 | 0 | return mpn_mu_divappr_q_itch (nn + 1, dn, mua_k); |
179 | 0 | } |
180 | 0 | else |
181 | 0 | { |
182 | 0 | return mpn_mu_divappr_q_itch (2 * qn + 2, qn + 1, mua_k); |
183 | 0 | } |
184 | 0 | } |