/src/gmp-6.2.1/mpn/div_q.c
/* mpn_div_q -- division for arbitrary size operands.

   Contributed to the GNU project by Torbjorn Granlund.

   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.

Copyright 2009, 2010, 2015, 2018 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */

#include "gmp-impl.h"
#include "longlong.h"


/* Compute Q = N/D with truncation.
     N = {np,nn}
     D = {dp,dn}
     Q = {qp,nn-dn+1}
     T = {scratch,nn+1} is scratch space
   N and D are both untouched by the computation.
   N and T may overlap; pass the same space if N is irrelevant after the call,
   but note that tp needs an extra limb.

   Operand requirements:
     N >= D > 0
     dp[dn-1] != 0
     No overlap between the N, D, and Q areas.

   This division function does not clobber its input operands, since it is
   intended to support average-O(qn) division, and for that to be effective
   it cannot require callers to copy an O(nn) operand.

   If a caller does not care about the value of {np,nn+1} after calling this
   function, it should pass np also for the scratch argument.  This function
   will then save some time and space by avoiding allocation and copying.
   (FIXME: Is this a good design?  We only really save any copying for
   already-normalised divisors, which should be rare.  It also prevents us
   from reasonably asking for all the scratch space we need.)

   We write nn-dn+1 limbs for the quotient, but return void.  Why not return
   the most significant quotient limb?  Look at the 4 main code blocks below
   (consisting of an outer if-else where each arm contains an if-else).  It
   is tricky for the first code block, since the mpn_*_div_q calls will
   typically generate all nn-dn+1 limbs and return 0 or 1.  I don't see how
   to fix that unless we generate the most significant quotient limb here,
   before calling mpn_*_div_q, or put the quotient in a temporary area.
   Since this is a critical division case (the SB sub-case in particular),
   copying is not a good idea.

   It might make sense to split the if-else parts of the (qn + FUDGE >= dn)
   blocks into separate functions, since we could promise quite different
   things to callers in these two cases.  The 'then' case benefits from
   np=scratch, and it could perhaps even tolerate qp=np, saving some
   headache for many callers.

   FIXME: Scratch allocation leaves a lot to be desired.  E.g., for the MU
   size operands, we do not reuse the huge scratch for adjustments.  This
   can be a serious waste of memory for the largest operands.  */
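
/* Illustrative usage sketch (an editorial addition, not part of the GMP
   sources).  With nn = 4 and dn = 2 the caller provides nn-dn+1 = 3
   quotient limbs and nn+1 = 5 scratch limbs.  Writing B = 2^GMP_NUMB_BITS:

     mp_limb_t np[4] = { 7, 0, 0, 1 };        N = B^3 + 7
     mp_limb_t dp[2] = { 0, 1 };              D = B, high limb nonzero
     mp_limb_t qp[3], scratch[5];
     mpn_div_q (qp, np, 4, dp, 2, scratch);

   yields Q = floor(N/D) = B^2, i.e. qp = { 0, 0, 1 }.  A caller that no
   longer needs {np,nn+1} may instead pass np for scratch, saving the
   internal copy.  Remember that this is an internal, mutable interface;
   normal code should use the documented mpz/mpn entry points.  */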

/* FUDGE determines when to try getting an approximate quotient from the
   upper parts of the dividend and divisor, then adjust.  N.B. FUDGE must
   be >= 2 for the code to be correct.  */
#define FUDGE 5			/* FIXME: tune this */
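
/* Worked example of the FUDGE decision (illustrative): with nn = 1000 and
   dn = 900 we get qn = nn-dn+1 = 101, so qn + FUDGE = 106 < dn and the
   divappr path below divides only the high 2*qn+1 = 203 dividend limbs by
   the high qn+1 = 102 divisor limbs, then adjusts.  With dn = 100 instead,
   qn + FUDGE = 906 >= dn and the full division path is taken.  */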

#define DC_DIV_Q_THRESHOLD      DC_DIVAPPR_Q_THRESHOLD
#define MU_DIV_Q_THRESHOLD      MU_DIVAPPR_Q_THRESHOLD
#define MUPI_DIV_Q_THRESHOLD    MUPI_DIVAPPR_Q_THRESHOLD
#ifndef MUPI_DIVAPPR_Q_THRESHOLD
#define MUPI_DIVAPPR_Q_THRESHOLD  MUPI_DIV_QR_THRESHOLD
#endif

void
mpn_div_q (mp_ptr qp,
           mp_srcptr np, mp_size_t nn,
           mp_srcptr dp, mp_size_t dn, mp_ptr scratch)
{
  mp_ptr new_dp, new_np, tp, rp;
  mp_limb_t cy, dh, qh;
  mp_size_t new_nn, qn;
  gmp_pi1_t dinv;
  int cnt;
  TMP_DECL;
  TMP_MARK;

  ASSERT (nn >= dn);
  ASSERT (dn > 0);
  ASSERT (dp[dn - 1] != 0);
  ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1, np, nn));
  ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1, dp, dn));
  ASSERT (MPN_SAME_OR_SEPARATE_P (np, scratch, nn));

  ASSERT_ALWAYS (FUDGE >= 2);

  dh = dp[dn - 1];
  if (dn == 1)
    {
      mpn_divrem_1 (qp, 0L, np, nn, dh);
      return;
    }

  qn = nn - dn + 1;             /* Quotient size, high limb might be zero */

  if (qn + FUDGE >= dn)
    {
      /* |________________________|
                          |_______|  */
      new_np = scratch;

      if (LIKELY ((dh & GMP_NUMB_HIGHBIT) == 0))
        {
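          /* D is not normalised.  Shift both N and D left so that the high
             divisor bit becomes set, as the sbpi1/dcpi1 division functions
             require a normalised divisor.  */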
          count_leading_zeros (cnt, dh);

          cy = mpn_lshift (new_np, np, nn, cnt);
          new_np[nn] = cy;
          new_nn = nn + (cy != 0);

          new_dp = TMP_ALLOC_LIMBS (dn);
          mpn_lshift (new_dp, dp, dn, cnt);

          if (dn == 2)
            {
              qh = mpn_divrem_2 (qp, 0L, new_np, new_nn, new_dp);
            }
          else if (BELOW_THRESHOLD (dn, DC_DIV_Q_THRESHOLD) ||
                   BELOW_THRESHOLD (new_nn - dn, DC_DIV_Q_THRESHOLD))
            {
              invert_pi1 (dinv, new_dp[dn - 1], new_dp[dn - 2]);
              qh = mpn_sbpi1_div_q (qp, new_np, new_nn, new_dp, dn, dinv.inv32);
            }
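          /* Choose between dcpi1 and mu.  The "slow condition" below
             interpolates between the tuned thresholds: for balanced operands
             (nn close to 2*dn) it reduces to dn < MU_DIV_Q_THRESHOLD, while
             for very unbalanced operands it approaches
             dn < MUPI_DIV_Q_THRESHOLD.  */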
          else if (BELOW_THRESHOLD (dn, MUPI_DIV_Q_THRESHOLD) ||   /* fast condition */
                   BELOW_THRESHOLD (nn, 2 * MU_DIV_Q_THRESHOLD) || /* fast condition */
                   (double) (2 * (MU_DIV_Q_THRESHOLD - MUPI_DIV_Q_THRESHOLD)) * dn /* slow... */
                   + (double) MUPI_DIV_Q_THRESHOLD * nn > (double) dn * nn)        /* ...condition */
            {
              invert_pi1 (dinv, new_dp[dn - 1], new_dp[dn - 2]);
              qh = mpn_dcpi1_div_q (qp, new_np, new_nn, new_dp, dn, &dinv);
            }
          else
            {
              mp_size_t itch = mpn_mu_div_q_itch (new_nn, dn, 0);
              mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
              qh = mpn_mu_div_q (qp, new_np, new_nn, new_dp, dn, scratch);
            }
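          /* If the normalising shift produced a carry limb, the division
             above already wrote all qn quotient limbs and qh must be zero;
             otherwise qh is the still-missing high quotient limb.  */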
          if (cy == 0)
            qp[qn - 1] = qh;
          else
            ASSERT (qh == 0);
        }
      else  /* divisor is already normalised */
        {
          if (new_np != np)
            MPN_COPY (new_np, np, nn);

          if (dn == 2)
            {
              qh = mpn_divrem_2 (qp, 0L, new_np, nn, dp);
            }
          else if (BELOW_THRESHOLD (dn, DC_DIV_Q_THRESHOLD) ||
                   BELOW_THRESHOLD (nn - dn, DC_DIV_Q_THRESHOLD))
            {
              invert_pi1 (dinv, dh, dp[dn - 2]);
              qh = mpn_sbpi1_div_q (qp, new_np, nn, dp, dn, dinv.inv32);
            }
          else if (BELOW_THRESHOLD (dn, MUPI_DIV_Q_THRESHOLD) ||   /* fast condition */
                   BELOW_THRESHOLD (nn, 2 * MU_DIV_Q_THRESHOLD) || /* fast condition */
                   (double) (2 * (MU_DIV_Q_THRESHOLD - MUPI_DIV_Q_THRESHOLD)) * dn /* slow... */
                   + (double) MUPI_DIV_Q_THRESHOLD * nn > (double) dn * nn)        /* ...condition */
            {
              invert_pi1 (dinv, dh, dp[dn - 2]);
              qh = mpn_dcpi1_div_q (qp, new_np, nn, dp, dn, &dinv);
            }
          else
            {
              mp_size_t itch = mpn_mu_div_q_itch (nn, dn, 0);
              mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
              qh = mpn_mu_div_q (qp, np, nn, dp, dn, scratch);
            }
          qp[nn - dn] = qh;
        }
    }
  else
    {
      /* |________________________|
              |_________________|  */
      tp = TMP_ALLOC_LIMBS (qn + 1);

      new_np = scratch;
      new_nn = 2 * qn + 1;
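      /* Only the high 2*qn+1 dividend limbs can affect the qn+1 approximate
         quotient limbs computed below; the discarded low part is accounted
         for by the final adjustment step.  */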
      if (new_np == np)
        /* We need {np,nn} to remain untouched until the final adjustment, so
           we need to allocate separate space for new_np.  */
        new_np = TMP_ALLOC_LIMBS (new_nn + 1);

      if (LIKELY ((dh & GMP_NUMB_HIGHBIT) == 0))
        {
          count_leading_zeros (cnt, dh);

          cy = mpn_lshift (new_np, np + nn - new_nn, new_nn, cnt);
          new_np[new_nn] = cy;

          new_nn += (cy != 0);

          new_dp = TMP_ALLOC_LIMBS (qn + 1);
          mpn_lshift (new_dp, dp + dn - (qn + 1), qn + 1, cnt);
          new_dp[0] |= dp[dn - (qn + 1) - 1] >> (GMP_NUMB_BITS - cnt);
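          /* The bits shifted out of the next lower divisor limb were folded
             into new_dp[0] above, so {new_dp,qn+1} underestimates the
             (shifted) full divisor by less than one unit in its low limb.  */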

          if (qn + 1 == 2)
            {
              qh = mpn_divrem_2 (tp, 0L, new_np, new_nn, new_dp);
            }
          else if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD - 1))
            {
              invert_pi1 (dinv, new_dp[qn], new_dp[qn - 1]);
              qh = mpn_sbpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, dinv.inv32);
            }
          else if (BELOW_THRESHOLD (qn, MU_DIVAPPR_Q_THRESHOLD - 1))
            {
              invert_pi1 (dinv, new_dp[qn], new_dp[qn - 1]);
              qh = mpn_dcpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, &dinv);
            }
          else
            {
              mp_size_t itch = mpn_mu_divappr_q_itch (new_nn, qn + 1, 0);
              mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
              qh = mpn_mu_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, scratch);
            }
          if (cy == 0)
            tp[qn] = qh;
          else if (UNLIKELY (qh != 0))
            {
              /* This happens only when the quotient is close to B^n and
                 mpn_*_divappr_q returned B^n.  */
              mp_size_t i, n;
              n = new_nn - (qn + 1);
              for (i = 0; i < n; i++)
                tp[i] = GMP_NUMB_MAX;
              qh = 0;           /* currently ignored */
            }
        }
      else  /* divisor is already normalised */
        {
          MPN_COPY (new_np, np + nn - new_nn, new_nn); /* pointless if MU will be used */

          new_dp = (mp_ptr) dp + dn - (qn + 1);

          if (qn + 1 == 2)
            {
              qh = mpn_divrem_2 (tp, 0L, new_np, new_nn, new_dp);
            }
          else if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD - 1))
            {
              invert_pi1 (dinv, dh, new_dp[qn - 1]);
              qh = mpn_sbpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, dinv.inv32);
            }
          else if (BELOW_THRESHOLD (qn, MU_DIVAPPR_Q_THRESHOLD - 1))
            {
              invert_pi1 (dinv, dh, new_dp[qn - 1]);
              qh = mpn_dcpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, &dinv);
            }
          else
            {
              mp_size_t itch = mpn_mu_divappr_q_itch (new_nn, qn + 1, 0);
              mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
              qh = mpn_mu_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, scratch);
            }
          tp[qn] = qh;
        }

      MPN_COPY (qp, tp + 1, qn);
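      /* {tp,qn+1} may exceed the true quotient, but only by a few units in
         its low limb tp[0].  If tp[0] > 4 that error cannot carry into the
         qn limbs just copied; otherwise multiply back and decrement Q when
         Q*D > N.  */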
      if (tp[0] <= 4)
        {
          mp_size_t rn;

          rp = TMP_ALLOC_LIMBS (dn + qn);
          mpn_mul (rp, dp, dn, tp + 1, qn);
          rn = dn + qn;
          rn -= rp[rn - 1] == 0;

          if (rn > nn || mpn_cmp (np, rp, nn) < 0)
            MPN_DECR_U (qp, qn, 1);
        }
    }

  TMP_FREE;
}