/src/gmp/mpn/dcpi1_divappr_q.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* mpn_dcpi1_divappr_q -- divide-and-conquer division, returning approximate |
2 | | quotient. The quotient returned is either correct, or one too large. |
3 | | |
4 | | Contributed to the GNU project by Torbjorn Granlund. |
5 | | |
6 | | THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY |
7 | | SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST |
8 | | GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE. |
9 | | |
10 | | Copyright 2006, 2007, 2009, 2010 Free Software Foundation, Inc. |
11 | | |
12 | | This file is part of the GNU MP Library. |
13 | | |
14 | | The GNU MP Library is free software; you can redistribute it and/or modify |
15 | | it under the terms of either: |
16 | | |
17 | | * the GNU Lesser General Public License as published by the Free |
18 | | Software Foundation; either version 3 of the License, or (at your |
19 | | option) any later version. |
20 | | |
21 | | or |
22 | | |
23 | | * the GNU General Public License as published by the Free Software |
24 | | Foundation; either version 2 of the License, or (at your option) any |
25 | | later version. |
26 | | |
27 | | or both in parallel, as here. |
28 | | |
29 | | The GNU MP Library is distributed in the hope that it will be useful, but |
30 | | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
31 | | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
32 | | for more details. |
33 | | |
34 | | You should have received copies of the GNU General Public License and the |
35 | | GNU Lesser General Public License along with the GNU MP Library. If not, |
36 | | see https://www.gnu.org/licenses/. */ |
37 | | |
38 | | #include "gmp-impl.h" |
39 | | #include "longlong.h" |
40 | | |
41 | | |
42 | | static mp_limb_t |
43 | | mpn_dcpi1_divappr_q_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n, |
44 | | gmp_pi1_t *dinv, mp_ptr tp) |
45 | 0 | { |
46 | 0 | mp_size_t lo, hi; |
47 | 0 | mp_limb_t cy, qh, ql; |
48 | |
|
49 | 0 | lo = n >> 1; /* floor(n/2) */ |
50 | 0 | hi = n - lo; /* ceil(n/2) */ |
51 | |
|
52 | 0 | if (BELOW_THRESHOLD (hi, DC_DIV_QR_THRESHOLD)) |
53 | 0 | qh = mpn_sbpi1_div_qr (qp + lo, np + 2 * lo, 2 * hi, dp + lo, hi, dinv->inv32); |
54 | 0 | else |
55 | 0 | qh = mpn_dcpi1_div_qr_n (qp + lo, np + 2 * lo, dp + lo, hi, dinv, tp); |
56 | |
|
57 | 0 | mpn_mul (tp, qp + lo, hi, dp, lo); |
58 | |
|
59 | 0 | cy = mpn_sub_n (np + lo, np + lo, tp, n); |
60 | 0 | if (qh != 0) |
61 | 0 | cy += mpn_sub_n (np + n, np + n, dp, lo); |
62 | |
|
63 | 0 | while (cy != 0) |
64 | 0 | { |
65 | 0 | qh -= mpn_sub_1 (qp + lo, qp + lo, hi, 1); |
66 | 0 | cy -= mpn_add_n (np + lo, np + lo, dp, n); |
67 | 0 | } |
68 | |
|
69 | 0 | if (BELOW_THRESHOLD (lo, DC_DIVAPPR_Q_THRESHOLD)) |
70 | 0 | ql = mpn_sbpi1_divappr_q (qp, np + hi, 2 * lo, dp + hi, lo, dinv->inv32); |
71 | 0 | else |
72 | 0 | ql = mpn_dcpi1_divappr_q_n (qp, np + hi, dp + hi, lo, dinv, tp); |
73 | |
|
74 | 0 | if (UNLIKELY (ql != 0)) |
75 | 0 | { |
76 | 0 | mp_size_t i; |
77 | 0 | for (i = 0; i < lo; i++) |
78 | 0 | qp[i] = GMP_NUMB_MASK; |
79 | 0 | } |
80 | |
|
81 | 0 | return qh; |
82 | 0 | } |
83 | | |
84 | | mp_limb_t |
85 | | mpn_dcpi1_divappr_q (mp_ptr qp, mp_ptr np, mp_size_t nn, |
86 | | mp_srcptr dp, mp_size_t dn, gmp_pi1_t *dinv) |
87 | 0 | { |
88 | 0 | mp_size_t qn; |
89 | 0 | mp_limb_t qh, cy, qsave; |
90 | 0 | mp_ptr tp; |
91 | 0 | TMP_DECL; |
92 | |
|
93 | 0 | TMP_MARK; |
94 | |
|
95 | 0 | ASSERT (dn >= 6); |
96 | 0 | ASSERT (nn > dn); |
97 | 0 | ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT); |
98 | |
|
99 | 0 | qn = nn - dn; |
100 | 0 | qp += qn; |
101 | 0 | np += nn; |
102 | 0 | dp += dn; |
103 | |
|
104 | 0 | if (qn >= dn) |
105 | 0 | { |
106 | 0 | qn++; /* pretend we'll need an extra limb */ |
107 | | /* Reduce qn mod dn without division, optimizing small operations. */ |
108 | 0 | do |
109 | 0 | qn -= dn; |
110 | 0 | while (qn > dn); |
111 | |
|
112 | 0 | qp -= qn; /* point at low limb of next quotient block */ |
113 | 0 | np -= qn; /* point in the middle of partial remainder */ |
114 | |
|
115 | 0 | tp = TMP_SALLOC_LIMBS (dn); |
116 | | |
117 | | /* Perform the typically smaller block first. */ |
118 | 0 | if (qn == 1) |
119 | 0 | { |
120 | 0 | mp_limb_t q, n2, n1, n0, d1, d0; |
121 | | |
122 | | /* Handle qh up front, for simplicity. */ |
123 | 0 | qh = mpn_cmp (np - dn + 1, dp - dn, dn) >= 0; |
124 | 0 | if (qh) |
125 | 0 | ASSERT_NOCARRY (mpn_sub_n (np - dn + 1, np - dn + 1, dp - dn, dn)); |
126 | | |
127 | | /* A single iteration of schoolbook: One 3/2 division, |
128 | | followed by the bignum update and adjustment. */ |
129 | 0 | n2 = np[0]; |
130 | 0 | n1 = np[-1]; |
131 | 0 | n0 = np[-2]; |
132 | 0 | d1 = dp[-1]; |
133 | 0 | d0 = dp[-2]; |
134 | |
|
135 | 0 | ASSERT (n2 < d1 || (n2 == d1 && n1 <= d0)); |
136 | |
|
137 | 0 | if (UNLIKELY (n2 == d1) && n1 == d0) |
138 | 0 | { |
139 | 0 | q = GMP_NUMB_MASK; |
140 | 0 | cy = mpn_submul_1 (np - dn, dp - dn, dn, q); |
141 | 0 | ASSERT (cy == n2); |
142 | 0 | } |
143 | 0 | else |
144 | 0 | { |
145 | 0 | udiv_qr_3by2 (q, n1, n0, n2, n1, n0, d1, d0, dinv->inv32); |
146 | |
|
147 | 0 | if (dn > 2) |
148 | 0 | { |
149 | 0 | mp_limb_t cy, cy1; |
150 | 0 | cy = mpn_submul_1 (np - dn, dp - dn, dn - 2, q); |
151 | |
|
152 | 0 | cy1 = n0 < cy; |
153 | 0 | n0 = (n0 - cy) & GMP_NUMB_MASK; |
154 | 0 | cy = n1 < cy1; |
155 | 0 | n1 = (n1 - cy1) & GMP_NUMB_MASK; |
156 | 0 | np[-2] = n0; |
157 | |
|
158 | 0 | if (UNLIKELY (cy != 0)) |
159 | 0 | { |
160 | 0 | n1 += d1 + mpn_add_n (np - dn, np - dn, dp - dn, dn - 1); |
161 | 0 | qh -= (q == 0); |
162 | 0 | q = (q - 1) & GMP_NUMB_MASK; |
163 | 0 | } |
164 | 0 | } |
165 | 0 | else |
166 | 0 | np[-2] = n0; |
167 | |
|
168 | 0 | np[-1] = n1; |
169 | 0 | } |
170 | 0 | qp[0] = q; |
171 | 0 | } |
172 | 0 | else |
173 | 0 | { |
174 | 0 | if (qn == 2) |
175 | 0 | qh = mpn_divrem_2 (qp, 0L, np - 2, 4, dp - 2); |
176 | 0 | else if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD)) |
177 | 0 | qh = mpn_sbpi1_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dinv->inv32); |
178 | 0 | else |
179 | 0 | qh = mpn_dcpi1_div_qr_n (qp, np - qn, dp - qn, qn, dinv, tp); |
180 | |
|
181 | 0 | if (qn != dn) |
182 | 0 | { |
183 | 0 | if (qn > dn - qn) |
184 | 0 | mpn_mul (tp, qp, qn, dp - dn, dn - qn); |
185 | 0 | else |
186 | 0 | mpn_mul (tp, dp - dn, dn - qn, qp, qn); |
187 | |
|
188 | 0 | cy = mpn_sub_n (np - dn, np - dn, tp, dn); |
189 | 0 | if (qh != 0) |
190 | 0 | cy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn); |
191 | |
|
192 | 0 | while (cy != 0) |
193 | 0 | { |
194 | 0 | qh -= mpn_sub_1 (qp, qp, qn, 1); |
195 | 0 | cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn); |
196 | 0 | } |
197 | 0 | } |
198 | 0 | } |
199 | 0 | qn = nn - dn - qn + 1; |
200 | 0 | while (qn > dn) |
201 | 0 | { |
202 | 0 | qp -= dn; |
203 | 0 | np -= dn; |
204 | 0 | mpn_dcpi1_div_qr_n (qp, np - dn, dp - dn, dn, dinv, tp); |
205 | 0 | qn -= dn; |
206 | 0 | } |
207 | | |
208 | | /* Since we pretended we'd need an extra quotient limb before, we now |
209 | | have made sure the code above left just dn-1=qn quotient limbs to |
210 | | develop. Develop that plus a guard limb. */ |
211 | 0 | qn--; |
212 | 0 | qp -= qn; |
213 | 0 | np -= dn; |
214 | 0 | qsave = qp[qn]; |
215 | 0 | mpn_dcpi1_divappr_q_n (qp, np - dn, dp - dn, dn, dinv, tp); |
216 | 0 | MPN_COPY_INCR (qp, qp + 1, qn); |
217 | 0 | qp[qn] = qsave; |
218 | 0 | } |
219 | 0 | else /* (qn < dn) */ |
220 | 0 | { |
221 | 0 | mp_ptr q2p; |
222 | | #if 0 /* not possible since we demand nn > dn */ |
223 | | if (qn == 0) |
224 | | { |
225 | | qh = mpn_cmp (np - dn, dp - dn, dn) >= 0; |
226 | | if (qh) |
227 | | mpn_sub_n (np - dn, np - dn, dp - dn, dn); |
228 | | TMP_FREE; |
229 | | return qh; |
230 | | } |
231 | | #endif |
232 | |
|
233 | 0 | qp -= qn; /* point at low limb of next quotient block */ |
234 | 0 | np -= qn; /* point in the middle of partial remainder */ |
235 | |
|
236 | 0 | q2p = TMP_SALLOC_LIMBS (qn + 1); |
237 | | /* Should we at all check DC_DIVAPPR_Q_THRESHOLD here, or reply on |
238 | | callers not to be silly? */ |
239 | 0 | if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD)) |
240 | 0 | { |
241 | 0 | qh = mpn_sbpi1_divappr_q (q2p, np - qn - 2, 2 * (qn + 1), |
242 | 0 | dp - (qn + 1), qn + 1, dinv->inv32); |
243 | 0 | } |
244 | 0 | else |
245 | 0 | { |
246 | | /* It is tempting to use qp for recursive scratch and put quotient in |
247 | | tp, but the recursive scratch needs one limb too many. */ |
248 | 0 | tp = TMP_SALLOC_LIMBS (qn + 1); |
249 | 0 | qh = mpn_dcpi1_divappr_q_n (q2p, np - qn - 2, dp - (qn + 1), qn + 1, dinv, tp); |
250 | 0 | } |
251 | 0 | MPN_COPY (qp, q2p + 1, qn); |
252 | 0 | } |
253 | |
|
254 | 0 | TMP_FREE; |
255 | 0 | return qh; |
256 | 0 | } |