/src/gmp-6.2.1/mpn/dcpi1_div_qr.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* mpn_dcpi1_div_qr_n -- recursive divide-and-conquer division for arbitrary |
2 | | size operands. |
3 | | |
4 | | Contributed to the GNU project by Torbjorn Granlund. |
5 | | |
6 | | THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY |
7 | | SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST |
8 | | GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE. |
9 | | |
10 | | Copyright 2006, 2007, 2009 Free Software Foundation, Inc. |
11 | | |
12 | | This file is part of the GNU MP Library. |
13 | | |
14 | | The GNU MP Library is free software; you can redistribute it and/or modify |
15 | | it under the terms of either: |
16 | | |
17 | | * the GNU Lesser General Public License as published by the Free |
18 | | Software Foundation; either version 3 of the License, or (at your |
19 | | option) any later version. |
20 | | |
21 | | or |
22 | | |
23 | | * the GNU General Public License as published by the Free Software |
24 | | Foundation; either version 2 of the License, or (at your option) any |
25 | | later version. |
26 | | |
27 | | or both in parallel, as here. |
28 | | |
29 | | The GNU MP Library is distributed in the hope that it will be useful, but |
30 | | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
31 | | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
32 | | for more details. |
33 | | |
34 | | You should have received copies of the GNU General Public License and the |
35 | | GNU Lesser General Public License along with the GNU MP Library. If not, |
36 | | see https://www.gnu.org/licenses/. */ |
37 | | |
38 | | #include "gmp-impl.h" |
39 | | #include "longlong.h" |
40 | | |
41 | | |
42 | | mp_limb_t |
43 | | mpn_dcpi1_div_qr_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n, |
44 | | gmp_pi1_t *dinv, mp_ptr tp) |
45 | 2.87k | { |
46 | 2.87k | mp_size_t lo, hi; |
47 | 2.87k | mp_limb_t cy, qh, ql; |
48 | | |
49 | 2.87k | lo = n >> 1; /* floor(n/2) */ |
50 | 2.87k | hi = n - lo; /* ceil(n/2) */ |
51 | | |
52 | 2.87k | if (BELOW_THRESHOLD (hi, DC_DIV_QR_THRESHOLD)) |
53 | 2.27k | qh = mpn_sbpi1_div_qr (qp + lo, np + 2 * lo, 2 * hi, dp + lo, hi, dinv->inv32); |
54 | 605 | else |
55 | 605 | qh = mpn_dcpi1_div_qr_n (qp + lo, np + 2 * lo, dp + lo, hi, dinv, tp); |
56 | | |
57 | 2.87k | mpn_mul (tp, qp + lo, hi, dp, lo); |
58 | | |
59 | 2.87k | cy = mpn_sub_n (np + lo, np + lo, tp, n); |
60 | 2.87k | if (qh != 0) |
61 | 31 | cy += mpn_sub_n (np + n, np + n, dp, lo); |
62 | | |
63 | 3.91k | while (cy != 0) |
64 | 1.03k | { |
65 | 1.03k | qh -= mpn_sub_1 (qp + lo, qp + lo, hi, 1); |
66 | 1.03k | cy -= mpn_add_n (np + lo, np + lo, dp, n); |
67 | 1.03k | } |
68 | | |
69 | 2.87k | if (BELOW_THRESHOLD (lo, DC_DIV_QR_THRESHOLD)) |
70 | 2.27k | ql = mpn_sbpi1_div_qr (qp, np + hi, 2 * lo, dp + hi, lo, dinv->inv32); |
71 | 599 | else |
72 | 599 | ql = mpn_dcpi1_div_qr_n (qp, np + hi, dp + hi, lo, dinv, tp); |
73 | | |
74 | 2.87k | mpn_mul (tp, dp, hi, qp, lo); |
75 | | |
76 | 2.87k | cy = mpn_sub_n (np, np, tp, n); |
77 | 2.87k | if (ql != 0) |
78 | 2 | cy += mpn_sub_n (np + lo, np + lo, dp, hi); |
79 | | |
80 | 3.77k | while (cy != 0) |
81 | 895 | { |
82 | 895 | mpn_sub_1 (qp, qp, lo, 1); |
83 | 895 | cy -= mpn_add_n (np, np, dp, n); |
84 | 895 | } |
85 | | |
86 | 2.87k | return qh; |
87 | 2.87k | } |
88 | | |
89 | | mp_limb_t |
90 | | mpn_dcpi1_div_qr (mp_ptr qp, |
91 | | mp_ptr np, mp_size_t nn, |
92 | | mp_srcptr dp, mp_size_t dn, |
93 | | gmp_pi1_t *dinv) |
94 | 1.53k | { |
95 | 1.53k | mp_size_t qn; |
96 | 1.53k | mp_limb_t qh, cy; |
97 | 1.53k | mp_ptr tp; |
98 | 1.53k | TMP_DECL; |
99 | | |
100 | 1.53k | TMP_MARK; |
101 | | |
102 | 1.53k | ASSERT (dn >= 6); /* to adhere to mpn_sbpi1_div_qr's limits */ |
103 | 1.53k | ASSERT (nn - dn >= 3); /* to adhere to mpn_sbpi1_div_qr's limits */ |
104 | 1.53k | ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT); |
105 | | |
106 | 1.53k | tp = TMP_ALLOC_LIMBS (dn); |
107 | | |
108 | 1.53k | qn = nn - dn; |
109 | 1.53k | qp += qn; |
110 | 1.53k | np += nn; |
111 | 1.53k | dp += dn; |
112 | | |
113 | 1.53k | if (qn > dn) |
114 | 1.05k | { |
115 | | /* Reduce qn mod dn without division, optimizing small operations. */ |
116 | 1.05k | do |
117 | 1.10k | qn -= dn; |
118 | 1.10k | while (qn > dn); |
119 | | |
120 | 1.05k | qp -= qn; /* point at low limb of next quotient block */ |
121 | 1.05k | np -= qn; /* point in the middle of partial remainder */ |
122 | | |
123 | | /* Perform the typically smaller block first. */ |
124 | 1.05k | if (qn == 1) |
125 | 179 | { |
126 | 179 | mp_limb_t q, n2, n1, n0, d1, d0; |
127 | | |
128 | | /* Handle qh up front, for simplicity. */ |
129 | 179 | qh = mpn_cmp (np - dn + 1, dp - dn, dn) >= 0; |
130 | 179 | if (qh) |
131 | 179 | ASSERT_NOCARRY (mpn_sub_n (np - dn + 1, np - dn + 1, dp - dn, dn)); |
132 | | |
133 | | /* A single iteration of schoolbook: One 3/2 division, |
134 | | followed by the bignum update and adjustment. */ |
135 | 179 | n2 = np[0]; |
136 | 179 | n1 = np[-1]; |
137 | 179 | n0 = np[-2]; |
138 | 179 | d1 = dp[-1]; |
139 | 179 | d0 = dp[-2]; |
140 | | |
141 | 179 | ASSERT (n2 < d1 || (n2 == d1 && n1 <= d0)); |
142 | | |
143 | 179 | if (UNLIKELY (n2 == d1) && n1 == d0) |
144 | 0 | { |
145 | 0 | q = GMP_NUMB_MASK; |
146 | 0 | cy = mpn_submul_1 (np - dn, dp - dn, dn, q); |
147 | 0 | ASSERT (cy == n2); |
148 | 0 | } |
149 | 179 | else |
150 | 179 | { |
151 | 179 | udiv_qr_3by2 (q, n1, n0, n2, n1, n0, d1, d0, dinv->inv32); |
152 | | |
153 | 179 | if (dn > 2) |
154 | 179 | { |
155 | 179 | mp_limb_t cy, cy1; |
156 | 179 | cy = mpn_submul_1 (np - dn, dp - dn, dn - 2, q); |
157 | | |
158 | 179 | cy1 = n0 < cy; |
159 | 179 | n0 = (n0 - cy) & GMP_NUMB_MASK; |
160 | 179 | cy = n1 < cy1; |
161 | 179 | n1 = (n1 - cy1) & GMP_NUMB_MASK; |
162 | 179 | np[-2] = n0; |
163 | | |
164 | 179 | if (UNLIKELY (cy != 0)) |
165 | 15 | { |
166 | 15 | n1 += d1 + mpn_add_n (np - dn, np - dn, dp - dn, dn - 1); |
167 | 15 | qh -= (q == 0); |
168 | 15 | q = (q - 1) & GMP_NUMB_MASK; |
169 | 15 | } |
170 | 179 | } |
171 | 0 | else |
172 | 0 | np[-2] = n0; |
173 | | |
174 | 179 | np[-1] = n1; |
175 | 179 | } |
176 | 179 | qp[0] = q; |
177 | 179 | } |
178 | 875 | else |
179 | 875 | { |
180 | | /* Do a 2qn / qn division */ |
181 | 875 | if (qn == 2) |
182 | 22 | qh = mpn_divrem_2 (qp, 0L, np - 2, 4, dp - 2); /* FIXME: obsolete function. Use 5/3 division? */ |
183 | 853 | else if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD)) |
184 | 766 | qh = mpn_sbpi1_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dinv->inv32); |
185 | 87 | else |
186 | 87 | qh = mpn_dcpi1_div_qr_n (qp, np - qn, dp - qn, qn, dinv, tp); |
187 | | |
188 | 875 | if (qn != dn) |
189 | 871 | { |
190 | 871 | if (qn > dn - qn) |
191 | 62 | mpn_mul (tp, qp, qn, dp - dn, dn - qn); |
192 | 809 | else |
193 | 809 | mpn_mul (tp, dp - dn, dn - qn, qp, qn); |
194 | | |
195 | 871 | cy = mpn_sub_n (np - dn, np - dn, tp, dn); |
196 | 871 | if (qh != 0) |
197 | 0 | cy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn); |
198 | | |
199 | 912 | while (cy != 0) |
200 | 41 | { |
201 | 41 | qh -= mpn_sub_1 (qp, qp, qn, 1); |
202 | 41 | cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn); |
203 | 41 | } |
204 | 871 | } |
205 | 875 | } |
206 | | |
207 | 1.05k | qn = nn - dn - qn; |
208 | 1.05k | do |
209 | 1.10k | { |
210 | 1.10k | qp -= dn; |
211 | 1.10k | np -= dn; |
212 | 1.10k | mpn_dcpi1_div_qr_n (qp, np - dn, dp - dn, dn, dinv, tp); |
213 | 1.10k | qn -= dn; |
214 | 1.10k | } |
215 | 1.10k | while (qn > 0); |
216 | 1.05k | } |
217 | 484 | else |
218 | 484 | { |
219 | 484 | qp -= qn; /* point at low limb of next quotient block */ |
220 | 484 | np -= qn; /* point in the middle of partial remainder */ |
221 | | |
222 | 484 | if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD)) |
223 | 0 | qh = mpn_sbpi1_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dinv->inv32); |
224 | 484 | else |
225 | 484 | qh = mpn_dcpi1_div_qr_n (qp, np - qn, dp - qn, qn, dinv, tp); |
226 | | |
227 | 484 | if (qn != dn) |
228 | 75 | { |
229 | 75 | if (qn > dn - qn) |
230 | 75 | mpn_mul (tp, qp, qn, dp - dn, dn - qn); |
231 | 0 | else |
232 | 0 | mpn_mul (tp, dp - dn, dn - qn, qp, qn); |
233 | | |
234 | 75 | cy = mpn_sub_n (np - dn, np - dn, tp, dn); |
235 | 75 | if (qh != 0) |
236 | 23 | cy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn); |
237 | | |
238 | 140 | while (cy != 0) |
239 | 65 | { |
240 | 65 | qh -= mpn_sub_1 (qp, qp, qn, 1); |
241 | 65 | cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn); |
242 | 65 | } |
243 | 75 | } |
244 | 484 | } |
245 | | |
246 | 1.53k | TMP_FREE; |
247 | 1.53k | return qh; |
248 | 1.53k | } |