Line | Count | Source |
1 | | /* mpn_divrem_1 -- mpn by limb division. |
2 | | |
3 | | Copyright 1991, 1993, 1994, 1996, 1998-2000, 2002, 2003 Free Software |
4 | | Foundation, Inc. |
5 | | |
6 | | This file is part of the GNU MP Library. |
7 | | |
8 | | The GNU MP Library is free software; you can redistribute it and/or modify |
9 | | it under the terms of either: |
10 | | |
11 | | * the GNU Lesser General Public License as published by the Free |
12 | | Software Foundation; either version 3 of the License, or (at your |
13 | | option) any later version. |
14 | | |
15 | | or |
16 | | |
17 | | * the GNU General Public License as published by the Free Software |
18 | | Foundation; either version 2 of the License, or (at your option) any |
19 | | later version. |
20 | | |
21 | | or both in parallel, as here. |
22 | | |
23 | | The GNU MP Library is distributed in the hope that it will be useful, but |
24 | | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
25 | | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
26 | | for more details. |
27 | | |
28 | | You should have received copies of the GNU General Public License and the |
29 | | GNU Lesser General Public License along with the GNU MP Library. If not, |
30 | | see https://www.gnu.org/licenses/. */ |
31 | | |
32 | | #include "gmp-impl.h" |
33 | | #include "longlong.h" |
34 | | |
35 | | |
36 | | /* The quotient size above which udiv_qrnnd_preinv should be used rather
37 | | than plain udiv_qrnnd, the quotient size being how many udiv divisions
38 | | will be done.
39 | | |
40 | | The default is to use preinv always; CPUs where this doesn't suit have
41 | | tuned thresholds. Note in particular that preinv should certainly be |
42 | | used if that's the only division available (USE_PREINV_ALWAYS). */ |
43 | | |
44 | | #ifndef DIVREM_1_NORM_THRESHOLD |
45 | | #define DIVREM_1_NORM_THRESHOLD 0 |
46 | | #endif |
47 | | #ifndef DIVREM_1_UNNORM_THRESHOLD |
48 | | #define DIVREM_1_UNNORM_THRESHOLD 0 |
49 | | #endif |
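/* Tuned gmp-mparam.h files override these defaults on CPUs where plain
   udiv_qrnnd wins for small quotient sizes.  A hypothetical override, with
   illustrative values only, might look like:

     #define DIVREM_1_NORM_THRESHOLD     3
     #define DIVREM_1_UNNORM_THRESHOLD   5

   Setting a threshold to MP_SIZE_T_MAX disables the mul-by-inverse path for
   that case entirely.  */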
50 | | |
51 | | |
52 | | |
53 | | /* If the CPU only has multiply-by-inverse division (e.g. alpha), then NORM
54 | | and UNNORM thresholds are 0 and only the inversion code is included. |
55 | | |
56 | | If multiply-by-inverse is never viable, then NORM and UNNORM thresholds |
57 | | will be MP_SIZE_T_MAX and only the plain division code is included. |
58 | | |
59 | | Otherwise mul-by-inverse is better than plain division above some |
60 | | threshold, and best results are obtained by having code for both present. |
61 | | |
62 | | The main reason for separating the norm and unnorm cases is that not all |
63 | | CPUs give zero for "n1 >> GMP_LIMB_BITS", which would arise in the unnorm
64 | | code used on an already normalized divisor. |
65 | | |
66 | | If UDIV_NEEDS_NORMALIZATION is false then plain division uses the same |
67 | | non-shifting code for both the norm and unnorm cases, though with |
68 | | different criteria for skipping a division, and with different thresholds |
69 | | of course. And in fact if inversion is never viable, then that simple |
70 | | non-shifting division would be all that's left. |
71 | | |
72 | | The NORM and UNNORM thresholds might not differ much, but if there's |
73 | | going to be separate code for norm and unnorm then it makes sense to have |
74 | | separate thresholds. One thing that's possible is that the |
75 | | mul-by-inverse might be better only for normalized divisors, due to that |
76 | | case not needing variable bit shifts. |
77 | | |
78 | | Notice that the thresholds are tested after the decision to possibly skip |
79 | | one divide step, so they're based on the actual number of divisions done. |
80 | | |
81 | | For the unnorm case, it would be possible to call mpn_lshift to adjust |
82 | | the dividend all in one go (into the quotient space say), rather than |
83 | | limb-by-limb in the loop. This might help if mpn_lshift is a lot faster |
84 | | than what the compiler can generate for EXTRACT. But this is left to CPU |
85 | | specific implementations to consider, especially since EXTRACT isn't on |
86 | | the dependent chain. */ |
87 | | |
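/* Illustration of the mul-by-inverse step used below: a sketch of 2-by-1
   division with a precomputed reciprocal, not GMP's actual udiv_qrnnd_preinv
   macro.  It assumes GMP_NAIL_BITS == 0, a normalized divisor d (high bit
   set), a high numerator limb n1 < d, and dinv = floor ((B*B - 1) / d) - B
   with B = 2^GMP_LIMB_BITS, the value invert_limb produces.  The point of
   the technique is to replace each hardware divide with two multiplies and
   a few adds.  umul_ppmm and add_ssaaaa come from longlong.h, included
   above.  */
static inline mp_limb_t
div_2by1_preinv_sketch (mp_limb_t *qres, mp_limb_t n1, mp_limb_t n0,
                        mp_limb_t d, mp_limb_t dinv)
{
  mp_limb_t q1, q0, r;

  umul_ppmm (q1, q0, dinv, n1);          /* (q1,q0) = dinv * n1              */
  add_ssaaaa (q1, q0, q1, q0, n1, n0);   /* (q1,q0) += (n1,n0)               */
  q1++;                                  /* candidate quotient, mod B        */
  r = n0 - q1 * d;                       /* candidate remainder, mod B       */
  if (r > q0)                            /* candidate quotient one too large */
    {
      q1--;
      r += d;
    }
  if (r >= d)                            /* rare: candidate one too small    */
    {
      q1++;
      r -= d;
    }
  *qres = q1;
  return r;
}
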
88 | | mp_limb_t |
89 | | mpn_divrem_1 (mp_ptr qp, mp_size_t qxn, |
90 | | mp_srcptr up, mp_size_t un, mp_limb_t d) |
91 | 8.37k | { |
92 | 8.37k | mp_size_t n; |
93 | 8.37k | mp_size_t i; |
94 | 8.37k | mp_limb_t n1, n0; |
95 | 8.37k | mp_limb_t r = 0; |
96 | | |
97 | 8.37k | ASSERT (qxn >= 0); |
98 | 8.37k | ASSERT (un >= 0); |
99 | 8.37k | ASSERT (d != 0); |
100 | | /* FIXME: What's the correct overlap rule when qxn!=0? */ |
101 | 8.37k | ASSERT (MPN_SAME_OR_SEPARATE_P (qp+qxn, up, un)); |
102 | | |
103 | 8.37k | n = un + qxn; |
104 | 8.37k | if (n == 0) |
105 | 0 | return 0; |
106 | | |
107 | 8.37k | d <<= GMP_NAIL_BITS; |
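/* In a nails build (GMP_NAIL_BITS > 0) each stored limb keeps its top
   GMP_NAIL_BITS bits zero.  Shifting d here, and every dividend limb below,
   up by GMP_NAIL_BITS lets the full-limb division primitives be used
   unchanged; the remainder they return is scaled by 2^GMP_NAIL_BITS, which
   is why every division step is followed by "r >>= GMP_NAIL_BITS".  In the
   usual non-nails build these shifts are by zero and compile away.  */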
108 | | |
109 | 8.37k | qp += (n - 1); /* Make qp point at most significant quotient limb */ |
110 | | |
111 | 8.37k | if ((d & GMP_LIMB_HIGHBIT) != 0) |
112 | 2.62k | { |
113 | 2.62k | if (un != 0) |
114 | 2.62k | { |
115 | | /* High quotient limb is 0 or 1, skip a divide step. */ |
116 | 2.62k | mp_limb_t q; |
117 | 2.62k | r = up[un - 1] << GMP_NAIL_BITS; |
118 | 2.62k | q = (r >= d); |
119 | 2.62k | *qp-- = q; |
120 | 2.62k | r -= (d & -q); |
121 | 2.62k | r >>= GMP_NAIL_BITS; |
122 | 2.62k | n--; |
123 | 2.62k | un--; |
124 | 2.62k | } |
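/* Since d has its high bit set, r = up[un-1] << GMP_NAIL_BITS is below 2*d,
   so the high quotient limb can only be 0 or 1.  The "r -= (d & -q)" above
   is a branch-free conditional subtract: -q is either all zero bits or all
   one bits, so d is subtracted exactly when q == 1, i.e. when r >= d.  */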
125 | | |
126 | 2.62k | if (BELOW_THRESHOLD (n, DIVREM_1_NORM_THRESHOLD)) |
127 | 0 | { |
128 | 0 | plain: |
129 | 0 | for (i = un - 1; i >= 0; i--) |
130 | 0 | { |
131 | 0 | n0 = up[i] << GMP_NAIL_BITS; |
132 | 0 | udiv_qrnnd (*qp, r, r, n0, d); |
133 | 0 | r >>= GMP_NAIL_BITS; |
134 | 0 | qp--; |
135 | 0 | } |
136 | 0 | for (i = qxn - 1; i >= 0; i--) |
137 | 0 | { |
138 | 0 | udiv_qrnnd (*qp, r, r, CNST_LIMB(0), d); |
139 | 0 | r >>= GMP_NAIL_BITS; |
140 | 0 | qp--; |
141 | 0 | } |
142 | 0 | return r; |
143 | 0 | } |
144 | 2.62k | else |
145 | 2.62k | { |
146 | | /* Multiply-by-inverse, divisor already normalized. */ |
147 | 2.62k | mp_limb_t dinv; |
148 | 2.62k | invert_limb (dinv, d); |
149 | | |
150 | 21.6k | for (i = un - 1; i >= 0; i--) |
151 | 19.0k | { |
152 | 19.0k | n0 = up[i] << GMP_NAIL_BITS; |
153 | 19.0k | udiv_qrnnd_preinv (*qp, r, r, n0, d, dinv); |
154 | 19.0k | r >>= GMP_NAIL_BITS; |
155 | 19.0k | qp--; |
156 | 19.0k | } |
157 | 2.62k | for (i = qxn - 1; i >= 0; i--) |
158 | 0 | { |
159 | 0 | udiv_qrnnd_preinv (*qp, r, r, CNST_LIMB(0), d, dinv); |
160 | 0 | r >>= GMP_NAIL_BITS; |
161 | 0 | qp--; |
162 | 0 | } |
163 | 2.62k | return r; |
164 | 2.62k | } |
165 | 2.62k | } |
166 | 5.75k | else |
167 | 5.75k | { |
168 | | /* Most significant bit of divisor == 0. */ |
169 | 5.75k | int cnt; |
170 | | |
171 | | /* Skip a division if high < divisor (high quotient 0). Testing here |
172 | | before normalizing will still skip as often as possible. */ |
173 | 5.75k | if (un != 0) |
174 | 5.75k | { |
175 | 5.75k | n1 = up[un - 1] << GMP_NAIL_BITS; |
176 | 5.75k | if (n1 < d) |
177 | 2.62k | { |
178 | 2.62k | r = n1 >> GMP_NAIL_BITS; |
179 | 2.62k | *qp-- = 0; |
180 | 2.62k | n--; |
181 | 2.62k | if (n == 0) |
182 | 476 | return r; |
183 | 2.15k | un--; |
184 | 2.15k | } |
185 | 5.75k | } |
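/* The n1 < d test above is exact: the high quotient limb is zero precisely
   when up[un-1] < d, and shifting both operands left by the same cnt would
   not change the comparison, so testing before normalization loses no skip
   opportunities.  */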
186 | | |
187 | 5.27k | if (! UDIV_NEEDS_NORMALIZATION |
188 | 5.27k | && BELOW_THRESHOLD (n, DIVREM_1_UNNORM_THRESHOLD)) |
189 | 0 | goto plain; |
190 | | |
191 | 5.27k | count_leading_zeros (cnt, d); |
192 | 5.27k | d <<= cnt; |
193 | 5.27k | r <<= cnt; |
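/* Normalization: multiplying both dividend and divisor by 2^cnt leaves every
   quotient limb unchanged and scales the remainder by 2^cnt, which is why
   this branch finishes with "return r >> cnt".  */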
194 | | |
195 | 5.27k | if (UDIV_NEEDS_NORMALIZATION |
196 | 5.27k | && BELOW_THRESHOLD (n, DIVREM_1_UNNORM_THRESHOLD)) |
197 | 0 | { |
198 | 0 | mp_limb_t nshift; |
199 | 0 | if (un != 0) |
200 | 0 | { |
201 | 0 | n1 = up[un - 1] << GMP_NAIL_BITS; |
202 | 0 | r |= (n1 >> (GMP_LIMB_BITS - cnt)); |
203 | 0 | for (i = un - 2; i >= 0; i--) |
204 | 0 | { |
205 | 0 | n0 = up[i] << GMP_NAIL_BITS; |
206 | 0 | nshift = (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt)); |
207 | 0 | udiv_qrnnd (*qp, r, r, nshift, d); |
208 | 0 | r >>= GMP_NAIL_BITS; |
209 | 0 | qp--; |
210 | 0 | n1 = n0; |
211 | 0 | } |
212 | 0 | udiv_qrnnd (*qp, r, r, n1 << cnt, d); |
213 | 0 | r >>= GMP_NAIL_BITS; |
214 | 0 | qp--; |
215 | 0 | } |
216 | 0 | for (i = qxn - 1; i >= 0; i--) |
217 | 0 | { |
218 | 0 | udiv_qrnnd (*qp, r, r, CNST_LIMB(0), d); |
219 | 0 | r >>= GMP_NAIL_BITS; |
220 | 0 | qp--; |
221 | 0 | } |
222 | 0 | return r >> cnt; |
223 | 0 | } |
224 | 5.27k | else |
225 | 5.27k | { |
226 | 5.27k | mp_limb_t dinv, nshift; |
227 | 5.27k | invert_limb (dinv, d); |
228 | 5.27k | if (un != 0) |
229 | 5.27k | { |
230 | 5.27k | n1 = up[un - 1] << GMP_NAIL_BITS; |
231 | 5.27k | r |= (n1 >> (GMP_LIMB_BITS - cnt)); |
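/* Each iteration forms the next limb of the left-shifted dividend on the
   fly: nshift = (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt)).  This is the
   EXTRACT step discussed in the comment at the top of the file; it feeds
   udiv_qrnnd_preinv but is not on the carried dependency through r.  */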
232 | 58.7k | for (i = un - 2; i >= 0; i--) |
233 | 53.4k | { |
234 | 53.4k | n0 = up[i] << GMP_NAIL_BITS; |
235 | 53.4k | nshift = (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt)); |
236 | 53.4k | udiv_qrnnd_preinv (*qp, r, r, nshift, d, dinv); |
237 | 53.4k | r >>= GMP_NAIL_BITS; |
238 | 53.4k | qp--; |
239 | 53.4k | n1 = n0; |
240 | 53.4k | } |
241 | 5.27k | udiv_qrnnd_preinv (*qp, r, r, n1 << cnt, d, dinv); |
242 | 5.27k | r >>= GMP_NAIL_BITS; |
243 | 5.27k | qp--; |
244 | 5.27k | } |
245 | 5.27k | for (i = qxn - 1; i >= 0; i--) |
246 | 0 | { |
247 | 0 | udiv_qrnnd_preinv (*qp, r, r, CNST_LIMB(0), d, dinv); |
248 | 0 | r >>= GMP_NAIL_BITS; |
249 | 0 | qp--; |
250 | 0 | } |
251 | 5.27k | return r >> cnt; |
252 | 5.27k | } |
253 | 5.27k | } |
254 | 8.37k | } |
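A minimal usage sketch of mpn_divrem_1 through the documented gmp.h mpn
interface; the operand values are illustrative only.  With qxn == 0 no extra
"fractional" quotient limbs are produced, so the quotient occupies un limbs.

#include <stdio.h>
#include <gmp.h>

int
main (void)
{
  /* Dividend {up, 2} = up[1]*B + up[0], where B = 2^GMP_NUMB_BITS. */
  mp_limb_t up[2] = { 123456789, 5 };
  mp_limb_t qp[2];                      /* un + qxn = 2 quotient limbs */
  mp_limb_t r;

  r = mpn_divrem_1 (qp, 0, up, 2, (mp_limb_t) 7);

  printf ("quotient limbs: low %lu, high %lu; remainder %lu\n",
          (unsigned long) qp[0], (unsigned long) qp[1], (unsigned long) r);
  return 0;
}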