/* mpn_mod_1(dividend_ptr, dividend_size, divisor_limb) --
   Divide (DIVIDEND_PTR,,DIVIDEND_SIZE) by DIVISOR_LIMB.
   Return the single-limb remainder.
   There are no constraints on the value of the divisor.

Copyright 1991, 1993, 1994, 1999, 2000, 2002, 2007-2009, 2012, 2020
Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */

#include "gmp-impl.h"
#include "longlong.h"


/* The size at which udiv_qrnnd_preinv should be used rather than
   udiv_qrnnd.  The threshold is measured as a quotient size, i.e. the
   number of udiv division steps that will be done.

   The default is to use preinv always; CPUs where this doesn't suit have
   tuned thresholds.  Note in particular that preinv should certainly be
   used if it's the only division available (USE_PREINV_ALWAYS).  An
   illustrative sketch of a preinv step follows the threshold defines
   below.  */

#ifndef MOD_1_NORM_THRESHOLD
#define MOD_1_NORM_THRESHOLD  0
#endif

#ifndef MOD_1_UNNORM_THRESHOLD
#define MOD_1_UNNORM_THRESHOLD  0
#endif

#ifndef MOD_1U_TO_MOD_1_1_THRESHOLD
#define MOD_1U_TO_MOD_1_1_THRESHOLD  MP_SIZE_T_MAX /* default is not to use mpn_mod_1s */
#endif

#ifndef MOD_1N_TO_MOD_1_1_THRESHOLD
#define MOD_1N_TO_MOD_1_1_THRESHOLD  MP_SIZE_T_MAX /* default is not to use mpn_mod_1s */
#endif

#ifndef MOD_1_1_TO_MOD_1_2_THRESHOLD
#define MOD_1_1_TO_MOD_1_2_THRESHOLD  10
#endif

#ifndef MOD_1_2_TO_MOD_1_4_THRESHOLD
#define MOD_1_2_TO_MOD_1_4_THRESHOLD  20
#endif
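
/* Illustrative only: a minimal sketch of the remainder step that
   udiv_rnnd_preinv effectively performs, in the style of division by a
   precomputed invariant inverse (cf. udiv_qrnnd_preinv in gmp-impl.h,
   after Moller and Granlund).  It assumes d is normalized (high bit set),
   nh < d, and dinv = floor((B^2-1)/d) - B as produced by invert_limb,
   where B = 2^GMP_LIMB_BITS.  The function name is hypothetical and
   nothing in this file uses it; it is kept under "#if 0" so the compiled
   code is unchanged.  */
#if 0
static mp_limb_t
mod_1_preinv_step_sketch (mp_limb_t nh, mp_limb_t nl, mp_limb_t d, mp_limb_t dinv)
{
  mp_limb_t qh, ql, r, mask;
  umul_ppmm (qh, ql, nh, dinv);            /* qh:ql = nh * dinv */
  add_ssaaaa (qh, ql, qh, ql, nh + 1, nl); /* candidate quotient in qh */
  r = nl - qh * d;                         /* candidate remainder, mod B */
  mask = - (mp_limb_t) (r > ql);           /* fix a possible off-by-one */
  r += mask & d;
  if (r >= d)                              /* final conditional subtract */
    r -= d;
  return r;                                /* <nh,nl> mod d, no divide used */
}
#endif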

#if TUNE_PROGRAM_BUILD && !HAVE_NATIVE_mpn_mod_1_1p
/* Duplicates declarations in tune/speed.h */
mp_limb_t mpn_mod_1_1p_1 (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [4]);
mp_limb_t mpn_mod_1_1p_2 (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [4]);

void mpn_mod_1_1p_cps_1 (mp_limb_t [4], mp_limb_t);
void mpn_mod_1_1p_cps_2 (mp_limb_t [4], mp_limb_t);

#undef mpn_mod_1_1p
#define mpn_mod_1_1p(ap, n, b, pre)                              \
  (mod_1_1p_method == 1 ? mpn_mod_1_1p_1 (ap, n, b, pre)         \
   : (mod_1_1p_method == 2 ? mpn_mod_1_1p_2 (ap, n, b, pre)      \
      : __gmpn_mod_1_1p (ap, n, b, pre)))

#undef mpn_mod_1_1p_cps
#define mpn_mod_1_1p_cps(pre, b)                                 \
  (mod_1_1p_method == 1 ? mpn_mod_1_1p_cps_1 (pre, b)            \
   : (mod_1_1p_method == 2 ? mpn_mod_1_1p_cps_2 (pre, b)         \
      : __gmpn_mod_1_1p_cps (pre, b)))
#endif /* TUNE_PROGRAM_BUILD && !HAVE_NATIVE_mpn_mod_1_1p */


/* The comments in mpn/generic/divrem_1.c apply here too.

   As noted in the algorithms section of the manual, the shifts in the loop
   for the unnorm case can be avoided by calculating r = a%(d*2^n), followed
   by a final (r*2^n)%(d*2^n).  In fact if it happens that a%(d*2^n) can
   skip a division where (a*2^n)%(d*2^n) can't, then there's the same number
   of divide steps, though how often that happens depends on the assumed
   distributions of dividend and divisor.  In any case this idea is left to
   CPU specific implementations to consider.  A small worked instance of
   the identity is given below.  */
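
/* Worked example of the identity above (with assumed 8-bit limbs for
   brevity; not part of the original source): take a = 1000, d = 3.
   Normalizing gives cnt = 6 and d*2^6 = 192.  First r = a % 192 = 40,
   computed without shifting the dividend.  Then (r*2^6) % 192 =
   2560 % 192 = 64, and 64 >> 6 = 1, which indeed equals 1000 % 3.
   In general a % d = ((a % (d*2^n)) * 2^n % (d*2^n)) / 2^n.  */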

static mp_limb_t
mpn_mod_1_unnorm (mp_srcptr up, mp_size_t un, mp_limb_t d)
{
  mp_size_t i;
  mp_limb_t n1, n0, r;
  mp_limb_t dummy;
  int cnt;

  ASSERT (un > 0);
  ASSERT (d != 0);

  /* Skip a division if high < divisor.  Having the test here before
     normalizing will still skip as often as possible.  */
  r = up[un - 1];
  if (r < d)
    {
      if (--un == 0)
        return r;
    }
  else
    r = 0;

  d <<= GMP_NAIL_BITS;

  /* If udiv_qrnnd doesn't need a normalized divisor, can use the simple
     code above.  */
  if (! UDIV_NEEDS_NORMALIZATION
      && BELOW_THRESHOLD (un, MOD_1_UNNORM_THRESHOLD))
    {
      for (i = un - 1; i >= 0; i--)
        {
          n0 = up[i] << GMP_NAIL_BITS;
          udiv_qrnnd (dummy, r, r, n0, d);
          r >>= GMP_NAIL_BITS;
        }
      return r;
    }

  count_leading_zeros (cnt, d);
  d <<= cnt;

  n1 = up[un - 1] << GMP_NAIL_BITS;
  r = (r << cnt) | (n1 >> (GMP_LIMB_BITS - cnt));

  if (UDIV_NEEDS_NORMALIZATION
      && BELOW_THRESHOLD (un, MOD_1_UNNORM_THRESHOLD))
    {
      mp_limb_t nshift;
      for (i = un - 2; i >= 0; i--)
        {
          n0 = up[i] << GMP_NAIL_BITS;
          nshift = (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt));
          udiv_qrnnd (dummy, r, r, nshift, d);
          r >>= GMP_NAIL_BITS;
          n1 = n0;
        }
      udiv_qrnnd (dummy, r, r, n1 << cnt, d);
      r >>= GMP_NAIL_BITS;
      return r >> cnt;
    }
  else
    {
      mp_limb_t inv, nshift;
      invert_limb (inv, d);

      for (i = un - 2; i >= 0; i--)
        {
          n0 = up[i] << GMP_NAIL_BITS;
          nshift = (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt));
          udiv_rnnd_preinv (r, r, nshift, d, inv);
          r >>= GMP_NAIL_BITS;
          n1 = n0;
        }
      udiv_rnnd_preinv (r, r, n1 << cnt, d, inv);
      r >>= GMP_NAIL_BITS;
      return r >> cnt;
    }
}

static mp_limb_t
mpn_mod_1_norm (mp_srcptr up, mp_size_t un, mp_limb_t d)
{
  mp_size_t i;
  mp_limb_t n0, r;
  mp_limb_t dummy;

  ASSERT (un > 0);

  d <<= GMP_NAIL_BITS;

  ASSERT (d & GMP_LIMB_HIGHBIT);

  /* High limb is initial remainder, possibly with one subtract of
     d to get r<d.  */
  r = up[un - 1] << GMP_NAIL_BITS;
  if (r >= d)
    r -= d;
  r >>= GMP_NAIL_BITS;
  un--;
  if (un == 0)
    return r;

  if (BELOW_THRESHOLD (un, MOD_1_NORM_THRESHOLD))
    {
      for (i = un - 1; i >= 0; i--)
        {
          n0 = up[i] << GMP_NAIL_BITS;
          udiv_qrnnd (dummy, r, r, n0, d);
          r >>= GMP_NAIL_BITS;
        }
      return r;
    }
  else
    {
      mp_limb_t inv;
      invert_limb (inv, d);
      for (i = un - 1; i >= 0; i--)
        {
          n0 = up[i] << GMP_NAIL_BITS;
          udiv_rnnd_preinv (r, r, n0, d, inv);
          r >>= GMP_NAIL_BITS;
        }
      return r;
    }
}
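
/* Worked example of the normalized loop above (with assumed 4-bit limbs
   for brevity; not part of the original source): d = 9 (high bit set),
   up = {7, 13}, i.e. the value 13*16 + 7 = 215.  The high limb gives
   r = 13, and one subtract makes r = 4 < d.  The single division step
   then reduces <4,7> = 4*16 + 7 = 71 mod 9 to 8, and indeed
   215 mod 9 = 8.  At most one initial subtract is ever needed, since
   d normalized means 2*d >= B > r.  */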

mp_limb_t
mpn_mod_1 (mp_srcptr ap, mp_size_t n, mp_limb_t b)
{
  ASSERT (n >= 0);
  ASSERT (b != 0);

  /* Should this be handled at all?  Rely on callers?  Note n==0 is currently
     required by mpz/fdiv_r_ui.c and possibly other places.  */
  if (n == 0)
    return 0;

  if (UNLIKELY ((b & GMP_NUMB_HIGHBIT) != 0))
    {
      if (BELOW_THRESHOLD (n, MOD_1N_TO_MOD_1_1_THRESHOLD))
        {
          return mpn_mod_1_norm (ap, n, b);
        }
      else
        {
          mp_limb_t pre[4];
          mpn_mod_1_1p_cps (pre, b);
          return mpn_mod_1_1p (ap, n, b, pre);
        }
    }
  else
    {
      if (BELOW_THRESHOLD (n, MOD_1U_TO_MOD_1_1_THRESHOLD))
        {
          return mpn_mod_1_unnorm (ap, n, b);
        }
      else if (BELOW_THRESHOLD (n, MOD_1_1_TO_MOD_1_2_THRESHOLD))
        {
          mp_limb_t pre[4];
          mpn_mod_1_1p_cps (pre, b);
          return mpn_mod_1_1p (ap, n, b << pre[1], pre);
        }
      else if (BELOW_THRESHOLD (n, MOD_1_2_TO_MOD_1_4_THRESHOLD) || UNLIKELY (b > GMP_NUMB_MASK / 4))
        {
          mp_limb_t pre[5];
          mpn_mod_1s_2p_cps (pre, b);
          return mpn_mod_1s_2p (ap, n, b << pre[1], pre);
        }
      else
        {
          mp_limb_t pre[7];
          mpn_mod_1s_4p_cps (pre, b);
          return mpn_mod_1s_4p (ap, n, b << pre[1], pre);
        }
    }
}
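
/* Example use of the public mpn_mod_1 entry point (a hypothetical driver,
   not part of this file; kept under "#if 0" so nothing here changes).
   The two-limb array represents 2*B + 1 with B = 2^GMP_NUMB_BITS, least
   significant limb first, and 7 is an arbitrary divisor.  */
#if 0
#include <stdio.h>
#include <gmp.h>

int
main (void)
{
  mp_limb_t a[2] = { 1, 2 };   /* the value 2*B + 1 */
  mp_limb_t r = mpn_mod_1 (a, (mp_size_t) 2, (mp_limb_t) 7);
  printf ("remainder: %lu\n", (unsigned long) r);   /* (2*B + 1) mod 7 */
  return 0;
}
#endif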