/src/gmp-6.2.1/mpn/mod_1.c
Line | Count | Source |
1 | | /* mpn_mod_1(dividend_ptr, dividend_size, divisor_limb) -- |
2 | | Divide (DIVIDEND_PTR,,DIVIDEND_SIZE) by DIVISOR_LIMB. |
3 | | Return the single-limb remainder. |
4 | | There are no constraints on the value of the divisor. |
5 | | |
6 | | Copyright 1991, 1993, 1994, 1999, 2000, 2002, 2007-2009, 2012 Free Software |
7 | | Foundation, Inc. |
8 | | |
9 | | This file is part of the GNU MP Library. |
10 | | |
11 | | The GNU MP Library is free software; you can redistribute it and/or modify |
12 | | it under the terms of either: |
13 | | |
14 | | * the GNU Lesser General Public License as published by the Free |
15 | | Software Foundation; either version 3 of the License, or (at your |
16 | | option) any later version. |
17 | | |
18 | | or |
19 | | |
20 | | * the GNU General Public License as published by the Free Software |
21 | | Foundation; either version 2 of the License, or (at your option) any |
22 | | later version. |
23 | | |
24 | | or both in parallel, as here. |
25 | | |
26 | | The GNU MP Library is distributed in the hope that it will be useful, but |
27 | | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
28 | | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
29 | | for more details. |
30 | | |
31 | | You should have received copies of the GNU General Public License and the |
32 | | GNU Lesser General Public License along with the GNU MP Library. If not, |
33 | | see https://www.gnu.org/licenses/. */ |
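For orientation, a minimal caller sketch of the entry point this file implements. This is not part of the file; it assumes an installed GMP with <gmp.h>, arbitrary example values, and (for the printf cast) that mp_limb_t fits an unsigned long, which holds on typical builds.

  #include <stdio.h>
  #include <gmp.h>

  int
  main (void)
  {
    /* Least significant limb first, per mpn convention.  */
    mp_limb_t a[2] = { 123456789, 42 };
    mp_limb_t d = 1000003;              /* any nonzero divisor limb */
    mp_limb_t r = mpn_mod_1 (a, 2, d);  /* single-limb remainder */
    printf ("remainder = %lu\n", (unsigned long) r);
    return 0;
  }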
34 | | |
35 | | #include "gmp-impl.h" |
36 | | #include "longlong.h" |
37 | | |
38 | | |
39 | | /* The size at which udiv_qrnnd_preinv should be used rather than |
40 | | udiv_qrnnd, expressed as a quotient size, i.e. the number of udiv |
41 | | divisions that will be done. |
42 | | |
43 | | The default is to use preinv always; CPUs where this doesn't suit have |
44 | | tuned thresholds. Note in particular that preinv should certainly be |
45 | | used if that's the only division available (USE_PREINV_ALWAYS). */ |
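For readers unfamiliar with preinverted division, here is a hedged standalone sketch of the technique that GMP's invert_limb and udiv_qrnnd_preinv/udiv_rnnd_preinv macros apply, after Moller and Granlund, "Improved division by invariant integers" (2011). It assumes 64-bit limbs, no nails, and a compiler with unsigned __int128; the names limb_invert and mod_2by1_preinv are illustrative only, not GMP's.

  #include <stdint.h>

  /* v = floor((2^128 - 1) / d) - 2^64, for d with its high bit set.
     The true quotient is 2^64 + v, so its low 64 bits are v itself.  */
  static uint64_t
  limb_invert (uint64_t d)
  {
    return (uint64_t) (~(unsigned __int128) 0 / d);
  }

  /* Remainder of (n1*2^64 + n0) / d, given n1 < d, d normalized, and
     v = limb_invert (d).  Two multiplies, no hardware division.  */
  static uint64_t
  mod_2by1_preinv (uint64_t n1, uint64_t n0, uint64_t d, uint64_t v)
  {
    unsigned __int128 p = (unsigned __int128) v * n1;
    uint64_t q1 = (uint64_t) (p >> 64);
    uint64_t q0 = (uint64_t) p;

    q0 += n0;                       /* (q1,q0) += (n1 + 1, n0); n1 < d so
                                       n1 + 1 cannot wrap */
    q1 += n1 + 1 + (q0 < n0);

    uint64_t r = n0 - q1 * d;       /* candidate remainder, mod 2^64 */
    if (r > q0)                     /* quotient estimate one too big */
      r += d;
    if (r >= d)                     /* at most one further correction */
      r -= d;
    return r;
  }

The tuned thresholds below decide when this multiply-based path beats a plain udiv_qrnnd per limb.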
46 | | |
47 | | #ifndef MOD_1_NORM_THRESHOLD |
48 | | #define MOD_1_NORM_THRESHOLD 0 |
49 | | #endif |
50 | | |
51 | | #ifndef MOD_1_UNNORM_THRESHOLD |
52 | | #define MOD_1_UNNORM_THRESHOLD 0 |
53 | | #endif |
54 | | |
55 | | #ifndef MOD_1U_TO_MOD_1_1_THRESHOLD |
56 | | #define MOD_1U_TO_MOD_1_1_THRESHOLD MP_SIZE_T_MAX /* default is not to use mpn_mod_1s */ |
57 | | #endif |
58 | | |
59 | | #ifndef MOD_1N_TO_MOD_1_1_THRESHOLD |
60 | | #define MOD_1N_TO_MOD_1_1_THRESHOLD MP_SIZE_T_MAX /* default is not to use mpn_mod_1s */ |
61 | | #endif |
62 | | |
63 | | #ifndef MOD_1_1_TO_MOD_1_2_THRESHOLD |
64 | | #define MOD_1_1_TO_MOD_1_2_THRESHOLD 10 |
65 | | #endif |
66 | | |
67 | | #ifndef MOD_1_2_TO_MOD_1_4_THRESHOLD |
68 | | #define MOD_1_2_TO_MOD_1_4_THRESHOLD 20 |
69 | | #endif |
70 | | |
71 | | #if TUNE_PROGRAM_BUILD && !HAVE_NATIVE_mpn_mod_1_1p |
72 | | /* Duplicates declarations in tune/speed.h */ |
73 | | mp_limb_t mpn_mod_1_1p_1 (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [4]); |
74 | | mp_limb_t mpn_mod_1_1p_2 (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [4]); |
75 | | |
76 | | void mpn_mod_1_1p_cps_1 (mp_limb_t [4], mp_limb_t); |
77 | | void mpn_mod_1_1p_cps_2 (mp_limb_t [4], mp_limb_t); |
78 | | |
79 | | #undef mpn_mod_1_1p |
80 | | #define mpn_mod_1_1p(ap, n, b, pre) \ |
81 | | (mod_1_1p_method == 1 ? mpn_mod_1_1p_1 (ap, n, b, pre) \ |
82 | | : (mod_1_1p_method == 2 ? mpn_mod_1_1p_2 (ap, n, b, pre) \ |
83 | | : __gmpn_mod_1_1p (ap, n, b, pre))) |
84 | | |
85 | | #undef mpn_mod_1_1p_cps |
86 | | #define mpn_mod_1_1p_cps(pre, b) \ |
87 | | (mod_1_1p_method == 1 ? mpn_mod_1_1p_cps_1 (pre, b) \ |
88 | | : (mod_1_1p_method == 2 ? mpn_mod_1_1p_cps_2 (pre, b) \ |
89 | | : __gmpn_mod_1_1p_cps (pre, b))) |
90 | | #endif /* TUNE_PROGRAM_BUILD && !HAVE_NATIVE_mpn_mod_1_1p */ |
91 | | |
92 | | |
93 | | /* The comments in mpn/generic/divrem_1.c apply here too. |
94 | | |
95 | | As noted in the algorithms section of the manual, the shifts in the loop |
96 | | for the unnorm case can be avoided by calculating r = a%(d*2^n), followed |
97 | | by a final (r*2^n)%(d*2^n). In fact, if it happens that a%(d*2^n) can |
98 | | skip a division where (a*2^n)%(d*2^n) can't, then there's the same number |
99 | | of divide steps, though how often that happens depends on the assumed |
100 | | distributions of dividend and divisor. In any case this idea is left to |
101 | | CPU-specific implementations to consider. */ |
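As a quick sanity check of that identity, a hedged sketch on hypothetical values (64-bit limbs, unsigned __int128 for the double-width final step; not part of this file):

  #include <assert.h>
  #include <stdint.h>

  int
  main (void)
  {
    uint64_t a = 123456789, d = 5;
    int cnt = 61;                           /* leading zeros of d */
    uint64_t dn = d << cnt;                 /* normalized divisor d*2^cnt */
    uint64_t r = a % dn;                    /* loop reductions, shifts avoided */
    /* One final reduction of r*2^cnt recovers the true remainder, scaled.  */
    uint64_t rem = (uint64_t) (((unsigned __int128) r << cnt) % dn);
    assert (rem == (a % d) << cnt);         /* equals (a mod d) * 2^cnt */
    return 0;
  }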
102 | | |
103 | | static mp_limb_t |
104 | | mpn_mod_1_unnorm (mp_srcptr up, mp_size_t un, mp_limb_t d) |
105 | 0 | { |
106 | 0 | mp_size_t i; |
107 | 0 | mp_limb_t n1, n0, r; |
108 | 0 | mp_limb_t dummy; |
109 | 0 | int cnt; |
110 | |
111 | 0 | ASSERT (un > 0); |
112 | 0 | ASSERT (d != 0); |
113 | |
114 | 0 | d <<= GMP_NAIL_BITS; |
115 | | |
116 | | /* Skip a division if high < divisor. Having the test here before |
117 | | normalizing will still skip as often as possible. */ |
118 | 0 | r = up[un - 1] << GMP_NAIL_BITS; |
119 | 0 | if (r < d) |
120 | 0 | { |
121 | 0 | r >>= GMP_NAIL_BITS; |
122 | 0 | un--; |
123 | 0 | if (un == 0) |
124 | 0 | return r; |
125 | 0 | } |
126 | 0 | else |
127 | 0 | r = 0; |
128 | | |
129 | | /* If udiv_qrnnd doesn't need a normalized divisor, we can use the |
130 | | simple code above. */ |
131 | 0 | if (! UDIV_NEEDS_NORMALIZATION |
132 | 0 | && BELOW_THRESHOLD (un, MOD_1_UNNORM_THRESHOLD)) |
133 | 0 | { |
134 | 0 | for (i = un - 1; i >= 0; i--) |
135 | 0 | { |
136 | 0 | n0 = up[i] << GMP_NAIL_BITS; |
137 | 0 | udiv_qrnnd (dummy, r, r, n0, d); |
138 | 0 | r >>= GMP_NAIL_BITS; |
139 | 0 | } |
140 | 0 | return r; |
141 | 0 | } |
142 | | |
143 | 0 | count_leading_zeros (cnt, d); |
144 | 0 | d <<= cnt; |
145 | |
146 | 0 | n1 = up[un - 1] << GMP_NAIL_BITS; |
147 | 0 | r = (r << cnt) | (n1 >> (GMP_LIMB_BITS - cnt)); |
148 | |
149 | 0 | if (UDIV_NEEDS_NORMALIZATION |
150 | 0 | && BELOW_THRESHOLD (un, MOD_1_UNNORM_THRESHOLD)) |
151 | 0 | { |
152 | 0 | mp_limb_t nshift; |
153 | 0 | for (i = un - 2; i >= 0; i--) |
154 | 0 | { |
155 | 0 | n0 = up[i] << GMP_NAIL_BITS; |
156 | 0 | nshift = (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt)); |
157 | 0 | udiv_qrnnd (dummy, r, r, nshift, d); |
158 | 0 | r >>= GMP_NAIL_BITS; |
159 | 0 | n1 = n0; |
160 | 0 | } |
161 | 0 | udiv_qrnnd (dummy, r, r, n1 << cnt, d); |
162 | 0 | r >>= GMP_NAIL_BITS; |
163 | 0 | return r >> cnt; |
164 | 0 | } |
165 | 0 | else |
166 | 0 | { |
167 | 0 | mp_limb_t inv, nshift; |
168 | 0 | invert_limb (inv, d); |
169 | |
170 | 0 | for (i = un - 2; i >= 0; i--) |
171 | 0 | { |
172 | 0 | n0 = up[i] << GMP_NAIL_BITS; |
173 | 0 | nshift = (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt)); |
174 | 0 | udiv_rnnd_preinv (r, r, nshift, d, inv); |
175 | 0 | r >>= GMP_NAIL_BITS; |
176 | 0 | n1 = n0; |
177 | 0 | } |
178 | 0 | udiv_rnnd_preinv (r, r, n1 << cnt, d, inv); |
179 | 0 | r >>= GMP_NAIL_BITS; |
180 | 0 | return r >> cnt; |
181 | 0 | } |
182 | 0 | } |
183 | | |
184 | | static mp_limb_t |
185 | | mpn_mod_1_norm (mp_srcptr up, mp_size_t un, mp_limb_t d) |
186 | 0 | { |
187 | 0 | mp_size_t i; |
188 | 0 | mp_limb_t n0, r; |
189 | 0 | mp_limb_t dummy; |
190 | |
191 | 0 | ASSERT (un > 0); |
192 | |
193 | 0 | d <<= GMP_NAIL_BITS; |
194 | |
195 | 0 | ASSERT (d & GMP_LIMB_HIGHBIT); |
196 | | |
197 | | /* High limb is initial remainder, possibly with one subtract of |
198 | | d to get r<d. */ |
199 | 0 | r = up[un - 1] << GMP_NAIL_BITS; |
200 | 0 | if (r >= d) |
201 | 0 | r -= d; |
202 | 0 | r >>= GMP_NAIL_BITS; |
203 | 0 | un--; |
204 | 0 | if (un == 0) |
205 | 0 | return r; |
206 | | |
207 | 0 | if (BELOW_THRESHOLD (un, MOD_1_NORM_THRESHOLD)) |
208 | 0 | { |
209 | 0 | for (i = un - 1; i >= 0; i--) |
210 | 0 | { |
211 | 0 | n0 = up[i] << GMP_NAIL_BITS; |
212 | 0 | udiv_qrnnd (dummy, r, r, n0, d); |
213 | 0 | r >>= GMP_NAIL_BITS; |
214 | 0 | } |
215 | 0 | return r; |
216 | 0 | } |
217 | 0 | else |
218 | 0 | { |
219 | 0 | mp_limb_t inv; |
220 | 0 | invert_limb (inv, d); |
221 | 0 | for (i = un - 1; i >= 0; i--) |
222 | 0 | { |
223 | 0 | n0 = up[i] << GMP_NAIL_BITS; |
224 | 0 | udiv_rnnd_preinv (r, r, n0, d, inv); |
225 | 0 | r >>= GMP_NAIL_BITS; |
226 | 0 | } |
227 | 0 | return r; |
228 | 0 | } |
229 | 0 | } |
230 | | |
231 | | mp_limb_t |
232 | | mpn_mod_1 (mp_srcptr ap, mp_size_t n, mp_limb_t b) |
233 | 0 | { |
234 | 0 | ASSERT (n >= 0); |
235 | 0 | ASSERT (b != 0); |
236 | | |
237 | | /* Should this be handled at all? Rely on callers? Note un==0 is currently |
238 | | required by mpz/fdiv_r_ui.c and possibly other places. */ |
239 | 0 | if (n == 0) |
240 | 0 | return 0; |
241 | | |
242 | 0 | if (UNLIKELY ((b & GMP_NUMB_HIGHBIT) != 0)) |
243 | 0 | { |
244 | 0 | if (BELOW_THRESHOLD (n, MOD_1N_TO_MOD_1_1_THRESHOLD)) |
245 | 0 | { |
246 | 0 | return mpn_mod_1_norm (ap, n, b); |
247 | 0 | } |
248 | 0 | else |
249 | 0 | { |
250 | 0 | mp_limb_t pre[4]; |
251 | 0 | mpn_mod_1_1p_cps (pre, b); |
252 | 0 | return mpn_mod_1_1p (ap, n, b, pre); |
253 | 0 | } |
254 | 0 | } |
255 | 0 | else |
256 | 0 | { |
257 | 0 | if (BELOW_THRESHOLD (n, MOD_1U_TO_MOD_1_1_THRESHOLD)) |
258 | 0 | { |
259 | 0 | return mpn_mod_1_unnorm (ap, n, b); |
260 | 0 | } |
261 | 0 | else if (BELOW_THRESHOLD (n, MOD_1_1_TO_MOD_1_2_THRESHOLD)) |
262 | 0 | { |
263 | 0 | mp_limb_t pre[4]; |
264 | 0 | mpn_mod_1_1p_cps (pre, b); |
265 | 0 | return mpn_mod_1_1p (ap, n, b << pre[1], pre); |
266 | 0 | } |
267 | 0 | else if (BELOW_THRESHOLD (n, MOD_1_2_TO_MOD_1_4_THRESHOLD) || UNLIKELY (b > GMP_NUMB_MASK / 4)) |
268 | 0 | { |
269 | 0 | mp_limb_t pre[5]; |
270 | 0 | mpn_mod_1s_2p_cps (pre, b); |
271 | 0 | return mpn_mod_1s_2p (ap, n, b << pre[1], pre); |
272 | 0 | } |
273 | 0 | else |
274 | 0 | { |
275 | 0 | mp_limb_t pre[7]; |
276 | 0 | mpn_mod_1s_4p_cps (pre, b); |
277 | 0 | return mpn_mod_1s_4p (ap, n, b << pre[1], pre); |
278 | 0 | } |
279 | 0 | } |
280 | 0 | } |