/src/gmp-6.2.1/mpn/compute_powtab.c
Line | Count | Source |
1 | | /* mpn_compute_powtab. |
2 | | |
3 | | Contributed to the GNU project by Torbjorn Granlund. |
4 | | |
5 | | THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY |
6 | | SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST |
7 | | GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. |
8 | | |
9 | | Copyright 1991-2017 Free Software Foundation, Inc. |
10 | | |
11 | | This file is part of the GNU MP Library. |
12 | | |
13 | | The GNU MP Library is free software; you can redistribute it and/or modify |
14 | | it under the terms of either: |
15 | | |
16 | | * the GNU Lesser General Public License as published by the Free |
17 | | Software Foundation; either version 3 of the License, or (at your |
18 | | option) any later version. |
19 | | |
20 | | or |
21 | | |
22 | | * the GNU General Public License as published by the Free Software |
23 | | Foundation; either version 2 of the License, or (at your option) any |
24 | | later version. |
25 | | |
26 | | or both in parallel, as here. |
27 | | |
28 | | The GNU MP Library is distributed in the hope that it will be useful, but |
29 | | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
30 | | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
31 | | for more details. |
32 | | |
33 | | You should have received copies of the GNU General Public License and the |
34 | | GNU Lesser General Public License along with the GNU MP Library. If not, |
35 | | see https://www.gnu.org/licenses/. */ |
36 | | |
37 | | /* |
38 | | CAVEATS: |
39 | | * The exptab and powtab vectors are in opposite orders. Probably OK. |
40 | | * Consider getting rid of exptab, doing bit ops on the un argument instead. |
41 | | * Consider rounding greatest power slightly upwards to save adjustments. |
42 | | * In powtab_decide, consider computing cost from just the 2-3 largest |
43 | | operands, since smaller operand contribute little. This makes most sense |
44 | | if exptab is suppressed. |
45 | | */ |
46 | | |
47 | | #include "gmp-impl.h" |
48 | | |
49 | | #ifndef DIV_1_VS_MUL_1_PERCENT |
50 | | #define DIV_1_VS_MUL_1_PERCENT 150 |
51 | | #endif |
52 | | |
53 | | #define SET_powers_t(dest, ptr, size, dib, b, sh) \ |
54 | 10.0k | do { \ |
55 | 10.0k | dest.p = ptr; \ |
56 | 10.0k | dest.n = size; \ |
57 | 10.0k | dest.digits_in_base = dib; \ |
58 | 10.0k | dest.base = b; \ |
59 | 10.0k | dest.shift = sh; \ |
60 | 10.0k | } while (0) |
61 | | |
62 | | #if DIV_1_VS_MUL_1_PERCENT > 120 |
63 | | #define HAVE_mpn_compute_powtab_mul 1 |
64 | | static void |
65 | | mpn_compute_powtab_mul (powers_t *powtab, mp_ptr powtab_mem, mp_size_t un, |
66 | | int base, const size_t *exptab, size_t n_pows) |
67 | 1.14k | { |
68 | 1.14k | mp_size_t n; |
69 | 1.14k | mp_ptr p, t; |
70 | 1.14k | mp_limb_t cy; |
71 | 1.14k | long start_idx; |
72 | 1.14k | int c; |
73 | 1.14k | mp_size_t shift; |
74 | 1.14k | long pi; |
75 | | |
76 | 1.14k | mp_limb_t big_base = mp_bases[base].big_base; |
77 | 1.14k | int chars_per_limb = mp_bases[base].chars_per_limb; |
78 | | |
79 | 1.14k | mp_ptr powtab_mem_ptr = powtab_mem; |
80 | | |
81 | 1.14k | size_t digits_in_base = chars_per_limb; |
82 | | |
83 | 1.14k | powers_t *pt = powtab; |
84 | | |
85 | 1.14k | p = powtab_mem_ptr; |
86 | 1.14k | powtab_mem_ptr += 1; |
87 | 1.14k | p[0] = big_base; |
88 | | |
89 | 1.14k | SET_powers_t (pt[0], p, 1, digits_in_base, base, 0); |
90 | 1.14k | pt++; |
91 | | |
92 | 1.14k | t = powtab_mem_ptr; |
93 | 1.14k | powtab_mem_ptr += 2; |
94 | 1.14k | t[1] = mpn_mul_1 (t, p, 1, big_base); |
95 | 1.14k | n = 2; |
96 | | |
97 | 1.14k | digits_in_base *= 2; |
98 | | |
99 | 1.14k | c = t[0] == 0; |
100 | 1.14k | t += c; |
101 | 1.14k | n -= c; |
102 | 1.14k | shift = c; |
103 | | |
104 | 1.14k | SET_powers_t (pt[0], t, n, digits_in_base, base, shift); |
105 | 1.14k | p = t; |
106 | 1.14k | pt++; |
107 | | |
108 | 1.14k | if (exptab[0] == ((size_t) chars_per_limb << n_pows)) |
109 | 36 | { |
110 | 36 | start_idx = n_pows - 2; |
111 | 36 | } |
112 | 1.11k | else |
113 | 1.11k | { |
114 | 1.11k | if (((digits_in_base + chars_per_limb) << (n_pows-2)) <= exptab[0]) |
115 | 298 | { |
116 | | /* 3, sometimes adjusted to 4. */ |
117 | 298 | t = powtab_mem_ptr; |
118 | 298 | powtab_mem_ptr += 4; |
119 | 298 | t[n] = cy = mpn_mul_1 (t, p, n, big_base); |
120 | 298 | n += cy != 0;; |
121 | | |
122 | 298 | digits_in_base += chars_per_limb; |
123 | | |
124 | 298 | c = t[0] == 0; |
125 | 298 | t += c; |
126 | 298 | n -= c; |
127 | 298 | shift += c; |
128 | 298 | } |
129 | 814 | else |
130 | 814 | { |
131 | | /* 2 copy, will always become 3 with back-multiplication. */ |
132 | 814 | t = powtab_mem_ptr; |
133 | 814 | powtab_mem_ptr += 3; |
134 | 814 | t[0] = p[0]; |
135 | 814 | t[1] = p[1]; |
136 | 814 | } |
137 | | |
138 | 1.11k | SET_powers_t (pt[0], t, n, digits_in_base, base, shift); |
139 | 1.11k | p = t; |
140 | 1.11k | pt++; |
141 | 1.11k | start_idx = n_pows - 3; |
142 | 1.11k | } |
143 | | |
144 | 7.79k | for (pi = start_idx; pi >= 0; pi--) |
145 | 6.64k | { |
146 | 6.64k | t = powtab_mem_ptr; |
147 | 6.64k | powtab_mem_ptr += 2 * n + 2; |
148 | | |
149 | 6.64k | ASSERT (powtab_mem_ptr < powtab_mem + mpn_str_powtab_alloc (un)); |
150 | | |
151 | 6.64k | mpn_sqr (t, p, n); |
152 | | |
153 | 6.64k | digits_in_base *= 2; |
154 | 6.64k | n *= 2; |
155 | 6.64k | n -= t[n - 1] == 0; |
156 | 6.64k | shift *= 2; |
157 | | |
158 | 6.64k | c = t[0] == 0; |
159 | 6.64k | t += c; |
160 | 6.64k | n -= c; |
161 | 6.64k | shift += c; |
162 | | |
163 | | /* Adjust new value if it is too small as input to the next squaring. */ |
164 | 6.64k | if (((digits_in_base + chars_per_limb) << pi) <= exptab[0]) |
165 | 2.95k | { |
166 | 2.95k | t[n] = cy = mpn_mul_1 (t, t, n, big_base); |
167 | 2.95k | n += cy != 0; |
168 | | |
169 | 2.95k | digits_in_base += chars_per_limb; |
170 | | |
171 | 2.95k | c = t[0] == 0; |
172 | 2.95k | t += c; |
173 | 2.95k | n -= c; |
174 | 2.95k | shift += c; |
175 | 2.95k | } |
176 | | |
177 | 6.64k | SET_powers_t (pt[0], t, n, digits_in_base, base, shift); |
178 | | |
179 | | /* Adjust previous value if it is not at its target power. */ |
180 | 6.64k | if (pt[-1].digits_in_base < exptab[pi + 1]) |
181 | 5.69k | { |
182 | 5.69k | mp_size_t n = pt[-1].n; |
183 | 5.69k | mp_ptr p = pt[-1].p; |
184 | 5.69k | p[n] = cy = mpn_mul_1 (p, p, n, big_base); |
185 | 5.69k | n += cy != 0; |
186 | | |
187 | 5.69k | ASSERT (pt[-1].digits_in_base + chars_per_limb == exptab[pi + 1]); |
188 | 5.69k | pt[-1].digits_in_base = exptab[pi + 1]; |
189 | | |
190 | 5.69k | c = p[0] == 0; |
191 | 5.69k | pt[-1].p = p + c; |
192 | 5.69k | pt[-1].n = n - c; |
193 | 5.69k | pt[-1].shift += c; |
194 | 5.69k | } |
195 | | |
196 | 6.64k | p = t; |
197 | 6.64k | pt++; |
198 | 6.64k | } |
199 | 1.14k | } |
200 | | #endif |
201 | | |
202 | | #if DIV_1_VS_MUL_1_PERCENT < 275 |
203 | | #define HAVE_mpn_compute_powtab_div 1 |
204 | | static void |
205 | | mpn_compute_powtab_div (powers_t *powtab, mp_ptr powtab_mem, mp_size_t un, |
206 | | int base, const size_t *exptab, size_t n_pows) |
207 | | { |
208 | | mp_ptr p, t; |
209 | | |
210 | | mp_limb_t big_base = mp_bases[base].big_base; |
211 | | int chars_per_limb = mp_bases[base].chars_per_limb; |
212 | | |
213 | | mp_ptr powtab_mem_ptr = powtab_mem; |
214 | | |
215 | | size_t digits_in_base = chars_per_limb; |
216 | | |
217 | | powers_t *pt = powtab; |
218 | | |
219 | | mp_size_t n = 1; |
220 | | mp_size_t shift = 0; |
221 | | long pi; |
222 | | |
223 | | p = powtab_mem_ptr; |
224 | | powtab_mem_ptr += 1; |
225 | | p[0] = big_base; |
226 | | |
227 | | SET_powers_t (pt[0], p, 1, digits_in_base, base, 0); |
228 | | pt++; |
229 | | |
230 | | for (pi = n_pows - 1; pi >= 0; pi--) |
231 | | { |
232 | | t = powtab_mem_ptr; |
233 | | powtab_mem_ptr += 2 * n; |
234 | | |
235 | | ASSERT (powtab_mem_ptr < powtab_mem + mpn_str_powtab_alloc (un)); |
236 | | |
237 | | mpn_sqr (t, p, n); |
238 | | n = 2 * n - 1; n += t[n] != 0; |
239 | | digits_in_base *= 2; |
240 | | |
241 | | if (digits_in_base != exptab[pi]) /* if ((((un - 1) >> pi) & 2) == 0) */ |
242 | | { |
243 | | #if HAVE_NATIVE_mpn_pi1_bdiv_q_1 || ! HAVE_NATIVE_mpn_divexact_1 |
244 | | if (__GMP_LIKELY (base == 10)) |
245 | | mpn_pi1_bdiv_q_1 (t, t, n, big_base >> MP_BASES_BIG_BASE_CTZ_10, |
246 | | MP_BASES_BIG_BASE_BINVERTED_10, |
247 | | MP_BASES_BIG_BASE_CTZ_10); |
248 | | else |
249 | | #endif |
250 | | /* FIXME: We could use _pi1 here if we add big_base_binverted and |
251 | | big_base_ctz fields to struct bases. That would add about 2 KiB |
252 | | to mp_bases.c. |
253 | | FIXME: Use mpn_bdiv_q_1 here when mpn_divexact_1 is converted to |
254 | | mpn_bdiv_q_1 for more machines. */ |
255 | | mpn_divexact_1 (t, t, n, big_base); |
256 | | |
257 | | n -= t[n - 1] == 0; |
258 | | digits_in_base -= chars_per_limb; |
259 | | } |
260 | | |
261 | | shift *= 2; |
262 | | /* Strip low zero limbs, but be careful to keep the result divisible by |
263 | | big_base. */ |
264 | | while (t[0] == 0 && (t[1] & ((big_base & -big_base) - 1)) == 0) |
265 | | { |
266 | | t++; |
267 | | n--; |
268 | | shift++; |
269 | | } |
270 | | p = t; |
271 | | |
272 | | SET_powers_t (pt[0], p, n, digits_in_base, base, shift); |
273 | | pt++; |
274 | | } |
275 | | |
276 | | /* Strip any remaining low zero limbs. */ |
277 | | pt -= n_pows + 1; |
278 | | for (pi = n_pows; pi >= 0; pi--) |
279 | | { |
280 | | mp_ptr t = pt[pi].p; |
281 | | mp_size_t shift = pt[pi].shift; |
282 | | mp_size_t n = pt[pi].n; |
283 | | int c; |
284 | | c = t[0] == 0; |
285 | | t += c; |
286 | | n -= c; |
287 | | shift += c; |
288 | | pt[pi].p = t; |
289 | | pt[pi].shift = shift; |
290 | | pt[pi].n = n; |
291 | | } |
292 | | } |
293 | | #endif |
294 | | |
295 | | static long |
296 | | powtab_decide (size_t *exptab, size_t un, int base) |
297 | 1.14k | { |
298 | 1.14k | int chars_per_limb = mp_bases[base].chars_per_limb; |
299 | 1.14k | long n_pows = 0; |
300 | 1.14k | size_t pn; |
301 | 10.0k | for (pn = (un + 1) >> 1; pn != 1; pn = (pn + 1) >> 1) |
302 | 8.90k | { |
303 | 8.90k | exptab[n_pows] = pn * chars_per_limb; |
304 | 8.90k | n_pows++; |
305 | 8.90k | } |
306 | 1.14k | exptab[n_pows] = chars_per_limb; |
307 | | |
308 | | #if HAVE_mpn_compute_powtab_mul && HAVE_mpn_compute_powtab_div |
309 | | { |
310 | | size_t pn = un - 1; |
311 | | size_t xn = (un + 1) >> 1; |
312 | | unsigned mcost = 1; |
313 | | unsigned dcost = 1; |
314 | | long i; |
315 | | for (i = n_pows - 2; i >= 0; i--) |
316 | | { |
317 | | size_t pow = (pn >> (i + 1)) + 1; |
318 | | |
319 | | if (pow & 1) |
320 | | dcost += pow; |
321 | | |
322 | | if (xn != (pow << i)) |
323 | | { |
324 | | if (pow > 2 && (pow & 1) == 0) |
325 | | mcost += 2 * pow; |
326 | | else |
327 | | mcost += pow; |
328 | | } |
329 | | else |
330 | | { |
331 | | if (pow & 1) |
332 | | mcost += pow; |
333 | | } |
334 | | } |
335 | | |
336 | | dcost = dcost * DIV_1_VS_MUL_1_PERCENT / 100; |
337 | | |
338 | | if (mcost <= dcost) |
339 | | return n_pows; |
340 | | else |
341 | | return -n_pows; |
342 | | } |
343 | | #elif HAVE_mpn_compute_powtab_mul |
344 | 1.14k | return n_pows; |
345 | | #elif HAVE_mpn_compute_powtab_div |
346 | | return -n_pows; |
347 | | #else |
348 | | #error "no powtab function available" |
349 | | #endif |
350 | 1.14k | } |
351 | | |
352 | | size_t |
353 | | mpn_compute_powtab (powers_t *powtab, mp_ptr powtab_mem, mp_size_t un, int base) |
354 | 1.14k | { |
355 | 1.14k | size_t exptab[GMP_LIMB_BITS]; |
356 | | |
357 | 1.14k | long n_pows = powtab_decide (exptab, un, base); |
358 | | |
359 | | #if HAVE_mpn_compute_powtab_mul && HAVE_mpn_compute_powtab_div |
360 | | if (n_pows >= 0) |
361 | | { |
362 | | mpn_compute_powtab_mul (powtab, powtab_mem, un, base, exptab, n_pows); |
363 | | return n_pows; |
364 | | } |
365 | | else |
366 | | { |
367 | | mpn_compute_powtab_div (powtab, powtab_mem, un, base, exptab, -n_pows); |
368 | | return -n_pows; |
369 | | } |
370 | | #elif HAVE_mpn_compute_powtab_mul |
371 | 1.14k | ASSERT (n_pows > 0); |
372 | 1.14k | mpn_compute_powtab_mul (powtab, powtab_mem, un, base, exptab, n_pows); |
373 | 1.14k | return n_pows; |
374 | | #elif HAVE_mpn_compute_powtab_div |
375 | | ASSERT (n_pows < 0); |
376 | | mpn_compute_powtab_div (powtab, powtab_mem, un, base, exptab, -n_pows); |
377 | | return -n_pows; |
378 | | #else |
379 | | #error "no powtab function available" |
380 | | #endif |
381 | 1.14k | } |