/src/libgmp/mpn/compute_powtab.c
Line | Count | Source |
1 | | /* mpn_compute_powtab. |
2 | | |
3 | | Contributed to the GNU project by Torbjorn Granlund. |
4 | | |
5 | | THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY |
6 | | SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST |
7 | | GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. |
8 | | |
9 | | Copyright 1991-2017 Free Software Foundation, Inc. |
10 | | |
11 | | This file is part of the GNU MP Library. |
12 | | |
13 | | The GNU MP Library is free software; you can redistribute it and/or modify |
14 | | it under the terms of either: |
15 | | |
16 | | * the GNU Lesser General Public License as published by the Free |
17 | | Software Foundation; either version 3 of the License, or (at your |
18 | | option) any later version. |
19 | | |
20 | | or |
21 | | |
22 | | * the GNU General Public License as published by the Free Software |
23 | | Foundation; either version 2 of the License, or (at your option) any |
24 | | later version. |
25 | | |
26 | | or both in parallel, as here. |
27 | | |
28 | | The GNU MP Library is distributed in the hope that it will be useful, but |
29 | | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
30 | | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
31 | | for more details. |
32 | | |
33 | | You should have received copies of the GNU General Public License and the |
34 | | GNU Lesser General Public License along with the GNU MP Library. If not, |
35 | | see https://www.gnu.org/licenses/. */ |
36 | | |
37 | | /* |
38 | | CAVEATS: |
39 | | * The exptab and powtab vectors are in opposite orders. Probably OK. |
40 | | * Consider getting rid of exptab, doing bit ops on the un argument instead. |
41 | | * Consider rounding greatest power slightly upwards to save adjustments. |
42 | | * In powtab_decide, consider computing cost from just the 2-3 largest |
43 | | operands, since smaller operand contribute little. This makes most sense |
44 | | if exptab is suppressed. |
45 | | */ |
46 | | |
47 | | #include "gmp-impl.h" |
48 | | |
49 | | #ifndef DIV_1_VS_MUL_1_PERCENT |
50 | | #define DIV_1_VS_MUL_1_PERCENT 150 |
51 | | #endif |
52 | | |
53 | | #define SET_powers_t(dest, ptr, size, dib, b, sh) \ |
54 | 40.9k | do { \ |
55 | 40.9k | dest.p = ptr; \ |
56 | 40.9k | dest.n = size; \ |
57 | 40.9k | dest.digits_in_base = dib; \ |
58 | 40.9k | dest.base = b; \ |
59 | 40.9k | dest.shift = sh; \ |
60 | 40.9k | } while (0) |
61 | | |
62 | | #if DIV_1_VS_MUL_1_PERCENT > 120 |
63 | | #define HAVE_mpn_compute_powtab_mul 1 |
64 | | static void |
65 | | mpn_compute_powtab_mul (powers_t *powtab, mp_ptr powtab_mem, mp_size_t un, |
66 | | int base, const size_t *exptab, size_t n_pows) |
67 | 5.78k | { |
68 | 5.78k | mp_size_t n; |
69 | 5.78k | mp_ptr p, t; |
70 | 5.78k | mp_limb_t cy; |
71 | 5.78k | long start_idx; |
72 | 5.78k | int c; |
73 | | |
74 | 5.78k | mp_limb_t big_base = mp_bases[base].big_base; |
75 | 5.78k | int chars_per_limb = mp_bases[base].chars_per_limb; |
76 | | |
77 | 5.78k | mp_ptr powtab_mem_ptr = powtab_mem; |
78 | | |
79 | 5.78k | size_t digits_in_base = chars_per_limb; |
80 | | |
81 | 5.78k | powers_t *pt = powtab; |
82 | | |
83 | 5.78k | p = powtab_mem_ptr; |
84 | 5.78k | powtab_mem_ptr += 1; |
85 | 5.78k | p[0] = big_base; |
86 | | |
87 | 5.78k | SET_powers_t (pt[0], p, 1, digits_in_base, base, 0); |
88 | 5.78k | pt++; |
89 | | |
90 | 5.78k | t = powtab_mem_ptr; |
91 | 5.78k | powtab_mem_ptr += 2; |
92 | 5.78k | t[1] = mpn_mul_1 (t, p, 1, big_base); |
93 | 5.78k | n = 2; |
94 | | |
95 | 5.78k | digits_in_base *= 2; |
96 | | |
97 | 5.78k | c = t[0] == 0; |
98 | 5.78k | t += c; |
99 | 5.78k | n -= c; |
100 | 5.78k | mp_size_t shift = c; |
101 | | |
102 | 5.78k | SET_powers_t (pt[0], t, n, digits_in_base, base, shift); |
103 | 5.78k | p = t; |
104 | 5.78k | pt++; |
105 | | |
106 | 5.78k | if (exptab[0] == ((size_t) chars_per_limb << n_pows)) |
107 | 468 | { |
108 | 468 | start_idx = n_pows - 2; |
109 | 468 | } |
110 | 5.31k | else |
111 | 5.31k | { |
112 | 5.31k | if (((digits_in_base + chars_per_limb) << (n_pows-2)) <= exptab[0]) |
113 | 2.12k | { |
114 | | /* 3, sometimes adjusted to 4. */ |
115 | 2.12k | t = powtab_mem_ptr; |
116 | 2.12k | powtab_mem_ptr += 4; |
117 | 2.12k | t[n] = cy = mpn_mul_1 (t, p, n, big_base); |
118 | 2.12k | n += cy != 0;; |
119 | | |
120 | 2.12k | digits_in_base += chars_per_limb; |
121 | | |
122 | 2.12k | c = t[0] == 0; |
123 | 2.12k | t += c; |
124 | 2.12k | n -= c; |
125 | 2.12k | shift += c; |
126 | 2.12k | } |
127 | 3.18k | else |
128 | 3.18k | { |
129 | | /* 2 copy, will always become 3 with back-multiplication. */ |
130 | 3.18k | t = powtab_mem_ptr; |
131 | 3.18k | powtab_mem_ptr += 3; |
132 | 3.18k | t[0] = p[0]; |
133 | 3.18k | t[1] = p[1]; |
134 | 3.18k | } |
135 | | |
136 | 5.31k | SET_powers_t (pt[0], t, n, digits_in_base, base, shift); |
137 | 5.31k | p = t; |
138 | 5.31k | pt++; |
139 | 5.31k | start_idx = n_pows - 3; |
140 | 5.31k | } |
141 | | |
142 | 29.8k | for (long pi = start_idx; pi >= 0; pi--) |
143 | 24.0k | { |
144 | 24.0k | t = powtab_mem_ptr; |
145 | 24.0k | powtab_mem_ptr += 2 * n + 2; |
146 | | |
147 | 24.0k | ASSERT (powtab_mem_ptr < powtab_mem + mpn_str_powtab_alloc (un)); |
148 | | |
149 | 24.0k | mpn_sqr (t, p, n); |
150 | | |
151 | 24.0k | digits_in_base *= 2; |
152 | 24.0k | n *= 2; |
153 | 24.0k | n -= t[n - 1] == 0; |
154 | 24.0k | shift *= 2; |
155 | | |
156 | 24.0k | c = t[0] == 0; |
157 | 24.0k | t += c; |
158 | 24.0k | n -= c; |
159 | 24.0k | shift += c; |
160 | | |
161 | | /* Adjust new value if it is too small as input to the next squaring. */ |
162 | 24.0k | if (((digits_in_base + chars_per_limb) << pi) <= exptab[0]) |
163 | 9.32k | { |
164 | 9.32k | t[n] = cy = mpn_mul_1 (t, t, n, big_base); |
165 | 9.32k | n += cy != 0; |
166 | | |
167 | 9.32k | digits_in_base += chars_per_limb; |
168 | | |
169 | 9.32k | c = t[0] == 0; |
170 | 9.32k | t += c; |
171 | 9.32k | n -= c; |
172 | 9.32k | shift += c; |
173 | 9.32k | } |
174 | | |
175 | 24.0k | SET_powers_t (pt[0], t, n, digits_in_base, base, shift); |
176 | | |
177 | | /* Adjust previous value if it is not at its target power. */ |
178 | 24.0k | if (pt[-1].digits_in_base < exptab[pi + 1]) |
179 | 19.9k | { |
180 | 19.9k | mp_size_t n = pt[-1].n; |
181 | 19.9k | mp_ptr p = pt[-1].p; |
182 | 19.9k | p[n] = cy = mpn_mul_1 (p, p, n, big_base); |
183 | 19.9k | n += cy != 0; |
184 | | |
185 | 19.9k | ASSERT (pt[-1].digits_in_base + chars_per_limb == exptab[pi + 1]); |
186 | 19.9k | pt[-1].digits_in_base = exptab[pi + 1]; |
187 | | |
188 | 19.9k | c = p[0] == 0; |
189 | 19.9k | pt[-1].p = p + c; |
190 | 19.9k | pt[-1].n = n - c; |
191 | 19.9k | pt[-1].shift += c; |
192 | 19.9k | } |
193 | | |
194 | 24.0k | p = t; |
195 | 24.0k | pt++; |
196 | 24.0k | } |
197 | 5.78k | } |
198 | | #endif |
199 | | |
200 | | #if DIV_1_VS_MUL_1_PERCENT < 275 |
201 | | #define HAVE_mpn_compute_powtab_div 1 |
202 | | static void |
203 | | mpn_compute_powtab_div (powers_t *powtab, mp_ptr powtab_mem, mp_size_t un, |
204 | | int base, const size_t *exptab, size_t n_pows) |
205 | | { |
206 | | mp_ptr p, t; |
207 | | |
208 | | mp_limb_t big_base = mp_bases[base].big_base; |
209 | | int chars_per_limb = mp_bases[base].chars_per_limb; |
210 | | |
211 | | mp_ptr powtab_mem_ptr = powtab_mem; |
212 | | |
213 | | size_t digits_in_base = chars_per_limb; |
214 | | |
215 | | powers_t *pt = powtab; |
216 | | |
217 | | p = powtab_mem_ptr; |
218 | | powtab_mem_ptr += 1; |
219 | | p[0] = big_base; |
220 | | |
221 | | SET_powers_t (pt[0], p, 1, digits_in_base, base, 0); |
222 | | pt++; |
223 | | |
224 | | mp_size_t n = 1; |
225 | | mp_size_t shift = 0; |
226 | | for (long pi = n_pows - 1; pi >= 0; pi--) |
227 | | { |
228 | | t = powtab_mem_ptr; |
229 | | powtab_mem_ptr += 2 * n; |
230 | | |
231 | | ASSERT (powtab_mem_ptr < powtab_mem + mpn_str_powtab_alloc (un)); |
232 | | |
233 | | mpn_sqr (t, p, n); |
234 | | n = 2 * n - 1; n += t[n] != 0; |
235 | | digits_in_base *= 2; |
236 | | |
237 | | if (digits_in_base != exptab[pi]) /* if ((((un - 1) >> pi) & 2) == 0) */ |
238 | | { |
239 | | #if HAVE_NATIVE_mpn_pi1_bdiv_q_1 || ! HAVE_NATIVE_mpn_divexact_1 |
240 | | if (__GMP_LIKELY (base == 10)) |
241 | | mpn_pi1_bdiv_q_1 (t, t, n, big_base >> MP_BASES_BIG_BASE_CTZ_10, |
242 | | MP_BASES_BIG_BASE_BINVERTED_10, |
243 | | MP_BASES_BIG_BASE_CTZ_10); |
244 | | else |
245 | | #endif |
246 | | /* FIXME: We could use _pi1 here if we add big_base_binverted and |
247 | | big_base_ctz fields to struct bases. That would add about 2 KiB |
248 | | to mp_bases.c. |
249 | | FIXME: Use mpn_bdiv_q_1 here when mpn_divexact_1 is converted to |
250 | | mpn_bdiv_q_1 for more machines. */ |
251 | | mpn_divexact_1 (t, t, n, big_base); |
252 | | |
253 | | n -= t[n - 1] == 0; |
254 | | digits_in_base -= chars_per_limb; |
255 | | } |
256 | | |
257 | | shift *= 2; |
258 | | /* Strip low zero limbs, but be careful to keep the result divisible by |
259 | | big_base. */ |
260 | | while (t[0] == 0 && (t[1] & ((big_base & -big_base) - 1)) == 0) |
261 | | { |
262 | | t++; |
263 | | n--; |
264 | | shift++; |
265 | | } |
266 | | p = t; |
267 | | |
268 | | SET_powers_t (pt[0], p, n, digits_in_base, base, shift); |
269 | | pt++; |
270 | | } |
271 | | |
272 | | /* Strip any remaining low zero limbs. */ |
273 | | pt -= n_pows + 1; |
274 | | for (long pi = n_pows; pi >= 0; pi--) |
275 | | { |
276 | | mp_ptr t = pt[pi].p; |
277 | | mp_size_t shift = pt[pi].shift; |
278 | | mp_size_t n = pt[pi].n; |
279 | | int c; |
280 | | c = t[0] == 0; |
281 | | t += c; |
282 | | n -= c; |
283 | | shift += c; |
284 | | pt[pi].p = t; |
285 | | pt[pi].shift = shift; |
286 | | pt[pi].n = n; |
287 | | } |
288 | | } |
289 | | #endif |
290 | | |
291 | | static long |
292 | | powtab_decide (size_t *exptab, size_t un, int base) |
293 | 5.78k | { |
294 | 5.78k | int chars_per_limb = mp_bases[base].chars_per_limb; |
295 | 5.78k | long n_pows = 0; |
296 | 40.9k | for (size_t pn = (un + 1) >> 1; pn != 1; pn = (pn + 1) >> 1) |
297 | 35.1k | { |
298 | 35.1k | exptab[n_pows] = pn * chars_per_limb; |
299 | 35.1k | n_pows++; |
300 | 35.1k | } |
301 | 5.78k | exptab[n_pows] = chars_per_limb; |
302 | | |
303 | | #if HAVE_mpn_compute_powtab_mul && HAVE_mpn_compute_powtab_div |
304 | | size_t pn = un - 1; |
305 | | size_t xn = (un + 1) >> 1; |
306 | | unsigned mcost = 1; |
307 | | unsigned dcost = 1; |
308 | | for (long i = n_pows - 2; i >= 0; i--) |
309 | | { |
310 | | size_t pow = (pn >> (i + 1)) + 1; |
311 | | |
312 | | if (pow & 1) |
313 | | dcost += pow; |
314 | | |
315 | | if (xn != (pow << i)) |
316 | | { |
317 | | if (pow > 2 && (pow & 1) == 0) |
318 | | mcost += 2 * pow; |
319 | | else |
320 | | mcost += pow; |
321 | | } |
322 | | else |
323 | | { |
324 | | if (pow & 1) |
325 | | mcost += pow; |
326 | | } |
327 | | } |
328 | | |
329 | | dcost = dcost * DIV_1_VS_MUL_1_PERCENT / 100; |
330 | | |
331 | | if (mcost <= dcost) |
332 | | return n_pows; |
333 | | else |
334 | | return -n_pows; |
335 | | #elif HAVE_mpn_compute_powtab_mul |
336 | 5.78k | return n_pows; |
337 | | #elif HAVE_mpn_compute_powtab_div |
338 | | return -n_pows; |
339 | | #else |
340 | | #error "no powtab function available" |
341 | | #endif |
342 | 5.78k | } |
343 | | |
344 | | size_t |
345 | | mpn_compute_powtab (powers_t *powtab, mp_ptr powtab_mem, mp_size_t un, int base) |
346 | 5.78k | { |
347 | 5.78k | size_t exptab[GMP_LIMB_BITS]; |
348 | | |
349 | 5.78k | long n_pows = powtab_decide (exptab, un, base); |
350 | | |
351 | | #if HAVE_mpn_compute_powtab_mul && HAVE_mpn_compute_powtab_div |
352 | | if (n_pows >= 0) |
353 | | { |
354 | | mpn_compute_powtab_mul (powtab, powtab_mem, un, base, exptab, n_pows); |
355 | | return n_pows; |
356 | | } |
357 | | else |
358 | | { |
359 | | mpn_compute_powtab_div (powtab, powtab_mem, un, base, exptab, -n_pows); |
360 | | return -n_pows; |
361 | | } |
362 | | #elif HAVE_mpn_compute_powtab_mul |
363 | 5.78k | ASSERT (n_pows > 0); |
364 | 5.78k | mpn_compute_powtab_mul (powtab, powtab_mem, un, base, exptab, n_pows); |
365 | 5.78k | return n_pows; |
366 | | #elif HAVE_mpn_compute_powtab_div |
367 | | ASSERT (n_pows < 0); |
368 | | mpn_compute_powtab_div (powtab, powtab_mem, un, base, exptab, -n_pows); |
369 | | return -n_pows; |
370 | | #else |
371 | | #error "no powtab function available" |
372 | | #endif |
373 | 5.78k | } |