/src/gmp/mpn/compute_powtab.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* mpn_compute_powtab. |
2 | | |
3 | | Contributed to the GNU project by Torbjorn Granlund. |
4 | | |
5 | | THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY |
6 | | SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST |
7 | | GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. |
8 | | |
9 | | Copyright 1991-2017 Free Software Foundation, Inc. |
10 | | |
11 | | This file is part of the GNU MP Library. |
12 | | |
13 | | The GNU MP Library is free software; you can redistribute it and/or modify |
14 | | it under the terms of either: |
15 | | |
16 | | * the GNU Lesser General Public License as published by the Free |
17 | | Software Foundation; either version 3 of the License, or (at your |
18 | | option) any later version. |
19 | | |
20 | | or |
21 | | |
22 | | * the GNU General Public License as published by the Free Software |
23 | | Foundation; either version 2 of the License, or (at your option) any |
24 | | later version. |
25 | | |
26 | | or both in parallel, as here. |
27 | | |
28 | | The GNU MP Library is distributed in the hope that it will be useful, but |
29 | | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
30 | | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
31 | | for more details. |
32 | | |
33 | | You should have received copies of the GNU General Public License and the |
34 | | GNU Lesser General Public License along with the GNU MP Library. If not, |
35 | | see https://www.gnu.org/licenses/. */ |
36 | | |
37 | | /* |
38 | | CAVEATS: |
39 | | * The exptab and powtab vectors are in opposite orders. Probably OK. |
40 | | * Consider getting rid of exptab, doing bit ops on the un argument instead. |
41 | | * Consider rounding greatest power slightly upwards to save adjustments. |
42 | | * In powtab_decide, consider computing cost from just the 2-3 largest |
43 | | operands, since smaller operand contribute little. This makes most sense |
44 | | if exptab is suppressed. |
45 | | */ |
46 | | |
47 | | #include "gmp-impl.h" |
48 | | |
49 | | #ifndef DIV_1_VS_MUL_1_PERCENT |
50 | | #define DIV_1_VS_MUL_1_PERCENT 150 |
51 | | #endif |
52 | | |
53 | | #define SET_powers_t(dest, ptr, size, dib, b, sh) \ |
54 | 0 | do { \ |
55 | 0 | dest.p = ptr; \ |
56 | 0 | dest.n = size; \ |
57 | 0 | dest.digits_in_base = dib; \ |
58 | 0 | dest.base = b; \ |
59 | 0 | dest.shift = sh; \ |
60 | 0 | } while (0) |
61 | | |
62 | | #if DIV_1_VS_MUL_1_PERCENT > 120 |
63 | | #define HAVE_mpn_compute_powtab_mul 1 |
64 | | static void |
65 | | mpn_compute_powtab_mul (powers_t *powtab, mp_ptr powtab_mem, mp_size_t un, |
66 | | int base, const size_t *exptab, size_t n_pows) |
67 | 0 | { |
68 | 0 | mp_size_t n; |
69 | 0 | mp_ptr p, t; |
70 | 0 | mp_limb_t cy; |
71 | 0 | long start_idx; |
72 | 0 | int c; |
73 | |
|
74 | 0 | mp_limb_t big_base = mp_bases[base].big_base; |
75 | 0 | int chars_per_limb = mp_bases[base].chars_per_limb; |
76 | |
|
77 | 0 | mp_ptr powtab_mem_ptr = powtab_mem; |
78 | |
|
79 | 0 | size_t digits_in_base = chars_per_limb; |
80 | |
|
81 | 0 | powers_t *pt = powtab; |
82 | |
|
83 | 0 | p = powtab_mem_ptr; |
84 | 0 | powtab_mem_ptr += 1; |
85 | 0 | p[0] = big_base; |
86 | |
|
87 | 0 | SET_powers_t (pt[0], p, 1, digits_in_base, base, 0); |
88 | 0 | pt++; |
89 | |
|
90 | 0 | t = powtab_mem_ptr; |
91 | 0 | powtab_mem_ptr += 2; |
92 | 0 | t[1] = mpn_mul_1 (t, p, 1, big_base); |
93 | 0 | n = 2; |
94 | |
|
95 | 0 | digits_in_base *= 2; |
96 | |
|
97 | 0 | c = t[0] == 0; |
98 | 0 | t += c; |
99 | 0 | n -= c; |
100 | 0 | mp_size_t shift = c; |
101 | |
|
102 | 0 | SET_powers_t (pt[0], t, n, digits_in_base, base, shift); |
103 | 0 | p = t; |
104 | 0 | pt++; |
105 | |
|
106 | 0 | if (exptab[0] == ((size_t) chars_per_limb << n_pows)) |
107 | 0 | { |
108 | 0 | start_idx = n_pows - 2; |
109 | 0 | } |
110 | 0 | else |
111 | 0 | { |
112 | 0 | if (((digits_in_base + chars_per_limb) << (n_pows-2)) <= exptab[0]) |
113 | 0 | { |
114 | | /* 3, sometimes adjusted to 4. */ |
115 | 0 | t = powtab_mem_ptr; |
116 | 0 | powtab_mem_ptr += 4; |
117 | 0 | t[n] = cy = mpn_mul_1 (t, p, n, big_base); |
118 | 0 | n += cy != 0;; |
119 | |
|
120 | 0 | digits_in_base += chars_per_limb; |
121 | |
|
122 | 0 | c = t[0] == 0; |
123 | 0 | t += c; |
124 | 0 | n -= c; |
125 | 0 | shift += c; |
126 | 0 | } |
127 | 0 | else |
128 | 0 | { |
129 | | /* 2 copy, will always become 3 with back-multiplication. */ |
130 | 0 | t = powtab_mem_ptr; |
131 | 0 | powtab_mem_ptr += 3; |
132 | 0 | t[0] = p[0]; |
133 | 0 | t[1] = p[1]; |
134 | 0 | } |
135 | |
|
136 | 0 | SET_powers_t (pt[0], t, n, digits_in_base, base, shift); |
137 | 0 | p = t; |
138 | 0 | pt++; |
139 | 0 | start_idx = n_pows - 3; |
140 | 0 | } |
141 | |
|
142 | 0 | for (long pi = start_idx; pi >= 0; pi--) |
143 | 0 | { |
144 | 0 | t = powtab_mem_ptr; |
145 | 0 | powtab_mem_ptr += 2 * n + 2; |
146 | |
|
147 | 0 | ASSERT (powtab_mem_ptr < powtab_mem + mpn_str_powtab_alloc (un)); |
148 | |
|
149 | 0 | mpn_sqr (t, p, n); |
150 | |
|
151 | 0 | digits_in_base *= 2; |
152 | 0 | n *= 2; |
153 | 0 | n -= t[n - 1] == 0; |
154 | 0 | shift *= 2; |
155 | |
|
156 | 0 | c = t[0] == 0; |
157 | 0 | t += c; |
158 | 0 | n -= c; |
159 | 0 | shift += c; |
160 | | |
161 | | /* Adjust new value if it is too small as input to the next squaring. */ |
162 | 0 | if (((digits_in_base + chars_per_limb) << pi) <= exptab[0]) |
163 | 0 | { |
164 | 0 | t[n] = cy = mpn_mul_1 (t, t, n, big_base); |
165 | 0 | n += cy != 0; |
166 | |
|
167 | 0 | digits_in_base += chars_per_limb; |
168 | |
|
169 | 0 | c = t[0] == 0; |
170 | 0 | t += c; |
171 | 0 | n -= c; |
172 | 0 | shift += c; |
173 | 0 | } |
174 | |
|
175 | 0 | SET_powers_t (pt[0], t, n, digits_in_base, base, shift); |
176 | | |
177 | | /* Adjust previous value if it is not at its target power. */ |
178 | 0 | if (pt[-1].digits_in_base < exptab[pi + 1]) |
179 | 0 | { |
180 | 0 | mp_size_t n = pt[-1].n; |
181 | 0 | mp_ptr p = pt[-1].p; |
182 | 0 | p[n] = cy = mpn_mul_1 (p, p, n, big_base); |
183 | 0 | n += cy != 0; |
184 | |
|
185 | 0 | ASSERT (pt[-1].digits_in_base + chars_per_limb == exptab[pi + 1]); |
186 | 0 | pt[-1].digits_in_base = exptab[pi + 1]; |
187 | |
|
188 | 0 | c = p[0] == 0; |
189 | 0 | pt[-1].p = p + c; |
190 | 0 | pt[-1].n = n - c; |
191 | 0 | pt[-1].shift += c; |
192 | 0 | } |
193 | |
|
194 | 0 | p = t; |
195 | 0 | pt++; |
196 | 0 | } |
197 | 0 | } |
198 | | #endif |
199 | | |
200 | | #if DIV_1_VS_MUL_1_PERCENT < 275 |
201 | | #define HAVE_mpn_compute_powtab_div 1 |
202 | | static void |
203 | | mpn_compute_powtab_div (powers_t *powtab, mp_ptr powtab_mem, mp_size_t un, |
204 | | int base, const size_t *exptab, size_t n_pows) |
205 | | { |
206 | | mp_ptr p, t; |
207 | | |
208 | | mp_limb_t big_base = mp_bases[base].big_base; |
209 | | int chars_per_limb = mp_bases[base].chars_per_limb; |
210 | | |
211 | | mp_ptr powtab_mem_ptr = powtab_mem; |
212 | | |
213 | | size_t digits_in_base = chars_per_limb; |
214 | | |
215 | | powers_t *pt = powtab; |
216 | | |
217 | | p = powtab_mem_ptr; |
218 | | powtab_mem_ptr += 1; |
219 | | p[0] = big_base; |
220 | | |
221 | | SET_powers_t (pt[0], p, 1, digits_in_base, base, 0); |
222 | | pt++; |
223 | | |
224 | | mp_size_t n = 1; |
225 | | mp_size_t shift = 0; |
226 | | for (long pi = n_pows - 1; pi >= 0; pi--) |
227 | | { |
228 | | t = powtab_mem_ptr; |
229 | | powtab_mem_ptr += 2 * n; |
230 | | |
231 | | ASSERT (powtab_mem_ptr < powtab_mem + mpn_str_powtab_alloc (un)); |
232 | | |
233 | | mpn_sqr (t, p, n); |
234 | | n = 2 * n - 1; n += t[n] != 0; |
235 | | digits_in_base *= 2; |
236 | | |
237 | | if (digits_in_base != exptab[pi]) /* if ((((un - 1) >> pi) & 2) == 0) */ |
238 | | { |
239 | | #if HAVE_NATIVE_mpn_pi1_bdiv_q_1 || ! HAVE_NATIVE_mpn_divexact_1 |
240 | | if (__GMP_LIKELY (base == 10)) |
241 | | mpn_pi1_bdiv_q_1 (t, t, n, big_base >> MP_BASES_BIG_BASE_CTZ_10, |
242 | | MP_BASES_BIG_BASE_BINVERTED_10, |
243 | | MP_BASES_BIG_BASE_CTZ_10); |
244 | | else |
245 | | #endif |
246 | | /* FIXME: We could use _pi1 here if we add big_base_binverted and |
247 | | big_base_ctz fields to struct bases. That would add about 2 KiB |
248 | | to mp_bases.c. |
249 | | FIXME: Use mpn_bdiv_q_1 here when mpn_divexact_1 is converted to |
250 | | mpn_bdiv_q_1 for more machines. */ |
251 | | mpn_divexact_1 (t, t, n, big_base); |
252 | | |
253 | | n -= t[n - 1] == 0; |
254 | | digits_in_base -= chars_per_limb; |
255 | | } |
256 | | |
257 | | shift *= 2; |
258 | | /* Strip low zero limbs, but be careful to keep the result divisible by |
259 | | big_base. */ |
260 | | while (t[0] == 0 && (t[1] & ((big_base & -big_base) - 1)) == 0) |
261 | | { |
262 | | t++; |
263 | | n--; |
264 | | shift++; |
265 | | } |
266 | | p = t; |
267 | | |
268 | | SET_powers_t (pt[0], p, n, digits_in_base, base, shift); |
269 | | pt++; |
270 | | } |
271 | | |
272 | | /* Strip any remaining low zero limbs. */ |
273 | | pt -= n_pows + 1; |
274 | | for (long pi = n_pows; pi >= 0; pi--) |
275 | | { |
276 | | mp_ptr t = pt[pi].p; |
277 | | mp_size_t shift = pt[pi].shift; |
278 | | mp_size_t n = pt[pi].n; |
279 | | int c; |
280 | | c = t[0] == 0; |
281 | | t += c; |
282 | | n -= c; |
283 | | shift += c; |
284 | | pt[pi].p = t; |
285 | | pt[pi].shift = shift; |
286 | | pt[pi].n = n; |
287 | | } |
288 | | } |
289 | | #endif |
290 | | |
291 | | static long |
292 | | powtab_decide (size_t *exptab, size_t un, int base) |
293 | 0 | { |
294 | 0 | int chars_per_limb = mp_bases[base].chars_per_limb; |
295 | 0 | long n_pows = 0; |
296 | 0 | for (size_t pn = (un + 1) >> 1; pn != 1; pn = (pn + 1) >> 1) |
297 | 0 | { |
298 | 0 | exptab[n_pows] = pn * chars_per_limb; |
299 | 0 | n_pows++; |
300 | 0 | } |
301 | 0 | exptab[n_pows] = chars_per_limb; |
302 | |
|
303 | | #if HAVE_mpn_compute_powtab_mul && HAVE_mpn_compute_powtab_div |
304 | | size_t pn = un - 1; |
305 | | size_t xn = (un + 1) >> 1; |
306 | | unsigned mcost = 1; |
307 | | unsigned dcost = 1; |
308 | | for (long i = n_pows - 2; i >= 0; i--) |
309 | | { |
310 | | size_t pow = (pn >> (i + 1)) + 1; |
311 | | |
312 | | if (pow & 1) |
313 | | dcost += pow; |
314 | | |
315 | | if (xn != (pow << i)) |
316 | | { |
317 | | if (pow > 2 && (pow & 1) == 0) |
318 | | mcost += 2 * pow; |
319 | | else |
320 | | mcost += pow; |
321 | | } |
322 | | else |
323 | | { |
324 | | if (pow & 1) |
325 | | mcost += pow; |
326 | | } |
327 | | } |
328 | | |
329 | | dcost = dcost * DIV_1_VS_MUL_1_PERCENT / 100; |
330 | | |
331 | | if (mcost <= dcost) |
332 | | return n_pows; |
333 | | else |
334 | | return -n_pows; |
335 | | #elif HAVE_mpn_compute_powtab_mul |
336 | | return n_pows; |
337 | | #elif HAVE_mpn_compute_powtab_div |
338 | | return -n_pows; |
339 | | #else |
340 | | #error "no powtab function available" |
341 | | #endif |
342 | 0 | } |
343 | | |
344 | | size_t |
345 | | mpn_compute_powtab (powers_t *powtab, mp_ptr powtab_mem, mp_size_t un, int base) |
346 | 0 | { |
347 | 0 | size_t exptab[GMP_LIMB_BITS]; |
348 | |
|
349 | 0 | long n_pows = powtab_decide (exptab, un, base); |
350 | |
|
351 | | #if HAVE_mpn_compute_powtab_mul && HAVE_mpn_compute_powtab_div |
352 | | if (n_pows >= 0) |
353 | | { |
354 | | mpn_compute_powtab_mul (powtab, powtab_mem, un, base, exptab, n_pows); |
355 | | return n_pows; |
356 | | } |
357 | | else |
358 | | { |
359 | | mpn_compute_powtab_div (powtab, powtab_mem, un, base, exptab, -n_pows); |
360 | | return -n_pows; |
361 | | } |
362 | | #elif HAVE_mpn_compute_powtab_mul |
363 | 0 | ASSERT (n_pows > 0); |
364 | 0 | mpn_compute_powtab_mul (powtab, powtab_mem, un, base, exptab, n_pows); |
365 | 0 | return n_pows; |
366 | | #elif HAVE_mpn_compute_powtab_div |
367 | | ASSERT (n_pows < 0); |
368 | | mpn_compute_powtab_div (powtab, powtab_mem, un, base, exptab, -n_pows); |
369 | | return -n_pows; |
370 | | #else |
371 | | #error "no powtab function available" |
372 | | #endif |
373 | 0 | } |