/src/gmp-6.2.1/mpn/get_str.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* mpn_get_str -- Convert {UP,USIZE} to a base BASE string in STR. |
2 | | |
3 | | Contributed to the GNU project by Torbjorn Granlund. |
4 | | |
5 | | THE FUNCTIONS IN THIS FILE, EXCEPT mpn_get_str, ARE INTERNAL WITH MUTABLE |
6 | | INTERFACES. IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. |
7 | | IN FACT, IT IS ALMOST GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A |
8 | | FUTURE GNU MP RELEASE. |
9 | | |
10 | | Copyright 1991-2017 Free Software Foundation, Inc. |
11 | | |
12 | | This file is part of the GNU MP Library. |
13 | | |
14 | | The GNU MP Library is free software; you can redistribute it and/or modify |
15 | | it under the terms of either: |
16 | | |
17 | | * the GNU Lesser General Public License as published by the Free |
18 | | Software Foundation; either version 3 of the License, or (at your |
19 | | option) any later version. |
20 | | |
21 | | or |
22 | | |
23 | | * the GNU General Public License as published by the Free Software |
24 | | Foundation; either version 2 of the License, or (at your option) any |
25 | | later version. |
26 | | |
27 | | or both in parallel, as here. |
28 | | |
29 | | The GNU MP Library is distributed in the hope that it will be useful, but |
30 | | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
31 | | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
32 | | for more details. |
33 | | |
34 | | You should have received copies of the GNU General Public License and the |
35 | | GNU Lesser General Public License along with the GNU MP Library. If not, |
36 | | see https://www.gnu.org/licenses/. */ |
37 | | |
38 | | #include "gmp-impl.h" |
39 | | #include "longlong.h" |
40 | | |
41 | | /* Conversion of U {up,un} to a string in base b. Internally, we convert to |
42 | | base B = b^m, the largest power of b that fits a limb. Basic algorithms: |
43 | | |
44 | | A) Divide U repeatedly by B, generating a quotient and remainder, until the |
45 | | quotient becomes zero. The remainders hold the converted digits. Digits |
46 | | come out from right to left. (Used in mpn_bc_get_str.) |
47 | | |
48 | | B) Divide U by b^g, for g such that 1/b <= U/b^g < 1, generating a fraction. |
49 | | Then develop digits by multiplying the fraction repeatedly by b. Digits |
50 | | come out from left to right. (Currently not used herein, except for in |
51 | | code for converting single limbs to individual digits.) |
52 | | |
53 | | C) Compute B^1, B^2, B^4, ..., B^s, for s such that B^s is just above |
54 | | sqrt(U). Then divide U by B^s, generating quotient and remainder. |
55 | | Recursively convert the quotient, then the remainder, using the |
56 | | precomputed powers. Digits come out from left to right. (Used in |
57 | | mpn_dc_get_str.) |
58 | | |
59 | | When using algorithm C, algorithm B might be suitable for basecase code, |
60 | | since the required b^g power will be readily accessible. |
61 | | |
62 | | Optimization ideas: |
63 | | 1. The recursive function of (C) could use less temporary memory. The powtab |
64 | | allocation could be trimmed with some computation, and the tmp area could |
65 | | be reduced, or perhaps eliminated if up is reused for both quotient and |
66 | | remainder (it is currently used just for remainder). |
67 | | 2. Store the powers of (C) in normalized form, with the normalization count. |
68 | | Quotients will usually need to be left-shifted before each divide, and |
69 | | remainders will either need to be left-shifted of right-shifted. |
70 | | 3. In the code for developing digits from a single limb, we could avoid using |
71 | | a full umul_ppmm except for the first (or first few) digits, provided base |
72 | | is even. Subsequent digits can be developed using plain multiplication. |
73 | | (This saves on register-starved machines (read x86) and on all machines |
74 | | that generate the upper product half using a separate instruction (alpha, |
75 | | powerpc, IA-64) or lacks such support altogether (sparc64, hppa64). |
76 | | 4. Separate mpn_dc_get_str basecase code from code for small conversions. The |
77 | | former code will have the exact right power readily available in the |
78 | | powtab parameter for dividing the current number into a fraction. Convert |
79 | | that using algorithm B. |
80 | | 5. Completely avoid division. Compute the inverses of the powers now in |
81 | | powtab instead of the actual powers. |
82 | | 6. Decrease powtab allocation for even bases. E.g. for base 10 we could save |
83 | | about 30% (1-log(5)/log(10)). |
84 | | |
85 | | Basic structure of (C): |
86 | | mpn_get_str: |
87 | | if POW2_P (n) |
88 | | ... |
89 | | else |
90 | | if (un < GET_STR_PRECOMPUTE_THRESHOLD) |
91 | | mpn_bx_get_str (str, base, up, un); |
92 | | else |
93 | | precompute_power_tables |
94 | | mpn_dc_get_str |
95 | | |
96 | | mpn_dc_get_str: |
97 | | mpn_tdiv_qr |
98 | | if (qn < GET_STR_DC_THRESHOLD) |
99 | | mpn_bc_get_str |
100 | | else |
101 | | mpn_dc_get_str |
102 | | if (rn < GET_STR_DC_THRESHOLD) |
103 | | mpn_bc_get_str |
104 | | else |
105 | | mpn_dc_get_str |
106 | | |
107 | | |
108 | | The reason for the two threshold values is the cost of |
109 | | precompute_power_tables. GET_STR_PRECOMPUTE_THRESHOLD will be |
110 | | considerably larger than GET_STR_DC_THRESHOLD. */ |
111 | | |
112 | | |
113 | | /* The x86s and m68020 have a quotient and remainder "div" instruction and |
114 | | gcc recognises an adjacent "/" and "%" can be combined using that. |
115 | | Elsewhere "/" and "%" are either separate instructions, or separate |
116 | | libgcc calls (which unfortunately gcc as of version 3.0 doesn't combine). |
117 | | A multiply and subtract should be faster than a "%" in those cases. */ |
118 | | #if HAVE_HOST_CPU_FAMILY_x86 \ |
119 | | || HAVE_HOST_CPU_m68020 \ |
120 | | || HAVE_HOST_CPU_m68030 \ |
121 | | || HAVE_HOST_CPU_m68040 \ |
122 | | || HAVE_HOST_CPU_m68060 \ |
123 | | || HAVE_HOST_CPU_m68360 /* CPU32 */ |
124 | | #define udiv_qrnd_unnorm(q,r,n,d) \ |
125 | | do { \ |
126 | | mp_limb_t __q = (n) / (d); \ |
127 | | mp_limb_t __r = (n) % (d); \ |
128 | | (q) = __q; \ |
129 | | (r) = __r; \ |
130 | | } while (0) |
131 | | #else |
132 | | #define udiv_qrnd_unnorm(q,r,n,d) \ |
133 | 9.82k | do { \ |
134 | 9.82k | mp_limb_t __q = (n) / (d); \ |
135 | 9.82k | mp_limb_t __r = (n) - __q*(d); \ |
136 | 9.82k | (q) = __q; \ |
137 | 9.82k | (r) = __r; \ |
138 | 9.82k | } while (0) |
139 | | #endif |
140 | | |
141 | | |
142 | | /* Convert {up,un} to a string in base base, and put the result in str. |
143 | | Generate len characters, possibly padding with zeros to the left. If len is |
144 | | zero, generate as many characters as required. Return a pointer immediately |
145 | | after the last digit of the result string. Complexity is O(un^2); intended |
146 | | for small conversions. */ |
147 | | static unsigned char * |
148 | | mpn_bc_get_str (unsigned char *str, size_t len, |
149 | | mp_ptr up, mp_size_t un, int base) |
150 | 1.88k | { |
151 | 1.88k | mp_limb_t rl, ul; |
152 | 1.88k | unsigned char *s; |
153 | 1.88k | size_t l; |
154 | | /* Allocate memory for largest possible string, given that we only get here |
155 | | for operands with un < GET_STR_PRECOMPUTE_THRESHOLD and that the smallest |
156 | | base is 3. 7/11 is an approximation to 1/log2(3). */ |
157 | | #if TUNE_PROGRAM_BUILD |
158 | | #define BUF_ALLOC (GET_STR_THRESHOLD_LIMIT * GMP_LIMB_BITS * 7 / 11) |
159 | | #else |
160 | 3.76k | #define BUF_ALLOC (GET_STR_PRECOMPUTE_THRESHOLD * GMP_LIMB_BITS * 7 / 11) |
161 | 1.88k | #endif |
162 | 1.88k | unsigned char buf[BUF_ALLOC]; |
163 | | #if TUNE_PROGRAM_BUILD |
164 | | mp_limb_t rp[GET_STR_THRESHOLD_LIMIT]; |
165 | | #else |
166 | 1.88k | mp_limb_t rp[GET_STR_PRECOMPUTE_THRESHOLD]; |
167 | 1.88k | #endif |
168 | | |
169 | 1.88k | if (base == 10) |
170 | 1.88k | { |
171 | | /* Special case code for base==10 so that the compiler has a chance to |
172 | | optimize things. */ |
173 | | |
174 | 1.88k | MPN_COPY (rp + 1, up, un); |
175 | | |
176 | 1.88k | s = buf + BUF_ALLOC; |
177 | 10.7k | while (un > 1) |
178 | 8.83k | { |
179 | 8.83k | int i; |
180 | 8.83k | mp_limb_t frac, digit; |
181 | 8.83k | MPN_DIVREM_OR_PREINV_DIVREM_1 (rp, (mp_size_t) 1, rp + 1, un, |
182 | 8.83k | MP_BASES_BIG_BASE_10, |
183 | 8.83k | MP_BASES_BIG_BASE_INVERTED_10, |
184 | 8.83k | MP_BASES_NORMALIZATION_STEPS_10); |
185 | 8.83k | un -= rp[un] == 0; |
186 | 8.83k | frac = (rp[0] + 1) << GMP_NAIL_BITS; |
187 | 8.83k | s -= MP_BASES_CHARS_PER_LIMB_10; |
188 | | #if HAVE_HOST_CPU_FAMILY_x86 |
189 | | /* The code below turns out to be a bit slower for x86 using gcc. |
190 | | Use plain code. */ |
191 | | i = MP_BASES_CHARS_PER_LIMB_10; |
192 | | do |
193 | | { |
194 | | umul_ppmm (digit, frac, frac, 10); |
195 | | *s++ = digit; |
196 | | } |
197 | | while (--i); |
198 | | #else |
199 | | /* Use the fact that 10 in binary is 1010, with the lowest bit 0. |
200 | | After a few umul_ppmm, we will have accumulated enough low zeros |
201 | | to use a plain multiply. */ |
202 | 8.83k | if (MP_BASES_NORMALIZATION_STEPS_10 == 0) |
203 | 8.83k | { |
204 | 8.83k | umul_ppmm (digit, frac, frac, 10); |
205 | 8.83k | *s++ = digit; |
206 | 8.83k | } |
207 | 8.83k | if (MP_BASES_NORMALIZATION_STEPS_10 <= 1) |
208 | 8.83k | { |
209 | 8.83k | umul_ppmm (digit, frac, frac, 10); |
210 | 8.83k | *s++ = digit; |
211 | 8.83k | } |
212 | 8.83k | if (MP_BASES_NORMALIZATION_STEPS_10 <= 2) |
213 | 8.83k | { |
214 | 8.83k | umul_ppmm (digit, frac, frac, 10); |
215 | 8.83k | *s++ = digit; |
216 | 8.83k | } |
217 | 8.83k | if (MP_BASES_NORMALIZATION_STEPS_10 <= 3) |
218 | 8.83k | { |
219 | 8.83k | umul_ppmm (digit, frac, frac, 10); |
220 | 8.83k | *s++ = digit; |
221 | 8.83k | } |
222 | 8.83k | i = (MP_BASES_CHARS_PER_LIMB_10 - ((MP_BASES_NORMALIZATION_STEPS_10 < 4) |
223 | 8.83k | ? (4-MP_BASES_NORMALIZATION_STEPS_10) |
224 | 8.83k | : 0)); |
225 | 8.83k | frac = (frac + 0xf) >> 4; |
226 | 8.83k | do |
227 | 132k | { |
228 | 132k | frac *= 10; |
229 | 132k | digit = frac >> (GMP_LIMB_BITS - 4); |
230 | 132k | *s++ = digit; |
231 | 132k | frac &= (~(mp_limb_t) 0) >> 4; |
232 | 132k | } |
233 | 132k | while (--i); |
234 | 8.83k | #endif |
235 | 8.83k | s -= MP_BASES_CHARS_PER_LIMB_10; |
236 | 8.83k | } |
237 | | |
238 | 1.88k | ul = rp[1]; |
239 | 11.7k | while (ul != 0) |
240 | 9.82k | { |
241 | 9.82k | udiv_qrnd_unnorm (ul, rl, ul, 10); |
242 | 9.82k | *--s = rl; |
243 | 9.82k | } |
244 | 1.88k | } |
245 | 0 | else /* not base 10 */ |
246 | 0 | { |
247 | 0 | unsigned chars_per_limb; |
248 | 0 | mp_limb_t big_base, big_base_inverted; |
249 | 0 | unsigned normalization_steps; |
250 | |
|
251 | 0 | chars_per_limb = mp_bases[base].chars_per_limb; |
252 | 0 | big_base = mp_bases[base].big_base; |
253 | 0 | big_base_inverted = mp_bases[base].big_base_inverted; |
254 | 0 | count_leading_zeros (normalization_steps, big_base); |
255 | |
|
256 | 0 | MPN_COPY (rp + 1, up, un); |
257 | |
|
258 | 0 | s = buf + BUF_ALLOC; |
259 | 0 | while (un > 1) |
260 | 0 | { |
261 | 0 | int i; |
262 | 0 | mp_limb_t frac; |
263 | 0 | MPN_DIVREM_OR_PREINV_DIVREM_1 (rp, (mp_size_t) 1, rp + 1, un, |
264 | 0 | big_base, big_base_inverted, |
265 | 0 | normalization_steps); |
266 | 0 | un -= rp[un] == 0; |
267 | 0 | frac = (rp[0] + 1) << GMP_NAIL_BITS; |
268 | 0 | s -= chars_per_limb; |
269 | 0 | i = chars_per_limb; |
270 | 0 | do |
271 | 0 | { |
272 | 0 | mp_limb_t digit; |
273 | 0 | umul_ppmm (digit, frac, frac, base); |
274 | 0 | *s++ = digit; |
275 | 0 | } |
276 | 0 | while (--i); |
277 | 0 | s -= chars_per_limb; |
278 | 0 | } |
279 | |
|
280 | 0 | ul = rp[1]; |
281 | 0 | while (ul != 0) |
282 | 0 | { |
283 | 0 | udiv_qrnd_unnorm (ul, rl, ul, base); |
284 | 0 | *--s = rl; |
285 | 0 | } |
286 | 0 | } |
287 | | |
288 | 1.88k | l = buf + BUF_ALLOC - s; |
289 | 1.88k | while (l < len) |
290 | 0 | { |
291 | 0 | *str++ = 0; |
292 | 0 | len--; |
293 | 0 | } |
294 | 179k | while (l != 0) |
295 | 177k | { |
296 | 177k | *str++ = *s++; |
297 | 177k | l--; |
298 | 177k | } |
299 | 1.88k | return str; |
300 | 1.88k | } |
301 | | |
302 | | |
303 | | /* Convert {UP,UN} to a string with a base as represented in POWTAB, and put |
304 | | the string in STR. Generate LEN characters, possibly padding with zeros to |
305 | | the left. If LEN is zero, generate as many characters as required. |
306 | | Return a pointer immediately after the last digit of the result string. |
307 | | This uses divide-and-conquer and is intended for large conversions. */ |
308 | | static unsigned char * |
309 | | mpn_dc_get_str (unsigned char *str, size_t len, |
310 | | mp_ptr up, mp_size_t un, |
311 | | const powers_t *powtab, mp_ptr tmp) |
312 | 0 | { |
313 | 0 | if (BELOW_THRESHOLD (un, GET_STR_DC_THRESHOLD)) |
314 | 0 | { |
315 | 0 | if (un != 0) |
316 | 0 | str = mpn_bc_get_str (str, len, up, un, powtab->base); |
317 | 0 | else |
318 | 0 | { |
319 | 0 | while (len != 0) |
320 | 0 | { |
321 | 0 | *str++ = 0; |
322 | 0 | len--; |
323 | 0 | } |
324 | 0 | } |
325 | 0 | } |
326 | 0 | else |
327 | 0 | { |
328 | 0 | mp_ptr pwp, qp, rp; |
329 | 0 | mp_size_t pwn, qn; |
330 | 0 | mp_size_t sn; |
331 | |
|
332 | 0 | pwp = powtab->p; |
333 | 0 | pwn = powtab->n; |
334 | 0 | sn = powtab->shift; |
335 | |
|
336 | 0 | if (un < pwn + sn || (un == pwn + sn && mpn_cmp (up + sn, pwp, un - sn) < 0)) |
337 | 0 | { |
338 | 0 | str = mpn_dc_get_str (str, len, up, un, powtab - 1, tmp); |
339 | 0 | } |
340 | 0 | else |
341 | 0 | { |
342 | 0 | qp = tmp; /* (un - pwn + 1) limbs for qp */ |
343 | 0 | rp = up; /* pwn limbs for rp; overwrite up area */ |
344 | |
|
345 | 0 | mpn_tdiv_qr (qp, rp + sn, 0L, up + sn, un - sn, pwp, pwn); |
346 | 0 | qn = un - sn - pwn; qn += qp[qn] != 0; /* quotient size */ |
347 | |
|
348 | 0 | ASSERT (qn < pwn + sn || (qn == pwn + sn && mpn_cmp (qp + sn, pwp, pwn) < 0)); |
349 | |
|
350 | 0 | if (len != 0) |
351 | 0 | len = len - powtab->digits_in_base; |
352 | |
|
353 | 0 | str = mpn_dc_get_str (str, len, qp, qn, powtab - 1, tmp + qn); |
354 | 0 | str = mpn_dc_get_str (str, powtab->digits_in_base, rp, pwn + sn, powtab - 1, tmp); |
355 | 0 | } |
356 | 0 | } |
357 | 0 | return str; |
358 | 0 | } |
359 | | |
360 | | /* There are no leading zeros on the digits generated at str, but that's not |
361 | | currently a documented feature. The current mpz_out_str and mpz_get_str |
362 | | rely on it. */ |
363 | | |
364 | | size_t |
365 | | mpn_get_str (unsigned char *str, int base, mp_ptr up, mp_size_t un) |
366 | 1.89k | { |
367 | 1.89k | mp_ptr powtab_mem; |
368 | 1.89k | powers_t powtab[GMP_LIMB_BITS]; |
369 | 1.89k | int pi; |
370 | 1.89k | size_t out_len; |
371 | 1.89k | mp_ptr tmp; |
372 | 1.89k | size_t ndig; |
373 | 1.89k | mp_size_t xn; |
374 | 1.89k | TMP_DECL; |
375 | | |
376 | | /* Special case zero, as the code below doesn't handle it. */ |
377 | 1.89k | if (un == 0) |
378 | 15 | { |
379 | 15 | str[0] = 0; |
380 | 15 | return 1; |
381 | 15 | } |
382 | | |
383 | 1.88k | if (POW2_P (base)) |
384 | 0 | { |
385 | | /* The base is a power of 2. Convert from most significant end. */ |
386 | 0 | mp_limb_t n1, n0; |
387 | 0 | int bits_per_digit = mp_bases[base].big_base; |
388 | 0 | int cnt; |
389 | 0 | int bit_pos; |
390 | 0 | mp_size_t i; |
391 | 0 | unsigned char *s = str; |
392 | 0 | mp_bitcnt_t bits; |
393 | |
|
394 | 0 | n1 = up[un - 1]; |
395 | 0 | count_leading_zeros (cnt, n1); |
396 | | |
397 | | /* BIT_POS should be R when input ends in least significant nibble, |
398 | | R + bits_per_digit * n when input ends in nth least significant |
399 | | nibble. */ |
400 | |
|
401 | 0 | bits = (mp_bitcnt_t) GMP_NUMB_BITS * un - cnt + GMP_NAIL_BITS; |
402 | 0 | cnt = bits % bits_per_digit; |
403 | 0 | if (cnt != 0) |
404 | 0 | bits += bits_per_digit - cnt; |
405 | 0 | bit_pos = bits - (mp_bitcnt_t) (un - 1) * GMP_NUMB_BITS; |
406 | | |
407 | | /* Fast loop for bit output. */ |
408 | 0 | i = un - 1; |
409 | 0 | for (;;) |
410 | 0 | { |
411 | 0 | bit_pos -= bits_per_digit; |
412 | 0 | while (bit_pos >= 0) |
413 | 0 | { |
414 | 0 | *s++ = (n1 >> bit_pos) & ((1 << bits_per_digit) - 1); |
415 | 0 | bit_pos -= bits_per_digit; |
416 | 0 | } |
417 | 0 | i--; |
418 | 0 | if (i < 0) |
419 | 0 | break; |
420 | 0 | n0 = (n1 << -bit_pos) & ((1 << bits_per_digit) - 1); |
421 | 0 | n1 = up[i]; |
422 | 0 | bit_pos += GMP_NUMB_BITS; |
423 | 0 | *s++ = n0 | (n1 >> bit_pos); |
424 | 0 | } |
425 | |
|
426 | 0 | return s - str; |
427 | 0 | } |
428 | | |
429 | | /* General case. The base is not a power of 2. */ |
430 | | |
431 | 1.88k | if (BELOW_THRESHOLD (un, GET_STR_PRECOMPUTE_THRESHOLD)) |
432 | 1.88k | return mpn_bc_get_str (str, (size_t) 0, up, un, base) - str; |
433 | | |
434 | 0 | TMP_MARK; |
435 | | |
436 | | /* Allocate one large block for the powers of big_base. */ |
437 | 0 | powtab_mem = TMP_BALLOC_LIMBS (mpn_str_powtab_alloc (un)); |
438 | | |
439 | | /* Compute a table of powers, were the largest power is >= sqrt(U). */ |
440 | 0 | DIGITS_IN_BASE_PER_LIMB (ndig, un, base); |
441 | 0 | xn = 1 + ndig / mp_bases[base].chars_per_limb; /* FIXME: scalar integer division */ |
442 | |
|
443 | 0 | pi = 1 + mpn_compute_powtab (powtab, powtab_mem, xn, base); |
444 | | |
445 | | /* Using our precomputed powers, now in powtab[], convert our number. */ |
446 | 0 | tmp = TMP_BALLOC_LIMBS (mpn_dc_get_str_itch (un)); |
447 | 0 | out_len = mpn_dc_get_str (str, 0, up, un, powtab + (pi - 1), tmp) - str; |
448 | 0 | TMP_FREE; |
449 | |
|
450 | 0 | return out_len; |
451 | 1.88k | } |