/src/mupdf/source/fitz/strtof.c
Line | Count | Source |
1 | | // Copyright (C) 2004-2021 Artifex Software, Inc. |
2 | | // |
3 | | // This file is part of MuPDF. |
4 | | // |
5 | | // MuPDF is free software: you can redistribute it and/or modify it under the |
6 | | // terms of the GNU Affero General Public License as published by the Free |
7 | | // Software Foundation, either version 3 of the License, or (at your option) |
8 | | // any later version. |
9 | | // |
10 | | // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY |
11 | | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
12 | | // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more |
13 | | // details. |
14 | | // |
15 | | // You should have received a copy of the GNU Affero General Public License |
16 | | // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html> |
17 | | // |
18 | | // Alternative licensing terms are available from the licensor. |
19 | | // For commercial licensing, see <https://www.artifex.com/> or contact |
20 | | // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, |
21 | | // CA 94129, USA, for further information. |
22 | | |
23 | | #include "mupdf/fitz.h" |
24 | | |
25 | | #include <assert.h> |
26 | | #include <errno.h> |
27 | | #include <float.h> |
28 | | |
29 | | #ifndef INFINITY |
30 | | #define INFINITY (DBL_MAX+DBL_MAX) |
31 | | #endif |
32 | | #ifndef NAN |
33 | | #define NAN (INFINITY-INFINITY) |
34 | | #endif |
35 | | |
36 | | /* |
37 | | We use "Algorithm D" from "Contributions to a Proposed Standard for Binary |
38 | | Floating-Point Arithmetic" by Jerome Coonen (1984). |
39 | | |
40 | | The implementation uses a self-made floating point type, 'strtof_fp_t', with |
41 | | a 32-bit significand. The steps of the algorithm are |
42 | | |
43 | | INPUT: Up to 9 decimal digits d1, ... d9 and an exponent dexp. |
44 | | OUTPUT: A float corresponding to the number d1 ... d9 * 10^dexp. |
45 | | |
46 | | 1) Convert the integer d1 ... d9 to an strtof_fp_t x. |
47 | | 2) Lookup the strtof_fp_t power = 10 ^ |dexp|. |
48 | | 3) If dexp is positive set x = x * power, else set x = x / power. Use rounding mode 'round to odd'. |
49 | | 4) Round x to a float using rounding mode 'to even'. |
50 | | |
51 | | Step 1) is always lossless as the strtof_fp_t's significand can hold a 9-digit integer. |
52 | | In the case |dexp| <= 13 the cached power is exact and the algorithm returns |
53 | | the exactly rounded result (with rounding mode 'to even'). |
54 | | There is no double-rounding in 3), 4) as the multiply/divide uses 'round to odd'. |
55 | | |
56 | | For |dexp| > 13 the maximum error is bounded by (1/2 + 1/256) ulp. |
57 | | This is small enough to ensure that binary to decimal to binary conversion |
58 | | is the identity if the decimal format uses 9 correctly rounded significant digits. |
59 | | */ |
60 | | typedef struct strtof_fp_t |
61 | | { |
62 | | uint32_t f; |
63 | | int e; |
64 | | } strtof_fp_t; |
65 | | |
66 | | /* Multiply/Divide x by y with 'round to odd'. Assume that x and y are normalized. */ |
67 | | |
68 | | static strtof_fp_t |
69 | | strtof_multiply(strtof_fp_t x, strtof_fp_t y) |
70 | 5 | { |
71 | 5 | uint64_t tmp; |
72 | 5 | strtof_fp_t res; |
73 | | |
74 | 5 | assert(x.f & y.f & 0x80000000); |
75 | | |
76 | 5 | res.e = x.e + y.e + 32; |
77 | 5 | tmp = (uint64_t) x.f * y.f; |
78 | | /* Normalize. */ |
79 | 5 | if ((tmp < ((uint64_t) 1 << 63))) |
80 | 5 | { |
81 | 5 | tmp <<= 1; |
82 | 5 | --res.e; |
83 | 5 | } |
84 | | |
85 | 5 | res.f = tmp >> 32; |
86 | | |
87 | | /* Set the last bit of the significand to 1 if the result is |
88 | | inexact. */ |
89 | 5 | if (tmp & 0xffffffff) |
90 | 0 | res.f |= 1; |
91 | 5 | return res; |
92 | 5 | } |
93 | | |
94 | | static strtof_fp_t |
95 | | divide(strtof_fp_t x, strtof_fp_t y) |
96 | 1.95k | { |
97 | 1.95k | uint64_t product, quotient; |
98 | 1.95k | uint32_t remainder; |
99 | 1.95k | strtof_fp_t res; |
100 | | |
101 | 1.95k | res.e = x.e - y.e - 32; |
102 | 1.95k | product = (uint64_t) x.f << 32; |
103 | 1.95k | quotient = product / y.f; |
104 | 1.95k | remainder = product % y.f; |
105 | | /* 2^31 <= quotient <= 2^33 - 2. */ |
106 | 1.95k | if (quotient <= 0xffffffff) |
107 | 1.01k | res.f = quotient; |
108 | 942 | else |
109 | 942 | { |
110 | 942 | ++res.e; |
111 | | /* If quotient % 2 != 0 we have remainder != 0. */ |
112 | 942 | res.f = quotient >> 1; |
113 | 942 | } |
114 | 1.95k | if (remainder) |
115 | 1.66k | res.f |= 1; |
116 | 1.95k | return res; |
117 | 1.95k | } |
118 | | |
119 | | /* From 10^0 to 10^54. Generated with GNU MPFR. */ |
120 | | static const uint32_t strtof_powers_ten[55] = { |
121 | | 0x80000000, 0xa0000000, 0xc8000000, 0xfa000000, 0x9c400000, 0xc3500000, |
122 | | 0xf4240000, 0x98968000, 0xbebc2000, 0xee6b2800, 0x9502f900, 0xba43b740, |
123 | | 0xe8d4a510, 0x9184e72a, 0xb5e620f4, 0xe35fa932, 0x8e1bc9bf, 0xb1a2bc2f, |
124 | | 0xde0b6b3a, 0x8ac72305, 0xad78ebc6, 0xd8d726b7, 0x87867832, 0xa968163f, |
125 | | 0xd3c21bcf, 0x84595161, 0xa56fa5ba, 0xcecb8f28, 0x813f3979, 0xa18f07d7, |
126 | | 0xc9f2c9cd, 0xfc6f7c40, 0x9dc5ada8, 0xc5371912, 0xf684df57, 0x9a130b96, |
127 | | 0xc097ce7c, 0xf0bdc21b, 0x96769951, 0xbc143fa5, 0xeb194f8e, 0x92efd1b9, |
128 | | 0xb7abc627, 0xe596b7b1, 0x8f7e32ce, 0xb35dbf82, 0xe0352f63, 0x8c213d9e, |
129 | | 0xaf298d05, 0xdaf3f046, 0x88d8762c, 0xab0e93b7, 0xd5d238a5, 0x85a36367, |
130 | | 0xa70c3c41 |
131 | | }; |
132 | | static const int strtof_powers_ten_e[55] = { |
133 | | -31, -28, -25, -22, -18, -15, -12, -8, -5, -2, |
134 | | 2, 5, 8, 12, 15, 18, 22, 25, 28, 32, 35, 38, 42, 45, 48, 52, 55, 58, 62, 65, |
135 | | 68, 71, 75, 78, 81, 85, 88, 91, 95, 98, 101, 105, 108, 111, 115, 118, 121, |
136 | | 125, 128, 131, 135, 138, 141, 145, 148 |
137 | | }; |
138 | | |
139 | | static strtof_fp_t |
140 | | strtof_cached_power(int i) |
141 | 1.96k | { |
142 | 1.96k | strtof_fp_t result; |
143 | 1.96k | assert (i >= 0 && i <= 54); |
144 | 1.96k | result.f = strtof_powers_ten[i]; |
145 | 1.96k | result.e = strtof_powers_ten_e[i]; |
146 | 1.96k | return result; |
147 | 1.96k | } |
148 | | |
149 | | /* Find number of leading zero bits in an uint32_t. Derived from the |
150 | | "Bit Twiddling Hacks" at graphics.stanford.edu/~seander/bithacks.html. */ |
151 | | static unsigned char clz_table[256] = { |
152 | | 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, |
153 | | # define sixteen_times(N) N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, |
154 | | sixteen_times (3) sixteen_times (2) sixteen_times (2) |
155 | | sixteen_times (1) sixteen_times (1) sixteen_times (1) sixteen_times (1) |
156 | | /* Zero for the rest. */ |
157 | | }; |
158 | | static unsigned |
159 | | leading_zeros (uint32_t x) |
160 | 1.96k | { |
161 | 1.96k | unsigned tmp1, tmp2; |
162 | | |
163 | 1.96k | tmp1 = x >> 16; |
164 | 1.96k | if (tmp1) |
165 | 1.35k | { |
166 | 1.35k | tmp2 = tmp1 >> 8; |
167 | 1.35k | if (tmp2) |
168 | 586 | return clz_table[tmp2]; |
169 | 766 | else |
170 | 766 | return 8 + clz_table[tmp1]; |
171 | 1.35k | } |
172 | 612 | else |
173 | 612 | { |
174 | 612 | tmp1 = x >> 8; |
175 | 612 | if (tmp1) |
176 | 562 | return 16 + clz_table[tmp1]; |
177 | 50 | else |
178 | 50 | return 24 + clz_table[x]; |
179 | 612 | } |
180 | 1.96k | } |
181 | | |
182 | | static strtof_fp_t |
183 | | uint32_to_diy (uint32_t x) |
184 | 1.96k | { |
185 | 1.96k | strtof_fp_t result = {x, 0}; |
186 | 1.96k | unsigned shift = leading_zeros(x); |
187 | | |
188 | 1.96k | result.f <<= shift; |
189 | 1.96k | result.e -= shift; |
190 | 1.96k | return result; |
191 | 1.96k | } |
192 | | |
193 | 1.96k | #define SP_SIGNIFICAND_SIZE 23 |
194 | | #define SP_EXPONENT_BIAS (127 + SP_SIGNIFICAND_SIZE) |
195 | | #define SP_MIN_EXPONENT (-SP_EXPONENT_BIAS) |
196 | | #define SP_EXPONENT_MASK 0x7f800000 |
197 | 1.96k | #define SP_SIGNIFICAND_MASK 0x7fffff |
198 | | #define SP_HIDDEN_BIT 0x800000 /* 2^23 */ |
199 | | |
200 | | /* Convert normalized strtof_fp_t to IEEE-754 single with 'round to even'. |
201 | | See "Implementing IEEE 754-2008 Rounding" in the |
202 | | "Handbook of Floating-Point Arithmetik". |
203 | | */ |
204 | | static float |
205 | | diy_to_float(strtof_fp_t x, int negative) |
206 | 1.96k | { |
207 | 1.96k | uint32_t result; |
208 | 1.96k | union |
209 | 1.96k | { |
210 | 1.96k | float f; |
211 | 1.96k | uint32_t n; |
212 | 1.96k | } tmp; |
213 | | |
214 | 1.96k | assert(x.f & 0x80000000); |
215 | | |
216 | | /* We have 2^32 - 2^7 = 0xffffff80. */ |
217 | 1.96k | if (x.e > 96 || (x.e == 96 && x.f >= 0xffffff80)) |
218 | 0 | { |
219 | | /* Overflow. Set result to infinity. */ |
220 | 0 | errno = ERANGE; |
221 | 0 | result = 0xff << SP_SIGNIFICAND_SIZE; |
222 | 0 | } |
223 | | /* We have 2^32 - 2^8 = 0xffffff00. */ |
224 | 1.96k | else if (x.e > -158) |
225 | 1.96k | { |
226 | | /* x is greater or equal to FLT_MAX. So we get a normalized number. */ |
227 | 1.96k | result = (uint32_t) (x.e + 158) << SP_SIGNIFICAND_SIZE; |
228 | 1.96k | result |= (x.f >> 8) & SP_SIGNIFICAND_MASK; |
229 | | |
230 | 1.96k | if (x.f & 0x80) |
231 | 688 | { |
232 | | /* Round-bit is set. */ |
233 | 688 | if (x.f & 0x7f) |
234 | | /* Sticky-bit is set. */ |
235 | 688 | ++result; |
236 | 0 | else if (x.f & 0x100) |
237 | | /* Significand is odd. */ |
238 | 0 | ++result; |
239 | 688 | } |
240 | 1.96k | } |
241 | 0 | else if (x.e == -158 && x.f >= 0xffffff00) |
242 | 0 | { |
243 | | /* x is in the range (2^32, 2^32 - 2^8] * 2^-158, so its smaller than |
244 | | FLT_MIN but still rounds to it. */ |
245 | 0 | result = 1U << SP_SIGNIFICAND_SIZE; |
246 | 0 | } |
247 | 0 | else if (x.e > -181) |
248 | 0 | { |
249 | | /* Non-zero Denormal. */ |
250 | 0 | int shift = -149 - x.e; /* 9 <= shift <= 31. */ |
251 | |
|
252 | 0 | result = x.f >> shift; |
253 | |
|
254 | 0 | if (x.f & (1U << (shift - 1))) |
255 | | /* Round-bit is set. */ |
256 | 0 | { |
257 | 0 | if (x.f & ((1U << (shift - 1)) - 1)) |
258 | | /* Sticky-bit is set. */ |
259 | 0 | ++result; |
260 | 0 | else if (x.f & 1U << shift) |
261 | | /* Significand is odd. */ |
262 | 0 | ++result; |
263 | 0 | } |
264 | 0 | } |
265 | 0 | else if (x.e == -181 && x.f > 0x80000000) |
266 | 0 | { |
267 | | /* x is in the range (0.5,1) * 2^-149 so it rounds to the smallest |
268 | | denormal. Can't handle this in the previous case as shifting a |
269 | | uint32_t 32 bits to the right is undefined behaviour. */ |
270 | 0 | result = 1; |
271 | 0 | } |
272 | 0 | else |
273 | 0 | { |
274 | | /* Underflow. */ |
275 | 0 | errno = ERANGE; |
276 | 0 | result = 0; |
277 | 0 | } |
278 | | |
279 | 1.96k | if (negative) |
280 | 216 | result |= 0x80000000; |
281 | | |
282 | 1.96k | tmp.n = result; |
283 | 1.96k | return tmp.f; |
284 | 1.96k | } |
285 | | |
286 | | static float |
287 | | scale_integer_to_float(uint32_t M, int N, int negative) |
288 | 2.18k | { |
289 | 2.18k | strtof_fp_t result, x, power; |
290 | | |
291 | 2.18k | if (M == 0) |
292 | 216 | return negative ? -0.f : 0.f; |
293 | 1.96k | if (N > 38) |
294 | 0 | { |
295 | | /* Overflow. */ |
296 | 0 | errno = ERANGE; |
297 | 0 | return negative ? -INFINITY : INFINITY; |
298 | 0 | } |
299 | 1.96k | if (N < -54) |
300 | 0 | { |
301 | | /* Underflow. */ |
302 | 0 | errno = ERANGE; |
303 | 0 | return negative ? -0.f : 0.f; |
304 | 0 | } |
305 | | /* If N is in the range {-13, ..., 13} the conversion is exact. |
306 | | Try to scale N into this region. */ |
307 | 1.96k | while (N > 13 && M <= 0xffffffff / 10) |
308 | 0 | { |
309 | 0 | M *= 10; |
310 | 0 | --N; |
311 | 0 | } |
312 | | |
313 | 1.96k | while (N < -13 && M % 10 == 0) |
314 | 0 | { |
315 | 0 | M /= 10; |
316 | 0 | ++N; |
317 | 0 | } |
318 | | |
319 | 1.96k | x = uint32_to_diy (M); |
320 | 1.96k | if (N >= 0) |
321 | 5 | { |
322 | 5 | power = strtof_cached_power(N); |
323 | 5 | result = strtof_multiply(x, power); |
324 | 5 | } |
325 | 1.95k | else |
326 | 1.95k | { |
327 | 1.95k | power = strtof_cached_power(-N); |
328 | 1.95k | result = divide(x, power); |
329 | 1.95k | } |
330 | | |
331 | 1.96k | return diy_to_float(result, negative); |
332 | 1.96k | } |
333 | | |
334 | | /* Return non-zero if *s starts with string (must be uppercase), ignoring case, |
335 | | and increment *s by its length. */ |
336 | | static int |
337 | | starts_with(const char **s, const char *string) |
338 | 12.9k | { |
339 | 12.9k | const char *x = *s, *y = string; |
340 | 12.9k | while (*x && *y && (*x == *y || *x == *y + 32)) |
341 | 0 | ++x, ++y; |
342 | 12.9k | if (*y == 0) |
343 | 0 | { |
344 | | /* Match. */ |
345 | 0 | *s = x; |
346 | 0 | return 1; |
347 | 0 | } |
348 | 12.9k | else |
349 | 12.9k | return 0; |
350 | 12.9k | } |
351 | | #define SET_TAILPTR(tailptr, s) \ |
352 | 6.48k | do \ |
353 | 6.48k | if (tailptr) \ |
354 | 6.48k | *tailptr = (char *) s; \ |
355 | 6.48k | while (0) |
356 | | |
357 | | float |
358 | | fz_strtof(const char *string, char **tailptr) |
359 | 6.48k | { |
360 | | /* FIXME: error (1/2 + 1/256) ulp */ |
361 | 6.48k | const char *s; |
362 | 6.48k | uint32_t M = 0; |
363 | 6.48k | int N = 0; |
364 | | /* If decimal_digits gets 9 we truncate all following digits. */ |
365 | 6.48k | int decimal_digits = 0; |
366 | 6.48k | int negative = 0; |
367 | 6.48k | const char *number_start = 0; |
368 | | |
369 | | /* Skip leading whitespace (isspace in "C" locale). */ |
370 | 6.48k | s = string; |
371 | 6.48k | while (*s == ' ' || *s == '\f' || *s == '\n' || *s == '\r' || *s == '\t' || *s == '\v') |
372 | 0 | ++s; |
373 | | |
374 | | /* Parse sign. */ |
375 | 6.48k | if (*s == '+') |
376 | 0 | ++s; |
377 | 6.48k | if (*s == '-') |
378 | 216 | { |
379 | 216 | negative = 1; |
380 | 216 | ++s; |
381 | 216 | } |
382 | 6.48k | number_start = s; |
383 | | /* Parse digits before decimal point. */ |
384 | 11.2k | while (*s >= '0' && *s <= '9') |
385 | 4.78k | { |
386 | 4.78k | if (decimal_digits) |
387 | 2.64k | { |
388 | 2.64k | if (decimal_digits < 9) |
389 | 2.64k | { |
390 | 2.64k | ++decimal_digits; |
391 | 2.64k | M = M * 10 + *s - '0'; |
392 | 2.64k | } |
393 | | /* Really arcane strings might overflow N. */ |
394 | 0 | else if (N < 1000) |
395 | 0 | ++N; |
396 | 2.64k | } |
397 | 2.14k | else if (*s > '0') |
398 | 1.68k | { |
399 | 1.68k | M = *s - '0'; |
400 | 1.68k | ++decimal_digits; |
401 | 1.68k | } |
402 | 4.78k | ++s; |
403 | 4.78k | } |
404 | | |
405 | | /* Parse decimal point. */ |
406 | 6.48k | if (*s == '.') |
407 | 6.48k | ++s; |
408 | | |
409 | | /* Parse digits after decimal point. */ |
410 | 15.2k | while (*s >= '0' && *s <= '9') |
411 | 8.72k | { |
412 | 8.72k | if (decimal_digits < 9) |
413 | 8.72k | { |
414 | 8.72k | if (decimal_digits || *s > '0') |
415 | 7.80k | { |
416 | 7.80k | ++decimal_digits; |
417 | 7.80k | M = M * 10 + *s - '0'; |
418 | 7.80k | } |
419 | 8.72k | --N; |
420 | 8.72k | } |
421 | 8.72k | ++s; |
422 | 8.72k | } |
423 | 6.48k | if ((s == number_start + 1 && *number_start == '.') || number_start == s) |
424 | 4.30k | { |
425 | | /* No Number. Check for INF and NAN strings. */ |
426 | 4.30k | s = number_start; |
427 | 4.30k | if (starts_with(&s, "INFINITY") || starts_with(&s, "INF")) |
428 | 0 | { |
429 | 0 | errno = ERANGE; |
430 | 0 | SET_TAILPTR(tailptr, s); |
431 | 0 | return negative ? -INFINITY : +INFINITY; |
432 | 0 | } |
433 | 4.30k | else if (starts_with(&s, "NAN")) |
434 | 0 | { |
435 | 0 | SET_TAILPTR(tailptr, s); |
436 | 0 | return (float)NAN; |
437 | 0 | } |
438 | 4.30k | else |
439 | 4.30k | { |
440 | 4.30k | SET_TAILPTR(tailptr, string); |
441 | 4.30k | return 0.f; |
442 | 4.30k | } |
443 | 4.30k | } |
444 | | |
445 | | /* Parse exponent. */ |
446 | 2.18k | if (*s == 'e' || *s == 'E') |
447 | 0 | { |
448 | 0 | int exp_negative = 0; |
449 | 0 | int exp = 0; |
450 | 0 | const char *int_start; |
451 | 0 | const char *exp_start = s; |
452 | |
|
453 | 0 | ++s; |
454 | 0 | if (*s == '+') |
455 | 0 | ++s; |
456 | 0 | else if (*s == '-') |
457 | 0 | { |
458 | 0 | ++s; |
459 | 0 | exp_negative = 1; |
460 | 0 | } |
461 | 0 | int_start = s; |
462 | | /* Parse integer. */ |
463 | 0 | while (*s >= '0' && *s <= '9') |
464 | 0 | { |
465 | | /* Make sure exp does not get overflowed. */ |
466 | 0 | if (exp < 100) |
467 | 0 | exp = exp * 10 + *s - '0'; |
468 | 0 | ++s; |
469 | 0 | } |
470 | 0 | if (exp_negative) |
471 | 0 | exp = -exp; |
472 | 0 | if (s == int_start) |
473 | | /* No Number. */ |
474 | 0 | s = exp_start; |
475 | 0 | else |
476 | 0 | N += exp; |
477 | 0 | } |
478 | | |
479 | 2.18k | SET_TAILPTR(tailptr, s); |
480 | 2.18k | return scale_integer_to_float(M, N, negative); |
481 | 6.48k | } |