/src/moddable/xs/tools/fdlibm/math_private.h
Line | Count | Source |
1 | | /* |
2 | | * ==================================================== |
3 | | * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. |
4 | | * |
5 | | * Developed at SunPro, a Sun Microsystems, Inc. business. |
6 | | * Permission to use, copy, modify, and distribute this |
7 | | * software is freely granted, provided that this notice |
8 | | * is preserved. |
9 | | * ==================================================== |
10 | | */ |
11 | | |
12 | | #ifndef _MATH_PRIVATE_H_ |
13 | | #define _MATH_PRIVATE_H_ |
14 | | |
15 | | #include <sys/types.h> |
16 | | |
17 | | #include <stdint.h> |
18 | | #ifndef u_int32_t |
19 | 50.4M | #define u_int32_t uint32_t |
20 | | #endif |
21 | | #ifndef u_int64_t |
22 | | #define u_int64_t uint64_t |
23 | | #endif |
24 | | |
25 | | #include <float.h> |
26 | | typedef double __double_t; |
27 | | typedef __double_t double_t; |
28 | | typedef float __float_t; |
29 | | |
30 | | #include <math.h> |
31 | | // extern double fabs(double x); |
32 | | // extern long double fabsl(long double x); |
33 | | // extern double floor(double x); |
34 | | // extern double sqrt(double); |
35 | | |
36 | | #include "fdlibm.h" |
37 | | |
38 | | #ifndef LITTLE_ENDIAN |
39 | | #define LITTLE_ENDIAN 1234 |
40 | | #endif |
41 | | #ifndef BIG_ENDIAN |
42 | | #define BIG_ENDIAN 4321 |
43 | | #endif |
44 | | #ifndef BYTE_ORDER |
45 | | #define BYTE_ORDER LITTLE_ENDIAN |
46 | | #endif |
47 | | |
48 | | /* |
49 | | * The original fdlibm code used statements like: |
50 | | * n0 = ((*(int*)&one)>>29)^1; * index of high word * |
51 | | * ix0 = *(n0+(int*)&x); * high word of x * |
52 | | * ix1 = *((1-n0)+(int*)&x); * low word of x * |
53 | | * to dig two 32 bit words out of the 64 bit IEEE floating point |
54 | | * value. That is non-ANSI, and, moreover, the gcc instruction |
55 | | * scheduler gets it wrong. We instead use the following macros. |
56 | | * Unlike the original code, we determine the endianness at compile |
57 | | * time, not at run time; I don't see much benefit to selecting |
58 | | * endianness at run time. |
59 | | */ |
60 | | |
61 | | /* |
62 | | * A union which permits us to convert between a double and two 32 bit |
63 | | * ints. |
64 | | */ |
65 | | |
66 | | #ifdef __arm__ |
67 | | #if defined(__VFP_FP__) || defined(__ARM_EABI__) |
68 | | #define IEEE_WORD_ORDER BYTE_ORDER |
69 | | #else |
70 | | #define IEEE_WORD_ORDER BIG_ENDIAN |
71 | | #endif |
72 | | #else /* __arm__ */ |
73 | | #define IEEE_WORD_ORDER BYTE_ORDER |
74 | | #endif |
75 | | |
76 | | /* A union which permits us to convert between a long double and |
77 | | four 32 bit ints. */ |
78 | | |
79 | | #if IEEE_WORD_ORDER == BIG_ENDIAN |
80 | | |
81 | | typedef union |
82 | | { |
83 | | long double value; |
84 | | struct { |
85 | | u_int32_t mswhi; |
86 | | u_int32_t mswlo; |
87 | | u_int32_t lswhi; |
88 | | u_int32_t lswlo; |
89 | | } parts32; |
90 | | struct { |
91 | | u_int64_t msw; |
92 | | u_int64_t lsw; |
93 | | } parts64; |
94 | | } ieee_quad_shape_type; |
95 | | |
96 | | #endif |
97 | | |
98 | | #if IEEE_WORD_ORDER == LITTLE_ENDIAN |
99 | | |
100 | | typedef union |
101 | | { |
102 | | long double value; |
103 | | struct { |
104 | | u_int32_t lswlo; |
105 | | u_int32_t lswhi; |
106 | | u_int32_t mswlo; |
107 | | u_int32_t mswhi; |
108 | | } parts32; |
109 | | struct { |
110 | | u_int64_t lsw; |
111 | | u_int64_t msw; |
112 | | } parts64; |
113 | | } ieee_quad_shape_type; |
114 | | |
115 | | #endif |
116 | | |
117 | | #if IEEE_WORD_ORDER == BIG_ENDIAN |
118 | | |
119 | | typedef union |
120 | | { |
121 | | double value; |
122 | | struct |
123 | | { |
124 | | u_int32_t msw; |
125 | | u_int32_t lsw; |
126 | | } parts; |
127 | | struct |
128 | | { |
129 | | u_int64_t w; |
130 | | } xparts; |
131 | | } ieee_double_shape_type; |
132 | | |
133 | | #endif |
134 | | |
135 | | #if IEEE_WORD_ORDER == LITTLE_ENDIAN |
136 | | |
137 | | typedef union |
138 | | { |
139 | | double value; |
140 | | struct |
141 | | { |
142 | | u_int32_t lsw; |
143 | | u_int32_t msw; |
144 | | } parts; |
145 | | struct |
146 | | { |
147 | | u_int64_t w; |
148 | | } xparts; |
149 | | } ieee_double_shape_type; |
150 | | |
151 | | #endif |
152 | | |
153 | | /* Get two 32 bit ints from a double. */ |
154 | | |
155 | 74.5M | #define EXTRACT_WORDS(ix0,ix1,d) \ |
156 | 74.5M | do { \ |
157 | 74.5M | ieee_double_shape_type ew_u; \ |
158 | 74.5M | ew_u.value = (d); \ |
159 | 74.5M | (ix0) = ew_u.parts.msw; \ |
160 | 74.5M | (ix1) = ew_u.parts.lsw; \ |
161 | 74.5M | } while (0) |
162 | | |
163 | | /* Get a 64-bit int from a double. */ |
164 | | #define EXTRACT_WORD64(ix,d) \ |
165 | | do { \ |
166 | | ieee_double_shape_type ew_u; \ |
167 | | ew_u.value = (d); \ |
168 | | (ix) = ew_u.xparts.w; \ |
169 | | } while (0) |
170 | | |
171 | | /* Get the more significant 32 bit int from a double. */ |
172 | | |
173 | 5.27M | #define GET_HIGH_WORD(i,d) \ |
174 | 5.27M | do { \ |
175 | 5.27M | ieee_double_shape_type gh_u; \ |
176 | 5.27M | gh_u.value = (d); \ |
177 | 5.27M | (i) = gh_u.parts.msw; \ |
178 | 5.27M | } while (0) |
179 | | |
180 | | /* Get the less significant 32 bit int from a double. */ |
181 | | |
182 | 25.9k | #define GET_LOW_WORD(i,d) \ |
183 | 25.9k | do { \ |
184 | 25.9k | ieee_double_shape_type gl_u; \ |
185 | 25.9k | gl_u.value = (d); \ |
186 | 25.9k | (i) = gl_u.parts.lsw; \ |
187 | 25.9k | } while (0) |
188 | | |
189 | | /* Set a double from two 32 bit ints. */ |
190 | | |
191 | 13.0M | #define INSERT_WORDS(d,ix0,ix1) \ |
192 | 13.0M | do { \ |
193 | 13.0M | ieee_double_shape_type iw_u; \ |
194 | 13.0M | iw_u.parts.msw = (ix0); \ |
195 | 13.0M | iw_u.parts.lsw = (ix1); \ |
196 | 13.0M | (d) = iw_u.value; \ |
197 | 13.0M | } while (0) |
198 | | |
199 | | /* Set a double from a 64-bit int. */ |
200 | | #define INSERT_WORD64(d,ix) \ |
201 | | do { \ |
202 | | ieee_double_shape_type iw_u; \ |
203 | | iw_u.xparts.w = (ix); \ |
204 | | (d) = iw_u.value; \ |
205 | | } while (0) |
206 | | |
207 | | /* Set the more significant 32 bits of a double from an int. */ |
208 | | |
209 | 11.8M | #define SET_HIGH_WORD(d,v) \ |
210 | 11.8M | do { \ |
211 | 11.8M | ieee_double_shape_type sh_u; \ |
212 | 11.8M | sh_u.value = (d); \ |
213 | 11.8M | sh_u.parts.msw = (v); \ |
214 | 11.8M | (d) = sh_u.value; \ |
215 | 11.8M | } while (0) |
216 | | |
217 | | /* Set the less significant 32 bits of a double from an int. */ |
218 | | |
219 | 17.0M | #define SET_LOW_WORD(d,v) \ |
220 | 17.0M | do { \ |
221 | 17.0M | ieee_double_shape_type sl_u; \ |
222 | 17.0M | sl_u.value = (d); \ |
223 | 17.0M | sl_u.parts.lsw = (v); \ |
224 | 17.0M | (d) = sl_u.value; \ |
225 | 17.0M | } while (0) |
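As a usage illustration added by the editor (not part of the original
header; the function name is hypothetical), fdlibm-style routines combine
these accessors to take a double apart and rebuild it without the
non-ANSI pointer punning described above:

    /* Hedged sketch: copy the sign of y onto x using the word macros. */
    static double hypothetical_copysign(double x, double y)
    {
        u_int32_t hx, lx, hy;

        EXTRACT_WORDS(hx, lx, x);     /* split x into high/low words */
        GET_HIGH_WORD(hy, y);         /* the sign bit lives in the high word */
        hx = (hx & 0x7fffffffU) | (hy & 0x80000000U);  /* splice in y's sign */
        INSERT_WORDS(x, hx, lx);      /* rebuild the double */
        return (x);
    }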
226 | | |
227 | | /* |
228 | | * A union which permits us to convert between a float and a 32 bit |
229 | | * int. |
230 | | */ |
231 | | |
232 | | typedef union |
233 | | { |
234 | | float value; |
235 | | /* FIXME: Assumes 32 bit int. */ |
236 | | unsigned int word; |
237 | | } ieee_float_shape_type; |
238 | | |
239 | | /* Get a 32 bit int from a float. */ |
240 | | |
241 | | #define GET_FLOAT_WORD(i,d) \ |
242 | | do { \ |
243 | | ieee_float_shape_type gf_u; \ |
244 | | gf_u.value = (d); \ |
245 | | (i) = gf_u.word; \ |
246 | | } while (0) |
247 | | |
248 | | /* Set a float from a 32 bit int. */ |
249 | | |
250 | | #define SET_FLOAT_WORD(d,i) \ |
251 | | do { \ |
252 | | ieee_float_shape_type sf_u; \ |
253 | | sf_u.word = (i); \ |
254 | | (d) = sf_u.value; \ |
255 | | } while (0) |
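A minimal sketch (hypothetical name), assuming the 32-bit int noted in
the FIXME above:

    /* Hedged sketch: detect -0.0f, which compares equal to 0.0f and so
       can only be distinguished by inspecting the bits. */
    static int hypothetical_is_neg_zerof(float x)
    {
        u_int32_t ix;

        GET_FLOAT_WORD(ix, x);
        return (ix == 0x80000000U);
    }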
256 | | |
257 | | /* |
258 | | * Get expsign and mantissa as 16 bit and 64 bit ints from an 80 bit long |
259 | | * double. |
260 | | */ |
261 | | |
262 | | #define EXTRACT_LDBL80_WORDS(ix0,ix1,d) \ |
263 | | do { \ |
264 | | union IEEEl2bits ew_u; \ |
265 | | ew_u.e = (d); \ |
266 | | (ix0) = ew_u.xbits.expsign; \ |
267 | | (ix1) = ew_u.xbits.man; \ |
268 | | } while (0) |
269 | | |
270 | | /* |
271 | | * Get expsign and mantissa as one 16 bit and two 64 bit ints from a 128 bit |
272 | | * long double. |
273 | | */ |
274 | | |
275 | | #define EXTRACT_LDBL128_WORDS(ix0,ix1,ix2,d) \ |
276 | | do { \ |
277 | | union IEEEl2bits ew_u; \ |
278 | | ew_u.e = (d); \ |
279 | | (ix0) = ew_u.xbits.expsign; \ |
280 | | (ix1) = ew_u.xbits.manh; \ |
281 | | (ix2) = ew_u.xbits.manl; \ |
282 | | } while (0) |
283 | | |
284 | | /* Get expsign as a 16 bit int from a long double. */ |
285 | | |
286 | | #define GET_LDBL_EXPSIGN(i,d) \ |
287 | | do { \ |
288 | | union IEEEl2bits ge_u; \ |
289 | | ge_u.e = (d); \ |
290 | | (i) = ge_u.xbits.expsign; \ |
291 | | } while (0) |
292 | | |
293 | | /* |
294 | | * Set an 80 bit long double from a 16 bit int expsign and a 64 bit int |
295 | | * mantissa. |
296 | | */ |
297 | | |
298 | | #define INSERT_LDBL80_WORDS(d,ix0,ix1) \ |
299 | | do { \ |
300 | | union IEEEl2bits iw_u; \ |
301 | | iw_u.xbits.expsign = (ix0); \ |
302 | | iw_u.xbits.man = (ix1); \ |
303 | | (d) = iw_u.e; \ |
304 | | } while (0) |
305 | | |
306 | | /* |
307 | | * Set a 128 bit long double from a 16 bit int expsign and two 64 bit ints |
308 | | * comprising the mantissa. |
309 | | */ |
310 | | |
311 | | #define INSERT_LDBL128_WORDS(d,ix0,ix1,ix2) \ |
312 | | do { \ |
313 | | union IEEEl2bits iw_u; \ |
314 | | iw_u.xbits.expsign = (ix0); \ |
315 | | iw_u.xbits.manh = (ix1); \ |
316 | | iw_u.xbits.manl = (ix2); \ |
317 | | (d) = iw_u.e; \ |
318 | | } while (0) |
319 | | |
320 | | /* Set expsign of a long double from a 16 bit int. */ |
321 | | |
322 | | #define SET_LDBL_EXPSIGN(d,v) \ |
323 | | do { \ |
324 | | union IEEEl2bits se_u; \ |
325 | | se_u.e = (d); \ |
326 | | se_u.xbits.expsign = (v); \ |
327 | | (d) = se_u.e; \ |
328 | | } while (0) |
329 | | |
330 | | #ifdef __i386__ |
331 | | /* Long double constants are broken on i386. */ |
332 | | #define LD80C(m, ex, v) { \ |
333 | | .xbits.man = __CONCAT(m, ULL), \ |
334 | | .xbits.expsign = (0x3fff + (ex)) | ((v) < 0 ? 0x8000 : 0), \ |
335 | | } |
336 | | #else |
337 | | /* The above works on non-i386 too, but we use this to check v. */ |
338 | | #define LD80C(m, ex, v) { .e = (v), } |
339 | | #endif |
340 | | |
341 | | #ifdef FLT_EVAL_METHOD |
342 | | /* |
343 | | * Attempt to get strict C99 semantics for assignment with non-C99 compilers. |
344 | | */ |
345 | | #if FLT_EVAL_METHOD == 0 || __GNUC__ == 0 |
346 | 652k | #define STRICT_ASSIGN(type, lval, rval) ((lval) = (rval)) |
347 | | #else |
348 | | #define STRICT_ASSIGN(type, lval, rval) do { \ |
349 | | volatile type __lval; \ |
350 | | \ |
351 | | if (sizeof(type) >= sizeof(long double)) \ |
352 | | (lval) = (rval); \ |
353 | | else { \ |
354 | | __lval = (rval); \ |
355 | | (lval) = __lval; \ |
356 | | } \ |
357 | | } while (0) |
358 | | #endif |
359 | | #endif /* FLT_EVAL_METHOD */ |
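An illustrative use (hypothetical function; the constant is chosen only
for the example):

    /* Hedged sketch: on targets where FLT_EVAL_METHOD != 0, x * 0x1p100
       may be kept in a wider x87 register; STRICT_ASSIGN forces the
       rounding to double to happen here, not at a later register spill. */
    static double hypothetical_scale(double x)
    {
        double r;

        STRICT_ASSIGN(double, r, x * 0x1p100);
        return (r);
    }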
360 | | |
361 | | /* Support switching the mode to FP_PE if necessary. */ |
362 | | #if defined(__i386__) && !defined(NO_FPSETPREC) |
363 | | #define ENTERI() ENTERIT(long double) |
364 | | #define ENTERIT(returntype) \ |
365 | | returntype __retval; \ |
366 | | fp_prec_t __oprec; \ |
367 | | \ |
368 | | if ((__oprec = fpgetprec()) != FP_PE) \ |
369 | | fpsetprec(FP_PE) |
370 | | #define RETURNI(x) do { \ |
371 | | __retval = (x); \ |
372 | | if (__oprec != FP_PE) \ |
373 | | fpsetprec(__oprec); \ |
374 | | RETURNF(__retval); \ |
375 | | } while (0) |
376 | | #define ENTERV() \ |
377 | | fp_prec_t __oprec; \ |
378 | | \ |
379 | | if ((__oprec = fpgetprec()) != FP_PE) \ |
380 | | fpsetprec(FP_PE) |
381 | | #define RETURNV() do { \ |
382 | | if (__oprec != FP_PE) \ |
383 | | fpsetprec(__oprec); \ |
384 | | return; \ |
385 | | } while (0) |
386 | | #else |
387 | | #define ENTERI() |
388 | | #define ENTERIT(x) |
389 | | #define RETURNI(x) RETURNF(x) |
390 | | #define ENTERV() |
391 | | #define RETURNV() return |
392 | | #endif |
393 | | |
394 | | /* Default return statement if hack*_t() is not used. */ |
395 | | #define RETURNF(v) return (v) |
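A hedged sketch of the intended bracketing (hypothetical function): on
i386 the entry macro raises the x87 precision to FP_PE for the body and
the return macro restores it; elsewhere both expand to plain code.

    static long double
    hypothetical_longdouble_func(long double x)
    {
        ENTERI();
        x = x * x;    /* ... computation needing full 64-bit precision ... */
        RETURNI(x);
    }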
396 | | |
397 | | /* |
398 | | * 2sum gives the same result as 2sumF without requiring |a| >= |b| or |
399 | | * a == 0, but is slower. |
400 | | */ |
401 | | #define _2sum(a, b) do { \ |
402 | | __typeof(a) __s, __w; \ |
403 | | \ |
404 | | __w = (a) + (b); \ |
405 | | __s = __w - (a); \ |
406 | | (b) = ((a) - (__w - __s)) + ((b) - __s); \ |
407 | | (a) = __w; \ |
408 | | } while (0) |
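A worked demonstration (values chosen for illustration; the function is
hypothetical): with a = 1.0 and b = 0x1p-60, the plain sum rounds to 1.0
and loses b entirely, while _2sum() keeps the lost part:

    static void hypothetical_2sum_demo(void)
    {
        double a = 1.0;
        double b = 0x1p-60;  /* discarded by a plain double addition */

        _2sum(a, b);
        /* now a == 1.0 (the rounded sum) and b == 0x1p-60 (the rounding
           error), so a + b still represents the exact sum */
    }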
409 | | |
410 | | /* |
411 | | * 2sumF algorithm. |
412 | | * |
413 | | * "Normalize" the terms in the infinite-precision expression a + b for |
414 | | * the sum of 2 floating point values so that b is as small as possible |
415 | | * relative to 'a'. (The resulting 'a' is the value of the expression in |
416 | | * the same precision as 'a' and the resulting b is the rounding error.) |
417 | | * |a| must be >= |b| or 0, b's type must be no larger than 'a's type, and |
418 | | * exponent overflow or underflow must not occur. This uses a Theorem of |
419 | | * Dekker (1971). See Knuth (1981) 4.2.2 Theorem C. The name "TwoSum" |
420 | | * is apparently due to Shewchuk (1997). |
421 | | * |
422 | | * For this to always work, assignment of a + b to 'a' must not retain any |
423 | | * extra precision in a + b. This is required by C standards but broken |
424 | | * in many compilers. The brokenness cannot be worked around using |
425 | | * STRICT_ASSIGN() like we do elsewhere, since the efficiency of this |
426 | | * algorithm would be destroyed by non-null strict assignments. (The |
427 | | * compilers are correct to be broken -- the efficiency of all floating |
428 | | * point code calculations would be destroyed similarly if they forced the |
429 | | * conversions.) |
430 | | * |
431 | | * Fortunately, a case that works well can usually be arranged by building |
432 | | * any extra precision into the type of 'a' -- 'a' should have type float_t, |
433 | | * double_t or long double. b's type should be no larger than 'a's type. |
434 | | * Callers should use these types with scopes as large as possible, to |
435 | | * reduce their own extra-precision and efficiency problems. In |
436 | | * particular, they shouldn't convert back and forth just to call here. |
437 | | */ |
438 | | #ifdef DEBUG |
439 | | #define _2sumF(a, b) do { \ |
440 | | __typeof(a) __w; \ |
441 | | volatile __typeof(a) __ia, __ib, __r, __vw; \ |
442 | | \ |
443 | | __ia = (a); \ |
444 | | __ib = (b); \ |
445 | | assert(__ia == 0 || fabsl(__ia) >= fabsl(__ib)); \ |
446 | | \ |
447 | | __w = (a) + (b); \ |
448 | | (b) = ((a) - __w) + (b); \ |
449 | | (a) = __w; \ |
450 | | \ |
451 | | /* The next 2 assertions are weak if (a) is already long double. */ \ |
452 | | assert((long double)__ia + __ib == (long double)(a) + (b)); \ |
453 | | __vw = __ia + __ib; \ |
454 | | __r = __ia - __vw; \ |
455 | | __r += __ib; \ |
456 | | assert(__vw == (a) && __r == (b)); \ |
457 | | } while (0) |
458 | | #else /* !DEBUG */ |
459 | | #define _2sumF(a, b) do { \ |
460 | | __typeof(a) __w; \ |
461 | | \ |
462 | | __w = (a) + (b); \ |
463 | | (b) = ((a) - __w) + (b); \ |
464 | | (a) = __w; \ |
465 | | } while (0) |
466 | | #endif /* DEBUG */ |
467 | | |
468 | | /* |
469 | | * Set x += c, where x is represented in extra precision as a + b. |
470 | | * x must be sufficiently normalized and sufficiently larger than c, |
471 | | * and the result is then sufficiently normalized. |
472 | | * |
473 | | * The details of ordering are that |a| must be >= |c| (so that (a, c) |
474 | | * can be normalized without extra work to swap 'a' with c). The details of |
475 | | * the normalization are that b must be small relative to the normalized 'a'. |
476 | | * Normalization of (a, c) makes the normalized c tiny relative to the |
477 | | * normalized a, so b remains small relative to 'a' in the result. However, |
478 | | * b need not ever be tiny relative to 'a'. For example, b might be about |
479 | | * 2**20 times smaller than 'a' to give about 20 extra bits of precision. |
480 | | * That is usually enough, and adding c (which by normalization is about |
481 | | * 2**53 times smaller than a) cannot change b significantly. However, |
482 | | * cancellation of 'a' with c in normalization of (a, c) may reduce 'a' |
483 | | * significantly relative to b. The caller must ensure that significant |
484 | | * cancellation doesn't occur, either by having c of the same sign as 'a', |
485 | | * or by having |c| a few percent smaller than |a|. Pre-normalization of |
486 | | * (a, b) may help. |
487 | | * |
488 | | * This is a variant of an algorithm of Kahan (see Knuth (1981) 4.2.2 |
489 | | * exercise 19). We gain considerable efficiency by requiring the terms to |
490 | | * be sufficiently normalized and sufficiently increasing. |
491 | | */ |
492 | | #define _3sumF(a, b, c) do { \ |
493 | | __typeof(a) __tmp; \ |
494 | | \ |
495 | | __tmp = (c); \ |
496 | | _2sumF(__tmp, (a)); \ |
497 | | (b) += (a); \ |
498 | | (a) = __tmp; \ |
499 | | } while (0) |
500 | | |
501 | | /* |
502 | | * Common routine to process the arguments to nan(), nanf(), and nanl(). |
503 | | */ |
504 | | void _scan_nan(uint32_t *__words, int __num_words, const char *__s); |
505 | | |
506 | | /* |
507 | | * Mix 0, 1 or 2 NaNs. First add 0 to each arg. This normally just turns |
508 | | * signaling NaNs into quiet NaNs by setting a quiet bit. We do this |
509 | | * because we want to never return a signaling NaN, and also because we |
510 | | * don't want the quiet bit to affect the result. Then mix the converted |
511 | | * args using the specified operation. |
512 | | * |
513 | | * When one arg is NaN, the result is typically that arg quieted. When both |
514 | | * args are NaNs, the result is typically the quietening of the arg whose |
515 | | * mantissa is largest after quietening. When neither arg is NaN, the |
516 | | * result may be NaN because it is indeterminate, or finite for subsequent |
517 | | * construction of a NaN as the indeterminate 0.0L/0.0L. |
518 | | * |
519 | | * Technical complications: the result in bits after rounding to the final |
520 | | * precision might depend on the runtime precision and/or on compiler |
521 | | * optimizations, especially when different register sets are used for |
522 | | * different precisions. Try to make the result not depend on at least the |
523 | | * runtime precision by always doing the main mixing step in long double |
524 | | * precision. Try to reduce dependencies on optimizations by adding the |
525 | | * 0's in different precisions (unless everything is in long double |
526 | | * precision). |
527 | | */ |
528 | 989k | #define nan_mix(x, y) (nan_mix_op((x), (y), +)) |
529 | 5.39M | #define nan_mix_op(x, y, op) (((x) + 0.0L) op ((y) + 0)) |
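A hedged sketch of the typical call pattern (hypothetical function;
routines such as pow() and atan2() use nan_mix() this way when either
argument is a NaN):

    static double hypothetical_binary_op(double x, double y)
    {
        if (isnan(x) || isnan(y))
            return (nan_mix(x, y));  /* always a quiet NaN */
        /* ... ordinary finite-argument computation ... */
        return (x + y);
    }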
530 | | |
531 | | #ifdef _COMPLEX_H |
532 | | |
533 | | /* |
534 | | * C99 specifies that complex numbers have the same representation as |
535 | | * an array of two elements, where the first element is the real part |
536 | | * and the second element is the imaginary part. |
537 | | */ |
538 | | typedef union { |
539 | | float complex f; |
540 | | float a[2]; |
541 | | } float_complex; |
542 | | typedef union { |
543 | | double complex f; |
544 | | double a[2]; |
545 | | } double_complex; |
546 | | typedef union { |
547 | | long double complex f; |
548 | | long double a[2]; |
549 | | } long_double_complex; |
550 | | #define REALPART(z) ((z).a[0]) |
551 | | #define IMAGPART(z) ((z).a[1]) |
552 | | |
553 | | /* |
554 | | * Inline functions that can be used to construct complex values. |
555 | | * |
556 | | * The C99 standard intends x+I*y to be used for this, but x+I*y is |
557 | | * currently unusable in general since gcc introduces many overflow, |
558 | | * underflow, sign and efficiency bugs by rewriting I*y as |
559 | | * (0.0+I)*(y+0.0*I) and laboriously computing the full complex product. |
560 | | * In particular, I*Inf is corrupted to NaN+I*Inf, and I*-0 is corrupted |
561 | | * to -0.0+I*0.0. |
562 | | * |
563 | | * The C11 standard introduced the macros CMPLX(), CMPLXF() and CMPLXL() |
564 | | * to construct complex values. Compilers that conform to the C99 |
565 | | * standard require the following functions to avoid the above issues. |
566 | | */ |
567 | | |
568 | | #ifndef CMPLXF |
569 | | static __inline float complex |
570 | | CMPLXF(float x, float y) |
571 | | { |
572 | | float_complex z; |
573 | | |
574 | | REALPART(z) = x; |
575 | | IMAGPART(z) = y; |
576 | | return (z.f); |
577 | | } |
578 | | #endif |
579 | | |
580 | | #ifndef CMPLX |
581 | | static __inline double complex |
582 | | CMPLX(double x, double y) |
583 | | { |
584 | | double_complex z; |
585 | | |
586 | | REALPART(z) = x; |
587 | | IMAGPART(z) = y; |
588 | | return (z.f); |
589 | | } |
590 | | #endif |
591 | | |
592 | | #ifndef CMPLXL |
593 | | static __inline long double complex |
594 | | CMPLXL(long double x, long double y) |
595 | | { |
596 | | long_double_complex z; |
597 | | |
598 | | REALPART(z) = x; |
599 | | IMAGPART(z) = y; |
600 | | return (z.f); |
601 | | } |
602 | | #endif |
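A hedged illustration of why these wrappers matter (hypothetical
function; the values are chosen to show the failure mode described
above):

    static double complex
    hypothetical_make_imag_inf(void)
    {
        /* CMPLX() yields exactly 0.0 + I*Inf; writing 0.0 + I*INFINITY
           may instead produce NaN + I*Inf when the compiler expands I*y
           as a full complex product. */
        return (CMPLX(0.0, INFINITY));
    }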
603 | | |
604 | | #endif /* _COMPLEX_H */ |
605 | | |
606 | | /* |
607 | | * The rnint() family rounds to the nearest integer for a restricted range |
608 | | * range of args (up to about 2**MANT_DIG). We assume that the current |
609 | | * rounding mode is FE_TONEAREST so that this can be done efficiently. |
610 | | * Extra precision causes more problems in practice, and we only centralize |
611 | | * this here to reduce those problems, and have not solved the efficiency |
612 | | * problems. The exp2() family uses a more delicate version of this that |
613 | | * requires extracting bits from the intermediate value, so it is not |
614 | | * centralized here and should copy any solution of the efficiency problems. |
615 | | */ |
616 | | |
617 | | static inline double |
618 | | rnint(__double_t x) |
619 | 64.0k | { |
620 | | /* |
621 | | * This casts to double to kill any extra precision. This depends |
622 | | * on the cast being applied to a double_t to avoid compiler bugs |
623 | | * (this is a cleaner version of STRICT_ASSIGN()). This is |
624 | | * inefficient if there actually is extra precision, but is hard |
625 | | * to improve on. We use double_t in the API to minimise conversions |
626 | | * for just calling here. Note that we cannot easily change the |
627 | | * magic number to the one that works directly with double_t, since |
628 | | * the rounding precision is variable at runtime on x86 so the |
629 | | * magic number would need to be variable. Assuming that the |
630 | | * rounding precision is always the default is too fragile. This |
631 | | * and many other complications will move when the default is |
632 | | * changed to FP_PE. |
633 | | */ |
634 | 64.0k | return ((double)(x + 0x1.8p52) - 0x1.8p52); |
635 | 64.0k | } |
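Why the magic constant works (editor's note with a worked example; the
demo function is hypothetical): 0x1.8p52 is 1.5 * 2**52, so for
|x| < 2**51 the sum x + 0x1.8p52 lies in (2**52, 2**53), where adjacent
doubles are exactly 1 apart; the assumed FE_TONEAREST addition therefore
rounds x to the nearest integer, and subtracting the constant recovers
it.

    static void hypothetical_rnint_demo(void)
    {
        /* Under FE_TONEAREST, ties round to even: */
        double r1 = rnint(2.5);   /* == 2.0 */
        double r2 = rnint(3.5);   /* == 4.0 */
        double r3 = rnint(-1.7);  /* == -2.0 */

        (void)r1; (void)r2; (void)r3;
    }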
636 | | |
637 | | static inline float |
638 | | rnintf(__float_t x) |
639 | 0 | { |
640 | 0 | /* |
641 | 0 | * As for rnint(), except we could just call that to handle the |
642 | 0 | * extra precision case, usually without losing efficiency. |
643 | 0 | */ |
644 | 0 | return ((float)(x + 0x1.8p23F) - 0x1.8p23F); |
645 | 0 | } |
646 | | |
647 | | //#ifdef LDBL_MANT_DIG |
648 | | ///* |
649 | | // * The complications for extra precision are smaller for rnintl() since it |
650 | | // * can safely assume that the rounding precision has been increased from |
651 | | // * its default to FP_PE on x86. We don't exploit that here to get small |
652 | | // * optimizations from limiting the range to double. We just need it for |
653 | | // * the magic number to work with long doubles. ld128 callers should use |
654 | | // * rnint() instead of this if possible. ld80 callers should prefer |
655 | | // * rnintl() since for amd64 this avoids swapping the register set, while |
656 | | // * for i386 it makes no difference (assuming FP_PE), and for other arches |
657 | | // * it makes little difference. |
658 | | // */ |
659 | | //static inline long double |
660 | | //rnintl(long double x) |
661 | | //{ |
662 | | // return (x + __CONCAT(0x1.8p, LDBL_MANT_DIG) / 2 - |
663 | | // __CONCAT(0x1.8p, LDBL_MANT_DIG) / 2); |
664 | | //} |
665 | | //#endif /* LDBL_MANT_DIG */ |
666 | | |
667 | | /* |
668 | | * irint() and i64rint() give the same result as casting to their integer |
669 | | * return type provided their arg is a floating point integer. They can |
670 | | * sometimes be more efficient because no rounding is required. |
671 | | */ |
672 | | #if defined(amd64) || defined(__i386__) |
673 | | #define irint(x) \ |
674 | | (sizeof(x) == sizeof(float) && \ |
675 | | sizeof(__float_t) == sizeof(long double) ? irintf(x) : \ |
676 | | sizeof(x) == sizeof(double) && \ |
677 | | sizeof(__double_t) == sizeof(long double) ? irintd(x) : \ |
678 | | sizeof(x) == sizeof(long double) ? irintl(x) : (int)(x)) |
679 | | #else |
680 | 64.0k | #define irint(x) ((int)(x)) |
681 | | #endif |
682 | | |
683 | | #define i64rint(x) ((int64_t)(x)) /* only needed for ld128 so not opt. */ |
684 | | |
685 | | #if defined(__i386__) |
686 | | static __inline int |
687 | | irintf(float x) |
688 | | { |
689 | | int n; |
690 | | |
691 | | __asm("fistl %0" : "=m" (n) : "t" (x)); |
692 | | return (n); |
693 | | } |
694 | | |
695 | | static __inline int |
696 | | irintd(double x) |
697 | | { |
698 | | int n; |
699 | | |
700 | | __asm("fistl %0" : "=m" (n) : "t" (x)); |
701 | | return (n); |
702 | | } |
703 | | #endif |
704 | | |
705 | | #if defined(__amd64__) || defined(__i386__) |
706 | | static __inline int |
707 | | irintl(long double x) |
708 | 0 | { |
709 | 0 | int n; |
710 | 0 |
|
711 | 0 | __asm("fistl %0" : "=m" (n) : "t" (x)); |
712 | 0 | return (n); |
713 | 0 | } |
714 | | #endif |
715 | | |
716 | | /* |
717 | | * The following are fast floor macros for 0 <= |x| < 0x1p(N-1), where |
718 | | * N is the precision of the type of x. These macros are used in the |
719 | | * half-cycle trigonometric functions (e.g., sinpi(x)). |
720 | | */ |
721 | | #define FFLOORF(x, j0, ix) do { \ |
722 | | (j0) = (((ix) >> 23) & 0xff) - 0x7f; \ |
723 | | (ix) &= ~(0x007fffff >> (j0)); \ |
724 | | SET_FLOAT_WORD((x), (ix)); \ |
725 | | } while (0) |
726 | | |
727 | | #define FFLOOR(x, j0, ix, lx) do { \ |
728 | | (j0) = (((ix) >> 20) & 0x7ff) - 0x3ff; \ |
729 | | if ((j0) < 20) { \ |
730 | | (ix) &= ~(0x000fffff >> (j0)); \ |
731 | | (lx) = 0; \ |
732 | | } else { \ |
733 | | (lx) &= ~((uint32_t)0xffffffff >> ((j0) - 20)); \ |
734 | | } \ |
735 | | INSERT_WORDS((x), (ix), (lx)); \ |
736 | | } while (0) |
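A hedged usage sketch of FFLOOR (hypothetical function; assumes
1 <= x < 0x1p52 so that j0 is non-negative and every shift is defined):

    static double hypothetical_ffloor_demo(double x)
    {
        int32_t j0, ix, lx;

        EXTRACT_WORDS(ix, lx, x);
        FFLOOR(x, j0, ix, lx);  /* clears the mantissa bits below the
                                   binary point, e.g. 5.75 becomes 5.0 */
        return (x);
    }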
737 | | |
738 | | #define FFLOORL80(x, j0, ix, lx) do { \ |
739 | | j0 = ix - 0x3fff + 1; \ |
740 | | if ((j0) < 32) { \ |
741 | | (lx) = ((lx) >> 32) << 32; \ |
742 | | (lx) &= ~((((lx) << 32)-1) >> (j0)); \ |
743 | | } else { \ |
744 | | uint64_t _m; \ |
745 | | _m = (uint64_t)-1 >> (j0); \ |
746 | | if ((lx) & _m) (lx) &= ~_m; \ |
747 | | } \ |
748 | | INSERT_LDBL80_WORDS((x), (ix), (lx)); \ |
749 | | } while (0) |
750 | | |
751 | | #define FFLOORL128(x, ai, ar) do { \ |
752 | | union IEEEl2bits u; \ |
753 | | uint64_t m; \ |
754 | | int e; \ |
755 | | u.e = (x); \ |
756 | | e = u.bits.exp - 16383; \ |
757 | | if (e < 48) { \ |
758 | | m = ((1llu << 49) - 1) >> (e + 1); \ |
759 | | u.bits.manh &= ~m; \ |
760 | | u.bits.manl = 0; \ |
761 | | } else { \ |
762 | | m = (uint64_t)-1 >> (e - 48); \ |
763 | | u.bits.manl &= ~m; \ |
764 | | } \ |
765 | | (ai) = u.e; \ |
766 | | (ar) = (x) - (ai); \ |
767 | | } while (0) |
768 | | |
769 | | /* |
770 | | * For a double entity split into high and low parts, compute ilogb. |
771 | | */ |
772 | | static inline int32_t |
773 | | subnormal_ilogb(int32_t hi, int32_t lo) |
774 | 15.7k | { |
775 | 15.7k | int32_t j; |
776 | 15.7k | uint32_t i; |
777 | | |
778 | 15.7k | j = -1022; |
779 | 15.7k | if (hi == 0) { |
780 | 10.4k | j -= 21; |
781 | 10.4k | i = (uint32_t)lo; |
782 | 10.4k | } else |
783 | 5.26k | i = (uint32_t)hi << 11; |
784 | | |
785 | 227k | for (; i < 0x7fffffff; i <<= 1) j -= 1; |
786 | | |
787 | 15.7k | return (j); |
788 | 15.7k | } |
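A worked example (editor's addition; the demo function is hypothetical):
for the smallest positive subnormal, hi == 0 and lo == 1, so j starts at
-1022 - 21 = -1043 and the loop shifts 31 more times, returning
-1074 == ilogb(0x1p-1074).

    static void hypothetical_subnormal_ilogb_demo(void)
    {
        int32_t hi, lo;
        double tiny = 0x1p-1074;  /* smallest positive subnormal */

        EXTRACT_WORDS(hi, lo, tiny);    /* hi == 0, lo == 1 */
        (void)subnormal_ilogb(hi, lo);  /* returns -1074 */
    }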
789 | | |
790 | | #ifdef DEBUG |
791 | | #if defined(__amd64__) || defined(__i386__) |
792 | | #define breakpoint() asm("int $3") |
793 | | #else |
794 | | #include <signal.h> |
795 | | |
796 | | #define breakpoint() raise(SIGTRAP) |
797 | | #endif |
798 | | #endif |
799 | | |
800 | | #ifdef STRUCT_RETURN |
801 | | #define RETURNSP(rp) do { \ |
802 | | if (!(rp)->lo_set) \ |
803 | | RETURNF((rp)->hi); \ |
804 | | RETURNF((rp)->hi + (rp)->lo); \ |
805 | | } while (0) |
806 | | #define RETURNSPI(rp) do { \ |
807 | | if (!(rp)->lo_set) \ |
808 | | RETURNI((rp)->hi); \ |
809 | | RETURNI((rp)->hi + (rp)->lo); \ |
810 | | } while (0) |
811 | | #endif |
812 | | |
813 | | #define SUM2P(x, y) ({ \ |
814 | | const __typeof (x) __x = (x); \ |
815 | | const __typeof (y) __y = (y); \ |
816 | | __x + __y; \ |
817 | | }) |
818 | | |
819 | | /* fdlibm kernel function */ |
820 | | int __kernel_rem_pio2(double*,double*,int,int,int); |
821 | | |
822 | | /* double precision kernel functions */ |
823 | | #ifndef INLINE_REM_PIO2 |
824 | | int __ieee754_rem_pio2(double,double*); |
825 | | #endif |
826 | | double __kernel_sin(double,double,int); |
827 | | double __kernel_cos(double,double); |
828 | | double __kernel_tan(double,double,int); |
829 | | double __ldexp_exp(double,int); |
830 | | #ifdef _COMPLEX_H |
831 | | double complex __ldexp_cexp(double complex,int); |
832 | | #endif |
833 | | |
834 | | /* float precision kernel functions */ |
835 | | #ifndef INLINE_REM_PIO2F |
836 | | int __ieee754_rem_pio2f(float,double*); |
837 | | #endif |
838 | | #ifndef INLINE_KERNEL_SINDF |
839 | | float __kernel_sindf(double); |
840 | | #endif |
841 | | #ifndef INLINE_KERNEL_COSDF |
842 | | float __kernel_cosdf(double); |
843 | | #endif |
844 | | #ifndef INLINE_KERNEL_TANDF |
845 | | float __kernel_tandf(double,int); |
846 | | #endif |
847 | | float __ldexp_expf(float,int); |
848 | | #ifdef _COMPLEX_H |
849 | | float complex __ldexp_cexpf(float complex,int); |
850 | | #endif |
851 | | |
852 | | /* long double precision kernel functions */ |
853 | | long double __kernel_sinl(long double, long double, int); |
854 | | long double __kernel_cosl(long double, long double); |
855 | | long double __kernel_tanl(long double, long double, int); |
856 | | |
857 | | #endif /* !_MATH_PRIVATE_H_ */ |