Coverage Report

Created: 2025-09-04 06:38

/src/moddable/xs/tools/fdlibm/math_private.h
 Line|  Count|Source
    1|       |/*
    2|       | * ====================================================
    3|       | * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
    4|       | *
    5|       | * Developed at SunPro, a Sun Microsystems, Inc. business.
    6|       | * Permission to use, copy, modify, and distribute this
    7|       | * software is freely granted, provided that this notice
    8|       | * is preserved.
    9|       | * ====================================================
   10|       | */
   11|       |
   12|       |#ifndef _MATH_PRIVATE_H_
   13|       |#define _MATH_PRIVATE_H_
   14|       |
   15|       |#include <sys/types.h>
   16|       |
   17|       |#include <stdint.h>
   18|       |#ifndef u_int32_t
   19|  50.4M|#define u_int32_t uint32_t
   20|       |#endif
   21|       |#ifndef u_int64_t
   22|       |#define u_int64_t uint64_t
   23|       |#endif
   24|       |
   25|       |#include <float.h>
   26|       |typedef double      __double_t;
   27|       |typedef __double_t  double_t;
   28|       |typedef float       __float_t;
   29|       |
   30|       |#include <math.h>
   31|       |// extern double fabs(double x);
   32|       |// extern long double fabsl(long double x);
   33|       |// extern double floor(double x);
   34|       |// extern double sqrt(double);
   35|       |
   36|       |#include "fdlibm.h"
   37|       |
   38|       |#ifndef LITTLE_ENDIAN
   39|       |  #define LITTLE_ENDIAN 1234
   40|       |#endif
   41|       |#ifndef BIG_ENDIAN
   42|       |  #define BIG_ENDIAN 4321
   43|       |#endif
   44|       |#ifndef BYTE_ORDER
   45|       |  #define BYTE_ORDER LITTLE_ENDIAN
   46|       |#endif
   47|       |
   48|       |/*
   49|       | * The original fdlibm code used statements like:
   50|       | *  n0 = ((*(int*)&one)>>29)^1;   * index of high word *
   51|       | *  ix0 = *(n0+(int*)&x);     * high word of x *
   52|       | *  ix1 = *((1-n0)+(int*)&x);   * low word of x *
   53|       | * to dig two 32 bit words out of the 64 bit IEEE floating point
   54|       | * value.  That is non-ANSI, and, moreover, the gcc instruction
   55|       | * scheduler gets it wrong.  We instead use the following macros.
   56|       | * Unlike the original code, we determine the endianness at compile
   57|       | * time, not at run time; I don't see much benefit to selecting
   58|       | * endianness at run time.
   59|       | */
   60|       |
   61|       |/*
   62|       | * A union which permits us to convert between a double and two 32 bit
   63|       | * ints.
   64|       | */
   65|       |
   66|       |#ifdef __arm__
   67|       |#if defined(__VFP_FP__) || defined(__ARM_EABI__)
   68|       |#define IEEE_WORD_ORDER BYTE_ORDER
   69|       |#else
   70|       |#define IEEE_WORD_ORDER BIG_ENDIAN
   71|       |#endif
   72|       |#else /* __arm__ */
   73|       |#define IEEE_WORD_ORDER BYTE_ORDER
   74|       |#endif
   75|       |
   76|       |/* A union which permits us to convert between a long double and
   77|       |   four 32 bit ints.  */
   78|       |
   79|       |#if IEEE_WORD_ORDER == BIG_ENDIAN
   80|       |
   81|       |typedef union
   82|       |{
   83|       |  long double value;
   84|       |  struct {
   85|       |    u_int32_t mswhi;
   86|       |    u_int32_t mswlo;
   87|       |    u_int32_t lswhi;
   88|       |    u_int32_t lswlo;
   89|       |  } parts32;
   90|       |  struct {
   91|       |    u_int64_t msw;
   92|       |    u_int64_t lsw;
   93|       |  } parts64;
   94|       |} ieee_quad_shape_type;
   95|       |
   96|       |#endif
   97|       |
   98|       |#if IEEE_WORD_ORDER == LITTLE_ENDIAN
   99|       |
  100|       |typedef union
  101|       |{
  102|       |  long double value;
  103|       |  struct {
  104|       |    u_int32_t lswlo;
  105|       |    u_int32_t lswhi;
  106|       |    u_int32_t mswlo;
  107|       |    u_int32_t mswhi;
  108|       |  } parts32;
  109|       |  struct {
  110|       |    u_int64_t lsw;
  111|       |    u_int64_t msw;
  112|       |  } parts64;
  113|       |} ieee_quad_shape_type;
  114|       |
  115|       |#endif
  116|       |
  117|       |#if IEEE_WORD_ORDER == BIG_ENDIAN
  118|       |
  119|       |typedef union
  120|       |{
  121|       |  double value;
  122|       |  struct
  123|       |  {
  124|       |    u_int32_t msw;
  125|       |    u_int32_t lsw;
  126|       |  } parts;
  127|       |  struct
  128|       |  {
  129|       |    u_int64_t w;
  130|       |  } xparts;
  131|       |} ieee_double_shape_type;
  132|       |
  133|       |#endif
  134|       |
  135|       |#if IEEE_WORD_ORDER == LITTLE_ENDIAN
  136|       |
  137|       |typedef union
  138|       |{
  139|       |  double value;
  140|       |  struct
  141|       |  {
  142|       |    u_int32_t lsw;
  143|       |    u_int32_t msw;
  144|       |  } parts;
  145|       |  struct
  146|       |  {
  147|       |    u_int64_t w;
  148|       |  } xparts;
  149|       |} ieee_double_shape_type;
  150|       |
  151|       |#endif
  152|       |
  153|       |/* Get two 32 bit ints from a double.  */
  154|       |
  155|  74.5M|#define EXTRACT_WORDS(ix0,ix1,d)        \
  156|  74.5M|do {                \
  157|  74.5M|  ieee_double_shape_type ew_u;          \
  158|  74.5M|  ew_u.value = (d);           \
  159|  74.5M|  (ix0) = ew_u.parts.msw;         \
  160|  74.5M|  (ix1) = ew_u.parts.lsw;         \
  161|  74.5M|} while (0)
  162|       |
  163|       |/* Get a 64-bit int from a double. */
  164|       |#define EXTRACT_WORD64(ix,d)          \
  165|       |do {                \
  166|       |  ieee_double_shape_type ew_u;          \
  167|       |  ew_u.value = (d);           \
  168|       |  (ix) = ew_u.xparts.w;           \
  169|       |} while (0)
  170|       |
  171|       |/* Get the more significant 32 bit int from a double.  */
  172|       |
  173|  5.27M|#define GET_HIGH_WORD(i,d)          \
  174|  5.27M|do {                \
  175|  5.27M|  ieee_double_shape_type gh_u;          \
  176|  5.27M|  gh_u.value = (d);           \
  177|  5.27M|  (i) = gh_u.parts.msw;           \
  178|  5.27M|} while (0)
  179|       |
  180|       |/* Get the less significant 32 bit int from a double.  */
  181|       |
  182|  25.9k|#define GET_LOW_WORD(i,d)         \
  183|  25.9k|do {                \
  184|  25.9k|  ieee_double_shape_type gl_u;          \
  185|  25.9k|  gl_u.value = (d);           \
  186|  25.9k|  (i) = gl_u.parts.lsw;           \
  187|  25.9k|} while (0)
  188|       |
  189|       |/* Set a double from two 32 bit ints.  */
  190|       |
  191|  13.0M|#define INSERT_WORDS(d,ix0,ix1)         \
  192|  13.0M|do {                \
  193|  13.0M|  ieee_double_shape_type iw_u;          \
  194|  13.0M|  iw_u.parts.msw = (ix0);         \
  195|  13.0M|  iw_u.parts.lsw = (ix1);         \
  196|  13.0M|  (d) = iw_u.value;           \
  197|  13.0M|} while (0)
  198|       |
  199|       |/* Set a double from a 64-bit int. */
  200|       |#define INSERT_WORD64(d,ix)         \
  201|       |do {                \
  202|       |  ieee_double_shape_type iw_u;          \
  203|       |  iw_u.xparts.w = (ix);           \
  204|       |  (d) = iw_u.value;           \
  205|       |} while (0)
  206|       |
  207|       |/* Set the more significant 32 bits of a double from an int.  */
  208|       |
  209|  11.8M|#define SET_HIGH_WORD(d,v)          \
  210|  11.8M|do {                \
  211|  11.8M|  ieee_double_shape_type sh_u;          \
  212|  11.8M|  sh_u.value = (d);           \
  213|  11.8M|  sh_u.parts.msw = (v);           \
  214|  11.8M|  (d) = sh_u.value;           \
  215|  11.8M|} while (0)
  216|       |
  217|       |/* Set the less significant 32 bits of a double from an int.  */
  218|       |
  219|  17.0M|#define SET_LOW_WORD(d,v)         \
  220|  17.0M|do {                \
  221|  17.0M|  ieee_double_shape_type sl_u;          \
  222|  17.0M|  sl_u.value = (d);           \
  223|  17.0M|  sl_u.parts.lsw = (v);           \
  224|  17.0M|  (d) = sl_u.value;           \
  225|  17.0M|} while (0)
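
A typical caller pulls the high word out, manipulates the sign or exponent bits, and writes it back. As a minimal illustrative sketch (not part of the covered file; the function name is hypothetical, but this is exactly how fdlibm's fabs() works):

    #include <stdint.h>

    /* Clear the IEEE-754 sign bit (bit 31 of the high word). */
    static double my_fabs(double x)
    {
        uint32_t hi;

        GET_HIGH_WORD(hi, x);               /* hi = sign|exponent|top mantissa */
        SET_HIGH_WORD(x, hi & 0x7fffffff);  /* drop the sign bit */
        return (x);
    }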
  226|       |
  227|       |/*
  228|       | * A union which permits us to convert between a float and a 32 bit
  229|       | * int.
  230|       | */
  231|       |
  232|       |typedef union
  233|       |{
  234|       |  float value;
  235|       |  /* FIXME: Assumes 32 bit int.  */
  236|       |  unsigned int word;
  237|       |} ieee_float_shape_type;
  238|       |
  239|       |/* Get a 32 bit int from a float.  */
  240|       |
  241|       |#define GET_FLOAT_WORD(i,d)         \
  242|       |do {                \
  243|       |  ieee_float_shape_type gf_u;         \
  244|       |  gf_u.value = (d);           \
  245|       |  (i) = gf_u.word;            \
  246|       |} while (0)
  247|       |
  248|       |/* Set a float from a 32 bit int.  */
  249|       |
  250|       |#define SET_FLOAT_WORD(d,i)         \
  251|       |do {                \
  252|       |  ieee_float_shape_type sf_u;         \
  253|       |  sf_u.word = (i);            \
  254|       |  (d) = sf_u.value;           \
  255|       |} while (0)
  256|       |
  257|       |/*
  258|       | * Get expsign and mantissa as 16 bit and 64 bit ints from an 80 bit long
  259|       | * double.
  260|       | */
  261|       |
  262|       |#define EXTRACT_LDBL80_WORDS(ix0,ix1,d)       \
  263|       |do {                \
  264|       |  union IEEEl2bits ew_u;          \
  265|       |  ew_u.e = (d);             \
  266|       |  (ix0) = ew_u.xbits.expsign;         \
  267|       |  (ix1) = ew_u.xbits.man;         \
  268|       |} while (0)
  269|       |
  270|       |/*
  271|       | * Get expsign and mantissa as one 16 bit and two 64 bit ints from a 128 bit
  272|       | * long double.
  273|       | */
  274|       |
  275|       |#define EXTRACT_LDBL128_WORDS(ix0,ix1,ix2,d)      \
  276|       |do {                \
  277|       |  union IEEEl2bits ew_u;          \
  278|       |  ew_u.e = (d);             \
  279|       |  (ix0) = ew_u.xbits.expsign;         \
  280|       |  (ix1) = ew_u.xbits.manh;          \
  281|       |  (ix2) = ew_u.xbits.manl;          \
  282|       |} while (0)
  283|       |
  284|       |/* Get expsign as a 16 bit int from a long double.  */
  285|       |
  286|       |#define GET_LDBL_EXPSIGN(i,d)         \
  287|       |do {                \
  288|       |  union IEEEl2bits ge_u;          \
  289|       |  ge_u.e = (d);             \
  290|       |  (i) = ge_u.xbits.expsign;         \
  291|       |} while (0)
  292|       |
  293|       |/*
  294|       | * Set an 80 bit long double from a 16 bit int expsign and a 64 bit int
  295|       | * mantissa.
  296|       | */
  297|       |
  298|       |#define INSERT_LDBL80_WORDS(d,ix0,ix1)        \
  299|       |do {                \
  300|       |  union IEEEl2bits iw_u;          \
  301|       |  iw_u.xbits.expsign = (ix0);         \
  302|       |  iw_u.xbits.man = (ix1);         \
  303|       |  (d) = iw_u.e;             \
  304|       |} while (0)
  305|       |
  306|       |/*
  307|       | * Set a 128 bit long double from a 16 bit int expsign and two 64 bit ints
  308|       | * comprising the mantissa.
  309|       | */
  310|       |
  311|       |#define INSERT_LDBL128_WORDS(d,ix0,ix1,ix2)     \
  312|       |do {                \
  313|       |  union IEEEl2bits iw_u;          \
  314|       |  iw_u.xbits.expsign = (ix0);         \
  315|       |  iw_u.xbits.manh = (ix1);          \
  316|       |  iw_u.xbits.manl = (ix2);          \
  317|       |  (d) = iw_u.e;             \
  318|       |} while (0)
  319|       |
  320|       |/* Set expsign of a long double from a 16 bit int.  */
  321|       |
  322|       |#define SET_LDBL_EXPSIGN(d,v)         \
  323|       |do {                \
  324|       |  union IEEEl2bits se_u;          \
  325|       |  se_u.e = (d);             \
  326|       |  se_u.xbits.expsign = (v);         \
  327|       |  (d) = se_u.e;             \
  328|       |} while (0)
  329|       |
  330|       |#ifdef __i386__
  331|       |/* Long double constants are broken on i386. */
  332|       |#define LD80C(m, ex, v) {           \
  333|       |  .xbits.man = __CONCAT(m, ULL),          \
  334|       |  .xbits.expsign = (0x3fff + (ex)) | ((v) < 0 ? 0x8000 : 0),  \
  335|       |}
  336|       |#else
  337|       |/* The above works on non-i386 too, but we use this to check v. */
  338|       |#define LD80C(m, ex, v) { .e = (v), }
  339|       |#endif
  340|       |
  341|       |#ifdef FLT_EVAL_METHOD
  342|       |/*
  343|       | * Attempt to get strict C99 semantics for assignment with non-C99 compilers.
  344|       | */
  345|       |#if FLT_EVAL_METHOD == 0 || __GNUC__ == 0
  346|   652k|#define STRICT_ASSIGN(type, lval, rval) ((lval) = (rval))
  347|       |#else
  348|       |#define STRICT_ASSIGN(type, lval, rval) do {  \
  349|       |  volatile type __lval;     \
  350|       |            \
  351|       |  if (sizeof(type) >= sizeof(long double))  \
  352|       |    (lval) = (rval);    \
  353|       |  else {          \
  354|       |    __lval = (rval);    \
  355|       |    (lval) = __lval;    \
  356|       |  }         \
  357|       |} while (0)
  358|       |#endif
  359|       |#endif /* FLT_EVAL_METHOD */
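
The point of the volatile round trip is to force a store, discarding any x87-style extra precision that a nonzero FLT_EVAL_METHOD would otherwise let the compiler keep in a register. A minimal usage sketch (hypothetical caller, not part of the covered file):

    /* Without the forced store, a + b may be computed in 80-bit registers
     * and carry extra precision into later comparisons. */
    static double narrow_sum(double a, double b)
    {
        double s;

        STRICT_ASSIGN(double, s, a + b);  /* s now holds a true double */
        return (s);
    }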
  360|       |
  361|       |/* Support switching the mode to FP_PE if necessary. */
  362|       |#if defined(__i386__) && !defined(NO_FPSETPREC)
  363|       |#define ENTERI() ENTERIT(long double)
  364|       |#define ENTERIT(returntype)     \
  365|       |  returntype __retval;      \
  366|       |  fp_prec_t __oprec;      \
  367|       |            \
  368|       |  if ((__oprec = fpgetprec()) != FP_PE) \
  369|       |    fpsetprec(FP_PE)
  370|       |#define RETURNI(x) do {       \
  371|       |  __retval = (x);       \
  372|       |  if (__oprec != FP_PE)     \
  373|       |    fpsetprec(__oprec);   \
  374|       |  RETURNF(__retval);      \
  375|       |} while (0)
  376|       |#define ENTERV()        \
  377|       |  fp_prec_t __oprec;      \
  378|       |            \
  379|       |  if ((__oprec = fpgetprec()) != FP_PE) \
  380|       |    fpsetprec(FP_PE)
  381|       |#define RETURNV() do {        \
  382|       |  if (__oprec != FP_PE)     \
  383|       |    fpsetprec(__oprec);   \
  384|       |  return;     \
  385|       |} while (0)
  386|       |#else
  387|       |#define ENTERI()
  388|       |#define ENTERIT(x)
  389|       |#define RETURNI(x)  RETURNF(x)
  390|       |#define ENTERV()
  391|       |#define RETURNV() return
  392|       |#endif
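
The intended calling convention: a long double function brackets its body with ENTERI()/RETURNI() so that i386's default 53-bit x87 rounding precision is raised to FP_PE for the computation and restored on return; on other targets the macros collapse to a plain return. A hedged sketch (hypothetical function name):

    static long double
    dummy_identityl(long double x)
    {
        ENTERI();      /* declares __retval/__oprec, bumps precision on i386 */
        RETURNI(x);    /* restores the old precision, then returns */
    }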
  393|       |
  394|       |/* Default return statement if hack*_t() is not used. */
  395|       |#define RETURNF(v)      return (v)
  396|       |
  397|       |/*
  398|       | * 2sum gives the same result as 2sumF without requiring |a| >= |b| or
  399|       | * a == 0, but is slower.
  400|       | */
  401|       |#define _2sum(a, b) do {  \
  402|       |  __typeof(a) __s, __w; \
  403|       |        \
  404|       |  __w = (a) + (b);  \
  405|       |  __s = __w - (a);  \
  406|       |  (b) = ((a) - (__w - __s)) + ((b) - __s); \
  407|       |  (a) = __w;    \
  408|       |} while (0)
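
After the macro runs, 'a' holds the rounded sum and 'b' the exact rounding error, so the pair (a, b) represents the mathematically exact sum. A small demonstration sketch (hypothetical driver, assuming IEEE-754 doubles with round-to-nearest):

    #include <stdio.h>

    int main(void)
    {
        double a = 1.0e16, b = 1.0;   /* b is lost in a naive a + b */

        _2sum(a, b);
        /* Here fl(1.0e16 + 1.0) == 1.0e16, so a stays 1.0e16 and b
         * returns as 1.0: the part a naive sum would have dropped. */
        printf("sum=%.17g err=%.17g\n", a, b);
        return 0;
    }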
  409|       |
  410|       |/*
  411|       | * 2sumF algorithm.
  412|       | *
  413|       | * "Normalize" the terms in the infinite-precision expression a + b for
  414|       | * the sum of 2 floating point values so that b is as small as possible
  415|       | * relative to 'a'.  (The resulting 'a' is the value of the expression in
  416|       | * the same precision as 'a' and the resulting b is the rounding error.)
  417|       | * |a| must be >= |b| or 0, b's type must be no larger than 'a's type, and
  418|       | * exponent overflow or underflow must not occur.  This uses a Theorem of
  419|       | * Dekker (1971).  See Knuth (1981) 4.2.2 Theorem C.  The name "TwoSum"
  420|       | * is apparently due to Shewchuk (1997).
  421|       | *
  422|       | * For this to always work, assignment of a + b to 'a' must not retain any
  423|       | * extra precision in a + b.  This is required by C standards but broken
  424|       | * in many compilers.  The brokenness cannot be worked around using
  425|       | * STRICT_ASSIGN() like we do elsewhere, since the efficiency of this
  426|       | * algorithm would be destroyed by non-null strict assignments.  (The
  427|       | * compilers are correct to be broken -- the efficiency of all floating
  428|       | * point code calculations would be destroyed similarly if they forced the
  429|       | * conversions.)
  430|       | *
  431|       | * Fortunately, a case that works well can usually be arranged by building
  432|       | * any extra precision into the type of 'a' -- 'a' should have type float_t,
  433|       | * double_t or long double.  b's type should be no larger than 'a's type.
  434|       | * Callers should use these types with scopes as large as possible, to
  435|       | * reduce their own extra-precision and efficiency problems.  In
  436|       | * particular, they shouldn't convert back and forth just to call here.
  437|       | */
  438|       |#ifdef DEBUG
  439|       |#define _2sumF(a, b) do {       \
  440|       |  __typeof(a) __w;        \
  441|       |  volatile __typeof(a) __ia, __ib, __r, __vw; \
  442|       |              \
  443|       |  __ia = (a);         \
  444|       |  __ib = (b);         \
  445|       |  assert(__ia == 0 || fabsl(__ia) >= fabsl(__ib));  \
  446|       |              \
  447|       |  __w = (a) + (b);        \
  448|       |  (b) = ((a) - __w) + (b);      \
  449|       |  (a) = __w;          \
  450|       |              \
  451|       |  /* The next 2 assertions are weak if (a) is already long double. */ \
  452|       |  assert((long double)__ia + __ib == (long double)(a) + (b)); \
  453|       |  __vw = __ia + __ib;       \
  454|       |  __r = __ia - __vw;        \
  455|       |  __r += __ib;          \
  456|       |  assert(__vw == (a) && __r == (b));    \
  457|       |} while (0)
  458|       |#else /* !DEBUG */
  459|       |#define _2sumF(a, b) do { \
  460|       |  __typeof(a) __w;  \
  461|       |        \
  462|       |  __w = (a) + (b);  \
  463|       |  (b) = ((a) - __w) + (b); \
  464|       |  (a) = __w;    \
  465|       |} while (0)
  466|       |#endif /* DEBUG */
  467|       |
  468|       |/*
  469|       | * Set x += c, where x is represented in extra precision as a + b.
  470|       | * x must be sufficiently normalized and sufficiently larger than c,
  471|       | * and the result is then sufficiently normalized.
  472|       | *
  473|       | * The details of ordering are that |a| must be >= |c| (so that (a, c)
  474|       | * can be normalized without extra work to swap 'a' with c).  The details of
  475|       | * the normalization are that b must be small relative to the normalized 'a'.
  476|       | * Normalization of (a, c) makes the normalized c tiny relative to the
  477|       | * normalized a, so b remains small relative to 'a' in the result.  However,
  478|       | * b need not ever be tiny relative to 'a'.  For example, b might be about
  479|       | * 2**20 times smaller than 'a' to give about 20 extra bits of precision.
  480|       | * That is usually enough, and adding c (which by normalization is about
  481|       | * 2**53 times smaller than a) cannot change b significantly.  However,
  482|       | * cancellation of 'a' with c in normalization of (a, c) may reduce 'a'
  483|       | * significantly relative to b.  The caller must ensure that significant
  484|       | * cancellation doesn't occur, either by having c of the same sign as 'a',
  485|       | * or by having |c| a few percent smaller than |a|.  Pre-normalization of
  486|       | * (a, b) may help.
  487|       | *
  488|       | * This is a variant of an algorithm of Kahan (see Knuth (1981) 4.2.2
  489|       | * exercise 19).  We gain considerable efficiency by requiring the terms to
  490|       | * be sufficiently normalized and sufficiently increasing.
  491|       | */
  492|       |#define _3sumF(a, b, c) do {  \
  493|       |  __typeof(a) __tmp;  \
  494|       |        \
  495|       |  __tmp = (c);    \
  496|       |  _2sumF(__tmp, (a)); \
  497|       |  (b) += (a);   \
  498|       |  (a) = __tmp;    \
  499|       |} while (0)
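
Operationally, the pair (a, b) is an extra-precision accumulator and _3sumF() folds c into it: 'a' absorbs the high part of the update and 'b' collects the rounding error. A single-step sketch with hypothetical values (assuming the ordering preconditions described above hold):

    double_t a = 0x1p0, b = 0x1p-60;   /* x = a + b in extra precision */
    double_t c = 0x1p1;                /* term to fold in */

    _3sumF(a, b, c);
    /* Now a == 0x1.8p1 (i.e. 3.0) and b == 0x1p-60: a + b approximates
     * the old (a + b) + c, with the error pushed down into b. */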
  500|       |
  501|       |/*
  502|       | * Common routine to process the arguments to nan(), nanf(), and nanl().
  503|       | */
  504|       |void _scan_nan(uint32_t *__words, int __num_words, const char *__s);
  505|       |
  506|       |/*
  507|       | * Mix 0, 1 or 2 NaNs.  First add 0 to each arg.  This normally just turns
  508|       | * signaling NaNs into quiet NaNs by setting a quiet bit.  We do this
  509|       | * because we want to never return a signaling NaN, and also because we
  510|       | * don't want the quiet bit to affect the result.  Then mix the converted
  511|       | * args using the specified operation.
  512|       | *
  513|       | * When one arg is NaN, the result is typically that arg quieted.  When both
  514|       | * args are NaNs, the result is typically the quietening of the arg whose
  515|       | * mantissa is largest after quietening.  When neither arg is NaN, the
  516|       | * result may be NaN because it is indeterminate, or finite for subsequent
  517|       | * construction of a NaN as the indeterminate 0.0L/0.0L.
  518|       | *
  519|       | * Technical complications: the result in bits after rounding to the final
  520|       | * precision might depend on the runtime precision and/or on compiler
  521|       | * optimizations, especially when different register sets are used for
  522|       | * different precisions.  Try to make the result not depend on at least the
  523|       | * runtime precision by always doing the main mixing step in long double
  524|       | * precision.  Try to reduce dependencies on optimizations by adding
  525|       | * the 0's in different precisions (unless everything is in long double
  526|       | * precision).
  527|       | */
  528|   989k|#define nan_mix(x, y)   (nan_mix_op((x), (y), +))
  529|  5.39M|#define nan_mix_op(x, y, op)  (((x) + 0.0L) op ((y) + 0))
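
A two-argument function typically funnels NaN operands through nan_mix() so that a signaling NaN is quieted before being returned. A hedged sketch of the pattern (hypothetical function, modeled on how callers such as the pow() family return NaNs):

    static double two_arg_func(double x, double y)
    {
        if (isnan(x) || isnan(y))
            return (nan_mix(x, y));   /* quiet and mix the NaN args */
        /* ... normal computation ... */
        return (x + y);
    }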
  530|       |
  531|       |#ifdef _COMPLEX_H
  532|       |
  533|       |/*
  534|       | * C99 specifies that complex numbers have the same representation as
  535|       | * an array of two elements, where the first element is the real part
  536|       | * and the second element is the imaginary part.
  537|       | */
  538|       |typedef union {
  539|       |  float complex f;
  540|       |  float a[2];
  541|       |} float_complex;
  542|       |typedef union {
  543|       |  double complex f;
  544|       |  double a[2];
  545|       |} double_complex;
  546|       |typedef union {
  547|       |  long double complex f;
  548|       |  long double a[2];
  549|       |} long_double_complex;
  550|       |#define REALPART(z) ((z).a[0])
  551|       |#define IMAGPART(z) ((z).a[1])
  552|       |
  553|       |/*
  554|       | * Inline functions that can be used to construct complex values.
  555|       | *
  556|       | * The C99 standard intends x+I*y to be used for this, but x+I*y is
  557|       | * currently unusable in general since gcc introduces many overflow,
  558|       | * underflow, sign and efficiency bugs by rewriting I*y as
  559|       | * (0.0+I)*(y+0.0*I) and laboriously computing the full complex product.
  560|       | * In particular, I*Inf is corrupted to NaN+I*Inf, and I*-0 is corrupted
  561|       | * to -0.0+I*0.0.
  562|       | *
  563|       | * The C11 standard introduced the macros CMPLX(), CMPLXF() and CMPLXL()
  564|       | * to construct complex values.  Compilers that conform to the C99
  565|       | * standard require the following functions to avoid the above issues.
  566|       | */
  567|       |
  568|       |#ifndef CMPLXF
  569|       |static __inline float complex
  570|       |CMPLXF(float x, float y)
  571|       |{
  572|       |  float_complex z;
  573|       |
  574|       |  REALPART(z) = x;
  575|       |  IMAGPART(z) = y;
  576|       |  return (z.f);
  577|       |}
  578|       |#endif
  579|       |
  580|       |#ifndef CMPLX
  581|       |static __inline double complex
  582|       |CMPLX(double x, double y)
  583|       |{
  584|       |  double_complex z;
  585|       |
  586|       |  REALPART(z) = x;
  587|       |  IMAGPART(z) = y;
  588|       |  return (z.f);
  589|       |}
  590|       |#endif
  591|       |
  592|       |#ifndef CMPLXL
  593|       |static __inline long double complex
  594|       |CMPLXL(long double x, long double y)
  595|       |{
  596|       |  long_double_complex z;
  597|       |
  598|       |  REALPART(z) = x;
  599|       |  IMAGPART(z) = y;
  600|       |  return (z.f);
  601|       |}
  602|       |#endif
  603|       |
  604|       |#endif /* _COMPLEX_H */
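
Because the union stores the two parts directly, special values survive construction intact. A hedged comparison sketch (hypothetical test, assuming <complex.h> and <math.h> are available):

    #include <complex.h>
    #include <math.h>

    static void make_imaginary_inf(void)
    {
        double complex good = CMPLX(0.0, INFINITY); /* exactly 0 + Inf*I */
        double complex bad  = 0.0 + I * INFINITY;   /* may become NaN + Inf*I
                                                     * via the full complex
                                                     * product, as noted above */
        (void)good; (void)bad;
    }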
  605|       |
  606|       |/*
  607|       | * The rnint() family rounds to the nearest integer for a restricted
  608|       | * range of args (up to about 2**MANT_DIG).  We assume that the current
  609|       | * rounding mode is FE_TONEAREST so that this can be done efficiently.
  610|       | * Extra precision causes more problems in practice, and we only centralize
  611|       | * this here to reduce those problems, and have not solved the efficiency
  612|       | * problems.  The exp2() family uses a more delicate version of this that
  613|       | * requires extracting bits from the intermediate value, so it is not
  614|       | * centralized here and should copy any solution of the efficiency problems.
  615|       | */
  616|       |
  617|       |static inline double
  618|       |rnint(__double_t x)
  619|  64.0k|{
  620|       |  /*
  621|       |   * This casts to double to kill any extra precision.  This depends
  622|       |   * on the cast being applied to a double_t to avoid compiler bugs
  623|       |   * (this is a cleaner version of STRICT_ASSIGN()).  This is
  624|       |   * inefficient if there actually is extra precision, but is hard
  625|       |   * to improve on.  We use double_t in the API to minimise conversions
  626|       |   * for just calling here.  Note that we cannot easily change the
  627|       |   * magic number to the one that works directly with double_t, since
  628|       |   * the rounding precision is variable at runtime on x86 so the
  629|       |   * magic number would need to be variable.  Assuming that the
  630|       |   * rounding precision is always the default is too fragile.  This
  631|       |   * and many other complications will move when the default is
  632|       |   * changed to FP_PE.
  633|       |   */
  634|  64.0k|  return ((double)(x + 0x1.8p52) - 0x1.8p52);
  635|  64.0k|}
Unexecuted instantiation: e_acos.c:rnint
Unexecuted instantiation: e_acosh.c:rnint
Unexecuted instantiation: e_asin.c:rnint
Unexecuted instantiation: e_atan2.c:rnint
Unexecuted instantiation: e_atanh.c:rnint
Unexecuted instantiation: e_cosh.c:rnint
Unexecuted instantiation: e_exp.c:rnint
Unexecuted instantiation: e_fmod.c:rnint
Unexecuted instantiation: e_hypot.c:rnint
Unexecuted instantiation: e_log.c:rnint
Unexecuted instantiation: e_log10.c:rnint
Unexecuted instantiation: e_pow.c:rnint
Unexecuted instantiation: e_rem_pio2.c:rnint
Unexecuted instantiation: e_sinh.c:rnint
Unexecuted instantiation: k_cos.c:rnint
Unexecuted instantiation: k_exp.c:rnint
Unexecuted instantiation: k_rem_pio2.c:rnint
Unexecuted instantiation: k_sin.c:rnint
Unexecuted instantiation: k_tan.c:rnint
Unexecuted instantiation: s_asinh.c:rnint
Unexecuted instantiation: s_atan.c:rnint
Unexecuted instantiation: s_cbrt.c:rnint
Unexecuted instantiation: s_ceil.c:rnint
s_cos.c:rnint — lines 619-635 executed 61.6k times (body identical to the definition above)
Unexecuted instantiation: s_expm1.c:rnint
Unexecuted instantiation: s_log1p.c:rnint
Unexecuted instantiation: s_scalbn.c:rnint
s_sin.c:rnint — lines 619-635 executed 852 times (body identical to the definition above)
s_tan.c:rnint — lines 619-635 executed 1.45k times (body identical to the definition above)
Unexecuted instantiation: s_tanh.c:rnint
Unexecuted instantiation: s_trunc.c:rnint
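
The magic constant 0x1.8p52 is 1.5 * 2**52: for |x| up to about 2**51, x + 0x1.8p52 has its unit in the last place at exactly 1.0, so the addition itself rounds x to the nearest integer under FE_TONEAREST, and subtracting the constant recovers that integer. A small demonstration sketch (hypothetical driver):

    #include <stdio.h>

    int main(void)
    {
        /* Ties round to even: 2.5 -> 2, 3.5 -> 4. */
        printf("%g %g %g\n", rnint(2.3), rnint(2.5), rnint(3.5));
        return 0;
    }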
  636|       |
  637|       |static inline float
  638|       |rnintf(__float_t x)
  639|      0|{
  640|      0|  /*
  641|      0|   * As for rnint(), except we could just call that to handle the
  642|      0|   * extra precision case, usually without losing efficiency.
  643|      0|   */
  644|      0|  return ((float)(x + 0x1.8p23F) - 0x1.8p23F);
  645|      0|}
Unexecuted instantiation: e_acos.c:rnintf
Unexecuted instantiation: e_acosh.c:rnintf
Unexecuted instantiation: e_asin.c:rnintf
Unexecuted instantiation: e_atan2.c:rnintf
Unexecuted instantiation: e_atanh.c:rnintf
Unexecuted instantiation: e_cosh.c:rnintf
Unexecuted instantiation: e_exp.c:rnintf
Unexecuted instantiation: e_fmod.c:rnintf
Unexecuted instantiation: e_hypot.c:rnintf
Unexecuted instantiation: e_log.c:rnintf
Unexecuted instantiation: e_log10.c:rnintf
Unexecuted instantiation: e_pow.c:rnintf
Unexecuted instantiation: e_rem_pio2.c:rnintf
Unexecuted instantiation: e_sinh.c:rnintf
Unexecuted instantiation: k_cos.c:rnintf
Unexecuted instantiation: k_exp.c:rnintf
Unexecuted instantiation: k_rem_pio2.c:rnintf
Unexecuted instantiation: k_sin.c:rnintf
Unexecuted instantiation: k_tan.c:rnintf
Unexecuted instantiation: s_asinh.c:rnintf
Unexecuted instantiation: s_atan.c:rnintf
Unexecuted instantiation: s_cbrt.c:rnintf
Unexecuted instantiation: s_ceil.c:rnintf
Unexecuted instantiation: s_cos.c:rnintf
Unexecuted instantiation: s_expm1.c:rnintf
Unexecuted instantiation: s_log1p.c:rnintf
Unexecuted instantiation: s_scalbn.c:rnintf
Unexecuted instantiation: s_sin.c:rnintf
Unexecuted instantiation: s_tan.c:rnintf
Unexecuted instantiation: s_tanh.c:rnintf
Unexecuted instantiation: s_trunc.c:rnintf
  646|       |
  647|       |//#ifdef LDBL_MANT_DIG
  648|       |///*
  649|       |// * The complications for extra precision are smaller for rnintl() since it
  650|       |// * can safely assume that the rounding precision has been increased from
  651|       |// * its default to FP_PE on x86.  We don't exploit that here to get small
  652|       |// * optimizations from limiting the range to double.  We just need it for
  653|       |// * the magic number to work with long doubles.  ld128 callers should use
  654|       |// * rnint() instead of this if possible.  ld80 callers should prefer
  655|       |// * rnintl() since for amd64 this avoids swapping the register set, while
  656|       |// * for i386 it makes no difference (assuming FP_PE), and for other arches
  657|       |// * it makes little difference.
  658|       |// */
  659|       |//static inline long double
  660|       |//rnintl(long double x)
  661|       |//{
  662|       |//  return (x + __CONCAT(0x1.8p, LDBL_MANT_DIG) / 2 -
  663|       |//      __CONCAT(0x1.8p, LDBL_MANT_DIG) / 2);
  664|       |//}
  665|       |//#endif /* LDBL_MANT_DIG */
  666|       |
  667|       |/*
  668|       | * irint() and i64rint() give the same result as casting to their integer
  669|       | * return type provided their arg is a floating point integer.  They can
  670|       | * sometimes be more efficient because no rounding is required.
  671|       | */
  672|       |#if defined(amd64) || defined(__i386__)
  673|       |#define irint(x)            \
  674|       |    (sizeof(x) == sizeof(float) &&        \
  675|       |    sizeof(__float_t) == sizeof(long double) ? irintf(x) :  \
  676|       |    sizeof(x) == sizeof(double) &&        \
  677|       |    sizeof(__double_t) == sizeof(long double) ? irintd(x) : \
  678|       |    sizeof(x) == sizeof(long double) ? irintl(x) : (int)(x))
  679|       |#else
  680|  64.0k|#define irint(x)  ((int)(x))
  681|       |#endif
  682|       |
  683|       |#define i64rint(x)  ((int64_t)(x))  /* only needed for ld128 so not opt. */
  684|       |
  685|       |#if defined(__i386__)
  686|       |static __inline int
  687|       |irintf(float x)
  688|       |{
  689|       |  int n;
  690|       |
  691|       |  __asm("fistl %0" : "=m" (n) : "t" (x));
  692|       |  return (n);
  693|       |}
  694|       |
  695|       |static __inline int
  696|       |irintd(double x)
  697|       |{
  698|       |  int n;
  699|       |
  700|       |  __asm("fistl %0" : "=m" (n) : "t" (x));
  701|       |  return (n);
  702|       |}
  703|       |#endif
  704|       |
  705|       |#if defined(__amd64__) || defined(__i386__)
  706|       |static __inline int
  707|       |irintl(long double x)
  708|      0|{
  709|      0|  int n;
  710|      0|
  711|      0|  __asm("fistl %0" : "=m" (n) : "t" (x));
  712|      0|  return (n);
  713|      0|}
Unexecuted instantiation: e_acos.c:irintl
Unexecuted instantiation: e_acosh.c:irintl
Unexecuted instantiation: e_asin.c:irintl
Unexecuted instantiation: e_atan2.c:irintl
Unexecuted instantiation: e_atanh.c:irintl
Unexecuted instantiation: e_cosh.c:irintl
Unexecuted instantiation: e_exp.c:irintl
Unexecuted instantiation: e_fmod.c:irintl
Unexecuted instantiation: e_hypot.c:irintl
Unexecuted instantiation: e_log.c:irintl
Unexecuted instantiation: e_log10.c:irintl
Unexecuted instantiation: e_pow.c:irintl
Unexecuted instantiation: e_rem_pio2.c:irintl
Unexecuted instantiation: e_sinh.c:irintl
Unexecuted instantiation: k_cos.c:irintl
Unexecuted instantiation: k_exp.c:irintl
Unexecuted instantiation: k_rem_pio2.c:irintl
Unexecuted instantiation: k_sin.c:irintl
Unexecuted instantiation: k_tan.c:irintl
Unexecuted instantiation: s_asinh.c:irintl
Unexecuted instantiation: s_atan.c:irintl
Unexecuted instantiation: s_cbrt.c:irintl
Unexecuted instantiation: s_ceil.c:irintl
Unexecuted instantiation: s_cos.c:irintl
Unexecuted instantiation: s_expm1.c:irintl
Unexecuted instantiation: s_log1p.c:irintl
Unexecuted instantiation: s_scalbn.c:irintl
Unexecuted instantiation: s_sin.c:irintl
Unexecuted instantiation: s_tan.c:irintl
Unexecuted instantiation: s_tanh.c:irintl
Unexecuted instantiation: s_trunc.c:irintl
  714|       |#endif
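
On the fallback path irint() is just a cast; on the x87 paths the fistl variants convert straight from the register, which is why the argument must already be a floating point integer. A hedged sketch of the usual pairing with rnint() (hypothetical helper):

    static int round_to_int(double x)
    {
        double r = rnint(x);   /* nearest integer, still as a double */
        return (irint(r));     /* same result as (int)r, possibly cheaper */
    }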
  715|       |
  716|       |/*
  717|       | * The following are fast floor macros for 0 <= |x| < 0x1p(N-1), where
  718|       | * N is the precision of the type of x. These macros are used in the
  719|       | * half-cycle trigonometric functions (e.g., sinpi(x)).
  720|       | */
  721|       |#define FFLOORF(x, j0, ix) do {     \
  722|       |  (j0) = (((ix) >> 23) & 0xff) - 0x7f;  \
  723|       |  (ix) &= ~(0x007fffff >> (j0));    \
  724|       |  SET_FLOAT_WORD((x), (ix));    \
  725|       |} while (0)
  726|       |
  727|       |#define FFLOOR(x, j0, ix, lx) do {        \
  728|       |  (j0) = (((ix) >> 20) & 0x7ff) - 0x3ff;      \
  729|       |  if ((j0) < 20) {          \
  730|       |    (ix) &= ~(0x000fffff >> (j0));      \
  731|       |    (lx) = 0;         \
  732|       |  } else {            \
  733|       |    (lx) &= ~((uint32_t)0xffffffff >> ((j0) - 20)); \
  734|       |  }             \
  735|       |  INSERT_WORDS((x), (ix), (lx));        \
  736|       |} while (0)
  737|       |
  738|       |#define FFLOORL80(x, j0, ix, lx) do {     \
  739|       |  j0 = ix - 0x3fff + 1;       \
  740|       |  if ((j0) < 32) {        \
  741|       |    (lx) = ((lx) >> 32) << 32;    \
  742|       |    (lx) &= ~((((lx) << 32)-1) >> (j0));  \
  743|       |  } else {          \
  744|       |    uint64_t _m;        \
  745|       |    _m = (uint64_t)-1 >> (j0);    \
  746|       |    if ((lx) & _m) (lx) &= ~_m;   \
  747|       |  }           \
  748|       |  INSERT_LDBL80_WORDS((x), (ix), (lx));   \
  749|       |} while (0)
  750|       |
  751|       |#define FFLOORL128(x, ai, ar) do {      \
  752|       |  union IEEEl2bits u;       \
  753|       |  uint64_t m;         \
  754|       |  int e;            \
  755|       |  u.e = (x);          \
  756|       |  e = u.bits.exp - 16383;       \
  757|       |  if (e < 48) {         \
  758|       |    m = ((1llu << 49) - 1) >> (e + 1);  \
  759|       |    u.bits.manh &= ~m;      \
  760|       |    u.bits.manl = 0;      \
  761|       |  } else {          \
  762|       |    m = (uint64_t)-1 >> (e - 48);   \
  763|       |    u.bits.manl &= ~m;      \
  764|       |  }           \
  765|       |  (ai) = u.e;         \
  766|       |  (ar) = (x) - (ai);        \
  767|       |} while (0)
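
These macros compute floor by clearing the fraction bits below the binary point directly in the bit representation. A hedged sketch of the double variant (hypothetical caller; only valid when j0 is non-negative, i.e. roughly 1 <= x < 0x1p52, since the shifts are undefined for a negative j0):

    #include <stdint.h>

    static double fast_floor(double x)
    {
        int32_t j0;
        uint32_t ix, lx;

        EXTRACT_WORDS(ix, lx, x);
        FFLOOR(x, j0, ix, lx);   /* x now has its fraction bits cleared */
        return (x);
    }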
  768|       |
  769|       |/*
  770|       | * For a double entity split into high and low parts, compute ilogb.
  771|       | */
  772|       |static inline int32_t
  773|       |subnormal_ilogb(int32_t hi, int32_t lo)
  774|  15.7k|{
  775|  15.7k|  int32_t j;
  776|  15.7k|  uint32_t i;
  777|       |
  778|  15.7k|  j = -1022;
  779|  15.7k|  if (hi == 0) {
  780|  10.4k|      j -= 21;
  781|  10.4k|      i = (uint32_t)lo;
  782|  10.4k|  } else
  783|  5.26k|      i = (uint32_t)hi << 11;
  784|       |
  785|   227k|  for (; i < 0x7fffffff; i <<= 1) j -= 1;
  786|       |
  787|  15.7k|  return (j);
  788|  15.7k|}
Unexecuted instantiation: e_acos.c:subnormal_ilogb
Unexecuted instantiation: e_acosh.c:subnormal_ilogb
Unexecuted instantiation: e_asin.c:subnormal_ilogb
Unexecuted instantiation: e_atan2.c:subnormal_ilogb
Unexecuted instantiation: e_atanh.c:subnormal_ilogb
Unexecuted instantiation: e_cosh.c:subnormal_ilogb
Unexecuted instantiation: e_exp.c:subnormal_ilogb
e_fmod.c:subnormal_ilogb — lines 774-788 executed 15.7k times (body identical to the definition above)
Unexecuted instantiation: e_hypot.c:subnormal_ilogb
Unexecuted instantiation: e_log.c:subnormal_ilogb
Unexecuted instantiation: e_log10.c:subnormal_ilogb
Unexecuted instantiation: e_pow.c:subnormal_ilogb
Unexecuted instantiation: e_rem_pio2.c:subnormal_ilogb
Unexecuted instantiation: e_sinh.c:subnormal_ilogb
Unexecuted instantiation: k_cos.c:subnormal_ilogb
Unexecuted instantiation: k_exp.c:subnormal_ilogb
Unexecuted instantiation: k_rem_pio2.c:subnormal_ilogb
Unexecuted instantiation: k_sin.c:subnormal_ilogb
Unexecuted instantiation: k_tan.c:subnormal_ilogb
Unexecuted instantiation: s_asinh.c:subnormal_ilogb
Unexecuted instantiation: s_atan.c:subnormal_ilogb
Unexecuted instantiation: s_cbrt.c:subnormal_ilogb
Unexecuted instantiation: s_ceil.c:subnormal_ilogb
Unexecuted instantiation: s_cos.c:subnormal_ilogb
Unexecuted instantiation: s_expm1.c:subnormal_ilogb
Unexecuted instantiation: s_log1p.c:subnormal_ilogb
Unexecuted instantiation: s_scalbn.c:subnormal_ilogb
Unexecuted instantiation: s_sin.c:subnormal_ilogb
Unexecuted instantiation: s_tan.c:subnormal_ilogb
Unexecuted instantiation: s_tanh.c:subnormal_ilogb
Unexecuted instantiation: s_trunc.c:subnormal_ilogb
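
For a subnormal double the biased exponent field is 0, so the exponent must be recovered by locating the leading mantissa bit. A hedged sketch of how an e_fmod.c-style caller would use this (hypothetical function):

    #include <stdint.h>

    static int32_t ilogb_subnormal(double x)   /* assumes x subnormal, x != 0 */
    {
        int32_t hi, lo;

        EXTRACT_WORDS(hi, lo, x);
        hi &= 0x7fffffff;                  /* drop the sign bit */
        return (subnormal_ilogb(hi, lo));  /* in [-1074, -1023] */
    }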
  789|       |
  790|       |#ifdef DEBUG
  791|       |#if defined(__amd64__) || defined(__i386__)
  792|       |#define breakpoint()  asm("int $3")
  793|       |#else
  794|       |#include <signal.h>
  795|       |
  796|       |#define breakpoint()  raise(SIGTRAP)
  797|       |#endif
  798|       |#endif
  799|       |
  800|       |#ifdef STRUCT_RETURN
  801|       |#define RETURNSP(rp) do {   \
  802|       |  if (!(rp)->lo_set)    \
  803|       |    RETURNF((rp)->hi);  \
  804|       |  RETURNF((rp)->hi + (rp)->lo); \
  805|       |} while (0)
  806|       |#define RETURNSPI(rp) do {    \
  807|       |  if (!(rp)->lo_set)    \
  808|       |    RETURNI((rp)->hi);  \
  809|       |  RETURNI((rp)->hi + (rp)->lo); \
  810|       |} while (0)
  811|       |#endif
  812|       |
  813|       |#define SUM2P(x, y) ({      \
  814|       |  const __typeof (x) __x = (x); \
  815|       |  const __typeof (y) __y = (y); \
  816|       |  __x + __y;      \
  817|       |})
  818|       |
  819|       |/* fdlibm kernel function */
  820|       |int __kernel_rem_pio2(double*,double*,int,int,int);
  821|       |
  822|       |/* double precision kernel functions */
  823|       |#ifndef INLINE_REM_PIO2
  824|       |int __ieee754_rem_pio2(double,double*);
  825|       |#endif
  826|       |double  __kernel_sin(double,double,int);
  827|       |double  __kernel_cos(double,double);
  828|       |double  __kernel_tan(double,double,int);
  829|       |double  __ldexp_exp(double,int);
  830|       |#ifdef _COMPLEX_H
  831|       |double complex __ldexp_cexp(double complex,int);
  832|       |#endif
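
These kernels compose in the classic fdlibm pattern: reduce the argument to roughly [-pi/4, pi/4] with __ieee754_rem_pio2(), then dispatch on the quadrant. A hedged sketch of the shape of s_sin.c-style code (simplified, not the covered implementation):

    static double my_sin(double x)
    {
        double y[2];
        int n = __ieee754_rem_pio2(x, y);  /* n mod 4 selects the quadrant */

        switch (n & 3) {
        case 0:  return ( __kernel_sin(y[0], y[1], 1));
        case 1:  return ( __kernel_cos(y[0], y[1]));
        case 2:  return (-__kernel_sin(y[0], y[1], 1));
        default: return (-__kernel_cos(y[0], y[1]));
        }
    }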
  833|       |
  834|       |/* float precision kernel functions */
  835|       |#ifndef INLINE_REM_PIO2F
  836|       |int __ieee754_rem_pio2f(float,double*);
  837|       |#endif
  838|       |#ifndef INLINE_KERNEL_SINDF
  839|       |float __kernel_sindf(double);
  840|       |#endif
  841|       |#ifndef INLINE_KERNEL_COSDF
  842|       |float __kernel_cosdf(double);
  843|       |#endif
  844|       |#ifndef INLINE_KERNEL_TANDF
  845|       |float __kernel_tandf(double,int);
  846|       |#endif
  847|       |float __ldexp_expf(float,int);
  848|       |#ifdef _COMPLEX_H
  849|       |float complex __ldexp_cexpf(float complex,int);
  850|       |#endif
  851|       |
  852|       |/* long double precision kernel functions */
  853|       |long double __kernel_sinl(long double, long double, int);
  854|       |long double __kernel_cosl(long double, long double);
  855|       |long double __kernel_tanl(long double, long double, int);
  856|       |
  857|       |#endif /* !_MATH_PRIVATE_H_ */