/src/moddable/xs/tools/fdlibm/e_exp.c
Line | Count | Source |
1 | | |
2 | | /* |
3 | | * ==================================================== |
4 | | * Copyright (C) 2004 by Sun Microsystems, Inc. All rights reserved. |
5 | | * |
6 | | * Permission to use, copy, modify, and distribute this |
7 | | * software is freely granted, provided that this notice |
8 | | * is preserved. |
9 | | * ==================================================== |
10 | | */ |
11 | | |
12 | | /* exp(x) |
13 | | * Returns the exponential of x. |
14 | | * |
15 | | * Method |
16 | | * 1. Argument reduction: |
17 | | * Reduce x to an r so that |r| <= 0.5*ln2 ~ 0.34658. |
18 | | * Given x, find r and integer k such that |
19 | | * |
20 | | * x = k*ln2 + r, |r| <= 0.5*ln2. |
21 | | * |
22 | | * Here r will be represented as r = hi-lo for better |
23 | | * accuracy. |
24 | | * |
25 | | * 2. Approximation of exp(r) by a special rational function on |
26 | | * the interval [0,0.34658]: |
27 | | * Write |
28 | | * R(r**2) = r*(exp(r)+1)/(exp(r)-1) = 2 + r*r/6 - r**4/360 + ... |
29 | | * We use a special Remes algorithm on [0,0.34658] to generate |
30 | | * a polynomial of degree 5 to approximate R. The maximum error |
31 | | * of this polynomial approximation is bounded by 2**-59. In |
32 | | * other words, |
33 | | * R(z) ~ 2.0 + P1*z + P2*z**2 + P3*z**3 + P4*z**4 + P5*z**5 |
34 | | * (where z=r*r, and the values of P1 to P5 are listed below) |
35 | | * and |
36 | | * abs(2.0 + P1*z + ... + P5*z**5 - R(z)) <= 2**-59 |
39 | | * The computation of exp(r) thus becomes |
40 | | * exp(r) = 1 + 2*r/(R - r) |
41 | | * = 1 + r + r*R1(r)/(2 - R1(r)) (for better accuracy) |
46 | | * where |
47 | | * R1(r) = r - (P1*r**2 + P2*r**4 + ... + P5*r**10). |
49 | | * |
50 | | * 3. Scale back to obtain exp(x): |
51 | | * From step 1, we have |
52 | | * exp(x) = 2^k * exp(r) |
53 | | * |
54 | | * Special cases: |
55 | | * exp(INF) is INF, exp(NaN) is NaN; |
56 | | * exp(-INF) is 0, and |
57 | | * for finite argument, only exp(0)=1 is exact. |
58 | | * |
59 | | * Accuracy: |
60 | | * according to an error analysis, the error is always less than |
61 | | * 1 ulp (unit in the last place). |
62 | | * |
63 | | * Misc. info. |
64 | | * For IEEE double |
65 | | * if x > 7.09782712893383973096e+02 then exp(x) overflow |
66 | | * if x < -7.45133219101941108420e+02 then exp(x) underflow |
67 | | * |
68 | | * Constants: |
69 | | * The hexadecimal values are the intended ones for the following |
70 | | * constants. The decimal values may be used, provided that the |
71 | | * compiler will convert from decimal to binary accurately enough |
72 | | * to produce the hexadecimal values shown. |
73 | | */ |
74 | | |
75 | | #include "math_private.h" |
76 | | |
/*
 * Constants used by __ieee754_exp.  The hexadecimal words in the trailing
 * comments are the intended bit patterns (high word, low word).
 */
static const double one = 1.0;

/* Rounding bias added before truncating to k; indexed by the sign bit of x. */
static const double halF[2] = { 0.5, -0.5 };

/* Range limits checked before any reduction is attempted. */
static const double o_threshold = 7.09782712893383973096e+02;	/* 0x40862E42, 0xFEFA39EF */
static const double u_threshold = -7.45133219101941108420e+02;	/* 0xc0874910, 0xD52D3051 */

/* High and low parts of ln2; index 1 holds the negated value. */
static const double ln2HI[2] = {
     6.93147180369123816490e-01,	/* 0x3fe62e42, 0xfee00000 */
    -6.93147180369123816490e-01,	/* 0xbfe62e42, 0xfee00000 */
};
static const double ln2LO[2] = {
     1.90821492927058770002e-10,	/* 0x3dea39ef, 0x35793c76 */
    -1.90821492927058770002e-10,	/* 0xbdea39ef, 0x35793c76 */
};

/* 1/ln2, used to compute k from x. */
static const double invln2 = 1.44269504088896338700e+00;	/* 0x3ff71547, 0x652b82fe */

/* Coefficients P1..P5 of the polynomial evaluated in z = r*r. */
static const double P1 =  1.66666666666666019037e-01;	/* 0x3FC55555, 0x5555553E */
static const double P2 = -2.77777777770155933842e-03;	/* 0xBF66C16C, 0x16BEBD93 */
static const double P3 =  6.61375632143793436117e-05;	/* 0x3F11566A, 0xAF25DE2C */
static const double P4 = -1.65339022054652515390e-06;	/* 0xBEBBBD41, 0xC5D26BF1 */
static const double P5 =  4.13813679705723846039e-08;	/* 0x3E663769, 0x72BEA4D0 */
92 | | |
/* Value of e; __ieee754_exp returns it directly when its argument is exactly 1.0. */
static const double E = 2.7182818284590452354;

/*
 * Operands for the overflow (huge*huge) and underflow (twom1000*twom1000)
 * results, and for the final rescaling of very small results.
 * NOTE(review): presumably declared volatile so these products are evaluated
 * at run time rather than folded by the compiler - confirm.
 */
static volatile double huge = 1.0e+300;
static volatile double twom1000 = 9.33263618503218878990e-302;	/* 2**-1000 = 0x01700000, 0 */
98 | | |
99 | | double |
100 | | __ieee754_exp(double x) /* default IEEE double exp */ |
101 | 506k | { |
102 | 506k | double y,hi=0.0,lo=0.0,c,t,twopk; |
103 | 506k | int32_t k=0,xsb; |
104 | 506k | u_int32_t hx; |
105 | | |
106 | 506k | GET_HIGH_WORD(hx,x); |
107 | 506k | xsb = (hx>>31)&1; /* sign bit of x */ |
108 | 506k | hx &= 0x7fffffff; /* high word of |x| */ |
109 | | |
110 | | /* filter out non-finite argument */ |
111 | 506k | if(hx >= 0x40862E42) { /* if |x|>=709.78... */ |
112 | 31.2k | if(hx>=0x7ff00000) { |
113 | 2.80k | u_int32_t lx; |
114 | 2.80k | GET_LOW_WORD(lx,x); |
115 | 2.80k | if(((hx&0xfffff)|lx)!=0) |
116 | 1.76k | return x+x; /* NaN */ |
117 | 1.04k | else return (xsb==0)? x:0.0; /* exp(+-inf)={inf,0} */ |
118 | 2.80k | } |
119 | 28.4k | if(x > o_threshold) return huge*huge; /* overflow */ |
120 | 27.4k | if(x < u_threshold) return twom1000*twom1000; /* underflow */ |
121 | 27.4k | } |
122 | | |
123 | | /* argument reduction */ |
124 | 502k | if(hx > 0x3fd62e42) { /* if |x| > 0.5 ln2 */ |
125 | 444k | if(hx < 0x3FF0A2B2) { /* and |x| < 1.5 ln2 */ |
126 | 4.07k | if (x == 1.0) return E; |
127 | 3.78k | hi = x-ln2HI[xsb]; lo=ln2LO[xsb]; k = 1-xsb-xsb; |
128 | 440k | } else { |
129 | 440k | k = (int)(invln2*x+halF[xsb]); |
130 | 440k | t = k; |
131 | 440k | hi = x - t*ln2HI[0]; /* t*ln2HI is exact here */ |
132 | 440k | lo = t*ln2LO[0]; |
133 | 440k | } |
134 | 444k | STRICT_ASSIGN(double, x, hi - lo); |
135 | 444k | } |
136 | 57.9k | else if(hx < 0x3e300000) { /* when |x|<2**-28 */ |
137 | 56.8k | if(huge+x>one) return one+x;/* trigger inexact */ |
138 | 56.8k | } |
139 | 1.17k | else k = 0; |
140 | | |
141 | | /* x is now in primary range */ |
142 | 445k | t = x*x; |
143 | 445k | if(k >= -1021) |
144 | 416k | INSERT_WORDS(twopk,((u_int32_t)(0x3ff+k))<<20, 0); |
145 | 29.6k | else |
146 | 29.6k | INSERT_WORDS(twopk,((u_int32_t)(0x3ff+(k+1000)))<<20, 0); |
147 | 445k | c = x - t*(P1+t*(P2+t*(P3+t*(P4+t*P5)))); |
148 | 445k | if(k==0) return one-((x*c)/(c-2.0)-x); |
149 | 444k | else y = one-((lo-(x*c)/(2.0-c))-hi); |
150 | 444k | if(k >= -1021) { |
151 | 415k | if (k==1024) { |
152 | 0 | double const_0x1p1023 = __ieee754_pow(2, 1023); |
153 | 0 | return y*2.0*const_0x1p1023; |
154 | 0 | } |
155 | 415k | return y*twopk; |
156 | 415k | } else { |
157 | 29.6k | return y*twopk*twom1000; |
158 | 29.6k | } |
159 | 444k | } |