xref: /relibc/openlibm/src/e_exp.c (revision 06dbb6e72b1d3e43ca813caa0b139094068e6a88)
1 
2 /* @(#)e_exp.c 1.6 04/04/22 */
3 /*
4  * ====================================================
5  * Copyright (C) 2004 by Sun Microsystems, Inc. All rights reserved.
6  *
7  * Permission to use, copy, modify, and distribute this
8  * software is freely granted, provided that this notice
9  * is preserved.
10  * ====================================================
11  */
12 
13 #include "cdefs-compat.h"
14 //__FBSDID("$FreeBSD: src/lib/msun/src/e_exp.c,v 1.14 2011/10/21 06:26:38 das Exp $");
15 
16 /* __ieee754_exp(x)
17  * Returns the exponential of x.
18  *
19  * Method
20  *   1. Argument reduction:
21  *      Reduce x to an r so that |r| <= 0.5*ln2 ~ 0.34658.
22  *	Given x, find r and integer k such that
23  *
24  *               x = k*ln2 + r,  |r| <= 0.5*ln2.
25  *
26  *      Here r will be represented as r = hi-lo for better
27  *	accuracy.
28  *
29  *   2. Approximation of exp(r) by a special rational function on
30  *	the interval [0,0.34658]:
31  *	Write
32  *	    R(r**2) = r*(exp(r)+1)/(exp(r)-1) = 2 + r*r/6 - r**4/360 + ...
33  *      We use a special Remes algorithm on [0,0.34658] to generate
34  * 	a polynomial of degree 5 to approximate R. The maximum error
35  *	of this polynomial approximation is bounded by 2**-59. In
36  *	other words,
37  *	    R(z) ~ 2.0 + P1*z + P2*z**2 + P3*z**3 + P4*z**4 + P5*z**5
38  *  	(where z=r*r, and the values of P1 to P5 are listed below)
39  *	and
40  *	    |                  5          |     -59
41  *	    | 2.0+P1*z+...+P5*z   -  R(z) | <= 2
42  *	    |                             |
43  *	The computation of exp(r) thus becomes
44  *                             2*r
45  *		exp(r) = 1 + -------
46  *		              R - r
47  *                                 r*R1(r)
48  *		       = 1 + r + ----------- (for better accuracy)
49  *		                  2 - R1(r)
50  *	where
51  *			         2       4             10
52  *		R1(r) = r - (P1*r  + P2*r  + ... + P5*r   ).
53  *
54  *   3. Scale back to obtain exp(x):
55  *	From step 1, we have
56  *	   exp(x) = 2^k * exp(r)
57  *
58  * Special cases:
59  *	exp(INF) is INF, exp(NaN) is NaN;
60  *	exp(-INF) is 0, and
61  *	for finite argument, only exp(0)=1 is exact.
62  *
63  * Accuracy:
64  *	according to an error analysis, the error is always less than
65  *	1 ulp (unit in the last place).
66  *
67  * Misc. info.
68  *	For IEEE double
69  *	    if x >  7.09782712893383973096e+02 then exp(x) overflow
70  *	    if x < -7.45133219101941108420e+02 then exp(x) underflow
71  *
72  * Constants:
73  * The hexadecimal values are the intended ones for the following
74  * constants. The decimal values may be used, provided that the
75  * compiler will convert from decimal to binary accurately enough
76  * to produce the hexadecimal values shown.
77  */
78 
79 #include <float.h>
80 #include <openlibm.h>
81 
82 #include "math_private.h"
83 
84 static const double
85 one	= 1.0,
86 halF[2]	= {0.5,-0.5,},
87 huge	= 1.0e+300,
88 o_threshold=  7.09782712893383973096e+02,  /* 0x40862E42, 0xFEFA39EF */
89 u_threshold= -7.45133219101941108420e+02,  /* 0xc0874910, 0xD52D3051 */
90 ln2HI[2]   ={ 6.93147180369123816490e-01,  /* 0x3fe62e42, 0xfee00000 */
91 	     -6.93147180369123816490e-01,},/* 0xbfe62e42, 0xfee00000 */
92 ln2LO[2]   ={ 1.90821492927058770002e-10,  /* 0x3dea39ef, 0x35793c76 */
93 	     -1.90821492927058770002e-10,},/* 0xbdea39ef, 0x35793c76 */
94 invln2 =  1.44269504088896338700e+00, /* 0x3ff71547, 0x652b82fe */
95 P1   =  1.66666666666666019037e-01, /* 0x3FC55555, 0x5555553E */
96 P2   = -2.77777777770155933842e-03, /* 0xBF66C16C, 0x16BEBD93 */
97 P3   =  6.61375632143793436117e-05, /* 0x3F11566A, 0xAF25DE2C */
98 P4   = -1.65339022054652515390e-06, /* 0xBEBBBD41, 0xC5D26BF1 */
99 P5   =  4.13813679705723846039e-08; /* 0x3E663769, 0x72BEA4D0 */
100 
101 static volatile double
102 twom1000= 9.33263618503218878990e-302;     /* 2**-1000=0x01700000,0*/
103 
104 DLLEXPORT double
105 __ieee754_exp(double x)	/* default IEEE double exp */
106 {
107 	double y,hi=0.0,lo=0.0,c,t,twopk;
108 	int32_t k=0,xsb;
109 	u_int32_t hx;
110 
111 	GET_HIGH_WORD(hx,x);
112 	xsb = (hx>>31)&1;		/* sign bit of x */
113 	hx &= 0x7fffffff;		/* high word of |x| */
114 
115     /* filter out non-finite argument */
116 	if(hx >= 0x40862E42) {			/* if |x|>=709.78... */
117             if(hx>=0x7ff00000) {
118 	        u_int32_t lx;
119 		GET_LOW_WORD(lx,x);
120 		if(((hx&0xfffff)|lx)!=0)
121 		     return x+x; 		/* NaN */
122 		else return (xsb==0)? x:0.0;	/* exp(+-inf)={inf,0} */
123 	    }
124 	    if(x > o_threshold) return huge*huge; /* overflow */
125 	    if(x < u_threshold) return twom1000*twom1000; /* underflow */
126 	}
127 
128         /* this implementation gives 2.7182818284590455 for exp(1.0),
129            which is well within the allowable error. however,
130            2.718281828459045 is closer to the true value so we prefer that
131            answer, given that 1.0 is such an important argument value. */
132         if (x == 1.0)
133             return 2.718281828459045235360;
134 
135     /* argument reduction */
136 	if(hx > 0x3fd62e42) {		/* if  |x| > 0.5 ln2 */
137 	    if(hx < 0x3FF0A2B2) {	/* and |x| < 1.5 ln2 */
138 		hi = x-ln2HI[xsb]; lo=ln2LO[xsb]; k = 1-xsb-xsb;
139 	    } else {
140 		k  = (int)(invln2*x+halF[xsb]);
141 		t  = k;
142 		hi = x - t*ln2HI[0];	/* t*ln2HI is exact here */
143 		lo = t*ln2LO[0];
144 	    }
145 	    STRICT_ASSIGN(double, x, hi - lo);
146 	}
147 	else if(hx < 0x3e300000)  {	/* when |x|<2**-28 */
148 	    if(huge+x>one) return one+x;/* trigger inexact */
149 	}
150 	else k = 0;
151 
152     /* x is now in primary range */
153 	t  = x*x;
154 	if(k >= -1021)
155 	    INSERT_WORDS(twopk,0x3ff00000+(k<<20), 0);
156 	else
157 	    INSERT_WORDS(twopk,0x3ff00000+((k+1000)<<20), 0);
158 	c  = x - t*(P1+t*(P2+t*(P3+t*(P4+t*P5))));
159 	if(k==0) 	return one-((x*c)/(c-2.0)-x);
160 	else 		y = one-((lo-(x*c)/(2.0-c))-hi);
161 	if(k >= -1021) {
162 	    if (k==1024) return y*2.0*0x1p1023;
163 	    return y*twopk;
164 	} else {
165 	    return y*twopk*twom1000;
166 	}
167 }
168