xref: /illumos-gate/usr/src/lib/libm/common/C/log2.c (revision 88e55da9244bc48e3b3ad957a29e4be71309adcd)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
24  */
25 /*
26  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
27  * Use is subject to license terms.
28  */
29 
30 #pragma weak __log2 = log2
31 
32 /* INDENT OFF */
33 /*
34  * log2(x) = log(x)/log(2); below, a bare "log2" means the constant log(2)
35  *
36  * Based on the table look-up algorithm with product polynomial
37  * approximation used for log(x).
38  *
39  * By K.C. Ng, Nov 29, 2004
40  *
41  * (a). For x in [1-0.125, 1+0.125], from log.c we have
42  *	log(x) =  f + ((a1*f^2) *
43  *		   ((a2 + (a3*f)*(a4+f)) + (f^3)*(a5+f))) *
44  *		   (((a6 + f*(a7+f)) + (f^3)*(a8+f)) *
45  *		   ((a9 + (a10*f)*(a11+f)) + (f^3)*(a12+f)))
46  *	where f = x - 1.
47  *	(i) modify a1 <- a1 / log2
48  *	(ii) 1/log2 = 1.4426950408889634...
49  *		    = 1.5 - 0.057304959... (4 bit shift)
50  *	     Let lv = 1.5 - 1/log2, then
51  *	     lv = 0.057304959111036592640075318998107956665325,
52  *	(iii) f*1.5 is exact because f has 3 trailing zero bits.
53  *	(iv) Thus, log2(x) = f*1.5 - (lv*f - PPoly)
54  *
55  * (b). For 0.09375 <= x < 24
56  *	Let j = (ix - 0x3fb80000) >> 15. Look up Y[j], 1/Y[j], and log(Y[j])
57  *	from _TBL_log.c. Then
58  *		log2(x)  = log2(Y[j]) + log2(1 + (x-Y[j])*(1/Y[j]))
59  *			  = log(Y[j])(1/log2) + log2(1 + s)
60  *	where
61  *		s = (x-Y[j])*(1/Y[j])
62  *	From log.c, we have log(1+s) =
63  *				  2              2                     2
64  *		(b s) (b + b s + s ) [b + b s + s (b + s)] (b + b s + s )
65  *		  1     2   3          4   5        6        7   8
66  *
67  *	With b1 <- b1/log2 in that product (POLY(s)) and T = log(Y[j]):
68  *		log2(x) = 1.5 * T - (lv * T - POLY(s))
69  *
70  * (c). Otherwise, get "n", the exponent of x, and then normalize x to
71  *	z in [1,2). Then similar to (b) find a Y[i] that matches z to 5.5
72  *	significant bits. Then
73  *	    log2(x) = n + log2(z).
74  *
75  * Special cases:
76  *	log2(x) is NaN with signal if x < 0 (including -INF) ;
77  *	log2(+INF) is +INF; log2(0) is -INF with signal;
78  *	log2(NaN) is that NaN with no signal.
79  *
80  * Maximum error observed: less than 0.84 ulp
81  *
82  * Constants:
83  * The hexadecimal values are the intended ones for the following constants.
84  * The decimal values may be used, provided that the compiler will convert
85  * from decimal to binary accurately enough to produce the hexadecimal values
86  * shown.
87  */
88 /* INDENT ON */
89 
90 #include "libm.h"
91 #include "libm_protos.h"
92 
93 extern const double _TBL_log[];
94 
95 static const double P[] = {
96 /* ONE   */  1.0,
97 /* TWO52 */  4503599627370496.0,
98 /* LN10V */  1.4426950408889634073599246810018920433347,   /* 1/log10 */
99 /* ZERO  */  0.0,
100 /* A1    */ -9.6809362455249638217841932228967194640116e-02,
101 /* A2    */  1.99628461483039965074226529395673424005508422852e+0000,
102 /* A3    */  2.26812367662950720159642514772713184356689453125e+0000,
103 /* A4    */ -9.05030639084976384900471657601883634924888610840e-0001,
104 /* A5    */ -1.48275767132434044270894446526654064655303955078e+0000,
105 /* A6    */  1.88158320939722756293122074566781520843505859375e+0000,
106 /* A7    */  1.83309386046986411145098827546462416648864746094e+0000,
107 /* A8    */  1.24847063988317086291601754055591300129890441895e+0000,
108 /* A9    */  1.98372421445537705508854742220137268304824829102e+0000,
109 /* A10   */ -3.94711735767898475035764249696512706577777862549e-0001,
110 /* A11   */  3.07890395362954372160402272129431366920471191406e+0000,
111 /* A12   */ -9.60099585275022149311041630426188930869102478027e-0001,
112 /* B1    */ -1.8039695622547469514898963204616532885451e-01,
113 /* B2    */  1.87161713283355151891381127914642725337613123482e+0000,
114 /* B3    */ -1.89082956295731507978530316904652863740921020508e+0000,
115 /* B4    */ -2.50562891673640253387134180229622870683670043945e+0000,
116 /* B5    */  1.64822828085258366037635369139024987816810607910e+0000,
117 /* B6    */ -1.24409107065868340669112512841820716857910156250e+0000,
118 /* B7    */  1.70534231658220414296067701798165217041969299316e+0000,
119 /* B8    */  1.99196833784655646937267192697618156671524047852e+0000,
120 /* LGH   */  1.5,
121 /* LGL   */  0.057304959111036592640075318998107956665325,
122 };
123 
124 #define	ONE   P[0]
125 #define	TWO52 P[1]
126 #define	LN10V P[2]
127 #define	ZERO  P[3]
128 #define	A1    P[4]
129 #define	A2    P[5]
130 #define	A3    P[6]
131 #define	A4    P[7]
132 #define	A5    P[8]
133 #define	A6    P[9]
134 #define	A7    P[10]
135 #define	A8    P[11]
136 #define	A9    P[12]
137 #define	A10   P[13]
138 #define	A11   P[14]
139 #define	A12   P[15]
140 #define	B1    P[16]
141 #define	B2    P[17]
142 #define	B3    P[18]
143 #define	B4    P[19]
144 #define	B5    P[20]
145 #define	B6    P[21]
146 #define	B7    P[22]
147 #define	B8    P[23]
148 #define	LGH   P[24]
149 #define	LGL   P[25]
150 
151 double
152 log2(double x) {
153 	int i, hx, ix, n, lx;
154 
155 	n = 0;
156 	hx = ((int *) &x)[HIWORD]; ix = hx & 0x7fffffff;
157 	lx = ((int *) &x)[LOWORD];
158 
159 	/* subnormal,0,negative,inf,nan */
160 	if ((hx + 0x100000) < 0x200000) {
161 #if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN)
162 		if (ix >= 0x7ff80000)		/* assumes sparc-like QNaN */
163 			return (x);		/* for Cheetah when x is QNaN */
164 #endif
165 		if (((hx << 1) | lx) == 0)	/* log(0.0) = -inf */
166 			return (A5 / fabs(x));
167 		if (hx < 0) {	/* x < 0 */
168 			if (ix >= 0x7ff00000)
169 				return (x - x);	/* x is -inf or NaN */
170 			else
171 				return (ZERO / (x - x));
172 		}
173 		if (((hx - 0x7ff00000) | lx) == 0)	/* log(inf) = inf */
174 			return (x);
175 		if (ix >= 0x7ff00000)		/* log(NaN) = NaN */
176 			return (x - x);
177 		x *= TWO52;
178 		n = -52;
179 		hx = ((int *) &x)[HIWORD]; ix = hx & 0x7fffffff;
180 		lx = ((int *) &x)[LOWORD];
181 	}
182 
183 	/* 0.09375 (0x3fb80000) <= x < 24 (0x40380000) */
184 	i = ix >> 19;
185 	if (i >= 0x7f7 && i <= 0x806) {
186 		/* 0.875 <= x < 1.125 */
187 		if (ix >= 0x3fec0000 && ix < 0x3ff20000) {
188 			double s, z, r, w;
189 			s = x - ONE; z = s * s; r = (A10 * s) * (A11 + s);
190 			w = z * s;
191 			if (((ix << 12) | lx) == 0)
192 				return (z);
193 			else
194 				return (LGH * s - (LGL * s - ((A1 * z) *
195 				((A2 + (A3 * s) * (A4 + s)) + w * (A5 + s))) *
196 				(((A6 + s * (A7 + s)) + w * (A8 + s)) *
197 				((A9 + r) + w * (A12 + s)))));
198 		} else {
199 			double *tb, s;
200 			i = (ix - 0x3fb80000) >> 15;
201 			tb = (double *) _TBL_log + (i + i + i);
202 			if (((ix << 12) | lx) == 0)	/* 2's power */
203 				return ((double) ((ix >> 20) - 0x3ff));
204 			s = (x - tb[0]) * tb[1];
205 			return (LGH * tb[2] - (LGL * tb[2] - ((B1 * s) *
206 				(B2 + s * (B3 + s))) *
207 				(((B4 + s * B5) + (s * s) * (B6 + s)) *
208 				(B7 + s * (B8 + s)))));
209 		}
210 	} else {
211 		double *tb, dn, s;
212 		dn = (double) (n + ((ix >> 20) - 0x3ff));
213 		ix <<= 12;
214 		if ((ix | lx) == 0)
215 			return (dn);
216 		i = ((unsigned) ix >> 12) | 0x3ff00000;	/* scale x to [1,2) */
217 		((int *) &x)[HIWORD] = i;
218 		i = (i - 0x3fb80000) >> 15;
219 		tb = (double *) _TBL_log + (i + i + i);
220 		s = (x - tb[0]) * tb[1];
221 		return (dn + (tb[2] * LN10V + ((B1 * s) *
222 			(B2 + s * (B3 + s))) *
223 			(((B4 + s * B5) + (s * s) * (B6 + s)) *
224 			(B7 + s * (B8 + s)))));
225 	}
226 }
227