xref: /illumos-gate/usr/src/lib/libm/common/R/atan2f.c (revision b1e2e3fb17324e9ddf43db264a0c64da7756d9e6)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
23  */
24 /*
25  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
26  * Use is subject to license terms.
27  */
28 
29 #pragma weak __atan2f = atan2f
30 
31 #include "libm.h"
32 
33 #if defined(__i386) && !defined(__amd64)
34 extern int __swapRP(int);
35 #endif
36 
37 /*
38  * For i = 0, ..., 192, let x[i] be the double precision number whose
39  * high order 32 bits are 0x3f900000 + (i << 16) and whose low order
40  * 32 bits are zero.  Then TBL[i] := atan(x[i]) to double precision.
41  */
42 
43 static const double TBL[] = {
44 	1.56237286204768313e-02,
45 	1.66000375562312640e-02,
46 	1.75763148444955872e-02,
47 	1.85525586258889763e-02,
48 	1.95287670414137082e-02,
49 	2.05049382324763683e-02,
50 	2.14810703409090559e-02,
51 	2.24571615089905717e-02,
52 	2.34332098794675855e-02,
53 	2.44092135955758099e-02,
54 	2.53851708010611396e-02,
55 	2.63610796402007873e-02,
56 	2.73369382578244127e-02,
57 	2.83127447993351995e-02,
58 	2.92884974107309737e-02,
59 	3.02641942386252458e-02,
60 	3.12398334302682774e-02,
61 	3.31909314971115949e-02,
62 	3.51417768027967800e-02,
63 	3.70923545503918164e-02,
64 	3.90426499551669928e-02,
65 	4.09926482452637811e-02,
66 	4.29423346623621707e-02,
67 	4.48916944623464972e-02,
68 	4.68407129159696539e-02,
69 	4.87893753095156174e-02,
70 	5.07376669454602178e-02,
71 	5.26855731431300420e-02,
72 	5.46330792393594777e-02,
73 	5.65801705891457105e-02,
74 	5.85268325663017702e-02,
75 	6.04730505641073168e-02,
76 	6.24188099959573500e-02,
77 	6.63088949198234884e-02,
78 	7.01969710718705203e-02,
79 	7.40829225490337306e-02,
80 	7.79666338315423008e-02,
81 	8.18479898030765457e-02,
82 	8.57268757707448092e-02,
83 	8.96031774848717461e-02,
84 	9.34767811585894698e-02,
85 	9.73475734872236709e-02,
86 	1.01215441667466668e-01,
87 	1.05080273416329528e-01,
88 	1.08941956989865793e-01,
89 	1.12800381201659389e-01,
90 	1.16655435441069349e-01,
91 	1.20507009691224562e-01,
92 	1.24354994546761438e-01,
93 	1.32039761614638762e-01,
94 	1.39708874289163648e-01,
95 	1.47361481088651630e-01,
96 	1.54996741923940973e-01,
97 	1.62613828597948568e-01,
98 	1.70211925285474408e-01,
99 	1.77790228992676075e-01,
100 	1.85347949995694761e-01,
101 	1.92884312257974672e-01,
102 	2.00398553825878512e-01,
103 	2.07889927202262986e-01,
104 	2.15357699697738048e-01,
105 	2.22801153759394521e-01,
106 	2.30219587276843718e-01,
107 	2.37612313865471242e-01,
108 	2.44978663126864143e-01,
109 	2.59629629408257512e-01,
110 	2.74167451119658789e-01,
111 	2.88587361894077410e-01,
112 	3.02884868374971417e-01,
113 	3.17055753209147029e-01,
114 	3.31096076704132103e-01,
115 	3.45002177207105132e-01,
116 	3.58770670270572245e-01,
117 	3.72398446676754202e-01,
118 	3.85882669398073752e-01,
119 	3.99220769575252543e-01,
120 	4.12410441597387323e-01,
121 	4.25449637370042266e-01,
122 	4.38336559857957830e-01,
123 	4.51069655988523499e-01,
124 	4.63647609000806094e-01,
125 	4.88333951056405535e-01,
126 	5.12389460310737732e-01,
127 	5.35811237960463704e-01,
128 	5.58599315343562441e-01,
129 	5.80756353567670414e-01,
130 	6.02287346134964152e-01,
131 	6.23199329934065904e-01,
132 	6.43501108793284371e-01,
133 	6.63202992706093286e-01,
134 	6.82316554874748071e-01,
135 	7.00854407884450192e-01,
136 	7.18829999621624527e-01,
137 	7.36257428981428097e-01,
138 	7.53151280962194414e-01,
139 	7.69526480405658297e-01,
140 	7.85398163397448279e-01,
141 	8.15691923316223422e-01,
142 	8.44153986113171051e-01,
143 	8.70903457075652976e-01,
144 	8.96055384571343927e-01,
145 	9.19719605350416858e-01,
146 	9.42000040379463610e-01,
147 	9.62994330680936206e-01,
148 	9.82793723247329054e-01,
149 	1.00148313569423464e+00,
150 	1.01914134426634972e+00,
151 	1.03584125300880014e+00,
152 	1.05165021254837376e+00,
153 	1.06663036531574362e+00,
154 	1.08083900054116833e+00,
155 	1.09432890732118993e+00,
156 	1.10714871779409041e+00,
157 	1.13095374397916038e+00,
158 	1.15257199721566761e+00,
159 	1.17227388112847630e+00,
160 	1.19028994968253166e+00,
161 	1.20681737028525249e+00,
162 	1.22202532321098967e+00,
163 	1.23605948947808186e+00,
164 	1.24904577239825443e+00,
165 	1.26109338225244039e+00,
166 	1.27229739520871732e+00,
167 	1.28274087974427076e+00,
168 	1.29249666778978534e+00,
169 	1.30162883400919616e+00,
170 	1.31019393504755555e+00,
171 	1.31824205101683711e+00,
172 	1.32581766366803255e+00,
173 	1.33970565959899957e+00,
174 	1.35212738092095464e+00,
175 	1.36330010035969384e+00,
176 	1.37340076694501589e+00,
177 	1.38257482149012589e+00,
178 	1.39094282700241845e+00,
179 	1.39860551227195762e+00,
180 	1.40564764938026987e+00,
181 	1.41214106460849531e+00,
182 	1.41814699839963154e+00,
183 	1.42371797140649403e+00,
184 	1.42889927219073276e+00,
185 	1.43373015248470903e+00,
186 	1.43824479449822262e+00,
187 	1.44247309910910193e+00,
188 	1.44644133224813509e+00,
189 	1.45368758222803240e+00,
190 	1.46013910562100091e+00,
191 	1.46591938806466282e+00,
192 	1.47112767430373470e+00,
193 	1.47584462045214027e+00,
194 	1.48013643959415142e+00,
195 	1.48405798811891154e+00,
196 	1.48765509490645531e+00,
197 	1.49096634108265924e+00,
198 	1.49402443552511865e+00,
199 	1.49685728913695626e+00,
200 	1.49948886200960629e+00,
201 	1.50193983749385196e+00,
202 	1.50422816301907281e+00,
203 	1.50636948736934317e+00,
204 	1.50837751679893928e+00,
205 	1.51204050407917401e+00,
206 	1.51529782154917969e+00,
207 	1.51821326518395483e+00,
208 	1.52083793107295384e+00,
209 	1.52321322351791322e+00,
210 	1.52537304737331958e+00,
211 	1.52734543140336587e+00,
212 	1.52915374769630819e+00,
213 	1.53081763967160667e+00,
214 	1.53235373677370856e+00,
215 	1.53377621092096650e+00,
216 	1.53509721411557254e+00,
217 	1.53632722579538861e+00,
218 	1.53747533091664934e+00,
219 	1.53854944435964280e+00,
220 	1.53955649336462841e+00,
221 	1.54139303859089161e+00,
222 	1.54302569020147562e+00,
223 	1.54448660954197448e+00,
224 	1.54580153317597646e+00,
225 	1.54699130060982659e+00,
226 	1.54807296595325550e+00,
227 	1.54906061995310385e+00,
228 	1.54996600675867957e+00,
229 	1.55079899282174605e+00,
230 	1.55156792769518947e+00,
231 	1.55227992472688747e+00,
232 	1.55294108165534417e+00,
233 	1.55355665560036682e+00,
234 	1.55413120308095598e+00,
235 	1.55466869295126031e+00,
236 	1.55517259817441977e+00,
237 };
238 
239 static const double
240 	pio4	=  7.8539816339744827900e-01,
241 	pio2	=  1.5707963267948965580e+00,
242 	negpi	= -3.1415926535897931160e+00,
243 	q1	= -3.3333333333296428046e-01,
244 	q2	=  1.9999999186853752618e-01,
245 	zero	=  0.0;
246 
247 static const float two24 = 16777216.0;
248 
249 float
250 atan2f(float fy, float fx)
251 {
252 	double	a, t, s, dbase;
253 	float	x, y, base;
254 	int	i, k, hx, hy, ix, iy, sign;
255 #if defined(__i386) && !defined(__amd64)
256 	int	rp;
257 #endif
258 
259 	iy = *(int *)&fy;
260 	ix = *(int *)&fx;
261 	hy = iy & ~0x80000000;
262 	hx = ix & ~0x80000000;
263 
264 	sign = 0;
265 	if (hy > hx) {
266 		x = fy;
267 		y = fx;
268 		i = hx;
269 		hx = hy;
270 		hy = i;
271 		if (iy < 0) {
272 			x = -x;
273 			sign = 1;
274 		}
275 		if (ix < 0) {
276 			y = -y;
277 			a = pio2;
278 		} else {
279 			a = -pio2;
280 			sign = 1 - sign;
281 		}
282 	} else {
283 		y = fy;
284 		x = fx;
285 		if (iy < 0) {
286 			y = -y;
287 			sign = 1;
288 		}
289 		if (ix < 0) {
290 			x = -x;
291 			a = negpi;
292 			sign = 1 - sign;
293 		} else {
294 			a = zero;
295 		}
296 	}
297 
298 	if (hx >= 0x7f800000 || hx - hy >= 0x0c800000) {
299 		if (hx >= 0x7f800000) {
300 			if (hx > 0x7f800000) /* nan */
301 				return (x * y);
302 			else if (hy >= 0x7f800000)
303 				a += pio4;
304 		} else if ((int)a == 0) {
305 			a = (double)y / x;
306 		}
307 		return ((float)((sign)? -a : a));
308 	}
309 
310 	if (hy < 0x00800000) {
311 		if (hy == 0)
312 			return ((float)((sign)? -a : a));
313 		/* scale subnormal y */
314 		y *= two24;
315 		x *= two24;
316 		hy = *(int *)&y;
317 		hx = *(int *)&x;
318 	}
319 
320 #if defined(__i386) && !defined(__amd64)
321 	rp = __swapRP(fp_extended);
322 #endif
323 	k = (hy - hx + 0x3f800000) & 0xfff80000;
324 	if (k >= 0x3c800000) {	/* |y/x| >= 1/64 */
325 		*(int *)&base = k;
326 		k = (k - 0x3c800000) >> 19;
327 		a += TBL[k];
328 	} else {
329 		/*
330 		 * For some reason this is faster on USIII than just
331 		 * doing t = y/x in this case.
332 		 */
333 		*(int *)&base = 0;
334 	}
335 	dbase = (double)base;
336 	t = (y - x * dbase) / (x + y * dbase);
337 	s = t * t;
338 	a = (a + t) + t * s * (q1 + s * q2);
339 #if defined(__i386) && !defined(__amd64)
340 	if (rp != fp_extended)
341 		(void) __swapRP(rp);
342 #endif
343 	return ((float)((sign)? -a : a));
344 }
345