1*25c28e83SPiotr Jasiukajtis /*
2*25c28e83SPiotr Jasiukajtis * CDDL HEADER START
3*25c28e83SPiotr Jasiukajtis *
4*25c28e83SPiotr Jasiukajtis * The contents of this file are subject to the terms of the
5*25c28e83SPiotr Jasiukajtis * Common Development and Distribution License (the "License").
6*25c28e83SPiotr Jasiukajtis * You may not use this file except in compliance with the License.
7*25c28e83SPiotr Jasiukajtis *
8*25c28e83SPiotr Jasiukajtis * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*25c28e83SPiotr Jasiukajtis * or http://www.opensolaris.org/os/licensing.
10*25c28e83SPiotr Jasiukajtis * See the License for the specific language governing permissions
11*25c28e83SPiotr Jasiukajtis * and limitations under the License.
12*25c28e83SPiotr Jasiukajtis *
13*25c28e83SPiotr Jasiukajtis * When distributing Covered Code, include this CDDL HEADER in each
14*25c28e83SPiotr Jasiukajtis * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*25c28e83SPiotr Jasiukajtis * If applicable, add the following below this CDDL HEADER, with the
16*25c28e83SPiotr Jasiukajtis * fields enclosed by brackets "[]" replaced with your own identifying
17*25c28e83SPiotr Jasiukajtis * information: Portions Copyright [yyyy] [name of copyright owner]
18*25c28e83SPiotr Jasiukajtis *
19*25c28e83SPiotr Jasiukajtis * CDDL HEADER END
20*25c28e83SPiotr Jasiukajtis */
21*25c28e83SPiotr Jasiukajtis
22*25c28e83SPiotr Jasiukajtis /*
23*25c28e83SPiotr Jasiukajtis * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
24*25c28e83SPiotr Jasiukajtis */
25*25c28e83SPiotr Jasiukajtis /*
26*25c28e83SPiotr Jasiukajtis * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
27*25c28e83SPiotr Jasiukajtis * Use is subject to license terms.
28*25c28e83SPiotr Jasiukajtis */
29*25c28e83SPiotr Jasiukajtis
30*25c28e83SPiotr Jasiukajtis #ifdef __RESTRICT
31*25c28e83SPiotr Jasiukajtis #define restrict _Restrict
32*25c28e83SPiotr Jasiukajtis #else
33*25c28e83SPiotr Jasiukajtis #define restrict
34*25c28e83SPiotr Jasiukajtis #endif
35*25c28e83SPiotr Jasiukajtis
36*25c28e83SPiotr Jasiukajtis extern const double __vlibm_TBL_atan1[];
37*25c28e83SPiotr Jasiukajtis
38*25c28e83SPiotr Jasiukajtis static const double
39*25c28e83SPiotr Jasiukajtis pio4 = 7.8539816339744827900e-01,
40*25c28e83SPiotr Jasiukajtis pio2 = 1.5707963267948965580e+00,
41*25c28e83SPiotr Jasiukajtis pi = 3.1415926535897931160e+00;
42*25c28e83SPiotr Jasiukajtis
43*25c28e83SPiotr Jasiukajtis static const float
44*25c28e83SPiotr Jasiukajtis zero = 0.0f,
45*25c28e83SPiotr Jasiukajtis one = 1.0f,
46*25c28e83SPiotr Jasiukajtis q1 = -3.3333333333296428046e-01f,
47*25c28e83SPiotr Jasiukajtis q2 = 1.9999999186853752618e-01f,
48*25c28e83SPiotr Jasiukajtis twop24 = 16777216.0f;
49*25c28e83SPiotr Jasiukajtis
50*25c28e83SPiotr Jasiukajtis void
__vatan2f(int n,float * restrict y,int stridey,float * restrict x,int stridex,float * restrict z,int stridez)51*25c28e83SPiotr Jasiukajtis __vatan2f(int n, float * restrict y, int stridey, float * restrict x,
52*25c28e83SPiotr Jasiukajtis int stridex, float * restrict z, int stridez)
53*25c28e83SPiotr Jasiukajtis {
54*25c28e83SPiotr Jasiukajtis float x0, x1, x2, y0, y1, y2, *pz0 = 0, *pz1, *pz2;
55*25c28e83SPiotr Jasiukajtis double ah0, ah1, ah2;
56*25c28e83SPiotr Jasiukajtis double t0, t1, t2;
57*25c28e83SPiotr Jasiukajtis double sx0, sx1, sx2;
58*25c28e83SPiotr Jasiukajtis double sign0, sign1, sign2;
59*25c28e83SPiotr Jasiukajtis int i, k0 = 0, k1, k2, hx, sx, sy;
60*25c28e83SPiotr Jasiukajtis int hy0, hy1, hy2;
61*25c28e83SPiotr Jasiukajtis float base0 = 0.0, base1, base2;
62*25c28e83SPiotr Jasiukajtis double num0, num1, num2;
63*25c28e83SPiotr Jasiukajtis double den0, den1, den2;
64*25c28e83SPiotr Jasiukajtis double dx0, dx1, dx2;
65*25c28e83SPiotr Jasiukajtis double dy0, dy1, dy2;
66*25c28e83SPiotr Jasiukajtis double db0, db1, db2;
67*25c28e83SPiotr Jasiukajtis
68*25c28e83SPiotr Jasiukajtis do
69*25c28e83SPiotr Jasiukajtis {
70*25c28e83SPiotr Jasiukajtis loop0:
71*25c28e83SPiotr Jasiukajtis hy0 = *(int*)y;
72*25c28e83SPiotr Jasiukajtis hx = *(int*)x;
73*25c28e83SPiotr Jasiukajtis sign0 = one;
74*25c28e83SPiotr Jasiukajtis sy = hy0 & 0x80000000;
75*25c28e83SPiotr Jasiukajtis hy0 &= ~0x80000000;
76*25c28e83SPiotr Jasiukajtis
77*25c28e83SPiotr Jasiukajtis sx = hx & 0x80000000;
78*25c28e83SPiotr Jasiukajtis hx &= ~0x80000000;
79*25c28e83SPiotr Jasiukajtis
80*25c28e83SPiotr Jasiukajtis if (hy0 > hx)
81*25c28e83SPiotr Jasiukajtis {
82*25c28e83SPiotr Jasiukajtis x0 = *y;
83*25c28e83SPiotr Jasiukajtis y0 = *x;
84*25c28e83SPiotr Jasiukajtis i = hx;
85*25c28e83SPiotr Jasiukajtis hx = hy0;
86*25c28e83SPiotr Jasiukajtis hy0 = i;
87*25c28e83SPiotr Jasiukajtis if (sy)
88*25c28e83SPiotr Jasiukajtis {
89*25c28e83SPiotr Jasiukajtis x0 = -x0;
90*25c28e83SPiotr Jasiukajtis sign0 = -sign0;
91*25c28e83SPiotr Jasiukajtis }
92*25c28e83SPiotr Jasiukajtis if (sx)
93*25c28e83SPiotr Jasiukajtis {
94*25c28e83SPiotr Jasiukajtis y0 = -y0;
95*25c28e83SPiotr Jasiukajtis ah0 = pio2;
96*25c28e83SPiotr Jasiukajtis }
97*25c28e83SPiotr Jasiukajtis else
98*25c28e83SPiotr Jasiukajtis {
99*25c28e83SPiotr Jasiukajtis ah0 = -pio2;
100*25c28e83SPiotr Jasiukajtis sign0 = -sign0;
101*25c28e83SPiotr Jasiukajtis }
102*25c28e83SPiotr Jasiukajtis }
103*25c28e83SPiotr Jasiukajtis else
104*25c28e83SPiotr Jasiukajtis {
105*25c28e83SPiotr Jasiukajtis y0 = *y;
106*25c28e83SPiotr Jasiukajtis x0 = *x;
107*25c28e83SPiotr Jasiukajtis if (sy)
108*25c28e83SPiotr Jasiukajtis {
109*25c28e83SPiotr Jasiukajtis y0 = -y0;
110*25c28e83SPiotr Jasiukajtis sign0 = -sign0;
111*25c28e83SPiotr Jasiukajtis }
112*25c28e83SPiotr Jasiukajtis if (sx)
113*25c28e83SPiotr Jasiukajtis {
114*25c28e83SPiotr Jasiukajtis x0 = -x0;
115*25c28e83SPiotr Jasiukajtis ah0 = -pi;
116*25c28e83SPiotr Jasiukajtis sign0 = -sign0;
117*25c28e83SPiotr Jasiukajtis }
118*25c28e83SPiotr Jasiukajtis else
119*25c28e83SPiotr Jasiukajtis ah0 = zero;
120*25c28e83SPiotr Jasiukajtis }
121*25c28e83SPiotr Jasiukajtis
122*25c28e83SPiotr Jasiukajtis if (hx >= 0x7f800000 || hx - hy0 >= 0x0c800000)
123*25c28e83SPiotr Jasiukajtis {
124*25c28e83SPiotr Jasiukajtis if (hx >= 0x7f800000)
125*25c28e83SPiotr Jasiukajtis {
126*25c28e83SPiotr Jasiukajtis if (hx ^ 0x7f800000) /* nan */
127*25c28e83SPiotr Jasiukajtis ah0 = x0 + y0;
128*25c28e83SPiotr Jasiukajtis else if (hy0 >= 0x7f800000)
129*25c28e83SPiotr Jasiukajtis ah0 += pio4;
130*25c28e83SPiotr Jasiukajtis }
131*25c28e83SPiotr Jasiukajtis else if ((int) ah0 == 0)
132*25c28e83SPiotr Jasiukajtis ah0 = y0 / x0;
133*25c28e83SPiotr Jasiukajtis *z = (sign0 == one) ? ah0 : -ah0;
134*25c28e83SPiotr Jasiukajtis /* sign0*ah0 would change nan behavior relative to previous release */
135*25c28e83SPiotr Jasiukajtis x += stridex;
136*25c28e83SPiotr Jasiukajtis y += stridey;
137*25c28e83SPiotr Jasiukajtis z += stridez;
138*25c28e83SPiotr Jasiukajtis i = 0;
139*25c28e83SPiotr Jasiukajtis if (--n <= 0)
140*25c28e83SPiotr Jasiukajtis break;
141*25c28e83SPiotr Jasiukajtis goto loop0;
142*25c28e83SPiotr Jasiukajtis }
143*25c28e83SPiotr Jasiukajtis if (hy0 < 0x00800000) {
144*25c28e83SPiotr Jasiukajtis if (hy0 == 0)
145*25c28e83SPiotr Jasiukajtis {
146*25c28e83SPiotr Jasiukajtis *z = sign0 * (float) ah0;
147*25c28e83SPiotr Jasiukajtis x += stridex;
148*25c28e83SPiotr Jasiukajtis y += stridey;
149*25c28e83SPiotr Jasiukajtis z += stridez;
150*25c28e83SPiotr Jasiukajtis i = 0;
151*25c28e83SPiotr Jasiukajtis if (--n <= 0)
152*25c28e83SPiotr Jasiukajtis break;
153*25c28e83SPiotr Jasiukajtis goto loop0;
154*25c28e83SPiotr Jasiukajtis }
155*25c28e83SPiotr Jasiukajtis y0 *= twop24; /* scale subnormal y */
156*25c28e83SPiotr Jasiukajtis x0 *= twop24; /* scale possibly subnormal x */
157*25c28e83SPiotr Jasiukajtis hy0 = *(int*)&y0;
158*25c28e83SPiotr Jasiukajtis hx = *(int*)&x0;
159*25c28e83SPiotr Jasiukajtis }
160*25c28e83SPiotr Jasiukajtis pz0 = z;
161*25c28e83SPiotr Jasiukajtis
162*25c28e83SPiotr Jasiukajtis k0 = (hy0 - hx + 0x3f800000) & 0xfff80000;
163*25c28e83SPiotr Jasiukajtis if (k0 >= 0x3C800000) /* if |x| >= (1/64)... */
164*25c28e83SPiotr Jasiukajtis {
165*25c28e83SPiotr Jasiukajtis *(int*)&base0 = k0;
166*25c28e83SPiotr Jasiukajtis k0 = (k0 - 0x3C800000) >> 18; /* (index >> 19) << 1) */
167*25c28e83SPiotr Jasiukajtis k0 += 4;
168*25c28e83SPiotr Jasiukajtis /* skip over 0,0,pi/2,pi/2 */
169*25c28e83SPiotr Jasiukajtis }
170*25c28e83SPiotr Jasiukajtis else /* |x| < 1/64 */
171*25c28e83SPiotr Jasiukajtis {
172*25c28e83SPiotr Jasiukajtis k0 = 0;
173*25c28e83SPiotr Jasiukajtis base0 = zero;
174*25c28e83SPiotr Jasiukajtis }
175*25c28e83SPiotr Jasiukajtis
176*25c28e83SPiotr Jasiukajtis x += stridex;
177*25c28e83SPiotr Jasiukajtis y += stridey;
178*25c28e83SPiotr Jasiukajtis z += stridez;
179*25c28e83SPiotr Jasiukajtis i = 1;
180*25c28e83SPiotr Jasiukajtis if (--n <= 0)
181*25c28e83SPiotr Jasiukajtis break;
182*25c28e83SPiotr Jasiukajtis
183*25c28e83SPiotr Jasiukajtis
184*25c28e83SPiotr Jasiukajtis loop1:
185*25c28e83SPiotr Jasiukajtis hy1 = *(int*)y;
186*25c28e83SPiotr Jasiukajtis hx = *(int*)x;
187*25c28e83SPiotr Jasiukajtis sign1 = one;
188*25c28e83SPiotr Jasiukajtis sy = hy1 & 0x80000000;
189*25c28e83SPiotr Jasiukajtis hy1 &= ~0x80000000;
190*25c28e83SPiotr Jasiukajtis
191*25c28e83SPiotr Jasiukajtis sx = hx & 0x80000000;
192*25c28e83SPiotr Jasiukajtis hx &= ~0x80000000;
193*25c28e83SPiotr Jasiukajtis
194*25c28e83SPiotr Jasiukajtis if (hy1 > hx)
195*25c28e83SPiotr Jasiukajtis {
196*25c28e83SPiotr Jasiukajtis x1 = *y;
197*25c28e83SPiotr Jasiukajtis y1 = *x;
198*25c28e83SPiotr Jasiukajtis i = hx;
199*25c28e83SPiotr Jasiukajtis hx = hy1;
200*25c28e83SPiotr Jasiukajtis hy1 = i;
201*25c28e83SPiotr Jasiukajtis if (sy)
202*25c28e83SPiotr Jasiukajtis {
203*25c28e83SPiotr Jasiukajtis x1 = -x1;
204*25c28e83SPiotr Jasiukajtis sign1 = -sign1;
205*25c28e83SPiotr Jasiukajtis }
206*25c28e83SPiotr Jasiukajtis if (sx)
207*25c28e83SPiotr Jasiukajtis {
208*25c28e83SPiotr Jasiukajtis y1 = -y1;
209*25c28e83SPiotr Jasiukajtis ah1 = pio2;
210*25c28e83SPiotr Jasiukajtis }
211*25c28e83SPiotr Jasiukajtis else
212*25c28e83SPiotr Jasiukajtis {
213*25c28e83SPiotr Jasiukajtis ah1 = -pio2;
214*25c28e83SPiotr Jasiukajtis sign1 = -sign1;
215*25c28e83SPiotr Jasiukajtis }
216*25c28e83SPiotr Jasiukajtis }
217*25c28e83SPiotr Jasiukajtis else
218*25c28e83SPiotr Jasiukajtis {
219*25c28e83SPiotr Jasiukajtis y1 = *y;
220*25c28e83SPiotr Jasiukajtis x1 = *x;
221*25c28e83SPiotr Jasiukajtis if (sy)
222*25c28e83SPiotr Jasiukajtis {
223*25c28e83SPiotr Jasiukajtis y1 = -y1;
224*25c28e83SPiotr Jasiukajtis sign1 = -sign1;
225*25c28e83SPiotr Jasiukajtis }
226*25c28e83SPiotr Jasiukajtis if (sx)
227*25c28e83SPiotr Jasiukajtis {
228*25c28e83SPiotr Jasiukajtis x1 = -x1;
229*25c28e83SPiotr Jasiukajtis ah1 = -pi;
230*25c28e83SPiotr Jasiukajtis sign1 = -sign1;
231*25c28e83SPiotr Jasiukajtis }
232*25c28e83SPiotr Jasiukajtis else
233*25c28e83SPiotr Jasiukajtis ah1 = zero;
234*25c28e83SPiotr Jasiukajtis }
235*25c28e83SPiotr Jasiukajtis
236*25c28e83SPiotr Jasiukajtis if (hx >= 0x7f800000 || hx - hy1 >= 0x0c800000)
237*25c28e83SPiotr Jasiukajtis {
238*25c28e83SPiotr Jasiukajtis if (hx >= 0x7f800000)
239*25c28e83SPiotr Jasiukajtis {
240*25c28e83SPiotr Jasiukajtis if (hx ^ 0x7f800000) /* nan */
241*25c28e83SPiotr Jasiukajtis ah1 = x1 + y1;
242*25c28e83SPiotr Jasiukajtis else if (hy1 >= 0x7f800000)
243*25c28e83SPiotr Jasiukajtis ah1 += pio4;
244*25c28e83SPiotr Jasiukajtis }
245*25c28e83SPiotr Jasiukajtis else if ((int) ah1 == 0)
246*25c28e83SPiotr Jasiukajtis ah1 = y1 / x1;
247*25c28e83SPiotr Jasiukajtis *z = (sign1 == one)? ah1 : -ah1;
248*25c28e83SPiotr Jasiukajtis x += stridex;
249*25c28e83SPiotr Jasiukajtis y += stridey;
250*25c28e83SPiotr Jasiukajtis z += stridez;
251*25c28e83SPiotr Jasiukajtis i = 1;
252*25c28e83SPiotr Jasiukajtis if (--n <= 0)
253*25c28e83SPiotr Jasiukajtis break;
254*25c28e83SPiotr Jasiukajtis goto loop1;
255*25c28e83SPiotr Jasiukajtis }
256*25c28e83SPiotr Jasiukajtis if (hy1 < 0x00800000) {
257*25c28e83SPiotr Jasiukajtis if (hy1 == 0)
258*25c28e83SPiotr Jasiukajtis {
259*25c28e83SPiotr Jasiukajtis *z = sign1 * (float) ah1;
260*25c28e83SPiotr Jasiukajtis x += stridex;
261*25c28e83SPiotr Jasiukajtis y += stridey;
262*25c28e83SPiotr Jasiukajtis z += stridez;
263*25c28e83SPiotr Jasiukajtis i = 1;
264*25c28e83SPiotr Jasiukajtis if (--n <= 0)
265*25c28e83SPiotr Jasiukajtis break;
266*25c28e83SPiotr Jasiukajtis goto loop1;
267*25c28e83SPiotr Jasiukajtis }
268*25c28e83SPiotr Jasiukajtis y1 *= twop24; /* scale subnormal y */
269*25c28e83SPiotr Jasiukajtis x1 *= twop24; /* scale possibly subnormal x */
270*25c28e83SPiotr Jasiukajtis hy1 = *(int*)&y1;
271*25c28e83SPiotr Jasiukajtis hx = *(int*)&x1;
272*25c28e83SPiotr Jasiukajtis }
273*25c28e83SPiotr Jasiukajtis pz1 = z;
274*25c28e83SPiotr Jasiukajtis
275*25c28e83SPiotr Jasiukajtis k1 = (hy1 - hx + 0x3f800000) & 0xfff80000;
276*25c28e83SPiotr Jasiukajtis if (k1 >= 0x3C800000) /* if |x| >= (1/64)... */
277*25c28e83SPiotr Jasiukajtis {
278*25c28e83SPiotr Jasiukajtis *(int*)&base1 = k1;
279*25c28e83SPiotr Jasiukajtis k1 = (k1 - 0x3C800000) >> 18; /* (index >> 19) << 1) */
280*25c28e83SPiotr Jasiukajtis k1 += 4;
281*25c28e83SPiotr Jasiukajtis /* skip over 0,0,pi/2,pi/2 */
282*25c28e83SPiotr Jasiukajtis }
283*25c28e83SPiotr Jasiukajtis else /* |x| < 1/64 */
284*25c28e83SPiotr Jasiukajtis {
285*25c28e83SPiotr Jasiukajtis k1 = 0;
286*25c28e83SPiotr Jasiukajtis base1 = zero;
287*25c28e83SPiotr Jasiukajtis }
288*25c28e83SPiotr Jasiukajtis
289*25c28e83SPiotr Jasiukajtis x += stridex;
290*25c28e83SPiotr Jasiukajtis y += stridey;
291*25c28e83SPiotr Jasiukajtis z += stridez;
292*25c28e83SPiotr Jasiukajtis i = 2;
293*25c28e83SPiotr Jasiukajtis if (--n <= 0)
294*25c28e83SPiotr Jasiukajtis break;
295*25c28e83SPiotr Jasiukajtis
296*25c28e83SPiotr Jasiukajtis loop2:
297*25c28e83SPiotr Jasiukajtis hy2 = *(int*)y;
298*25c28e83SPiotr Jasiukajtis hx = *(int*)x;
299*25c28e83SPiotr Jasiukajtis sign2 = one;
300*25c28e83SPiotr Jasiukajtis sy = hy2 & 0x80000000;
301*25c28e83SPiotr Jasiukajtis hy2 &= ~0x80000000;
302*25c28e83SPiotr Jasiukajtis
303*25c28e83SPiotr Jasiukajtis sx = hx & 0x80000000;
304*25c28e83SPiotr Jasiukajtis hx &= ~0x80000000;
305*25c28e83SPiotr Jasiukajtis
306*25c28e83SPiotr Jasiukajtis if (hy2 > hx)
307*25c28e83SPiotr Jasiukajtis {
308*25c28e83SPiotr Jasiukajtis x2 = *y;
309*25c28e83SPiotr Jasiukajtis y2 = *x;
310*25c28e83SPiotr Jasiukajtis i = hx;
311*25c28e83SPiotr Jasiukajtis hx = hy2;
312*25c28e83SPiotr Jasiukajtis hy2 = i;
313*25c28e83SPiotr Jasiukajtis if (sy)
314*25c28e83SPiotr Jasiukajtis {
315*25c28e83SPiotr Jasiukajtis x2 = -x2;
316*25c28e83SPiotr Jasiukajtis sign2 = -sign2;
317*25c28e83SPiotr Jasiukajtis }
318*25c28e83SPiotr Jasiukajtis if (sx)
319*25c28e83SPiotr Jasiukajtis {
320*25c28e83SPiotr Jasiukajtis y2 = -y2;
321*25c28e83SPiotr Jasiukajtis ah2 = pio2;
322*25c28e83SPiotr Jasiukajtis }
323*25c28e83SPiotr Jasiukajtis else
324*25c28e83SPiotr Jasiukajtis {
325*25c28e83SPiotr Jasiukajtis ah2 = -pio2;
326*25c28e83SPiotr Jasiukajtis sign2 = -sign2;
327*25c28e83SPiotr Jasiukajtis }
328*25c28e83SPiotr Jasiukajtis }
329*25c28e83SPiotr Jasiukajtis else
330*25c28e83SPiotr Jasiukajtis {
331*25c28e83SPiotr Jasiukajtis y2 = *y;
332*25c28e83SPiotr Jasiukajtis x2 = *x;
333*25c28e83SPiotr Jasiukajtis if (sy)
334*25c28e83SPiotr Jasiukajtis {
335*25c28e83SPiotr Jasiukajtis y2 = -y2;
336*25c28e83SPiotr Jasiukajtis sign2 = -sign2;
337*25c28e83SPiotr Jasiukajtis }
338*25c28e83SPiotr Jasiukajtis if (sx)
339*25c28e83SPiotr Jasiukajtis {
340*25c28e83SPiotr Jasiukajtis x2 = -x2;
341*25c28e83SPiotr Jasiukajtis ah2 = -pi;
342*25c28e83SPiotr Jasiukajtis sign2 = -sign2;
343*25c28e83SPiotr Jasiukajtis }
344*25c28e83SPiotr Jasiukajtis else
345*25c28e83SPiotr Jasiukajtis ah2 = zero;
346*25c28e83SPiotr Jasiukajtis }
347*25c28e83SPiotr Jasiukajtis
348*25c28e83SPiotr Jasiukajtis if (hx >= 0x7f800000 || hx - hy2 >= 0x0c800000)
349*25c28e83SPiotr Jasiukajtis {
350*25c28e83SPiotr Jasiukajtis if (hx >= 0x7f800000)
351*25c28e83SPiotr Jasiukajtis {
352*25c28e83SPiotr Jasiukajtis if (hx ^ 0x7f800000) /* nan */
353*25c28e83SPiotr Jasiukajtis ah2 = x2 + y2;
354*25c28e83SPiotr Jasiukajtis else if (hy2 >= 0x7f800000)
355*25c28e83SPiotr Jasiukajtis ah2 += pio4;
356*25c28e83SPiotr Jasiukajtis }
357*25c28e83SPiotr Jasiukajtis else if ((int) ah2 == 0)
358*25c28e83SPiotr Jasiukajtis ah2 = y2 / x2;
359*25c28e83SPiotr Jasiukajtis *z = (sign2 == one)? ah2 : -ah2;
360*25c28e83SPiotr Jasiukajtis x += stridex;
361*25c28e83SPiotr Jasiukajtis y += stridey;
362*25c28e83SPiotr Jasiukajtis z += stridez;
363*25c28e83SPiotr Jasiukajtis i = 2;
364*25c28e83SPiotr Jasiukajtis if (--n <= 0)
365*25c28e83SPiotr Jasiukajtis break;
366*25c28e83SPiotr Jasiukajtis goto loop2;
367*25c28e83SPiotr Jasiukajtis }
368*25c28e83SPiotr Jasiukajtis if (hy2 < 0x00800000) {
369*25c28e83SPiotr Jasiukajtis if (hy2 == 0)
370*25c28e83SPiotr Jasiukajtis {
371*25c28e83SPiotr Jasiukajtis *z = sign2 * (float) ah2;
372*25c28e83SPiotr Jasiukajtis x += stridex;
373*25c28e83SPiotr Jasiukajtis y += stridey;
374*25c28e83SPiotr Jasiukajtis z += stridez;
375*25c28e83SPiotr Jasiukajtis i = 2;
376*25c28e83SPiotr Jasiukajtis if (--n <= 0)
377*25c28e83SPiotr Jasiukajtis break;
378*25c28e83SPiotr Jasiukajtis goto loop2;
379*25c28e83SPiotr Jasiukajtis }
380*25c28e83SPiotr Jasiukajtis y2 *= twop24; /* scale subnormal y */
381*25c28e83SPiotr Jasiukajtis x2 *= twop24; /* scale possibly subnormal x */
382*25c28e83SPiotr Jasiukajtis hy2 = *(int*)&y2;
383*25c28e83SPiotr Jasiukajtis hx = *(int*)&x2;
384*25c28e83SPiotr Jasiukajtis }
385*25c28e83SPiotr Jasiukajtis
386*25c28e83SPiotr Jasiukajtis pz2 = z;
387*25c28e83SPiotr Jasiukajtis
388*25c28e83SPiotr Jasiukajtis k2 = (hy2 - hx + 0x3f800000) & 0xfff80000;
389*25c28e83SPiotr Jasiukajtis if (k2 >= 0x3C800000) /* if |x| >= (1/64)... */
390*25c28e83SPiotr Jasiukajtis {
391*25c28e83SPiotr Jasiukajtis *(int*)&base2 = k2;
392*25c28e83SPiotr Jasiukajtis k2 = (k2 - 0x3C800000) >> 18; /* (index >> 19) << 1) */
393*25c28e83SPiotr Jasiukajtis k2 += 4;
394*25c28e83SPiotr Jasiukajtis /* skip over 0,0,pi/2,pi/2 */
395*25c28e83SPiotr Jasiukajtis }
396*25c28e83SPiotr Jasiukajtis else /* |x| < 1/64 */
397*25c28e83SPiotr Jasiukajtis {
398*25c28e83SPiotr Jasiukajtis k2 = 0;
399*25c28e83SPiotr Jasiukajtis base2 = zero;
400*25c28e83SPiotr Jasiukajtis }
401*25c28e83SPiotr Jasiukajtis
402*25c28e83SPiotr Jasiukajtis goto endloop;
403*25c28e83SPiotr Jasiukajtis
404*25c28e83SPiotr Jasiukajtis endloop:
405*25c28e83SPiotr Jasiukajtis
406*25c28e83SPiotr Jasiukajtis ah2 += __vlibm_TBL_atan1[k2];
407*25c28e83SPiotr Jasiukajtis ah1 += __vlibm_TBL_atan1[k1];
408*25c28e83SPiotr Jasiukajtis ah0 += __vlibm_TBL_atan1[k0];
409*25c28e83SPiotr Jasiukajtis
410*25c28e83SPiotr Jasiukajtis db2 = base2;
411*25c28e83SPiotr Jasiukajtis db1 = base1;
412*25c28e83SPiotr Jasiukajtis db0 = base0;
413*25c28e83SPiotr Jasiukajtis dy2 = y2;
414*25c28e83SPiotr Jasiukajtis dy1 = y1;
415*25c28e83SPiotr Jasiukajtis dy0 = y0;
416*25c28e83SPiotr Jasiukajtis dx2 = x2;
417*25c28e83SPiotr Jasiukajtis dx1 = x1;
418*25c28e83SPiotr Jasiukajtis dx0 = x0;
419*25c28e83SPiotr Jasiukajtis
420*25c28e83SPiotr Jasiukajtis num2 = dy2 - dx2 * db2;
421*25c28e83SPiotr Jasiukajtis den2 = dx2 + dy2 * db2;
422*25c28e83SPiotr Jasiukajtis
423*25c28e83SPiotr Jasiukajtis num1 = dy1 - dx1 * db1;
424*25c28e83SPiotr Jasiukajtis den1 = dx1 + dy1 * db1;
425*25c28e83SPiotr Jasiukajtis
426*25c28e83SPiotr Jasiukajtis num0 = dy0 - dx0 * db0;
427*25c28e83SPiotr Jasiukajtis den0 = dx0 + dy0 * db0;
428*25c28e83SPiotr Jasiukajtis
429*25c28e83SPiotr Jasiukajtis t2 = num2 / den2;
430*25c28e83SPiotr Jasiukajtis t1 = num1 / den1;
431*25c28e83SPiotr Jasiukajtis t0 = num0 / den0;
432*25c28e83SPiotr Jasiukajtis
433*25c28e83SPiotr Jasiukajtis sx2 = t2 * t2;
434*25c28e83SPiotr Jasiukajtis sx1 = t1 * t1;
435*25c28e83SPiotr Jasiukajtis sx0 = t0 * t0;
436*25c28e83SPiotr Jasiukajtis
437*25c28e83SPiotr Jasiukajtis t2 += t2 * sx2 * (q1 + sx2 * q2);
438*25c28e83SPiotr Jasiukajtis t1 += t1 * sx1 * (q1 + sx1 * q2);
439*25c28e83SPiotr Jasiukajtis t0 += t0 * sx0 * (q1 + sx0 * q2);
440*25c28e83SPiotr Jasiukajtis
441*25c28e83SPiotr Jasiukajtis t2 += ah2;
442*25c28e83SPiotr Jasiukajtis t1 += ah1;
443*25c28e83SPiotr Jasiukajtis t0 += ah0;
444*25c28e83SPiotr Jasiukajtis
445*25c28e83SPiotr Jasiukajtis *pz2 = sign2 * t2;
446*25c28e83SPiotr Jasiukajtis *pz1 = sign1 * t1;
447*25c28e83SPiotr Jasiukajtis *pz0 = sign0 * t0;
448*25c28e83SPiotr Jasiukajtis
449*25c28e83SPiotr Jasiukajtis x += stridex;
450*25c28e83SPiotr Jasiukajtis y += stridey;
451*25c28e83SPiotr Jasiukajtis z += stridez;
452*25c28e83SPiotr Jasiukajtis i = 0;
453*25c28e83SPiotr Jasiukajtis } while (--n > 0);
454*25c28e83SPiotr Jasiukajtis
455*25c28e83SPiotr Jasiukajtis if (i > 1)
456*25c28e83SPiotr Jasiukajtis {
457*25c28e83SPiotr Jasiukajtis ah1 += __vlibm_TBL_atan1[k1];
458*25c28e83SPiotr Jasiukajtis t1 = (y1 - x1 * (double)base1) /
459*25c28e83SPiotr Jasiukajtis (x1 + y1 * (double)base1);
460*25c28e83SPiotr Jasiukajtis sx1 = t1 * t1;
461*25c28e83SPiotr Jasiukajtis t1 += t1 * sx1 * (q1 + sx1 * q2);
462*25c28e83SPiotr Jasiukajtis t1 += ah1;
463*25c28e83SPiotr Jasiukajtis *pz1 = sign1 * t1;
464*25c28e83SPiotr Jasiukajtis }
465*25c28e83SPiotr Jasiukajtis
466*25c28e83SPiotr Jasiukajtis if (i > 0)
467*25c28e83SPiotr Jasiukajtis {
468*25c28e83SPiotr Jasiukajtis ah0 += __vlibm_TBL_atan1[k0];
469*25c28e83SPiotr Jasiukajtis t0 = (y0 - x0 * (double)base0) /
470*25c28e83SPiotr Jasiukajtis (x0 + y0 * (double)base0);
471*25c28e83SPiotr Jasiukajtis sx0 = t0 * t0;
472*25c28e83SPiotr Jasiukajtis t0 += t0 * sx0 * (q1 + sx0 * q2);
473*25c28e83SPiotr Jasiukajtis t0 += ah0;
474*25c28e83SPiotr Jasiukajtis *pz0 = sign0 * t0;
475*25c28e83SPiotr Jasiukajtis }
476*25c28e83SPiotr Jasiukajtis }
477