1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
24 */
25 /*
26 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
27 * Use is subject to license terms.
28 */
29
30 #ifdef __RESTRICT
31 #define restrict _Restrict
32 #else
33 #define restrict
34 #endif
35
36 extern const double __vlibm_TBL_atan1[];
37
38 static const double
39 pio4 = 7.8539816339744827900e-01,
40 pio2 = 1.5707963267948965580e+00,
41 pi = 3.1415926535897931160e+00;
42
43 static const float
44 zero = 0.0f,
45 one = 1.0f,
46 q1 = -3.3333333333296428046e-01f,
47 q2 = 1.9999999186853752618e-01f,
48 twop24 = 16777216.0f;
49
50 void
__vatan2f(int n,float * restrict y,int stridey,float * restrict x,int stridex,float * restrict z,int stridez)51 __vatan2f(int n, float * restrict y, int stridey, float * restrict x,
52 int stridex, float * restrict z, int stridez)
53 {
54 float x0, x1, x2, y0, y1, y2, *pz0 = 0, *pz1, *pz2;
55 double ah0, ah1, ah2;
56 double t0, t1, t2;
57 double sx0, sx1, sx2;
58 double sign0, sign1, sign2;
59 int i, k0 = 0, k1, k2, hx, sx, sy;
60 int hy0, hy1, hy2;
61 float base0 = 0.0, base1, base2;
62 double num0, num1, num2;
63 double den0, den1, den2;
64 double dx0, dx1, dx2;
65 double dy0, dy1, dy2;
66 double db0, db1, db2;
67
68 do
69 {
70 loop0:
71 hy0 = *(int*)y;
72 hx = *(int*)x;
73 sign0 = one;
74 sy = hy0 & 0x80000000;
75 hy0 &= ~0x80000000;
76
77 sx = hx & 0x80000000;
78 hx &= ~0x80000000;
79
80 if (hy0 > hx)
81 {
82 x0 = *y;
83 y0 = *x;
84 i = hx;
85 hx = hy0;
86 hy0 = i;
87 if (sy)
88 {
89 x0 = -x0;
90 sign0 = -sign0;
91 }
92 if (sx)
93 {
94 y0 = -y0;
95 ah0 = pio2;
96 }
97 else
98 {
99 ah0 = -pio2;
100 sign0 = -sign0;
101 }
102 }
103 else
104 {
105 y0 = *y;
106 x0 = *x;
107 if (sy)
108 {
109 y0 = -y0;
110 sign0 = -sign0;
111 }
112 if (sx)
113 {
114 x0 = -x0;
115 ah0 = -pi;
116 sign0 = -sign0;
117 }
118 else
119 ah0 = zero;
120 }
121
122 if (hx >= 0x7f800000 || hx - hy0 >= 0x0c800000)
123 {
124 if (hx >= 0x7f800000)
125 {
126 if (hx ^ 0x7f800000) /* nan */
127 ah0 = x0 + y0;
128 else if (hy0 >= 0x7f800000)
129 ah0 += pio4;
130 }
131 else if ((int) ah0 == 0)
132 ah0 = y0 / x0;
133 *z = (sign0 == one) ? ah0 : -ah0;
134 /* sign0*ah0 would change nan behavior relative to previous release */
135 x += stridex;
136 y += stridey;
137 z += stridez;
138 i = 0;
139 if (--n <= 0)
140 break;
141 goto loop0;
142 }
143 if (hy0 < 0x00800000) {
144 if (hy0 == 0)
145 {
146 *z = sign0 * (float) ah0;
147 x += stridex;
148 y += stridey;
149 z += stridez;
150 i = 0;
151 if (--n <= 0)
152 break;
153 goto loop0;
154 }
155 y0 *= twop24; /* scale subnormal y */
156 x0 *= twop24; /* scale possibly subnormal x */
157 hy0 = *(int*)&y0;
158 hx = *(int*)&x0;
159 }
160 pz0 = z;
161
162 k0 = (hy0 - hx + 0x3f800000) & 0xfff80000;
163 if (k0 >= 0x3C800000) /* if |x| >= (1/64)... */
164 {
165 *(int*)&base0 = k0;
166 k0 = (k0 - 0x3C800000) >> 18; /* (index >> 19) << 1) */
167 k0 += 4;
168 /* skip over 0,0,pi/2,pi/2 */
169 }
170 else /* |x| < 1/64 */
171 {
172 k0 = 0;
173 base0 = zero;
174 }
175
176 x += stridex;
177 y += stridey;
178 z += stridez;
179 i = 1;
180 if (--n <= 0)
181 break;
182
183
184 loop1:
185 hy1 = *(int*)y;
186 hx = *(int*)x;
187 sign1 = one;
188 sy = hy1 & 0x80000000;
189 hy1 &= ~0x80000000;
190
191 sx = hx & 0x80000000;
192 hx &= ~0x80000000;
193
194 if (hy1 > hx)
195 {
196 x1 = *y;
197 y1 = *x;
198 i = hx;
199 hx = hy1;
200 hy1 = i;
201 if (sy)
202 {
203 x1 = -x1;
204 sign1 = -sign1;
205 }
206 if (sx)
207 {
208 y1 = -y1;
209 ah1 = pio2;
210 }
211 else
212 {
213 ah1 = -pio2;
214 sign1 = -sign1;
215 }
216 }
217 else
218 {
219 y1 = *y;
220 x1 = *x;
221 if (sy)
222 {
223 y1 = -y1;
224 sign1 = -sign1;
225 }
226 if (sx)
227 {
228 x1 = -x1;
229 ah1 = -pi;
230 sign1 = -sign1;
231 }
232 else
233 ah1 = zero;
234 }
235
236 if (hx >= 0x7f800000 || hx - hy1 >= 0x0c800000)
237 {
238 if (hx >= 0x7f800000)
239 {
240 if (hx ^ 0x7f800000) /* nan */
241 ah1 = x1 + y1;
242 else if (hy1 >= 0x7f800000)
243 ah1 += pio4;
244 }
245 else if ((int) ah1 == 0)
246 ah1 = y1 / x1;
247 *z = (sign1 == one)? ah1 : -ah1;
248 x += stridex;
249 y += stridey;
250 z += stridez;
251 i = 1;
252 if (--n <= 0)
253 break;
254 goto loop1;
255 }
256 if (hy1 < 0x00800000) {
257 if (hy1 == 0)
258 {
259 *z = sign1 * (float) ah1;
260 x += stridex;
261 y += stridey;
262 z += stridez;
263 i = 1;
264 if (--n <= 0)
265 break;
266 goto loop1;
267 }
268 y1 *= twop24; /* scale subnormal y */
269 x1 *= twop24; /* scale possibly subnormal x */
270 hy1 = *(int*)&y1;
271 hx = *(int*)&x1;
272 }
273 pz1 = z;
274
275 k1 = (hy1 - hx + 0x3f800000) & 0xfff80000;
276 if (k1 >= 0x3C800000) /* if |x| >= (1/64)... */
277 {
278 *(int*)&base1 = k1;
279 k1 = (k1 - 0x3C800000) >> 18; /* (index >> 19) << 1) */
280 k1 += 4;
281 /* skip over 0,0,pi/2,pi/2 */
282 }
283 else /* |x| < 1/64 */
284 {
285 k1 = 0;
286 base1 = zero;
287 }
288
289 x += stridex;
290 y += stridey;
291 z += stridez;
292 i = 2;
293 if (--n <= 0)
294 break;
295
296 loop2:
297 hy2 = *(int*)y;
298 hx = *(int*)x;
299 sign2 = one;
300 sy = hy2 & 0x80000000;
301 hy2 &= ~0x80000000;
302
303 sx = hx & 0x80000000;
304 hx &= ~0x80000000;
305
306 if (hy2 > hx)
307 {
308 x2 = *y;
309 y2 = *x;
310 i = hx;
311 hx = hy2;
312 hy2 = i;
313 if (sy)
314 {
315 x2 = -x2;
316 sign2 = -sign2;
317 }
318 if (sx)
319 {
320 y2 = -y2;
321 ah2 = pio2;
322 }
323 else
324 {
325 ah2 = -pio2;
326 sign2 = -sign2;
327 }
328 }
329 else
330 {
331 y2 = *y;
332 x2 = *x;
333 if (sy)
334 {
335 y2 = -y2;
336 sign2 = -sign2;
337 }
338 if (sx)
339 {
340 x2 = -x2;
341 ah2 = -pi;
342 sign2 = -sign2;
343 }
344 else
345 ah2 = zero;
346 }
347
348 if (hx >= 0x7f800000 || hx - hy2 >= 0x0c800000)
349 {
350 if (hx >= 0x7f800000)
351 {
352 if (hx ^ 0x7f800000) /* nan */
353 ah2 = x2 + y2;
354 else if (hy2 >= 0x7f800000)
355 ah2 += pio4;
356 }
357 else if ((int) ah2 == 0)
358 ah2 = y2 / x2;
359 *z = (sign2 == one)? ah2 : -ah2;
360 x += stridex;
361 y += stridey;
362 z += stridez;
363 i = 2;
364 if (--n <= 0)
365 break;
366 goto loop2;
367 }
368 if (hy2 < 0x00800000) {
369 if (hy2 == 0)
370 {
371 *z = sign2 * (float) ah2;
372 x += stridex;
373 y += stridey;
374 z += stridez;
375 i = 2;
376 if (--n <= 0)
377 break;
378 goto loop2;
379 }
380 y2 *= twop24; /* scale subnormal y */
381 x2 *= twop24; /* scale possibly subnormal x */
382 hy2 = *(int*)&y2;
383 hx = *(int*)&x2;
384 }
385
386 pz2 = z;
387
388 k2 = (hy2 - hx + 0x3f800000) & 0xfff80000;
389 if (k2 >= 0x3C800000) /* if |x| >= (1/64)... */
390 {
391 *(int*)&base2 = k2;
392 k2 = (k2 - 0x3C800000) >> 18; /* (index >> 19) << 1) */
393 k2 += 4;
394 /* skip over 0,0,pi/2,pi/2 */
395 }
396 else /* |x| < 1/64 */
397 {
398 k2 = 0;
399 base2 = zero;
400 }
401
402 goto endloop;
403
404 endloop:
405
406 ah2 += __vlibm_TBL_atan1[k2];
407 ah1 += __vlibm_TBL_atan1[k1];
408 ah0 += __vlibm_TBL_atan1[k0];
409
410 db2 = base2;
411 db1 = base1;
412 db0 = base0;
413 dy2 = y2;
414 dy1 = y1;
415 dy0 = y0;
416 dx2 = x2;
417 dx1 = x1;
418 dx0 = x0;
419
420 num2 = dy2 - dx2 * db2;
421 den2 = dx2 + dy2 * db2;
422
423 num1 = dy1 - dx1 * db1;
424 den1 = dx1 + dy1 * db1;
425
426 num0 = dy0 - dx0 * db0;
427 den0 = dx0 + dy0 * db0;
428
429 t2 = num2 / den2;
430 t1 = num1 / den1;
431 t0 = num0 / den0;
432
433 sx2 = t2 * t2;
434 sx1 = t1 * t1;
435 sx0 = t0 * t0;
436
437 t2 += t2 * sx2 * (q1 + sx2 * q2);
438 t1 += t1 * sx1 * (q1 + sx1 * q2);
439 t0 += t0 * sx0 * (q1 + sx0 * q2);
440
441 t2 += ah2;
442 t1 += ah1;
443 t0 += ah0;
444
445 *pz2 = sign2 * t2;
446 *pz1 = sign1 * t1;
447 *pz0 = sign0 * t0;
448
449 x += stridex;
450 y += stridey;
451 z += stridez;
452 i = 0;
453 } while (--n > 0);
454
455 if (i > 1)
456 {
457 ah1 += __vlibm_TBL_atan1[k1];
458 t1 = (y1 - x1 * (double)base1) /
459 (x1 + y1 * (double)base1);
460 sx1 = t1 * t1;
461 t1 += t1 * sx1 * (q1 + sx1 * q2);
462 t1 += ah1;
463 *pz1 = sign1 * t1;
464 }
465
466 if (i > 0)
467 {
468 ah0 += __vlibm_TBL_atan1[k0];
469 t0 = (y0 - x0 * (double)base0) /
470 (x0 + y0 * (double)base0);
471 sx0 = t0 * t0;
472 t0 += t0 * sx0 * (q1 + sx0 * q2);
473 t0 += ah0;
474 *pz0 = sign0 * t0;
475 }
476 }
477