xref: /illumos-gate/usr/src/lib/libmvec/common/__vcosbigf.c (revision a2cdcdd260232b58202b11a9bfc0103c9449ed52)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
24  */
25 /*
26  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
27  * Use is subject to license terms.
28  */
29 
30 #include <sys/isa_defs.h>
31 
32 #ifdef _LITTLE_ENDIAN
33 #define HI(x)	*(1+(int*)x)
34 #define LO(x)	*(unsigned*)x
35 #else
36 #define HI(x)	*(int*)x
37 #define LO(x)	*(1+(unsigned*)x)
38 #endif
39 
40 #ifdef __RESTRICT
41 #define restrict _Restrict
42 #else
43 #define restrict
44 #endif
45 
46 extern const double __vlibm_TBL_sincos_hi[], __vlibm_TBL_sincos_lo[];
47 extern int __vlibm_rem_pio2m(double *, double *, int, int, int);
48 
49 static const double
50 	zero	= 0.0,
51 	one		= 1.0,
52 	two24	= 16777216.0,
53 	pp1		= -1.666666666605760465276263943134982554676e-0001,
54 	pp2		=  8.333261209690963126718376566146180944442e-0003,
55 	p1		= -1.666666666666629669805215138920301589656e-0001,
56 	p2		=  8.333333332390951295683993455280336376663e-0003,
57 	p3		= -1.984126237997976692791551778230098403960e-0004,
58 	p4		=  2.753403624854277237649987622848330351110e-0006,
59 	qq1		= -4.999999999977710986407023955908711557870e-0001,
60 	qq2		=  4.166654863857219350645055881018842089580e-0002,
61 	q1		= -4.999999999999931701464060878888294524481e-0001,
62 	q2		=  4.166666666394861917535640593963708222319e-0002,
63 	q3		= -1.388888552656142867832756687736851681462e-0003,
64 	q4		=  2.478519423681460796618128289454530524759e-0005;
65 
66 void
67 __vlibm_vcos_bigf(int n, float * restrict x, int stridex, float * restrict y,
68 	int stridey)
69 {
70 	for (; n--; x += stridex, y += stridey)
71 	{
72 		double		tx, tt[3], ty[2], t, w, z, a;
73 		unsigned	hx, xsb;
74 		int			e0, nx, j;
75 
76 		tx = *x;
77 		hx = HI(&tx);
78 		xsb = hx & 0x80000000;
79 		hx &= ~0x80000000;
80 		if (hx <= 0x413921fb || hx >= 0x7ff00000)
81 			continue;
82 		e0 = (hx >> 20) - 1046;
83 		HI(&tx) = 0x41600000 | (hx & 0xfffff);
84 
85 		tt[0] = (double)((int) tx);
86 		tx = (tx - tt[0]) * two24;
87 		if (tx != zero)
88 		{
89 			nx = 2;
90 			tt[1] = (double)((int) tx);
91 			tt[2] = (tx - tt[1]) * two24;
92 			if (tt[2] != zero)
93 				nx = 3;
94 		}
95 		else
96 		{
97 			nx = 1;
98 			tt[1] = tt[2] = zero;
99 		}
100 		nx = __vlibm_rem_pio2m(tt, ty, e0, nx, 2);
101 		if (xsb)
102 		{
103 			nx = -nx;
104 			ty[0] = -ty[0];
105 			ty[1] = -ty[1];
106 		}
107 		nx = (nx + 1) & 3; /* Add 1 to turn sin into cos */
108 
109 		/* now nx and ty[*] are the quadrant and reduced arg */
110 		xsb = (nx & 2) << 30;
111 		hx = HI(&ty[0]);
112 		if (nx & 1)
113 		{
114 			if (hx & 0x80000000)
115 			{
116 				ty[0] = -ty[0];
117 				ty[1] = -ty[1];
118 				hx &= ~0x80000000;
119 			}
120 			if (hx < 0x3fc40000)
121 			{
122 				z = ty[0] * ty[0];
123 				t = z * (q1 + z * (q2 + z * (q3 + z * q4)));
124 				a = one + t;
125 			}
126 			else
127 			{
128 				j = (hx + 0x4000) & 0x7fff8000;
129 				HI(&t) = j;
130 				LO(&t) = 0;
131 				ty[0] = (ty[0] - t) + ty[1];
132 				z = ty[0] * ty[0];
133 				t = z * (qq1 + z * qq2);
134 				w = ty[0] * (one + z * (pp1 + z * pp2));
135 				j = ((j - 0x3fc40000) >> 13) & ~3;
136 				a = __vlibm_TBL_sincos_hi[j+1];
137 				t = __vlibm_TBL_sincos_lo[j+1] - (__vlibm_TBL_sincos_hi[j] * w - a * t);
138 				a += t;
139 			}
140 		}
141 		else
142 		{
143 			if (hx & 0x80000000)
144 			{
145 				ty[0] = -ty[0];
146 				ty[1] = -ty[1];
147 				hx &= ~0x80000000;
148 				xsb ^= 0x80000000;
149 			}
150 			if (hx < 0x3fc90000)
151 			{
152 				z = ty[0] * ty[0];
153 				t = z * (p1 + z * (p2 + z * (p3 + z * p4)));
154 				a = ty[0] + (ty[1] + ty[0] * t);
155 			}
156 			else
157 			{
158 				j = (hx + 0x4000) & 0x7fff8000;
159 				HI(&t) = j;
160 				LO(&t) = 0;
161 				ty[0] = (ty[0] - t) + ty[1];
162 				z = ty[0] * ty[0];
163 				t = z * (qq1 + z * qq2);
164 				w = ty[0] * (one + z * (pp1 + z * pp2));
165 				j = ((j - 0x3fc40000) >> 13) & ~3;
166 				a = __vlibm_TBL_sincos_hi[j];
167 				t = (__vlibm_TBL_sincos_hi[j+1] * w + a * t) + __vlibm_TBL_sincos_lo[j];
168 				a += t;
169 			}
170 		}
171 		if (xsb) a = -a;
172 		*y = a;
173 	}
174 }
175