xref: /titanic_51/usr/src/lib/libmvec/common/__vhypotf.c (revision ddc0e0b53c661f6e439e3b7072b3ef353eadb4af)
1*25c28e83SPiotr Jasiukajtis /*
2*25c28e83SPiotr Jasiukajtis  * CDDL HEADER START
3*25c28e83SPiotr Jasiukajtis  *
4*25c28e83SPiotr Jasiukajtis  * The contents of this file are subject to the terms of the
5*25c28e83SPiotr Jasiukajtis  * Common Development and Distribution License (the "License").
6*25c28e83SPiotr Jasiukajtis  * You may not use this file except in compliance with the License.
7*25c28e83SPiotr Jasiukajtis  *
8*25c28e83SPiotr Jasiukajtis  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*25c28e83SPiotr Jasiukajtis  * or http://www.opensolaris.org/os/licensing.
10*25c28e83SPiotr Jasiukajtis  * See the License for the specific language governing permissions
11*25c28e83SPiotr Jasiukajtis  * and limitations under the License.
12*25c28e83SPiotr Jasiukajtis  *
13*25c28e83SPiotr Jasiukajtis  * When distributing Covered Code, include this CDDL HEADER in each
14*25c28e83SPiotr Jasiukajtis  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*25c28e83SPiotr Jasiukajtis  * If applicable, add the following below this CDDL HEADER, with the
16*25c28e83SPiotr Jasiukajtis  * fields enclosed by brackets "[]" replaced with your own identifying
17*25c28e83SPiotr Jasiukajtis  * information: Portions Copyright [yyyy] [name of copyright owner]
18*25c28e83SPiotr Jasiukajtis  *
19*25c28e83SPiotr Jasiukajtis  * CDDL HEADER END
20*25c28e83SPiotr Jasiukajtis  */
21*25c28e83SPiotr Jasiukajtis 
22*25c28e83SPiotr Jasiukajtis /*
23*25c28e83SPiotr Jasiukajtis  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
24*25c28e83SPiotr Jasiukajtis  */
25*25c28e83SPiotr Jasiukajtis /*
26*25c28e83SPiotr Jasiukajtis  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
27*25c28e83SPiotr Jasiukajtis  * Use is subject to license terms.
28*25c28e83SPiotr Jasiukajtis  */
29*25c28e83SPiotr Jasiukajtis 
30*25c28e83SPiotr Jasiukajtis #include "libm_inlines.h"
31*25c28e83SPiotr Jasiukajtis 
32*25c28e83SPiotr Jasiukajtis #ifdef __RESTRICT
33*25c28e83SPiotr Jasiukajtis #define restrict _Restrict
34*25c28e83SPiotr Jasiukajtis #else
35*25c28e83SPiotr Jasiukajtis #define restrict
36*25c28e83SPiotr Jasiukajtis #endif
37*25c28e83SPiotr Jasiukajtis 
38*25c28e83SPiotr Jasiukajtis extern double sqrt(double);
39*25c28e83SPiotr Jasiukajtis 
40*25c28e83SPiotr Jasiukajtis void
41*25c28e83SPiotr Jasiukajtis __vhypotf(int n, float * restrict x, int stridex, float * restrict y,
42*25c28e83SPiotr Jasiukajtis 	int stridey, float * restrict z, int stridez)
43*25c28e83SPiotr Jasiukajtis {
44*25c28e83SPiotr Jasiukajtis 	float		x0, x1, x2, y0, y1, y2, z0, z1, z2, *pz0, *pz1, *pz2;
45*25c28e83SPiotr Jasiukajtis 	unsigned	hx0, hx1, hx2, hy0, hy1, hy2;
46*25c28e83SPiotr Jasiukajtis 	int			i, j0, j1, j2;
47*25c28e83SPiotr Jasiukajtis 
48*25c28e83SPiotr Jasiukajtis 	do
49*25c28e83SPiotr Jasiukajtis 	{
50*25c28e83SPiotr Jasiukajtis LOOP0:
51*25c28e83SPiotr Jasiukajtis 		hx0 = *(unsigned*)x & ~0x80000000;
52*25c28e83SPiotr Jasiukajtis 		hy0 = *(unsigned*)y & ~0x80000000;
53*25c28e83SPiotr Jasiukajtis 		*(unsigned*)&x0 = hx0;
54*25c28e83SPiotr Jasiukajtis 		*(unsigned*)&y0 = hy0;
55*25c28e83SPiotr Jasiukajtis 		if (hy0 > hx0)
56*25c28e83SPiotr Jasiukajtis 		{
57*25c28e83SPiotr Jasiukajtis 			i = hy0 - hx0;
58*25c28e83SPiotr Jasiukajtis 			j0 = hy0 & 0x7f800000;
59*25c28e83SPiotr Jasiukajtis 			if (hx0 == 0)
60*25c28e83SPiotr Jasiukajtis 				i = 0x7f800000;
61*25c28e83SPiotr Jasiukajtis 		}
62*25c28e83SPiotr Jasiukajtis 		else
63*25c28e83SPiotr Jasiukajtis 		{
64*25c28e83SPiotr Jasiukajtis 			i = hx0 - hy0;
65*25c28e83SPiotr Jasiukajtis 			j0 = hx0 & 0x7f800000;
66*25c28e83SPiotr Jasiukajtis 			if (hy0 == 0)
67*25c28e83SPiotr Jasiukajtis 				i = 0x7f800000;
68*25c28e83SPiotr Jasiukajtis 			else if (hx0 == 0)
69*25c28e83SPiotr Jasiukajtis 				i = 0x7f800000;
70*25c28e83SPiotr Jasiukajtis 		}
71*25c28e83SPiotr Jasiukajtis 		if (i >= 0x0c800000 || j0 >= 0x7f800000)
72*25c28e83SPiotr Jasiukajtis 		{
73*25c28e83SPiotr Jasiukajtis 			z0 = x0 + y0;
74*25c28e83SPiotr Jasiukajtis 			if (hx0 == 0x7f800000)
75*25c28e83SPiotr Jasiukajtis 				z0 = x0;
76*25c28e83SPiotr Jasiukajtis 			else if (hy0  == 0x7f800000)
77*25c28e83SPiotr Jasiukajtis 				z0 = y0;
78*25c28e83SPiotr Jasiukajtis 			else if (hx0 > 0x7f800000 || hy0 > 0x7f800000)
79*25c28e83SPiotr Jasiukajtis 				z0 = *x + *y;
80*25c28e83SPiotr Jasiukajtis 			*z = z0;
81*25c28e83SPiotr Jasiukajtis 			x += stridex;
82*25c28e83SPiotr Jasiukajtis 			y += stridey;
83*25c28e83SPiotr Jasiukajtis 			z += stridez;
84*25c28e83SPiotr Jasiukajtis 			i = 0;
85*25c28e83SPiotr Jasiukajtis 			if (--n <= 0)
86*25c28e83SPiotr Jasiukajtis 				break;
87*25c28e83SPiotr Jasiukajtis 			goto LOOP0;
88*25c28e83SPiotr Jasiukajtis 		}
89*25c28e83SPiotr Jasiukajtis 		pz0 = z;
90*25c28e83SPiotr Jasiukajtis 		x += stridex;
91*25c28e83SPiotr Jasiukajtis 		y += stridey;
92*25c28e83SPiotr Jasiukajtis 		z += stridez;
93*25c28e83SPiotr Jasiukajtis 		i = 1;
94*25c28e83SPiotr Jasiukajtis 		if (--n <= 0)
95*25c28e83SPiotr Jasiukajtis 			break;
96*25c28e83SPiotr Jasiukajtis 
97*25c28e83SPiotr Jasiukajtis LOOP1:
98*25c28e83SPiotr Jasiukajtis 		hx1 = *(unsigned*)x & ~0x80000000;
99*25c28e83SPiotr Jasiukajtis 		hy1 = *(unsigned*)y & ~0x80000000;
100*25c28e83SPiotr Jasiukajtis 		*(unsigned*)&x1 = hx1;
101*25c28e83SPiotr Jasiukajtis 		*(unsigned*)&y1 = hy1;
102*25c28e83SPiotr Jasiukajtis 		if (hy1 > hx1)
103*25c28e83SPiotr Jasiukajtis 		{
104*25c28e83SPiotr Jasiukajtis 			i = hy1 - hx1;
105*25c28e83SPiotr Jasiukajtis 			j1 = hy1 & 0x7f800000;
106*25c28e83SPiotr Jasiukajtis 			if (hx1 == 0)
107*25c28e83SPiotr Jasiukajtis 				i = 0x7f800000;
108*25c28e83SPiotr Jasiukajtis 		}
109*25c28e83SPiotr Jasiukajtis 		else
110*25c28e83SPiotr Jasiukajtis 		{
111*25c28e83SPiotr Jasiukajtis 			i = hx1 - hy1;
112*25c28e83SPiotr Jasiukajtis 			j1 = hx1 & 0x7f800000;
113*25c28e83SPiotr Jasiukajtis 			if (hy1 == 0)
114*25c28e83SPiotr Jasiukajtis 				i = 0x7f800000;
115*25c28e83SPiotr Jasiukajtis 			else if (hx1 == 0)
116*25c28e83SPiotr Jasiukajtis 				i = 0x7f800000;
117*25c28e83SPiotr Jasiukajtis 		}
118*25c28e83SPiotr Jasiukajtis 		if (i >= 0x0c800000 || j1 >= 0x7f800000)
119*25c28e83SPiotr Jasiukajtis 		{
120*25c28e83SPiotr Jasiukajtis 			z1 = x1 + y1;
121*25c28e83SPiotr Jasiukajtis 			if (hx1 == 0x7f800000)
122*25c28e83SPiotr Jasiukajtis 				z1 = x1;
123*25c28e83SPiotr Jasiukajtis 			else if (hy1 == 0x7f800000)
124*25c28e83SPiotr Jasiukajtis 				z1 = y1;
125*25c28e83SPiotr Jasiukajtis 			else if (hx1 > 0x7f800000 || hy1 > 0x7f800000)
126*25c28e83SPiotr Jasiukajtis 				z1 = *x + *y;
127*25c28e83SPiotr Jasiukajtis 			*z = z1;
128*25c28e83SPiotr Jasiukajtis 			x += stridex;
129*25c28e83SPiotr Jasiukajtis 			y += stridey;
130*25c28e83SPiotr Jasiukajtis 			z += stridez;
131*25c28e83SPiotr Jasiukajtis 			i = 1;
132*25c28e83SPiotr Jasiukajtis 			if (--n <= 0)
133*25c28e83SPiotr Jasiukajtis 				break;
134*25c28e83SPiotr Jasiukajtis 			goto LOOP1;
135*25c28e83SPiotr Jasiukajtis 		}
136*25c28e83SPiotr Jasiukajtis 		pz1 = z;
137*25c28e83SPiotr Jasiukajtis 		x += stridex;
138*25c28e83SPiotr Jasiukajtis 		y += stridey;
139*25c28e83SPiotr Jasiukajtis 		z += stridez;
140*25c28e83SPiotr Jasiukajtis 		i = 2;
141*25c28e83SPiotr Jasiukajtis 		if (--n <= 0)
142*25c28e83SPiotr Jasiukajtis 			break;
143*25c28e83SPiotr Jasiukajtis 
144*25c28e83SPiotr Jasiukajtis LOOP2:
145*25c28e83SPiotr Jasiukajtis 		hx2 = *(unsigned*)x & ~0x80000000;
146*25c28e83SPiotr Jasiukajtis 		hy2 = *(unsigned*)y & ~0x80000000;
147*25c28e83SPiotr Jasiukajtis 		*(unsigned*)&x2 = hx2;
148*25c28e83SPiotr Jasiukajtis 		*(unsigned*)&y2 = hy2;
149*25c28e83SPiotr Jasiukajtis 		if (hy2 > hx2)
150*25c28e83SPiotr Jasiukajtis 		{
151*25c28e83SPiotr Jasiukajtis 			i = hy2 - hx2;
152*25c28e83SPiotr Jasiukajtis 			j2 = hy2 & 0x7f800000;
153*25c28e83SPiotr Jasiukajtis 			if (hx2 == 0)
154*25c28e83SPiotr Jasiukajtis 				i = 0x7f800000;
155*25c28e83SPiotr Jasiukajtis 		}
156*25c28e83SPiotr Jasiukajtis 		else
157*25c28e83SPiotr Jasiukajtis 		{
158*25c28e83SPiotr Jasiukajtis 			i = hx2 - hy2;
159*25c28e83SPiotr Jasiukajtis 			j2 = hx2 & 0x7f800000;
160*25c28e83SPiotr Jasiukajtis 			if (hy2 == 0)
161*25c28e83SPiotr Jasiukajtis 				i = 0x7f800000;
162*25c28e83SPiotr Jasiukajtis 			else if (hx2 == 0)
163*25c28e83SPiotr Jasiukajtis 				i = 0x7f800000;
164*25c28e83SPiotr Jasiukajtis 		}
165*25c28e83SPiotr Jasiukajtis 		if (i >= 0x0c800000 || j2 >= 0x7f800000)
166*25c28e83SPiotr Jasiukajtis 		{
167*25c28e83SPiotr Jasiukajtis 			z2 = x2 + y2;
168*25c28e83SPiotr Jasiukajtis 			if (hx2 == 0x7f800000)
169*25c28e83SPiotr Jasiukajtis 				z2 = x2;
170*25c28e83SPiotr Jasiukajtis 			else if (hy2 == 0x7f800000)
171*25c28e83SPiotr Jasiukajtis 				z2 = y2;
172*25c28e83SPiotr Jasiukajtis 			else if (hx2 > 0x7f800000 || hy2 > 0x7f800000)
173*25c28e83SPiotr Jasiukajtis 				z2 = *x + *y;
174*25c28e83SPiotr Jasiukajtis 			*z = z2;
175*25c28e83SPiotr Jasiukajtis 			x += stridex;
176*25c28e83SPiotr Jasiukajtis 			y += stridey;
177*25c28e83SPiotr Jasiukajtis 			z += stridez;
178*25c28e83SPiotr Jasiukajtis 			i = 2;
179*25c28e83SPiotr Jasiukajtis 			if (--n <= 0)
180*25c28e83SPiotr Jasiukajtis 				break;
181*25c28e83SPiotr Jasiukajtis 			goto LOOP2;
182*25c28e83SPiotr Jasiukajtis 		}
183*25c28e83SPiotr Jasiukajtis 		pz2 = z;
184*25c28e83SPiotr Jasiukajtis 
185*25c28e83SPiotr Jasiukajtis 		z0 = sqrt(x0 * (double)x0 + y0 * (double)y0);
186*25c28e83SPiotr Jasiukajtis 		z1 = sqrt(x1 * (double)x1 + y1 * (double)y1);
187*25c28e83SPiotr Jasiukajtis 		z2 = sqrt(x2 * (double)x2 + y2 * (double)y2);
188*25c28e83SPiotr Jasiukajtis 		*pz0 = z0;
189*25c28e83SPiotr Jasiukajtis 		*pz1 = z1;
190*25c28e83SPiotr Jasiukajtis 		*pz2 = z2;
191*25c28e83SPiotr Jasiukajtis 
192*25c28e83SPiotr Jasiukajtis 		x += stridex;
193*25c28e83SPiotr Jasiukajtis 		y += stridey;
194*25c28e83SPiotr Jasiukajtis 		z += stridez;
195*25c28e83SPiotr Jasiukajtis 		i = 0;
196*25c28e83SPiotr Jasiukajtis 	} while (--n > 0);
197*25c28e83SPiotr Jasiukajtis 
198*25c28e83SPiotr Jasiukajtis 	if (i > 0)
199*25c28e83SPiotr Jasiukajtis 	{
200*25c28e83SPiotr Jasiukajtis 		if (i > 1)
201*25c28e83SPiotr Jasiukajtis 		{
202*25c28e83SPiotr Jasiukajtis 			z1 = sqrt(x1 * (double)x1 + y1 * (double)y1);
203*25c28e83SPiotr Jasiukajtis 			*pz1 = z1;
204*25c28e83SPiotr Jasiukajtis 		}
205*25c28e83SPiotr Jasiukajtis 		z0 = sqrt(x0 * (double)x0 + y0 * (double)y0);
206*25c28e83SPiotr Jasiukajtis 		*pz0 = z0;
207*25c28e83SPiotr Jasiukajtis 	}
208*25c28e83SPiotr Jasiukajtis }
209