xref: /titanic_44/usr/src/lib/libmvec/common/__vhypotf.c (revision a05fd0c9b9aa46cf66ddea7617e56facdf1f4aaf)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
24  */
25 /*
26  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
27  * Use is subject to license terms.
28  */
29 
30 #include "libm_synonyms.h"
31 #include "libm_inlines.h"
32 
33 #ifdef __RESTRICT
34 #define restrict _Restrict
35 #else
36 #define restrict
37 #endif
38 
39 #define sqrt __sqrt
40 
41 extern double sqrt(double);
42 
43 void
44 __vhypotf(int n, float * restrict x, int stridex, float * restrict y,
45 	int stridey, float * restrict z, int stridez)
46 {
47 	float		x0, x1, x2, y0, y1, y2, z0, z1, z2, *pz0, *pz1, *pz2;
48 	unsigned	hx0, hx1, hx2, hy0, hy1, hy2;
49 	int			i, j0, j1, j2;
50 
51 	do
52 	{
53 LOOP0:
54 		hx0 = *(unsigned*)x & ~0x80000000;
55 		hy0 = *(unsigned*)y & ~0x80000000;
56 		*(unsigned*)&x0 = hx0;
57 		*(unsigned*)&y0 = hy0;
58 		if (hy0 > hx0)
59 		{
60 			i = hy0 - hx0;
61 			j0 = hy0 & 0x7f800000;
62 			if (hx0 == 0)
63 				i = 0x7f800000;
64 		}
65 		else
66 		{
67 			i = hx0 - hy0;
68 			j0 = hx0 & 0x7f800000;
69 			if (hy0 == 0)
70 				i = 0x7f800000;
71 			else if (hx0 == 0)
72 				i = 0x7f800000;
73 		}
74 		if (i >= 0x0c800000 || j0 >= 0x7f800000)
75 		{
76 			z0 = x0 + y0;
77 			if (hx0 == 0x7f800000)
78 				z0 = x0;
79 			else if (hy0  == 0x7f800000)
80 				z0 = y0;
81 			else if (hx0 > 0x7f800000 || hy0 > 0x7f800000)
82 				z0 = *x + *y;
83 			*z = z0;
84 			x += stridex;
85 			y += stridey;
86 			z += stridez;
87 			i = 0;
88 			if (--n <= 0)
89 				break;
90 			goto LOOP0;
91 		}
92 		pz0 = z;
93 		x += stridex;
94 		y += stridey;
95 		z += stridez;
96 		i = 1;
97 		if (--n <= 0)
98 			break;
99 
100 LOOP1:
101 		hx1 = *(unsigned*)x & ~0x80000000;
102 		hy1 = *(unsigned*)y & ~0x80000000;
103 		*(unsigned*)&x1 = hx1;
104 		*(unsigned*)&y1 = hy1;
105 		if (hy1 > hx1)
106 		{
107 			i = hy1 - hx1;
108 			j1 = hy1 & 0x7f800000;
109 			if (hx1 == 0)
110 				i = 0x7f800000;
111 		}
112 		else
113 		{
114 			i = hx1 - hy1;
115 			j1 = hx1 & 0x7f800000;
116 			if (hy1 == 0)
117 				i = 0x7f800000;
118 			else if (hx1 == 0)
119 				i = 0x7f800000;
120 		}
121 		if (i >= 0x0c800000 || j1 >= 0x7f800000)
122 		{
123 			z1 = x1 + y1;
124 			if (hx1 == 0x7f800000)
125 				z1 = x1;
126 			else if (hy1 == 0x7f800000)
127 				z1 = y1;
128 			else if (hx1 > 0x7f800000 || hy1 > 0x7f800000)
129 				z1 = *x + *y;
130 			*z = z1;
131 			x += stridex;
132 			y += stridey;
133 			z += stridez;
134 			i = 1;
135 			if (--n <= 0)
136 				break;
137 			goto LOOP1;
138 		}
139 		pz1 = z;
140 		x += stridex;
141 		y += stridey;
142 		z += stridez;
143 		i = 2;
144 		if (--n <= 0)
145 			break;
146 
147 LOOP2:
148 		hx2 = *(unsigned*)x & ~0x80000000;
149 		hy2 = *(unsigned*)y & ~0x80000000;
150 		*(unsigned*)&x2 = hx2;
151 		*(unsigned*)&y2 = hy2;
152 		if (hy2 > hx2)
153 		{
154 			i = hy2 - hx2;
155 			j2 = hy2 & 0x7f800000;
156 			if (hx2 == 0)
157 				i = 0x7f800000;
158 		}
159 		else
160 		{
161 			i = hx2 - hy2;
162 			j2 = hx2 & 0x7f800000;
163 			if (hy2 == 0)
164 				i = 0x7f800000;
165 			else if (hx2 == 0)
166 				i = 0x7f800000;
167 		}
168 		if (i >= 0x0c800000 || j2 >= 0x7f800000)
169 		{
170 			z2 = x2 + y2;
171 			if (hx2 == 0x7f800000)
172 				z2 = x2;
173 			else if (hy2 == 0x7f800000)
174 				z2 = y2;
175 			else if (hx2 > 0x7f800000 || hy2 > 0x7f800000)
176 				z2 = *x + *y;
177 			*z = z2;
178 			x += stridex;
179 			y += stridey;
180 			z += stridez;
181 			i = 2;
182 			if (--n <= 0)
183 				break;
184 			goto LOOP2;
185 		}
186 		pz2 = z;
187 
188 		z0 = sqrt(x0 * (double)x0 + y0 * (double)y0);
189 		z1 = sqrt(x1 * (double)x1 + y1 * (double)y1);
190 		z2 = sqrt(x2 * (double)x2 + y2 * (double)y2);
191 		*pz0 = z0;
192 		*pz1 = z1;
193 		*pz2 = z2;
194 
195 		x += stridex;
196 		y += stridey;
197 		z += stridez;
198 		i = 0;
199 	} while (--n > 0);
200 
201 	if (i > 0)
202 	{
203 		if (i > 1)
204 		{
205 			z1 = sqrt(x1 * (double)x1 + y1 * (double)y1);
206 			*pz1 = z1;
207 		}
208 		z0 = sqrt(x0 * (double)x0 + y0 * (double)y0);
209 		*pz0 = z0;
210 	}
211 }
212