xref: /illumos-gate/usr/src/lib/libmvec/common/__vhypotf.c (revision 97a9db610324e7db4393415018e0e737485a94cd)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
24  */
25 /*
26  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
27  * Use is subject to license terms.
28  */
29 
30 #include "libm_inlines.h"
31 
32 #ifdef __RESTRICT
33 #define restrict _Restrict
34 #else
35 #define restrict
36 #endif
37 
38 extern double sqrt(double);
39 
40 void
41 __vhypotf(int n, float * restrict x, int stridex, float * restrict y,
42 	int stridey, float * restrict z, int stridez)
43 {
44 	float		x0, x1, x2, y0, y1, y2, z0, z1, z2, *pz0, *pz1, *pz2;
45 	unsigned	hx0, hx1, hx2, hy0, hy1, hy2;
46 	int			i, j0, j1, j2;
47 
48 	do
49 	{
50 LOOP0:
51 		hx0 = *(unsigned*)x & ~0x80000000;
52 		hy0 = *(unsigned*)y & ~0x80000000;
53 		*(unsigned*)&x0 = hx0;
54 		*(unsigned*)&y0 = hy0;
55 		if (hy0 > hx0)
56 		{
57 			i = hy0 - hx0;
58 			j0 = hy0 & 0x7f800000;
59 			if (hx0 == 0)
60 				i = 0x7f800000;
61 		}
62 		else
63 		{
64 			i = hx0 - hy0;
65 			j0 = hx0 & 0x7f800000;
66 			if (hy0 == 0)
67 				i = 0x7f800000;
68 			else if (hx0 == 0)
69 				i = 0x7f800000;
70 		}
71 		if (i >= 0x0c800000 || j0 >= 0x7f800000)
72 		{
73 			z0 = x0 + y0;
74 			if (hx0 == 0x7f800000)
75 				z0 = x0;
76 			else if (hy0  == 0x7f800000)
77 				z0 = y0;
78 			else if (hx0 > 0x7f800000 || hy0 > 0x7f800000)
79 				z0 = *x + *y;
80 			*z = z0;
81 			x += stridex;
82 			y += stridey;
83 			z += stridez;
84 			i = 0;
85 			if (--n <= 0)
86 				break;
87 			goto LOOP0;
88 		}
89 		pz0 = z;
90 		x += stridex;
91 		y += stridey;
92 		z += stridez;
93 		i = 1;
94 		if (--n <= 0)
95 			break;
96 
97 LOOP1:
98 		hx1 = *(unsigned*)x & ~0x80000000;
99 		hy1 = *(unsigned*)y & ~0x80000000;
100 		*(unsigned*)&x1 = hx1;
101 		*(unsigned*)&y1 = hy1;
102 		if (hy1 > hx1)
103 		{
104 			i = hy1 - hx1;
105 			j1 = hy1 & 0x7f800000;
106 			if (hx1 == 0)
107 				i = 0x7f800000;
108 		}
109 		else
110 		{
111 			i = hx1 - hy1;
112 			j1 = hx1 & 0x7f800000;
113 			if (hy1 == 0)
114 				i = 0x7f800000;
115 			else if (hx1 == 0)
116 				i = 0x7f800000;
117 		}
118 		if (i >= 0x0c800000 || j1 >= 0x7f800000)
119 		{
120 			z1 = x1 + y1;
121 			if (hx1 == 0x7f800000)
122 				z1 = x1;
123 			else if (hy1 == 0x7f800000)
124 				z1 = y1;
125 			else if (hx1 > 0x7f800000 || hy1 > 0x7f800000)
126 				z1 = *x + *y;
127 			*z = z1;
128 			x += stridex;
129 			y += stridey;
130 			z += stridez;
131 			i = 1;
132 			if (--n <= 0)
133 				break;
134 			goto LOOP1;
135 		}
136 		pz1 = z;
137 		x += stridex;
138 		y += stridey;
139 		z += stridez;
140 		i = 2;
141 		if (--n <= 0)
142 			break;
143 
144 LOOP2:
145 		hx2 = *(unsigned*)x & ~0x80000000;
146 		hy2 = *(unsigned*)y & ~0x80000000;
147 		*(unsigned*)&x2 = hx2;
148 		*(unsigned*)&y2 = hy2;
149 		if (hy2 > hx2)
150 		{
151 			i = hy2 - hx2;
152 			j2 = hy2 & 0x7f800000;
153 			if (hx2 == 0)
154 				i = 0x7f800000;
155 		}
156 		else
157 		{
158 			i = hx2 - hy2;
159 			j2 = hx2 & 0x7f800000;
160 			if (hy2 == 0)
161 				i = 0x7f800000;
162 			else if (hx2 == 0)
163 				i = 0x7f800000;
164 		}
165 		if (i >= 0x0c800000 || j2 >= 0x7f800000)
166 		{
167 			z2 = x2 + y2;
168 			if (hx2 == 0x7f800000)
169 				z2 = x2;
170 			else if (hy2 == 0x7f800000)
171 				z2 = y2;
172 			else if (hx2 > 0x7f800000 || hy2 > 0x7f800000)
173 				z2 = *x + *y;
174 			*z = z2;
175 			x += stridex;
176 			y += stridey;
177 			z += stridez;
178 			i = 2;
179 			if (--n <= 0)
180 				break;
181 			goto LOOP2;
182 		}
183 		pz2 = z;
184 
185 		z0 = sqrt(x0 * (double)x0 + y0 * (double)y0);
186 		z1 = sqrt(x1 * (double)x1 + y1 * (double)y1);
187 		z2 = sqrt(x2 * (double)x2 + y2 * (double)y2);
188 		*pz0 = z0;
189 		*pz1 = z1;
190 		*pz2 = z2;
191 
192 		x += stridex;
193 		y += stridey;
194 		z += stridez;
195 		i = 0;
196 	} while (--n > 0);
197 
198 	if (i > 0)
199 	{
200 		if (i > 1)
201 		{
202 			z1 = sqrt(x1 * (double)x1 + y1 * (double)y1);
203 			*pz1 = z1;
204 		}
205 		z0 = sqrt(x0 * (double)x0 + y0 * (double)y0);
206 		*pz0 = z0;
207 	}
208 }
209