xref: /illumos-gate/usr/src/lib/libmvec/common/__vhypotf.c (revision 2e837a72011f54762249b6612c2a64f171efcd43)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
24  */
25 /*
26  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
27  * Use is subject to license terms.
28  */
29 
30 #include "libm_inlines.h"
31 
32 #ifdef __RESTRICT
33 #define	restrict _Restrict
34 #else
35 #define	restrict
36 #endif
37 
38 extern double sqrt(double);
39 
40 /*
41  * Instead of type punning, use union type.
42  */
43 typedef union h32 {
44 	float f;
45 	unsigned u;
46 } h32;
47 
48 void
49 __vhypotf(int n, float *restrict x, int stridex, float *restrict y,
50     int stridey, float *restrict z, int stridez)
51 {
52 	float		x0, x1, x2, y0, y1, y2, z0, z1, z2, *pz0, *pz1, *pz2;
53 	h32		hx0, hx1, hx2, hy0, hy1, hy2;
54 	int		i, j0, j1, j2;
55 
56 	do {
57 LOOP0:
58 		hx0.f = *x;
59 		hy0.f = *y;
60 		hx0.u &= ~0x80000000;
61 		hy0.u &= ~0x80000000;
62 		x0 = hx0.f;
63 		y0 = hy0.f;
64 		if (hy0.u > hx0.u) {
65 			i = hy0.u - hx0.u;
66 			j0 = hy0.u & 0x7f800000;
67 			if (hx0.u == 0)
68 				i = 0x7f800000;
69 		} else {
70 			i = hx0.u - hy0.u;
71 			j0 = hx0.u & 0x7f800000;
72 			if (hy0.u == 0)
73 				i = 0x7f800000;
74 			else if (hx0.u == 0)
75 				i = 0x7f800000;
76 		}
77 		if (i >= 0x0c800000 || j0 >= 0x7f800000) {
78 			z0 = x0 + y0;
79 			if (hx0.u == 0x7f800000)
80 				z0 = x0;
81 			else if (hy0.u  == 0x7f800000)
82 				z0 = y0;
83 			else if (hx0.u > 0x7f800000 || hy0.u > 0x7f800000)
84 				z0 = *x + *y;
85 			*z = z0;
86 			x += stridex;
87 			y += stridey;
88 			z += stridez;
89 			i = 0;
90 			if (--n <= 0)
91 				break;
92 			goto LOOP0;
93 		}
94 		pz0 = z;
95 		x += stridex;
96 		y += stridey;
97 		z += stridez;
98 		i = 1;
99 		if (--n <= 0)
100 			break;
101 
102 LOOP1:
103 		hx1.f = *x;
104 		hy1.f = *y;
105 		hx1.u &= ~0x80000000;
106 		hy1.u &= ~0x80000000;
107 		x1 = hx1.f;
108 		y1 = hy1.f;
109 		if (hy1.u > hx1.u) {
110 			i = hy1.u - hx1.u;
111 			j1 = hy1.u & 0x7f800000;
112 			if (hx1.u == 0)
113 				i = 0x7f800000;
114 		} else {
115 			i = hx1.u - hy1.u;
116 			j1 = hx1.u & 0x7f800000;
117 			if (hy1.u == 0)
118 				i = 0x7f800000;
119 			else if (hx1.u == 0)
120 				i = 0x7f800000;
121 		}
122 		if (i >= 0x0c800000 || j1 >= 0x7f800000) {
123 			z1 = x1 + y1;
124 			if (hx1.u == 0x7f800000)
125 				z1 = x1;
126 			else if (hy1.u == 0x7f800000)
127 				z1 = y1;
128 			else if (hx1.u > 0x7f800000 || hy1.u > 0x7f800000)
129 				z1 = *x + *y;
130 			*z = z1;
131 			x += stridex;
132 			y += stridey;
133 			z += stridez;
134 			i = 1;
135 			if (--n <= 0)
136 				break;
137 			goto LOOP1;
138 		}
139 		pz1 = z;
140 		x += stridex;
141 		y += stridey;
142 		z += stridez;
143 		i = 2;
144 		if (--n <= 0)
145 			break;
146 
147 LOOP2:
148 		hx2.f = *x;
149 		hy2.f = *y;
150 		hx2.u &= ~0x80000000;
151 		hy2.u &= ~0x80000000;
152 		x2 = hx2.f;
153 		y2 = hy2.f;
154 		if (hy2.u > hx2.u) {
155 			i = hy2.u - hx2.u;
156 			j2 = hy2.u & 0x7f800000;
157 			if (hx2.u == 0)
158 				i = 0x7f800000;
159 		} else {
160 			i = hx2.u - hy2.u;
161 			j2 = hx2.u & 0x7f800000;
162 			if (hy2.u == 0)
163 				i = 0x7f800000;
164 			else if (hx2.u == 0)
165 				i = 0x7f800000;
166 		}
167 		if (i >= 0x0c800000 || j2 >= 0x7f800000) {
168 			z2 = x2 + y2;
169 			if (hx2.u == 0x7f800000)
170 				z2 = x2;
171 			else if (hy2.u == 0x7f800000)
172 				z2 = y2;
173 			else if (hx2.u > 0x7f800000 || hy2.u > 0x7f800000)
174 				z2 = *x + *y;
175 			*z = z2;
176 			x += stridex;
177 			y += stridey;
178 			z += stridez;
179 			i = 2;
180 			if (--n <= 0)
181 				break;
182 			goto LOOP2;
183 		}
184 		pz2 = z;
185 
186 		z0 = sqrt(x0 * (double)x0 + y0 * (double)y0);
187 		z1 = sqrt(x1 * (double)x1 + y1 * (double)y1);
188 		z2 = sqrt(x2 * (double)x2 + y2 * (double)y2);
189 		*pz0 = z0;
190 		*pz1 = z1;
191 		*pz2 = z2;
192 
193 		x += stridex;
194 		y += stridey;
195 		z += stridez;
196 		i = 0;
197 	} while (--n > 0);
198 
199 	if (i > 0) {
200 		if (i > 1) {
201 			z1 = sqrt(x1 * (double)x1 + y1 * (double)y1);
202 			*pz1 = z1;
203 		}
204 		z0 = sqrt(x0 * (double)x0 + y0 * (double)y0);
205 		*pz0 = z0;
206 	}
207 }
208