xref: /illumos-gate/usr/src/lib/libmvec/common/__vhypot.c (revision 9adfa60d484ce2435f5af77cc99dcd4e692b6660)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
24  */
25 /*
26  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
27  * Use is subject to license terms.
28  */
29 
30 #include <sys/isa_defs.h>
31 #include "libm_inlines.h"
32 
33 #ifdef _LITTLE_ENDIAN
34 #define HI(x)	*(1+(int*)x)
35 #define LO(x)	*(unsigned*)x
36 #else
37 #define HI(x)	*(int*)x
38 #define LO(x)	*(1+(unsigned*)x)
39 #endif
40 
41 #ifdef __RESTRICT
42 #define restrict _Restrict
43 #else
44 #define restrict
45 #endif
46 
47 /* double hypot(double x, double y)
48  *
49  * Method :
50  *	1. Special cases:
51  *		x or y is +Inf or -Inf				=> +Inf
52  *		x or y is NaN					=> QNaN
53  *	2. Computes hypot(x,y):
54  *		hypot(x,y) = m * sqrt(xnm * xnm + ynm * ynm)
55  *	Where:
56  *		m = max(|x|,|y|)
57  *		xnm = x * (1/m)
58  *		ynm = y * (1/m)
59  *
60  *	Compute xnm * xnm + ynm * ynm by simulating
61  *	muti-precision arithmetic.
62  *
63  * Accuracy:
64  *	Maximum error observed: less than 0.872 ulp after 16.777.216.000
65  *	results.
66  */
67 
68 extern double sqrt(double);
69 extern double fabs(double);
70 
71 static const unsigned long long LCONST[] = {
72 0x41b0000000000000ULL,	/* D2ON28 = 2 ** 28		*/
73 0x0010000000000000ULL,	/* D2ONM1022 = 2 ** -1022	*/
74 0x7fd0000000000000ULL	/* D2ONP1022 = 2 **  1022	*/
75 };
76 
77 static void
78 __vhypot_n(int n, double * restrict px, int stridex, double * restrict py,
79 	int stridey, double * restrict pz, int stridez);
80 
81 #pragma no_inline(__vhypot_n)
82 
83 #define RETURN(ret)						\
84 {								\
85 	*pz = (ret);						\
86 	py += stridey;						\
87 	pz += stridez;						\
88 	if (n_n == 0)						\
89 	{							\
90 		hx0 = HI(px);					\
91 		hy0 = HI(py);					\
92 		spx = px; spy = py; spz = pz;			\
93 		continue;					\
94 	}							\
95 	n--;							\
96 	break;							\
97 }
98 
99 void
100 __vhypot(int n, double * restrict px, int stridex, double * restrict py,
101 	int stridey, double * restrict pz, int stridez)
102 {
103 	int		hx0, hx1, hy0, j0, diff;
104 	double		x_hi, x_lo, y_hi, y_lo;
105 	double		scl = 0;
106 	double		x, y, res;
107 	double		*spx, *spy, *spz;
108 	int		n_n;
109 	double		D2ON28 = ((double*)LCONST)[0];		/* 2 ** 28	*/
110 	double		D2ONM1022 = ((double*)LCONST)[1];	/* 2 **-1022	*/
111 	double		D2ONP1022 = ((double*)LCONST)[2];	/* 2 ** 1022	*/
112 
113 	while (n > 1)
114 	{
115 		n_n = 0;
116 		spx = px;
117 		spy = py;
118 		spz = pz;
119 		hx0 = HI(px);
120 		hy0 = HI(py);
121 		for (; n > 1 ; n--)
122 		{
123 			px += stridex;
124 			hx0 &= 0x7fffffff;
125 			hy0 &= 0x7fffffff;
126 
127 			if (hx0 >= 0x7fe00000)	/* |X| >= 2**1023 or Inf or NaN */
128 			{
129 				diff = hy0 - hx0;
130 				j0 = diff >> 31;
131 				j0 = hy0 - (diff & j0);
132 				j0 &= 0x7ff00000;
133 				x = *(px - stridex);
134 				y = *py;
135 				x = fabs(x);
136 				y = fabs(y);
137 				if (j0 >= 0x7ff00000)	/* |X| or |Y| = Inf or NaN */
138 				{
139 					int lx = LO((px - stridex));
140 					int ly = LO(py);
141 					if (hx0 == 0x7ff00000 && lx == 0) res = x == y ? y : x;
142 					else if (hy0 == 0x7ff00000 && ly == 0) res = x == y ? x : y;
143 					else res = x + y;
144 					RETURN (res)
145 				}
146 				else
147 				{
148 					j0 = diff >> 31;
149 					if (((diff ^ j0) - j0) < 0x03600000)	/* max(|X|,|Y|)/min(|X|,|Y|) < 2**54 */
150 					{
151 						x *= D2ONM1022;
152 						y *= D2ONM1022;
153 
154 						x_hi = (x + D2ON28) - D2ON28;
155 						x_lo = x - x_hi;
156 						y_hi = (y + D2ON28) - D2ON28;
157 						y_lo = y - y_hi;
158 						res = (x_hi * x_hi + y_hi * y_hi);
159 						res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo);
160 
161 						res = sqrt (res);
162 
163 						res = D2ONP1022 * res;
164 						RETURN (res)
165 					}
166 					else RETURN (x + y)
167 				}
168 			}
169 			if (hy0 >= 0x7fe00000)	/* |Y| >= 2**1023 or Inf or NaN */
170 			{
171 				diff = hy0 - hx0;
172 				j0 = diff >> 31;
173 				j0 = hy0 - (diff & j0);
174 				j0 &= 0x7ff00000;
175 				x = *(px - stridex);
176 				y = *py;
177 				x = fabs(x);
178 				y = fabs(y);
179 				if (j0 >= 0x7ff00000)	/* |X| or |Y| = Inf or NaN */
180 				{
181 					int lx = LO((px - stridex));
182 					int ly = LO(py);
183 					if (hx0 == 0x7ff00000 && lx == 0) res = x == y ? y : x;
184 					else if (hy0 == 0x7ff00000 && ly == 0) res = x == y ? x : y;
185 					else res = x + y;
186 					RETURN (res)
187 				}
188 				else
189 				{
190 					j0 = diff >> 31;
191 					if (((diff ^ j0) - j0) < 0x03600000)	/* max(|X|,|Y|)/min(|X|,|Y|) < 2**54 */
192 					{
193 						x *= D2ONM1022;
194 						y *= D2ONM1022;
195 
196 						x_hi = (x + D2ON28) - D2ON28;
197 						x_lo = x - x_hi;
198 						y_hi = (y + D2ON28) - D2ON28;
199 						y_lo = y - y_hi;
200 						res = (x_hi * x_hi + y_hi * y_hi);
201 						res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo);
202 
203 						res = sqrt (res);
204 
205 						res = D2ONP1022 * res;
206 						RETURN (res)
207 					}
208 					else RETURN (x + y)
209 				}
210 			}
211 
212 			hx1 = HI(px);
213 
214 			if (hx0 < 0x00100000 && hy0 < 0x00100000)	/* X and Y are subnormal */
215 			{
216 				x = *(px - stridex);
217 				y = *py;
218 
219 				x *= D2ONP1022;
220 				y *= D2ONP1022;
221 
222 				x_hi = (x + D2ON28) - D2ON28;
223 				x_lo = x - x_hi;
224 				y_hi = (y + D2ON28) - D2ON28;
225 				y_lo = y - y_hi;
226 				res = (x_hi * x_hi + y_hi * y_hi);
227 				res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo);
228 
229 				res = sqrt(res);
230 
231 				res = D2ONM1022 * res;
232 				RETURN (res)
233 			}
234 
235 			hx0 = hx1;
236 			py += stridey;
237 			pz += stridez;
238 			n_n++;
239 			hy0 = HI(py);
240 		}
241 		if (n_n > 0)
242 			__vhypot_n (n_n, spx, stridex, spy, stridey, spz, stridez);
243 	}
244 
245 	if (n > 0)
246 	{
247 		x = *px;
248 		y = *py;
249 		hx0 = HI(px);
250 		hy0 = HI(py);
251 
252 		hx0 &= 0x7fffffff;
253 		hy0 &= 0x7fffffff;
254 
255 		diff = hy0 - hx0;
256 		j0 = diff >> 31;
257 		j0 = hy0 - (diff & j0);
258 		j0 &= 0x7ff00000;
259 
260 		if (j0 >= 0x7fe00000)	/* max(|X|,|Y|) >= 2**1023 or X or Y = Inf or NaN */
261 		{
262 			x = fabs(x);
263 			y = fabs(y);
264 			if (j0 >= 0x7ff00000)	/* |X| or |Y| = Inf or NaN */
265 			{
266 				int lx = LO(px);
267 				int ly = LO(py);
268 				if (hx0 == 0x7ff00000 && lx == 0) res = x == y ? y : x;
269 				else if (hy0 == 0x7ff00000 && ly == 0) res = x == y ? x : y;
270 				else res = x + y;
271 				*pz = res;
272 				return;
273 			}
274 			else
275 			{
276 				j0 = diff >> 31;
277 				if (((diff ^ j0) - j0) < 0x03600000)	/* max(|X|,|Y|)/min(|X|,|Y|) < 2**54 */
278 				{
279 					x *= D2ONM1022;
280 					y *= D2ONM1022;
281 
282 					x_hi = (x + D2ON28) - D2ON28;
283 					x_lo = x - x_hi;
284 					y_hi = (y + D2ON28) - D2ON28;
285 					y_lo = y - y_hi;
286 					res = (x_hi * x_hi + y_hi * y_hi);
287 					res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo);
288 
289 					res = sqrt (res);
290 
291 					res = D2ONP1022 * res;
292 					*pz = res;
293 					return;
294 				}
295 				else
296 				{
297 					*pz = x + y;
298 					return;
299 				}
300 			}
301 		}
302 
303 		if (j0 < 0x00100000)	/* X and Y are subnormal */
304 		{
305 			x *= D2ONP1022;
306 			y *= D2ONP1022;
307 
308 			x_hi = (x + D2ON28) - D2ON28;
309 			x_lo = x - x_hi;
310 			y_hi = (y + D2ON28) - D2ON28;
311 			y_lo = y - y_hi;
312 			res = (x_hi * x_hi + y_hi * y_hi);
313 			res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo);
314 
315 			res = sqrt(res);
316 
317 			res = D2ONM1022 * res;
318 			*pz = res;
319 			return;
320 		}
321 
322 		HI(&scl) = (0x7fe00000 - j0);
323 
324 		x *= scl;
325 		y *= scl;
326 
327 		x_hi = (x + D2ON28) - D2ON28;
328 		y_hi = (y + D2ON28) - D2ON28;
329 		x_lo = x - x_hi;
330 		y_lo = y - y_hi;
331 
332 		res = (x_hi * x_hi + y_hi * y_hi);
333 		res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo);
334 
335 		res = sqrt(res);
336 
337 		HI(&scl) = j0;
338 
339 		res = scl * res;
340 		*pz = res;
341 	}
342 }
343 
344 static void
345 __vhypot_n(int n, double * restrict px, int stridex, double * restrict py,
346 	int stridey, double * restrict pz, int stridez)
347 {
348 	int		hx0, hy0, j0, diff0;
349 	double		x_hi0, x_lo0, y_hi0, y_lo0, scl0 = 0;
350 	double		x0, y0, res0;
351 	double		D2ON28 = ((double*)LCONST)[0];		/* 2 ** 28	*/
352 
353 	for(; n > 0 ; n--)
354 	{
355 		x0 = *px;
356 		y0 = *py;
357 		hx0 = HI(px);
358 		hy0 = HI(py);
359 
360 		hx0 &= 0x7fffffff;
361 		hy0 &= 0x7fffffff;
362 
363 		diff0 = hy0 - hx0;
364 		j0 = diff0 >> 31;
365 		j0 = hy0 - (diff0 & j0);
366 		j0 &= 0x7ff00000;
367 
368 		px += stridex;
369 		py += stridey;
370 
371 		HI(&scl0) = (0x7fe00000 - j0);
372 
373 		x0 *= scl0;
374 		y0 *= scl0;
375 
376 		x_hi0 = (x0 + D2ON28) - D2ON28;
377 		y_hi0 = (y0 + D2ON28) - D2ON28;
378 		x_lo0 = x0 - x_hi0;
379 		y_lo0 = y0 - y_hi0;
380 
381 		res0 = (x_hi0 * x_hi0 + y_hi0 * y_hi0);
382 		res0 += ((x0 + x_hi0) * x_lo0 + (y0 + y_hi0) * y_lo0);
383 
384 		res0 = sqrt(res0);
385 
386 		HI(&scl0) = j0;
387 
388 		res0 = scl0 * res0;
389 		*pz = res0;
390 
391 		pz += stridez;
392 	}
393 }
394