xref: /illumos-gate/usr/src/lib/libmvec/common/__vhypot.c (revision 6aa4fc89ec1cf2cdf7d7c3b9ec059802ac9abe65)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
24  */
25 /*
26  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
27  * Use is subject to license terms.
28  */
29 
30 #include <sys/isa_defs.h>
31 #include "libm_synonyms.h"
32 #include "libm_inlines.h"
33 
34 #ifdef _LITTLE_ENDIAN
35 #define HI(x)	*(1+(int*)x)
36 #define LO(x)	*(unsigned*)x
37 #else
38 #define HI(x)	*(int*)x
39 #define LO(x)	*(1+(unsigned*)x)
40 #endif
41 
42 #ifdef __RESTRICT
43 #define restrict _Restrict
44 #else
45 #define restrict
46 #endif
47 
48 /* double hypot(double x, double y)
49  *
50  * Method :
51  *	1. Special cases:
52  *		x or y is +Inf or -Inf				=> +Inf
53  *		x or y is NaN					=> QNaN
54  *	2. Computes hypot(x,y):
55  *		hypot(x,y) = m * sqrt(xnm * xnm + ynm * ynm)
56  *	Where:
57  *		m = max(|x|,|y|)
58  *		xnm = x * (1/m)
59  *		ynm = y * (1/m)
60  *
61  *	Compute xnm * xnm + ynm * ynm by simulating
62  *	muti-precision arithmetic.
63  *
64  * Accuracy:
65  *	Maximum error observed: less than 0.872 ulp after 16.777.216.000
66  *	results.
67  */
68 
69 #define sqrt __sqrt
70 
71 extern double sqrt(double);
72 extern double fabs(double);
73 
74 static const unsigned long long LCONST[] = {
75 0x41b0000000000000ULL,	/* D2ON28 = 2 ** 28		*/
76 0x0010000000000000ULL,	/* D2ONM1022 = 2 ** -1022	*/
77 0x7fd0000000000000ULL	/* D2ONP1022 = 2 **  1022	*/
78 };
79 
80 static void
81 __vhypot_n(int n, double * restrict px, int stridex, double * restrict py,
82 	int stridey, double * restrict pz, int stridez);
83 
84 #pragma no_inline(__vhypot_n)
85 
86 #define RETURN(ret)						\
87 {								\
88 	*pz = (ret);						\
89 	py += stridey;						\
90 	pz += stridez;						\
91 	if (n_n == 0)						\
92 	{							\
93 		hx0 = HI(px);					\
94 		hy0 = HI(py);					\
95 		spx = px; spy = py; spz = pz;			\
96 		continue;					\
97 	}							\
98 	n--;							\
99 	break;							\
100 }
101 
102 void
103 __vhypot(int n, double * restrict px, int stridex, double * restrict py,
104 	int stridey, double * restrict pz, int stridez)
105 {
106 	int		hx0, hx1, hy0, j0, diff;
107 	double		x_hi, x_lo, y_hi, y_lo;
108 	double		scl = 0;
109 	double		x, y, res;
110 	double		*spx, *spy, *spz;
111 	int		n_n;
112 	double		D2ON28 = ((double*)LCONST)[0];		/* 2 ** 28	*/
113 	double		D2ONM1022 = ((double*)LCONST)[1];	/* 2 **-1022	*/
114 	double		D2ONP1022 = ((double*)LCONST)[2];	/* 2 ** 1022	*/
115 
116 	while (n > 1)
117 	{
118 		n_n = 0;
119 		spx = px;
120 		spy = py;
121 		spz = pz;
122 		hx0 = HI(px);
123 		hy0 = HI(py);
124 		for (; n > 1 ; n--)
125 		{
126 			px += stridex;
127 			hx0 &= 0x7fffffff;
128 			hy0 &= 0x7fffffff;
129 
130 			if (hx0 >= 0x7fe00000)	/* |X| >= 2**1023 or Inf or NaN */
131 			{
132 				diff = hy0 - hx0;
133 				j0 = diff >> 31;
134 				j0 = hy0 - (diff & j0);
135 				j0 &= 0x7ff00000;
136 				x = *(px - stridex);
137 				y = *py;
138 				x = fabs(x);
139 				y = fabs(y);
140 				if (j0 >= 0x7ff00000)	/* |X| or |Y| = Inf or NaN */
141 				{
142 					int lx = LO((px - stridex));
143 					int ly = LO(py);
144 					if (hx0 == 0x7ff00000 && lx == 0) res = x == y ? y : x;
145 					else if (hy0 == 0x7ff00000 && ly == 0) res = x == y ? x : y;
146 					else res = x + y;
147 					RETURN (res)
148 				}
149 				else
150 				{
151 					j0 = diff >> 31;
152 					if (((diff ^ j0) - j0) < 0x03600000)	/* max(|X|,|Y|)/min(|X|,|Y|) < 2**54 */
153 					{
154 						x *= D2ONM1022;
155 						y *= D2ONM1022;
156 
157 						x_hi = (x + D2ON28) - D2ON28;
158 						x_lo = x - x_hi;
159 						y_hi = (y + D2ON28) - D2ON28;
160 						y_lo = y - y_hi;
161 						res = (x_hi * x_hi + y_hi * y_hi);
162 						res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo);
163 
164 						res = sqrt (res);
165 
166 						res = D2ONP1022 * res;
167 						RETURN (res)
168 					}
169 					else RETURN (x + y)
170 				}
171 			}
172 			if (hy0 >= 0x7fe00000)	/* |Y| >= 2**1023 or Inf or NaN */
173 			{
174 				diff = hy0 - hx0;
175 				j0 = diff >> 31;
176 				j0 = hy0 - (diff & j0);
177 				j0 &= 0x7ff00000;
178 				x = *(px - stridex);
179 				y = *py;
180 				x = fabs(x);
181 				y = fabs(y);
182 				if (j0 >= 0x7ff00000)	/* |X| or |Y| = Inf or NaN */
183 				{
184 					int lx = LO((px - stridex));
185 					int ly = LO(py);
186 					if (hx0 == 0x7ff00000 && lx == 0) res = x == y ? y : x;
187 					else if (hy0 == 0x7ff00000 && ly == 0) res = x == y ? x : y;
188 					else res = x + y;
189 					RETURN (res)
190 				}
191 				else
192 				{
193 					j0 = diff >> 31;
194 					if (((diff ^ j0) - j0) < 0x03600000)	/* max(|X|,|Y|)/min(|X|,|Y|) < 2**54 */
195 					{
196 						x *= D2ONM1022;
197 						y *= D2ONM1022;
198 
199 						x_hi = (x + D2ON28) - D2ON28;
200 						x_lo = x - x_hi;
201 						y_hi = (y + D2ON28) - D2ON28;
202 						y_lo = y - y_hi;
203 						res = (x_hi * x_hi + y_hi * y_hi);
204 						res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo);
205 
206 						res = sqrt (res);
207 
208 						res = D2ONP1022 * res;
209 						RETURN (res)
210 					}
211 					else RETURN (x + y)
212 				}
213 			}
214 
215 			hx1 = HI(px);
216 
217 			if (hx0 < 0x00100000 && hy0 < 0x00100000)	/* X and Y are subnormal */
218 			{
219 				x = *(px - stridex);
220 				y = *py;
221 
222 				x *= D2ONP1022;
223 				y *= D2ONP1022;
224 
225 				x_hi = (x + D2ON28) - D2ON28;
226 				x_lo = x - x_hi;
227 				y_hi = (y + D2ON28) - D2ON28;
228 				y_lo = y - y_hi;
229 				res = (x_hi * x_hi + y_hi * y_hi);
230 				res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo);
231 
232 				res = sqrt(res);
233 
234 				res = D2ONM1022 * res;
235 				RETURN (res)
236 			}
237 
238 			hx0 = hx1;
239 			py += stridey;
240 			pz += stridez;
241 			n_n++;
242 			hy0 = HI(py);
243 		}
244 		if (n_n > 0)
245 			__vhypot_n (n_n, spx, stridex, spy, stridey, spz, stridez);
246 	}
247 
248 	if (n > 0)
249 	{
250 		x = *px;
251 		y = *py;
252 		hx0 = HI(px);
253 		hy0 = HI(py);
254 
255 		hx0 &= 0x7fffffff;
256 		hy0 &= 0x7fffffff;
257 
258 		diff = hy0 - hx0;
259 		j0 = diff >> 31;
260 		j0 = hy0 - (diff & j0);
261 		j0 &= 0x7ff00000;
262 
263 		if (j0 >= 0x7fe00000)	/* max(|X|,|Y|) >= 2**1023 or X or Y = Inf or NaN */
264 		{
265 			x = fabs(x);
266 			y = fabs(y);
267 			if (j0 >= 0x7ff00000)	/* |X| or |Y| = Inf or NaN */
268 			{
269 				int lx = LO(px);
270 				int ly = LO(py);
271 				if (hx0 == 0x7ff00000 && lx == 0) res = x == y ? y : x;
272 				else if (hy0 == 0x7ff00000 && ly == 0) res = x == y ? x : y;
273 				else res = x + y;
274 				*pz = res;
275 				return;
276 			}
277 			else
278 			{
279 				j0 = diff >> 31;
280 				if (((diff ^ j0) - j0) < 0x03600000)	/* max(|X|,|Y|)/min(|X|,|Y|) < 2**54 */
281 				{
282 					x *= D2ONM1022;
283 					y *= D2ONM1022;
284 
285 					x_hi = (x + D2ON28) - D2ON28;
286 					x_lo = x - x_hi;
287 					y_hi = (y + D2ON28) - D2ON28;
288 					y_lo = y - y_hi;
289 					res = (x_hi * x_hi + y_hi * y_hi);
290 					res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo);
291 
292 					res = sqrt (res);
293 
294 					res = D2ONP1022 * res;
295 					*pz = res;
296 					return;
297 				}
298 				else
299 				{
300 					*pz = x + y;
301 					return;
302 				}
303 			}
304 		}
305 
306 		if (j0 < 0x00100000)	/* X and Y are subnormal */
307 		{
308 			x *= D2ONP1022;
309 			y *= D2ONP1022;
310 
311 			x_hi = (x + D2ON28) - D2ON28;
312 			x_lo = x - x_hi;
313 			y_hi = (y + D2ON28) - D2ON28;
314 			y_lo = y - y_hi;
315 			res = (x_hi * x_hi + y_hi * y_hi);
316 			res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo);
317 
318 			res = sqrt(res);
319 
320 			res = D2ONM1022 * res;
321 			*pz = res;
322 			return;
323 		}
324 
325 		HI(&scl) = (0x7fe00000 - j0);
326 
327 		x *= scl;
328 		y *= scl;
329 
330 		x_hi = (x + D2ON28) - D2ON28;
331 		y_hi = (y + D2ON28) - D2ON28;
332 		x_lo = x - x_hi;
333 		y_lo = y - y_hi;
334 
335 		res = (x_hi * x_hi + y_hi * y_hi);
336 		res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo);
337 
338 		res = sqrt(res);
339 
340 		HI(&scl) = j0;
341 
342 		res = scl * res;
343 		*pz = res;
344 	}
345 }
346 
347 static void
348 __vhypot_n(int n, double * restrict px, int stridex, double * restrict py,
349 	int stridey, double * restrict pz, int stridez)
350 {
351 	int		hx0, hy0, j0, diff0;
352 	double		x_hi0, x_lo0, y_hi0, y_lo0, scl0 = 0;
353 	double		x0, y0, res0;
354 	double		D2ON28 = ((double*)LCONST)[0];		/* 2 ** 28	*/
355 
356 	for(; n > 0 ; n--)
357 	{
358 		x0 = *px;
359 		y0 = *py;
360 		hx0 = HI(px);
361 		hy0 = HI(py);
362 
363 		hx0 &= 0x7fffffff;
364 		hy0 &= 0x7fffffff;
365 
366 		diff0 = hy0 - hx0;
367 		j0 = diff0 >> 31;
368 		j0 = hy0 - (diff0 & j0);
369 		j0 &= 0x7ff00000;
370 
371 		px += stridex;
372 		py += stridey;
373 
374 		HI(&scl0) = (0x7fe00000 - j0);
375 
376 		x0 *= scl0;
377 		y0 *= scl0;
378 
379 		x_hi0 = (x0 + D2ON28) - D2ON28;
380 		y_hi0 = (y0 + D2ON28) - D2ON28;
381 		x_lo0 = x0 - x_hi0;
382 		y_lo0 = y0 - y_hi0;
383 
384 		res0 = (x_hi0 * x_hi0 + y_hi0 * y_hi0);
385 		res0 += ((x0 + x_hi0) * x_lo0 + (y0 + y_hi0) * y_lo0);
386 
387 		res0 = sqrt(res0);
388 
389 		HI(&scl0) = j0;
390 
391 		res0 = scl0 * res0;
392 		*pz = res0;
393 
394 		pz += stridez;
395 	}
396 }
397 
398