1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
24 */
25 /*
26 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
27 * Use is subject to license terms.
28 */
29
30 #include <sys/isa_defs.h>
31 #include "libm_inlines.h"
32
/*
 * HI(x) / LO(x): the 32-bit word of the double that x points to which holds
 * the sign/exponent bits (HI, read as int) or the low-order fraction bits
 * (LO, read as unsigned).  Which of the two words comes first in memory
 * depends on the byte order selected by _LITTLE_ENDIAN.
 *
 * Both expand to lvalues, so they can be assigned to (e.g. HI(&d) = bits).
 * The argument and the whole expansion are parenthesized so that pointer
 * expressions such as HI(p + 1) address the intended element.
 */
#ifdef _LITTLE_ENDIAN
#define	HI(x)	(*(1 + (int *)(x)))
#define	LO(x)	(*(unsigned *)(x))
#else
#define	HI(x)	(*(int *)(x))
#define	LO(x)	(*(1 + (unsigned *)(x)))
#endif
40
/*
 * Map the `restrict' qualifier used in this file onto what the build
 * provides: the _Restrict spelling when __RESTRICT is defined, otherwise
 * nothing at all (the qualifier is simply dropped).
 */
#if defined(__RESTRICT)
#define	restrict	_Restrict
#else
#define	restrict
#endif
46
/*
 * double hypot(double x, double y)
 *
 * Method :
 *	1. Special cases:
 *		x or y is +Inf or -Inf			=> +Inf
 *		x or y is NaN				=> QNaN
 *	2. Computes hypot(x,y):
 *		hypot(x,y) = m * sqrt(xnm * xnm + ynm * ynm)
 *	Where:
 *		m = max(|x|,|y|)
 *		xnm = x * (1/m)
 *		ynm = y * (1/m)
 *
 *	Compute xnm * xnm + ynm * ynm by simulating
 *	multi-precision arithmetic.
 *
 * Accuracy:
 *	Maximum error observed: less than 0.872 ulp after 16,777,216,000
 *	results.
 */
67
68 extern double sqrt(double);
69 extern double fabs(double);
70
/*
 * Bit patterns of the double-precision constants used by this file.
 * The table is read through a (double *) cast, so the entry order is part
 * of the contract:
 *	[0]  D2ON28    = 2 ** 28
 *	[1]  D2ONM1022 = 2 ** -1022
 *	[2]  D2ONP1022 = 2 ** 1022
 */
static const unsigned long long LCONST[] = {
	0x41B0000000000000ULL,
	0x0010000000000000ULL,
	0x7FD0000000000000ULL
};
76
/*
 * Forward declaration of the bulk worker that __vhypot() hands runs of
 * elements to.  The pragma names it as a function that is not to be
 * inlined (compiler-specific; presumably ignored where unsupported).
 */
static void __vhypot_n(int n,
    double * restrict px, int stridex,
    double * restrict py, int stridey,
    double * restrict pz, int stridez);

#pragma no_inline(__vhypot_n)
82
/*
 * RETURN(ret): finish one specially-handled element inside the scanning
 * for-loop of __vhypot().
 *
 * Stores ret through pz and steps py/pz to the next element (px has already
 * been stepped by the loop body).  Then:
 *   - if a run of ordinary elements is pending (n_n != 0), account for the
 *     current element and leave the for-loop so the run can be flushed;
 *   - otherwise restart the run bookkeeping (spx/spy/spz) at the next
 *     element, preload its high words, and go on to the next iteration.
 *
 * The expansion is deliberately a bare block rather than do { } while (0):
 * its `break' and `continue' must act on the enclosing for-loop.
 * It relies on these names in the expansion context: pz, py, px, stridey,
 * stridez, n, n_n, hx0, hy0, spx, spy, spz and the HI() macro.
 */
#define	RETURN(ret)						\
{								\
	*pz = (ret);						\
	pz += stridez;						\
	py += stridey;						\
	if (n_n != 0)						\
	{							\
		n--;						\
		break;						\
	}							\
	spx = px;						\
	spy = py;						\
	spz = pz;						\
	hx0 = HI(px);						\
	hy0 = HI(py);						\
	continue;						\
}
98
99 void
__vhypot(int n,double * restrict px,int stridex,double * restrict py,int stridey,double * restrict pz,int stridez)100 __vhypot(int n, double * restrict px, int stridex, double * restrict py,
101 int stridey, double * restrict pz, int stridez)
102 {
103 int hx0, hx1, hy0, j0, diff;
104 double x_hi, x_lo, y_hi, y_lo;
105 double scl = 0;
106 double x, y, res;
107 double *spx, *spy, *spz;
108 int n_n;
109 double D2ON28 = ((double*)LCONST)[0]; /* 2 ** 28 */
110 double D2ONM1022 = ((double*)LCONST)[1]; /* 2 **-1022 */
111 double D2ONP1022 = ((double*)LCONST)[2]; /* 2 ** 1022 */
112
113 while (n > 1)
114 {
115 n_n = 0;
116 spx = px;
117 spy = py;
118 spz = pz;
119 hx0 = HI(px);
120 hy0 = HI(py);
121 for (; n > 1 ; n--)
122 {
123 px += stridex;
124 hx0 &= 0x7fffffff;
125 hy0 &= 0x7fffffff;
126
127 if (hx0 >= 0x7fe00000) /* |X| >= 2**1023 or Inf or NaN */
128 {
129 diff = hy0 - hx0;
130 j0 = diff >> 31;
131 j0 = hy0 - (diff & j0);
132 j0 &= 0x7ff00000;
133 x = *(px - stridex);
134 y = *py;
135 x = fabs(x);
136 y = fabs(y);
137 if (j0 >= 0x7ff00000) /* |X| or |Y| = Inf or NaN */
138 {
139 int lx = LO((px - stridex));
140 int ly = LO(py);
141 if (hx0 == 0x7ff00000 && lx == 0) res = x == y ? y : x;
142 else if (hy0 == 0x7ff00000 && ly == 0) res = x == y ? x : y;
143 else res = x + y;
144 RETURN (res)
145 }
146 else
147 {
148 j0 = diff >> 31;
149 if (((diff ^ j0) - j0) < 0x03600000) /* max(|X|,|Y|)/min(|X|,|Y|) < 2**54 */
150 {
151 x *= D2ONM1022;
152 y *= D2ONM1022;
153
154 x_hi = (x + D2ON28) - D2ON28;
155 x_lo = x - x_hi;
156 y_hi = (y + D2ON28) - D2ON28;
157 y_lo = y - y_hi;
158 res = (x_hi * x_hi + y_hi * y_hi);
159 res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo);
160
161 res = sqrt (res);
162
163 res = D2ONP1022 * res;
164 RETURN (res)
165 }
166 else RETURN (x + y)
167 }
168 }
169 if (hy0 >= 0x7fe00000) /* |Y| >= 2**1023 or Inf or NaN */
170 {
171 diff = hy0 - hx0;
172 j0 = diff >> 31;
173 j0 = hy0 - (diff & j0);
174 j0 &= 0x7ff00000;
175 x = *(px - stridex);
176 y = *py;
177 x = fabs(x);
178 y = fabs(y);
179 if (j0 >= 0x7ff00000) /* |X| or |Y| = Inf or NaN */
180 {
181 int lx = LO((px - stridex));
182 int ly = LO(py);
183 if (hx0 == 0x7ff00000 && lx == 0) res = x == y ? y : x;
184 else if (hy0 == 0x7ff00000 && ly == 0) res = x == y ? x : y;
185 else res = x + y;
186 RETURN (res)
187 }
188 else
189 {
190 j0 = diff >> 31;
191 if (((diff ^ j0) - j0) < 0x03600000) /* max(|X|,|Y|)/min(|X|,|Y|) < 2**54 */
192 {
193 x *= D2ONM1022;
194 y *= D2ONM1022;
195
196 x_hi = (x + D2ON28) - D2ON28;
197 x_lo = x - x_hi;
198 y_hi = (y + D2ON28) - D2ON28;
199 y_lo = y - y_hi;
200 res = (x_hi * x_hi + y_hi * y_hi);
201 res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo);
202
203 res = sqrt (res);
204
205 res = D2ONP1022 * res;
206 RETURN (res)
207 }
208 else RETURN (x + y)
209 }
210 }
211
212 hx1 = HI(px);
213
214 if (hx0 < 0x00100000 && hy0 < 0x00100000) /* X and Y are subnormal */
215 {
216 x = *(px - stridex);
217 y = *py;
218
219 x *= D2ONP1022;
220 y *= D2ONP1022;
221
222 x_hi = (x + D2ON28) - D2ON28;
223 x_lo = x - x_hi;
224 y_hi = (y + D2ON28) - D2ON28;
225 y_lo = y - y_hi;
226 res = (x_hi * x_hi + y_hi * y_hi);
227 res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo);
228
229 res = sqrt(res);
230
231 res = D2ONM1022 * res;
232 RETURN (res)
233 }
234
235 hx0 = hx1;
236 py += stridey;
237 pz += stridez;
238 n_n++;
239 hy0 = HI(py);
240 }
241 if (n_n > 0)
242 __vhypot_n (n_n, spx, stridex, spy, stridey, spz, stridez);
243 }
244
245 if (n > 0)
246 {
247 x = *px;
248 y = *py;
249 hx0 = HI(px);
250 hy0 = HI(py);
251
252 hx0 &= 0x7fffffff;
253 hy0 &= 0x7fffffff;
254
255 diff = hy0 - hx0;
256 j0 = diff >> 31;
257 j0 = hy0 - (diff & j0);
258 j0 &= 0x7ff00000;
259
260 if (j0 >= 0x7fe00000) /* max(|X|,|Y|) >= 2**1023 or X or Y = Inf or NaN */
261 {
262 x = fabs(x);
263 y = fabs(y);
264 if (j0 >= 0x7ff00000) /* |X| or |Y| = Inf or NaN */
265 {
266 int lx = LO(px);
267 int ly = LO(py);
268 if (hx0 == 0x7ff00000 && lx == 0) res = x == y ? y : x;
269 else if (hy0 == 0x7ff00000 && ly == 0) res = x == y ? x : y;
270 else res = x + y;
271 *pz = res;
272 return;
273 }
274 else
275 {
276 j0 = diff >> 31;
277 if (((diff ^ j0) - j0) < 0x03600000) /* max(|X|,|Y|)/min(|X|,|Y|) < 2**54 */
278 {
279 x *= D2ONM1022;
280 y *= D2ONM1022;
281
282 x_hi = (x + D2ON28) - D2ON28;
283 x_lo = x - x_hi;
284 y_hi = (y + D2ON28) - D2ON28;
285 y_lo = y - y_hi;
286 res = (x_hi * x_hi + y_hi * y_hi);
287 res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo);
288
289 res = sqrt (res);
290
291 res = D2ONP1022 * res;
292 *pz = res;
293 return;
294 }
295 else
296 {
297 *pz = x + y;
298 return;
299 }
300 }
301 }
302
303 if (j0 < 0x00100000) /* X and Y are subnormal */
304 {
305 x *= D2ONP1022;
306 y *= D2ONP1022;
307
308 x_hi = (x + D2ON28) - D2ON28;
309 x_lo = x - x_hi;
310 y_hi = (y + D2ON28) - D2ON28;
311 y_lo = y - y_hi;
312 res = (x_hi * x_hi + y_hi * y_hi);
313 res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo);
314
315 res = sqrt(res);
316
317 res = D2ONM1022 * res;
318 *pz = res;
319 return;
320 }
321
322 HI(&scl) = (0x7fe00000 - j0);
323
324 x *= scl;
325 y *= scl;
326
327 x_hi = (x + D2ON28) - D2ON28;
328 y_hi = (y + D2ON28) - D2ON28;
329 x_lo = x - x_hi;
330 y_lo = y - y_hi;
331
332 res = (x_hi * x_hi + y_hi * y_hi);
333 res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo);
334
335 res = sqrt(res);
336
337 HI(&scl) = j0;
338
339 res = scl * res;
340 *pz = res;
341 }
342 }
343
344 static void
__vhypot_n(int n,double * restrict px,int stridex,double * restrict py,int stridey,double * restrict pz,int stridez)345 __vhypot_n(int n, double * restrict px, int stridex, double * restrict py,
346 int stridey, double * restrict pz, int stridez)
347 {
348 int hx0, hy0, j0, diff0;
349 double x_hi0, x_lo0, y_hi0, y_lo0, scl0 = 0;
350 double x0, y0, res0;
351 double D2ON28 = ((double*)LCONST)[0]; /* 2 ** 28 */
352
353 for(; n > 0 ; n--)
354 {
355 x0 = *px;
356 y0 = *py;
357 hx0 = HI(px);
358 hy0 = HI(py);
359
360 hx0 &= 0x7fffffff;
361 hy0 &= 0x7fffffff;
362
363 diff0 = hy0 - hx0;
364 j0 = diff0 >> 31;
365 j0 = hy0 - (diff0 & j0);
366 j0 &= 0x7ff00000;
367
368 px += stridex;
369 py += stridey;
370
371 HI(&scl0) = (0x7fe00000 - j0);
372
373 x0 *= scl0;
374 y0 *= scl0;
375
376 x_hi0 = (x0 + D2ON28) - D2ON28;
377 y_hi0 = (y0 + D2ON28) - D2ON28;
378 x_lo0 = x0 - x_hi0;
379 y_lo0 = y0 - y_hi0;
380
381 res0 = (x_hi0 * x_hi0 + y_hi0 * y_hi0);
382 res0 += ((x0 + x_hi0) * x_lo0 + (y0 + y_hi0) * y_lo0);
383
384 res0 = sqrt(res0);
385
386 HI(&scl0) = j0;
387
388 res0 = scl0 * res0;
389 *pz = res0;
390
391 pz += stridez;
392 }
393 }
394