1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
24 */
25 /*
26 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
27 * Use is subject to license terms.
28 */
29
30 #include <sys/isa_defs.h>
31 #include "libm_inlines.h"
32
33 #ifdef _LITTLE_ENDIAN
34 #define HI(x) *(1+(int *)x)
35 #define LO(x) *(unsigned *)x
36 #else
37 #define HI(x) *(int *)x
38 #define LO(x) *(1+(unsigned *)x)
39 #endif
40
41 #ifdef __RESTRICT
42 #define restrict _Restrict
43 #else
44 #define restrict
45 #endif
46
47 /*
48 * double hypot(double x, double y)
49 *
50 * Method :
51 * 1. Special cases:
52 * x or y is +Inf or -Inf => +Inf
53 * x or y is NaN => QNaN
54 * 2. Computes hypot(x,y):
55 * hypot(x,y) = m * sqrt(xnm * xnm + ynm * ynm)
56 * Where:
57 * m = max(|x|,|y|)
58 * xnm = x * (1/m)
59 * ynm = y * (1/m)
60 *
61 * Compute xnm * xnm + ynm * ynm by simulating
62 * muti-precision arithmetic.
63 *
64 * Accuracy:
65 * Maximum error observed: less than 0.872 ulp after 16.777.216.000
66 * results.
67 */
68
69 extern double sqrt(double);
70 extern double fabs(double);
71
72 static const unsigned long long LCONST[] = {
73 0x41b0000000000000ULL, /* D2ON28 = 2 ** 28 */
74 0x0010000000000000ULL, /* D2ONM1022 = 2 ** -1022 */
75 0x7fd0000000000000ULL /* D2ONP1022 = 2 ** 1022 */
76 };
77
78 static void
79 __vhypot_n(int n, double *restrict px, int stridex, double *restrict py,
80 int stridey, double *restrict pz, int stridez);
81
82 #define RETURN(ret) \
83 { \
84 *pz = (ret); \
85 py += stridey; \
86 pz += stridez; \
87 if (n_n == 0) \
88 { \
89 hx0 = HI(px); \
90 hy0 = HI(py); \
91 spx = px; \
92 spy = py; \
93 spz = pz; \
94 continue; \
95 } \
96 n--; \
97 break; \
98 }
99
100 void
__vhypot(int n,double * restrict px,int stridex,double * restrict py,int stridey,double * restrict pz,int stridez)101 __vhypot(int n, double *restrict px, int stridex, double *restrict py,
102 int stridey, double *restrict pz, int stridez)
103 {
104 int hx0, hx1, hy0, j0, diff;
105 double x_hi, x_lo, y_hi, y_lo;
106 double scl = 0;
107 double x, y, res;
108 double *spx, *spy, *spz;
109 int n_n;
110 double D2ON28 = ((double *)LCONST)[0]; /* 2 ** 28 */
111 double D2ONM1022 = ((double *)LCONST)[1]; /* 2 **-1022 */
112 double D2ONP1022 = ((double *)LCONST)[2]; /* 2 ** 1022 */
113
114 while (n > 1) {
115 n_n = 0;
116 spx = px;
117 spy = py;
118 spz = pz;
119 hx0 = HI(px);
120 hy0 = HI(py);
121 for (; n > 1; n--) {
122 px += stridex;
123 hx0 &= 0x7fffffff;
124 hy0 &= 0x7fffffff;
125
126 if (hx0 >= 0x7fe00000) {
127 /* |X| >= 2**1023 or Inf or NaN */
128 diff = hy0 - hx0;
129 j0 = diff >> 31;
130 j0 = hy0 - (diff & j0);
131 j0 &= 0x7ff00000;
132 x = *(px - stridex);
133 y = *py;
134 x = fabs(x);
135 y = fabs(y);
136 if (j0 >= 0x7ff00000) {
137 /* |X| or |Y| = Inf or NaN */
138 int lx = LO((px - stridex));
139 int ly = LO(py);
140 if (hx0 == 0x7ff00000 && lx == 0)
141 res = x == y ? y : x;
142 else if (hy0 == 0x7ff00000 && ly == 0)
143 res = x == y ? x : y;
144 else
145 res = x + y;
146 RETURN(res)
147 } else {
148 j0 = diff >> 31;
149 if (((diff ^ j0) - j0) < 0x03600000) {
150 /*
151 * max(|X|,|Y|)/min(|X|,|Y|) <
152 * 2**54
153 */
154 x *= D2ONM1022;
155 y *= D2ONM1022;
156
157 x_hi = (x + D2ON28) - D2ON28;
158 x_lo = x - x_hi;
159 y_hi = (y + D2ON28) - D2ON28;
160 y_lo = y - y_hi;
161 res = x_hi * x_hi + y_hi * y_hi;
162 res += ((x + x_hi) * x_lo +
163 (y + y_hi) * y_lo);
164
165 res = sqrt(res);
166
167 res = D2ONP1022 * res;
168 RETURN(res)
169 } else {
170 RETURN(x + y)
171 }
172 }
173 }
174 if (hy0 >= 0x7fe00000) {
175 /* |Y| >= 2**1023 or Inf or NaN */
176 diff = hy0 - hx0;
177 j0 = diff >> 31;
178 j0 = hy0 - (diff & j0);
179 j0 &= 0x7ff00000;
180 x = *(px - stridex);
181 y = *py;
182 x = fabs(x);
183 y = fabs(y);
184 if (j0 >= 0x7ff00000) {
185 /* |X| or |Y| = Inf or NaN */
186 int lx = LO((px - stridex));
187 int ly = LO(py);
188 if (hx0 == 0x7ff00000 && lx == 0)
189 res = x == y ? y : x;
190 else if (hy0 == 0x7ff00000 && ly == 0)
191 res = x == y ? x : y;
192 else
193 res = x + y;
194 RETURN(res)
195 } else {
196 j0 = diff >> 31;
197 if (((diff ^ j0) - j0) < 0x03600000) {
198 /*
199 * max(|X|,|Y|)/min(|X|,|Y|) <
200 * 2**54
201 */
202 x *= D2ONM1022;
203 y *= D2ONM1022;
204
205 x_hi = (x + D2ON28) - D2ON28;
206 x_lo = x - x_hi;
207 y_hi = (y + D2ON28) - D2ON28;
208 y_lo = y - y_hi;
209 res = x_hi * x_hi + y_hi * y_hi;
210 res += ((x + x_hi) * x_lo +
211 (y + y_hi) * y_lo);
212
213 res = sqrt(res);
214
215 res = D2ONP1022 * res;
216 RETURN(res)
217 } else {
218 RETURN(x + y)
219 }
220 }
221 }
222
223 hx1 = HI(px);
224
225 if (hx0 < 0x00100000 && hy0 < 0x00100000) {
226 /* X and Y are subnormal */
227 x = *(px - stridex);
228 y = *py;
229
230 x *= D2ONP1022;
231 y *= D2ONP1022;
232
233 x_hi = (x + D2ON28) - D2ON28;
234 x_lo = x - x_hi;
235 y_hi = (y + D2ON28) - D2ON28;
236 y_lo = y - y_hi;
237 res = (x_hi * x_hi + y_hi * y_hi);
238 res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo);
239
240 res = sqrt(res);
241
242 res = D2ONM1022 * res;
243 RETURN(res)
244 }
245
246 hx0 = hx1;
247 py += stridey;
248 pz += stridez;
249 n_n++;
250 hy0 = HI(py);
251 }
252 if (n_n > 0)
253 __vhypot_n(n_n, spx, stridex, spy, stridey, spz,
254 stridez);
255 }
256
257 if (n > 0) {
258 x = *px;
259 y = *py;
260 hx0 = HI(px);
261 hy0 = HI(py);
262
263 hx0 &= 0x7fffffff;
264 hy0 &= 0x7fffffff;
265
266 diff = hy0 - hx0;
267 j0 = diff >> 31;
268 j0 = hy0 - (diff & j0);
269 j0 &= 0x7ff00000;
270
271 if (j0 >= 0x7fe00000) {
272 /* max(|X|,|Y|) >= 2**1023 or X or Y = Inf or NaN */
273 x = fabs(x);
274 y = fabs(y);
275 if (j0 >= 0x7ff00000) /* |X| or |Y| = Inf or NaN */
276 {
277 int lx = LO(px);
278 int ly = LO(py);
279 if (hx0 == 0x7ff00000 && lx == 0)
280 res = x == y ? y : x;
281 else if (hy0 == 0x7ff00000 && ly == 0)
282 res = x == y ? x : y;
283 else
284 res = x + y;
285 *pz = res;
286 return;
287 } else {
288 j0 = diff >> 31;
289 if (((diff ^ j0) - j0) < 0x03600000) {
290 /* max(|X|,|Y|)/min(|X|,|Y|) < 2**54 */
291 x *= D2ONM1022;
292 y *= D2ONM1022;
293
294 x_hi = (x + D2ON28) - D2ON28;
295 x_lo = x - x_hi;
296 y_hi = (y + D2ON28) - D2ON28;
297 y_lo = y - y_hi;
298 res = (x_hi * x_hi + y_hi * y_hi);
299 res += ((x + x_hi) * x_lo +
300 (y + y_hi) * y_lo);
301
302 res = sqrt(res);
303
304 res = D2ONP1022 * res;
305 *pz = res;
306 return;
307 } else {
308 *pz = x + y;
309 return;
310 }
311 }
312 }
313
314 if (j0 < 0x00100000) { /* X and Y are subnormal */
315 x *= D2ONP1022;
316 y *= D2ONP1022;
317
318 x_hi = (x + D2ON28) - D2ON28;
319 x_lo = x - x_hi;
320 y_hi = (y + D2ON28) - D2ON28;
321 y_lo = y - y_hi;
322 res = (x_hi * x_hi + y_hi * y_hi);
323 res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo);
324
325 res = sqrt(res);
326
327 res = D2ONM1022 * res;
328 *pz = res;
329 return;
330 }
331
332 HI(&scl) = (0x7fe00000 - j0);
333
334 x *= scl;
335 y *= scl;
336
337 x_hi = (x + D2ON28) - D2ON28;
338 y_hi = (y + D2ON28) - D2ON28;
339 x_lo = x - x_hi;
340 y_lo = y - y_hi;
341
342 res = (x_hi * x_hi + y_hi * y_hi);
343 res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo);
344
345 res = sqrt(res);
346
347 HI(&scl) = j0;
348
349 res = scl * res;
350 *pz = res;
351 }
352 }
353
354 static void
__vhypot_n(int n,double * restrict px,int stridex,double * restrict py,int stridey,double * restrict pz,int stridez)355 __vhypot_n(int n, double *restrict px, int stridex, double *restrict py,
356 int stridey, double *restrict pz, int stridez)
357 {
358 int hx0, hy0, j0, diff0;
359 double x_hi0, x_lo0, y_hi0, y_lo0, scl0 = 0;
360 double x0, y0, res0;
361 double D2ON28 = ((double *)LCONST)[0]; /* 2 ** 28 */
362
363 for (; n > 0; n--) {
364 x0 = *px;
365 y0 = *py;
366 hx0 = HI(px);
367 hy0 = HI(py);
368
369 hx0 &= 0x7fffffff;
370 hy0 &= 0x7fffffff;
371
372 diff0 = hy0 - hx0;
373 j0 = diff0 >> 31;
374 j0 = hy0 - (diff0 & j0);
375 j0 &= 0x7ff00000;
376
377 px += stridex;
378 py += stridey;
379
380 HI(&scl0) = (0x7fe00000 - j0);
381
382 x0 *= scl0;
383 y0 *= scl0;
384
385 x_hi0 = (x0 + D2ON28) - D2ON28;
386 y_hi0 = (y0 + D2ON28) - D2ON28;
387 x_lo0 = x0 - x_hi0;
388 y_lo0 = y0 - y_hi0;
389
390 res0 = (x_hi0 * x_hi0 + y_hi0 * y_hi0);
391 res0 += ((x0 + x_hi0) * x_lo0 + (y0 + y_hi0) * y_lo0);
392
393 res0 = sqrt(res0);
394
395 HI(&scl0) = j0;
396
397 res0 = scl0 * res0;
398 *pz = res0;
399
400 pz += stridez;
401 }
402 }
403