1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 24 */ 25 /* 26 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 27 * Use is subject to license terms. 28 */ 29 30 #include <sys/isa_defs.h> 31 #include "libm_inlines.h" 32 33 #ifdef _LITTLE_ENDIAN 34 #define HI(x) *(1+(int *)x) 35 #define LO(x) *(unsigned *)x 36 #else 37 #define HI(x) *(int *)x 38 #define LO(x) *(1+(unsigned *)x) 39 #endif 40 41 #ifdef __RESTRICT 42 #define restrict _Restrict 43 #else 44 #define restrict 45 #endif 46 47 /* 48 * double hypot(double x, double y) 49 * 50 * Method : 51 * 1. Special cases: 52 * x or y is +Inf or -Inf => +Inf 53 * x or y is NaN => QNaN 54 * 2. Computes hypot(x,y): 55 * hypot(x,y) = m * sqrt(xnm * xnm + ynm * ynm) 56 * Where: 57 * m = max(|x|,|y|) 58 * xnm = x * (1/m) 59 * ynm = y * (1/m) 60 * 61 * Compute xnm * xnm + ynm * ynm by simulating 62 * muti-precision arithmetic. 63 * 64 * Accuracy: 65 * Maximum error observed: less than 0.872 ulp after 16.777.216.000 66 * results. 67 */ 68 69 extern double sqrt(double); 70 extern double fabs(double); 71 72 static const unsigned long long LCONST[] = { 73 0x41b0000000000000ULL, /* D2ON28 = 2 ** 28 */ 74 0x0010000000000000ULL, /* D2ONM1022 = 2 ** -1022 */ 75 0x7fd0000000000000ULL /* D2ONP1022 = 2 ** 1022 */ 76 }; 77 78 static void 79 __vhypot_n(int n, double *restrict px, int stridex, double *restrict py, 80 int stridey, double *restrict pz, int stridez); 81 82 #define RETURN(ret) \ 83 { \ 84 *pz = (ret); \ 85 py += stridey; \ 86 pz += stridez; \ 87 if (n_n == 0) \ 88 { \ 89 hx0 = HI(px); \ 90 hy0 = HI(py); \ 91 spx = px; \ 92 spy = py; \ 93 spz = pz; \ 94 continue; \ 95 } \ 96 n--; \ 97 break; \ 98 } 99 100 void 101 __vhypot(int n, double *restrict px, int stridex, double *restrict py, 102 int stridey, double *restrict pz, int stridez) 103 { 104 int hx0, hx1, hy0, j0, diff; 105 double x_hi, x_lo, y_hi, y_lo; 106 double scl = 0; 107 double x, y, res; 108 double *spx, *spy, *spz; 109 int n_n; 110 double D2ON28 = ((double *)LCONST)[0]; /* 2 ** 28 */ 111 double D2ONM1022 = ((double *)LCONST)[1]; /* 2 **-1022 */ 112 double D2ONP1022 = ((double *)LCONST)[2]; /* 2 ** 1022 */ 113 114 while (n > 1) { 115 n_n = 0; 116 spx = px; 117 spy = py; 118 spz = pz; 119 hx0 = HI(px); 120 hy0 = HI(py); 121 for (; n > 1; n--) { 122 px += stridex; 123 hx0 &= 0x7fffffff; 124 hy0 &= 0x7fffffff; 125 126 if (hx0 >= 0x7fe00000) { 127 /* |X| >= 2**1023 or Inf or NaN */ 128 diff = hy0 - hx0; 129 j0 = diff >> 31; 130 j0 = hy0 - (diff & j0); 131 j0 &= 0x7ff00000; 132 x = *(px - stridex); 133 y = *py; 134 x = fabs(x); 135 y = fabs(y); 136 if (j0 >= 0x7ff00000) { 137 /* |X| or |Y| = Inf or NaN */ 138 int lx = LO((px - stridex)); 139 int ly = LO(py); 140 if (hx0 == 0x7ff00000 && lx == 0) 141 res = x == y ? y : x; 142 else if (hy0 == 0x7ff00000 && ly == 0) 143 res = x == y ? x : y; 144 else 145 res = x + y; 146 RETURN(res) 147 } else { 148 j0 = diff >> 31; 149 if (((diff ^ j0) - j0) < 0x03600000) { 150 /* 151 * max(|X|,|Y|)/min(|X|,|Y|) < 152 * 2**54 153 */ 154 x *= D2ONM1022; 155 y *= D2ONM1022; 156 157 x_hi = (x + D2ON28) - D2ON28; 158 x_lo = x - x_hi; 159 y_hi = (y + D2ON28) - D2ON28; 160 y_lo = y - y_hi; 161 res = x_hi * x_hi + y_hi * y_hi; 162 res += ((x + x_hi) * x_lo + 163 (y + y_hi) * y_lo); 164 165 res = sqrt(res); 166 167 res = D2ONP1022 * res; 168 RETURN(res) 169 } else { 170 RETURN(x + y) 171 } 172 } 173 } 174 if (hy0 >= 0x7fe00000) { 175 /* |Y| >= 2**1023 or Inf or NaN */ 176 diff = hy0 - hx0; 177 j0 = diff >> 31; 178 j0 = hy0 - (diff & j0); 179 j0 &= 0x7ff00000; 180 x = *(px - stridex); 181 y = *py; 182 x = fabs(x); 183 y = fabs(y); 184 if (j0 >= 0x7ff00000) { 185 /* |X| or |Y| = Inf or NaN */ 186 int lx = LO((px - stridex)); 187 int ly = LO(py); 188 if (hx0 == 0x7ff00000 && lx == 0) 189 res = x == y ? y : x; 190 else if (hy0 == 0x7ff00000 && ly == 0) 191 res = x == y ? x : y; 192 else 193 res = x + y; 194 RETURN(res) 195 } else { 196 j0 = diff >> 31; 197 if (((diff ^ j0) - j0) < 0x03600000) { 198 /* 199 * max(|X|,|Y|)/min(|X|,|Y|) < 200 * 2**54 201 */ 202 x *= D2ONM1022; 203 y *= D2ONM1022; 204 205 x_hi = (x + D2ON28) - D2ON28; 206 x_lo = x - x_hi; 207 y_hi = (y + D2ON28) - D2ON28; 208 y_lo = y - y_hi; 209 res = x_hi * x_hi + y_hi * y_hi; 210 res += ((x + x_hi) * x_lo + 211 (y + y_hi) * y_lo); 212 213 res = sqrt(res); 214 215 res = D2ONP1022 * res; 216 RETURN(res) 217 } else { 218 RETURN(x + y) 219 } 220 } 221 } 222 223 hx1 = HI(px); 224 225 if (hx0 < 0x00100000 && hy0 < 0x00100000) { 226 /* X and Y are subnormal */ 227 x = *(px - stridex); 228 y = *py; 229 230 x *= D2ONP1022; 231 y *= D2ONP1022; 232 233 x_hi = (x + D2ON28) - D2ON28; 234 x_lo = x - x_hi; 235 y_hi = (y + D2ON28) - D2ON28; 236 y_lo = y - y_hi; 237 res = (x_hi * x_hi + y_hi * y_hi); 238 res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo); 239 240 res = sqrt(res); 241 242 res = D2ONM1022 * res; 243 RETURN(res) 244 } 245 246 hx0 = hx1; 247 py += stridey; 248 pz += stridez; 249 n_n++; 250 hy0 = HI(py); 251 } 252 if (n_n > 0) 253 __vhypot_n(n_n, spx, stridex, spy, stridey, spz, 254 stridez); 255 } 256 257 if (n > 0) { 258 x = *px; 259 y = *py; 260 hx0 = HI(px); 261 hy0 = HI(py); 262 263 hx0 &= 0x7fffffff; 264 hy0 &= 0x7fffffff; 265 266 diff = hy0 - hx0; 267 j0 = diff >> 31; 268 j0 = hy0 - (diff & j0); 269 j0 &= 0x7ff00000; 270 271 if (j0 >= 0x7fe00000) { 272 /* max(|X|,|Y|) >= 2**1023 or X or Y = Inf or NaN */ 273 x = fabs(x); 274 y = fabs(y); 275 if (j0 >= 0x7ff00000) /* |X| or |Y| = Inf or NaN */ 276 { 277 int lx = LO(px); 278 int ly = LO(py); 279 if (hx0 == 0x7ff00000 && lx == 0) 280 res = x == y ? y : x; 281 else if (hy0 == 0x7ff00000 && ly == 0) 282 res = x == y ? x : y; 283 else 284 res = x + y; 285 *pz = res; 286 return; 287 } else { 288 j0 = diff >> 31; 289 if (((diff ^ j0) - j0) < 0x03600000) { 290 /* max(|X|,|Y|)/min(|X|,|Y|) < 2**54 */ 291 x *= D2ONM1022; 292 y *= D2ONM1022; 293 294 x_hi = (x + D2ON28) - D2ON28; 295 x_lo = x - x_hi; 296 y_hi = (y + D2ON28) - D2ON28; 297 y_lo = y - y_hi; 298 res = (x_hi * x_hi + y_hi * y_hi); 299 res += ((x + x_hi) * x_lo + 300 (y + y_hi) * y_lo); 301 302 res = sqrt(res); 303 304 res = D2ONP1022 * res; 305 *pz = res; 306 return; 307 } else { 308 *pz = x + y; 309 return; 310 } 311 } 312 } 313 314 if (j0 < 0x00100000) { /* X and Y are subnormal */ 315 x *= D2ONP1022; 316 y *= D2ONP1022; 317 318 x_hi = (x + D2ON28) - D2ON28; 319 x_lo = x - x_hi; 320 y_hi = (y + D2ON28) - D2ON28; 321 y_lo = y - y_hi; 322 res = (x_hi * x_hi + y_hi * y_hi); 323 res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo); 324 325 res = sqrt(res); 326 327 res = D2ONM1022 * res; 328 *pz = res; 329 return; 330 } 331 332 HI(&scl) = (0x7fe00000 - j0); 333 334 x *= scl; 335 y *= scl; 336 337 x_hi = (x + D2ON28) - D2ON28; 338 y_hi = (y + D2ON28) - D2ON28; 339 x_lo = x - x_hi; 340 y_lo = y - y_hi; 341 342 res = (x_hi * x_hi + y_hi * y_hi); 343 res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo); 344 345 res = sqrt(res); 346 347 HI(&scl) = j0; 348 349 res = scl * res; 350 *pz = res; 351 } 352 } 353 354 static void 355 __vhypot_n(int n, double *restrict px, int stridex, double *restrict py, 356 int stridey, double *restrict pz, int stridez) 357 { 358 int hx0, hy0, j0, diff0; 359 double x_hi0, x_lo0, y_hi0, y_lo0, scl0 = 0; 360 double x0, y0, res0; 361 double D2ON28 = ((double *)LCONST)[0]; /* 2 ** 28 */ 362 363 for (; n > 0; n--) { 364 x0 = *px; 365 y0 = *py; 366 hx0 = HI(px); 367 hy0 = HI(py); 368 369 hx0 &= 0x7fffffff; 370 hy0 &= 0x7fffffff; 371 372 diff0 = hy0 - hx0; 373 j0 = diff0 >> 31; 374 j0 = hy0 - (diff0 & j0); 375 j0 &= 0x7ff00000; 376 377 px += stridex; 378 py += stridey; 379 380 HI(&scl0) = (0x7fe00000 - j0); 381 382 x0 *= scl0; 383 y0 *= scl0; 384 385 x_hi0 = (x0 + D2ON28) - D2ON28; 386 y_hi0 = (y0 + D2ON28) - D2ON28; 387 x_lo0 = x0 - x_hi0; 388 y_lo0 = y0 - y_hi0; 389 390 res0 = (x_hi0 * x_hi0 + y_hi0 * y_hi0); 391 res0 += ((x0 + x_hi0) * x_lo0 + (y0 + y_hi0) * y_lo0); 392 393 res0 = sqrt(res0); 394 395 HI(&scl0) = j0; 396 397 res0 = scl0 * res0; 398 *pz = res0; 399 400 pz += stridez; 401 } 402 } 403