1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 24 */ 25 /* 26 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 27 * Use is subject to license terms. 28 */ 29 30 #include <sys/isa_defs.h> 31 #include "libm_synonyms.h" 32 #include "libm_inlines.h" 33 34 #ifdef _LITTLE_ENDIAN 35 #define HI(x) *(1+(int*)x) 36 #define LO(x) *(unsigned*)x 37 #else 38 #define HI(x) *(int*)x 39 #define LO(x) *(1+(unsigned*)x) 40 #endif 41 42 #ifdef __RESTRICT 43 #define restrict _Restrict 44 #else 45 #define restrict 46 #endif 47 48 /* double hypot(double x, double y) 49 * 50 * Method : 51 * 1. Special cases: 52 * x or y is +Inf or -Inf => +Inf 53 * x or y is NaN => QNaN 54 * 2. Computes hypot(x,y): 55 * hypot(x,y) = m * sqrt(xnm * xnm + ynm * ynm) 56 * Where: 57 * m = max(|x|,|y|) 58 * xnm = x * (1/m) 59 * ynm = y * (1/m) 60 * 61 * Compute xnm * xnm + ynm * ynm by simulating 62 * muti-precision arithmetic. 63 * 64 * Accuracy: 65 * Maximum error observed: less than 0.872 ulp after 16.777.216.000 66 * results. 67 */ 68 69 #define sqrt __sqrt 70 71 extern double sqrt(double); 72 extern double fabs(double); 73 74 static const unsigned long long LCONST[] = { 75 0x41b0000000000000ULL, /* D2ON28 = 2 ** 28 */ 76 0x0010000000000000ULL, /* D2ONM1022 = 2 ** -1022 */ 77 0x7fd0000000000000ULL /* D2ONP1022 = 2 ** 1022 */ 78 }; 79 80 static void 81 __vhypot_n(int n, double * restrict px, int stridex, double * restrict py, 82 int stridey, double * restrict pz, int stridez); 83 84 #pragma no_inline(__vhypot_n) 85 86 #define RETURN(ret) \ 87 { \ 88 *pz = (ret); \ 89 py += stridey; \ 90 pz += stridez; \ 91 if (n_n == 0) \ 92 { \ 93 hx0 = HI(px); \ 94 hy0 = HI(py); \ 95 spx = px; spy = py; spz = pz; \ 96 continue; \ 97 } \ 98 n--; \ 99 break; \ 100 } 101 102 void 103 __vhypot(int n, double * restrict px, int stridex, double * restrict py, 104 int stridey, double * restrict pz, int stridez) 105 { 106 int hx0, hx1, hy0, j0, diff; 107 double x_hi, x_lo, y_hi, y_lo; 108 double scl = 0; 109 double x, y, res; 110 double *spx, *spy, *spz; 111 int n_n; 112 double D2ON28 = ((double*)LCONST)[0]; /* 2 ** 28 */ 113 double D2ONM1022 = ((double*)LCONST)[1]; /* 2 **-1022 */ 114 double D2ONP1022 = ((double*)LCONST)[2]; /* 2 ** 1022 */ 115 116 while (n > 1) 117 { 118 n_n = 0; 119 spx = px; 120 spy = py; 121 spz = pz; 122 hx0 = HI(px); 123 hy0 = HI(py); 124 for (; n > 1 ; n--) 125 { 126 px += stridex; 127 hx0 &= 0x7fffffff; 128 hy0 &= 0x7fffffff; 129 130 if (hx0 >= 0x7fe00000) /* |X| >= 2**1023 or Inf or NaN */ 131 { 132 diff = hy0 - hx0; 133 j0 = diff >> 31; 134 j0 = hy0 - (diff & j0); 135 j0 &= 0x7ff00000; 136 x = *(px - stridex); 137 y = *py; 138 x = fabs(x); 139 y = fabs(y); 140 if (j0 >= 0x7ff00000) /* |X| or |Y| = Inf or NaN */ 141 { 142 int lx = LO((px - stridex)); 143 int ly = LO(py); 144 if (hx0 == 0x7ff00000 && lx == 0) res = x == y ? y : x; 145 else if (hy0 == 0x7ff00000 && ly == 0) res = x == y ? x : y; 146 else res = x + y; 147 RETURN (res) 148 } 149 else 150 { 151 j0 = diff >> 31; 152 if (((diff ^ j0) - j0) < 0x03600000) /* max(|X|,|Y|)/min(|X|,|Y|) < 2**54 */ 153 { 154 x *= D2ONM1022; 155 y *= D2ONM1022; 156 157 x_hi = (x + D2ON28) - D2ON28; 158 x_lo = x - x_hi; 159 y_hi = (y + D2ON28) - D2ON28; 160 y_lo = y - y_hi; 161 res = (x_hi * x_hi + y_hi * y_hi); 162 res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo); 163 164 res = sqrt (res); 165 166 res = D2ONP1022 * res; 167 RETURN (res) 168 } 169 else RETURN (x + y) 170 } 171 } 172 if (hy0 >= 0x7fe00000) /* |Y| >= 2**1023 or Inf or NaN */ 173 { 174 diff = hy0 - hx0; 175 j0 = diff >> 31; 176 j0 = hy0 - (diff & j0); 177 j0 &= 0x7ff00000; 178 x = *(px - stridex); 179 y = *py; 180 x = fabs(x); 181 y = fabs(y); 182 if (j0 >= 0x7ff00000) /* |X| or |Y| = Inf or NaN */ 183 { 184 int lx = LO((px - stridex)); 185 int ly = LO(py); 186 if (hx0 == 0x7ff00000 && lx == 0) res = x == y ? y : x; 187 else if (hy0 == 0x7ff00000 && ly == 0) res = x == y ? x : y; 188 else res = x + y; 189 RETURN (res) 190 } 191 else 192 { 193 j0 = diff >> 31; 194 if (((diff ^ j0) - j0) < 0x03600000) /* max(|X|,|Y|)/min(|X|,|Y|) < 2**54 */ 195 { 196 x *= D2ONM1022; 197 y *= D2ONM1022; 198 199 x_hi = (x + D2ON28) - D2ON28; 200 x_lo = x - x_hi; 201 y_hi = (y + D2ON28) - D2ON28; 202 y_lo = y - y_hi; 203 res = (x_hi * x_hi + y_hi * y_hi); 204 res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo); 205 206 res = sqrt (res); 207 208 res = D2ONP1022 * res; 209 RETURN (res) 210 } 211 else RETURN (x + y) 212 } 213 } 214 215 hx1 = HI(px); 216 217 if (hx0 < 0x00100000 && hy0 < 0x00100000) /* X and Y are subnormal */ 218 { 219 x = *(px - stridex); 220 y = *py; 221 222 x *= D2ONP1022; 223 y *= D2ONP1022; 224 225 x_hi = (x + D2ON28) - D2ON28; 226 x_lo = x - x_hi; 227 y_hi = (y + D2ON28) - D2ON28; 228 y_lo = y - y_hi; 229 res = (x_hi * x_hi + y_hi * y_hi); 230 res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo); 231 232 res = sqrt(res); 233 234 res = D2ONM1022 * res; 235 RETURN (res) 236 } 237 238 hx0 = hx1; 239 py += stridey; 240 pz += stridez; 241 n_n++; 242 hy0 = HI(py); 243 } 244 if (n_n > 0) 245 __vhypot_n (n_n, spx, stridex, spy, stridey, spz, stridez); 246 } 247 248 if (n > 0) 249 { 250 x = *px; 251 y = *py; 252 hx0 = HI(px); 253 hy0 = HI(py); 254 255 hx0 &= 0x7fffffff; 256 hy0 &= 0x7fffffff; 257 258 diff = hy0 - hx0; 259 j0 = diff >> 31; 260 j0 = hy0 - (diff & j0); 261 j0 &= 0x7ff00000; 262 263 if (j0 >= 0x7fe00000) /* max(|X|,|Y|) >= 2**1023 or X or Y = Inf or NaN */ 264 { 265 x = fabs(x); 266 y = fabs(y); 267 if (j0 >= 0x7ff00000) /* |X| or |Y| = Inf or NaN */ 268 { 269 int lx = LO(px); 270 int ly = LO(py); 271 if (hx0 == 0x7ff00000 && lx == 0) res = x == y ? y : x; 272 else if (hy0 == 0x7ff00000 && ly == 0) res = x == y ? x : y; 273 else res = x + y; 274 *pz = res; 275 return; 276 } 277 else 278 { 279 j0 = diff >> 31; 280 if (((diff ^ j0) - j0) < 0x03600000) /* max(|X|,|Y|)/min(|X|,|Y|) < 2**54 */ 281 { 282 x *= D2ONM1022; 283 y *= D2ONM1022; 284 285 x_hi = (x + D2ON28) - D2ON28; 286 x_lo = x - x_hi; 287 y_hi = (y + D2ON28) - D2ON28; 288 y_lo = y - y_hi; 289 res = (x_hi * x_hi + y_hi * y_hi); 290 res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo); 291 292 res = sqrt (res); 293 294 res = D2ONP1022 * res; 295 *pz = res; 296 return; 297 } 298 else 299 { 300 *pz = x + y; 301 return; 302 } 303 } 304 } 305 306 if (j0 < 0x00100000) /* X and Y are subnormal */ 307 { 308 x *= D2ONP1022; 309 y *= D2ONP1022; 310 311 x_hi = (x + D2ON28) - D2ON28; 312 x_lo = x - x_hi; 313 y_hi = (y + D2ON28) - D2ON28; 314 y_lo = y - y_hi; 315 res = (x_hi * x_hi + y_hi * y_hi); 316 res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo); 317 318 res = sqrt(res); 319 320 res = D2ONM1022 * res; 321 *pz = res; 322 return; 323 } 324 325 HI(&scl) = (0x7fe00000 - j0); 326 327 x *= scl; 328 y *= scl; 329 330 x_hi = (x + D2ON28) - D2ON28; 331 y_hi = (y + D2ON28) - D2ON28; 332 x_lo = x - x_hi; 333 y_lo = y - y_hi; 334 335 res = (x_hi * x_hi + y_hi * y_hi); 336 res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo); 337 338 res = sqrt(res); 339 340 HI(&scl) = j0; 341 342 res = scl * res; 343 *pz = res; 344 } 345 } 346 347 static void 348 __vhypot_n(int n, double * restrict px, int stridex, double * restrict py, 349 int stridey, double * restrict pz, int stridez) 350 { 351 int hx0, hy0, j0, diff0; 352 double x_hi0, x_lo0, y_hi0, y_lo0, scl0 = 0; 353 double x0, y0, res0; 354 double D2ON28 = ((double*)LCONST)[0]; /* 2 ** 28 */ 355 356 for(; n > 0 ; n--) 357 { 358 x0 = *px; 359 y0 = *py; 360 hx0 = HI(px); 361 hy0 = HI(py); 362 363 hx0 &= 0x7fffffff; 364 hy0 &= 0x7fffffff; 365 366 diff0 = hy0 - hx0; 367 j0 = diff0 >> 31; 368 j0 = hy0 - (diff0 & j0); 369 j0 &= 0x7ff00000; 370 371 px += stridex; 372 py += stridey; 373 374 HI(&scl0) = (0x7fe00000 - j0); 375 376 x0 *= scl0; 377 y0 *= scl0; 378 379 x_hi0 = (x0 + D2ON28) - D2ON28; 380 y_hi0 = (y0 + D2ON28) - D2ON28; 381 x_lo0 = x0 - x_hi0; 382 y_lo0 = y0 - y_hi0; 383 384 res0 = (x_hi0 * x_hi0 + y_hi0 * y_hi0); 385 res0 += ((x0 + x_hi0) * x_lo0 + (y0 + y_hi0) * y_lo0); 386 387 res0 = sqrt(res0); 388 389 HI(&scl0) = j0; 390 391 res0 = scl0 * res0; 392 *pz = res0; 393 394 pz += stridez; 395 } 396 } 397 398