1 /* 2 * ==================================================== 3 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. 4 * 5 * Developed at SunPro, a Sun Microsystems, Inc. business. 6 * Permission to use, copy, modify, and distribute this 7 * software is freely granted, provided that this notice 8 * is preserved. 9 * ==================================================== 10 */ 11 12 /* 13 */ 14 15 #ifndef _MATH_PRIVATE_H_ 16 #define _MATH_PRIVATE_H_ 17 18 #include <sys/types.h> 19 #include <machine/endian.h> 20 21 /* 22 * The original fdlibm code used statements like: 23 * n0 = ((*(int*)&one)>>29)^1; * index of high word * 24 * ix0 = *(n0+(int*)&x); * high word of x * 25 * ix1 = *((1-n0)+(int*)&x); * low word of x * 26 * to dig two 32 bit words out of the 64 bit IEEE floating point 27 * value. That is non-ANSI, and, moreover, the gcc instruction 28 * scheduler gets it wrong. We instead use the following macros. 29 * Unlike the original code, we determine the endianness at compile 30 * time, not at run time; I don't see much benefit to selecting 31 * endianness at run time. 32 */ 33 34 /* 35 * A union which permits us to convert between a double and two 32 bit 36 * ints. 37 */ 38 39 #ifdef __arm__ 40 #if defined(__VFP_FP__) || defined(__ARM_EABI__) 41 #define IEEE_WORD_ORDER BYTE_ORDER 42 #else 43 #define IEEE_WORD_ORDER BIG_ENDIAN 44 #endif 45 #else /* __arm__ */ 46 #define IEEE_WORD_ORDER BYTE_ORDER 47 #endif 48 49 /* A union which permits us to convert between a long double and 50 four 32 bit ints. */ 51 52 #if IEEE_WORD_ORDER == BIG_ENDIAN 53 54 typedef union 55 { 56 long double value; 57 struct { 58 u_int32_t mswhi; 59 u_int32_t mswlo; 60 u_int32_t lswhi; 61 u_int32_t lswlo; 62 } parts32; 63 struct { 64 u_int64_t msw; 65 u_int64_t lsw; 66 } parts64; 67 } ieee_quad_shape_type; 68 69 #endif 70 71 #if IEEE_WORD_ORDER == LITTLE_ENDIAN 72 73 typedef union 74 { 75 long double value; 76 struct { 77 u_int32_t lswlo; 78 u_int32_t lswhi; 79 u_int32_t mswlo; 80 u_int32_t mswhi; 81 } parts32; 82 struct { 83 u_int64_t lsw; 84 u_int64_t msw; 85 } parts64; 86 } ieee_quad_shape_type; 87 88 #endif 89 90 #if IEEE_WORD_ORDER == BIG_ENDIAN 91 92 typedef union 93 { 94 double value; 95 struct 96 { 97 u_int32_t msw; 98 u_int32_t lsw; 99 } parts; 100 struct 101 { 102 u_int64_t w; 103 } xparts; 104 } ieee_double_shape_type; 105 106 #endif 107 108 #if IEEE_WORD_ORDER == LITTLE_ENDIAN 109 110 typedef union 111 { 112 double value; 113 struct 114 { 115 u_int32_t lsw; 116 u_int32_t msw; 117 } parts; 118 struct 119 { 120 u_int64_t w; 121 } xparts; 122 } ieee_double_shape_type; 123 124 #endif 125 126 /* Get two 32 bit ints from a double. */ 127 128 #define EXTRACT_WORDS(ix0,ix1,d) \ 129 do { \ 130 ieee_double_shape_type ew_u; \ 131 ew_u.value = (d); \ 132 (ix0) = ew_u.parts.msw; \ 133 (ix1) = ew_u.parts.lsw; \ 134 } while (0) 135 136 /* Get a 64-bit int from a double. */ 137 #define EXTRACT_WORD64(ix,d) \ 138 do { \ 139 ieee_double_shape_type ew_u; \ 140 ew_u.value = (d); \ 141 (ix) = ew_u.xparts.w; \ 142 } while (0) 143 144 /* Get the more significant 32 bit int from a double. */ 145 146 #define GET_HIGH_WORD(i,d) \ 147 do { \ 148 ieee_double_shape_type gh_u; \ 149 gh_u.value = (d); \ 150 (i) = gh_u.parts.msw; \ 151 } while (0) 152 153 /* Get the less significant 32 bit int from a double. */ 154 155 #define GET_LOW_WORD(i,d) \ 156 do { \ 157 ieee_double_shape_type gl_u; \ 158 gl_u.value = (d); \ 159 (i) = gl_u.parts.lsw; \ 160 } while (0) 161 162 /* Set a double from two 32 bit ints. */ 163 164 #define INSERT_WORDS(d,ix0,ix1) \ 165 do { \ 166 ieee_double_shape_type iw_u; \ 167 iw_u.parts.msw = (ix0); \ 168 iw_u.parts.lsw = (ix1); \ 169 (d) = iw_u.value; \ 170 } while (0) 171 172 /* Set a double from a 64-bit int. */ 173 #define INSERT_WORD64(d,ix) \ 174 do { \ 175 ieee_double_shape_type iw_u; \ 176 iw_u.xparts.w = (ix); \ 177 (d) = iw_u.value; \ 178 } while (0) 179 180 /* Set the more significant 32 bits of a double from an int. */ 181 182 #define SET_HIGH_WORD(d,v) \ 183 do { \ 184 ieee_double_shape_type sh_u; \ 185 sh_u.value = (d); \ 186 sh_u.parts.msw = (v); \ 187 (d) = sh_u.value; \ 188 } while (0) 189 190 /* Set the less significant 32 bits of a double from an int. */ 191 192 #define SET_LOW_WORD(d,v) \ 193 do { \ 194 ieee_double_shape_type sl_u; \ 195 sl_u.value = (d); \ 196 sl_u.parts.lsw = (v); \ 197 (d) = sl_u.value; \ 198 } while (0) 199 200 /* 201 * A union which permits us to convert between a float and a 32 bit 202 * int. 203 */ 204 205 typedef union 206 { 207 float value; 208 /* FIXME: Assumes 32 bit int. */ 209 unsigned int word; 210 } ieee_float_shape_type; 211 212 /* Get a 32 bit int from a float. */ 213 214 #define GET_FLOAT_WORD(i,d) \ 215 do { \ 216 ieee_float_shape_type gf_u; \ 217 gf_u.value = (d); \ 218 (i) = gf_u.word; \ 219 } while (0) 220 221 /* Set a float from a 32 bit int. */ 222 223 #define SET_FLOAT_WORD(d,i) \ 224 do { \ 225 ieee_float_shape_type sf_u; \ 226 sf_u.word = (i); \ 227 (d) = sf_u.value; \ 228 } while (0) 229 230 /* 231 * Get expsign and mantissa as 16 bit and 64 bit ints from an 80 bit long 232 * double. 233 */ 234 235 #define EXTRACT_LDBL80_WORDS(ix0,ix1,d) \ 236 do { \ 237 union IEEEl2bits ew_u; \ 238 ew_u.e = (d); \ 239 (ix0) = ew_u.xbits.expsign; \ 240 (ix1) = ew_u.xbits.man; \ 241 } while (0) 242 243 /* 244 * Get expsign and mantissa as one 16 bit and two 64 bit ints from a 128 bit 245 * long double. 246 */ 247 248 #define EXTRACT_LDBL128_WORDS(ix0,ix1,ix2,d) \ 249 do { \ 250 union IEEEl2bits ew_u; \ 251 ew_u.e = (d); \ 252 (ix0) = ew_u.xbits.expsign; \ 253 (ix1) = ew_u.xbits.manh; \ 254 (ix2) = ew_u.xbits.manl; \ 255 } while (0) 256 257 /* Get expsign as a 16 bit int from a long double. */ 258 259 #define GET_LDBL_EXPSIGN(i,d) \ 260 do { \ 261 union IEEEl2bits ge_u; \ 262 ge_u.e = (d); \ 263 (i) = ge_u.xbits.expsign; \ 264 } while (0) 265 266 /* 267 * Set an 80 bit long double from a 16 bit int expsign and a 64 bit int 268 * mantissa. 269 */ 270 271 #define INSERT_LDBL80_WORDS(d,ix0,ix1) \ 272 do { \ 273 union IEEEl2bits iw_u; \ 274 iw_u.xbits.expsign = (ix0); \ 275 iw_u.xbits.man = (ix1); \ 276 (d) = iw_u.e; \ 277 } while (0) 278 279 /* 280 * Set a 128 bit long double from a 16 bit int expsign and two 64 bit ints 281 * comprising the mantissa. 282 */ 283 284 #define INSERT_LDBL128_WORDS(d,ix0,ix1,ix2) \ 285 do { \ 286 union IEEEl2bits iw_u; \ 287 iw_u.xbits.expsign = (ix0); \ 288 iw_u.xbits.manh = (ix1); \ 289 iw_u.xbits.manl = (ix2); \ 290 (d) = iw_u.e; \ 291 } while (0) 292 293 /* Set expsign of a long double from a 16 bit int. */ 294 295 #define SET_LDBL_EXPSIGN(d,v) \ 296 do { \ 297 union IEEEl2bits se_u; \ 298 se_u.e = (d); \ 299 se_u.xbits.expsign = (v); \ 300 (d) = se_u.e; \ 301 } while (0) 302 303 #ifdef __i386__ 304 /* Long double constants are broken on i386. */ 305 #define LD80C(m, ex, v) { \ 306 .xbits.man = __CONCAT(m, ULL), \ 307 .xbits.expsign = (0x3fff + (ex)) | ((v) < 0 ? 0x8000 : 0), \ 308 } 309 #else 310 /* The above works on non-i386 too, but we use this to check v. */ 311 #define LD80C(m, ex, v) { .e = (v), } 312 #endif 313 314 #ifdef FLT_EVAL_METHOD 315 /* 316 * Attempt to get strict C99 semantics for assignment with non-C99 compilers. 317 */ 318 #if FLT_EVAL_METHOD == 0 || __GNUC__ == 0 319 #define STRICT_ASSIGN(type, lval, rval) ((lval) = (rval)) 320 #else 321 #define STRICT_ASSIGN(type, lval, rval) do { \ 322 volatile type __lval; \ 323 \ 324 if (sizeof(type) >= sizeof(long double)) \ 325 (lval) = (rval); \ 326 else { \ 327 __lval = (rval); \ 328 (lval) = __lval; \ 329 } \ 330 } while (0) 331 #endif 332 #endif /* FLT_EVAL_METHOD */ 333 334 /* Support switching the mode to FP_PE if necessary. */ 335 #if defined(__i386__) && !defined(NO_FPSETPREC) 336 #define ENTERI() ENTERIT(long double) 337 #define ENTERIT(returntype) \ 338 returntype __retval; \ 339 fp_prec_t __oprec; \ 340 \ 341 if ((__oprec = fpgetprec()) != FP_PE) \ 342 fpsetprec(FP_PE) 343 #define RETURNI(x) do { \ 344 __retval = (x); \ 345 if (__oprec != FP_PE) \ 346 fpsetprec(__oprec); \ 347 RETURNF(__retval); \ 348 } while (0) 349 #define ENTERV() \ 350 fp_prec_t __oprec; \ 351 \ 352 if ((__oprec = fpgetprec()) != FP_PE) \ 353 fpsetprec(FP_PE) 354 #define RETURNV() do { \ 355 if (__oprec != FP_PE) \ 356 fpsetprec(__oprec); \ 357 return; \ 358 } while (0) 359 #else 360 #define ENTERI() 361 #define ENTERIT(x) 362 #define RETURNI(x) RETURNF(x) 363 #define ENTERV() 364 #define RETURNV() return 365 #endif 366 367 /* Default return statement if hack*_t() is not used. */ 368 #define RETURNF(v) return (v) 369 370 /* 371 * 2sum gives the same result as 2sumF without requiring |a| >= |b| or 372 * a == 0, but is slower. 373 */ 374 #define _2sum(a, b) do { \ 375 __typeof(a) __s, __w; \ 376 \ 377 __w = (a) + (b); \ 378 __s = __w - (a); \ 379 (b) = ((a) - (__w - __s)) + ((b) - __s); \ 380 (a) = __w; \ 381 } while (0) 382 383 /* 384 * 2sumF algorithm. 385 * 386 * "Normalize" the terms in the infinite-precision expression a + b for 387 * the sum of 2 floating point values so that b is as small as possible 388 * relative to 'a'. (The resulting 'a' is the value of the expression in 389 * the same precision as 'a' and the resulting b is the rounding error.) 390 * |a| must be >= |b| or 0, b's type must be no larger than 'a's type, and 391 * exponent overflow or underflow must not occur. This uses a Theorem of 392 * Dekker (1971). See Knuth (1981) 4.2.2 Theorem C. The name "TwoSum" 393 * is apparently due to Skewchuk (1997). 394 * 395 * For this to always work, assignment of a + b to 'a' must not retain any 396 * extra precision in a + b. This is required by C standards but broken 397 * in many compilers. The brokenness cannot be worked around using 398 * STRICT_ASSIGN() like we do elsewhere, since the efficiency of this 399 * algorithm would be destroyed by non-null strict assignments. (The 400 * compilers are correct to be broken -- the efficiency of all floating 401 * point code calculations would be destroyed similarly if they forced the 402 * conversions.) 403 * 404 * Fortunately, a case that works well can usually be arranged by building 405 * any extra precision into the type of 'a' -- 'a' should have type float_t, 406 * double_t or long double. b's type should be no larger than 'a's type. 407 * Callers should use these types with scopes as large as possible, to 408 * reduce their own extra-precision and efficiency problems. In 409 * particular, they shouldn't convert back and forth just to call here. 410 */ 411 #ifdef DEBUG 412 #define _2sumF(a, b) do { \ 413 __typeof(a) __w; \ 414 volatile __typeof(a) __ia, __ib, __r, __vw; \ 415 \ 416 __ia = (a); \ 417 __ib = (b); \ 418 assert(__ia == 0 || fabsl(__ia) >= fabsl(__ib)); \ 419 \ 420 __w = (a) + (b); \ 421 (b) = ((a) - __w) + (b); \ 422 (a) = __w; \ 423 \ 424 /* The next 2 assertions are weak if (a) is already long double. */ \ 425 assert((long double)__ia + __ib == (long double)(a) + (b)); \ 426 __vw = __ia + __ib; \ 427 __r = __ia - __vw; \ 428 __r += __ib; \ 429 assert(__vw == (a) && __r == (b)); \ 430 } while (0) 431 #else /* !DEBUG */ 432 #define _2sumF(a, b) do { \ 433 __typeof(a) __w; \ 434 \ 435 __w = (a) + (b); \ 436 (b) = ((a) - __w) + (b); \ 437 (a) = __w; \ 438 } while (0) 439 #endif /* DEBUG */ 440 441 /* 442 * Set x += c, where x is represented in extra precision as a + b. 443 * x must be sufficiently normalized and sufficiently larger than c, 444 * and the result is then sufficiently normalized. 445 * 446 * The details of ordering are that |a| must be >= |c| (so that (a, c) 447 * can be normalized without extra work to swap 'a' with c). The details of 448 * the normalization are that b must be small relative to the normalized 'a'. 449 * Normalization of (a, c) makes the normalized c tiny relative to the 450 * normalized a, so b remains small relative to 'a' in the result. However, 451 * b need not ever be tiny relative to 'a'. For example, b might be about 452 * 2**20 times smaller than 'a' to give about 20 extra bits of precision. 453 * That is usually enough, and adding c (which by normalization is about 454 * 2**53 times smaller than a) cannot change b significantly. However, 455 * cancellation of 'a' with c in normalization of (a, c) may reduce 'a' 456 * significantly relative to b. The caller must ensure that significant 457 * cancellation doesn't occur, either by having c of the same sign as 'a', 458 * or by having |c| a few percent smaller than |a|. Pre-normalization of 459 * (a, b) may help. 460 * 461 * This is a variant of an algorithm of Kahan (see Knuth (1981) 4.2.2 462 * exercise 19). We gain considerable efficiency by requiring the terms to 463 * be sufficiently normalized and sufficiently increasing. 464 */ 465 #define _3sumF(a, b, c) do { \ 466 __typeof(a) __tmp; \ 467 \ 468 __tmp = (c); \ 469 _2sumF(__tmp, (a)); \ 470 (b) += (a); \ 471 (a) = __tmp; \ 472 } while (0) 473 474 /* 475 * Split x into high and low bits where CC is 0x1p(N/2) + 1 where 476 * N is rounded up for types with odd precisions. 477 * 478 * #define _CC (0x1p12F + 1) // float 479 * #define _CC (0x1p27 + 1) // double 480 * #define _CC (0x1p32L + 1) // long double (LD80) 481 * #define _CC (0x1p57L + 1) // long double (LD128) 482 */ 483 #define _SPLIT(x, xh, xl) \ 484 do { \ 485 typeof(x) __t1; \ 486 __t1 = (x) * _CC; \ 487 xh = __t1 + ((x) - __t1); \ 488 xl = (x) - xh; \ 489 } while(0) 490 491 /* 492 * FAST2SUM requires |x| >= |y|. x and y are full precision. 493 * Note, _2SUMF(x,y) above destroys x and y. 494 */ 495 #define _FAST2SUM(x, y, hi, lo) \ 496 do { \ 497 hi = (x) + (y); \ 498 lo = (y) - (hi - (x)); \ 499 } while(0) 500 501 /* 502 * SLOW2SUM does not require |x| >= |y|. Here, x and y are full precision. 503 * The t1 temporary variable is volatile to prevent compiler optimizations. 504 * Note, _2SUM(x,y) above destroys x and y. 505 */ 506 #define _SLOW2SUM(x, y, hi, lo) \ 507 do { \ 508 volatile typeof(x) __t1; \ 509 typeof(x) __t2; \ 510 hi = (x) + (y); \ 511 __t1 = hi - (y); \ 512 __t2 = hi - __t1; \ 513 lo = ((x) - __t1) + ((y) - __t2); \ 514 } while(0) 515 516 /* 517 * x and y are full precision quantities that have been split into high 518 * and low parts via the _SPLIT macro. x and y are added to give z as 519 * high and low parts. 520 */ 521 #define _XADD(xh, xl, yh, yl, zh, zl) \ 522 do { \ 523 typeof(xh) __s1, __s2, __s3, __s4, __s5, __s6; \ 524 _SLOW2SUM(xh, yh, __s1, __s2); \ 525 _SLOW2SUM(xl, yl, __s3, __s4); \ 526 _FAST2SUM(__s1, __s2 + __s3, __s5, __s6); \ 527 _FAST2SUM(__s5, __s6 + __s4, zh, zl); \ 528 } while(0) 529 530 /* 531 * x and y are full precision quantities. r1 and r2 are full precision 532 * high and low parts of the multiplication x * y. 533 */ 534 #define _MUL(x, y, r1 ,r2) \ 535 do { \ 536 typeof(x) __xh, __xl, __yh, __yl; \ 537 typeof(x) __t1; \ 538 _SPLIT(x, __xh, __xl); \ 539 _SPLIT(y, __yh, __yl); \ 540 r1 = (x) * (y); \ 541 __t1 = __xh * __yl + (__xh * __yh - r1); \ 542 r2 = __xl * __yl + (__xl * __yh + __t1); \ 543 } while(0) 544 545 /* 546 * x and y are full precision quantities that have been split into high 547 * and low parts via the _SPLIT macro. x and y are multiplied to give z 548 * as high and low parts. 549 */ 550 #define _XMUL(xh, xl, yh, yl, ph, pl) \ 551 do { \ 552 _MUL(xh, yh, ph, pl); \ 553 pl += xl * yl + xl * yh + xh * yl; \ 554 } while(0) 555 556 557 /* 558 * Common routine to process the arguments to nan(), nanf(), and nanl(). 559 */ 560 void _scan_nan(uint32_t *__words, int __num_words, const char *__s); 561 562 /* 563 * Mix 0, 1 or 2 NaNs. First add 0 to each arg. This normally just turns 564 * signaling NaNs into quiet NaNs by setting a quiet bit. We do this 565 * because we want to never return a signaling NaN, and also because we 566 * don't want the quiet bit to affect the result. Then mix the converted 567 * args using the specified operation. 568 * 569 * When one arg is NaN, the result is typically that arg quieted. When both 570 * args are NaNs, the result is typically the quietening of the arg whose 571 * mantissa is largest after quietening. When neither arg is NaN, the 572 * result may be NaN because it is indeterminate, or finite for subsequent 573 * construction of a NaN as the indeterminate 0.0L/0.0L. 574 * 575 * Technical complications: the result in bits after rounding to the final 576 * precision might depend on the runtime precision and/or on compiler 577 * optimizations, especially when different register sets are used for 578 * different precisions. Try to make the result not depend on at least the 579 * runtime precision by always doing the main mixing step in long double 580 * precision. Try to reduce dependencies on optimizations by adding the 581 * the 0's in different precisions (unless everything is in long double 582 * precision). 583 */ 584 #define nan_mix(x, y) (nan_mix_op((x), (y), +)) 585 #define nan_mix_op(x, y, op) (((x) + 0.0L) op ((y) + 0)) 586 587 #ifdef _COMPLEX_H 588 589 /* 590 * C99 specifies that complex numbers have the same representation as 591 * an array of two elements, where the first element is the real part 592 * and the second element is the imaginary part. 593 */ 594 typedef union { 595 float complex f; 596 float a[2]; 597 } float_complex; 598 typedef union { 599 double complex f; 600 double a[2]; 601 } double_complex; 602 typedef union { 603 long double complex f; 604 long double a[2]; 605 } long_double_complex; 606 #define REALPART(z) ((z).a[0]) 607 #define IMAGPART(z) ((z).a[1]) 608 609 /* 610 * Inline functions that can be used to construct complex values. 611 * 612 * The C99 standard intends x+I*y to be used for this, but x+I*y is 613 * currently unusable in general since gcc introduces many overflow, 614 * underflow, sign and efficiency bugs by rewriting I*y as 615 * (0.0+I)*(y+0.0*I) and laboriously computing the full complex product. 616 * In particular, I*Inf is corrupted to NaN+I*Inf, and I*-0 is corrupted 617 * to -0.0+I*0.0. 618 * 619 * The C11 standard introduced the macros CMPLX(), CMPLXF() and CMPLXL() 620 * to construct complex values. Compilers that conform to the C99 621 * standard require the following functions to avoid the above issues. 622 */ 623 624 #ifndef CMPLXF 625 static __inline float complex 626 CMPLXF(float x, float y) 627 { 628 float_complex z; 629 630 REALPART(z) = x; 631 IMAGPART(z) = y; 632 return (z.f); 633 } 634 #endif 635 636 #ifndef CMPLX 637 static __inline double complex 638 CMPLX(double x, double y) 639 { 640 double_complex z; 641 642 REALPART(z) = x; 643 IMAGPART(z) = y; 644 return (z.f); 645 } 646 #endif 647 648 #ifndef CMPLXL 649 static __inline long double complex 650 CMPLXL(long double x, long double y) 651 { 652 long_double_complex z; 653 654 REALPART(z) = x; 655 IMAGPART(z) = y; 656 return (z.f); 657 } 658 #endif 659 660 #endif /* _COMPLEX_H */ 661 662 /* 663 * The rnint() family rounds to the nearest integer for a restricted range 664 * range of args (up to about 2**MANT_DIG). We assume that the current 665 * rounding mode is FE_TONEAREST so that this can be done efficiently. 666 * Extra precision causes more problems in practice, and we only centralize 667 * this here to reduce those problems, and have not solved the efficiency 668 * problems. The exp2() family uses a more delicate version of this that 669 * requires extracting bits from the intermediate value, so it is not 670 * centralized here and should copy any solution of the efficiency problems. 671 */ 672 673 static inline double 674 rnint(__double_t x) 675 { 676 /* 677 * This casts to double to kill any extra precision. This depends 678 * on the cast being applied to a double_t to avoid compiler bugs 679 * (this is a cleaner version of STRICT_ASSIGN()). This is 680 * inefficient if there actually is extra precision, but is hard 681 * to improve on. We use double_t in the API to minimise conversions 682 * for just calling here. Note that we cannot easily change the 683 * magic number to the one that works directly with double_t, since 684 * the rounding precision is variable at runtime on x86 so the 685 * magic number would need to be variable. Assuming that the 686 * rounding precision is always the default is too fragile. This 687 * and many other complications will move when the default is 688 * changed to FP_PE. 689 */ 690 return ((double)(x + 0x1.8p52) - 0x1.8p52); 691 } 692 693 static inline float 694 rnintf(__float_t x) 695 { 696 /* 697 * As for rnint(), except we could just call that to handle the 698 * extra precision case, usually without losing efficiency. 699 */ 700 return ((float)(x + 0x1.8p23F) - 0x1.8p23F); 701 } 702 703 #ifdef LDBL_MANT_DIG 704 /* 705 * The complications for extra precision are smaller for rnintl() since it 706 * can safely assume that the rounding precision has been increased from 707 * its default to FP_PE on x86. We don't exploit that here to get small 708 * optimizations from limiting the range to double. We just need it for 709 * the magic number to work with long doubles. ld128 callers should use 710 * rnint() instead of this if possible. ld80 callers should prefer 711 * rnintl() since for amd64 this avoids swapping the register set, while 712 * for i386 it makes no difference (assuming FP_PE), and for other arches 713 * it makes little difference. 714 */ 715 static inline long double 716 rnintl(long double x) 717 { 718 return (x + __CONCAT(0x1.8p, LDBL_MANT_DIG) / 2 - 719 __CONCAT(0x1.8p, LDBL_MANT_DIG) / 2); 720 } 721 #endif /* LDBL_MANT_DIG */ 722 723 /* 724 * irint() and i64rint() give the same result as casting to their integer 725 * return type provided their arg is a floating point integer. They can 726 * sometimes be more efficient because no rounding is required. 727 */ 728 #if defined(amd64) || defined(__i386__) 729 #define irint(x) \ 730 (sizeof(x) == sizeof(float) && \ 731 sizeof(__float_t) == sizeof(long double) ? irintf(x) : \ 732 sizeof(x) == sizeof(double) && \ 733 sizeof(__double_t) == sizeof(long double) ? irintd(x) : \ 734 sizeof(x) == sizeof(long double) ? irintl(x) : (int)(x)) 735 #else 736 #define irint(x) ((int)(x)) 737 #endif 738 739 #define i64rint(x) ((int64_t)(x)) /* only needed for ld128 so not opt. */ 740 741 #if defined(__i386__) 742 static __inline int 743 irintf(float x) 744 { 745 int n; 746 747 __asm("fistl %0" : "=m" (n) : "t" (x)); 748 return (n); 749 } 750 751 static __inline int 752 irintd(double x) 753 { 754 int n; 755 756 __asm("fistl %0" : "=m" (n) : "t" (x)); 757 return (n); 758 } 759 #endif 760 761 #if defined(__amd64__) || defined(__i386__) 762 static __inline int 763 irintl(long double x) 764 { 765 int n; 766 767 __asm("fistl %0" : "=m" (n) : "t" (x)); 768 return (n); 769 } 770 #endif 771 772 /* 773 * The following are fast floor macros for 0 <= |x| < 0x1p(N-1), where 774 * N is the precision of the type of x. These macros are used in the 775 * half-cycle trignometric functions (e.g., sinpi(x)). 776 */ 777 #define FFLOORF(x, j0, ix) do { \ 778 (j0) = (((ix) >> 23) & 0xff) - 0x7f; \ 779 (ix) &= ~(0x007fffff >> (j0)); \ 780 SET_FLOAT_WORD((x), (ix)); \ 781 } while (0) 782 783 #define FFLOOR(x, j0, ix, lx) do { \ 784 (j0) = (((ix) >> 20) & 0x7ff) - 0x3ff; \ 785 if ((j0) < 20) { \ 786 (ix) &= ~(0x000fffff >> (j0)); \ 787 (lx) = 0; \ 788 } else { \ 789 (lx) &= ~((uint32_t)0xffffffff >> ((j0) - 20)); \ 790 } \ 791 INSERT_WORDS((x), (ix), (lx)); \ 792 } while (0) 793 794 #define FFLOORL80(x, j0, ix, lx) do { \ 795 j0 = ix - 0x3fff + 1; \ 796 if ((j0) < 32) { \ 797 (lx) = ((lx) >> 32) << 32; \ 798 (lx) &= ~((((lx) << 32)-1) >> (j0)); \ 799 } else { \ 800 uint64_t _m; \ 801 _m = (uint64_t)-1 >> (j0); \ 802 if ((lx) & _m) (lx) &= ~_m; \ 803 } \ 804 INSERT_LDBL80_WORDS((x), (ix), (lx)); \ 805 } while (0) 806 807 #define FFLOORL128(x, ai, ar) do { \ 808 union IEEEl2bits u; \ 809 uint64_t m; \ 810 int e; \ 811 u.e = (x); \ 812 e = u.bits.exp - 16383; \ 813 if (e < 48) { \ 814 m = ((1llu << 49) - 1) >> (e + 1); \ 815 u.bits.manh &= ~m; \ 816 u.bits.manl = 0; \ 817 } else { \ 818 m = (uint64_t)-1 >> (e - 48); \ 819 u.bits.manl &= ~m; \ 820 } \ 821 (ai) = u.e; \ 822 (ar) = (x) - (ai); \ 823 } while (0) 824 825 /* 826 * For a subnormal double entity split into high and low parts, compute ilogb. 827 */ 828 static inline int32_t 829 subnormal_ilogb(int32_t hi, int32_t lo) 830 { 831 int32_t j; 832 uint32_t i; 833 834 j = -1022; 835 if (hi == 0) { 836 j -= 21; 837 i = (uint32_t)lo; 838 } else 839 i = (uint32_t)hi << 11; 840 841 for (; i < 0x7fffffff; i <<= 1) j -= 1; 842 843 return (j); 844 } 845 846 /* 847 * For a subnormal float entity represented as an int32_t, compute ilogb. 848 */ 849 static inline int32_t 850 subnormal_ilogbf(int32_t hx) 851 { 852 int32_t j; 853 uint32_t i; 854 i = (uint32_t) hx << 8; 855 for (j = -126; i < 0x7fffffff; i <<= 1) j -=1; 856 857 return (j); 858 } 859 860 #ifdef DEBUG 861 #if defined(__amd64__) || defined(__i386__) 862 #define breakpoint() asm("int $3") 863 #else 864 #include <signal.h> 865 866 #define breakpoint() raise(SIGTRAP) 867 #endif 868 #endif 869 870 #ifdef STRUCT_RETURN 871 #define RETURNSP(rp) do { \ 872 if (!(rp)->lo_set) \ 873 RETURNF((rp)->hi); \ 874 RETURNF((rp)->hi + (rp)->lo); \ 875 } while (0) 876 #define RETURNSPI(rp) do { \ 877 if (!(rp)->lo_set) \ 878 RETURNI((rp)->hi); \ 879 RETURNI((rp)->hi + (rp)->lo); \ 880 } while (0) 881 #endif 882 883 #define SUM2P(x, y) ({ \ 884 const __typeof (x) __x = (x); \ 885 const __typeof (y) __y = (y); \ 886 __x + __y; \ 887 }) 888 889 /* fdlibm kernel function */ 890 int __kernel_rem_pio2(double*,double*,int,int,int); 891 892 /* double precision kernel functions */ 893 #ifndef INLINE_REM_PIO2 894 int __ieee754_rem_pio2(double,double*); 895 #endif 896 double __kernel_sin(double,double,int); 897 double __kernel_cos(double,double); 898 double __kernel_tan(double,double,int); 899 double __ldexp_exp(double,int); 900 #ifdef _COMPLEX_H 901 double complex __ldexp_cexp(double complex,int); 902 #endif 903 904 /* float precision kernel functions */ 905 #ifndef INLINE_REM_PIO2F 906 int __ieee754_rem_pio2f(float,double*); 907 #endif 908 #ifndef INLINE_KERNEL_SINDF 909 float __kernel_sindf(double); 910 #endif 911 #ifndef INLINE_KERNEL_COSDF 912 float __kernel_cosdf(double); 913 #endif 914 #ifndef INLINE_KERNEL_TANDF 915 float __kernel_tandf(double,int); 916 #endif 917 float __ldexp_expf(float,int); 918 #ifdef _COMPLEX_H 919 float complex __ldexp_cexpf(float complex,int); 920 #endif 921 922 /* long double precision kernel functions */ 923 long double __kernel_sinl(long double, long double, int); 924 long double __kernel_cosl(long double, long double); 925 long double __kernel_tanl(long double, long double, int); 926 927 #endif /* !_MATH_PRIVATE_H_ */ 928