/*
 * ====================================================
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 *
 * Developed at SunPro, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 */

#ifndef _MATH_PRIVATE_H_
#define	_MATH_PRIVATE_H_

#include <sys/types.h>
#include <machine/endian.h>

/*
 * The original fdlibm code used statements like:
 *	n0 = ((*(int*)&one)>>29)^1;		* index of high word *
 *	ix0 = *(n0+(int*)&x);			* high word of x *
 *	ix1 = *((1-n0)+(int*)&x);		* low word of x *
 * to dig two 32 bit words out of the 64 bit IEEE floating point
 * value.  That is non-ANSI, and, moreover, the gcc instruction
 * scheduler gets it wrong.  We instead use the following macros.
 * Unlike the original code, we determine the endianness at compile
 * time, not at run time; I don't see much benefit to selecting
 * endianness at run time.
 */

/*
 * A union which permits us to convert between a double and two 32 bit
 * ints.
 */

#ifdef __arm__
#if defined(__VFP_FP__) || defined(__ARM_EABI__)
#define	IEEE_WORD_ORDER	BYTE_ORDER
#else
#define	IEEE_WORD_ORDER	BIG_ENDIAN
#endif
#else /* __arm__ */
#define	IEEE_WORD_ORDER	BYTE_ORDER
#endif

/* A union which permits us to convert between a long double and
   four 32 bit ints.  */

#if IEEE_WORD_ORDER == BIG_ENDIAN

typedef union
{
  long double value;
  struct {
    u_int32_t mswhi;
    u_int32_t mswlo;
    u_int32_t lswhi;
    u_int32_t lswlo;
  } parts32;
  struct {
    u_int64_t msw;
    u_int64_t lsw;
  } parts64;
} ieee_quad_shape_type;

#endif

#if IEEE_WORD_ORDER == LITTLE_ENDIAN

typedef union
{
  long double value;
  struct {
    u_int32_t lswlo;
    u_int32_t lswhi;
    u_int32_t mswlo;
    u_int32_t mswhi;
  } parts32;
  struct {
    u_int64_t lsw;
    u_int64_t msw;
  } parts64;
} ieee_quad_shape_type;

#endif

#if IEEE_WORD_ORDER == BIG_ENDIAN

typedef union
{
  double value;
  struct
  {
    u_int32_t msw;
    u_int32_t lsw;
  } parts;
  struct
  {
    u_int64_t w;
  } xparts;
} ieee_double_shape_type;

#endif

#if IEEE_WORD_ORDER == LITTLE_ENDIAN

typedef union
{
  double value;
  struct
  {
    u_int32_t lsw;
    u_int32_t msw;
  } parts;
  struct
  {
    u_int64_t w;
  } xparts;
} ieee_double_shape_type;

#endif

/* Get two 32 bit ints from a double.  */

#define	EXTRACT_WORDS(ix0,ix1,d) \
do { \
  ieee_double_shape_type ew_u; \
  ew_u.value = (d); \
  (ix0) = ew_u.parts.msw; \
  (ix1) = ew_u.parts.lsw; \
} while (0)

/* Get a 64-bit int from a double. */
#define	EXTRACT_WORD64(ix,d) \
do { \
  ieee_double_shape_type ew_u; \
  ew_u.value = (d); \
  (ix) = ew_u.xparts.w; \
} while (0)

/* Get the more significant 32 bit int from a double.  */

#define	GET_HIGH_WORD(i,d) \
do { \
  ieee_double_shape_type gh_u; \
  gh_u.value = (d); \
  (i) = gh_u.parts.msw; \
} while (0)

/* Get the less significant 32 bit int from a double.  */

#define	GET_LOW_WORD(i,d) \
do { \
  ieee_double_shape_type gl_u; \
  gl_u.value = (d); \
  (i) = gl_u.parts.lsw; \
} while (0)
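/*
 * A typical use of the extraction macros, sketched in the style of the
 * fdlibm kernels (hypothetical example, not used elsewhere in this file):
 *
 *	int32_t hx, lx;
 *	EXTRACT_WORDS(hx, lx, x);
 *	if ((hx & 0x7fffffff) >= 0x7ff00000)	* x is +-Inf or NaN *
 *		return (x + x);
 *
 * 0x7fffffff strips the sign bit and 0x7ff00000 is the high word of +Inf,
 * so the comparison catches infinities and NaNs of either sign.
 */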
/* Set a double from two 32 bit ints.  */

#define	INSERT_WORDS(d,ix0,ix1) \
do { \
  ieee_double_shape_type iw_u; \
  iw_u.parts.msw = (ix0); \
  iw_u.parts.lsw = (ix1); \
  (d) = iw_u.value; \
} while (0)

/* Set a double from a 64-bit int. */
#define	INSERT_WORD64(d,ix) \
do { \
  ieee_double_shape_type iw_u; \
  iw_u.xparts.w = (ix); \
  (d) = iw_u.value; \
} while (0)

/* Set the more significant 32 bits of a double from an int.  */

#define	SET_HIGH_WORD(d,v) \
do { \
  ieee_double_shape_type sh_u; \
  sh_u.value = (d); \
  sh_u.parts.msw = (v); \
  (d) = sh_u.value; \
} while (0)

/* Set the less significant 32 bits of a double from an int.  */

#define	SET_LOW_WORD(d,v) \
do { \
  ieee_double_shape_type sl_u; \
  sl_u.value = (d); \
  sl_u.parts.lsw = (v); \
  (d) = sl_u.value; \
} while (0)
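/*
 * A common pattern built on SET_LOW_WORD (hypothetical sketch): split a
 * double into a head holding only the upper significand bits and an exact
 * tail, for use in hi+lo extended-precision arithmetic:
 *
 *	double hi, lo;
 *	hi = x;
 *	SET_LOW_WORD(hi, 0);	* keep sign, exponent and top 20 significand bits *
 *	lo = x - hi;		* exact, since hi agrees with x in its high bits *
 */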
/*
 * A union which permits us to convert between a float and a 32 bit
 * int.
 */

typedef union
{
  float value;
  /* FIXME: Assumes 32 bit int.  */
  unsigned int word;
} ieee_float_shape_type;

/* Get a 32 bit int from a float.  */

#define	GET_FLOAT_WORD(i,d) \
do { \
  ieee_float_shape_type gf_u; \
  gf_u.value = (d); \
  (i) = gf_u.word; \
} while (0)

/* Set a float from a 32 bit int.  */

#define	SET_FLOAT_WORD(d,i) \
do { \
  ieee_float_shape_type sf_u; \
  sf_u.word = (i); \
  (d) = sf_u.value; \
} while (0)

/*
 * Get expsign and mantissa as 16 bit and 64 bit ints from an 80 bit long
 * double.
 */

#define	EXTRACT_LDBL80_WORDS(ix0,ix1,d) \
do { \
  union IEEEl2bits ew_u; \
  ew_u.e = (d); \
  (ix0) = ew_u.xbits.expsign; \
  (ix1) = ew_u.xbits.man; \
} while (0)

/*
 * Get expsign and mantissa as one 16 bit and two 64 bit ints from a 128 bit
 * long double.
 */

#define	EXTRACT_LDBL128_WORDS(ix0,ix1,ix2,d) \
do { \
  union IEEEl2bits ew_u; \
  ew_u.e = (d); \
  (ix0) = ew_u.xbits.expsign; \
  (ix1) = ew_u.xbits.manh; \
  (ix2) = ew_u.xbits.manl; \
} while (0)

/* Get expsign as a 16 bit int from a long double.  */

#define	GET_LDBL_EXPSIGN(i,d) \
do { \
  union IEEEl2bits ge_u; \
  ge_u.e = (d); \
  (i) = ge_u.xbits.expsign; \
} while (0)

/*
 * Set an 80 bit long double from a 16 bit int expsign and a 64 bit int
 * mantissa.
 */

#define	INSERT_LDBL80_WORDS(d,ix0,ix1) \
do { \
  union IEEEl2bits iw_u; \
  iw_u.xbits.expsign = (ix0); \
  iw_u.xbits.man = (ix1); \
  (d) = iw_u.e; \
} while (0)

/*
 * Set a 128 bit long double from a 16 bit int expsign and two 64 bit ints
 * comprising the mantissa.
 */

#define	INSERT_LDBL128_WORDS(d,ix0,ix1,ix2) \
do { \
  union IEEEl2bits iw_u; \
  iw_u.xbits.expsign = (ix0); \
  iw_u.xbits.manh = (ix1); \
  iw_u.xbits.manl = (ix2); \
  (d) = iw_u.e; \
} while (0)

/* Set expsign of a long double from a 16 bit int.  */

#define	SET_LDBL_EXPSIGN(d,v) \
do { \
  union IEEEl2bits se_u; \
  se_u.e = (d); \
  se_u.xbits.expsign = (v); \
  (d) = se_u.e; \
} while (0)

#ifdef __i386__
/* Long double constants are broken on i386. */
#define	LD80C(m, ex, v) { \
	.xbits.man = __CONCAT(m, ULL), \
	.xbits.expsign = (0x3fff + (ex)) | ((v) < 0 ? 0x8000 : 0), \
}
#else
/* The above works on non-i386 too, but we use this to check v. */
#define	LD80C(m, ex, v)	{ .e = (v), }
#endif

#ifdef FLT_EVAL_METHOD
/*
 * Attempt to get strict C99 semantics for assignment with non-C99 compilers.
 */
#if FLT_EVAL_METHOD == 0 || __GNUC__ == 0
#define	STRICT_ASSIGN(type, lval, rval)	((lval) = (rval))
#else
#define	STRICT_ASSIGN(type, lval, rval) do { \
	volatile type __lval; \
 \
	if (sizeof(type) >= sizeof(long double)) \
		(lval) = (rval); \
	else { \
		__lval = (rval); \
		(lval) = __lval; \
	} \
} while (0)
#endif
#endif /* FLT_EVAL_METHOD */

/* Support switching the mode to FP_PE if necessary. */
#if defined(__i386__) && !defined(NO_FPSETPREC)
#define	ENTERI()	ENTERIT(long double)
#define	ENTERIT(returntype) \
	returntype __retval; \
	fp_prec_t __oprec; \
 \
	if ((__oprec = fpgetprec()) != FP_PE) \
		fpsetprec(FP_PE)
#define	RETURNI(x) do { \
	__retval = (x); \
	if (__oprec != FP_PE) \
		fpsetprec(__oprec); \
	RETURNF(__retval); \
} while (0)
#define	ENTERV() \
	fp_prec_t __oprec; \
 \
	if ((__oprec = fpgetprec()) != FP_PE) \
		fpsetprec(FP_PE)
#define	RETURNV() do { \
	if (__oprec != FP_PE) \
		fpsetprec(__oprec); \
	return; \
} while (0)
#else
#define	ENTERI()
#define	ENTERIT(x)
#define	RETURNI(x)	RETURNF(x)
#define	ENTERV()
#define	RETURNV()	return
#endif

/* Default return statement if hack*_t() is not used. */
#define	RETURNF(v)	return (v)

/*
 * 2sum gives the same result as 2sumF without requiring |a| >= |b| or
 * a == 0, but is slower.
 */
#define	_2sum(a, b) do { \
	__typeof(a) __s, __w; \
 \
	__w = (a) + (b); \
	__s = __w - (a); \
	(b) = ((a) - (__w - __s)) + ((b) - __s); \
	(a) = __w; \
} while (0)
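/*
 * Sketch of what _2sum() computes (hypothetical values): after the macro,
 * 'a' holds the correctly rounded sum and 'b' the exact rounding error,
 * so a + b still represents the original sum exactly:
 *
 *	double a = 1.0, b = 0x1p-60;
 *	_2sum(a, b);		* now a == 1.0 and b == 0x1p-60 *
 *
 * The 0x1p-60 term is too small to survive the rounding of 1.0 + 0x1p-60,
 * so it reappears unchanged as the error term.
 */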
/*
 * 2sumF algorithm.
 *
 * "Normalize" the terms in the infinite-precision expression a + b for
 * the sum of 2 floating point values so that b is as small as possible
 * relative to 'a'.  (The resulting 'a' is the value of the expression in
 * the same precision as 'a' and the resulting b is the rounding error.)
 * |a| must be >= |b| or 0, b's type must be no larger than 'a's type, and
 * exponent overflow or underflow must not occur.  This uses a Theorem of
 * Dekker (1971).  See Knuth (1981) 4.2.2 Theorem C.  The name "TwoSum"
 * is apparently due to Shewchuk (1997).
 *
 * For this to always work, assignment of a + b to 'a' must not retain any
 * extra precision in a + b.  This is required by C standards but broken
 * in many compilers.  The brokenness cannot be worked around using
 * STRICT_ASSIGN() like we do elsewhere, since the efficiency of this
 * algorithm would be destroyed by non-null strict assignments.  (The
 * compilers are correct to be broken -- the efficiency of all floating
 * point code calculations would be destroyed similarly if they forced the
 * conversions.)
 *
 * Fortunately, a case that works well can usually be arranged by building
 * any extra precision into the type of 'a' -- 'a' should have type float_t,
 * double_t or long double.  b's type should be no larger than 'a's type.
 * Callers should use these types with scopes as large as possible, to
 * reduce their own extra-precision and efficiency problems.  In
 * particular, they shouldn't convert back and forth just to call here.
 */
#ifdef DEBUG
#define	_2sumF(a, b) do { \
	__typeof(a) __w; \
	volatile __typeof(a) __ia, __ib, __r, __vw; \
 \
	__ia = (a); \
	__ib = (b); \
	assert(__ia == 0 || fabsl(__ia) >= fabsl(__ib)); \
 \
	__w = (a) + (b); \
	(b) = ((a) - __w) + (b); \
	(a) = __w; \
 \
	/* The next 2 assertions are weak if (a) is already long double. */ \
	assert((long double)__ia + __ib == (long double)(a) + (b)); \
	__vw = __ia + __ib; \
	__r = __ia - __vw; \
	__r += __ib; \
	assert(__vw == (a) && __r == (b)); \
} while (0)
#else /* !DEBUG */
#define	_2sumF(a, b) do { \
	__typeof(a) __w; \
 \
	__w = (a) + (b); \
	(b) = ((a) - __w) + (b); \
	(a) = __w; \
} while (0)
#endif /* DEBUG */

/*
 * Set x += c, where x is represented in extra precision as a + b.
 * x must be sufficiently normalized and sufficiently larger than c,
 * and the result is then sufficiently normalized.
 *
 * The details of ordering are that |a| must be >= |c| (so that (a, c)
 * can be normalized without extra work to swap 'a' with c).  The details of
 * the normalization are that b must be small relative to the normalized 'a'.
 * Normalization of (a, c) makes the normalized c tiny relative to the
 * normalized a, so b remains small relative to 'a' in the result.  However,
 * b need not ever be tiny relative to 'a'.  For example, b might be about
 * 2**20 times smaller than 'a' to give about 20 extra bits of precision.
 * That is usually enough, and adding c (which by normalization is about
 * 2**53 times smaller than a) cannot change b significantly.  However,
 * cancellation of 'a' with c in normalization of (a, c) may reduce 'a'
 * significantly relative to b.  The caller must ensure that significant
 * cancellation doesn't occur, either by having c of the same sign as 'a',
 * or by having |c| a few percent smaller than |a|.  Pre-normalization of
 * (a, b) may help.
 *
 * This is a variant of an algorithm of Kahan (see Knuth (1981) 4.2.2
 * exercise 19).  We gain considerable efficiency by requiring the terms to
 * be sufficiently normalized and sufficiently increasing.
 */
#define	_3sumF(a, b, c) do { \
	__typeof(a) __tmp; \
 \
	__tmp = (c); \
	_2sumF(__tmp, (a)); \
	(b) += (a); \
	(a) = __tmp; \
} while (0)
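/*
 * Hypothetical sketch of a _3sumF() call sequence: accumulate decreasing
 * terms into the extra-precision pair (hi, lo), keeping hi normalized
 * after each step:
 *
 *	double_t hi, lo;
 *	hi = t0; lo = 0;
 *	_3sumF(hi, lo, t1);	* requires |hi| >= |t1|, see above *
 *	_3sumF(hi, lo, t2);
 *	result = hi + lo;
 *
 * t0, t1 and t2 are placeholder terms; real callers must also respect the
 * cancellation caveats described above.
 */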
/*
 * Common routine to process the arguments to nan(), nanf(), and nanl().
 */
void _scan_nan(uint32_t *__words, int __num_words, const char *__s);

/*
 * Mix 0, 1 or 2 NaNs.  First add 0 to each arg.  This normally just turns
 * signaling NaNs into quiet NaNs by setting a quiet bit.  We do this
 * because we want to never return a signaling NaN, and also because we
 * don't want the quiet bit to affect the result.  Then mix the converted
 * args using the specified operation.
 *
 * When one arg is NaN, the result is typically that arg quieted.  When both
 * args are NaNs, the result is typically the quietening of the arg whose
 * mantissa is largest after quietening.  When neither arg is NaN, the
 * result may be NaN because it is indeterminate, or finite for subsequent
 * construction of a NaN as the indeterminate 0.0L/0.0L.
 *
 * Technical complications: the result in bits after rounding to the final
 * precision might depend on the runtime precision and/or on compiler
 * optimizations, especially when different register sets are used for
 * different precisions.  Try to make the result not depend on at least the
 * runtime precision by always doing the main mixing step in long double
 * precision.  Try to reduce dependencies on optimizations by adding the
 * 0's in different precisions (unless everything is in long double
 * precision).
 */
#define	nan_mix(x, y)		(nan_mix_op((x), (y), +))
#define	nan_mix_op(x, y, op)	(((x) + 0.0L) op ((y) + 0))
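/*
 * Typical (hypothetical) use of nan_mix() in a two-argument function:
 *
 *	if (isnan(x) || isnan(y))
 *		return (nan_mix(x, y));
 *
 * The value returned is then a quiet NaN derived from the NaN operand(s),
 * never a signaling NaN.
 */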
#ifdef _COMPLEX_H

/*
 * C99 specifies that complex numbers have the same representation as
 * an array of two elements, where the first element is the real part
 * and the second element is the imaginary part.
 */
typedef union {
	float complex f;
	float a[2];
} float_complex;
typedef union {
	double complex f;
	double a[2];
} double_complex;
typedef union {
	long double complex f;
	long double a[2];
} long_double_complex;
#define	REALPART(z)	((z).a[0])
#define	IMAGPART(z)	((z).a[1])

/*
 * Inline functions that can be used to construct complex values.
 *
 * The C99 standard intends x+I*y to be used for this, but x+I*y is
 * currently unusable in general since gcc introduces many overflow,
 * underflow, sign and efficiency bugs by rewriting I*y as
 * (0.0+I)*(y+0.0*I) and laboriously computing the full complex product.
 * In particular, I*Inf is corrupted to NaN+I*Inf, and I*-0 is corrupted
 * to -0.0+I*0.0.
 *
 * The C11 standard introduced the macros CMPLX(), CMPLXF() and CMPLXL()
 * to construct complex values.  Compilers that conform to the C99
 * standard require the following functions to avoid the above issues.
 */

#ifndef CMPLXF
static __inline float complex
CMPLXF(float x, float y)
{
	float_complex z;

	REALPART(z) = x;
	IMAGPART(z) = y;
	return (z.f);
}
#endif

#ifndef CMPLX
static __inline double complex
CMPLX(double x, double y)
{
	double_complex z;

	REALPART(z) = x;
	IMAGPART(z) = y;
	return (z.f);
}
#endif

#ifndef CMPLXL
static __inline long double complex
CMPLXL(long double x, long double y)
{
	long_double_complex z;

	REALPART(z) = x;
	IMAGPART(z) = y;
	return (z.f);
}
#endif

#endif /* _COMPLEX_H */

/*
 * The rnint() family rounds to the nearest integer for a restricted range
 * of args (up to about 2**MANT_DIG).  We assume that the current rounding
 * mode is FE_TONEAREST so that this can be done efficiently.  Extra
 * precision causes more problems in practice, and we only centralize this
 * here to reduce those problems, and have not solved the efficiency
 * problems.  The exp2() family uses a more delicate version of this that
 * requires extracting bits from the intermediate value, so it is not
 * centralized here and should copy any solution of the efficiency problems.
 */

static inline double
rnint(__double_t x)
{
	/*
	 * This casts to double to kill any extra precision.  This depends
	 * on the cast being applied to a double_t to avoid compiler bugs
	 * (this is a cleaner version of STRICT_ASSIGN()).  This is
	 * inefficient if there actually is extra precision, but is hard
	 * to improve on.  We use double_t in the API to minimise conversions
	 * for just calling here.  Note that we cannot easily change the
	 * magic number to the one that works directly with double_t, since
	 * the rounding precision is variable at runtime on x86 so the
	 * magic number would need to be variable.  Assuming that the
	 * rounding precision is always the default is too fragile.  This
	 * and many other complications will move when the default is
	 * changed to FP_PE.
	 */
	return ((double)(x + 0x1.8p52) - 0x1.8p52);
}

static inline float
rnintf(__float_t x)
{
	/*
	 * As for rnint(), except we could just call that to handle the
	 * extra precision case, usually without losing efficiency.
	 */
	return ((float)(x + 0x1.8p23F) - 0x1.8p23F);
}

#ifdef LDBL_MANT_DIG
/*
 * The complications for extra precision are smaller for rnintl() since it
 * can safely assume that the rounding precision has been increased from
 * its default to FP_PE on x86.  We don't exploit that here to get small
 * optimizations from limiting the range to double.  We just need it for
 * the magic number to work with long doubles.  ld128 callers should use
 * rnint() instead of this if possible.  ld80 callers should prefer
 * rnintl() since for amd64 this avoids swapping the register set, while
 * for i386 it makes no difference (assuming FP_PE), and for other arches
 * it makes little difference.
 */
static inline long double
rnintl(long double x)
{
	return (x + __CONCAT(0x1.8p, LDBL_MANT_DIG) / 2 -
	    __CONCAT(0x1.8p, LDBL_MANT_DIG) / 2);
}
#endif /* LDBL_MANT_DIG */
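/*
 * The magic-number trick above in concrete terms (hypothetical values):
 * under round-to-nearest, adding and then subtracting 0x1.8p52 rounds
 * away the fraction bits of an in-range argument, e.g.
 *
 *	rnint(2.5) == 2.0	* ties round to even *
 *	rnint(-3.7) == -4.0
 *
 * The constant is 1.5*2**52 rather than 2**52 so that the intermediate sum
 * stays in [2**52, 2**53), where the spacing between doubles is exactly 1,
 * even for negative args.
 */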
/*
 * irint() and i64rint() give the same result as casting to their integer
 * return type provided their arg is a floating point integer.  They can
 * sometimes be more efficient because no rounding is required.
 */
#if defined(__amd64__) || defined(__i386__)
#define	irint(x) \
    (sizeof(x) == sizeof(float) && \
    sizeof(__float_t) == sizeof(long double) ? irintf(x) : \
    sizeof(x) == sizeof(double) && \
    sizeof(__double_t) == sizeof(long double) ? irintd(x) : \
    sizeof(x) == sizeof(long double) ? irintl(x) : (int)(x))
#else
#define	irint(x)	((int)(x))
#endif

#define	i64rint(x)	((int64_t)(x))	/* only needed for ld128 so not opt. */

#if defined(__i386__)
static __inline int
irintf(float x)
{
	int n;

	__asm("fistl %0" : "=m" (n) : "t" (x));
	return (n);
}

static __inline int
irintd(double x)
{
	int n;

	__asm("fistl %0" : "=m" (n) : "t" (x));
	return (n);
}
#endif

#if defined(__amd64__) || defined(__i386__)
static __inline int
irintl(long double x)
{
	int n;

	__asm("fistl %0" : "=m" (n) : "t" (x));
	return (n);
}
#endif

/*
 * The following are fast floor macros for 0 <= |x| < 0x1p(N-1), where
 * N is the precision of the type of x.  These macros are used in the
 * half-cycle trigonometric functions (e.g., sinpi(x)).
 */
#define	FFLOORF(x, j0, ix) do { \
	(j0) = (((ix) >> 23) & 0xff) - 0x7f; \
	(ix) &= ~(0x007fffff >> (j0)); \
	SET_FLOAT_WORD((x), (ix)); \
} while (0)

#define	FFLOOR(x, j0, ix, lx) do { \
	(j0) = (((ix) >> 20) & 0x7ff) - 0x3ff; \
	if ((j0) < 20) { \
		(ix) &= ~(0x000fffff >> (j0)); \
		(lx) = 0; \
	} else { \
		(lx) &= ~((uint32_t)0xffffffff >> ((j0) - 20)); \
	} \
	INSERT_WORDS((x), (ix), (lx)); \
} while (0)

#define	FFLOORL80(x, j0, ix, lx) do { \
	j0 = ix - 0x3fff + 1; \
	if ((j0) < 32) { \
		(lx) = ((lx) >> 32) << 32; \
		(lx) &= ~((((lx) << 32)-1) >> (j0)); \
	} else { \
		uint64_t _m; \
		_m = (uint64_t)-1 >> (j0); \
		if ((lx) & _m) (lx) &= ~_m; \
	} \
	INSERT_LDBL80_WORDS((x), (ix), (lx)); \
} while (0)

#define	FFLOORL128(x, ai, ar) do { \
	union IEEEl2bits u; \
	uint64_t m; \
	int e; \
	u.e = (x); \
	e = u.bits.exp - 16383; \
	if (e < 48) { \
		m = ((1llu << 49) - 1) >> (e + 1); \
		u.bits.manh &= ~m; \
		u.bits.manl = 0; \
	} else { \
		m = (uint64_t)-1 >> (e - 48); \
		u.bits.manl &= ~m; \
	} \
	(ai) = u.e; \
	(ar) = (x) - (ai); \
} while (0)

#ifdef DEBUG
#if defined(__amd64__) || defined(__i386__)
#define	breakpoint()	asm("int $3")
#else
#include <signal.h>

#define	breakpoint()	raise(SIGTRAP)
#endif
#endif

#ifdef STRUCT_RETURN
#define	RETURNSP(rp) do { \
	if (!(rp)->lo_set) \
		RETURNF((rp)->hi); \
	RETURNF((rp)->hi + (rp)->lo); \
} while (0)
#define	RETURNSPI(rp) do { \
	if (!(rp)->lo_set) \
		RETURNI((rp)->hi); \
	RETURNI((rp)->hi + (rp)->lo); \
} while (0)
#endif

#define	SUM2P(x, y) ({ \
	const __typeof (x) __x = (x); \
	const __typeof (y) __y = (y); \
	__x + __y; \
})

/* fdlibm kernel function */
int	__kernel_rem_pio2(double*,double*,int,int,int);

/* double precision kernel functions */
#ifndef INLINE_REM_PIO2
int	__ieee754_rem_pio2(double,double*);
#endif
double	__kernel_sin(double,double,int);
double	__kernel_cos(double,double);
double	__kernel_tan(double,double,int);
double	__ldexp_exp(double,int);
#ifdef _COMPLEX_H
double complex __ldexp_cexp(double complex,int);
#endif

/* float precision kernel functions */
#ifndef INLINE_REM_PIO2F
int	__ieee754_rem_pio2f(float,double*);
#endif
#ifndef INLINE_KERNEL_SINDF
float	__kernel_sindf(double);
#endif
#ifndef INLINE_KERNEL_COSDF
float	__kernel_cosdf(double);
#endif
#ifndef INLINE_KERNEL_TANDF
float	__kernel_tandf(double,int);
#endif
float	__ldexp_expf(float,int);
#ifdef _COMPLEX_H
float complex __ldexp_cexpf(float complex,int);
#endif

/* long double precision kernel functions */
long double __kernel_sinl(long double, long double, int);
long double __kernel_cosl(long double, long double);
long double __kernel_tanl(long double, long double, int);

#endif /* !_MATH_PRIVATE_H_ */