/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2011, Richard Lowe
 */

/*
 * GCC inline-assembly wrappers around individual floating point
 * instructions: x87 control/status word and MXCSR access, a handful of x87
 * transcendental instructions, scalar SSE/SSE2 arithmetic, comparison and
 * conversion instructions, and (on SPARC) %fsr access.
 *
 * Every wrapper is a thin shell around one instruction (plus the move
 * needed to deliver the result), so that the instruction is really executed
 * -- with whatever floating point exceptions it raises -- rather than being
 * folded or reordered by the compiler.
 */

#ifndef _FENV_INLINES_H
#define	_FENV_INLINES_H

#ifdef __GNUC__

#ifdef __cplusplus
extern "C" {
#endif

#include <sys/types.h>
#include <stdint.h>	/* uint16_t, uint32_t, uint64_t */

/*
 * GCC itself predefines __i386__ / __x86_64__ on x86 targets; accept those
 * as well as __x86 so that this header does not depend on something else
 * having defined __x86 first.
 */
#if defined(__x86) || defined(__i386__) || defined(__x86_64__)

/*
 * Floating point Control Word and Status Word
 * Definition should actually be shared with x86
 * (much of this 'amd64' code can be, in fact.)
 *
 * NOTE(review): the field names are the reverse of what the accessors below
 * keep in them.  __fenv_getcwsw() stores the *status* word into words.cw
 * and the *control* word into words.sw, and __fenv_setcwsw() reads them
 * back the same way.  The pair is self-consistent, and the resulting 32-bit
 * layout (status word in the first two bytes, control word in the next two)
 * is what callers see, so the names are left alone.
 */
union fp_cwsw {
	uint32_t cwsw;
	struct {
		uint16_t cw;
		uint16_t sw;
	} words;
};

/*
 * Read the x87 status word and control word into *value.
 *
 * fstsw fills the first 16-bit half of the result and fstcw the second
 * (see the note on union fp_cwsw about the field names).
 */
extern __inline__ void
__fenv_getcwsw(unsigned int *value)
{
	union fp_cwsw u;

	__asm__ __volatile__(
	    "fstsw %0\n\t"
	    "fstcw %1\n\t"
	    : "=m" (u.words.cw), "=m" (u.words.sw));

	*value = u.cwsw;
}

/*
 * Load the x87 control word and status word from *value, which uses the
 * layout produced by __fenv_getcwsw().
 *
 * The status word cannot be loaded directly, so the whole x87 environment
 * is stored with fstenv, the control word slot (fenv[0]) and status word
 * slot (fenv[2]) are patched, and the environment is reloaded with fldenv.
 * The trailing fwait makes any exception left pending and unmasked by the
 * new state get reported here rather than at some later x87 instruction.
 */
extern __inline__ void
__fenv_setcwsw(const unsigned int *value)
{
	union fp_cwsw cwsw;
	short fenv[16];		/* scratch space for the stored environment */

	cwsw.cwsw = *value;

	__asm__ __volatile__(
	    "fstenv %0\n\t"
	    "movw   %4,%1\n\t"
	    "movw   %3,%2\n\t"
	    "fldenv %0\n\t"
	    "fwait\n\t"
	    : "=m" (fenv), "=m" (fenv[0]), "=m" (fenv[2])
	    : "r" (cwsw.words.cw), "r" (cwsw.words.sw)
	    /* For practical purposes, we clobber the whole FPU */
	    : "cc", "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)",
	    "st(6)", "st(7)");
}

/*
 * Store the SSE control/status register (MXCSR) into *value.
 */
extern __inline__ void
__fenv_getmxcsr(unsigned int *value)
{
	__asm__ __volatile__("stmxcsr %0" : "=m" (*value));
}

/*
 * Load the SSE control/status register (MXCSR) from *value.
 */
extern __inline__ void
__fenv_setmxcsr(const unsigned int *value)
{
	__asm__ __volatile__("ldmxcsr %0" : : "m" (*value));
}

/*
 * x87 instruction wrappers.
 *
 * In the constraints below "t" is the top of the x87 register stack, ST(0),
 * and "u" is the next register, ST(1).  An input that the instruction pops
 * is listed as a clobber ("st(1)"); a value that the instruction pushes is
 * listed as an extra output so that the compiler's idea of the stack depth
 * stays correct.
 */

/*
 * F2XM1: returns 2^x - 1.  The instruction is only specified for
 * -1.0 <= x <= +1.0.
 */
extern __inline__ long double
f2xm1(long double x)
{
	long double ret;

	__asm__ __volatile__("f2xm1" : "=t" (ret) : "0" (x) : "cc");
	return (ret);
}

/*
 * FYL2X: returns y * log2(x).  x is in ST(0) and y in ST(1); the
 * instruction pops the stack and leaves the result in the new ST(0).
 */
extern __inline__ long double
fyl2x(long double y, long double x)
{
	long double ret;

	__asm__ __volatile__("fyl2x"
	    : "=t" (ret)
	    : "0" (x), "u" (y)
	    : "st(1)", "cc");
	return (ret);
}

/*
 * FPTAN: returns the partial tangent of x.
 */
extern __inline__ long double
fptan(long double x)
{
	/*
	 * fptan pushes 1.0 then the result on completion, so we want to pop
	 * the FP stack twice, so we need a dummy value into which to pop it.
	 */
	long double ret;
	long double dummy;

	__asm__ __volatile__("fptan"
	    : "=t" (dummy), "=u" (ret)
	    : "0" (x)
	    : "cc");
	return (ret);
}

/*
 * FPATAN: y is placed in ST(0) and x in ST(1); the instruction computes
 * arctan(ST(1) / ST(0)), i.e. arctan(x / y), pops the stack and leaves the
 * result in the new ST(0).
 */
extern __inline__ long double
fpatan(long double x, long double y)
{
	long double ret;

	__asm__ __volatile__("fpatan"
	    : "=t" (ret)
	    : "0" (y), "u" (x)
	    : "st(1)", "cc");
	return (ret);
}

/*
 * FXTRACT: returns the significand of x.
 *
 * The instruction replaces ST(0) with the exponent of x and then pushes the
 * significand, so on completion ST(0) is the significand and ST(1) the
 * exponent.  The exponent is received into a dummy output purely so that
 * the compiler knows about the push and pops it again; without that every
 * call would leak one x87 stack slot.
 */
extern __inline__ long double
fxtract(long double x)
{
	long double exponent;	/* discarded */

	__asm__ __volatile__("fxtract"
	    : "+t" (x), "=u" (exponent)
	    :
	    : "cc");
	return (x);
}

/*
 * FPREM1: IEEE partial remainder.
 *
 * The instruction replaces ST(0) with the partial remainder of
 * ST(0) / ST(1).  Here ST(0) is loaded with 'div' and ST(1) with 'idend',
 * and the updated ST(0) is returned.
 *
 * NOTE(review): going by the parameter names one would expect the dividend
 * in ST(0); confirm the intended argument order against the callers.
 */
extern __inline__ long double
fprem1(long double idend, long double div)
{
	__asm__ __volatile__("fprem1" : "+t" (div) : "u" (idend) : "cc");
	return (div);
}

/*
 * FPREM: partial remainder (truncating).  Operand placement is identical
 * to fprem1() above, including the review note about argument order.
 */
extern __inline__ long double
fprem(long double idend, long double div)
{
	__asm__ __volatile__("fprem" : "+t" (div) : "u" (idend) : "cc");
	return (div);
}

/*
 * FYL2XP1: returns y * log2(x + 1).  x is in ST(0) and y in ST(1); the
 * instruction pops the stack and leaves the result in the new ST(0).
 */
extern __inline__ long double
fyl2xp1(long double y, long double x)
{
	long double ret;

	__asm__ __volatile__("fyl2xp1"
	    : "=t" (ret)
	    : "0" (x), "u" (y)
	    : "st(1)", "cc");
	return (ret);
}

/*
 * FSQRT: returns the square root of x.
 */
extern __inline__ long double
fsqrt(long double x)
{
	__asm__ __volatile__("fsqrt" : "+t" (x) : : "cc");
	return (x);
}

/*
 * FSINCOS: the instruction replaces ST(0) with sin(x) and then pushes
 * cos(x), leaving the cosine in ST(0) and the sine in ST(1).  Only the
 * value left in ST(0) is returned; the other one is discarded.
 */
extern __inline__ long double
fsincos(long double x)
{
	long double dummy;

	__asm__ __volatile__("fsincos" : "+t" (x), "=u" (dummy) : : "cc");
	return (x);
}

/*
 * FRNDINT: returns x rounded to an integral value according to the current
 * x87 rounding mode.
 */
extern __inline__ long double
frndint(long double x)
{
	__asm__ __volatile__("frndint" : "+t" (x) : : "cc");
	return (x);
}

/*
 * FSCALE: y is placed in ST(0) and x in ST(1); the instruction scales ST(0)
 * by 2 raised to the (truncated) value of ST(1), so the result is
 * y * 2^trunc(x).  The stack is not popped.
 */
extern __inline__ long double
fscale(long double x, long double y)
{
	long double ret;

	__asm__ __volatile__("fscale" : "=t" (ret) : "0" (y), "u" (x) : "cc");
	return (ret);
}

/*
 * FSIN: returns the sine of x.
 */
extern __inline__ long double
fsin(long double x)
{
	__asm__ __volatile__("fsin" : "+t" (x) : : "cc");
	return (x);
}

/*
 * FCOS: returns the cosine of x.
 */
extern __inline__ long double
fcos(long double x)
{
	__asm__ __volatile__("fcos" : "+t" (x) : : "cc");
	return (x);
}

/*
 * Scalar single-precision SSE wrappers.
 *
 * For the two-operand forms the first source operand is copied into a local
 * temporary which the instruction then updates in a register; the result is
 * stored through the third pointer.  Neither source operand is modified.
 */

/*
 * CMPEQSS: *i1 = (*f1 == *f2) ? all ones : 0.
 */
extern __inline__ void
sse_cmpeqss(float *f1, float *f2, int *i1)
{
	float tmp = *f1;

	__asm__ __volatile__(
	    "cmpeqss %2, %1\n\t"
	    "movss   %1, %0"
	    : "=m" (*i1), "+x" (tmp)
	    : "x" (*f2)
	    : "cc");
}

/*
 * CMPLTSS: *i1 = (*f1 < *f2) ? all ones : 0.
 */
extern __inline__ void
sse_cmpltss(float *f1, float *f2, int *i1)
{
	float tmp = *f1;

	__asm__ __volatile__(
	    "cmpltss %2, %1\n\t"
	    "movss   %1, %0"
	    : "=m" (*i1), "+x" (tmp)
	    : "x" (*f2)
	    : "cc");
}

/*
 * CMPLESS: *i1 = (*f1 <= *f2) ? all ones : 0.
 */
extern __inline__ void
sse_cmpless(float *f1, float *f2, int *i1)
{
	float tmp = *f1;

	__asm__ __volatile__(
	    "cmpless %2, %1\n\t"
	    "movss   %1, %0"
	    : "=m" (*i1), "+x" (tmp)
	    : "x" (*f2)
	    : "cc");
}

/*
 * CMPUNORDSS: *i1 = (*f1 and *f2 are unordered, i.e. either is a NaN)
 * ? all ones : 0.
 */
extern __inline__ void
sse_cmpunordss(float *f1, float *f2, int *i1)
{
	float tmp = *f1;

	__asm__ __volatile__(
	    "cmpunordss %2, %1\n\t"
	    "movss      %1, %0"
	    : "=m" (*i1), "+x" (tmp)
	    : "x" (*f2)
	    : "cc");
}

/*
 * MINSS: *f3 = min(*f1, *f2) as defined by the instruction.
 */
extern __inline__ void
sse_minss(float *f1, float *f2, float *f3)
{
	float tmp = *f1;

	__asm__ __volatile__(
	    "minss %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*f3), "+x" (tmp)
	    : "x" (*f2));
}

/*
 * MAXSS: *f3 = max(*f1, *f2) as defined by the instruction.
 */
extern __inline__ void
sse_maxss(float *f1, float *f2, float *f3)
{
	float tmp = *f1;

	__asm__ __volatile__(
	    "maxss %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*f3), "+x" (tmp)
	    : "x" (*f2));
}

/*
 * ADDSS: *f3 = *f1 + *f2.
 */
extern __inline__ void
sse_addss(float *f1, float *f2, float *f3)
{
	float tmp = *f1;

	__asm__ __volatile__(
	    "addss %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*f3), "+x" (tmp)
	    : "x" (*f2));
}

/*
 * SUBSS: *f3 = *f1 - *f2.
 */
extern __inline__ void
sse_subss(float *f1, float *f2, float *f3)
{
	float tmp = *f1;

	__asm__ __volatile__(
	    "subss %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*f3), "+x" (tmp)
	    : "x" (*f2));
}

/*
 * MULSS: *f3 = *f1 * *f2.
 */
extern __inline__ void
sse_mulss(float *f1, float *f2, float *f3)
{
	float tmp = *f1;

	__asm__ __volatile__(
	    "mulss %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*f3), "+x" (tmp)
	    : "x" (*f2));
}

/*
 * DIVSS: *f3 = *f1 / *f2.
 */
extern __inline__ void
sse_divss(float *f1, float *f2, float *f3)
{
	float tmp = *f1;

	__asm__ __volatile__(
	    "divss %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*f3), "+x" (tmp)
	    : "x" (*f2));
}

/*
 * SQRTSS: *f2 = sqrt(*f1).
 */
extern __inline__ void
sse_sqrtss(float *f1, float *f2)
{
	float tmp;	/* scratch XMM register */

	__asm__ __volatile__(
	    "sqrtss %2, %1\n\t"
	    "movss  %1, %0"
	    : "=m" (*f2), "=x" (tmp)
	    : "m" (*f1));
}

/*
 * UCOMISS: unordered compare of *f1 with *f2.  The comparison result lands
 * in EFLAGS and is not returned; presumably callers execute this only for
 * the floating point exceptions it raises (TODO confirm against callers).
 */
extern __inline__ void
sse_ucomiss(float *f1, float *f2)
{
	__asm__ __volatile__("ucomiss %1, %0"
	    :
	    : "x" (*f1), "x" (*f2)
	    : "cc");
}

/*
 * COMISS: ordered compare of *f1 with *f2.  As with sse_ucomiss() the
 * EFLAGS result is not returned.
 */
extern __inline__ void
sse_comiss(float *f1, float *f2)
{
	__asm__ __volatile__("comiss %1, %0"
	    :
	    : "x" (*f1), "x" (*f2)
	    : "cc");
}

/*
 * CVTSS2SD: *d1 = (double)*f1.
 */
extern __inline__ void
sse_cvtss2sd(float *f1, double *d1)
{
	double tmp;	/* scratch XMM register */

	__asm__ __volatile__(
	    "cvtss2sd %2, %1\n\t"
	    "movsd    %1, %0"
	    : "=m" (*d1), "=x" (tmp)
	    : "m" (*f1));
}

/*
 * CVTSI2SS: *f1 = (float)*i1, rounded according to MXCSR.
 */
extern __inline__ void
sse_cvtsi2ss(int *i1, float *f1)
{
	float tmp;	/* scratch XMM register */

	__asm__ __volatile__(
	    "cvtsi2ss %2, %1\n\t"
	    "movss    %1, %0"
	    : "=m" (*f1), "=x" (tmp)
	    : "m" (*i1));
}

/*
 * CVTTSS2SI: *i1 = *f1 converted to a 32-bit integer with truncation.
 */
extern __inline__ void
sse_cvttss2si(float *f1, int *i1)
{
	int tmp;	/* scratch general purpose register */

	__asm__ __volatile__(
	    "cvttss2si %2, %1\n\t"
	    "movl      %1, %0"
	    : "=m" (*i1), "=r" (tmp)
	    : "m" (*f1));
}

/*
 * CVTSS2SI: *i1 = *f1 converted to a 32-bit integer using the MXCSR
 * rounding mode.
 */
extern __inline__ void
sse_cvtss2si(float *f1, int *i1)
{
	int tmp;	/* scratch general purpose register */

	__asm__ __volatile__(
	    "cvtss2si %2, %1\n\t"
	    "movl     %1, %0"
	    : "=m" (*i1), "=r" (tmp)
	    : "m" (*f1));
}

#if defined(__amd64) || defined(__x86_64__)
/*
 * CVTSI2SSQ: *f1 = (float)*ll1 (64-bit integer source), rounded according
 * to MXCSR.
 */
extern __inline__ void
sse_cvtsi2ssq(long long *ll1, float *f1)
{
	float tmp;	/* scratch XMM register */

	__asm__ __volatile__(
	    "cvtsi2ssq %2, %1\n\t"
	    "movss     %1, %0"
	    : "=m" (*f1), "=x" (tmp)
	    : "m" (*ll1));
}

/*
 * CVTTSS2SIQ: *ll1 = *f1 converted to a 64-bit integer with truncation.
 */
extern __inline__ void
sse_cvttss2siq(float *f1, long long *ll1)
{
	uint64_t tmp;	/* scratch general purpose register */

	__asm__ __volatile__(
	    "cvttss2siq %2, %1\n\t"
	    "movq       %1, %0"
	    : "=m" (*ll1), "=r" (tmp)
	    : "m" (*f1));
}

/*
 * CVTSS2SIQ: *ll1 = *f1 converted to a 64-bit integer using the MXCSR
 * rounding mode.
 */
extern __inline__ void
sse_cvtss2siq(float *f1, long long *ll1)
{
	uint64_t tmp;	/* scratch general purpose register */

	__asm__ __volatile__(
	    "cvtss2siq %2, %1\n\t"
	    "movq      %1, %0"
	    : "=m" (*ll1), "=r" (tmp)
	    : "m" (*f1));
}

#endif

/*
 * Scalar double-precision SSE2 wrappers.  These mirror the single-precision
 * ones above: the first source operand is copied into a local temporary, so
 * neither source operand is modified.
 */

/*
 * CMPEQSD: *ll1 = (*d1 == *d2) ? all ones : 0.
 */
extern __inline__ void
sse_cmpeqsd(double *d1, double *d2, long long *ll1)
{
	double tmp = *d1;

	__asm__ __volatile__(
	    "cmpeqsd %2,%1\n\t"
	    "movsd   %1,%0"
	    : "=m" (*ll1), "+x" (tmp)
	    : "x" (*d2));
}

/*
 * CMPLTSD: *ll1 = (*d1 < *d2) ? all ones : 0.
 */
extern __inline__ void
sse_cmpltsd(double *d1, double *d2, long long *ll1)
{
	double tmp = *d1;

	__asm__ __volatile__(
	    "cmpltsd %2,%1\n\t"
	    "movsd   %1,%0"
	    : "=m" (*ll1), "+x" (tmp)
	    : "x" (*d2));
}

/*
 * CMPLESD: *ll1 = (*d1 <= *d2) ? all ones : 0.
 */
extern __inline__ void
sse_cmplesd(double *d1, double *d2, long long *ll1)
{
	double tmp = *d1;

	__asm__ __volatile__(
	    "cmplesd %2,%1\n\t"
	    "movsd   %1,%0"
	    : "=m" (*ll1), "+x" (tmp)
	    : "x" (*d2));
}

/*
 * CMPUNORDSD: *ll1 = (*d1 and *d2 are unordered, i.e. either is a NaN)
 * ? all ones : 0.
 */
extern __inline__ void
sse_cmpunordsd(double *d1, double *d2, long long *ll1)
{
	double tmp = *d1;

	__asm__ __volatile__(
	    "cmpunordsd %2,%1\n\t"
	    "movsd      %1,%0"
	    : "=m" (*ll1), "+x" (tmp)
	    : "x" (*d2));
}

/*
 * MINSD: *d3 = min(*d1, *d2) as defined by the instruction.
 */
extern __inline__ void
sse_minsd(double *d1, double *d2, double *d3)
{
	double tmp = *d1;

	__asm__ __volatile__(
	    "minsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*d3), "+x" (tmp)
	    : "x" (*d2));
}

/*
 * MAXSD: *d3 = max(*d1, *d2) as defined by the instruction.
 */
extern __inline__ void
sse_maxsd(double *d1, double *d2, double *d3)
{
	double tmp = *d1;

	__asm__ __volatile__(
	    "maxsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*d3), "+x" (tmp)
	    : "x" (*d2));
}

/*
 * ADDSD: *d3 = *d1 + *d2.
 */
extern __inline__ void
sse_addsd(double *d1, double *d2, double *d3)
{
	double tmp = *d1;

	__asm__ __volatile__(
	    "addsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*d3), "+x" (tmp)
	    : "x" (*d2));
}

/*
 * SUBSD: *d3 = *d1 - *d2.
 */
extern __inline__ void
sse_subsd(double *d1, double *d2, double *d3)
{
	double tmp = *d1;

	__asm__ __volatile__(
	    "subsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*d3), "+x" (tmp)
	    : "x" (*d2));
}

/*
 * MULSD: *d3 = *d1 * *d2.
 */
extern __inline__ void
sse_mulsd(double *d1, double *d2, double *d3)
{
	double tmp = *d1;

	__asm__ __volatile__(
	    "mulsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*d3), "+x" (tmp)
	    : "x" (*d2));
}

/*
 * DIVSD: *d3 = *d1 / *d2.
 */
extern __inline__ void
sse_divsd(double *d1, double *d2, double *d3)
{
	double tmp = *d1;

	__asm__ __volatile__(
	    "divsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*d3), "+x" (tmp)
	    : "x" (*d2));
}

/*
 * SQRTSD: *d2 = sqrt(*d1).
 */
extern __inline__ void
sse_sqrtsd(double *d1, double *d2)
{
	double tmp;	/* scratch XMM register */

	__asm__ __volatile__(
	    "sqrtsd %2, %1\n\t"
	    "movsd  %1, %0"
	    : "=m" (*d2), "=x" (tmp)
	    : "m" (*d1));
}

/*
 * UCOMISD: unordered compare of *d1 with *d2.  The comparison result lands
 * in EFLAGS and is not returned; presumably callers execute this only for
 * the floating point exceptions it raises (TODO confirm against callers).
 */
extern __inline__ void
sse_ucomisd(double *d1, double *d2)
{
	__asm__ __volatile__("ucomisd %1, %0"
	    :
	    : "x" (*d1), "x" (*d2)
	    : "cc");
}

/*
 * COMISD: ordered compare of *d1 with *d2.  As with sse_ucomisd() the
 * EFLAGS result is not returned.
 */
extern __inline__ void
sse_comisd(double *d1, double *d2)
{
	__asm__ __volatile__("comisd %1, %0"
	    :
	    : "x" (*d1), "x" (*d2)
	    : "cc");
}

/*
 * CVTSD2SS: *f1 = (float)*d1, rounded according to MXCSR.
 */
extern __inline__ void
sse_cvtsd2ss(double *d1, float *f1)
{
	float tmp;	/* scratch XMM register */

	__asm__ __volatile__(
	    "cvtsd2ss %2,%1\n\t"
	    "movss    %1,%0"
	    : "=m" (*f1), "=x" (tmp)
	    : "m" (*d1));
}

/*
 * CVTSI2SD: *d1 = (double)*i1.
 */
extern __inline__ void
sse_cvtsi2sd(int *i1, double *d1)
{
	double tmp;	/* scratch XMM register */

	__asm__ __volatile__(
	    "cvtsi2sd %2,%1\n\t"
	    "movsd    %1,%0"
	    : "=m" (*d1), "=x" (tmp)
	    : "m" (*i1));
}

/*
 * CVTTSD2SI: *i1 = *d1 converted to a 32-bit integer with truncation.
 */
extern __inline__ void
sse_cvttsd2si(double *d1, int *i1)
{
	int tmp;	/* scratch general purpose register */

	__asm__ __volatile__(
	    "cvttsd2si %2,%1\n\t"
	    "movl      %1,%0"
	    : "=m" (*i1), "=r" (tmp)
	    : "m" (*d1));
}

/*
 * CVTSD2SI: *i1 = *d1 converted to a 32-bit integer using the MXCSR
 * rounding mode.
 */
extern __inline__ void
sse_cvtsd2si(double *d1, int *i1)
{
	int tmp;	/* scratch general purpose register */

	__asm__ __volatile__(
	    "cvtsd2si %2,%1\n\t"
	    "movl     %1,%0"
	    : "=m" (*i1), "=r" (tmp)
	    : "m" (*d1));
}

#if defined(__amd64) || defined(__x86_64__)
/*
 * CVTSI2SDQ: *d1 = (double)*ll1 (64-bit integer source), rounded according
 * to MXCSR.
 */
extern __inline__ void
sse_cvtsi2sdq(long long *ll1, double *d1)
{
	double tmp;	/* scratch XMM register */

	__asm__ __volatile__(
	    "cvtsi2sdq %2,%1\n\t"
	    "movsd     %1,%0"
	    : "=m" (*d1), "=x" (tmp)
	    : "m" (*ll1));
}

/*
 * CVTTSD2SIQ: *ll1 = *d1 converted to a 64-bit integer with truncation.
 */
extern __inline__ void
sse_cvttsd2siq(double *d1, long long *ll1)
{
	uint64_t tmp;	/* scratch general purpose register */

	__asm__ __volatile__(
	    "cvttsd2siq %2,%1\n\t"
	    "movq       %1,%0"
	    : "=m" (*ll1), "=r" (tmp)
	    : "m" (*d1));
}

/*
 * CVTSD2SIQ: *ll1 = *d1 converted to a 64-bit integer using the MXCSR
 * rounding mode.
 */
extern __inline__ void
sse_cvtsd2siq(double *d1, long long *ll1)
{
	uint64_t tmp;	/* scratch general purpose register */

	__asm__ __volatile__(
	    "cvtsd2siq %2,%1\n\t"
	    "movq      %1,%0"
	    : "=m" (*ll1), "=r" (tmp)
	    : "m" (*d1));
}
#endif

#elif defined(__sparc)
/*
 * Store the floating point state register (%fsr) into *l.  The store is
 * 64 bits wide (stx) when built for sparcv9 and 32 bits wide (st)
 * otherwise.
 */
extern __inline__ void
__fenv_getfsr(unsigned long *l)
{
	__asm__ __volatile__(
#if defined(__sparcv9)
	    "stx %%fsr,%0\n\t"
#else
	    "st  %%fsr,%0\n\t"
#endif
	    : "=m" (*l));
}

/*
 * Load the floating point state register (%fsr) from *l, using a 64-bit
 * load (ldx) when built for sparcv9 and a 32-bit load (ld) otherwise.
 */
extern __inline__ void
__fenv_setfsr(const unsigned long *l)
{
	__asm__ __volatile__(
#if defined(__sparcv9)
	    "ldx %0,%%fsr\n\t"
#else
	    "ld %0,%%fsr\n\t"
#endif
	    : : "m" (*l) : "cc");
}

/*
 * Store %fsr into *l using the 32-bit store form regardless of the build
 * target.
 */
extern __inline__ void
__fenv_getfsr32(unsigned int *l)
{
	__asm__ __volatile__("st %%fsr,%0\n\t" : "=m" (*l));
}

/*
 * Load %fsr from *l using the 32-bit load form regardless of the build
 * target.
 */
extern __inline__ void
__fenv_setfsr32(const unsigned int *l)
{
	__asm__ __volatile__("ld %0,%%fsr\n\t" : : "m" (*l));
}
#else
#error "GCC FENV inlines not implemented for this platform"
#endif

#ifdef __cplusplus
}
#endif

#endif	/* __GNUC__ */

#endif	/* _FENV_INLINES_H */