/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

	.file	"__vatan2.S"

#include "libm.h"

	RO_DATA
	.align	64
constants:
	.word	0x3ff921fb,0x54442d18	! pio2
	.word	0x3c91a626,0x33145c07	! pio2_lo
	.word	0xbfd55555,0x555554ee	! p1
	.word	0x3fc99999,0x997a1559	! p2
	.word	0xbfc24923,0x158dfe02	! p3
	.word	0x3fbc639d,0x0ed1347b	! p4
	.word	0xffffffff,0x00000000	! mask
	.word	0x3fc00000,0x00000000	! twom3
	.word	0x46d00000,0x00000000	! two110
	.word	0x3fe921fb,0x54442d18	! pio4

! local storage indices

#define xscl		STACK_BIAS-0x8
#define yscl		STACK_BIAS-0x10
#define twom3		STACK_BIAS-0x18
#define two110		STACK_BIAS-0x20
#define pio4		STACK_BIAS-0x28
#define junk		STACK_BIAS-0x30
! sizeof temp storage - must be a multiple of 16 for V9
#define tmps		0x30

! register use

! i0  n
! i1  y
! i2  stridey
! i3  x
! i4  stridex
! i5  z

! l0  k0
! l1  k1
! l2  k2
! l3  hx
! l4  pz0
! l5  pz1
! l6  pz2
! l7  stridez

! the following are 64-bit registers in both V8+ and V9

! g1  __vlibm_TBL_atan2
! g5

! o0  hy
! o1  0x00004000
! o2  0x1420
! o3  0x7fe00000
! o4  0x03600000
! o5  0x00100000
! o7

! f0  y0
! f2  x0
! f4  t0
! f6  ah0
! f8  al0
! f10 y1
! f12 x1
! f14 t1
! f16 ah1
! f18 al1
! f20 y2
! f22 x2
! f24 t2
! f26 ah2
! f28 al2
! f30
! f32
! f34
! f36 sx0
! f38 sx1
! f40 sx2
! f42 sy0
! f44 sy1
! f46 sy2

#define mask		%f48
#define signbit		%f50
#define pio2		%f52
#define pio2_lo		%f54
#define p1		%f56
#define p2		%f58
#define p3		%f60
#define p4		%f62

!
! __vatan2 - vector atan2 kernel (SPARC, built for V8+ and for V9).
!
! Arguments, as named in the register-use table above:
!	%i0 n, %i1 y, %i2 stridey, %i3 x, %i4 stridex, %i5 z;
!	stridez is read from the caller frame right after the save.
! All three strides are shifted left by 3 before being used as byte
! offsets.
!
! The main loop is software pipelined and consumes up to three (y,x)
! pairs per trip:
!	element 0 uses %fcc0, %l0 (k0) and %l4 (pz0)
!	element 1 uses %fcc1, %l1 (k1) and %l5 (pz1)
!	element 2 uses %fcc2, %l2 (k2) and %l6 (pz2)
! The result for element 2 is summed and stored at the top of the next
! trip (or after the loop falls through), so the prologue seeds that
! stage with zeros and points %l6 at the scratch slot.
!
! When n runs out in the middle of a trip, the .last1/.last2 stubs feed
! the remaining stages dummy arguments and redirect their stores to the
! scratch slot so the straight-line code can run to completion.
!
! Operands flagged as NaN, big or small by the in-line tests leave the
! pipeline through .nanN/.bigN/.smallN.  Those paths either rescale the
! operands and rejoin at .contN, or produce the result directly and
! rejoin through .specialN.
!
	ENTRY(__vatan2)
	save	%sp,-SA(MINFRAME)-tmps,%sp
	PIC_SETUP(l7)
	PIC_SET(l7,constants,o0)
	PIC_SET(l7,__vlibm_TBL_atan2,o1)
	wr	%g0,0x82,%asi		! set %asi for non-faulting loads
	mov	%o1, %g1
#ifdef __sparcv9
	ldx	[%fp+STACK_BIAS+0xb0],%l7
#else
	ld	[%fp+0x5c],%l7
#endif
	ldd	[%o0+0x00],pio2		! load/set up constants
	ldd	[%o0+0x08],pio2_lo
	ldd	[%o0+0x10],p1
	ldd	[%o0+0x18],p2
	ldd	[%o0+0x20],p3
	ldd	[%o0+0x28],p4
	ldd	[%o0+0x30],mask
	fzero	signbit
	fnegd	signbit,signbit
	sethi	%hi(0x00004000),%o1
	sethi	%hi(0x1420),%o2
	or	%o2,%lo(0x1420),%o2
	sethi	%hi(0x7fe00000),%o3
	sethi	%hi(0x03600000),%o4
	sethi	%hi(0x00100000),%o5
	ldd	[%o0+0x38],%f0		! copy rarely used constants to stack
	ldd	[%o0+0x40],%f2
	ldd	[%o0+0x48],%f4
	std	%f0,[%fp+twom3]
	std	%f2,[%fp+two110]
	std	%f4,[%fp+pio4]
	sll	%i2,3,%i2		! scale strides
	sll	%i4,3,%i4
	sll	%l7,3,%l7
	fzero	%f20			! loop prologue
	fzero	%f22
	fzero	%f24
	fzero	%f26
	fzero	%f46
	add	%fp,junk,%l6
	ld	[%i1],%f0		! *y
	ld	[%i1+4],%f1
	ld	[%i3],%f8		! *x
	ld	[%i3+4],%f9
	ld	[%i1],%o0		! hy
	ba	.loop
	ld	[%i3],%l3		! hx

! 16-byte aligned
	.align	16
.loop:
	fabsd	%f0,%f4
	mov	%i5,%l4
	add	%i1,%i2,%i1		! y += stridey

	fabsd	%f8,%f2
	add	%i3,%i4,%i3		! x += stridex
	add	%i5,%l7,%i5		! z += stridez

	fand	%f0,signbit,%f42
	sethi	%hi(0x80000000),%g5

	fand	%f8,signbit,%f36
	andn	%o0,%g5,%o0
	andn	%l3,%g5,%l3

	fcmpd	%fcc0,%f4,%f2

	fmovd	%f4,%f0

	fmovdg	%fcc0,%f2,%f0		! swap if |y| > |x|

	fmovdg	%fcc0,%f4,%f2
	mov	%o0,%o7
	lda	[%i1]%asi,%f10		! preload next argument

	faddd	%f26,%f20,%f26
	lda	[%i1+4]%asi,%f11

	faddd	%f22,%f24,%f22
	movg	%fcc0,%l3,%o0

	movg	%fcc0,%o7,%l3

	fbu,pn	%fcc0,.nan0		! if x or y is nan
! delay slot
	lda	[%i3]%asi,%f18

	sub	%l3,%o0,%l0		! hx - hy
	sub	%l3,%o3,%g5
	fabsd	%f10,%f14
	lda	[%i3+4]%asi,%f19

	sub	%l0,%o4,%o7
	faddd	%f22,%f26,%f26

	andcc	%g5,%o7,%g0
	bge,pn	%icc,.big0		! if |x| or |x/y| is big
! delay slot
	nop

	fabsd	%f18,%f12
	cmp	%o0,%o5
	bl,pn	%icc,.small0		! if |y| is small
! delay slot
	lda	[%i1]%asi,%o0

	add	%l0,%o1,%l0		! k
	addcc	%i0,-1,%i0
	ble,pn	%icc,.last1
! delay slot
	lda	[%i3]%asi,%l3

.cont1:
	srl	%l0,10,%l0
	mov	%i5,%l5
	fxor	%f26,%f46,%f26
	st	%f26,[%l6]

	fand	%f10,signbit,%f44
	andn	%l0,0x1f,%l0
	add	%i1,%i2,%i1
	st	%f27,[%l6+4]

	fand	%f18,signbit,%f38
	cmp	%l0,%o2
	movg	%icc,%o2,%l0

	fcmpd	%fcc1,%f14,%f12
	add	%i3,%i4,%i3
	add	%i5,%l7,%i5

	fmovd	%f14,%f10
	add	%l0,%g1,%l0
	sethi	%hi(0x80000000),%g5

	ldd	[%l0+0x10],%f4
	fand	%f2,mask,%f6
	andn	%o0,%g5,%o0
	andn	%l3,%g5,%l3

	fmovdg	%fcc1,%f12,%f10

	fmovdg	%fcc1,%f14,%f12
	mov	%o0,%o7
	lda	[%i1]%asi,%f20

	fsubd	%f2,%f6,%f30
	fmuld	%f6,%f4,%f6
	movg	%fcc1,%l3,%o0

	fmuld	%f0,%f4,%f8
	movg	%fcc1,%o7,%l3

	lda	[%i1+4]%asi,%f21
	fbu,pn	%fcc1,.nan1
! delay slot
	nop

	lda	[%i3]%asi,%f28
	sub	%l3,%o0,%l1
	sub	%l3,%o3,%g5

	lda	[%i3+4]%asi,%f29
	fmuld	%f30,%f4,%f30
	fsubd	%f0,%f6,%f4
	sub	%l1,%o4,%o7

	fabsd	%f20,%f24
	andcc	%g5,%o7,%g0
	bge,pn	%icc,.big1
! delay slot
	nop

	faddd	%f2,%f8,%f8
	cmp	%o0,%o5
	bl,pn	%icc,.small1
! delay slot
	lda	[%i1]%asi,%o0

	fabsd	%f28,%f22
	add	%l1,%o1,%l1
	addcc	%i0,-1,%i0
	lda	[%i3]%asi,%l3

	fsubd	%f4,%f30,%f4
	srl	%l1,10,%l1
	ble,pn	%icc,.last2
! delay slot
	mov	%i5,%l6

.cont2:
	fand	%f20,signbit,%f46
	andn	%l1,0x1f,%l1
	add	%i1,%i2,%i1

	fand	%f28,signbit,%f40
	cmp	%l1,%o2
	movg	%icc,%o2,%l1

	fcmpd	%fcc2,%f24,%f22
	add	%i3,%i4,%i3
	add	%i5,%l7,%i5

	fdivd	%f4,%f8,%f4
	fmovd	%f24,%f20
	add	%l1,%g1,%l1
	sethi	%hi(0x80000000),%g5

	ldd	[%l1+0x10],%f14
	fand	%f12,mask,%f16
	andn	%o0,%g5,%o0
	andn	%l3,%g5,%l3

	fmovdg	%fcc2,%f22,%f20

	fmovdg	%fcc2,%f24,%f22
	mov	%o0,%o7

	fsubd	%f12,%f16,%f32
	fmuld	%f16,%f14,%f16
	movg	%fcc2,%l3,%o0

	fnegd	pio2_lo,%f8		! al
	fmuld	%f10,%f14,%f18
	movg	%fcc2,%o7,%l3

	fzero	%f0
	fbu,pn	%fcc2,.nan2
! delay slot
	nop

	fmovdg	%fcc0,signbit,%f0
	sub	%l3,%o0,%l2
	sub	%l3,%o3,%g5

	fmuld	%f32,%f14,%f32
	fsubd	%f10,%f16,%f14
	sub	%l2,%o4,%o7

	faddd	%f12,%f18,%f18
	andcc	%g5,%o7,%g0
	bge,pn	%icc,.big2
! delay slot
	nop

	fxor	%f36,%f0,%f36
	cmp	%o0,%o5
	bl,pn	%icc,.small2
! delay slot
	nop

.cont3:
	fmovdg	%fcc0,signbit,%f8
	add	%l2,%o1,%l2

	fsubd	%f14,%f32,%f14
	srl	%l2,10,%l2

	fxor	%f36,pio2_lo,%f30	! al
	andn	%l2,0x1f,%l2

	fxor	%f36,pio2,%f0		! ah
	cmp	%l2,%o2
	movg	%icc,%o2,%l2

	fxor	%f42,%f36,%f42		! sy

	faddd	%f8,%f30,%f8
	ldd	[%l0+0x8],%f30
	add	%l2,%g1,%l2

	fdivd	%f14,%f18,%f14
	fzero	%f10

	ldd	[%l2+0x10],%f24
	fand	%f22,mask,%f26

	fmovdg	%fcc1,signbit,%f10

	fmuld	%f4,%f4,%f36
	faddd	%f8,%f30,%f8

	fsubd	%f22,%f26,%f34
	fmuld	%f26,%f24,%f26

	fmuld	%f20,%f24,%f28
	fxor	%f38,%f10,%f38

	fmuld	%f4,p3,%f6
	fnegd	pio2_lo,%f18

	fmuld	%f36,p2,%f2
	fmovdg	%fcc1,signbit,%f18

	fmuld	%f36,%f4,%f36
	fxor	%f38,pio2,%f10

	fmuld	%f34,%f24,%f34
	fsubd	%f20,%f26,%f24

	faddd	%f22,%f28,%f28

	faddd	%f2,p1,%f2

	fmuld	%f36,p4,%f30
	fxor	%f38,pio2_lo,%f32

	fsubd	%f24,%f34,%f24

	fxor	%f44,%f38,%f44

	fmuld	%f36,%f2,%f2
	faddd	%f18,%f32,%f18
	ldd	[%l1+0x8],%f32

	fmuld	%f36,%f36,%f36
	faddd	%f6,%f30,%f30

	fdivd	%f24,%f28,%f24
	fzero	%f20

	fmovdg	%fcc2,signbit,%f20

	faddd	%f2,%f8,%f2

	fmuld	%f14,%f14,%f38
	faddd	%f18,%f32,%f18

	fmuld	%f36,%f30,%f36
	fxor	%f40,%f20,%f40

	fnegd	pio2,%f6		! ah
	fmuld	%f14,p3,%f16

	fmovdg	%fcc0,signbit,%f6

	fmuld	%f38,p2,%f12
	fnegd	pio2_lo,%f28

	faddd	%f2,%f36,%f2
	fmuld	%f38,%f14,%f38

	faddd	%f6,%f0,%f6
	ldd	[%l0],%f0

	fmovdg	%fcc2,signbit,%f28

	faddd	%f12,p1,%f12

	fmuld	%f38,p4,%f32
	fxor	%f40,pio2_lo,%f34

	fxor	%f40,pio2,%f20

	faddd	%f2,%f4,%f2

	fmuld	%f38,%f12,%f12
	fxor	%f46,%f40,%f46

	fmuld	%f38,%f38,%f38
	faddd	%f16,%f32,%f32

	faddd	%f28,%f34,%f28
	ldd	[%l2+0x8],%f34

	faddd	%f6,%f0,%f6
	lda	[%i1]%asi,%f0		! preload next argument

	faddd	%f12,%f18,%f12
	lda	[%i1+4]%asi,%f1

	fmuld	%f24,%f24,%f40
	lda	[%i3]%asi,%f8

	fmuld	%f38,%f32,%f38
	faddd	%f28,%f34,%f28
	lda	[%i3+4]%asi,%f9

	fnegd	pio2,%f16
	fmuld	%f24,p3,%f26
	lda	[%i1]%asi,%o0

	fmovdg	%fcc1,signbit,%f16
	lda	[%i3]%asi,%l3

	fmuld	%f40,p2,%f22

	faddd	%f12,%f38,%f12
	fmuld	%f40,%f24,%f40

	faddd	%f2,%f6,%f6

	faddd	%f16,%f10,%f16
	ldd	[%l1],%f10

	faddd	%f22,p1,%f22

	faddd	%f12,%f14,%f12
	fmuld	%f40,p4,%f34

	fxor	%f6,%f42,%f6
	st	%f6,[%l4]

	faddd	%f16,%f10,%f16
	st	%f7,[%l4+4]

	fmuld	%f40,%f22,%f22

	fmuld	%f40,%f40,%f40
	faddd	%f26,%f34,%f34

	fnegd	pio2,%f26

	faddd	%f12,%f16,%f16

	faddd	%f22,%f28,%f22

	fmuld	%f40,%f34,%f40
	fmovdg	%fcc2,signbit,%f26

! -

	fxor	%f16,%f44,%f16
	st	%f16,[%l5]

	faddd	%f26,%f20,%f26
	st	%f17,[%l5+4]
	addcc	%i0,-1,%i0

	faddd	%f22,%f40,%f22
	bg,pt	%icc,.loop
! delay slot
	ldd	[%l2],%f20


! loop fell through: finish the pending element 2 result and store it
	faddd	%f26,%f20,%f26
	faddd	%f22,%f24,%f22
	faddd	%f22,%f26,%f26
.done_from_special0:
	fxor	%f26,%f46,%f26
	st	%f26,[%l6]
	st	%f27,[%l6+4]
	ret
	restore



! n ran out after element 0: run stage 1 on dummy operands and make the
! next output pointer (taken from %i5 at .cont1) the scratch slot
	.align	16
.last1:
	fmovd	pio2,%f10		! set up dummy arguments
	fmovd	pio2,%f18
	fabsd	%f10,%f14
	fabsd	%f18,%f12
	sethi	%hi(0x3ff921fb),%o0
	or	%o0,%lo(0x3ff921fb),%o0
	mov	%o0,%l3
	ba,pt	%icc,.cont1
! delay slot
	add	%fp,junk,%i5



! n ran out after element 1: run stage 2 on dummy operands and store
! its result to the scratch slot
	.align	16
.last2:
	fmovd	pio2,%f20
	fmovd	pio2,%f28
	fabsd	%f20,%f24
	fabsd	%f28,%f22
	sethi	%hi(0x3ff921fb),%o0
	or	%o0,%lo(0x3ff921fb),%o0
	mov	%o0,%l3
	ba,pt	%icc,.cont2
! delay slot
	add	%fp,junk,%l6



! element 0 compared unordered: the result is formed by the fmuld in
! the branch delay slot below
	.align	16
.nan0:
	faddd	%f22,%f26,%f26
.nan0_from_special0:
	fabsd	%f10,%f14
	lda	[%i3+4]%asi,%f19
	fabsd	%f18,%f12
	lda	[%i1]%asi,%o0
	lda	[%i3]%asi,%l3
	ba,pt	%icc,.special0
! delay slot
	fmuld	%f0,%f2,%f6


	.align	16
.big0:
	fabsd	%f18,%f12
	lda	[%i1]%asi,%o0
	lda	[%i3]%asi,%l3
	cmp	%g5,%o5
	bge,pn	%icc,.return_ah0	! if hx >= 0x7ff00000
! delay slot
	nop
	cmp	%l0,%o4
	bge,pn	%icc,1f			! if hx - hy >= 0x03600000
! delay slot
	nop
	ldd	[%fp+twom3],%f6		! otherwise scale both operands by twom3
	fmuld	%f0,%f6,%f0
	fmuld	%f2,%f6,%f2
	add	%l0,%o1,%l0
	addcc	%i0,-1,%i0
	ble,pn	%icc,.last1
! delay slot
	nop
	ba,pt	%icc,.cont1
! delay slot
	nop
1:
	fbg,pn	%fcc0,.return_ah0
! delay slot
	nop
	fcmpd	%fcc3,%f8,signbit
	fbl,pn	%fcc3,.return_ah0
! delay slot
	nop
	ba,pt	%icc,.special0
! delay slot
	fdivd	%f0,%f2,%f6


	.align	16
.small0:
	lda	[%i3]%asi,%l3
	fcmpd	%fcc3,%f0,signbit
	fbe,pt	%fcc3,.return_ah0
! delay slot
	nop
	ldd	[%fp+two110],%f6	! scale both operands by two110
	fmuld	%f0,%f6,%f0
	fmuld	%f2,%f6,%f2
	st	%f0,[%fp+yscl]		! recompute k from the scaled high words
	ld	[%fp+yscl],%o7
	st	%f2,[%fp+xscl]
	ld	[%fp+xscl],%l0
	sub	%l0,%o7,%l0
	add	%l0,%o1,%l0
	addcc	%i0,-1,%i0
	ble,pn	%icc,.last1
! delay slot
	nop
	ba,pt	%icc,.cont1
! delay slot
	nop


	.align	16
.return_ah0:
	fzero	%f0
	fmovdg	%fcc0,signbit,%f0
	fxor	%f36,%f0,%f36
	fxor	%f36,pio2,%f0
	fxor	%f42,%f36,%f42
	fnegd	pio2,%f6
	fmovdg	%fcc0,signbit,%f6
	faddd	%f6,%f0,%f6
	sub	%g5,%l0,%o7
	cmp	%o7,%o5
	bl,pt	%icc,1f			! if hy < 0x7ff00000
! delay slot
	nop
	ldd	[%fp+pio4],%f0
	faddd	%f6,%f0,%f6
1:
	fdtoi	%f6,%f4			! result unused; presumably raises inexact - TODO confirm
.special0:
	fxor	%f6,%f42,%f6
	st	%f6,[%l4]
	st	%f7,[%l4+4]
	addcc	%i0,-1,%i0
	ble,pn	%icc,.done_from_special0
! delay slot
	nop
! restart stage 0 on the element that had been preloaded for stage 1
	fmovd	%f10,%f0
	fmovd	%f18,%f8
	fmovd	%f14,%f4
	fmovd	%f12,%f2
	mov	%i5,%l4
	add	%i1,%i2,%i1
	add	%i3,%i4,%i3
	add	%i5,%l7,%i5
	fand	%f0,signbit,%f42
	sethi	%hi(0x80000000),%g5
	fand	%f8,signbit,%f36
	andn	%o0,%g5,%o0
	andn	%l3,%g5,%l3
	fcmpd	%fcc0,%f4,%f2
	fmovd	%f4,%f0
	fmovdg	%fcc0,%f2,%f0
	fmovdg	%fcc0,%f4,%f2
	mov	%o0,%o7
	movg	%fcc0,%l3,%o0
	movg	%fcc0,%o7,%l3
	lda	[%i1]%asi,%f10
	lda	[%i1+4]%asi,%f11
	fbu,pn	%fcc0,.nan0_from_special0
! delay slot
	lda	[%i3]%asi,%f18
	fabsd	%f10,%f14
	lda	[%i3+4]%asi,%f19
	sub	%l3,%o0,%l0
	sub	%l3,%o3,%g5
	sub	%l0,%o4,%o7
	andcc	%g5,%o7,%g0
	bge,pn	%icc,.big0
! delay slot
	nop
	fabsd	%f18,%f12
	cmp	%o0,%o5
	bl,pn	%icc,.small0
! delay slot
	lda	[%i1]%asi,%o0
	add	%l0,%o1,%l0
	addcc	%i0,-1,%i0
	ble,pn	%icc,.last1
! delay slot
	lda	[%i3]%asi,%l3
	ba,pt	%icc,.cont1
! delay slot
	nop



! element 1 compared unordered: first complete the stage 0 arithmetic
! that the main line would have issued after the branch
	.align	16
.nan1:
	fmuld	%f30,%f4,%f30
	fsubd	%f0,%f6,%f4
	faddd	%f2,%f8,%f8
	fsubd	%f4,%f30,%f4
.nan1_from_special1:
	lda	[%i3]%asi,%f28
	lda	[%i3+4]%asi,%f29
	fabsd	%f20,%f24
	lda	[%i1]%asi,%o0
	fabsd	%f28,%f22
	lda	[%i3]%asi,%l3
	mov	%i5,%l6
	ba,pt	%icc,.special1
! delay slot
	fmuld	%f10,%f12,%f16


	.align	16
.big1:
	faddd	%f2,%f8,%f8
	fsubd	%f4,%f30,%f4
.big1_from_special1:
	lda	[%i1]%asi,%o0
	fabsd	%f28,%f22
	lda	[%i3]%asi,%l3
	mov	%i5,%l6
	cmp	%g5,%o5
	bge,pn	%icc,.return_ah1
! delay slot
	nop
	cmp	%l1,%o4
	bge,pn	%icc,1f
! delay slot
	nop
	ldd	[%fp+twom3],%f16
	fmuld	%f10,%f16,%f10
	fmuld	%f12,%f16,%f12
	add	%l1,%o1,%l1
	srl	%l1,10,%l1
	addcc	%i0,-1,%i0
	ble,pn	%icc,.last2
! delay slot
	nop
	ba,pt	%icc,.cont2
! delay slot
	nop
1:
	fbg,pn	%fcc1,.return_ah1
! delay slot
	nop
	fcmpd	%fcc3,%f18,signbit
	fbl,pn	%fcc3,.return_ah1
! delay slot
	nop
	ba,pt	%icc,.special1
! delay slot
	fdivd	%f10,%f12,%f16


	.align	16
.small1:
	fsubd	%f4,%f30,%f4
.small1_from_special1:
	fabsd	%f28,%f22
	lda	[%i3]%asi,%l3
	mov	%i5,%l6
	fcmpd	%fcc3,%f10,signbit
	fbe,pt	%fcc3,.return_ah1
! delay slot
	nop
	ldd	[%fp+two110],%f16
	fmuld	%f10,%f16,%f10
	fmuld	%f12,%f16,%f12
	st	%f10,[%fp+yscl]
	ld	[%fp+yscl],%o7
	st	%f12,[%fp+xscl]
	ld	[%fp+xscl],%l1
	sub	%l1,%o7,%l1
	add	%l1,%o1,%l1
	srl	%l1,10,%l1
	addcc	%i0,-1,%i0
	ble,pn	%icc,.last2
! delay slot
	nop
	ba,pt	%icc,.cont2
! delay slot
	nop


	.align	16
.return_ah1:
	fzero	%f10
	fmovdg	%fcc1,signbit,%f10
	fxor	%f38,%f10,%f38
	fxor	%f38,pio2,%f10
	fxor	%f44,%f38,%f44
	fnegd	pio2,%f16
	fmovdg	%fcc1,signbit,%f16
	faddd	%f16,%f10,%f16
	sub	%g5,%l1,%o7
	cmp	%o7,%o5
	bl,pt	%icc,1f
! delay slot
	nop
	ldd	[%fp+pio4],%f10
	faddd	%f16,%f10,%f16
1:
	fdtoi	%f16,%f14		! result unused; presumably raises inexact - TODO confirm
.special1:
	fxor	%f16,%f44,%f16
	st	%f16,[%l5]
	st	%f17,[%l5+4]
	addcc	%i0,-1,%i0
	bg,pn	%icc,1f
! delay slot
	nop
	fmovd	pio2,%f20		! set up dummy argument
	fmovd	pio2,%f28
	fabsd	%f20,%f24
	fabsd	%f28,%f22
	sethi	%hi(0x3ff921fb),%o0
	or	%o0,%lo(0x3ff921fb),%o0
	mov	%o0,%l3
	add	%fp,junk,%i5
1:
! restart stage 1 on the element that had been preloaded for stage 2
	fmovd	%f20,%f10
	fmovd	%f28,%f18
	fmovd	%f24,%f14
	fmovd	%f22,%f12
	mov	%i5,%l5
	add	%i1,%i2,%i1
	add	%i3,%i4,%i3
	add	%i5,%l7,%i5
	fand	%f10,signbit,%f44
	sethi	%hi(0x80000000),%g5
	fand	%f18,signbit,%f38
	andn	%o0,%g5,%o0
	andn	%l3,%g5,%l3
	fcmpd	%fcc1,%f14,%f12
	fmovd	%f14,%f10
	fmovdg	%fcc1,%f12,%f10
	fmovdg	%fcc1,%f14,%f12
	mov	%o0,%o7
	movg	%fcc1,%l3,%o0
	movg	%fcc1,%o7,%l3
	lda	[%i1]%asi,%f20
	lda	[%i1+4]%asi,%f21
	fbu,pn	%fcc1,.nan1_from_special1
! delay slot
	nop
	lda	[%i3]%asi,%f28
	lda	[%i3+4]%asi,%f29
	fabsd	%f20,%f24
	sub	%l3,%o0,%l1
	sub	%l3,%o3,%g5
	sub	%l1,%o4,%o7
	andcc	%g5,%o7,%g0
	bge,pn	%icc,.big1_from_special1
! delay slot
	nop
	cmp	%o0,%o5
	bl,pn	%icc,.small1_from_special1
! delay slot
	lda	[%i1]%asi,%o0
	fabsd	%f28,%f22
	lda	[%i3]%asi,%l3
	add	%l1,%o1,%l1
	srl	%l1,10,%l1
	addcc	%i0,-1,%i0
	ble,pn	%icc,.last2
! delay slot
	mov	%i5,%l6
	ba,pt	%icc,.cont2
! delay slot
	nop



! element 2 compared unordered: first complete the stage 0/1 arithmetic
! that the main line would have issued after the branch
	.align	16
.nan2:
	fmovdg	%fcc0,signbit,%f0
	fmuld	%f32,%f14,%f32
	fsubd	%f10,%f16,%f14
	faddd	%f12,%f18,%f18
	fxor	%f36,%f0,%f36
.nan2_from_special2:
	ba,pt	%icc,.special2
! delay slot
	fmuld	%f20,%f22,%f26


	.align	16
.big2:
	fxor	%f36,%f0,%f36
.big2_from_special2:
	cmp	%g5,%o5
	bge,pn	%icc,.return_ah2
! delay slot
	nop
	cmp	%l2,%o4
	bge,pn	%icc,1f
! delay slot
	nop
	ldd	[%fp+twom3],%f26
	fmuld	%f20,%f26,%f20
	fmuld	%f22,%f26,%f22
	ba,pt	%icc,.cont3
! delay slot
	nop
1:
	fbg,pn	%fcc2,.return_ah2
! delay slot
	nop
	fcmpd	%fcc3,%f28,signbit
	fbl,pn	%fcc3,.return_ah2
! delay slot
	nop
	ba,pt	%icc,.special2
! delay slot
	fdivd	%f20,%f22,%f26


	.align	16
.small2:
	fcmpd	%fcc3,%f20,signbit
	fbe,pt	%fcc3,.return_ah2
! delay slot
	nop
	ldd	[%fp+two110],%f26
	fmuld	%f20,%f26,%f20
	fmuld	%f22,%f26,%f22
	st	%f20,[%fp+yscl]
	ld	[%fp+yscl],%o7
	st	%f22,[%fp+xscl]
	ld	[%fp+xscl],%l2
	sub	%l2,%o7,%l2
	ba,pt	%icc,.cont3
! delay slot
	nop


	.align	16
.return_ah2:
	fzero	%f20
	fmovdg	%fcc2,signbit,%f20
	fxor	%f40,%f20,%f40
	fxor	%f40,pio2,%f20
	fxor	%f46,%f40,%f46
	fnegd	pio2,%f26
	fmovdg	%fcc2,signbit,%f26
	faddd	%f26,%f20,%f26
	sub	%g5,%l2,%o7
	cmp	%o7,%o5
	bl,pt	%icc,1f
! delay slot
	nop
	ldd	[%fp+pio4],%f20
	faddd	%f26,%f20,%f26
1:
	fdtoi	%f26,%f24		! result unused; presumably raises inexact - TODO confirm
.special2:
	fxor	%f26,%f46,%f26
	st	%f26,[%l6]
	st	%f27,[%l6+4]
	addcc	%i0,-1,%i0
	bg,pn	%icc,1f
! delay slot
	nop
	fmovd	pio2,%f20		! set up dummy argument
	fmovd	pio2,%f22
	fzero	%f40
	fzero	%f46
	mov	0,%l2
	ba,pt	%icc,.cont3
! delay slot
	add	%fp,junk,%l6
1:
! reload stage 2 with the next element and repeat its classification
	lda	[%i1]%asi,%f20
	lda	[%i1+4]%asi,%f21
	lda	[%i3]%asi,%f28
	lda	[%i3+4]%asi,%f29
	fabsd	%f20,%f24
	lda	[%i1]%asi,%o0
	fabsd	%f28,%f22
	lda	[%i3]%asi,%l3
	mov	%i5,%l6
	fand	%f20,signbit,%f46
	add	%i1,%i2,%i1
	fand	%f28,signbit,%f40
	fcmpd	%fcc2,%f24,%f22
	add	%i3,%i4,%i3
	add	%i5,%l7,%i5
	fmovd	%f24,%f20
	sethi	%hi(0x80000000),%g5
	andn	%o0,%g5,%o0
	andn	%l3,%g5,%l3
	fmovdg	%fcc2,%f22,%f20
	fmovdg	%fcc2,%f24,%f22
	mov	%o0,%o7
	movg	%fcc2,%l3,%o0
	movg	%fcc2,%o7,%l3
	fbu,pn	%fcc2,.nan2_from_special2
! delay slot
	nop
	sub	%l3,%o0,%l2
	sub	%l3,%o3,%g5
	sub	%l2,%o4,%o7
	andcc	%g5,%o7,%g0
	bge,pn	%icc,.big2_from_special2
! delay slot
	nop
	cmp	%o0,%o5
	bl,pn	%icc,.small2
! delay slot
	nop
	ba,pt	%icc,.cont3
! delay slot
	nop

	SET_SIZE(__vatan2)
